2021-02-12 19:01:42 +01:00
|
|
|
import urllib.parse
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def parse_url(url: str, absolute: bool = False) -> urllib.parse.ParseResult:
    """Return URL parts from this URL.

    This uses urllib.parse.urlparse to not reinvent the wheel, with a few
    adjustments.

    First, urllib does not know the Gemini scheme (yet!) so if it is
    specified we strip the "gemini:" prefix, keeping the "//" delimiter, to
    get an absolute netloc. The scheme of the result then defaults to
    "gemini".

    Second, as this function can be used to process arbitrary user input, we
    clean it a bit:
    - strip whitespaces from the URL
    - if "absolute" is True, consider that the URL is meant to be absolute,
      even though it technically is not, e.g. "dece.space" is not absolute as
      it misses the // delimiter.

    A URL that already carries a netloc is never re-prefixed, so passing
    absolute=True on an already absolute URL leaves it intact.
    """
    scheme_prefix = "gemini:"
    url = url.strip()
    if url.startswith(scheme_prefix + "//"):
        # Drop only "gemini:" so the remaining "//host/..." is network-path
        # shaped and urlparse fills in the netloc.
        url = url[len(scheme_prefix):]
    parts = urllib.parse.urlparse(url, scheme="gemini")
    if absolute and not parts.netloc:
        # The caller asserts this is an absolute URL lacking its "//"
        # delimiter: add it so the leading segment is read as the host.
        parts = urllib.parse.urlparse(f"//{url}", scheme="gemini")
    return parts
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def sanitize_url(url: str):
    """Return the URL rebuilt from its parsed parts.

    The URL goes through parse_url and is then reassembled with
    urllib.parse.urlunparse, so the result is properly formatted.
    """
    parsed = parse_url(url)
    return urllib.parse.urlunparse(parsed)
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def join_url(base_url: str, url: str):
    """Resolve a relative URL against a base URL and return the result.

    A leading "gemini:" is removed from the base (its "//" delimiter is
    kept) before handing it to urllib.parse.urljoin; the joined URL is then
    parsed with parse_url and reassembled into a string.
    """
    has_gemini_scheme = base_url.startswith("gemini://")
    # Slice off the 7 characters of "gemini:" only, leaving "//host/...".
    base = base_url[7:] if has_gemini_scheme else base_url
    joined = urllib.parse.urljoin(base, url)
    return urllib.parse.urlunparse(parse_url(joined))
|
2021-02-16 19:10:11 +01:00
|
|
|
|
|
|
|
|
|
|
|
def set_parameter(url: str, user_input: str):
    """Return a new URL with the user input escaped (RFC 3986) appended.

    Any query already present on the URL is discarded and replaced by the
    percent-encoded user input. The query starts at the first "?" and may
    itself contain "?" characters, so everything from the first "?" onwards
    is dropped.
    """
    quoted_input = urllib.parse.quote(user_input)
    # partition returns the whole string as the head when there is no "?".
    base, _, _ = url.partition("?")
    return base + "?" + quoted_input
|