2021-03-11 19:16:15 +01:00
|
|
|
"""URI (RFC 3986) helpers for Gemini navigation."""
|
|
|
|
|
2021-02-12 19:01:42 +01:00
|
|
|
import urllib.parse
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def parse_url(url: str, absolute: bool = False) -> urllib.parse.ParseResult:
    """Return URL parts from this URL.

    This uses urllib.parse.urlparse to not reinvent the wheel, with a few
    adjustments.

    First, urllib does not know the Gemini scheme, so when the URL starts with
    "gemini://" the "gemini:" prefix is stripped and the remaining "//host/..."
    is parsed with "gemini" as the default scheme, which yields a proper
    netloc.

    Second, as this function can be used to process arbitrary user input, the
    URL is cleaned a bit:
    - surrounding whitespace is stripped;
    - a URL without a network location (e.g. "dece.space", which lacks the
      "//" delimiter) is re-parsed as if it had been written "//dece.space".

    URLs starting with "file://" are handed to urlparse unchanged.

    Args:
        url: the URL to parse, possibly typed by the user.
        absolute: accepted for backward compatibility. URLs lacking a netloc
            are always promoted to absolute ones, so the flag no longer
            changes the result. Previously, passing True for a URL that
            already had a netloc prepended a second "//" and lost the host.

    Returns:
        The urllib.parse.ParseResult for the cleaned URL.
    """
    url = url.strip()
    if url.startswith("file://"):
        return urllib.parse.urlparse(url)
    if url.startswith("gemini://"):
        # Drop only "gemini:" so the URL still starts with "//" and urlparse
        # recognises the network location.
        url = url[len("gemini:"):]
    parts = urllib.parse.urlparse(url, scheme="gemini")
    if not parts.netloc:
        # Only add the "//" delimiter when it is actually missing; adding it
        # to an already absolute URL would produce "////host/..." and an
        # empty netloc.
        parts = urllib.parse.urlparse(f"//{url}", scheme="gemini")
    return parts
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def sanitize_url(url: str):
    """Return the URL rebuilt from its parsed parts.

    The URL goes through parse_url and the resulting parts are reassembled
    with urllib.parse.urlunparse, so the output is a consistently formatted
    URL string.
    """
    parsed = parse_url(url)
    return urllib.parse.urlunparse(parsed)
|
|
|
|
|
|
|
|
|
2021-02-16 19:10:11 +01:00
|
|
|
def join_url(base_url: str, url: str):
    """Resolve `url` against `base_url` and return the resulting URL string.

    A leading "gemini:" is removed from the base (keeping its "//") before
    calling urllib.parse.urljoin; the joined result is then parsed again with
    parse_url and reassembled with urlunparse.
    """
    # Slice off the 7 characters of "gemini:" so the base starts with "//".
    scheme_less_base = (
        base_url[7:] if base_url.startswith("gemini://") else base_url
    )
    joined = urllib.parse.urljoin(scheme_less_base, url)
    return urllib.parse.urlunparse(parse_url(joined))
|
2021-02-16 19:10:11 +01:00
|
|
|
|
|
|
|
|
|
|
|
def set_parameter(url: str, user_input: str):
    """Return `url` with its query replaced by the percent-encoded input.

    Anything from the first "?" onwards in `url` is discarded, then "?" and
    the result of urllib.parse.quote(user_input) are appended.
    """
    escaped = urllib.parse.quote(user_input)
    # partition() yields the whole string as head when there is no "?".
    head = url.partition("?")[0]
    return f"{head}?{escaped}"
|
2021-03-14 02:05:42 +01:00
|
|
|
|
|
|
|
|
|
|
|
def get_parent_url(url: str) -> str:
    """Return the URL one path level above `url`.

    The path is cut just after the last "/" that precedes its final segment
    (trailing slashes are ignored when locating that segment). When no such
    "/" exists the path is left as is. All other URL components returned by
    parse_url are kept unchanged.
    """
    parts = parse_url(url)
    current_path = parts.path
    cut = current_path.rstrip("/").rfind("/")
    parent_path = current_path if cut < 0 else current_path[:cut + 1]
    return urllib.parse.urlunparse(parts._replace(path=parent_path))
|