This repository has been archived on 2024-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
Bebop/bebop/navigation.py

61 lines
2 KiB
Python
Raw Normal View History

2021-03-11 19:16:15 +01:00
"""URI (RFC 3986) helpers for Gemini navigation."""
2021-02-12 19:01:42 +01:00
import urllib.parse
2021-02-16 19:10:11 +01:00
def parse_url(url: str, absolute: bool = False):
    """Return URL parts from this URL.

    This uses urllib.parse.urlparse to not reinvent the wheel, with a few
    adjustments.

    First, urllib does not know the Gemini scheme, so if it is specified we
    strip the "gemini:" prefix and keep the leading "//": urlparse then sees
    a network-path reference and extracts the netloc from it.

    Second, as this function can be used to process arbitrary user input, we
    clean it a bit:
    - surrounding whitespace is stripped from the URL;
    - a URL in which no netloc was found (e.g. "dece.space", which is not
      technically absolute as it lacks the "//" delimiter) is parsed again
      with "//" prepended, so that its first segment becomes the netloc.

    Args:
        url: URL or raw user input to parse.
        absolute: hint that the URL is meant to be absolute. It is kept for
            backward compatibility: input without a netloc is always treated
            as host-first, and input that already has a netloc is returned
            as parsed, so the flag does not change the result.

    Returns:
        The urllib.parse.ParseResult for the URL. URLs starting with
        "file://" are parsed untouched; for anything else the scheme defaults
        to "gemini" when the URL does not carry one.
    """
    url = url.strip()
    if url.startswith("file://"):
        return urllib.parse.urlparse(url)

    if url.startswith("gemini://"):
        # Drop only "gemini:" so that the URL still starts with "//".
        url = url[len("gemini:"):]

    parts = urllib.parse.urlparse(url, scheme="gemini")
    # Only prepend "//" when no netloc was found. Doing it on a URL that
    # already has one (which the "absolute" flag used to force) turns
    # "//host/path" into "////host/path": the netloc comes back empty and the
    # host ends up in the path.
    if not parts.netloc:
        parts = urllib.parse.urlparse(f"//{url}", scheme="gemini")
    return parts
2021-02-16 19:10:11 +01:00
def sanitize_url(url: str):
    """Return the URL rebuilt from its parsed parts.

    The URL goes through parse_url and is then reassembled with
    urllib.parse.urlunparse, which ensures it is properly formatted.
    """
    parts = parse_url(url)
    return urllib.parse.urlunparse(parts)
2021-02-16 19:10:11 +01:00
def join_url(base_url: str, url: str):
    """Resolve a relative URL against a base URL and return the result.

    The joined URL is passed through parse_url before being reassembled, so
    the returned string gets the same treatment as any other parsed URL.
    """
    if base_url.startswith("gemini://"):
        # Drop "gemini:" but keep the leading "//" before joining
        # (presumably because urljoin does not resolve references against a
        # scheme it does not know; confirm against the urllib documentation).
        base_url = base_url[len("gemini:"):]
    joined = urllib.parse.urljoin(base_url, url)
    return urllib.parse.urlunparse(parse_url(joined))
2021-02-16 19:10:11 +01:00
def set_parameter(url: str, user_input: str):
    """Return a new URL whose query is the escaped user input.

    Anything from the first "?" of the URL onwards is discarded and replaced
    by "?" followed by the percent-encoded input.
    """
    escaped = urllib.parse.quote(user_input)
    # partition() yields the whole URL as the head when there is no "?".
    head, _, _ = url.partition("?")
    return f"{head}?{escaped}"
def get_parent_url(url: str) -> str:
    """Return the parent URL (one level up).

    The path is cut right after the last "/" that precedes its final
    segment; trailing slashes are ignored when looking for that segment. A
    path with no such "/" is left as is. Scheme, netloc, params, query and
    fragment are carried over unchanged.
    """
    parts = parse_url(url)
    parent_path = parts.path
    cut = parent_path.rstrip("/").rfind("/")
    if cut >= 0:
        # Keep the slash itself so the parent still reads as a directory.
        parent_path = parent_path[:cut + 1]
    return urllib.parse.urlunparse((
        parts.scheme,
        parts.netloc,
        parent_path,
        parts.params,
        parts.query,
        parts.fragment,
    ))