navigation: rework URL management entirely

oh and fix the whole history mess a bit
exec
dece 3 years ago
parent b26dad0d0d
commit 397a143695

@ -18,7 +18,8 @@ from bebop.history import History
from bebop.links import Links
from bebop.mouse import ButtonState
from bebop.navigation import (
get_parent_url, get_root_url, join_url, parse_url, sanitize_url)
get_parent_url, get_root_url, join_url, parse_url, unparse_url
)
from bebop.page import Page
from bebop.page_pad import PagePad
@ -89,6 +90,9 @@ class Browser:
"help": {
"open": self.open_help,
},
"history": {
"open": self.open_history,
},
}
def run(self, *args, **kwargs):
@ -176,6 +180,8 @@ class Browser:
self.add_bookmark()
elif char == ord("e"):
self.edit_page()
elif char == ord("y"):
self.open_history()
elif curses.ascii.isdigit(char):
self.handle_digit_input(char)
elif char == curses.KEY_MOUSE:
@ -300,38 +306,52 @@ class Browser:
return
if assume_absolute or not self.current_url:
parts = parse_url(url, absolute=True)
join = False
parts = parse_url(url, absolute=True, default_scheme="gemini")
else:
parts = parse_url(url)
join = True
if parts.scheme == "gemini":
if parts["netloc"] is None:
base_url = base_url or self.current_url
if base_url:
parts = parse_url(join_url(base_url, url))
else:
self.set_status_error(f"Can't open '{url}'.")
return
# Replace URL passed as parameter by a proper absolute one.
url = unparse_url(parts)
scheme = parts["scheme"] or ""
if scheme == "gemini":
from bebop.browser.gemini import open_gemini_url
# If there is no netloc, this is a relative URL.
if join or base_url:
url = join_url(base_url or self.current_url, url)
open_gemini_url(
success = open_gemini_url(
self,
sanitize_url(url),
url,
redirects=redirects,
history=history,
use_cache=use_cache
)
elif parts.scheme.startswith("http"):
if history and success:
self.history.push(url)
elif scheme.startswith("http"):
from bebop.browser.web import open_web_url
open_web_url(self, url)
elif parts.scheme == "file":
elif scheme == "file":
from bebop.browser.file import open_file
open_file(self, parts.path, history=history)
elif parts.scheme == "bebop":
special_page = self.special_pages.get(parts.netloc)
file_url = open_file(self, parts["path"])
if history and file_url:
self.history.push(file_url)
elif scheme == "bebop":
special_page = self.special_pages.get(parts["path"])
if special_page:
special_page["open"]()
else:
self.set_status_error("Unknown page.")
else:
self.set_status_error(f"Protocol {parts.scheme} not supported.")
self.set_status_error(f"Protocol '{scheme}' not supported.")
def load_page(self, page: Page):
"""Load Gemtext data as the current page."""
@ -455,8 +475,9 @@ class Browser:
def go_back(self):
"""Go back in history if possible."""
if self.history.has_links():
self.open_url(self.history.pop(), history=False)
previous_url = self.history.get_previous()
if previous_url:
self.open_url(previous_url, history=False)
def go_to_parent_page(self):
"""Go to the parent URL if possible."""
@ -475,7 +496,7 @@ class Browser:
self.set_status_error("Failed to open bookmarks.")
return
self.load_page(Page.from_gemtext(content, self.config["text_width"]))
self.current_url = "bebop://bookmarks"
self.current_url = "bebop:bookmarks"
def add_bookmark(self):
"""Add the current URL as bookmark."""
@ -502,8 +523,9 @@ class Browser:
directly from their location on disk.
"""
delete_source_after = False
if self.current_url.startswith("bebop://"):
page_name = self.current_url[len("bebop://"):]
parts = parse_url(self.current_url)
if parts["scheme"] == "bebop":
page_name = parts["path"]
special_pages_functions = self.special_pages.get(page_name)
if not special_pages_functions:
return
@ -530,9 +552,17 @@ class Browser:
def open_help(self):
"""Show the help page."""
self.load_page(Page.from_gemtext(HELP_PAGE, self.config["text_width"]))
self.current_url = "bebop://help"
self.current_url = "bebop:help"
def prompt(self, text, keys):
    """Show a message in the status line, then ask the user for a key.

    Arguments:
    - text: message handed to `set_status` before prompting.
    - keys: keys forwarded as-is to the command line's `prompt_key`.

    Returns:
    Whatever `self.command_line.prompt_key(keys)` returns.
    """
    self.set_status(text)
    answer = self.command_line.prompt_key(keys)
    return answer
def open_history(self):
    """Render the browsing history as a Gemtext page and display it.

    The page source comes from `self.history.to_gemtext()` and is wrapped
    using the configured "text_width". Once the page is loaded, the current
    URL is set to the special "bebop:history" page.
    """
    history_source = self.history.to_gemtext()
    text_width = self.config["text_width"]
    history_page = Page.from_gemtext(history_source, text_width)
    self.load_page(history_page)
    self.current_url = "bebop:history"

@ -4,22 +4,29 @@ from bebop.browser.browser import Browser
from bebop.page import Page
def open_file(browser: Browser, filepath: str, encoding="utf-8", history=True):
def open_file(browser: Browser, filepath: str, encoding="utf-8"):
"""Open a file and render it.
This should be used only on Gemtext files or at least text files.
Anything else will produce garbage and may crash the program. In the
future this should be able to use a different parser according to a MIME
type or something.
Arguments:
- browser: Browser object making the request.
- filepath: a text file path on disk.
- encoding: file's encoding.
Returns:
The loaded file URI on success, None otherwise (e.g. file not found).
"""
try:
with open(filepath, "rt", encoding=encoding) as f:
text = f.read()
except (OSError, ValueError) as exc:
browser.set_status_error(f"Failed to open file: {exc}")
return
return None
browser.load_page(Page.from_text(text))
file_url = "file://" + filepath
if history:
browser.history.push(file_url)
browser.current_url = file_url
return file_url

@ -14,8 +14,7 @@ from bebop.tofu import trust_fingerprint, untrust_fingerprint, WRONG_FP_ALERT
MAX_URL_LEN = 1024
def open_gemini_url(browser: Browser, url, redirects=0, history=True,
use_cache=True):
def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True):
"""Open a Gemini URL and set the formatted response as content.
While the specification is not set in stone, every client takes a slightly
@ -33,12 +32,14 @@ def open_gemini_url(browser: Browser, url, redirects=0, history=True,
as we're doing TOFU here, we could automatically trust it or let the user
choose. For simplicity, we always trust it permanently.
Attributes:
Arguments:
- browser: Browser object making the request.
- url: a valid URL with Gemini scheme to open.
- redirects: current amount of redirections done to open the initial URL.
- history: if true, save the final URL to history.
- use_cache: if true, look up if the page is cached before requesting it.
Returns:
True on success, False otherwise.
"""
if len(url) >= MAX_URL_LEN:
browser.set_status_error("Request URL too long.")
@ -48,11 +49,9 @@ def open_gemini_url(browser: Browser, url, redirects=0, history=True,
if use_cache and url in browser.cache:
browser.load_page(browser.cache[url])
if browser.current_url and history:
browser.history.push(browser.current_url)
browser.current_url = url
browser.set_status(url)
return
return True
req = Request(url, browser.stash)
connect_timeout = browser.config["connect_timeout"]
@ -69,7 +68,7 @@ def open_gemini_url(browser: Browser, url, redirects=0, history=True,
else:
error = f"Connection failed ({url})."
browser.set_status_error(error)
return
return False
if req.state == Request.STATE_INVALID_CERT:
pass
@ -88,13 +87,13 @@ def open_gemini_url(browser: Browser, url, redirects=0, history=True,
data = req.proceed()
if not data:
browser.set_status_error(f"Server did not respond in time ({url}).")
return
return False
response = Response.parse(data)
if not response:
browser.set_status_error(f"Server response parsing failed ({url}).")
return
return False
_handle_response(browser, response, url, redirects, history)
return _handle_response(browser, response, url, redirects)
def _handle_untrusted_cert(browser: Browser, request: Request):
@ -118,10 +117,14 @@ def _handle_untrusted_cert(browser: Browser, request: Request):
def _handle_response(browser: Browser, response: Response, url: str,
redirects: int, history: bool):
"""Handle a response from a Gemini server."""
redirects: int):
"""Handle a response from a Gemini server.
Returns:
True on success, False otherwise.
"""
if response.code == 20:
_handle_successful_response(browser, response, url, history)
return _handle_successful_response(browser, response, url)
elif response.generic_code == 30 and response.meta:
browser.open_url(response.meta, base_url=url, redirects=redirects + 1)
elif response.generic_code in (40, 50):
@ -132,10 +135,10 @@ def _handle_response(browser: Browser, response: Response, url: str,
else:
error = f"Unhandled response code {response.code}"
browser.set_status_error(error)
return False
def _handle_successful_response(browser: Browser, response: Response, url: str,
history: bool):
def _handle_successful_response(browser: Browser, response: Response, url: str):
"""Handle a successful response content from a Gemini server.
According to the MIME type received or inferred, the response is either
@ -150,8 +153,11 @@ def _handle_successful_response(browser: Browser, response: Response, url: str,
- browser: Browser instance that made the initial request.
- url: original URL.
- response: a successful Response.
- history: whether to modify history on a page load.
Returns:
True on success, False otherwise.
"""
# Use appropriate response parser according to the MIME type.
mime_type = response.get_mime_type()
page = None
error = None
@ -171,13 +177,14 @@ def _handle_successful_response(browser: Browser, response: Response, url: str,
else:
filepath = _get_download_path(url)
# If a page has been produced, load it. Else if a file has been retrieved,
# download it.
if page:
browser.load_page(page)
if browser.current_url and history:
browser.history.push(browser.current_url)
browser.current_url = url
browser.cache[url] = page
browser.set_status(url)
return True
elif filepath:
try:
with open(filepath, "wb") as download_file:
@ -186,8 +193,10 @@ def _handle_successful_response(browser: Browser, response: Response, url: str,
browser.set_status_error(f"Failed to save {url} ({exc})")
else:
browser.set_status(f"Downloaded {url} ({mime_type.short}).")
return True
elif error:
browser.set_status_error(error)
return False
def _get_download_path(url: str) -> Path:

@ -32,6 +32,7 @@ name, not the symbol itself.
- b: open bookmarks
- B: add current page to bookmarks
- e: open the current page source in an editor
- y: open history
- digits: go to the corresponding link ID
- escape: reset status line text
```

@ -2,20 +2,34 @@
class History:
"""Basic browsing history manager."""
"""Basic browsing history manager.
The history follows the "by last visited" behaviour of Firefox for the lack
of a better idea. Links are pushed as they are visited. If a link is visited
again, it bubbles up to the top of the history.
"""
def __init__(self):
self.urls = []
def has_links(self):
"""Return True if there is at least one URL in the history."""
return bool(self.urls)
def push(self, url):
"""Add an URL to the history."""
if not self.urls or self.urls[-1] != url:
self.urls.append(url)
"""Add an URL to the history.
If the URL is already in the list, it is moved to the top.
"""
try:
self.urls.remove(url)
except ValueError:
pass
self.urls.append(url)
def get_previous(self):
    """Return the URL stored just before the latest one.

    Returns:
    The second-to-last entry of `self.urls`, or None when the history holds
    fewer than two URLs.
    """
    if len(self.urls) < 2:
        return None
    return self.urls[-2]
def pop(self):
"""Return latest URL added to history and remove it."""
return self.urls.pop()
def to_gemtext(self):
    """Build a Gemtext document listing every URL of the history.

    Returns:
    One "=> url" link line per entry of `self.urls`, in list order, joined
    with newlines. An empty history gives an empty string.
    """
    link_lines = []
    for url in self.urls:
        link_lines.append("=> " + url)
    return "\n".join(link_lines)

@ -1,66 +1,192 @@
"""URI (RFC 3986) helpers for Gemini navigation."""
"""URI (RFC 3986) helpers for Gemini navigation.
import urllib.parse
It was supposed to be just thin fixes around urllib.parse functions but as
gemini is not recognized as a valid scheme it breaks a lot of things, so it
turned into a basic re-implementation of the RFC.
"""
import re
from ssl import RAND_pseudo_bytes
from typing import Any, Dict, Optional
from urllib.parse import quote
URI_RE = re.compile(
"^"
r"(?:(?P<scheme>[^:/?#\n]+):)?"
r"(?://(?P<netloc>[^/?#\n]*))?"
r"(?P<path>[^?#\n]*)"
r"(?:\?(?P<query>[^#\n]*))?"
r"(?:#(?P<fragment>.*))?"
"$"
)
def parse_url(url: str, absolute: bool =False):
"""Return URL parts from this URL.
This uses urllib.parse.urlparse to not reinvent the wheel, with a few
adjustments.
class InvalidUrlException(Exception):
"""Generic exception for invalid URLs used in this module."""
First, urllib does not know the Gemini scheme (yet!) so if it
is specified we strip it to get an absolute netloc.
def __init__(self, url):
super().__init__()
self.url = url
Second, as this function can be used to process arbitrary user input, we
clean it a bit:
- strip whitespaces from the URL
- if "absolute" is True, consider that the URL is meant to be absolute, even
though it technically is not, e.g. "dece.space" is not absolute as it
misses either the // delimiter.
def parse_url(
url: str,
absolute: bool =False,
default_scheme: Optional[str] =None
) -> Dict[str, Any]:
"""Return URL parts from this URL.
Use the RFC regex to get parts from URL. This function can be used on
regular URLs but also on not-so-compliant URLs, e.g. "capsule.org/page",
which might be typed by an user (see `absolute` argument).
Arguments:
- url: URL to parse.
- absolute: assume the URL is absolute, e.g. in the case we are trying to
parse an URL an user has written, which is most of the time an absolute
URL even if not perfectly so. This only has an effect if, after the
initial parsing, there is no scheme or netloc available.
- default_scheme: specify the scheme to use if the URL either does not
specify it and we need it (e.g. there is a location), or `absolute` is
true; if absolute is true but `default_scheme` is not specified, use the
gemini scheme.
Returns:
URL parts, as a dictionary with the following keys: "scheme", "netloc",
"path", "query" and "fragment". All keys are present, but all values can be
None, except path which is always a string (but can be empty).
Raises:
InvalidUrlException if you put really really stupid strings in there.
"""
url = url.strip()
if url.startswith("file://"):
return urllib.parse.urlparse(url)
if url.startswith("gemini://"):
url = url[7:]
parts = urllib.parse.urlparse(url, scheme="gemini")
if not parts.netloc or absolute:
parts = urllib.parse.urlparse(f"//{url}", scheme="gemini")
return parts
match = URI_RE.match(url)
if not match:
raise InvalidUrlException(url)
match_dict = match.groupdict()
parts = {
k: match_dict.get(k)
for k in ("scheme", "netloc", "path", "query", "fragment")
}
def sanitize_url(url: str):
"""Parse and unparse an URL to ensure it has been properly formatted."""
return urllib.parse.urlunparse(parse_url(url))
# Smol hack: if we assume it's an absolute URL, just prefix scheme and "//".
if absolute and not parts["scheme"] and not parts["netloc"]:
scheme = default_scheme or "gemini"
return parse_url(scheme + "://" + url)
# Another smol hack: if there is no scheme, use `default_scheme` as default.
if default_scheme and parts["scheme"] is None:
parts["scheme"] = default_scheme
def join_url(base_url: str, url: str):
"""Join a base URL with a relative url."""
if base_url.startswith("gemini://"):
base_url = base_url[7:]
parts = parse_url(urllib.parse.urljoin(base_url, url))
return urllib.parse.urlunparse(parts)
return parts
def set_parameter(url: str, user_input: str):
def unparse_url(parts) -> str:
    """Recompose an URL string from the parts produced by `parse_url`.

    Arguments:
    - parts: mapping with the keys "scheme", "netloc", "path", "query" and
      "fragment"; a None value means the component is absent.

    Returns:
    The URL, built by concatenating each present component with its
    delimiter: "scheme:", "//netloc", "path", "?query" and "#fragment".
    """
    pieces = []
    scheme = parts["scheme"]
    if scheme is not None:
        pieces.append(scheme + ":")
    netloc = parts["netloc"]
    if netloc is not None:
        pieces.append("//" + netloc)
    path = parts["path"]
    if path is not None:
        pieces.append(path)
    query = parts["query"]
    if query is not None:
        pieces.append("?" + query)
    fragment = parts["fragment"]
    if fragment is not None:
        pieces.append("#" + fragment)
    return "".join(pieces)
def clear_post_path(parts) -> None:
    """Reset, in place, the components that come after the path.

    Arguments:
    - parts: URL parts mapping; its "query" and "fragment" entries are set
      to None, other entries are left untouched.
    """
    for key in ("query", "fragment"):
        parts[key] = None
def join_url(base_url: str, rel_url: str) -> str:
    """Resolve an URL reference against a base URL (RFC 3986, section 5.2.2).

    Arguments:
    - base_url: URL the reference is relative to.
    - rel_url: reference to resolve; usually a relative path, but references
      carrying their own scheme or netloc are handled as well.

    Returns:
    The resolved URL as a string, with dot segments removed from its path.

    Raises:
    InvalidUrlException (from `parse_url`) if one of the URLs can't be parsed.
    """
    base = parse_url(base_url)
    ref = parse_url(rel_url)

    # A reference with its own scheme (e.g. "bebop:help") is already
    # absolute: nothing is inherited from the base.
    if ref["scheme"] is not None:
        ref["path"] = remove_dot_segments(ref["path"])
        return unparse_url(ref)

    # Network-path reference ("//host/path"): only the scheme is inherited.
    if ref["netloc"] is not None:
        ref["scheme"] = base["scheme"]
        ref["path"] = remove_dot_segments(ref["path"])
        return unparse_url(ref)

    ref_path = ref["path"]
    if not ref_path:
        # Empty path ("", "?query" or "#fragment"): keep the base path, and
        # keep the base query too unless the reference provides its own.
        if ref["query"] is not None:
            base["query"] = ref["query"]
        base["fragment"] = ref["fragment"]
        return unparse_url(base)

    # Test the parsed path rather than the raw string, as `parse_url` strips
    # surrounding whitespace.
    if ref_path.startswith("/"):
        merged_path = ref_path
    else:
        base_path = base["path"] or ""
        if base["netloc"] is not None and not base_path:
            # "scheme://host" + "page" gives "/page".
            merged_path = "/" + ref_path
        else:
            # Keep everything up to and including the last "/" of the base
            # path; a base path without any "/" is replaced entirely.
            merged_path = base_path[:base_path.rfind("/") + 1] + ref_path

    base["path"] = remove_dot_segments(merged_path)
    base["query"] = ref["query"]
    base["fragment"] = ref["fragment"]
    return unparse_url(base)
def remove_dot_segments(path: str):
    """Resolve the "." and ".." segments of an URL path.

    The input is consumed from the left, one prefix at a time. Dot segments
    are dropped, ".." also drops the last segment already moved to the
    result, and any other segment is moved to the result unchanged.

    Arguments:
    - path: URL path, absolute or relative.

    Returns:
    The path without any dot segment.
    """
    resolved = ""
    remaining = path
    while remaining:
        # Leading "../" has nothing to go up from: drop it.
        if remaining.startswith("../"):
            remaining = remaining[3:]
            continue
        # Either strip a leading "./" or turn "/./" into a single "/".
        if remaining.startswith(("./", "/./")):
            remaining = remaining[2:]
            continue
        if remaining == "/.":
            remaining = "/"
            continue
        # "/../" goes one level up: forget the last resolved segment.
        if remaining.startswith("/../"):
            remaining = "/" + remaining[4:]
            resolved = remove_last_segment(resolved)
            continue
        if remaining == "/..":
            remaining = "/"
            resolved = remove_last_segment(resolved)
            continue
        # A lone dot segment is all that is left: discard it.
        if remaining in (".", ".."):
            remaining = ""
            continue
        # Regular segment: move it to the result.
        segment, remaining = pop_first_segment(remaining)
        resolved += segment
    return resolved
def remove_last_segment(path: str):
    """Remove the last path segment, including its preceding "/" if any.

    Arguments:
    - path: URL path, possibly empty.

    Returns:
    The path truncated right before its last "/". A path without any "/" is
    a single segment, so the result is then an empty string.
    """
    last_slash = path.rfind("/")
    # Without this guard, rfind's -1 would be used as a slice bound and only
    # the last character would be removed (e.g. "abc" -> "ab").
    if last_slash == -1:
        return ""
    return path[:last_slash]
def pop_first_segment(path: str):
    """Split a path into its first segment and the remainder.

    Arguments:
    - path: URL path, possibly empty.

    Returns:
    A (first_segment, rest) tuple. The first segment keeps its initial "/"
    if any and stops right before the next "/"; the rest starts at that
    next "/" and is empty when there is none.
    """
    # Search from index 1 so that a leading "/" stays with the first segment.
    split_at = path.find("/", 1)
    if split_at < 0:
        return path, ""
    return path[:split_at], path[split_at:]
def set_parameter(url: str, user_input: str) -> str:
"""Return a new URL with the escaped user input appended."""
quoted_input = urllib.parse.quote(user_input)
if "?" in url:
url = url.split("?", maxsplit=1)[0]
return url + "?" + quoted_input
parts = parse_url(url)
parts["query"] = quote(user_input)
return unparse_url(parts)
def get_parent_url(url: str) -> str:
"""Return the parent URL (one level up)."""
scheme, netloc, path, _, _, _ = parse_url(url)
def get_parent_path(path: str) -> str:
"""Return the parent path."""
last_slash = path.rstrip("/").rfind("/")
if last_slash > -1:
path = path[:last_slash + 1]
return urllib.parse.urlunparse((scheme, netloc, path, "", "", ""))
return path
def get_parent_url(url: str) -> str:
    """Return the URL one level above the given one.

    The path is replaced by its parent path (see `get_parent_path`) and the
    query and fragment are discarded; scheme and netloc are kept as parsed.
    """
    url_parts = parse_url(url)
    parent_path = get_parent_path(url_parts["path"])  # type: ignore
    url_parts["path"] = parent_path
    clear_post_path(url_parts)
    return unparse_url(url_parts)
def get_root_url(url: str) -> str:
"""Return the root URL (basically discards path)."""
scheme, netloc, _, _, _, _ = parse_url(url)
return urllib.parse.urlunparse((scheme, netloc, "/", "", "", ""))
parts = parse_url(url)
parts["path"] = "/"
clear_post_path(parts)
return unparse_url(parts)

@ -15,7 +15,7 @@ def render_lines(metalines, window, max_width):
Arguments:
- metalines: list of metalines to render, must have at least one element.
- window: window that will be resized as filled with rendered lines.
- window: window that will be resized and filled with rendered lines.
- max_width: line length limit for the pad.
Returns:

@ -1,32 +1,54 @@
import unittest
from ..navigation import join_url, parse_url, set_parameter
from ..navigation import (
get_parent_url, get_root_url, join_url, parse_url, pop_first_segment, remove_dot_segments,
remove_last_segment, set_parameter,
)
class TestNavigation(unittest.TestCase):
def test_parse_url(self):
res = parse_url("gemini://dece.space/parse-me.gmi")
self.assertEqual(res.scheme, "gemini")
self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res.path, "/parse-me.gmi")
# Basic complete URL.
res = parse_url("gemini://netloc/parse-me.gmi")
self.assertEqual(res["scheme"], "gemini")
self.assertEqual(res["netloc"], "netloc")
self.assertEqual(res["path"], "/parse-me.gmi")
res_netloc = parse_url("//dece.space/parse-me.gmi")
self.assertEqual(res, res_netloc)
# No scheme.
res_netloc = parse_url("//netloc/parse-me.gmi")
self.assertIsNone(res_netloc["scheme"], None)
for key in res_netloc:
if key == "scheme":
continue
self.assertEqual(res_netloc[key], res[key])
# No scheme but a default is provided.
res_netloc = parse_url("//netloc/parse-me.gmi", default_scheme="gemini")
self.assertDictEqual(res_netloc, res)
# No scheme nor netloc: only a path should be produced.
res = parse_url("dece.space/parse-me.gmi")
self.assertIsNone(res["scheme"])
self.assertIsNone(res["netloc"])
self.assertEqual(res["path"], "dece.space/parse-me.gmi")
# No scheme nor netloc but we should pretend having an absolute URL.
res = parse_url("dece.space/parse-me.gmi", absolute=True)
self.assertEqual(res.scheme, "gemini")
self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res.path, "/parse-me.gmi")
self.assertEqual(res["scheme"], "gemini")
self.assertEqual(res["netloc"], "dece.space")
self.assertEqual(res["path"], "/parse-me.gmi")
# HTTPS scheme.
res = parse_url("https://dece.space/index.html")
self.assertEqual(res.scheme, "https")
self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res.path, "/index.html")
self.assertEqual(res["scheme"], "https")
self.assertEqual(res["netloc"], "dece.space")
self.assertEqual(res["path"], "/index.html")
# File scheme.
res = parse_url("file:///home/dece/gemini/index.gmi")
self.assertEqual(res.scheme, "file")
self.assertEqual(res.path, "/home/dece/gemini/index.gmi")
self.assertEqual(res["scheme"], "file")
self.assertEqual(res["path"], "/home/dece/gemini/index.gmi")
def test_join_url(self):
url = join_url("gemini://dece.space/", "some-file.gmi")
@ -39,9 +61,84 @@ class TestNavigation(unittest.TestCase):
self.assertEqual(url, "gemini://dece.space/dir1/other-file.gmi")
url = join_url("gemini://dece.space/dir1/file.gmi", "../top-level.gmi")
self.assertEqual(url, "gemini://dece.space/top-level.gmi")
url = join_url("s://hard/dir/a", "./../test/b/c/../d/e/f/../.././a.gmi")
self.assertEqual(url, "s://hard/test/b/d/a.gmi")
def test_remove_dot_segments(self):
    """Check dot segment removal on relative and absolute paths."""
    expected_by_path = {
        "index.gmi": "index.gmi",
        "/index.gmi": "/index.gmi",
        "./index.gmi": "index.gmi",
        "/./index.gmi": "/index.gmi",
        "/../index.gmi": "/index.gmi",
        "/a/b/c/./../../g": "/a/g",
        "mid/content=5/../6": "mid/6",
        "../../../../g": "g",
    }
    for path, expected in expected_by_path.items():
        result = remove_dot_segments(path)
        self.assertEqual(result, expected, msg=f"path was {path}")
def test_remove_last_segment(self):
    """Check last segment removal, including empty and slash-only paths."""
    cases = [
        ("", ""),
        ("/", ""),
        ("/a", ""),
        ("/a/", "/a"),
        ("/a/b", "/a"),
        ("/a/b/c/d", "/a/b/c"),
        ("///", "//"),
    ]
    for path, expected in cases:
        self.assertEqual(remove_last_segment(path), expected)
def test_pop_first_segment(self):
    """Check the (first segment, rest) split on short paths."""
    cases = [
        ("", ("", "")),
        ("a", ("a", "")),
        ("/a", ("/a", "")),
        ("/a/", ("/a", "/")),
        ("/a/b", ("/a", "/b")),
        ("a/b", ("a", "/b")),
    ]
    for path, expected in cases:
        self.assertEqual(pop_first_segment(path), expected)
def test_set_parameter(self):
    """Check that user input is escaped and replaces any existing query."""
    # URL without query: the escaped input is appended.
    with_query = set_parameter("gemini://gus.guru/search", "my search")
    self.assertEqual(with_query, "gemini://gus.guru/search?my%20search")
    # URL with a previous query: it is replaced by the new input.
    replaced = set_parameter("gemini://gus.guru/search?old%20search", "new")
    self.assertEqual(replaced, "gemini://gus.guru/search?new")
def test_get_parent_url(self):
    """Check parent URL computation for full URLs and bare paths."""
    parent_by_url = {
        "gemini://host": "gemini://host",
        "gemini://host/": "gemini://host/",
        "gemini://host/a": "gemini://host/",
        "gemini://host/a/": "gemini://host/",
        "gemini://host/a/index.gmi": "gemini://host/a/",
        "gemini://host/a/b/": "gemini://host/a/",
        "gemini://host/a/b/file.flac": "gemini://host/a/b/",
        "//host/a/b": "//host/a/",
        # A single relative segment has no real parent; it is kept as is.
        "hey": "hey",
        "hey/ho": "hey/",
        "hey/ho/letsgo": "hey/ho/",
    }
    for url, parent in parent_by_url.items():
        self.assertEqual(get_parent_url(url), parent, msg=f"URL was {url}")
def test_get_root_url(self):
    """Check that the root URL keeps scheme and netloc with a "/" path."""
    root_by_url = {
        "gemini://host": "gemini://host/",
        "gemini://host/": "gemini://host/",
        "gemini://host/a": "gemini://host/",
        "gemini://host/a/b/c": "gemini://host/",
        "//host/path": "//host/",
        "//host/path?query": "//host/",
        "dumb": "/",
        "dumb/dumber": "/",
    }
    for url, root in root_by_url.items():
        self.assertEqual(get_root_url(url), root, msg=f"URL was {url}")

Loading…
Cancel
Save