Compare commits

..

No commits in common. "5aa03da3e8a1d5626ad04acd9f5df190d75f5ac3" and "6b0a6220170f78813ad79960c1977cdb17c6cdf2" have entirely different histories.

11 changed files with 163 additions and 533 deletions

View file

@ -4,7 +4,7 @@ TODO DONE
links links
redirections redirections
web links web links
history (back) history (back/forward)
simple caching simple caching
simple text files simple text files
encodings encodings
@ -14,26 +14,20 @@ TODO DONE
configuration configuration
help page help page
TOFU TOFU
view history
open last download open last download
media files
home page home page
media files
view history
identity management identity management
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
BACKLOG BACKLOG
click on links to open them click on links to open them
download to disk, not in memory download to disk, not in memory
download in the background
download view instead of last download
does encoding really work? cf. egsam does encoding really work? cf. egsam
margins / centering margins / centering
pre blocks folding pre blocks folding
buffers (tabs) buffers (tabs)
a11y? tts?
handle soft-hyphens on wrapping handle soft-hyphens on wrapping
bug: combining chars reduce lengths bug: combining chars reduce lengths
non shit command-line non shit command-line
response code 11 (if still there) response code 11 (if still there)
gopher?
save history
history (forward) (useful?)

View file

@ -57,7 +57,7 @@ It also provides these features:
- History - History
- Caching - Caching
- Bookmarks (it's just a text file with bindings) - Bookmarks: it's just a text file with bindings.
- Downloads - Downloads
Check out [this board](BOARD.txt) for what's done and coming next. Check out [this board](BOARD.txt) for what's done and coming next.
@ -74,14 +74,11 @@ you want, just restart Bebop to take changes into account.
Here are the available options: Here are the available options:
| Key | Type | Default | Description | | Key | Type | Default | Description |
|----------------------------|--------------|----------------|---------------------------------------| |-------------------|-------------|----------|---------------------------------------|
| `connect_timeout` | int | 10 | Seconds before connection times out. | | `connect_timeout` | int | 10 | Seconds before connection times out. |
| `text_width` | int | 80 | Rendered line length. | | `text_width` | int | 80 | Rendered line length. |
| `source_editor` | string list | `["vi"]` | Command to use for editing sources. | | `source_editor` | string list | `["vi"]` | Command to use for editing sources. |
| `command_editor` | string list | `["vi"]` | Command to use for editing CLI input. | | `command_editor` | string list | `["vi"]` | Command to use for editing CLI input. |
| `history_limit` | int | 1000 | Maximum entries in history. |
| `external_commands` | (see note 2) | {} | Commands to open various files. |
| `external_command_default` | string list | `["xdg-open"]` | Default command to open files. |
Note: for the "command" parameters such as `source_editor` and `command_editor`, Note: for the "command" parameters such as `source_editor` and `command_editor`,
a string list is used to separate the different program arguments, e.g. if you a string list is used to separate the different program arguments, e.g. if you
@ -89,12 +86,6 @@ wish to use `vim -c 'startinsert'`, you should write the list `["vim", "-c",
"startinsert"]`. In both cases, a temporary or regular file name will be appended "startinsert"]`. In both cases, a temporary or regular file name will be appended
to this command when run. to this command when run.
2: the `external_commands` dict maps MIME types to commands just as above. For
example, if you want to open video files with VLC and audio files in Clementine,
you can use the following dict: `{"audio": ["clementine"], "video": ["vlc"]}`.
For now only "main" MIME types are supported, i.e. you cannot specify precise
types like "audio/flac", just "audio".
FAQ FAQ

View file

@ -4,11 +4,8 @@ import curses
import curses.ascii import curses.ascii
import curses.textpad import curses.textpad
import os import os
import subprocess
import tempfile import tempfile
from math import inf from math import inf
from pathlib import Path
from typing import Optional, Tuple
from bebop.bookmarks import ( from bebop.bookmarks import (
get_bookmarks_path, get_bookmarks_document, save_bookmark get_bookmarks_path, get_bookmarks_document, save_bookmark
@ -19,11 +16,9 @@ from bebop.external import open_external_program
from bebop.help import HELP_PAGE from bebop.help import HELP_PAGE
from bebop.history import History from bebop.history import History
from bebop.links import Links from bebop.links import Links
from bebop.mime import MimeType
from bebop.mouse import ButtonState from bebop.mouse import ButtonState
from bebop.navigation import ( from bebop.navigation import (
get_parent_url, get_root_url, join_url, parse_url, unparse_url get_parent_url, get_root_url, join_url, parse_url, sanitize_url)
)
from bebop.page import Page from bebop.page import Page
from bebop.page_pad import PagePad from bebop.page_pad import PagePad
@ -48,7 +43,6 @@ class Browser:
values are dicts as well: the "open" key maps to a callable to use when values are dicts as well: the "open" key maps to a callable to use when
the page is accessed, and the optional "source" key maps to callable the page is accessed, and the optional "source" key maps to callable
returning the page source path. returning the page source path.
- last_download: tuple of MimeType and path, or None.
""" """
def __init__(self, config, cert_stash): def __init__(self, config, cert_stash):
@ -61,10 +55,9 @@ class Browser:
self.command_line = None self.command_line = None
self.running = True self.running = True
self.status_data = ("", 0, 0) self.status_data = ("", 0, 0)
self.history = History(self.config["history_limit"]) self.history = History()
self.cache = {} self.cache = {}
self.special_pages = self.setup_special_pages() self.special_pages = self.setup_special_pages()
self.last_download: Optional[Tuple[MimeType, Path]] = None
self._current_url = "" self._current_url = ""
@property @property
@ -96,9 +89,6 @@ class Browser:
"help": { "help": {
"open": self.open_help, "open": self.open_help,
}, },
"history": {
"open": self.open_history,
},
} }
def run(self, *args, **kwargs): def run(self, *args, **kwargs):
@ -174,8 +164,6 @@ class Browser:
self.scroll_page_vertically(inf) self.scroll_page_vertically(inf)
elif char == ord("o"): elif char == ord("o"):
self.quick_command("open") self.quick_command("open")
elif char == ord("O"):
self.open_last_download()
elif char == ord("p"): elif char == ord("p"):
self.go_back() self.go_back()
elif char == ord("u"): elif char == ord("u"):
@ -188,8 +176,6 @@ class Browser:
self.add_bookmark() self.add_bookmark()
elif char == ord("e"): elif char == ord("e"):
self.edit_page() self.edit_page()
elif char == ord("y"):
self.open_history()
elif curses.ascii.isdigit(char): elif curses.ascii.isdigit(char):
self.handle_digit_input(char) self.handle_digit_input(char)
elif char == curses.KEY_MOUSE: elif char == curses.KEY_MOUSE:
@ -211,15 +197,10 @@ class Browser:
elif char == ord("l"): elif char == ord("l"):
self.scroll_page_horizontally(1) self.scroll_page_horizontally(1)
self.screen.nodelay(False) self.screen.nodelay(False)
# elif char == ord("@"): # else:
# self.current_url = "bebop:debugzone" # unctrled = curses.unctrl(char)
# t = "\n".join("* " + u for u in self.history.urls) # if unctrled == b"^T":
# t += "\n\n" + "\n".join("* " + u for u in self.history.backlist) # self.set_status("test!")
# self.load_page(Page.from_text(t))
# # unctrled = curses.unctrl(char)
# # if unctrled == b"^T":
# # self.set_status("test!")
# pass
@property @property
def page_pad_size(self): def page_pad_size(self):
@ -319,52 +300,38 @@ class Browser:
return return
if assume_absolute or not self.current_url: if assume_absolute or not self.current_url:
parts = parse_url(url, absolute=True, default_scheme="gemini") parts = parse_url(url, absolute=True)
join = False
else: else:
parts = parse_url(url) parts = parse_url(url)
join = True
if parts["scheme"] is None and parts["netloc"] is None: if parts.scheme == "gemini":
base_url = base_url or self.current_url
if base_url:
parts = parse_url(join_url(base_url, url))
else:
self.set_status_error(f"Can't open '{url}'.")
return
# Replace URL passed as parameter by a proper absolute one.
url = unparse_url(parts)
scheme = parts["scheme"] or ""
if scheme == "gemini":
from bebop.browser.gemini import open_gemini_url from bebop.browser.gemini import open_gemini_url
success = open_gemini_url( # If there is no netloc, this is a relative URL.
if join or base_url:
url = join_url(base_url or self.current_url, url)
open_gemini_url(
self, self,
url, sanitize_url(url),
redirects=redirects, redirects=redirects,
history=history,
use_cache=use_cache use_cache=use_cache
) )
if history and success: elif parts.scheme.startswith("http"):
self.history.push(url)
elif scheme.startswith("http"):
from bebop.browser.web import open_web_url from bebop.browser.web import open_web_url
open_web_url(self, url) open_web_url(self, url)
elif parts.scheme == "file":
elif scheme == "file":
from bebop.browser.file import open_file from bebop.browser.file import open_file
file_url = open_file(self, parts["path"]) open_file(self, parts.path, history=history)
if history and file_url: elif parts.scheme == "bebop":
self.history.push(file_url) special_page = self.special_pages.get(parts.netloc)
elif scheme == "bebop":
special_page = self.special_pages.get(parts["path"])
if special_page: if special_page:
special_page["open"]() special_page["open"]()
else: else:
self.set_status_error("Unknown page.") self.set_status_error("Unknown page.")
else: else:
self.set_status_error(f"Protocol '{scheme}' not supported.") self.set_status_error(f"Protocol {parts.scheme} not supported.")
def load_page(self, page: Page): def load_page(self, page: Page):
"""Load Gemtext data as the current page.""" """Load Gemtext data as the current page."""
@ -488,12 +455,8 @@ class Browser:
def go_back(self): def go_back(self):
"""Go back in history if possible.""" """Go back in history if possible."""
if self.current_url.startswith("bebop:"): if self.history.has_links():
previous_url = self.history.get_previous(actual_previous=True) self.open_url(self.history.pop(), history=False)
else:
previous_url = self.history.get_previous()
if previous_url:
self.open_url(previous_url, history=False)
def go_to_parent_page(self): def go_to_parent_page(self):
"""Go to the parent URL if possible.""" """Go to the parent URL if possible."""
@ -505,19 +468,14 @@ class Browser:
if self.current_url: if self.current_url:
self.open_url(get_root_url(self.current_url)) self.open_url(get_root_url(self.current_url))
def open_internal_page(self, name, gemtext):
"""Open some content corresponding to a "bebop:" internal URL."""
page = Page.from_gemtext(gemtext, self.config["text_width"])
self.load_page(page)
self.current_url = "bebop:" + name
def open_bookmarks(self): def open_bookmarks(self):
"""Open bookmarks.""" """Open bookmarks."""
content = get_bookmarks_document() content = get_bookmarks_document()
if content is None: if content is None:
self.set_status_error("Failed to open bookmarks.") self.set_status_error("Failed to open bookmarks.")
return return
self.open_internal_page("bookmarks", content) self.load_page(Page.from_gemtext(content, self.config["text_width"]))
self.current_url = "bebop://bookmarks"
def add_bookmark(self): def add_bookmark(self):
"""Add the current URL as bookmark.""" """Add the current URL as bookmark."""
@ -544,9 +502,8 @@ class Browser:
directly from their location on disk. directly from their location on disk.
""" """
delete_source_after = False delete_source_after = False
parts = parse_url(self.current_url) if self.current_url.startswith("bebop://"):
if parts["scheme"] == "bebop": page_name = self.current_url[len("bebop://"):]
page_name = parts["path"]
special_pages_functions = self.special_pages.get(page_name) special_pages_functions = self.special_pages.get(page_name)
if not special_pages_functions: if not special_pages_functions:
return return
@ -572,33 +529,10 @@ class Browser:
def open_help(self): def open_help(self):
"""Show the help page.""" """Show the help page."""
self.open_internal_page("help", HELP_PAGE) self.load_page(Page.from_gemtext(HELP_PAGE, self.config["text_width"]))
self.current_url = "bebop://help"
def prompt(self, text, keys): def prompt(self, text, keys):
"""Display the text and allow it to type one of the given keys.""" """Display the text and allow it to type one of the given keys."""
self.set_status(text) self.set_status(text)
return self.command_line.prompt_key(keys) return self.command_line.prompt_key(keys)
def open_history(self):
"""Show a generated history of visited pages."""
self.open_internal_page("history", self.history.to_gemtext())
def open_last_download(self):
"""Open the last downloaded file."""
if not self.last_download:
return
mime_type, path = self.last_download
command = self.config["external_commands"].get(mime_type.main_type)
if not command:
command = self.config["external_command_default"]
command = command + [str(path)]
self.set_status(f"Running '{' '.join(command)}'...")
try:
subprocess.Popen(
command,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True
)
except FileNotFoundError as exc:
self.set_status_error(f"Failed to run command: {exc}")

View file

@ -4,29 +4,22 @@ from bebop.browser.browser import Browser
from bebop.page import Page from bebop.page import Page
def open_file(browser: Browser, filepath: str, encoding="utf-8"): def open_file(browser: Browser, filepath: str, encoding="utf-8", history=True):
"""Open a file and render it. """Open a file and render it.
This should be used only on Gemtext files or at least text files. This should be used only on Gemtext files or at least text files.
Anything else will produce garbage and may crash the program. In the Anything else will produce garbage and may crash the program. In the
future this should be able to use a different parser according to a MIME future this should be able to use a different parser according to a MIME
type or something. type or something.
Arguments:
- browser: Browser object making the request.
- filepath: a text file path on disk.
- encoding: file's encoding.
Returns:
The loaded file URI on success, None otherwise (e.g. file not found).
""" """
try: try:
with open(filepath, "rt", encoding=encoding) as f: with open(filepath, "rt", encoding=encoding) as f:
text = f.read() text = f.read()
except (OSError, ValueError) as exc: except (OSError, ValueError) as exc:
browser.set_status_error(f"Failed to open file: {exc}") browser.set_status_error(f"Failed to open file: {exc}")
return None return
browser.load_page(Page.from_text(text)) browser.load_page(Page.from_text(text))
file_url = "file://" + filepath file_url = "file://" + filepath
if history:
browser.history.push(file_url)
browser.current_url = file_url browser.current_url = file_url
return file_url

View file

@ -14,7 +14,8 @@ from bebop.tofu import trust_fingerprint, untrust_fingerprint, WRONG_FP_ALERT
MAX_URL_LEN = 1024 MAX_URL_LEN = 1024
def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True): def open_gemini_url(browser: Browser, url, redirects=0, history=True,
use_cache=True):
"""Open a Gemini URL and set the formatted response as content. """Open a Gemini URL and set the formatted response as content.
While the specification is not set in stone, every client takes a slightly While the specification is not set in stone, every client takes a slightly
@ -32,14 +33,12 @@ def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True):
as we're doing TOFU here, we could automatically trust it or let the user as we're doing TOFU here, we could automatically trust it or let the user
choose. For simplicity, we always trust it permanently. choose. For simplicity, we always trust it permanently.
Arguments: Attributes:
- browser: Browser object making the request. - browser: Browser object making the request.
- url: a valid URL with Gemini scheme to open. - url: a valid URL with Gemini scheme to open.
- redirects: current amount of redirections done to open the initial URL. - redirects: current amount of redirections done to open the initial URL.
- history: if true, save the final URL to history.
- use_cache: if true, look up if the page is cached before requesting it. - use_cache: if true, look up if the page is cached before requesting it.
Returns:
True on success, False otherwise.
""" """
if len(url) >= MAX_URL_LEN: if len(url) >= MAX_URL_LEN:
browser.set_status_error("Request URL too long.") browser.set_status_error("Request URL too long.")
@ -49,9 +48,11 @@ def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True):
if use_cache and url in browser.cache: if use_cache and url in browser.cache:
browser.load_page(browser.cache[url]) browser.load_page(browser.cache[url])
if browser.current_url and history:
browser.history.push(browser.current_url)
browser.current_url = url browser.current_url = url
browser.set_status(url) browser.set_status(url)
return True return
req = Request(url, browser.stash) req = Request(url, browser.stash)
connect_timeout = browser.config["connect_timeout"] connect_timeout = browser.config["connect_timeout"]
@ -68,7 +69,7 @@ def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True):
else: else:
error = f"Connection failed ({url})." error = f"Connection failed ({url})."
browser.set_status_error(error) browser.set_status_error(error)
return False return
if req.state == Request.STATE_INVALID_CERT: if req.state == Request.STATE_INVALID_CERT:
pass pass
@ -87,13 +88,13 @@ def open_gemini_url(browser: Browser, url, redirects=0, use_cache=True):
data = req.proceed() data = req.proceed()
if not data: if not data:
browser.set_status_error(f"Server did not respond in time ({url}).") browser.set_status_error(f"Server did not respond in time ({url}).")
return False return
response = Response.parse(data) response = Response.parse(data)
if not response: if not response:
browser.set_status_error(f"Server response parsing failed ({url}).") browser.set_status_error(f"Server response parsing failed ({url}).")
return False return
return _handle_response(browser, response, url, redirects) _handle_response(browser, response, url, redirects, history)
def _handle_untrusted_cert(browser: Browser, request: Request): def _handle_untrusted_cert(browser: Browser, request: Request):
@ -117,14 +118,10 @@ def _handle_untrusted_cert(browser: Browser, request: Request):
def _handle_response(browser: Browser, response: Response, url: str, def _handle_response(browser: Browser, response: Response, url: str,
redirects: int): redirects: int, history: bool):
"""Handle a response from a Gemini server. """Handle a response from a Gemini server."""
Returns:
True on success, False otherwise.
"""
if response.code == 20: if response.code == 20:
return _handle_successful_response(browser, response, url) _handle_successful_response(browser, response, url, history)
elif response.generic_code == 30 and response.meta: elif response.generic_code == 30 and response.meta:
browser.open_url(response.meta, base_url=url, redirects=redirects + 1) browser.open_url(response.meta, base_url=url, redirects=redirects + 1)
elif response.generic_code in (40, 50): elif response.generic_code in (40, 50):
@ -135,10 +132,10 @@ def _handle_response(browser: Browser, response: Response, url: str,
else: else:
error = f"Unhandled response code {response.code}" error = f"Unhandled response code {response.code}"
browser.set_status_error(error) browser.set_status_error(error)
return False
def _handle_successful_response(browser: Browser, response: Response, url: str): def _handle_successful_response(browser: Browser, response: Response, url: str,
history: bool):
"""Handle a successful response content from a Gemini server. """Handle a successful response content from a Gemini server.
According to the MIME type received or inferred, the response is either According to the MIME type received or inferred, the response is either
@ -153,11 +150,8 @@ def _handle_successful_response(browser: Browser, response: Response, url: str):
- browser: Browser instance that made the initial request. - browser: Browser instance that made the initial request.
- url: original URL. - url: original URL.
- response: a successful Response. - response: a successful Response.
- history: whether to modify history on a page load.
Returns:
True on success, False otherwise.
""" """
# Use appropriate response parser according to the MIME type.
mime_type = response.get_mime_type() mime_type = response.get_mime_type()
page = None page = None
error = None error = None
@ -177,14 +171,13 @@ def _handle_successful_response(browser: Browser, response: Response, url: str):
else: else:
filepath = _get_download_path(url) filepath = _get_download_path(url)
# If a page has been produced, load it. Else if a file has been retrieved,
# download it.
if page: if page:
browser.load_page(page) browser.load_page(page)
if browser.current_url and history:
browser.history.push(browser.current_url)
browser.current_url = url browser.current_url = url
browser.cache[url] = page browser.cache[url] = page
browser.set_status(url) browser.set_status(url)
return True
elif filepath: elif filepath:
try: try:
with open(filepath, "wb") as download_file: with open(filepath, "wb") as download_file:
@ -193,11 +186,8 @@ def _handle_successful_response(browser: Browser, response: Response, url: str):
browser.set_status_error(f"Failed to save {url} ({exc})") browser.set_status_error(f"Failed to save {url} ({exc})")
else: else:
browser.set_status(f"Downloaded {url} ({mime_type.short}).") browser.set_status(f"Downloaded {url} ({mime_type.short}).")
browser.last_download = mime_type, filepath
return True
elif error: elif error:
browser.set_status_error(error) browser.set_status_error(error)
return False
def _get_download_path(url: str) -> Path: def _get_download_path(url: str) -> Path:

View file

@ -9,9 +9,6 @@ DEFAULT_CONFIG = {
"text_width": 80, "text_width": 80,
"source_editor": ["vi"], "source_editor": ["vi"],
"command_editor": ["vi"], "command_editor": ["vi"],
"history_limit": 1000,
"external_commands": {},
"external_command_default": ["xdg-open"]
} }

View file

@ -1,49 +1,38 @@
"""Help page. Currently only keybinds are shown as help.""" """Help page. Currently only keybinds are shown as help."""
HELP_PAGE = """\ HELP_PAGE = """\
# Help # Bebop keybinds
## Keybinds
Keybinds using the SHIFT key are written uppercase. Keybinds using the ALT (or \ Keybinds using the SHIFT key are written uppercase. Keybinds using the ALT (or \
META) key are written using the "M-" prefix. Symbol keys are written as their \ META) key are written using the "M-" prefix. Symbol keys are written as their \
name, not the symbol itself. name, not the symbol itself.
* colon: focus the command-line ``` list of bebop keybinds
* r: reload page - colon: focus the command-line
* h: scroll left a bit - r: reload page
* j: scroll down a bit - h: scroll left a bit
* k: scroll up a bit - j: scroll down a bit
* l: scroll right a bit - k: scroll up a bit
* H: scroll left a whole page - l: scroll right a bit
* J: scroll down a whole page - H: scroll left a whole page
* K: scroll up a whole page - J: scroll down a whole page
* L: scroll right a whole page - K: scroll up a whole page
* M-h: scroll one column left - L: scroll right a whole page
* M-j: scroll one line down - M-h: scroll one column left
* M-k: scroll one line up - M-j: scroll one line down
* M-l: scroll one column right - M-k: scroll one line up
* circumflex: horizontally scroll back to the first column - M-l: scroll one column right
* gg: go to the top of the page - circumflex: horizontally scroll back to the first column
* G: go to the bottom of the page - gg: go to the top of the page
* o: open an URL - G: go to the bottom of the page
* O: open last download with an external command - o: open an URL
* p: go to the previous page - p: go to the previous page
* u: go to the parent page (up a level in URL) - u: go to the parent page (up a level in URL)
* U: go to the root page (root URL for the current domain) - U: go to the root page (root URL for the current domain)
* b: open bookmarks - b: open bookmarks
* B: add current page to bookmarks - B: add current page to bookmarks
* e: open the current page source in an editor - e: open the current page source in an editor
* y: open history - digits: go to the corresponding link ID
* digits: go to the corresponding link ID - escape: reset status line text
* escape: reset status line text ```
## Commands
Commands are mostly for actions requiring user input. You can type a command \
with arguments by pressing the corresponding keybind above.
* o/open <url>: open this URL
* forget_certificate <hostname>: remove saved fingerprint for the hostname
* q/quit: well, quit
""" """

View file

@ -2,56 +2,20 @@
class History: class History:
"""Basic browsing history manager. """Basic browsing history manager."""
""" def __init__(self):
def __init__(self, limit):
self.urls = [] self.urls = []
self.backlist = []
self.limit = limit def has_links(self):
"""Return True if there is at least one URL in the history."""
return bool(self.urls)
def push(self, url): def push(self, url):
"""Add an URL to the history.""" """Add an URL to the history."""
# Append url to our URLs, bubbling it up if it's already there. if not self.urls or self.urls[-1] != url:
try:
self.urls.remove(url)
except ValueError:
pass
self.urls.append(url) self.urls.append(url)
if len(self.urls) > self.limit:
self.urls.pop(0)
# Also simply push it to the backlist. def pop(self):
self.backlist.append(url) """Return latest URL added to history and remove it."""
if len(self.backlist) > self.limit: return self.urls.pop()
self.backlist.pop(0)
def get_previous(self, actual_previous=False):
"""Return previous URL, or None if there is only one or zero URL."""
try:
if actual_previous:
return self.backlist[-1]
# The backlist should be populated with the first link visited and
# never completely emptied afterwards, or we end up in situation
# where you can't get away from internal pages.
if len(self.backlist) > 1:
self.backlist.pop()
return self.backlist[-1]
except IndexError:
return None
def to_gemtext(self):
"""Generate a simple Gemtext page of the current history.
Present a page that follows the "by last visited" behaviour of Firefox
for the lack of a better idea, avoiding duplicated entries.
"""
urls = []
seen = set()
for url in reversed(self.urls):
if url in seen:
continue
urls.append(url)
seen.add(url)
return "# History\n\n" + "\n".join("=> " + url for url in urls)

View file

@ -1,191 +1,66 @@
"""URI (RFC 3986) helpers for Gemini navigation. """URI (RFC 3986) helpers for Gemini navigation."""
It was supposed to be just thin fixes around urllib.parse functions but as import urllib.parse
gemini is not recognized as a valid scheme it breaks a lot of things, so it
turned into a basic re-implementation of the RFC.
"""
import re
from typing import Any, Dict, Optional
from urllib.parse import quote
URI_RE = re.compile( def parse_url(url: str, absolute: bool =False):
"^"
r"(?:(?P<scheme>[^:/?#\n]+):)?"
r"(?://(?P<netloc>[^/?#\n]*))?"
r"(?P<path>[^?#\n]*)"
r"(?:\?(?P<query>[^#\n]*))?"
r"(?:#(?P<fragment>.*))?"
"$"
)
class InvalidUrlException(Exception):
"""Generic exception for invalid URLs used in this module."""
def __init__(self, url):
super().__init__()
self.url = url
def parse_url(
url: str,
absolute: bool =False,
default_scheme: Optional[str] =None
) -> Dict[str, Any]:
"""Return URL parts from this URL. """Return URL parts from this URL.
Use the RFC regex to get parts from URL. This function can be used on This uses urllib.parse.urlparse to not reinvent the wheel, with a few
regular URLs but also on not-so-compliant URLs, e.g. "capsule.org/page", adjustments.
which might be typed by an user (see `absolute` argument).
Arguments: First, urllib does not know the Gemini scheme (yet!) so if it
- url: URL to parse. is specified we strip it to get an absolute netloc.
- absolute: assume the URL is absolute, e.g. in the case we are trying to
parse an URL an user has written, which is most of the time an absolute
URL even if not perfectly so. This only has an effect if, after the
initial parsing, there is no scheme or netloc available.
- default_scheme: specify the scheme to use if the URL either does not
specify it and we need it (e.g. there is a location), or `absolute` is
true; if absolute is true but `default_scheme` is not specified, use the
gemini scheme.
Returns: Second, as this function can be used to process arbitrary user input, we
URL parts, as a dictionary with the following keys: "scheme", "netloc", clean it a bit:
"path", "query" and "fragment". All keys are present, but all values can be - strip whitespaces from the URL
None, except path which is always a string (but can be empty). - if "absolute" is True, consider that the URL is meant to be absolute, even
though it technically is not, e.g. "dece.space" is not absolute as it
Raises: misses either the // delimiter.
InvalidUrlException if you put really really stupid strings in there.
""" """
match = URI_RE.match(url) url = url.strip()
if not match: if url.startswith("file://"):
raise InvalidUrlException(url) return urllib.parse.urlparse(url)
if url.startswith("gemini://"):
match_dict = match.groupdict() url = url[7:]
parts = { parts = urllib.parse.urlparse(url, scheme="gemini")
k: match_dict.get(k) if not parts.netloc or absolute:
for k in ("scheme", "netloc", "path", "query", "fragment") parts = urllib.parse.urlparse(f"//{url}", scheme="gemini")
}
# Smol hack: if we assume it's an absolute URL, just prefix scheme and "//".
if absolute and not parts["scheme"] and not parts["netloc"]:
scheme = default_scheme or "gemini"
return parse_url(scheme + "://" + url)
# Another smol hack: if there is no scheme, use `default_scheme` as default.
if default_scheme and parts["scheme"] is None:
parts["scheme"] = default_scheme
return parts return parts
def unparse_url(parts) -> str: def sanitize_url(url: str):
"""Unparse parts of an URL produced by `parse_url`.""" """Parse and unparse an URL to ensure it has been properly formatted."""
url = "" return urllib.parse.urlunparse(parse_url(url))
if parts["scheme"] is not None:
url += parts["scheme"] + ":"
if parts["netloc"] is not None:
url += "//" + parts["netloc"]
if parts["path"] is not None:
url += parts["path"]
if parts["query"] is not None:
url += "?" + parts["query"]
if parts["fragment"] is not None:
url += "#" + parts["fragment"]
return url
def clear_post_path(parts) -> None: def join_url(base_url: str, url: str):
"""Clear optional post-path parts (query and fragment).""" """Join a base URL with a relative url."""
parts["query"] = None if base_url.startswith("gemini://"):
parts["fragment"] = None base_url = base_url[7:]
parts = parse_url(urllib.parse.urljoin(base_url, url))
return urllib.parse.urlunparse(parts)
def join_url(base_url: str, rel_url: str) -> str: def set_parameter(url: str, user_input: str):
"""Join a base URL with a relative path."""
parts = parse_url(base_url)
rel_parts = parse_url(rel_url)
if rel_url.startswith("/"):
new_path = rel_parts["path"]
else:
base_path = parts["path"] or ""
new_path = remove_last_segment(base_path) + "/" + rel_parts["path"]
parts["path"] = remove_dot_segments(new_path)
parts["query"] = rel_parts["query"]
parts["fragment"] = rel_parts["fragment"]
return unparse_url(parts)
def remove_dot_segments(path: str):
    """Resolve the "." and ".." segments of an URL path.

    The input is consumed from the left, one prefix at a time, while the
    resolved path is accumulated in an output buffer.
    """
    resolved = ""
    remaining = path
    while remaining:
        if remaining.startswith("../"):
            # A leading "../" has nothing to climb out of: drop it.
            remaining = remaining[3:]
        elif remaining.startswith(("./", "/./")):
            # Drops a leading "./", or reduces a leading "/./" to "/".
            remaining = remaining[2:]
        elif remaining == "/.":
            remaining = "/"
        elif remaining.startswith("/../") or remaining == "/..":
            # Going one level up: forget the segment emitted last.
            remaining = "/" + remaining[4:]
            resolved = remove_last_segment(resolved)
        elif remaining == "." or remaining == "..":
            remaining = ""
        else:
            segment, remaining = pop_first_segment(remaining)
            resolved += segment
    return resolved
def remove_last_segment(path: str) -> str:
    """Remove the last path segment, including its preceding "/" if any.

    A path that contains no "/" is a single segment, so the result is an
    empty string.
    """
    # rpartition yields an empty head when there is no "/", whereas slicing
    # with the -1 returned by rfind would only drop the last character.
    return path.rpartition("/")[0]
def pop_first_segment(path: str):
    """Split a path into its first segment and the remainder.

    The first segment keeps its leading "/" if there is one and stops right
    before the next "/"; the remainder starts at that "/". When there is no
    further "/", the whole path is the first segment and the remainder is
    an empty string.
    """
    # Start searching at index 1 so a leading "/" belongs to the segment.
    boundary = path.find("/", 1)
    if boundary < 0:
        return path, ""
    return path[:boundary], path[boundary:]
def set_parameter(url: str, user_input: str) -> str:
"""Return a new URL with the escaped user input appended.""" """Return a new URL with the escaped user input appended."""
parts = parse_url(url) quoted_input = urllib.parse.quote(user_input)
parts["query"] = quote(user_input) if "?" in url:
return unparse_url(parts) url = url.split("?", maxsplit=1)[0]
return url + "?" + quoted_input
def get_parent_path(path: str) -> str:
    """Return the path one level up, keeping its trailing "/".

    Trailing slashes are ignored when looking for the last separator. A path
    with no separator left at that point is returned unchanged.
    """
    slash_index = path.rstrip("/").rfind("/")
    return path if slash_index < 0 else path[:slash_index + 1]
def get_parent_url(url: str) -> str: def get_parent_url(url: str) -> str:
"""Return the parent URL (one level up).""" """Return the parent URL (one level up)."""
parts = parse_url(url) scheme, netloc, path, _, _, _ = parse_url(url)
parts["path"] = get_parent_path(parts["path"]) # type: ignore last_slash = path.rstrip("/").rfind("/")
clear_post_path(parts) if last_slash > -1:
return unparse_url(parts) path = path[:last_slash + 1]
return urllib.parse.urlunparse((scheme, netloc, path, "", "", ""))
def get_root_url(url: str) -> str: def get_root_url(url: str) -> str:
"""Return the root URL (basically discards path).""" """Return the root URL (basically discards path)."""
parts = parse_url(url) scheme, netloc, _, _, _, _ = parse_url(url)
parts["path"] = "/" return urllib.parse.urlunparse((scheme, netloc, "/", "", "", ""))
clear_post_path(parts)
return unparse_url(parts)

View file

@ -15,7 +15,7 @@ def render_lines(metalines, window, max_width):
Arguments: Arguments:
- metalines: list of metalines to render, must have at least one element. - metalines: list of metalines to render, must have at least one element.
- window: window that will be resized and filled with rendered lines. - window: window that will be resized as filled with rendered lines.
- max_width: line length limit for the pad. - max_width: line length limit for the pad.
Returns: Returns:
@ -24,7 +24,7 @@ def render_lines(metalines, window, max_width):
dimensions of the resized window. dimensions of the resized window.
""" """
num_lines = len(metalines) num_lines = len(metalines)
new_dimensions = max(num_lines, 1), max_width new_dimensions = num_lines, max_width
window.resize(*new_dimensions) window.resize(*new_dimensions)
for line_index, metaline in enumerate(metalines): for line_index, metaline in enumerate(metalines):
try: try:

View file

@ -1,54 +1,32 @@
import unittest import unittest
from ..navigation import ( from ..navigation import join_url, parse_url, set_parameter
get_parent_url, get_root_url, join_url, parse_url, pop_first_segment, remove_dot_segments,
remove_last_segment, set_parameter,
)
class TestNavigation(unittest.TestCase): class TestNavigation(unittest.TestCase):
def test_parse_url(self): def test_parse_url(self):
# Basic complete URL. res = parse_url("gemini://dece.space/parse-me.gmi")
res = parse_url("gemini://netloc/parse-me.gmi") self.assertEqual(res.scheme, "gemini")
self.assertEqual(res["scheme"], "gemini") self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res["netloc"], "netloc") self.assertEqual(res.path, "/parse-me.gmi")
self.assertEqual(res["path"], "/parse-me.gmi")
# No scheme. res_netloc = parse_url("//dece.space/parse-me.gmi")
res_netloc = parse_url("//netloc/parse-me.gmi") self.assertEqual(res, res_netloc)
self.assertIsNone(res_netloc["scheme"], None)
for key in res_netloc:
if key == "scheme":
continue
self.assertEqual(res_netloc[key], res[key])
# No scheme but a default is provided.
res_netloc = parse_url("//netloc/parse-me.gmi", default_scheme="gemini")
self.assertDictEqual(res_netloc, res)
# No scheme nor netloc: only a path should be produced.
res = parse_url("dece.space/parse-me.gmi")
self.assertIsNone(res["scheme"])
self.assertIsNone(res["netloc"])
self.assertEqual(res["path"], "dece.space/parse-me.gmi")
# No scheme nor netloc but we should pretend having an absolute URL.
res = parse_url("dece.space/parse-me.gmi", absolute=True) res = parse_url("dece.space/parse-me.gmi", absolute=True)
self.assertEqual(res["scheme"], "gemini") self.assertEqual(res.scheme, "gemini")
self.assertEqual(res["netloc"], "dece.space") self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res["path"], "/parse-me.gmi") self.assertEqual(res.path, "/parse-me.gmi")
# HTTPS scheme.
res = parse_url("https://dece.space/index.html") res = parse_url("https://dece.space/index.html")
self.assertEqual(res["scheme"], "https") self.assertEqual(res.scheme, "https")
self.assertEqual(res["netloc"], "dece.space") self.assertEqual(res.netloc, "dece.space")
self.assertEqual(res["path"], "/index.html") self.assertEqual(res.path, "/index.html")
# File scheme.
res = parse_url("file:///home/dece/gemini/index.gmi") res = parse_url("file:///home/dece/gemini/index.gmi")
self.assertEqual(res["scheme"], "file") self.assertEqual(res.scheme, "file")
self.assertEqual(res["path"], "/home/dece/gemini/index.gmi") self.assertEqual(res.path, "/home/dece/gemini/index.gmi")
def test_join_url(self): def test_join_url(self):
url = join_url("gemini://dece.space/", "some-file.gmi") url = join_url("gemini://dece.space/", "some-file.gmi")
@ -61,84 +39,9 @@ class TestNavigation(unittest.TestCase):
self.assertEqual(url, "gemini://dece.space/dir1/other-file.gmi") self.assertEqual(url, "gemini://dece.space/dir1/other-file.gmi")
url = join_url("gemini://dece.space/dir1/file.gmi", "../top-level.gmi") url = join_url("gemini://dece.space/dir1/file.gmi", "../top-level.gmi")
self.assertEqual(url, "gemini://dece.space/top-level.gmi") self.assertEqual(url, "gemini://dece.space/top-level.gmi")
url = join_url("s://hard/dir/a", "./../test/b/c/../d/e/f/../.././a.gmi")
self.assertEqual(url, "s://hard/test/b/d/a.gmi")
def test_remove_dot_segments(self):
    """Check dot-segment removal on absolute and relative paths."""
    # Maps each input path to the path expected once dots are resolved.
    expectations = {
        "index.gmi": "index.gmi",
        "/index.gmi": "/index.gmi",
        "./index.gmi": "index.gmi",
        "/./index.gmi": "/index.gmi",
        "/../index.gmi": "/index.gmi",
        "/a/b/c/./../../g": "/a/g",
        "mid/content=5/../6": "mid/6",
        "../../../../g": "g",
    }
    for raw_path, cleaned_path in expectations.items():
        cleaned = remove_dot_segments(raw_path)
        self.assertEqual(cleaned, cleaned_path, msg="path was " + raw_path)
def test_remove_last_segment(self):
    """Check that the last segment is dropped along with its "/"."""
    cases = (
        ("", ""),
        ("/", ""),
        ("/a", ""),
        ("/a/", "/a"),
        ("/a/b", "/a"),
        ("/a/b/c/d", "/a/b/c"),
        ("///", "//"),
    )
    for path, expected in cases:
        self.assertEqual(remove_last_segment(path), expected)
def test_pop_first_segment(self):
    """Check the (first segment, remainder) split of various paths."""
    cases = (
        ("", ("", "")),
        ("a", ("a", "")),
        ("/a", ("/a", "")),
        ("/a/", ("/a", "/")),
        ("/a/b", ("/a", "/b")),
        ("a/b", ("a", "/b")),
    )
    for path, expected in cases:
        self.assertEqual(pop_first_segment(path), expected)
def test_set_parameter(self): def test_set_parameter(self):
url = set_parameter("gemini://gus.guru/search", "my search") url = set_parameter("gemini://gus.guru/search", "my search")
self.assertEqual(url, "gemini://gus.guru/search?my%20search") self.assertEqual(url, "gemini://gus.guru/search?my%20search")
url = set_parameter("gemini://gus.guru/search?old%20search", "new") url = set_parameter("gemini://gus.guru/search?old%20search", "new")
self.assertEqual(url, "gemini://gus.guru/search?new") self.assertEqual(url, "gemini://gus.guru/search?new")
def test_get_parent_url(self):
    """Check that each URL maps to the URL one level up."""
    parent_by_url = {
        "gemini://host": "gemini://host",
        "gemini://host/": "gemini://host/",
        "gemini://host/a": "gemini://host/",
        "gemini://host/a/": "gemini://host/",
        "gemini://host/a/index.gmi": "gemini://host/a/",
        "gemini://host/a/b/": "gemini://host/a/",
        "gemini://host/a/b/file.flac": "gemini://host/a/b/",
        "//host/a/b": "//host/a/",
        "hey": "hey",  # does not really make sense but whatever
        "hey/ho": "hey/",
        "hey/ho/letsgo": "hey/ho/",
    }
    for url, parent in parent_by_url.items():
        self.assertEqual(get_parent_url(url), parent, msg="URL was " + url)
def test_get_root_url(self):
    """Check that each URL maps to its root, with path and query discarded."""
    root_by_url = {
        "gemini://host": "gemini://host/",
        "gemini://host/": "gemini://host/",
        "gemini://host/a": "gemini://host/",
        "gemini://host/a/b/c": "gemini://host/",
        "//host/path": "//host/",
        "//host/path?query": "//host/",
        "dumb": "/",
        "dumb/dumber": "/",
    }
    for url, root in root_by_url.items():
        self.assertEqual(get_root_url(url), root, msg="URL was " + url)