Compare commits
7 commits
5b3e91336f
...
f3a3a36039
Author | SHA1 | Date | |
---|---|---|---|
dece | f3a3a36039 | ||
dece | 347b5a81cc | ||
dece | bce65a1472 | ||
dece | 6bc4dbcc5d | ||
dece | 8aee7fdfba | ||
dece | 1f938fd2af | ||
dece | bd7cfce520 |
26
BOARD.txt
Normal file
26
BOARD.txt
Normal file
|
@ -0,0 +1,26 @@
|
|||
TODO DONE
|
||||
--------------------------------------------------------------------------------
|
||||
browsing
|
||||
links
|
||||
redirections
|
||||
web links
|
||||
history (back/forward)
|
||||
simple caching
|
||||
simple text files
|
||||
encodings
|
||||
bookmarks
|
||||
view/edit sources
|
||||
non shit command-line
|
||||
home page
|
||||
downloads
|
||||
media files
|
||||
view history
|
||||
identity management
|
||||
configuration
|
||||
--------------------------------------------------------------------------------
|
||||
BACKLOG
|
||||
margins / centering
|
||||
pre blocks folding
|
||||
buffers (tabs)
|
||||
handle soft-hyphens on wrapping
|
||||
bug: combining chars reduce lengths
|
16
README.md
16
README.md
|
@ -27,16 +27,16 @@ Features
|
|||
|
||||
### What works
|
||||
|
||||
- Basic browsing: scrolling, follow links, redirections, Web links.
|
||||
Common basic browsing features work: go to URL, scrolling, follow links,
|
||||
redirections, page encoding.
|
||||
|
||||
### What is planned
|
||||
Bebop also provides these neat features:
|
||||
|
||||
- Handle more content types.
|
||||
- Great config options.
|
||||
- Identity management with temporary and managed certificates.
|
||||
- Buffers (or tabs if you prefer).
|
||||
- Home page.
|
||||
- Bookmarks.
|
||||
- History
|
||||
- Caching
|
||||
- Bookmarks: it's just a text file with bindings.
|
||||
|
||||
Check out [this board](BOARD.txt) for what's done and coming next.
|
||||
|
||||
### What is not planned for now
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
import argparse
|
||||
|
||||
from bebop.browser import Browser
|
||||
from bebop.browser.browser import Browser
|
||||
from bebop.fs import get_user_data_path
|
||||
from bebop.tofu import load_cert_stash, save_cert_stash
|
||||
|
||||
|
||||
def main():
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument("url", default=None)
|
||||
argparser.add_argument("url", nargs="?", default=None)
|
||||
args = argparser.parse_args()
|
||||
|
||||
if args.url:
|
||||
|
@ -20,7 +20,7 @@ def main():
|
|||
user_data_path.mkdir()
|
||||
|
||||
cert_stash_path = user_data_path / "known_hosts.txt"
|
||||
cert_stash = load_cert_stash(cert_stash_path)
|
||||
cert_stash = load_cert_stash(cert_stash_path) or {}
|
||||
try:
|
||||
Browser(cert_stash).run(start_url=start_url)
|
||||
finally:
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import io
|
||||
from pathlib import Path
|
||||
|
||||
from bebop.fs import get_user_data_path
|
||||
|
@ -9,7 +8,7 @@ TEMPLATE = """\
|
|||
|
||||
Welcome to your bookmark page! This file has been created in "{original_path}" \
|
||||
and you can edit it as you wish. New bookmarks will be added on a new \
|
||||
line at the end. Always keep an empty line at the end!
|
||||
line at the end, so always keep an empty line there!
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
@ -4,19 +4,22 @@ import curses
|
|||
import curses.ascii
|
||||
import curses.textpad
|
||||
import os
|
||||
import webbrowser
|
||||
import subprocess
|
||||
import tempfile
|
||||
from math import inf
|
||||
|
||||
from bebop.bookmarks import get_bookmarks_document, save_bookmark
|
||||
from bebop.bookmarks import (
|
||||
get_bookmarks_path, get_bookmarks_document, save_bookmark
|
||||
)
|
||||
from bebop.colors import ColorPair, init_colors
|
||||
from bebop.command_line import CommandLine
|
||||
from bebop.history import History
|
||||
from bebop.links import Links
|
||||
from bebop.mouse import ButtonState
|
||||
from bebop.navigation import *
|
||||
from bebop.navigation import (
|
||||
get_parent_url, get_root_url, join_url, parse_url, sanitize_url)
|
||||
from bebop.page import Page
|
||||
from bebop.page_pad import PagePad
|
||||
from bebop.protocol import Request, Response
|
||||
|
||||
|
||||
class Browser:
|
||||
|
@ -132,6 +135,8 @@ class Browser:
|
|||
self.open_bookmarks()
|
||||
elif char == ord("B"):
|
||||
self.add_bookmark()
|
||||
elif char == ord("e"):
|
||||
self.edit_page()
|
||||
elif curses.ascii.isdigit(char):
|
||||
self.handle_digit_input(char)
|
||||
elif char == curses.KEY_MOUSE:
|
||||
|
@ -251,133 +256,38 @@ class Browser:
|
|||
if redirects > 5:
|
||||
self.set_status_error(f"Too many redirections ({url}).")
|
||||
return
|
||||
|
||||
if assume_absolute or not self.current_url:
|
||||
parts = parse_url(url, absolute=True)
|
||||
join = False
|
||||
else:
|
||||
parts = parse_url(url)
|
||||
join = True
|
||||
|
||||
if parts.scheme == "gemini":
|
||||
from bebop.browser.gemini import open_gemini_url
|
||||
# If there is no netloc, this is a relative URL.
|
||||
if join or base_url:
|
||||
url = join_url(base_url or self.current_url, url)
|
||||
self.open_gemini_url(sanitize_url(url), redirects=redirects,
|
||||
history=history, use_cache=use_cache)
|
||||
open_gemini_url(
|
||||
self,
|
||||
sanitize_url(url),
|
||||
redirects=redirects,
|
||||
history=history,
|
||||
use_cache=use_cache
|
||||
)
|
||||
elif parts.scheme.startswith("http"):
|
||||
self.open_web_url(url)
|
||||
from bebop.browser.web import open_web_url
|
||||
open_web_url(self, url)
|
||||
elif parts.scheme == "file":
|
||||
self.open_file(parts.path, history=history)
|
||||
from bebop.browser.file import open_file
|
||||
open_file(self, parts.path, history=history)
|
||||
elif parts.scheme == "bebop":
|
||||
if parts.netloc == "bookmarks":
|
||||
self.open_bookmarks()
|
||||
else:
|
||||
self.set_status_error(f"Protocol {parts.scheme} not supported.")
|
||||
|
||||
def open_gemini_url(self, url, redirects=0, history=True, use_cache=True):
|
||||
"""Open a Gemini URL and set the formatted response as content.
|
||||
|
||||
After initiating the connection, TODO
|
||||
"""
|
||||
self.set_status(f"Loading {url}")
|
||||
|
||||
if use_cache and url in self.cache:
|
||||
self.load_page(self.cache[url])
|
||||
if self.current_url and history:
|
||||
self.history.push(self.current_url)
|
||||
self.current_url = url
|
||||
self.set_status(url)
|
||||
return
|
||||
|
||||
req = Request(url, self.stash)
|
||||
connected = req.connect()
|
||||
if not connected:
|
||||
if req.state == Request.STATE_ERROR_CERT:
|
||||
error = f"Certificate was missing or corrupt ({url})."
|
||||
elif req.state == Request.STATE_UNTRUSTED_CERT:
|
||||
error = f"Certificate has been changed ({url})."
|
||||
# TODO propose the user ways to handle this.
|
||||
elif req.state == Request.STATE_CONNECTION_FAILED:
|
||||
error_details = f": {req.error}" if req.error else "."
|
||||
error = f"Connection failed ({url})" + error_details
|
||||
else:
|
||||
error = f"Connection failed ({url})."
|
||||
self.set_status_error(error)
|
||||
return
|
||||
|
||||
if req.state == Request.STATE_INVALID_CERT:
|
||||
# TODO propose abort / temp trust
|
||||
pass
|
||||
elif req.state == Request.STATE_UNKNOWN_CERT:
|
||||
# TODO propose abort / temp trust / perm trust
|
||||
pass
|
||||
else:
|
||||
pass # TODO
|
||||
|
||||
data = req.proceed()
|
||||
if not data:
|
||||
self.set_status_error(f"Server did not respond in time ({url}).")
|
||||
return
|
||||
response = Response.parse(data)
|
||||
if not response:
|
||||
self.set_status_error(f"Server response parsing failed ({url}).")
|
||||
return
|
||||
|
||||
if response.code == 20:
|
||||
handle_code = self.handle_response_content(response)
|
||||
if handle_code == 0:
|
||||
if self.current_url and history:
|
||||
self.history.push(self.current_url)
|
||||
self.current_url = url
|
||||
self.cache[url] = self.page_pad.current_page
|
||||
self.set_status(url)
|
||||
elif handle_code == 1:
|
||||
self.set_status(f"Downloaded {url}.")
|
||||
elif response.generic_code == 30 and response.meta:
|
||||
self.open_url(response.meta, base_url=url, redirects=redirects + 1)
|
||||
elif response.generic_code in (40, 50):
|
||||
error = f"Server error: {response.meta or Response.code.name}"
|
||||
self.set_status_error(error)
|
||||
elif response.generic_code == 10:
|
||||
self.handle_input_request(url, response)
|
||||
else:
|
||||
error = f"Unhandled response code {response.code}"
|
||||
self.set_status_error(error)
|
||||
|
||||
def handle_response_content(self, response: Response) -> int:
|
||||
"""Handle a response's content from a Gemini server.
|
||||
|
||||
According to the MIME type received or inferred, render or download the
|
||||
response's content.
|
||||
|
||||
Currently only text/gemini content is rendered.
|
||||
|
||||
Arguments:
|
||||
- response: a successful Response.
|
||||
|
||||
Returns:
|
||||
An error code: 0 means a page has been loaded, so any book-keeping such
|
||||
as history management can be applied; 1 means a content has been
|
||||
successfully retrieved but has not been displayed (e.g. non-text
|
||||
content) nor saved as a page; 2 means that the content could not be
|
||||
handled, either due to bogus MIME type or MIME parameters.
|
||||
"""
|
||||
mime_type = response.get_mime_type()
|
||||
if mime_type.main_type == "text":
|
||||
if mime_type.sub_type == "gemini":
|
||||
encoding = mime_type.charset
|
||||
try:
|
||||
text = response.content.decode(encoding, errors="replace")
|
||||
except LookupError:
|
||||
self.set_status_error("Unknown encoding {encoding}.")
|
||||
return 2
|
||||
self.load_page(Page.from_gemtext(text))
|
||||
return 0
|
||||
else:
|
||||
pass # TODO
|
||||
else:
|
||||
pass # TODO
|
||||
return 1
|
||||
|
||||
def load_page(self, page: Page):
|
||||
"""Load Gemtext data as the current page."""
|
||||
old_pad_height = self.page_pad.dim[0]
|
||||
|
@ -391,9 +301,11 @@ class Browser:
|
|||
|
||||
def handle_digit_input(self, init_char: int):
|
||||
"""Focus command-line to select the link ID to follow."""
|
||||
if not self.page_pad or self.page_pad.current_page.links is None:
|
||||
if self.page_pad.current_page is None:
|
||||
return
|
||||
links = self.page_pad.current_page.links
|
||||
if links is None:
|
||||
return
|
||||
err, val = self.command_line.focus_for_link_navigation(init_char, links)
|
||||
if err == 0:
|
||||
self.open_link(links, val) # type: ignore
|
||||
|
@ -407,17 +319,6 @@ class Browser:
|
|||
return
|
||||
self.open_url(links[link_id])
|
||||
|
||||
def handle_input_request(self, from_url: str, response: Response):
|
||||
"""Focus command-line to pass input to the server."""
|
||||
if response.meta:
|
||||
self.set_status(f"Input needed: {response.meta}")
|
||||
else:
|
||||
self.set_status("Input needed:")
|
||||
user_input = self.command_line.focus("?")
|
||||
if user_input:
|
||||
url = set_parameter(from_url, user_input)
|
||||
self.open_gemini_url(url)
|
||||
|
||||
def handle_mouse(self, mouse_id: int, x: int, y: int, z: int, bstate: int):
|
||||
"""Handle mouse events.
|
||||
|
||||
|
@ -488,11 +389,7 @@ class Browser:
|
|||
def reload_page(self):
|
||||
"""Reload the page, if one has been previously loaded."""
|
||||
if self.current_url:
|
||||
self.open_url(
|
||||
self.current_url,
|
||||
history=False,
|
||||
use_cache=False
|
||||
)
|
||||
self.open_url(self.current_url, history=False, use_cache=False)
|
||||
|
||||
def go_back(self):
|
||||
"""Go back in history if possible."""
|
||||
|
@ -502,37 +399,12 @@ class Browser:
|
|||
def go_to_parent_page(self):
|
||||
"""Go to the parent URL if possible."""
|
||||
if self.current_url:
|
||||
self.open_gemini_url(get_parent_url(self.current_url))
|
||||
self.open_url(get_parent_url(self.current_url))
|
||||
|
||||
def go_to_root_page(self):
|
||||
"""Go to the root URL if possible."""
|
||||
if self.current_url:
|
||||
self.open_gemini_url(get_root_url(self.current_url))
|
||||
|
||||
def open_web_url(self, url):
|
||||
"""Open a Web URL. Currently relies in Python's webbrowser module."""
|
||||
self.set_status(f"Opening {url}")
|
||||
webbrowser.open_new_tab(url)
|
||||
|
||||
def open_file(self, filepath, encoding="utf-8", history=True):
|
||||
"""Open a file and render it.
|
||||
|
||||
This should be used only on Gemtext files or at least text files.
|
||||
Anything else will produce garbage and may crash the program. In the
|
||||
future this should be able to use a different parser according to a MIME
|
||||
type or something.
|
||||
"""
|
||||
try:
|
||||
with open(filepath, "rt", encoding=encoding) as f:
|
||||
text = f.read()
|
||||
except (OSError, ValueError) as exc:
|
||||
self.set_status_error(f"Failed to open file: {exc}")
|
||||
return
|
||||
self.load_page(Page.from_gemtext(text))
|
||||
file_url = "file://" + filepath
|
||||
if history:
|
||||
self.history.push(file_url)
|
||||
self.current_url = file_url
|
||||
self.open_url(get_root_url(self.current_url))
|
||||
|
||||
def open_bookmarks(self):
|
||||
"""Open bookmarks."""
|
||||
|
@ -547,7 +419,7 @@ class Browser:
|
|||
"""Add the current URL as bookmark."""
|
||||
if not self.current_url:
|
||||
return
|
||||
self.set_status("Title?")
|
||||
self.set_status("Bookmark title?")
|
||||
current_title = self.page_pad.current_page.title or ""
|
||||
title = self.command_line.focus(">", prefix=current_title)
|
||||
if title:
|
||||
|
@ -555,3 +427,42 @@ class Browser:
|
|||
if title:
|
||||
save_bookmark(self.current_url, title)
|
||||
self.reset_status()
|
||||
|
||||
def open_external_program(self, command):
    """Pause the curses modes to run an external program.

    Cbreak mode is left and echo is turned back on while the program
    runs. Both are restored and the windows are refreshed afterwards, even
    when the program could not be started.

    Arguments:
    - command: argument list passed as-is to `subprocess.run`.
    """
    curses.nocbreak()
    curses.echo()
    failure = None
    try:
        subprocess.run(command)
    except OSError as exc:
        # Typically the executable is missing or not runnable.
        failure = exc
    finally:
        # Always give the terminal back to curses, whatever happened.
        curses.noecho()
        curses.cbreak()
        self.refresh_windows()
    if failure is not None:
        self.set_status_error(f"Failed to run external program: {failure}")
|
||||
|
||||
def edit_page(self):
    """Open a text editor to edit the page source.

    For external pages, the source is written in a temporary file which is
    opened in the editor (currently always `vi`); it is up to the user to
    save it elsewhere if needed, as the temporary file is removed once the
    editor exits. Internal pages, e.g. the bookmarks page, are opened
    directly from their location on disk.
    """
    command = ["vi"]

    special_pages = {
        "bebop://bookmarks": str(get_bookmarks_path())
    }
    if self.current_url in special_pages:
        # Internal page: edit the real file, nothing to clean up.
        command.append(special_pages[self.current_url])
        self.open_external_program(command)
        return

    page = self.page_pad.current_page
    if not page:
        return
    # delete=False so the file survives the `with` block and can be
    # reopened by the editor; it is removed explicitly below.
    with tempfile.NamedTemporaryFile("wt", delete=False) as source_file:
        source_file.write(page.source)
        source_filename = source_file.name

    command.append(source_filename)
    try:
        self.open_external_program(command)
    finally:
        # Do not leak the temporary file if the editor call fails.
        os.unlink(source_filename)
|
25
bebop/browser/file.py
Normal file
25
bebop/browser/file.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
"""Local files browser."""
|
||||
|
||||
from bebop.browser.browser import Browser
|
||||
from bebop.page import Page
|
||||
|
||||
|
||||
def open_file(browser: Browser, filepath: str, encoding="utf-8", history=True):
    """Read a local file and display it as a plain text page.

    Only text files are expected here; anything else will produce garbage
    and may crash the program. A later version should pick a parser
    according to a MIME type or something similar.

    Arguments:
    - browser: Browser instance that displays the page.
    - filepath: path of the file to open.
    - encoding: text encoding used to read the file.
    - history: whether to record the file URL in the browsing history.
    """
    try:
        with open(filepath, "rt", encoding=encoding) as source_file:
            content = source_file.read()
    except (OSError, ValueError) as exc:
        browser.set_status_error(f"Failed to open file: {exc}")
    else:
        browser.load_page(Page.from_text(content))
        file_url = "file://" + filepath
        # NOTE(review): the new URL is pushed here, whereas the Gemini
        # opener pushes the previous current URL -- confirm which one the
        # History class expects.
        if history:
            browser.history.push(file_url)
        browser.current_url = file_url
|
130
bebop/browser/gemini.py
Normal file
130
bebop/browser/gemini.py
Normal file
|
@ -0,0 +1,130 @@
|
|||
"""Gemini-related features of the browser."""
|
||||
|
||||
from bebop.browser.browser import Browser
|
||||
from bebop.navigation import set_parameter
|
||||
from bebop.page import Page
|
||||
from bebop.protocol import Request, Response
|
||||
|
||||
|
||||
def open_gemini_url(browser: Browser, url, redirects=0, history=True,
                    use_cache=True):
    """Open a Gemini URL and set the formatted response as content.

    A cached page is reused when `use_cache` is set and the URL is in the
    browser cache. Otherwise a request is made; connection and certificate
    failures, missing data and unparsable responses are reported through
    the browser's error status. Successful responses are handed to
    `handle_response_content`, redirections go back through
    `browser.open_url`, and input requests through `handle_input_request`.

    Arguments:
    - browser: Browser instance that displays the page.
    - url: Gemini URL to open.
    - redirects: number of redirections already followed for this request.
    - history: whether to push the previous URL to the browsing history.
    - use_cache: whether a cached page may be used instead of a request.
    """
    browser.set_status(f"Loading {url}")

    if use_cache and url in browser.cache:
        browser.load_page(browser.cache[url])
        if browser.current_url and history:
            browser.history.push(browser.current_url)
        browser.current_url = url
        browser.set_status(url)
        return

    req = Request(url, browser.stash)
    connected = req.connect()
    if not connected:
        if req.state == Request.STATE_ERROR_CERT:
            error = f"Certificate was missing or corrupt ({url})."
        elif req.state == Request.STATE_UNTRUSTED_CERT:
            error = f"Certificate has been changed ({url})."
            # TODO propose the user ways to handle this.
        elif req.state == Request.STATE_CONNECTION_FAILED:
            error_details = f": {req.error}" if req.error else "."
            error = f"Connection failed ({url})" + error_details
        else:
            error = f"Connection failed ({url})."
        browser.set_status_error(error)
        return

    # TODO when connected, propose abort / temporary trust on
    # Request.STATE_INVALID_CERT, and abort / temporary trust / permanent
    # trust on Request.STATE_UNKNOWN_CERT. For now every state proceeds.

    data = req.proceed()
    if not data:
        browser.set_status_error(f"Server did not respond in time ({url}).")
        return
    response = Response.parse(data)
    if not response:
        browser.set_status_error(f"Server response parsing failed ({url}).")
        return

    if response.code == 20:
        handle_code = handle_response_content(browser, response)
        if handle_code == 0:
            if browser.current_url and history:
                browser.history.push(browser.current_url)
            browser.current_url = url
            browser.cache[url] = browser.page_pad.current_page
            browser.set_status(url)
        elif handle_code == 1:
            browser.set_status(f"Downloaded {url}.")
        # Other codes: the handler has already reported the problem.
    elif response.generic_code == 30 and response.meta:
        browser.open_url(response.meta, base_url=url, redirects=redirects + 1)
    elif response.generic_code in (40, 50):
        # Use the code of this very response, not the class attribute.
        # Assumes response.code is an enum member with a name -- TODO
        # confirm against the protocol module.
        error = f"Server error: {response.meta or response.code.name}"
        browser.set_status_error(error)
    elif response.generic_code == 10:
        handle_input_request(browser, url, response.meta)
    else:
        error = f"Unhandled response code {response.code}"
        browser.set_status_error(error)
|
||||
|
||||
|
||||
def handle_response_content(browser: Browser, response: Response) -> int:
    """Handle a response's content from a Gemini server.

    According to the MIME type received or inferred, render or download the
    response's content.

    Currently only text content is rendered. For Gemini, the encoding
    specified in the response is used, if available on the Python
    distribution. For other text formats, only UTF-8 is attempted.

    Arguments:
    - browser: Browser instance that displays the page.
    - response: a successful Response.

    Returns:
    An error code: 0 means a page has been loaded, so any book-keeping such
    as history management can be applied; 1 means a content has been
    successfully retrieved but has not been displayed (e.g. non-text
    content) nor saved as a page; 2 means that the content could not be
    handled, which currently only happens when the encoding is unknown.
    """
    mime_type = response.get_mime_type()
    if mime_type.main_type != "text":
        return 1  # TODO handle non-text content.

    if mime_type.sub_type == "gemini":
        encoding = mime_type.charset
        try:
            text = response.content.decode(encoding, errors="replace")
        except LookupError:
            # The codec name is not known to this Python distribution.
            browser.set_status_error(f"Unknown encoding {encoding}.")
            return 2
        browser.load_page(Page.from_gemtext(text))
    else:
        text = response.content.decode("utf-8", errors="replace")
        browser.load_page(Page.from_text(text))
    return 0
|
||||
|
||||
|
||||
def handle_input_request(browser: Browser, from_url: str, message: str = None):
    """Ask the user for the input requested by the server and send it.

    The optional server message is shown in the status bar while the
    command-line is focused. If the user provides some input, it is set as
    parameter of `from_url` and the resulting URL is opened.
    """
    prompt = f"Input needed: {message}" if message else "Input needed:"
    browser.set_status(prompt)
    user_input = browser.command_line.focus("?")
    if not user_input:
        return
    open_gemini_url(browser, set_parameter(from_url, user_input))
|
11
bebop/browser/web.py
Normal file
11
bebop/browser/web.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
"""Ha! You thought there would be a Web browser in there?"""
|
||||
|
||||
import webbrowser
|
||||
|
||||
from bebop.browser.browser import Browser
|
||||
|
||||
|
||||
def open_web_url(browser: Browser, url):
    """Open a Web URL in a new tab through Python's webbrowser module."""
    status = f"Opening {url}"
    browser.set_status(status)
    webbrowser.open_new_tab(url)
|
|
@ -6,9 +6,12 @@ module. A renderer can then completely abstract the original document.
|
|||
"""
|
||||
|
||||
import re
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
from bebop.links import Links
|
||||
|
||||
|
||||
@dataclass
|
||||
class Paragraph:
|
||||
|
@ -26,6 +29,7 @@ class Title:
|
|||
class Link:
|
||||
url: str
|
||||
text: str
|
||||
ident: int = 0
|
||||
RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")
|
||||
|
||||
|
||||
|
@ -47,9 +51,15 @@ class ListItem:
|
|||
RE = re.compile(r"\*\s(.*)")
|
||||
|
||||
|
||||
def parse_gemtext(text: str):
|
||||
ParsedGemtext = namedtuple("ParsedGemtext", ("elements", "links", "title"))
|
||||
|
||||
|
||||
def parse_gemtext(text: str) -> ParsedGemtext:
|
||||
"""Parse a string of Gemtext into a list of elements."""
|
||||
elements = []
|
||||
links = Links()
|
||||
last_link_id = 0
|
||||
title = ""
|
||||
preformatted = None
|
||||
for line in text.splitlines():
|
||||
line = line.rstrip()
|
||||
|
@ -59,14 +69,19 @@ def parse_gemtext(text: str):
|
|||
match = Title.RE.match(line)
|
||||
if match:
|
||||
hashtags, text = match.groups()
|
||||
elements.append(Title(hashtags.count("#"), text))
|
||||
level = hashtags.count("#")
|
||||
elements.append(Title(level, text))
|
||||
if not title and level == 1:
|
||||
title = text
|
||||
continue
|
||||
|
||||
match = Link.RE.match(line)
|
||||
if match:
|
||||
match_dict = match.groupdict()
|
||||
url, text = match_dict["url"], match_dict.get("text", "")
|
||||
elements.append(Link(url, text))
|
||||
last_link_id += 1
|
||||
links[last_link_id] = url
|
||||
elements.append(Link(url, text, last_link_id))
|
||||
continue
|
||||
|
||||
if line.startswith(Preformatted.FENCE):
|
||||
|
@ -99,4 +114,4 @@ def parse_gemtext(text: str):
|
|||
if preformatted:
|
||||
elements.append(preformatted)
|
||||
|
||||
return elements
|
||||
return ParsedGemtext(elements, links, title)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""History management."""
|
||||
|
||||
|
||||
class History:
|
||||
"""Basic browsing history manager."""
|
||||
|
||||
|
|
|
@ -13,11 +13,3 @@ class Links(dict):
|
|||
link_id for link_id, url in self.items()
|
||||
if str(link_id).startswith(digits)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def from_metalines(metalines: List):
|
||||
links = Links()
|
||||
for meta, _ in metalines:
|
||||
if "link_id" in meta and "url" in meta:
|
||||
links[meta["link_id"]] = meta["url"]
|
||||
return links
|
||||
|
|
222
bebop/metalines.py
Normal file
222
bebop/metalines.py
Normal file
|
@ -0,0 +1,222 @@
|
|||
"""Metalines generation.
|
||||
|
||||
In Bebop we use a list of elements as produced by our parser. These elements are
|
||||
converted into so-called "metalines", which are the text lines as they will be
|
||||
displayed, along with associated meta-data such as its type or a link's URL.
|
||||
"""
|
||||
|
||||
import string
|
||||
from enum import IntEnum
|
||||
from typing import List
|
||||
|
||||
from bebop.gemtext import (
|
||||
Blockquote, Link, ListItem, Paragraph, Preformatted, Title)
|
||||
|
||||
|
||||
SPLIT_CHARS = " \t-"
|
||||
JOIN_CHAR = "-"
|
||||
LIST_ITEM_MARK = "• "
|
||||
|
||||
|
||||
class LineType(IntEnum):
    """Kind of a rendered line.

    The type is stored next to each line's text so it can be used later
    for rendering. The three title values equal the title level itself, so
    a level converts to its type without a lookup.
    """

    NONE = 0
    # Titles: value == title level.
    TITLE_1 = 1
    TITLE_2 = 2
    TITLE_3 = 3
    # Other elements.
    PARAGRAPH = 4
    LINK = 5
    PREFORMATTED = 6
    BLOCKQUOTE = 7
    LIST_ITEM = 8
|
||||
|
||||
|
||||
def generate_metalines(elements, width):
    """Format elements into a list of lines with metadata.

    The returned list ("metalines") are tuples (meta, line), meta being a
    dict of metadata and line a text line to display. Currently the only
    metadata keys used are:
    - type: one of the LineType values.
    - url: only for links, the URL the link on this line refers to. Note
      that this key is present only for the first line of the link, i.e.
      long link descriptions wrapped on multiple lines will not have this
      key except for the first line.
    - link_id: only alongside "url" key, ID of this link.

    Elements of an unknown type are ignored. An empty separator line is
    inserted between two rendered elements unless both are "thin" elements
    (links, list items) of the same type. No separator is ever put before
    the first rendered element.
    """
    # (element class, formatter, requires margins, thin type), tried in
    # this order.
    formatters = (
        (Title, format_title, True, None),
        (Paragraph, format_paragraph, True, None),
        (Link, format_link, False, LineType.LINK),
        (Preformatted, format_preformatted, True, None),
        (Blockquote, format_blockquote, True, None),
        (ListItem, format_list_item, False, LineType.LIST_ITEM),
    )
    metalines = []
    context = {"width": width}
    separator = ({"type": LineType.NONE}, "")
    has_margins = False
    thin_type = None
    is_first = True
    for element in elements:
        previous_had_margins = has_margins
        last_thin_type = thin_type
        # Reset before the lookup so that an ignored element also resets
        # the margin state.
        has_margins = False
        thin_type = None
        entry = next(
            (entry for entry in formatters if isinstance(element, entry[0])),
            None
        )
        if entry is None:
            continue
        _, formatter, has_margins, thin_type = entry
        # Separate from the previous element if either of them requires
        # margins (e.g. link after a paragraph), or if both are thin but
        # differ in type. The first rendered element has nothing to be
        # separated from.
        if not is_first and (
            has_margins
            or previous_had_margins
            or thin_type != last_thin_type
        ):
            metalines.append(separator)
        is_first = False
        metalines += formatter(element, context)
    return metalines
|
||||
|
||||
|
||||
def generate_dumb_metalines(lines):
    """Turn raw lines into metalines, all typed as PARAGRAPH."""
    metalines = []
    for line in lines:
        metalines.append(({"type": LineType.PARAGRAPH}, line))
    return metalines
|
||||
|
||||
|
||||
def format_title(title: Title, context: dict):
    """Build the metalines of a title.

    Level 1 titles are wrapped then centered on the context width, level 2
    titles are wrapped with an indent of 2, other titles are simply
    wrapped.
    """
    width = context["width"]
    if title.level == 1:
        lines = [
            format(line, f"^{width}")
            for line in wrap_words(title.text, width)
        ]
    elif title.level == 2:
        lines = wrap_words(title.text, width, indent=2)
    else:
        lines = wrap_words(title.text, width)
    # The title level is also the value of the matching LineType.
    metalines = []
    for line in lines:
        metalines.append(({"type": LineType(title.level)}, line))
    return metalines
|
||||
|
||||
|
||||
def format_paragraph(paragraph: Paragraph, context: dict):
    """Build the metalines of a paragraph, wrapped on the context width."""
    metalines = []
    for line in wrap_words(paragraph.text, context["width"]):
        metalines.append(({"type": LineType.PARAGRAPH}, line))
    return metalines
|
||||
|
||||
|
||||
def format_link(link: Link, context: dict):
    """Build the metalines of a link.

    The link text (or its URL when there is no text) is wrapped with an
    indent as wide as the "[id] " anchor; the anchor then takes the place
    of that indent on the first line. Only the first metaline carries the
    "url" and "link_id" metadata.
    """
    anchor = f"[{link.ident}] "
    anchor_len = len(anchor)
    label = link.text or link.url
    wrapped = wrap_words(label, context["width"], indent=anchor_len)
    metalines = [(
        {"type": LineType.LINK, "url": link.url, "link_id": link.ident},
        # Swap the leading indent of the first line for the anchor.
        anchor + wrapped[0][anchor_len:],
    )]
    metalines.extend(({"type": LineType.LINK}, line) for line in wrapped[1:])
    return metalines
|
||||
|
||||
|
||||
def format_preformatted(preformatted: Preformatted, context: dict):
    """Build the metalines of a preformatted block.

    Lines are kept untouched: no wrapping is done, so the context is not
    used.
    """
    metalines = []
    for line in preformatted.lines:
        metalines.append(({"type": LineType.PREFORMATTED}, line))
    return metalines
|
||||
|
||||
|
||||
def format_blockquote(blockquote: Blockquote, context: dict):
    """Build the metalines of a blockquote, wrapped with an indent of 2."""
    metalines = []
    for line in wrap_words(blockquote.text, context["width"], indent=2):
        metalines.append(({"type": LineType.BLOCKQUOTE}, line))
    return metalines
|
||||
|
||||
|
||||
def format_list_item(item: ListItem, context: dict):
    """Build the metalines of a list item.

    The text is wrapped with an indent as wide as the list item mark, and
    the mark takes the place of that indent on the first line.
    """
    mark_len = len(LIST_ITEM_MARK)
    wrapped = wrap_words(item.text, context["width"], indent=mark_len)
    wrapped[0] = LIST_ITEM_MARK + wrapped[0][mark_len:]
    metalines = []
    for line in wrapped:
        metalines.append(({"type": LineType.LIST_ITEM}, line))
    return metalines
|
||||
|
||||
|
||||
def wrap_words(text: str, width: int, indent: int = 0) -> List[str]:
    """Wrap a text in several lines according to the given width.

    Arguments:
    - text: text to wrap.
    - width: maximum length of a line, indentation included.
    - indent: number of spaces put at the start of every line.

    Returns:
    The list of wrapped lines. Words that cannot fit on a line of their
    own are split in chunks ending with the join character. With a
    non-zero indent at least one line is always returned, possibly holding
    only the indentation.
    """
    margin = " " * indent
    # Space left on a fresh line once the margin is taken out. Kept at 1 or
    # more so that forced splitting always makes progress, even with a
    # degenerate width.
    room = max(1, width - indent)
    chunk_len = max(1, room - len(JOIN_CHAR))
    lines = []
    line = margin
    for word in _explode_words(text):
        # If adding the new word would overflow the line, use a new line.
        if len(line) + len(word) > width:
            # Push the pending line only if it holds more than its margin.
            if len(line) > indent:
                lines.append(line)
            line = margin
            # Force split words that do not fit on a line of their own.
            while len(word) > room:
                lines.append(margin + word[:chunk_len] + JOIN_CHAR)
                word = word[chunk_len:]
            # Do not start a line with a separator space.
            word = word.lstrip()
        line += word
    if line:
        lines.append(line)
    return lines
|
||||
|
||||
|
||||
def _explode_words(text: str) -> List[str]:
    """Split a string into words, keeping the separators.

    Whitespace separators are returned as words of their own; any other
    separator stays attached at the end of the word it follows.
    """
    words = []
    remaining = text
    while True:
        sep, sep_index = _find_next_sep(remaining)
        if not sep:
            # No separator left: what remains is the last word.
            words.append(remaining)
            return words
        head = remaining[:sep_index]
        if sep in string.whitespace:
            words.extend((head, sep))
        else:
            words.append(head + sep)
        remaining = remaining[sep_index + 1:]
|
||||
|
||||
|
||||
def _find_next_sep(text: str):
    """Locate the earliest SPLIT_CHARS separator in text.

    Return a (separator, index) tuple, or ("", 0) if text holds no separator.
    """
    hits = []
    for sep in SPLIT_CHARS:
        position = text.find(sep)
        if position != -1:
            hits.append((sep, position))
    if hits:
        return min(hits, key=lambda hit: hit[1])
    return ("", 0)
|
|
@ -1,13 +1,24 @@
|
|||
from dataclasses import dataclass, field
|
||||
|
||||
from bebop.gemtext import parse_gemtext, Title
|
||||
from bebop.rendering import generate_metalines
|
||||
from bebop.metalines import generate_dumb_metalines, generate_metalines
|
||||
from bebop.links import Links
|
||||
|
||||
|
||||
@dataclass
|
||||
class Page:
|
||||
"""Page-related data."""
|
||||
"""Page-related data.
|
||||
|
||||
Attributes:
|
||||
- source: str used to create the page.
|
||||
- metalines: lines ready to be rendered.
|
||||
- links: Links instance, mapping IDs to links on the page; this data is
|
||||
redundant as the links' URLs/IDs are already available in the
|
||||
corresponding metalines, it is meant to be used as a quick map for link ID
|
||||
lookup and disambiguation.
|
||||
- title: optional page title.
|
||||
"""
|
||||
source: str
|
||||
metalines: list = field(default_factory=list)
|
||||
links: Links = field(default_factory=Links)
|
||||
title: str = ""
|
||||
|
@ -15,13 +26,12 @@ class Page:
|
|||
@staticmethod
|
||||
def from_gemtext(gemtext: str):
|
||||
"""Produce a Page from a Gemtext file or string."""
|
||||
elements = parse_gemtext(gemtext)
|
||||
elements, links, title = parse_gemtext(gemtext)
|
||||
metalines = generate_metalines(elements, 80)
|
||||
links = Links.from_metalines(metalines)
|
||||
# TODO this is horrible; merge parsing with page generation directly
|
||||
title = ""
|
||||
for element in elements:
|
||||
if isinstance(element, Title) and element.level == 1:
|
||||
title = element.text
|
||||
break
|
||||
return Page(metalines, links, title)
|
||||
return Page(gemtext, metalines, links, title)
|
||||
|
||||
@staticmethod
|
||||
def from_text(text: str):
|
||||
"""Produce a Page for a text string."""
|
||||
metalines = generate_dumb_metalines(text.splitlines())
|
||||
return Page(text, metalines)
|
||||
|
|
|
@ -1,223 +1,9 @@
|
|||
"""Rendering Gemtext in curses.
|
||||
|
||||
In Bebop we use a list of elements as produced by our parser. These elements are
|
||||
rendered into so-called "metalines", which are the text lines as they will be
|
||||
displayed, along with associated meta-data such as its type or a link's URL.
|
||||
"""
|
||||
"""Rendering Gemtext in curses."""
|
||||
|
||||
import curses
|
||||
import string
|
||||
from enum import IntEnum
|
||||
from typing import List
|
||||
|
||||
from bebop.colors import ColorPair
|
||||
from bebop.gemtext import (Blockquote, Link, ListItem, Paragraph, Preformatted,
|
||||
Title)
|
||||
|
||||
|
||||
SPLIT_CHARS = " \t-"
|
||||
JOIN_CHAR = "-"
|
||||
LIST_ITEM_MARK = "• "
|
||||
|
||||
|
||||
class LineType(IntEnum):
    """Kind of content held by a line.

    The type is stored next to the line text so it is still known when the
    line gets rendered. Title members use the title level as their value, so
    a level converts to its type without a lookup.
    """
    NONE = 0
    # Titles: the value is the title level.
    TITLE_1 = 1
    TITLE_2 = 2
    TITLE_3 = 3
    # Other element kinds.
    PARAGRAPH = 4
    LINK = 5
    PREFORMATTED = 6
    BLOCKQUOTE = 7
    LIST_ITEM = 8
|
||||
|
||||
|
||||
def generate_metalines(elements, width):
    """Format elements into a list of lines with metadata.

    The returned list ("metalines") holds tuples (meta, line), meta being a
    dict of metadata and line a text line to display. Currently the only
    metadata keys used are:
    - type: one of the LineType constants.
    - url: only for links, the URL the link on this line refers to. Note
      that this key is present only for the first line of the link, i.e.
      long link descriptions wrapped on multiple lines will not have this
      key except for the first line.
    - link_id: only alongside "url" key, ID generated for this link.

    Elements of an unsupported type are ignored. Blank separator lines are
    inserted between elements, never before the first emitted line.
    """
    # For each supported element class: its formatter, whether it is laid out
    # with margins, and its "thin" type when it is packed without margins.
    layouts = (
        (Title, format_title, True, None),
        (Paragraph, format_paragraph, True, None),
        (Link, format_link, False, LineType.LINK),
        (Preformatted, format_preformatted, True, None),
        (Blockquote, format_blockquote, True, None),
        (ListItem, format_list_item, False, LineType.LIST_ITEM),
    )
    metalines = []
    context = {"last_link_id": 0, "width": width}
    separator = ({"type": LineType.NONE}, "")
    has_margins = False
    thin_type = None
    for element in elements:
        previous_had_margins, last_thin_type = has_margins, thin_type
        # Reset before the lookup so an ignored element leaves no margins or
        # thin type behind for the element that follows it.
        has_margins, thin_type = False, None
        for element_class, formatter, margins, thin in layouts:
            if isinstance(element, element_class):
                break
        else:
            continue
        has_margins, thin_type = margins, thin
        element_metalines = formatter(element, context)
        # Separate from the previous element if either of them requires
        # margins, or if both are thin but differ in type (e.g. a list item
        # after a link). Nothing is inserted while the output is still empty,
        # so a page never starts with a blank line.
        if metalines and (
            has_margins
            or previous_had_margins
            or thin_type != last_thin_type
        ):
            metalines.append(separator)
        # Append the element metalines now.
        metalines += element_metalines
    return metalines
|
||||
|
||||
|
||||
def format_title(title: Title, context: dict):
    """Build the metalines of a title.

    Level 1 titles are wrapped then centered on the full width, level 2
    titles are wrapped with a 2-space indentation, and other levels are
    wrapped without indentation.
    """
    width = context["width"]
    if title.level == 1:
        lines = [f"{line:^{width}}" for line in wrap_words(title.text, width)]
    elif title.level == 2:
        lines = wrap_words(title.text, width, indent=2)
    else:
        lines = wrap_words(title.text, width)
    # Title levels match the type constants of titles.
    return [({"type": LineType(title.level)}, line) for line in lines]
|
||||
|
||||
|
||||
def format_paragraph(paragraph: Paragraph, context: dict):
    """Build the metalines of a paragraph: its text wrapped to the width."""
    metalines = []
    for text in wrap_words(paragraph.text, context["width"]):
        metalines.append(({"type": LineType.PARAGRAPH}, text))
    return metalines
|
||||
|
||||
|
||||
def format_link(link: Link, context: dict):
    """Build the metalines of a link.

    A new link ID is taken from the context (and stored back into it), and
    shown as an "[id] " anchor at the start of the first line. Only the first
    metaline carries the "url" and "link_id" metadata.
    """
    # Take the next link ID and build its anchor.
    context["last_link_id"] += 1
    link_id = context["last_link_id"]
    anchor = f"[{link_id}] "
    anchor_width = len(anchor)
    # Wrap the label (or the URL if there is none), indented by the anchor.
    wrapped = wrap_words(
        link.text or link.url,
        context["width"],
        indent=anchor_width,
    )
    head_meta = {"type": LineType.LINK, "url": link.url, "link_id": link_id}
    # The anchor takes the place of the first line indentation.
    metalines = [(head_meta, anchor + wrapped[0][anchor_width:])]
    metalines.extend(({"type": LineType.LINK}, text) for text in wrapped[1:])
    return metalines
|
||||
|
||||
|
||||
def format_preformatted(preformatted: Preformatted, context: dict):
    """Build the metalines of a preformatted block, one per block line.

    Lines are taken as they are: no wrapping is applied and the context is
    not used.
    """
    metalines = []
    for raw_line in preformatted.lines:
        metalines.append(({"type": LineType.PREFORMATTED}, raw_line))
    return metalines
|
||||
|
||||
|
||||
def format_blockquote(blockquote: Blockquote, context: dict):
    """Build the metalines of a blockquote: its text wrapped to the width."""
    metalines = []
    for text in wrap_words(blockquote.text, context["width"]):
        metalines.append(({"type": LineType.BLOCKQUOTE}, text))
    return metalines
|
||||
|
||||
|
||||
def format_list_item(item: ListItem, context: dict):
    """Build the metalines of a list item.

    The item text is wrapped with an indentation as wide as the bullet mark,
    then the indentation of the first line is swapped for the mark itself.
    """
    mark_width = len(LIST_ITEM_MARK)
    wrapped = wrap_words(item.text, context["width"], indent=mark_width)
    # Only the first line carries the bullet; the others keep their spaces.
    wrapped[0] = LIST_ITEM_MARK + wrapped[0][mark_width:]
    return [({"type": LineType.LIST_ITEM}, text) for text in wrapped]
|
||||
|
||||
|
||||
def wrap_words(text: str, width: int, indent: int = 0) -> List[str]:
    """Wrap a text in several lines according to the renderer's width.

    Arguments:
    - text: text to wrap; it is split into words by `_explode_words`.
    - width: maximum length of a line, indentation included.
    - indent: number of spaces prepended to every returned line.

    Returns the list of wrapped lines. A word that cannot fit on an indented
    line is cut into chunks; every chunk but the last ends with JOIN_CHAR.
    """
    padding = " " * indent
    # Characters available for text on an indented line. Clamped to 1 so the
    # forced split below always makes progress, even with a degenerate width.
    room = max(1, width - indent)
    # A forced chunk leaves one column free for JOIN_CHAR.
    split_offset = max(1, room - 1)
    lines = []
    line = padding
    for word in _explode_words(text):
        # If adding the new word would overflow the line, use a new line.
        if len(line) + len(word) > width:
            # Push only lines that hold text: a line made of indentation alone
            # would show up as a blank line before the word.
            if line.strip():
                lines.append(line)
            line = padding
            # Force split words that are too long for an indented line.
            while len(word) > room:
                lines.append(padding + word[:split_offset] + JOIN_CHAR)
                word = word[split_offset:]
            # Do not start the new line with the whitespace that was wrapped.
            word = word.lstrip()
        line += word
    if line:
        lines.append(line)
    return lines
|
||||
|
||||
|
||||
def _explode_words(text: str) -> List[str]:
    """Cut a string into words at every separator found by `_find_next_sep`.

    Whitespace separators are kept as standalone items; any other separator
    stays attached to the end of the word it follows. The last item is
    whatever remains after the last separator, possibly an empty string.
    """
    exploded = []
    cursor = 0
    sep, offset = _find_next_sep(text[cursor:])
    while sep:
        chunk = text[cursor : cursor + offset]
        if sep in string.whitespace:
            exploded += [chunk, sep]
        else:
            exploded.append(chunk + sep)
        # Resume the search right after the separator.
        cursor += offset + 1
        sep, offset = _find_next_sep(text[cursor:])
    exploded.append(text[cursor:])
    return exploded
|
||||
|
||||
|
||||
def _find_next_sep(text: str):
    """Locate the earliest SPLIT_CHARS separator in text.

    Return a (separator, index) tuple, or ("", 0) if text holds no separator.
    """
    hits = []
    for sep in SPLIT_CHARS:
        position = text.find(sep)
        if position != -1:
            hits.append((sep, position))
    if hits:
        return min(hits, key=lambda hit: hit[1])
    return ("", 0)
|
||||
from bebop.metalines import LineType
|
||||
|
||||
|
||||
def render_lines(metalines, window, max_width):
|
||||
|
|
Reference in a new issue