browser: render all "text/*" Gemini responses

browser: open editor to view/edit sources & pages
browser: minor fixes
2021-04-16 19:56:56 +02:00 · 2021-04-16 19:30:33 +02:00 · 2021-04-16 19:30:14 +02:00 · 2021-03-28 18:57:23 +02:00 · 2021-03-28 18:55:52 +02:00 · 2021-03-28 18:28:54 +02:00
14 changed files with 539 additions and 411 deletions
--- a/BOARD.txt
+++ b/BOARD.txt
@ -0,0 +1,26 @@
+TODO                                    DONE
+--------------------------------------------------------------------------------
+                                        browsing
+                                        links
+                                        redirections
+                                        web links
+                                        history (back/forward)
+                                        simple caching
+                                        simple text files
+                                        encodings
+                                        bookmarks
+                                        view/edit sources
+non shit command-line
+home page
+downloads
+media files
+view history
+identity management
+configuration
+--------------------------------------------------------------------------------
+BACKLOG
+margins / centering
+pre blocks folding
+buffers (tabs)
+handle soft-hyphens on wrapping
+bug: combining chars reduce lengths
--- a/README.md
+++ b/README.md
@ -27,16 +27,16 @@ Features

 ### What works

- Basic browsing: scrolling, follow links, redirections, Web links.
+Common basic browsing features work: go to URL, scrolling, follow links,
+redirections, page encoding.

-### What is planned
+Bebop also provides these neat features:

- Handle more content types.
- Great config options.
- Identity management with temporary and managed certificates.
- Buffers (or tabs if you prefer).
- Home page.
- Bookmarks.
+- History
+- Caching
+- Bookmarks: it's just a text file with bindings.
+
+Check out [this board](BOARD.txt) for what's done and coming next.

 ### What is not planned for now

--- a/bebop/main.py
+++ b/bebop/main.py
@ -1,13 +1,13 @@
 import argparse

-from bebop.browser import Browser
+from bebop.browser.browser import Browser
 from bebop.fs import get_user_data_path
 from bebop.tofu import load_cert_stash, save_cert_stash


 def main():
    argparser = argparse.ArgumentParser()
-    argparser.add_argument("url", default=None)
+    argparser.add_argument("url", nargs="?", default=None)
    args = argparser.parse_args()

    if args.url:
@ -20,7 +20,7 @@ def main():
        user_data_path.mkdir()

    cert_stash_path = user_data_path / "known_hosts.txt"
-    cert_stash = load_cert_stash(cert_stash_path)
+    cert_stash = load_cert_stash(cert_stash_path) or {}
    try:
        Browser(cert_stash).run(start_url=start_url)
    finally:
--- a/bebop/bookmarks.py
+++ b/bebop/bookmarks.py
@ -1,4 +1,3 @@
-import io
 from pathlib import Path

 from bebop.fs import get_user_data_path
@ -9,7 +8,7 @@ TEMPLATE = """\

 Welcome to your bookmark page! This file has been created in "{original_path}" \
 and you can edit it as you wish. New bookmarks will be added on a new \
-line at the end. Always keep an empty line at the end!
+line at the end, so always keep an empty line there!
 """


--- a/bebop/browser/browser.py
+++ b/bebop/browser/browser.py
@ -4,19 +4,22 @@ import curses
 import curses.ascii
 import curses.textpad
 import os
-import webbrowser
+import subprocess
+import tempfile
 from math import inf

-from bebop.bookmarks import get_bookmarks_document, save_bookmark
+from bebop.bookmarks import (
+    get_bookmarks_path, get_bookmarks_document, save_bookmark
+)
 from bebop.colors import ColorPair, init_colors
 from bebop.command_line import CommandLine
 from bebop.history import History
 from bebop.links import Links
 from bebop.mouse import ButtonState
-from bebop.navigation import *
+from bebop.navigation import (
+    get_parent_url, get_root_url, join_url, parse_url, sanitize_url)
 from bebop.page import Page
 from bebop.page_pad import PagePad
-from bebop.protocol import Request, Response


 class Browser:
@ -132,6 +135,8 @@ class Browser:
            self.open_bookmarks()
        elif char == ord("B"):
            self.add_bookmark()
+        elif char == ord("e"):
+            self.edit_page()
        elif curses.ascii.isdigit(char):
            self.handle_digit_input(char)
        elif char == curses.KEY_MOUSE:
@ -251,133 +256,38 @@ class Browser:
        if redirects > 5:
            self.set_status_error(f"Too many redirections ({url}).")
            return
+
        if assume_absolute or not self.current_url:
            parts = parse_url(url, absolute=True)
            join = False
        else:
            parts = parse_url(url)
            join = True
+
        if parts.scheme == "gemini":
+            from bebop.browser.gemini import open_gemini_url
            # If there is no netloc, this is a relative URL.
            if join or base_url:
                url = join_url(base_url or self.current_url, url)
-            self.open_gemini_url(sanitize_url(url), redirects=redirects,
-                                 history=history, use_cache=use_cache)
+            open_gemini_url(
+                self,
+                sanitize_url(url),
+                redirects=redirects,
+                history=history,
+                use_cache=use_cache
+            )
        elif parts.scheme.startswith("http"):
-            self.open_web_url(url)
+            from bebop.browser.web import open_web_url
+            open_web_url(self, url)
        elif parts.scheme == "file":
-            self.open_file(parts.path, history=history)
+            from bebop.browser.file import open_file
+            open_file(self, parts.path, history=history)
        elif parts.scheme == "bebop":
            if parts.netloc == "bookmarks":
                self.open_bookmarks()
        else:
            self.set_status_error(f"Protocol {parts.scheme} not supported.")

-    def open_gemini_url(self, url, redirects=0, history=True, use_cache=True):
-        """Open a Gemini URL and set the formatted response as content.
-
-        After initiating the connection, TODO
-        """
-        self.set_status(f"Loading {url}")
-
-        if use_cache and url in self.cache:
-            self.load_page(self.cache[url])
-            if self.current_url and history:
-                self.history.push(self.current_url)
-            self.current_url = url
-            self.set_status(url)
-            return
-
-        req = Request(url, self.stash)
-        connected = req.connect()
-        if not connected:
-            if req.state == Request.STATE_ERROR_CERT:
-                error = f"Certificate was missing or corrupt ({url})."
-            elif req.state == Request.STATE_UNTRUSTED_CERT:
-                error = f"Certificate has been changed ({url})."
-                # TODO propose the user ways to handle this.
-            elif req.state == Request.STATE_CONNECTION_FAILED:
-                error_details = f": {req.error}" if req.error else "."
-                error = f"Connection failed ({url})" + error_details
-            else:
-                error = f"Connection failed ({url})."
-            self.set_status_error(error)
-            return
-
-        if req.state == Request.STATE_INVALID_CERT:
-            # TODO propose abort / temp trust
-            pass
-        elif req.state == Request.STATE_UNKNOWN_CERT:
-            # TODO propose abort / temp trust / perm trust
-            pass
-        else:
-            pass # TODO
-
-        data = req.proceed()
-        if not data:
-            self.set_status_error(f"Server did not respond in time ({url}).")
-            return
-        response = Response.parse(data)
-        if not response:
-            self.set_status_error(f"Server response parsing failed ({url}).")
-            return
-
-        if response.code == 20:
-            handle_code = self.handle_response_content(response)
-            if handle_code == 0:
-                if self.current_url and history:
-                    self.history.push(self.current_url)
-                self.current_url = url
-                self.cache[url] = self.page_pad.current_page
-                self.set_status(url)
-            elif handle_code == 1:
-                self.set_status(f"Downloaded {url}.")
-        elif response.generic_code == 30 and response.meta:
-            self.open_url(response.meta, base_url=url, redirects=redirects + 1)
-        elif response.generic_code in (40, 50):
-            error = f"Server error: {response.meta or Response.code.name}"
-            self.set_status_error(error)
-        elif response.generic_code == 10:
-            self.handle_input_request(url, response)
-        else:
-            error = f"Unhandled response code {response.code}"
-            self.set_status_error(error)
-
-    def handle_response_content(self, response: Response) -> int:
-        """Handle a response's content from a Gemini server.
-
-        According to the MIME type received or inferred, render or download the
-        response's content.
-
-        Currently only text/gemini content is rendered.
-
-        Arguments:
-        - response: a successful Response.
-
-        Returns:
-        An error code: 0 means a page has been loaded, so any book-keeping such
-        as history management can be applied; 1 means a content has been
-        successfully retrieved but has not been displayed (e.g. non-text
-        content) nor saved as a page; 2 means that the content could not be
-        handled, either due to bogus MIME type or MIME parameters.
-        """
-        mime_type = response.get_mime_type()
-        if mime_type.main_type == "text":
-            if mime_type.sub_type == "gemini":
-                encoding = mime_type.charset
-                try:
-                    text = response.content.decode(encoding, errors="replace")
-                except LookupError:
-                    self.set_status_error("Unknown encoding {encoding}.")
-                    return 2
-                self.load_page(Page.from_gemtext(text))
-                return 0
-            else:
-                pass  # TODO
-        else:
-            pass  # TODO
-        return 1
-
    def load_page(self, page: Page):
        """Load Gemtext data as the current page."""
        old_pad_height = self.page_pad.dim[0]
@ -391,9 +301,11 @@ class Browser:

    def handle_digit_input(self, init_char: int):
        """Focus command-line to select the link ID to follow."""
-        if not self.page_pad or self.page_pad.current_page.links is None:
+        if self.page_pad.current_page is None:
            return
        links = self.page_pad.current_page.links
+        if links is None:
+            return
        err, val = self.command_line.focus_for_link_navigation(init_char, links)
        if err == 0:
            self.open_link(links, val)  # type: ignore
@ -407,17 +319,6 @@ class Browser:
            return
        self.open_url(links[link_id])

-    def handle_input_request(self, from_url: str, response: Response):
-        """Focus command-line to pass input to the server."""
-        if response.meta:
-            self.set_status(f"Input needed: {response.meta}")
-        else:
-            self.set_status("Input needed:")
-        user_input = self.command_line.focus("?")
-        if user_input:
-            url = set_parameter(from_url, user_input)
-            self.open_gemini_url(url)
-
    def handle_mouse(self, mouse_id: int, x: int, y: int, z: int, bstate: int):
        """Handle mouse events.

@ -488,11 +389,7 @@ class Browser:
    def reload_page(self):
        """Reload the page, if one has been previously loaded."""
        if self.current_url:
-            self.open_url(
-                self.current_url,
-                history=False,
-                use_cache=False
-            )
+            self.open_url(self.current_url, history=False, use_cache=False)

    def go_back(self):
        """Go back in history if possible."""
@ -502,37 +399,12 @@ class Browser:
    def go_to_parent_page(self):
        """Go to the parent URL if possible."""
        if self.current_url:
-            self.open_gemini_url(get_parent_url(self.current_url))
+            self.open_url(get_parent_url(self.current_url))

    def go_to_root_page(self):
        """Go to the root URL if possible."""
        if self.current_url:
-            self.open_gemini_url(get_root_url(self.current_url))
-
-    def open_web_url(self, url):
-        """Open a Web URL. Currently relies in Python's webbrowser module."""
-        self.set_status(f"Opening {url}")
-        webbrowser.open_new_tab(url)
-
-    def open_file(self, filepath, encoding="utf-8", history=True):
-        """Open a file and render it.
-
-        This should be used only on Gemtext files or at least text files.
-        Anything else will produce garbage and may crash the program. In the
-        future this should be able to use a different parser according to a MIME
-        type or something.
-        """
-        try:
-            with open(filepath, "rt", encoding=encoding) as f:
-                text = f.read()
-        except (OSError, ValueError) as exc:
-            self.set_status_error(f"Failed to open file: {exc}")
-            return
-        self.load_page(Page.from_gemtext(text))
-        file_url = "file://" + filepath
-        if history:
-            self.history.push(file_url)
-        self.current_url = file_url
+            self.open_url(get_root_url(self.current_url))

    def open_bookmarks(self):
        """Open bookmarks."""
@ -547,7 +419,7 @@ class Browser:
        """Add the current URL as bookmark."""
        if not self.current_url:
            return
-        self.set_status("Title?")
+        self.set_status("Bookmark title?")
        current_title = self.page_pad.current_page.title or ""
        title = self.command_line.focus(">", prefix=current_title)
        if title:
@ -555,3 +427,42 @@ class Browser:
            if title:
                save_bookmark(self.current_url, title)
        self.reset_status()
+
+    def open_external_program(self, command):
+        """Pause the curses modes to run an external program.
+
+        Arguments:
+        - command: the command to run, passed as-is to subprocess.run.
+
+        The curses modes are restored and the windows refreshed whatever
+        happens to the program. If it could not be started at all, the error
+        is shown in the status bar instead of crashing the browser with a
+        broken terminal.
+        """
+        curses.nocbreak()
+        curses.echo()
+        error = None
+        try:
+            subprocess.run(command)
+        except OSError as exc:
+            error = f"Failed to run external program: {exc}"
+        finally:
+            # Always give the terminal back to curses, even on failure.
+            curses.noecho()
+            curses.cbreak()
+        self.refresh_windows()
+        # Report after the refresh so the message is not redrawn over.
+        if error:
+            self.set_status_error(error)
+
+    def edit_page(self):
+        """Open a text editor to view or edit the current page source.
+
+        For external pages, the source is written to a temporary file which
+        is opened in the editor and removed once the editor exits, so it is up
+        to the user to save it elsewhere if needed. Internal pages, e.g. the
+        bookmarks page, are opened directly from their location on disk.
+
+        Nothing happens if there is no current page to get a source from.
+        """
+        command = ["vi"]
+        delete_source_after = False
+
+        special_pages = {
+            "bebop://bookmarks": str(get_bookmarks_path())
+        }
+        if self.current_url in special_pages:
+            source_filename = special_pages[self.current_url]
+        else:
+            if not self.page_pad.current_page:
+                return
+            source = self.page_pad.current_page.source
+            # delete=False: the file must outlive this block for the editor.
+            with tempfile.NamedTemporaryFile("wt", delete=False) as source_file:
+                source_file.write(source)
+                source_filename = source_file.name
+            delete_source_after = True
+
+        command.append(source_filename)
+        try:
+            self.open_external_program(command)
+        finally:
+            # Never leave the temporary copy behind, even if the editor failed.
+            if delete_source_after:
+                os.unlink(source_filename)
--- a/bebop/browser/file.py
+++ b/bebop/browser/file.py
@ -0,0 +1,25 @@
+"""Local files browser."""
+
+from bebop.browser.browser import Browser
+from bebop.page import Page
+
+
+def open_file(browser: Browser, filepath: str, encoding="utf-8", history=True):
+    """Read a local text file and load it in the browser as a plain text page.
+
+    Arguments:
+    - browser: Browser instance to update.
+    - filepath: path of the file to open.
+    - encoding: encoding used to decode the file.
+    - history: whether to push the file URL to the browser history.
+
+    If the file can not be opened or decoded, the error is shown in the status
+    bar and the current page is left untouched.
+    """
+    try:
+        with open(filepath, "rt", encoding=encoding) as file_object:
+            content = file_object.read()
+    except (OSError, ValueError) as exc:
+        browser.set_status_error(f"Failed to open file: {exc}")
+    else:
+        page = Page.from_text(content)
+        browser.load_page(page)
+        url = "file://" + filepath
+        if history:
+            browser.history.push(url)
+        browser.current_url = url
--- a/bebop/browser/gemini.py
+++ b/bebop/browser/gemini.py
@ -0,0 +1,130 @@
+"""Gemini-related features of the browser."""
+
+from bebop.browser.browser import Browser
+from bebop.navigation import set_parameter
+from bebop.page import Page
+from bebop.protocol import Request, Response
+
+
+def open_gemini_url(browser: Browser, url, redirects=0, history=True,
+                    use_cache=True):
+    """Open a Gemini URL and set the formatted response as content.
+
+    The page is taken from the browser cache when allowed and available.
+    Otherwise a request is sent and the response is dispatched on its status
+    code: successful content is handled, redirections are followed, input
+    requests prompt the user and errors are shown in the status bar.
+
+    Arguments:
+    - browser: Browser instance to update.
+    - url: Gemini URL to open.
+    - redirects: amount of redirections already followed to get to this URL.
+    - history: whether to push the URL being left to the history.
+    - use_cache: whether a cached page can be used instead of a new request.
+    """
+    browser.set_status(f"Loading {url}")
+
+    if use_cache and url in browser.cache:
+        browser.load_page(browser.cache[url])
+        if browser.current_url and history:
+            browser.history.push(browser.current_url)
+        browser.current_url = url
+        browser.set_status(url)
+        return
+
+    req = Request(url, browser.stash)
+    connected = req.connect()
+    if not connected:
+        if req.state == Request.STATE_ERROR_CERT:
+            error = f"Certificate was missing or corrupt ({url})."
+        elif req.state == Request.STATE_UNTRUSTED_CERT:
+            error = f"Certificate has been changed ({url})."
+            # TODO propose the user ways to handle this.
+        elif req.state == Request.STATE_CONNECTION_FAILED:
+            error_details = f": {req.error}" if req.error else "."
+            error = f"Connection failed ({url})" + error_details
+        else:
+            error = f"Connection failed ({url})."
+        browser.set_status_error(error)
+        return
+
+    # Placeholders: certificate states do not alter the flow yet.
+    if req.state == Request.STATE_INVALID_CERT:
+        # TODO propose abort / temp trust
+        pass
+    elif req.state == Request.STATE_UNKNOWN_CERT:
+        # TODO propose abort / temp trust / perm trust
+        pass
+    else:
+        pass # TODO
+
+    data = req.proceed()
+    if not data:
+        browser.set_status_error(f"Server did not respond in time ({url}).")
+        return
+    response = Response.parse(data)
+    if not response:
+        browser.set_status_error(f"Server response parsing failed ({url}).")
+        return
+
+    if response.code == 20:
+        handle_code = handle_response_content(browser, response)
+        if handle_code == 0:
+            # A page has been loaded: update history, current URL and cache.
+            if browser.current_url and history:
+                browser.history.push(browser.current_url)
+            browser.current_url = url
+            browser.cache[url] = browser.page_pad.current_page
+            browser.set_status(url)
+        elif handle_code == 1:
+            browser.set_status(f"Downloaded {url}.")
+    elif response.generic_code == 30 and response.meta:
+        browser.open_url(response.meta, base_url=url, redirects=redirects + 1)
+    elif response.generic_code in (40, 50):
+        # Fall back on the name of the code of this very response; reading it
+        # from the Response class does not tell anything about the response.
+        error = f"Server error: {response.meta or response.code.name}"
+        browser.set_status_error(error)
+    elif response.generic_code == 10:
+        handle_input_request(browser, url, response.meta)
+    else:
+        error = f"Unhandled response code {response.code}"
+        browser.set_status_error(error)
+
+
+def handle_response_content(browser: Browser, response: Response) -> int:
+    """Handle a response's content from a Gemini server.
+
+    According to the MIME type received or inferred, render or download the
+    response's content.
+
+    Currently only text content is rendered. For Gemini, the encoding specified
+    in the response is used, if available on the Python distribution. For other
+    text formats, only UTF-8 is attempted.
+
+    Arguments:
+    - browser: Browser instance the page is loaded into.
+    - response: a successful Response.
+
+    Returns:
+    An error code: 0 means a page has been loaded, so any book-keeping such
+    as history management can be applied; 1 means a content has been
+    successfully retrieved but has not been displayed (e.g. non-text
+    content) nor saved as a page; 2 means that the content could not be
+    handled, either due to bogus MIME type or MIME parameters.
+    """
+    mime_type = response.get_mime_type()
+    if mime_type.main_type == "text":
+        if mime_type.sub_type == "gemini":
+            encoding = mime_type.charset
+            try:
+                text = response.content.decode(encoding, errors="replace")
+            except LookupError:
+                # The codec name is not known by Python: tell which one.
+                browser.set_status_error(f"Unknown encoding {encoding}.")
+                return 2
+            browser.load_page(Page.from_gemtext(text))
+            return 0
+        else:
+            # Other text types are shown as plain text, decoded as UTF-8.
+            text = response.content.decode("utf-8", errors="replace")
+            browser.load_page(Page.from_text(text))
+            return 0
+    else:
+        pass  # TODO
+    return 1
+
+
+def handle_input_request(browser: Browser, from_url: str, message: str =None):
+    """Prompt the user for the input requested by a server and send it.
+
+    Arguments:
+    - browser: Browser instance whose command-line gets the focus.
+    - from_url: URL that answered with an input request.
+    - message: optional prompt text, shown in the status bar.
+
+    If the user provides an input, it is set as parameter of from_url and the
+    resulting URL is opened; otherwise nothing more happens.
+    """
+    status = f"Input needed: {message}" if message else "Input needed:"
+    browser.set_status(status)
+    answer = browser.command_line.focus("?")
+    if not answer:
+        return
+    open_gemini_url(browser, set_parameter(from_url, answer))
--- a/bebop/browser/web.py
+++ b/bebop/browser/web.py
@ -0,0 +1,11 @@
+"""Ha! You thought there would be a Web browser in there?"""
+
+import webbrowser
+
+from bebop.browser.browser import Browser
+
+
+def open_web_url(browser: Browser, url):
+    """Open a Web URL in a new tab of the system Web browser.
+
+    Currently relies on Python's webbrowser module. The status bar is updated
+    to show the URL being opened; the current page is left as it is.
+    """
+    browser.set_status(f"Opening {url}")
+    webbrowser.open_new_tab(url)
--- a/bebop/gemtext.py
+++ b/bebop/gemtext.py
@ -6,9 +6,12 @@ module. A renderer can then completely abstract the original document.
 """

 import re
+from collections import namedtuple
 from dataclasses import dataclass
 from typing import List

+from bebop.links import Links
+

@dataclass
 class Paragraph:
@ -26,6 +29,7 @@ class Title:
 class Link:
    url: str
    text: str
+    ident: int = 0
    RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")


@ -47,9 +51,15 @@ class ListItem:
    RE = re.compile(r"\*\s(.*)")


-def parse_gemtext(text: str):
+ParsedGemtext = namedtuple("ParsedGemtext", ("elements", "links", "title"))
+
+
+def parse_gemtext(text: str) -> ParsedGemtext:
    """Parse a string of Gemtext into a list of elements."""
    elements = []
+    links = Links()
+    last_link_id = 0
+    title = ""
    preformatted = None
    for line in text.splitlines():
        line = line.rstrip()
@ -59,14 +69,19 @@ def parse_gemtext(text: str):
        match = Title.RE.match(line)
        if match:
            hashtags, text = match.groups()
-            elements.append(Title(hashtags.count("#"), text))
+            level = hashtags.count("#")
+            elements.append(Title(level, text))
+            if not title and level == 1:
+                title = text
            continue

        match = Link.RE.match(line)
        if match:
            match_dict = match.groupdict()
            url, text = match_dict["url"], match_dict.get("text", "")
-            elements.append(Link(url, text))
+            last_link_id += 1
+            links[last_link_id] = url
+            elements.append(Link(url, text, last_link_id))
            continue

        if line.startswith(Preformatted.FENCE):
@ -99,4 +114,4 @@ def parse_gemtext(text: str):
    if preformatted:
        elements.append(preformatted)

-    return elements
+    return ParsedGemtext(elements, links, title)
--- a/bebop/history.py
+++ b/bebop/history.py
@ -1,5 +1,6 @@
 """History management."""

+
 class History:
    """Basic browsing history manager."""

--- a/bebop/links.py
+++ b/bebop/links.py
@ -13,11 +13,3 @@ class Links(dict):
            link_id for link_id, url in self.items()
            if str(link_id).startswith(digits)
        ]
-
-    @staticmethod
-    def from_metalines(metalines: List):
-        links = Links()
-        for meta, _ in metalines:
-            if "link_id" in meta and "url" in meta:
-                links[meta["link_id"]] = meta["url"]
-        return links
--- a/bebop/metalines.py
+++ b/bebop/metalines.py
@ -0,0 +1,222 @@
+"""Metalines generation.
+
+In Bebop we use a list of elements as produced by our parser. These elements are
+converted into so-called "metalines", which are the text lines as they will be
+displayed, along with associated meta-data such as its type or a link's URL.
+"""
+
+import string
+from enum import IntEnum
+from typing import List
+
+from bebop.gemtext import (
+    Blockquote, Link, ListItem, Paragraph, Preformatted, Title)
+
+
+SPLIT_CHARS = " \t-"
+JOIN_CHAR = "-"
+LIST_ITEM_MARK = "• "
+
+
+class LineType(IntEnum):
+    """Type of line.
+
+    Keep lines type along with the content for later rendering.
+    Title type values match the title level to avoid looking it up, i.e. a
+    title of level N can be converted with LineType(N).
+    """
+    # Used for the empty separator lines put between elements.
+    NONE = 0
+    # Title values must stay equal to their title level (1 to 3).
+    TITLE_1 = 1
+    TITLE_2 = 2
+    TITLE_3 = 3
+    PARAGRAPH = 4
+    LINK = 5
+    PREFORMATTED = 6
+    BLOCKQUOTE = 7
+    LIST_ITEM = 8
+
+
+def generate_metalines(elements, width):
+    """Format elements into a list of lines with metadata.
+
+    The returned list ("metalines") are tuples (meta, line), meta being a
+    dict of metadata and line a text line to display. Currently the only
+    metadata keys used are:
+    - type: one of the LineType values.
+    - url: only for links, the URL the link on this line refers to. Note
+      that this key is present only for the first line of the link, i.e.
+      long link descriptions wrapped on multiple lines will not have this
+      key except for the first line.
+    - link_id: only alongside "url" key, ID of this link.
+
+    Arguments:
+    - elements: parsed elements (Title, Paragraph, Link, Preformatted,
+      Blockquote or ListItem); anything else is ignored.
+    - width: maximum width used to wrap the lines.
+    """
+    metalines = []
+    context = {"width": width}
+    # Empty line inserted between elements that have to be spaced out.
+    separator = ({"type": LineType.NONE}, "")
+    # State of the previously handled element: whether it required margins
+    # and, for "thin" elements (links and list items), which type it was.
+    has_margins = False
+    thin_type = None
+    for index, element in enumerate(elements):
+        previous_had_margins = has_margins
+        last_thin_type = thin_type
+        has_margins = False
+        thin_type = None
+        if isinstance(element, Title):
+            element_metalines = format_title(element, context)
+            has_margins = True
+        elif isinstance(element, Paragraph):
+            element_metalines = format_paragraph(element, context)
+            has_margins = True
+        elif isinstance(element, Link):
+            element_metalines = format_link(element, context)
+            thin_type = LineType.LINK
+        elif isinstance(element, Preformatted):
+            element_metalines = format_preformatted(element, context)
+            has_margins = True
+        elif isinstance(element, Blockquote):
+            element_metalines = format_blockquote(element, context)
+            has_margins = True
+        elif isinstance(element, ListItem):
+            element_metalines = format_list_item(element, context)
+            thin_type = LineType.LIST_ITEM
+        else:
+            # Unknown element: skipped, but the state above has been reset.
+            continue
+        # If current element requires margins and is not the first elements,
+        # separate from previous element. Also do it if the current element does
+        # not require margins but follows an element that required it (e.g. link
+        # after a paragraph). Also do it if both the current and previous
+        # elements do not require margins but differ in type.
+        # NOTE(review): as last_thin_type starts as None, a link or list item
+        # placed first also gets a separator before it; confirm it is wanted.
+        if (
+            (has_margins and index > 0)
+            or (not has_margins and previous_had_margins)
+            or (not has_margins and thin_type != last_thin_type)
+        ):
+            metalines.append(separator)
+        # Append the element metalines now.
+        metalines += element_metalines
+    return metalines
+
+
+def generate_dumb_metalines(lines):
+    """Turn raw text lines into metalines, all of the PARAGRAPH line type.
+
+    No wrapping nor formatting is applied: each line is kept as it is.
+    """
+    metalines = []
+    for text_line in lines:
+        metalines.append(({"type": LineType.PARAGRAPH}, text_line))
+    return metalines
+
+
+def format_title(title: Title, context: dict):
+    """Return metalines for this title.
+
+    Level 1 titles are wrapped then centered on the context width, level 2
+    titles are wrapped with an indentation of 2, other titles are simply
+    wrapped.
+    """
+    width = context["width"]
+    if title.level == 1:
+        lines = (
+            f"{line:^{width}}" for line in wrap_words(title.text, width)
+        )
+    elif title.level == 2:
+        lines = wrap_words(title.text, width, indent=2)
+    else:
+        lines = wrap_words(title.text, width)
+    metalines = []
+    for line in lines:
+        # Title levels match the type constants of titles.
+        metalines.append(({"type": LineType(title.level)}, line))
+    return metalines
+
+
+def format_paragraph(paragraph: Paragraph, context: dict):
+    """Return metalines for this paragraph, wrapped on the context width."""
+    width = context["width"]
+    return [
+        ({"type": LineType.PARAGRAPH}, line)
+        for line in wrap_words(paragraph.text, width)
+    ]
+
+
+def format_link(link: Link, context: dict):
+    """Return metalines for this link.
+
+    The link text (or its URL if it has no text) is wrapped with an
+    indentation as long as the "[id] " anchor, which is then put at the start
+    of the first line. Only the first metaline holds the "url" and "link_id"
+    metadata.
+    """
+    anchor = f"[{link.ident}] "
+    anchor_len = len(anchor)
+    label = link.text or link.url
+    wrapped = wrap_words(label, context["width"], indent=anchor_len)
+    first_meta = {
+        "type": LineType.LINK,
+        "url": link.url,
+        "link_id": link.ident
+    }
+    # The anchor takes the place of the first line indentation.
+    metalines = [(first_meta, anchor + wrapped[0][anchor_len:])]
+    metalines.extend(({"type": LineType.LINK}, line) for line in wrapped[1:])
+    return metalines
+
+
+def format_preformatted(preformatted: Preformatted, context: dict):
+    """Return metalines for this preformatted block.
+
+    Lines are kept as they are: the context is not used and nothing is
+    wrapped.
+    """
+    metalines = []
+    for line in preformatted.lines:
+        metalines.append(({"type": LineType.PREFORMATTED}, line))
+    return metalines
+
+
+def format_blockquote(blockquote: Blockquote, context: dict):
+    """Return metalines for this blockquote, wrapped and indented by 2."""
+    width = context["width"]
+    return [
+        ({"type": LineType.BLOCKQUOTE}, line)
+        for line in wrap_words(blockquote.text, width, indent=2)
+    ]
+
+
+def format_list_item(item: ListItem, context: dict):
+    """Return metalines for this list item.
+
+    The text is wrapped with an indentation as long as LIST_ITEM_MARK, and the
+    mark takes the place of the indentation on the first line.
+    """
+    mark_len = len(LIST_ITEM_MARK)
+    wrapped = wrap_words(item.text, context["width"], indent=mark_len)
+    wrapped[0] = LIST_ITEM_MARK + wrapped[0][mark_len:]
+    metalines = []
+    for line in wrapped:
+        metalines.append(({"type": LineType.LIST_ITEM}, line))
+    return metalines
+
+
+def wrap_words(text: str, width: int, indent: int =0) -> List[str]:
+    """Wrap a text in several lines no longer than width.
+
+    Arguments:
+    - text: the text to wrap.
+    - width: maximum length of a line, indentation included.
+    - indent: amount of spaces put at the start of every line.
+
+    Returns:
+    The list of wrapped lines. Words that can not fit on a line of their own
+    are split in pieces ending with JOIN_CHAR. The list is empty only if the
+    text is empty and there is no indentation.
+    """
+    lines = []
+    padding = " " * indent
+    # Room left for text once the indentation is accounted for. It is kept to
+    # at least 2 chars (a piece of word and JOIN_CHAR) so that splitting long
+    # words always makes progress, even if the width is too small.
+    room = max(width - indent, 2)
+    line = padding
+    for word in _explode_words(text):
+        # If adding the new word would overflow the line, use a new line.
+        if len(line) + len(word) > width:
+            # Push only lines holding more than their indentation, or a word
+            # too long for the first line would leave a blank line behind.
+            if len(line) > indent:
+                lines.append(line)
+                line = padding
+            # Force split words that are longer than the room left by the
+            # indentation, so that the last piece can not overflow either.
+            while len(word) > room:
+                split_offset = room - 1
+                lines.append(padding + word[:split_offset] + JOIN_CHAR)
+                word = word[split_offset:]
+            word = word.lstrip()
+        line += word
+    if line:
+        lines.append(line)
+    return lines
+
+
+def _explode_words(text: str) -> List[str]:
+    """Split a string into a list of words and whitespace separators.
+
+    Whitespace separators are kept as items of their own, while other
+    separators are left attached to the end of the word before them. The last
+    item is what remains after the last separator, possibly an empty string.
+    """
+    pieces = []
+    start = 0
+    sep, offset = _find_next_sep(text[start:])
+    while sep:
+        chunk = text[start : start + offset]
+        if sep in string.whitespace:
+            pieces += [chunk, sep]
+        else:
+            # Not a space char: it belongs to the word.
+            pieces.append(chunk + sep)
+        start += offset + 1
+        sep, offset = _find_next_sep(text[start:])
+    pieces.append(text[start:])
+    return pieces
+
+
+def _find_next_sep(text: str):
+    """Find the first separator of SPLIT_CHARS present in text.
+
+    Returns:
+    A tuple (separator, index) for the separator with the lowest index, or
+    ("", 0) if the text contains no separator.
+    """
+    candidates = []
+    for sep in SPLIT_CHARS:
+        position = text.find(sep)
+        if position != -1:
+            candidates.append((sep, position))
+    if candidates:
+        return min(candidates, key=lambda candidate: candidate[1])
+    return ("", 0)
--- a/bebop/page.py
+++ b/bebop/page.py
@ -1,13 +1,24 @@
 from dataclasses import dataclass, field

 from bebop.gemtext import parse_gemtext, Title
-from bebop.rendering import generate_metalines
+from bebop.metalines import generate_dumb_metalines, generate_metalines
 from bebop.links import Links


@dataclass
 class Page:
-    """Page-related data."""
+    """Page-related data.
+
+    Attributes:
+    - source: str used to create the page.
+    - metalines: lines ready to be rendered.
+    - links: Links instance, mapping IDs to links on the page; this data is
+      redundant as the links' URLs/IDs are already available in the
+      corresponding metalines, it is meant to be used as a quick map for link ID
+      lookup and disambiguation.
+    - title: optional page title.
+    """
+    source: str
    metalines: list = field(default_factory=list)
    links: Links = field(default_factory=Links)
    title: str = ""
@ -15,13 +26,12 @@ class Page:
    @staticmethod
    def from_gemtext(gemtext: str):
        """Produce a Page from a Gemtext file or string."""
-        elements = parse_gemtext(gemtext)
+        elements, links, title = parse_gemtext(gemtext)
        metalines = generate_metalines(elements, 80)
-        links = Links.from_metalines(metalines)
-        # TODO this is horrible; merge parsing with page generation directly
-        title = ""
-        for element in elements:
-            if isinstance(element, Title) and element.level == 1:
-                title = element.text
-                break
-        return Page(metalines, links, title)
+        return Page(gemtext, metalines, links, title)
+
+    @staticmethod
+    def from_text(text: str):
+        """Build a Page from a plain text string.
+
+        The text is kept as the page source and its lines are handed to
+        `generate_dumb_metalines` to obtain the metalines; links and title
+        keep their default values.
+        """
+        return Page(text, generate_dumb_metalines(text.splitlines()))
--- a/bebop/rendering.py
+++ b/bebop/rendering.py
@ -1,223 +1,9 @@
-"""Rendering Gemtext in curses.
-
-In Bebop we use a list of elements as produced by our parser. These elements are
-rendered into so-called "metalines", which are the text lines as they will be
-displayed, along with associated meta-data such as its type or a link's URL.
-"""
+"""Rendering Gemtext in curses."""

 import curses
-import string
-from enum import IntEnum
-from typing import List

 from bebop.colors import ColorPair
-from bebop.gemtext import (Blockquote, Link, ListItem, Paragraph, Preformatted,
-    Title)
-
-
-SPLIT_CHARS = " \t-"
-JOIN_CHAR = "-"
-LIST_ITEM_MARK = "• "
-
-
-class LineType(IntEnum):
-    """Type of line.
-
-    Keep lines type along with the content for later rendering.
-    Title type values match the title level to avoid looking it up.
-    """
-    NONE = 0
-    TITLE_1 = 1
-    TITLE_2 = 2
-    TITLE_3 = 3
-    PARAGRAPH = 4
-    LINK = 5
-    PREFORMATTED = 6
-    BLOCKQUOTE = 7
-    LIST_ITEM = 8
-
-
-def generate_metalines(elements, width):
-    """Format elements into a list of lines with metadata.
-
-    The returned list ("metalines") are tuples (meta, line), meta being a
-    dict of metadata and line a text line to display. Currently the only
-    metadata keys used are:
-    - type: one of the Renderer.TYPE constants.
-    - url: only for links, the URL the link on this line refers to. Note
-      that this key is present only for the first line of the link, i.e.
-      long link descriptions wrapped on multiple lines will not have a this
-      key except for the first line.
-    - link_id: only alongside "url" key, ID generated for this link.
-    """
-    metalines = []
-    context = {"last_link_id": 0, "width": width}
-    separator = ({"type": LineType.NONE}, "")
-    has_margins = False
-    thin_type = None
-    for index, element in enumerate(elements):
-        previous_had_margins = has_margins
-        last_thin_type = thin_type
-        has_margins = False
-        thin_type = None
-        if isinstance(element, Title):
-            element_metalines = format_title(element, context)
-            has_margins = True
-        elif isinstance(element, Paragraph):
-            element_metalines = format_paragraph(element, context)
-            has_margins = True
-        elif isinstance(element, Link):
-            element_metalines = format_link(element, context)
-            thin_type = LineType.LINK
-        elif isinstance(element, Preformatted):
-            element_metalines = format_preformatted(element, context)
-            has_margins = True
-        elif isinstance(element, Blockquote):
-            element_metalines = format_blockquote(element, context)
-            has_margins = True
-        elif isinstance(element, ListItem):
-            element_metalines = format_list_item(element, context)
-            thin_type = LineType.LIST_ITEM
-        else:
-            continue
-        # If current element requires margins and is not the first elements,
-        # separate from previous element. Also do it if the current element does
-        # not require margins but follows an element that required it (e.g. link
-        # after a paragraph). Also do it if both the current and previous
-        # elements do not require margins but differ in type.
-        if (
-            (has_margins and index > 0)
-            or (not has_margins and previous_had_margins)
-            or (not has_margins and thin_type != last_thin_type)
-        ):
-            metalines.append(separator)
-        # Append the element metalines now.
-        metalines += element_metalines
-    return metalines
-
-
-def format_title(title: Title, context: dict):
-    """Return metalines for this title."""
-    if title.level == 1:
-        wrapped = wrap_words(title.text, context["width"])
-        line_template = f"{{:^{context['width']}}}"
-        lines = (line_template.format(line) for line in wrapped)
-    else:
-        if title.level == 2:
-            lines = wrap_words(title.text, context["width"], indent=2)
-        else:
-            lines = wrap_words(title.text, context["width"])
-    # Title levels match the type constants of titles.
-    return [({"type": LineType(title.level)}, line) for line in lines]
-
-
-def format_paragraph(paragraph: Paragraph, context: dict):
-    """Return metalines for this paragraph."""
-    lines = wrap_words(paragraph.text, context["width"])
-    return [({"type": LineType.PARAGRAPH}, line) for line in lines]
-
-
-def format_link(link: Link, context: dict):
-    """Return metalines for this link."""
-    # Get a new link and build the "[id]" anchor.
-    link_id = context["last_link_id"] + 1
-    context["last_link_id"] = link_id
-    link_text = link.text or link.url
-    link_anchor = f"[{link_id}] "
-    # Wrap lines, indented by the link anchor length.
-    lines = wrap_words(link_text, context["width"], indent=len(link_anchor))
-    first_line_meta = {
-        "type": LineType.LINK,
-        "url": link.url,
-        "link_id": link_id
-    }
-    # Replace first line indentation with the anchor.
-    first_line_text = link_anchor + lines[0][len(link_anchor):]
-    first_line = [(first_line_meta, first_line_text)]
-    other_lines = [({"type": LineType.LINK}, line) for line in lines[1:]]
-    return first_line + other_lines
-
-
-def format_preformatted(preformatted: Preformatted, context: dict):
-    """Return metalines for this preformatted block."""
-    return [
-        ({"type": LineType.PREFORMATTED}, line)
-        for line in preformatted.lines
-    ]
-
-
-def format_blockquote(blockquote: Blockquote, context: dict):
-    """Return metalines for this blockquote."""
-    lines = wrap_words(blockquote.text, context["width"])
-    return [({"type": LineType.BLOCKQUOTE}, line) for line in lines]
-
-
-def format_list_item(item: ListItem, context: dict):
-    """Return metalines for this list item."""
-    indent = len(LIST_ITEM_MARK)
-    lines = wrap_words(item.text, context["width"], indent=indent)
-    first_line = LIST_ITEM_MARK + lines[0][indent:]
-    lines[0] = first_line
-    return [({"type": LineType.LIST_ITEM}, line) for line in lines]
-
-
-def wrap_words(text: str, width: int, indent: int =0) -> List[str]:
-    """Wrap a text in several lines according to the renderer's width."""
-    lines = []
-    line = " " * indent
-    words = _explode_words(text)
-    for word in words:
-        line_len, word_len = len(line), len(word)
-        # If adding the new word would overflow the line, use a new line.
-        if line_len + word_len > width:
-            # Push only non-empty lines.
-            if line_len > 0:
-                lines.append(line)
-                line = " " * indent
-            # Force split words that are longer than the width.
-            while word_len > width:
-                split_offset = width - 1 - indent
-                word_line = " " * indent + word[:split_offset] + JOIN_CHAR
-                lines.append(word_line)
-                word = word[split_offset:]
-                word_len = len(word)
-            word = word.lstrip()
-        line += word
-    if line:
-        lines.append(line)
-    return lines
-
-
-def _explode_words(text: str) -> List[str]:
-    """Split a string into a list of words."""
-    words = []
-    pos = 0
-    while True:
-        sep, sep_index = _find_next_sep(text[pos:])
-        if not sep:
-            words.append(text[pos:])
-            return words
-        word = text[pos : pos + sep_index]
-        # If the separator is not a space char, append it to the word.
-        if sep in string.whitespace:
-            words.append(word)
-            words.append(sep)
-        else:
-            words.append(word + sep)
-        pos += sep_index + 1
-
-
-def _find_next_sep(text: str):
-    """Find the next separator index and return both the separator and index."""
-    indices = []
-    for sep in SPLIT_CHARS:
-        try:
-            indices.append((sep, text.index(sep)))
-        except ValueError:
-            pass
-    if not indices:
-        return ("", 0)
-    return min(indices, key=lambda e: e[1])
+from bebop.metalines import LineType


 def render_lines(metalines, window, max_width):
Author	SHA1	Message	Date
dece	f3a3a36039	browser: render all "text/*" Gemini responses	2021-04-16 19:56:56 +02:00
dece	347b5a81cc	browser: open editor to view/edit sources & pages	2021-04-16 19:30:33 +02:00
dece	bce65a1472	browser: minor fixes	2021-04-16 19:30:14 +02:00
dece	6bc4dbcc5d	metalines: split contents from rendering module	2021-03-28 18:57:23 +02:00
dece	8aee7fdfba	gemtext: add links/title to parsing result parse_gemtext used to return only the element list, requiring subsequent loops to find a title or collect links; now it's all done at the same time!	2021-03-28 18:55:52 +02:00
dece	1f938fd2af	clean and fix minor issues	2021-03-28 18:28:54 +02:00
dece	bd7cfce520	browser: split in several files	2021-03-28 18:28:35 +02:00