gemtext: add links/title to parsing result

parse_gemtext used to return only the element list, so callers needed
extra loops to find a title or collect links; it now returns a
ParsedGemtext tuple (elements, links, title) built in the same pass.
This commit is contained in:
dece 2021-03-28 18:55:52 +02:00
parent 1f938fd2af
commit 8aee7fdfba
4 changed files with 31 additions and 22 deletions

View file

@ -6,9 +6,12 @@ module. A renderer can then completely abstract the original document.
"""
import re
from collections import namedtuple
from dataclasses import dataclass
from typing import List
from bebop.links import Links
@dataclass
class Paragraph:
@ -26,6 +29,7 @@ class Title:
class Link:
url: str
text: str
ident: int = 0
RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")
@ -47,9 +51,15 @@ class ListItem:
RE = re.compile(r"\*\s(.*)")
def parse_gemtext(text: str):
ParsedGemtext = namedtuple("ParsedGemtext", ("elements", "links", "title"))
def parse_gemtext(text: str) -> ParsedGemtext:
"""Parse a string of Gemtext into a list of elements."""
elements = []
links = Links
last_link_id = 0
title = ""
preformatted = None
for line in text.splitlines():
line = line.rstrip()
@ -59,14 +69,18 @@ def parse_gemtext(text: str):
match = Title.RE.match(line)
if match:
hashtags, text = match.groups()
elements.append(Title(hashtags.count("#"), text))
level = hashtags.count("#")
elements.append(Title(level, text))
if not title and level == 1:
title = text
continue
match = Link.RE.match(line)
if match:
match_dict = match.groupdict()
url, text = match_dict["url"], match_dict.get("text", "")
elements.append(Link(url, text))
last_link_id += 1
elements.append(Link(url, text, last_link_id))
continue
if line.startswith(Preformatted.FENCE):
@ -99,4 +113,4 @@ def parse_gemtext(text: str):
if preformatted:
elements.append(preformatted)
return elements
return ParsedGemtext(elements, links, title)

View file

@ -1,5 +1,6 @@
"""History management."""
class History:
"""Basic browsing history manager."""

View file

@ -13,11 +13,3 @@ class Links(dict):
link_id for link_id, url in self.items()
if str(link_id).startswith(digits)
]
@staticmethod
def from_metalines(metalines: List):
links = Links()
for meta, _ in metalines:
if "link_id" in meta and "url" in meta:
links[meta["link_id"]] = meta["url"]
return links

View file

@ -1,13 +1,22 @@
from dataclasses import dataclass, field
from bebop.gemtext import parse_gemtext, Title
from bebop.rendering import generate_metalines
from bebop.metalines import generate_metalines
from bebop.links import Links
@dataclass
class Page:
"""Page-related data."""
"""Page-related data.
Attributes:
- metalines: lines ready to be rendered.
- links: Links instance, mapping IDs to links on the page. This data is
redundant, as the links' URLs/IDs are already available in the
corresponding metalines; it is meant to be used as a quick map for link ID
lookup and disambiguation.
- title: optional page title.
"""
metalines: list = field(default_factory=list)
links: Links = field(default_factory=Links)
title: str = ""
@ -15,13 +24,6 @@ class Page:
@staticmethod
def from_gemtext(gemtext: str):
"""Produce a Page from a Gemtext file or string."""
elements = parse_gemtext(gemtext)
elements, links, title = parse_gemtext(gemtext)
metalines = generate_metalines(elements, 80)
links = Links.from_metalines(metalines)
# TODO this is horrible; merge parsing with page generation directly
title = ""
for element in elements:
if isinstance(element, Title) and element.level == 1:
title = element.text
break
return Page(metalines, links, title)