gemtext: add links/title to parsing result

parse_gemtext used to return only the element list, requiring subsequent
loops to find a title or collect links; now it's all done at the same
time!
This commit is contained in:
dece 2021-03-28 18:55:52 +02:00
parent 1f938fd2af
commit 8aee7fdfba
4 changed files with 31 additions and 22 deletions

View file

@ -6,9 +6,12 @@ module. A renderer can then completely abstract the original document.
""" """
import re import re
from collections import namedtuple
from dataclasses import dataclass from dataclasses import dataclass
from typing import List from typing import List
from bebop.links import Links
@dataclass @dataclass
class Paragraph: class Paragraph:
@ -26,6 +29,7 @@ class Title:
class Link: class Link:
url: str url: str
text: str text: str
ident: int = 0
RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?") RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")
@ -47,9 +51,15 @@ class ListItem:
RE = re.compile(r"\*\s(.*)") RE = re.compile(r"\*\s(.*)")
def parse_gemtext(text: str): ParsedGemtext = namedtuple("ParsedGemtext", ("elements", "links", "title"))
def parse_gemtext(text: str) -> ParsedGemtext:
"""Parse a string of Gemtext into a list of elements.""" """Parse a string of Gemtext into a list of elements."""
elements = [] elements = []
links = Links
last_link_id = 0
title = ""
preformatted = None preformatted = None
for line in text.splitlines(): for line in text.splitlines():
line = line.rstrip() line = line.rstrip()
@ -59,14 +69,18 @@ def parse_gemtext(text: str):
match = Title.RE.match(line) match = Title.RE.match(line)
if match: if match:
hashtags, text = match.groups() hashtags, text = match.groups()
elements.append(Title(hashtags.count("#"), text)) level = hashtags.count("#")
elements.append(Title(level, text))
if not title and level == 1:
title = text
continue continue
match = Link.RE.match(line) match = Link.RE.match(line)
if match: if match:
match_dict = match.groupdict() match_dict = match.groupdict()
url, text = match_dict["url"], match_dict.get("text", "") url, text = match_dict["url"], match_dict.get("text", "")
elements.append(Link(url, text)) last_link_id += 1
elements.append(Link(url, text, last_link_id))
continue continue
if line.startswith(Preformatted.FENCE): if line.startswith(Preformatted.FENCE):
@ -99,4 +113,4 @@ def parse_gemtext(text: str):
if preformatted: if preformatted:
elements.append(preformatted) elements.append(preformatted)
return elements return ParsedGemtext(elements, links, title)

View file

@ -1,5 +1,6 @@
"""History management.""" """History management."""
class History: class History:
"""Basic browsing history manager.""" """Basic browsing history manager."""

View file

@ -13,11 +13,3 @@ class Links(dict):
link_id for link_id, url in self.items() link_id for link_id, url in self.items()
if str(link_id).startswith(digits) if str(link_id).startswith(digits)
] ]
@staticmethod
def from_metalines(metalines: List):
links = Links()
for meta, _ in metalines:
if "link_id" in meta and "url" in meta:
links[meta["link_id"]] = meta["url"]
return links

View file

@ -1,13 +1,22 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from bebop.gemtext import parse_gemtext, Title from bebop.gemtext import parse_gemtext, Title
from bebop.rendering import generate_metalines from bebop.metalines import generate_metalines
from bebop.links import Links from bebop.links import Links
@dataclass @dataclass
class Page: class Page:
"""Page-related data.""" """Page-related data.
Attributes:
- metalines: lines ready to be rendered.
- links: Links instance, mapping IDs to links on the page. This data is
redundant, as the links' URLs/IDs are already available in the
corresponding metalines; it is meant to be used as a quick map for link ID
lookup and disambiguation.
- title: optional page title.
"""
metalines: list = field(default_factory=list) metalines: list = field(default_factory=list)
links: Links = field(default_factory=Links) links: Links = field(default_factory=Links)
title: str = "" title: str = ""
@ -15,13 +24,6 @@ class Page:
@staticmethod @staticmethod
def from_gemtext(gemtext: str): def from_gemtext(gemtext: str):
"""Produce a Page from a Gemtext file or string.""" """Produce a Page from a Gemtext file or string."""
elements = parse_gemtext(gemtext) elements, links, title = parse_gemtext(gemtext)
metalines = generate_metalines(elements, 80) metalines = generate_metalines(elements, 80)
links = Links.from_metalines(metalines)
# TODO this is horrible; merge parsing with page generation directly
title = ""
for element in elements:
if isinstance(element, Title) and element.level == 1:
title = element.text
break
return Page(metalines, links, title) return Page(metalines, links, title)