gemtext: add links/title to parsing result
parse_gemtext used to return only the element list, so callers needed extra loops to find the title or collect the links; it now returns the elements, links and title together in a single result.
This commit is contained in:
parent
1f938fd2af
commit
8aee7fdfba
|
@ -6,9 +6,12 @@ module. A renderer can then completely abstract the original document.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from collections import namedtuple
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
from bebop.links import Links
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Paragraph:
|
class Paragraph:
|
||||||
|
@ -26,6 +29,7 @@ class Title:
|
||||||
class Link:
|
class Link:
|
||||||
url: str
|
url: str
|
||||||
text: str
|
text: str
|
||||||
|
ident: int = 0
|
||||||
RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")
|
RE = re.compile(r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?")
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,9 +51,15 @@ class ListItem:
|
||||||
RE = re.compile(r"\*\s(.*)")
|
RE = re.compile(r"\*\s(.*)")
|
||||||
|
|
||||||
|
|
||||||
def parse_gemtext(text: str):
|
ParsedGemtext = namedtuple("ParsedGemtext", ("elements", "links", "title"))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_gemtext(text: str) -> ParsedGemtext:
|
||||||
"""Parse a string of Gemtext into a list of elements."""
|
"""Parse a string of Gemtext into a list of elements."""
|
||||||
elements = []
|
elements = []
|
||||||
|
links = Links
|
||||||
|
last_link_id = 0
|
||||||
|
title = ""
|
||||||
preformatted = None
|
preformatted = None
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
|
@ -59,14 +69,18 @@ def parse_gemtext(text: str):
|
||||||
match = Title.RE.match(line)
|
match = Title.RE.match(line)
|
||||||
if match:
|
if match:
|
||||||
hashtags, text = match.groups()
|
hashtags, text = match.groups()
|
||||||
elements.append(Title(hashtags.count("#"), text))
|
level = hashtags.count("#")
|
||||||
|
elements.append(Title(level, text))
|
||||||
|
if not title and level == 1:
|
||||||
|
title = text
|
||||||
continue
|
continue
|
||||||
|
|
||||||
match = Link.RE.match(line)
|
match = Link.RE.match(line)
|
||||||
if match:
|
if match:
|
||||||
match_dict = match.groupdict()
|
match_dict = match.groupdict()
|
||||||
url, text = match_dict["url"], match_dict.get("text", "")
|
url, text = match_dict["url"], match_dict.get("text", "")
|
||||||
elements.append(Link(url, text))
|
last_link_id += 1
|
||||||
|
elements.append(Link(url, text, last_link_id))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith(Preformatted.FENCE):
|
if line.startswith(Preformatted.FENCE):
|
||||||
|
@ -99,4 +113,4 @@ def parse_gemtext(text: str):
|
||||||
if preformatted:
|
if preformatted:
|
||||||
elements.append(preformatted)
|
elements.append(preformatted)
|
||||||
|
|
||||||
return elements
|
return ParsedGemtext(elements, links, title)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
"""History management."""
|
"""History management."""
|
||||||
|
|
||||||
|
|
||||||
class History:
|
class History:
|
||||||
"""Basic browsing history manager."""
|
"""Basic browsing history manager."""
|
||||||
|
|
||||||
|
|
|
@ -13,11 +13,3 @@ class Links(dict):
|
||||||
link_id for link_id, url in self.items()
|
link_id for link_id, url in self.items()
|
||||||
if str(link_id).startswith(digits)
|
if str(link_id).startswith(digits)
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def from_metalines(metalines: List):
|
|
||||||
links = Links()
|
|
||||||
for meta, _ in metalines:
|
|
||||||
if "link_id" in meta and "url" in meta:
|
|
||||||
links[meta["link_id"]] = meta["url"]
|
|
||||||
return links
|
|
||||||
|
|
|
@ -1,13 +1,22 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from bebop.gemtext import parse_gemtext, Title
|
from bebop.gemtext import parse_gemtext, Title
|
||||||
from bebop.rendering import generate_metalines
|
from bebop.metalines import generate_metalines
|
||||||
from bebop.links import Links
|
from bebop.links import Links
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Page:
|
class Page:
|
||||||
"""Page-related data."""
|
"""Page-related data.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
- metalines: lines ready to be rendered.
|
||||||
|
- links: Links instance, mapping IDs to links on the page; this data is
|
||||||
|
redundant as the links' URLs/IDs are already available in the
|
||||||
|
corresponding metalines, but it is meant to be used as a quick map for link ID
|
||||||
|
lookup and disambiguation.
|
||||||
|
- title: optional page title.
|
||||||
|
"""
|
||||||
metalines: list = field(default_factory=list)
|
metalines: list = field(default_factory=list)
|
||||||
links: Links = field(default_factory=Links)
|
links: Links = field(default_factory=Links)
|
||||||
title: str = ""
|
title: str = ""
|
||||||
|
@ -15,13 +24,6 @@ class Page:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_gemtext(gemtext: str):
|
def from_gemtext(gemtext: str):
|
||||||
"""Produce a Page from a Gemtext file or string."""
|
"""Produce a Page from a Gemtext file or string."""
|
||||||
elements = parse_gemtext(gemtext)
|
elements, links, title = parse_gemtext(gemtext)
|
||||||
metalines = generate_metalines(elements, 80)
|
metalines = generate_metalines(elements, 80)
|
||||||
links = Links.from_metalines(metalines)
|
|
||||||
# TODO this is horrible; merge parsing with page generation directly
|
|
||||||
title = ""
|
|
||||||
for element in elements:
|
|
||||||
if isinstance(element, Title) and element.level == 1:
|
|
||||||
title = element.text
|
|
||||||
break
|
|
||||||
return Page(metalines, links, title)
|
return Page(metalines, links, title)
|
||||||
|
|
Reference in a new issue