2021-02-12 19:01:42 +01:00
|
|
|
import re
|
|
|
|
import typing
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Paragraph:
    """A line of plain text that is not any other Gemtext element."""

    # Content of the line.
    text: str
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Title:
    """A Gemtext heading line.

    RE matches one to three "#" characters, at least one whitespace
    character, then the heading text; it captures the "#" run and the text.
    """

    # Heading depth.
    level: int
    # Heading text.
    text: str

    # Class-level pattern, not a dataclass field.
    RE: typing.ClassVar[typing.Pattern[str]] = re.compile(r"(#{1,3})\s+(.+)")
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Link:
    """A Gemtext link line.

    RE matches "=>", optional whitespace, a run of non-whitespace
    characters captured as "url", and optionally whitespace followed by the
    rest of the line captured as "text".
    """

    # Link target.
    url: str
    # Label of the link.
    text: str

    # Class-level pattern, not a dataclass field.
    RE: typing.ClassVar[typing.Pattern[str]] = re.compile(
        r"=>\s*(?P<url>\S+)(\s+(?P<text>.+))?"
    )
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Preformatted:
    """A block of preformatted lines delimited by FENCE lines."""

    # Lines of the block.
    lines: typing.List[str]

    # Prefix marking a fence line; class-level constant, not a field.
    FENCE: typing.ClassVar[str] = "```"
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Blockquote:
    """A Gemtext quote line.

    RE matches ">", optional whitespace, and captures the rest of the line.
    """

    # Quoted text.
    text: str

    # Class-level pattern, not a dataclass field.
    RE: typing.ClassVar[typing.Pattern[str]] = re.compile(r">\s*(.*)")
|
|
|
|
|
|
|
|
|
2021-02-16 20:23:44 +01:00
|
|
|
@dataclass
class ListItem:
    """A Gemtext list item line.

    RE matches "*", exactly one whitespace character, and captures the
    rest of the line.
    """

    # Item text.
    text: str

    # Class-level pattern, not a dataclass field.
    RE: typing.ClassVar[typing.Pattern[str]] = re.compile(r"\*\s(.*)")
|
|
|
|
|
|
|
|
|
2021-02-12 19:01:42 +01:00
|
|
|
def parse_gemtext(data):
    """Parse UTF-8 encoded Gemtext as a list of elements.

    Args:
        data: the raw document as bytes. It is decoded as UTF-8 and bytes
            that cannot be decoded are silently dropped.

    Returns:
        A list, in document order, of Title, Link, Preformatted,
        Blockquote, ListItem and Paragraph objects. Blank lines outside of
        a preformatted block produce no element. Lines between two fence
        lines are collected verbatim (minus trailing whitespace) in a
        single Preformatted element, blank lines included.
    """
    document = data.decode(encoding="utf8", errors="ignore")
    elements = []
    # Block currently being filled, or None when outside of fences.
    preformatted = None

    for raw_line in document.splitlines():
        line = raw_line.rstrip()

        # A fence line toggles preformatted mode. No other pattern can match
        # a line starting with the fence, so testing it first is safe.
        if line.startswith(Preformatted.FENCE):
            if preformatted is None:
                preformatted = Preformatted([])
            else:
                elements.append(preformatted)
                preformatted = None
            continue

        # Inside a fence every line is literal content: it must not be
        # interpreted as a title, link, quote or list item, and blank lines
        # are part of the block.
        if preformatted is not None:
            preformatted.lines.append(line)
            continue

        if not line:
            continue

        match = Title.RE.match(line)
        if match:
            hashtags, title_text = match.groups()
            elements.append(Title(len(hashtags), title_text))
            continue

        match = Link.RE.match(line)
        if match:
            # The "text" group is None (not missing) when the link has no
            # label, so a dict default would never apply; fall back here.
            label = match.group("text") or ""
            elements.append(Link(match.group("url"), label))
            continue

        match = Blockquote.RE.match(line)
        if match:
            elements.append(Blockquote(match.group(1)))
            continue

        match = ListItem.RE.match(line)
        if match:
            elements.append(ListItem(match.group(1)))
            continue

        elements.append(Paragraph(line))

    # A block whose closing fence is missing is still emitted rather than
    # silently discarded.
    if preformatted is not None:
        elements.append(preformatted)

    return elements
|