tlfi: add lookup with suggestions, and definitions

This commit is contained in:
dece 2021-11-06 16:42:04 +01:00
parent f83c6085da
commit 5737895738

115
tlfi.py Normal file
View file

@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""TLFi command-line interface.
If Blessings is installed on your system, you will get nice formatting and
maybe colors!
"""
import argparse
import difflib
import re
import unicodedata
from pathlib import Path
from bs4 import BeautifulSoup, NavigableString
# Blessings is optional. When it cannot be imported, `t` becomes a stand-in
# whose missing attributes all resolve to the empty string, so styling such as
# t.bold or t.red adds nothing to the output.
try:
    from blessings import Terminal
    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Fallback terminal: any missing attribute yields an empty string."""
        def __getattr__(self, _):
            return ""
    t = DummyTerminal()
def main():
    """Parse command-line arguments, look up the query and print definitions.

    The positional ``query`` is checked against the lexical forms file
    (``-f``/``--lexical-forms``). When `lookup` returns None nothing more is
    done. Otherwise every definition file found under the definitions
    directory (``-d``/``--definitions``) is printed with `show_definition`.
    """
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="word or words to search")
    ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                    help="lexical forms file")
    ap.add_argument("-d", "--definitions", default="definitions",
                    help="definitions directory")
    args = ap.parse_args()

    lookup_result = lookup(args.query, args.lexical_forms)
    if lookup_result is None:
        # Plain return rather than exit(): that helper is injected by the
        # `site` module for interactive use and is missing under `python -S`.
        return

    # An empty list of paths simply results in no output.
    for d in get_definition_paths(lookup_result, args.definitions):
        show_definition(d)
def lookup(query, lexical_form_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file is read as UTF-8 text, one form per line. If
    *query* is exactly one of those forms it is returned unchanged. Otherwise
    "Did you mean:" is printed, followed by every form whose similarity ratio
    with *query* is above 0.8, and None is returned.
    """
    # Explicit encoding: the forms contain accented characters, so the result
    # must not depend on the platform's default locale encoding.
    with open(lexical_form_path, "rt", encoding="utf-8") as lf_file:
        # Strip only the line terminator, so the last form still matches when
        # the file does not end with a newline.
        forms = [line.rstrip("\n") for line in lf_file]

    if query in forms:
        return query

    print("Did you mean:")
    for form in forms:
        form = form.rstrip()
        if difflib.SequenceMatcher(None, query, form).ratio() > 0.8:
            print(f"- {form}")
    return None
def get_definition_paths(query, defs):
    """Return a list of definition file paths for this lexical form.

    Definitions are looked up in ``<defs>/<LETTER>/<query>/``, where LETTER is
    the first character of *query* with combining marks removed and
    upper-cased (e.g. "é" gives "E"). Only ``.txt`` files are returned, sorted
    by path. An empty list is returned, after printing a message, when the
    first character cannot be determined or when that directory is missing.
    """
    # query[:1] rather than query[0]: an empty query then reaches the
    # "no first char" branch below instead of raising IndexError.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Can't understand what the first char is…")
        return []
    path = Path(defs) / first_char.upper() / query
    try:
        # Sorted so that definitions are always shown in the same order;
        # iterdir() yields entries in arbitrary order.
        return sorted(f for f in path.iterdir() if f.suffix == ".txt")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: something exists at that path but is a file.
        print("Definition not found.")
        return []
def show_definition(def_path):
    """Read the HTML stored at *def_path* and print its rendered text.

    The markup is parsed with BeautifulSoup's "html.parser". The first
    ``div`` found inside the document's first ``div`` is rendered by
    `parse_tag` and the resulting string is printed.
    """
    with open(def_path, "rt") as source:
        document = BeautifulSoup(source.read(), "html.parser")
    root = document.div.div
    print(parse_tag(root))
# Matches a run of whitespace; parse_tag replaces each run with one space.
TAG_STRIP_RE = re.compile(r"\s+")
def parse_tag(tag):
    """Render a BeautifulSoup node and its descendants as terminal text.

    Text nodes have every whitespace run collapsed to a single space. Any
    other node is rendered by concatenating its rendered children, then
    decorated according to its tag name: ``div`` gets a trailing newline,
    ``b`` is bold, ``i`` is italic, and ``span`` is colored by class
    (``tlf_cdefinition`` yellow, ``tlf_cdomaine`` red, ``tlf_csyntagme``
    green).
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)

    rendered = "".join(map(parse_tag, tag.children))
    name = tag.name
    if name == "div":
        return rendered + "\n"
    if name == "b":
        return f"{t.bold}{rendered}{t.normal}"
    if name == "i":
        return f"{t.italic}{rendered}{t.no_italic}"
    if name == "span":
        css_classes = tag.get("class") or []
        # A span carrying several of these classes is wrapped once per
        # matching class, in the order listed here.
        for css_class, color in (
            ("tlf_cdefinition", "yellow"),
            ("tlf_cdomaine", "red"),
            ("tlf_csyntagme", "green"),
        ):
            if css_class in css_classes:
                rendered = f"{getattr(t, color)}{rendered}{t.normal}"
    return rendered
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()