#!/usr/bin/env python3
# Recovered from a whitespace-collapsed git patch:
#   commit 5737895738a7c0ae03b45dc3ec150cfe40571d48
#   From: dece, Sat, 6 Nov 2021 16:42:04 +0100
#   Subject: tlfi: add lookup with suggestions, and definitions
"""TLFi command-line interface.

If Blessings is installed on your system, you will get nice formatting and
maybe colors!
"""

import argparse
import difflib
import re
import sys
import unicodedata
from pathlib import Path

from bs4 import BeautifulSoup, NavigableString

try:
    from blessings import Terminal
    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Stand-in used when Blessings is missing: disables all styling."""

        def __getattr__(self, _):
            # Any capability (t.bold, t.red, ...) resolves to an empty string,
            # so the f-strings in parse_tag degrade to plain text.
            return ""
    t = DummyTerminal()


# A form is suggested when its similarity ratio with the query is strictly
# greater than this value.
SUGGESTION_RATIO = 0.8


def main():
    """Parse the command line, look the query up and print its definitions."""
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="word or words to search")
    ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                    help="lexical forms file")
    ap.add_argument("-d", "--definitions", default="definitions",
                    help="definitions directory")
    args = ap.parse_args()
    lookup_result = lookup(args.query, args.lexical_forms)
    if lookup_result is None:
        # sys.exit() rather than the bare exit(): the latter is injected by
        # the site module and is missing under "python -S" or frozen builds.
        sys.exit()

    # get_definition_paths always returns a list (possibly empty).
    for definition_path in get_definition_paths(lookup_result,
                                                args.definitions):
        show_definition(definition_path)


def _is_similar(query, form):
    """Tell whether form is close enough to query to be suggested."""
    matcher = difflib.SequenceMatcher(None, query, form)
    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
    # so they can only reject forms that ratio() would reject as well.
    return (
        matcher.real_quick_ratio() > SUGGESTION_RATIO
        and matcher.quick_ratio() > SUGGESTION_RATIO
        and matcher.ratio() > SUGGESTION_RATIO
    )


def lookup(query, lexical_form_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file is read as one form per line. If the query is one
    of these forms it is returned unchanged. Otherwise we are sure the lexical
    form does not have definitions: similar words (if any) are printed as
    suggestions and None is returned.
    """
    # NOTE(review): assumes the lexical forms file is UTF-8 encoded -- confirm
    # against whatever tool generates it.
    with open(lexical_form_path, "rt", encoding="utf-8") as lf_file:
        # Strip only the line terminator so that the last line still matches
        # when the file does not end with a newline.
        forms = [line.rstrip("\n") for line in lf_file]
    if query in forms:
        return query

    suggestions = [
        form for form in map(str.rstrip, forms)
        if _is_similar(query, form)
    ]
    if suggestions:
        print("Did you mean:")
        for form in suggestions:
            print(f"- {form}")
    else:
        print("No similar word found.")
    return None


def get_definition_paths(query, defs):
    """Return a list of definition file paths for this lexical form.

    Definitions are looked up in "<defs>/<X>/<query>/", where X is the
    upper-cased first character of the query with its diacritics removed.
    Only files with a ".txt" suffix are returned, sorted so that the display
    order is stable. An empty list is returned (and a message printed) when
    the directory cannot be determined or does not exist.
    """
    # query[:1] instead of query[0]: an empty query then falls through to the
    # "first char" error path below instead of raising IndexError.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    # After decomposition, the first non-combining code point is the base
    # letter.
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Can't understand what the first char is…")
        return []
    path = Path(defs) / first_char.upper() / query
    try:
        return sorted(f for f in path.iterdir() if f.suffix == ".txt")
    except (FileNotFoundError, NotADirectoryError):
        print("Definition not found.")
        return []


def show_definition(def_path):
    """Print a definition from a definition file.

    The file is parsed as HTML; the rendered content is the first <div> found
    inside the first <div> of the document.
    """
    # NOTE(review): assumes definition files are UTF-8 encoded -- confirm.
    with open(def_path, "rt", encoding="utf-8") as def_file:
        html = def_file.read()
    soup = BeautifulSoup(html, "html.parser")
    outer = soup.div
    root = outer.div if outer is not None else None
    if root is None:
        # Without the expected nested <div> there is nothing to render.
        print(f"Can't parse definition file {def_path}.")
        return
    print(parse_tag(root))


TAG_STRIP_RE = re.compile(r"\s+")


def parse_tag(tag):
    """Return the text of a parsed HTML node, styled for the terminal.

    Text nodes have every run of whitespace collapsed to a single space.
    For tags, the children are rendered recursively and concatenated, then:
    a <div> gets a trailing newline, a <span> is colored according to its
    tlf_* class, <b> is rendered bold and <i> italic.
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)
    content = "".join(parse_tag(child) for child in tag.children)
    if tag.name == "div":
        content += "\n"
    if tag.name == "span":
        classes = tag.get("class") or []
        if "tlf_cdefinition" in classes:
            content = f"{t.yellow}{content}{t.normal}"
        if "tlf_cdomaine" in classes:
            content = f"{t.red}{content}{t.normal}"
        if "tlf_csyntagme" in classes:
            content = f"{t.green}{content}{t.normal}"
    if tag.name == "b":
        content = f"{t.bold}{content}{t.normal}"
    if tag.name == "i":
        content = f"{t.italic}{content}{t.no_italic}"
    return content


if __name__ == "__main__":
    main()