#!/usr/bin/env python3
"""TLFi command-line interface.

Look up a word in a local copy of the TLFi and print its definitions.

If Blessings is installed on your system, you will get pretty colors and
formatting almost like in the TLFi.
"""

import argparse
import difflib
import gzip
import re
import unicodedata
from pathlib import Path

from bs4 import BeautifulSoup, NavigableString

try:
    from blessings import Terminal
except ImportError:

    class DummyTerminal:
        """Fallback terminal: every capability is the empty string."""

        def __getattr__(self, _):
            return ""

    t = DummyTerminal()
else:
    t = Terminal()


def main():
    """Parse the command line, look the word up and print its definitions."""
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions", default="definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()

    form = lookup(args.query, args.lexical_forms)
    if form is None:
        # lookup() has already printed suggestions. Returning (instead of
        # calling the site-provided exit()) keeps main() usable from other
        # code and still ends the script with status 0.
        return
    for def_path in get_definition_paths(form, args.definitions):
        show_definition(def_path)


def _is_similar(query, form, cutoff=0.8):
    """Return True if the similarity ratio of query and form exceeds cutoff."""
    matcher = difflib.SequenceMatcher(None, query, form)
    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
    # so checking them first discards most forms without running the
    # expensive comparison, and never changes the final answer.
    return (
        matcher.real_quick_ratio() > cutoff
        and matcher.quick_ratio() > cutoff
        and matcher.ratio() > cutoff
    )


def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file holds one form per line. If the query is not one
    of them, we are sure it does not have definitions: similar forms are
    printed as suggestions and None is returned.
    """
    # Explicit encoding so accented forms do not depend on the platform
    # locale. NOTE(review): assumes the data files are UTF-8 -- confirm.
    with open(lexical_forms_path, "rt", encoding="utf-8") as lexical_forms_file:
        # Strip only the line terminator so that the last line is matched
        # even when the file does not end with a newline.
        forms = [line.rstrip("\n") for line in lexical_forms_file]

    if query in forms:
        return query

    print("Suggestions :")
    # Lazy generator: each suggestion is printed as soon as it is found.
    suggestions = (
        form for form in map(str.rstrip, forms) if _is_similar(query, form)
    )
    for form in suggestions:
        print(f"- {form}")
    return None


def get_definition_paths(query, definitions):
    """Return a list of definition file paths for this lexical form.

    Definition files are looked up in
    ``<definitions>/<FIRST LETTER>/<query>/*.txt.gz``, where the first letter
    is the first character of the query with its diacritics removed,
    upper-cased. An explanatory message is printed and an empty list is
    returned when that letter cannot be determined or the directory is
    missing.
    """
    # query[:1] instead of query[0]: an empty query yields an empty string
    # here rather than raising IndexError.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []

    path = Path(definitions) / first_char.upper() / query
    try:
        # iterdir() yields entries in arbitrary order; sort them so the
        # definitions are always shown in the same order.
        return sorted(f for f in path.iterdir() if f.name.endswith(".txt.gz"))
    except (FileNotFoundError, NotADirectoryError):
        print("Définition non trouvée.")
        return []


def show_definition(def_path):
    """Print a definition from a gzip-compressed HTML definition file."""
    # NOTE(review): assumes the definition files are UTF-8 -- confirm.
    with gzip.open(def_path, "rt", encoding="utf-8") as def_file:
        html = def_file.read()
    soup = BeautifulSoup(html, "html.parser")

    # The content is expected in the first <div> nested in the first <div>.
    # Descend as far as that structure actually exists instead of failing
    # with AttributeError on a file laid out differently.
    root = soup
    for _ in range(2):
        inner = root.div
        if inner is None:
            break
        root = inner
    print(parse_tag(root))


TAG_STRIP_RE = re.compile(r"\s+")

# CSS class of a TLFi <span> -> name of the terminal attribute rendering it.
_SPAN_STYLES = (
    ("tlf_cdefinition", "yellow"),
    ("tlf_cdomaine", "red"),
    ("tlf_csyntagme", "green"),
    ("tlf_cmot", "reverse"),
)


def _tag_style(tag):
    """Return the terminal sequence that turns on the style of this tag."""
    if tag.name == "span":
        classes = tag.get("class") or []
        return "".join(
            getattr(t, attr) for css, attr in _SPAN_STYLES if css in classes
        )
    if tag.name == "b":
        return t.bold
    if tag.name == "i":
        return t.italic
    return ""


def parse_tag(tag, _outer=""):
    """Return the text of an HTML node, formatted for the terminal.

    Runs of whitespace in text nodes are collapsed to a single space, a
    newline is appended after each <div>, and <b>, <i> and the known TLFi
    <span> classes are wrapped in the matching terminal sequences.

    ``_outer`` is internal to the recursion: it holds the sequences of the
    styles opened by the enclosing tags.
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)

    style = _tag_style(tag)
    content = "".join(
        parse_tag(child, _outer + style) for child in tag.children
    )
    if tag.name == "div":
        content += "\n"

    if not style:
        return content
    if tag.name == "i":
        # no_italic only switches italics off, so the enclosing styles
        # survive and need not be restored.
        return f"{style}{content}{t.no_italic}"
    # t.normal resets every attribute, including those opened by enclosing
    # tags: switch these back on so the rest of the parent keeps its style.
    return f"{style}{content}{t.normal}{_outer}"


if __name__ == "__main__":
    main()