From f3d8f0857835ae83ef1dbadf3fe76310faac3ba3 Mon Sep 17 00:00:00 2001 From: dece Date: Sun, 7 Nov 2021 00:26:57 +0100 Subject: [PATCH] tlfi: use compressed def files --- tlfi.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tlfi.py b/tlfi.py index 9cdf20b..48dc082 100644 --- a/tlfi.py +++ b/tlfi.py @@ -8,6 +8,7 @@ maybe colors! import argparse import difflib import re +import gzip import unicodedata from pathlib import Path @@ -25,33 +26,34 @@ except ImportError: def main(): ap = argparse.ArgumentParser(description="TLFi CLI") - ap.add_argument("query", help="word or words to search") + ap.add_argument("query", help="mot(s) à chercher") ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt", - help="lexical forms file") + help="fichier des formes lexicales") ap.add_argument("-d", "--definitions", default="definitions", - help="definitions directory") + help="répertoire des définitions") args = ap.parse_args() - lookup_result = lookup(args.query, args.lexical_forms) - if lookup_result is None: + + lookup_res = lookup(args.query, args.lexical_forms) + if lookup_res is None: exit() - if (definitions := get_definition_paths(lookup_result, args.definitions)): - for d in definitions: + if (defs := get_definition_paths(lookup_res, args.definitions)): + for d in defs: show_definition(d) -def lookup(query, lexical_form_path): +def lookup(query, lexical_forms_path): """Return a form for which a definition might exist, else None. If we are sure the lexical form does not have definitions, suggest similar words to the user. """ - with open(lexical_form_path, "rt") as lf_file: - forms = lf_file.readlines() + with open(lexical_forms_path, "rt") as lexical_forms_file: + forms = lexical_forms_file.readlines() if query + "\n" in forms: return query - print("Did you mean:") + print("Suggestions :") suggestions = ( form for form in map(str.rstrip, forms) if difflib.SequenceMatcher(None, query, form).ratio() > 0.8 @@ -61,24 +63,24 @@ def lookup(query, lexical_form_path): return None -def get_definition_paths(query, defs): +def get_definition_paths(query, definitions): """Return a list of definition file paths for this lexical form.""" nfkd = unicodedata.normalize("NFKD", query[0]) first_char = next((c for c in nfkd if not unicodedata.combining(c)), "") if not first_char: - print("Can't understand what the first char is…") + print("Pas compris la première lettre…") return [] - path = Path(defs) / first_char.upper() / query + path = Path(definitions) / first_char.upper() / query try: - return [f for f in path.iterdir() if f.suffix == ".txt"] + return [f for f in path.iterdir() if str(f).endswith(".txt.gz")] except FileNotFoundError: - print("Definition not found.") + print("Définition non trouvée.") return [] def show_definition(def_path): """Print a definition from a definition file.""" - with open(def_path, "rt") as def_file: + with gzip.open(def_path, "rt") as def_file: html = def_file.read() soup = BeautifulSoup(html, "html.parser") content = parse_tag(soup.div.div) @@ -104,6 +106,8 @@ def parse_tag(tag): content = f"{t.red}{content}{t.normal}" if "tlf_csyntagme" in classes: content = f"{t.green}{content}{t.normal}" + if "tlf_cmot" in classes: + content = f"{t.reverse}{content}{t.normal}" if tag.name == "b": content = f"{t.bold}{content}{t.normal}" if tag.name == "i":