tlfi: use compressed def files

This commit is contained in:
dece 2021-11-07 00:26:57 +01:00
parent 5737895738
commit f3d8f08578

38
tlfi.py
View file

@ -8,6 +8,7 @@ maybe colors!
import argparse
import difflib
import re
import gzip
import unicodedata
from pathlib import Path
@ -25,33 +26,34 @@ except ImportError:
def main():
    """Parse the command line, look up the query and print its definitions.

    The query is first checked with lookup(); when that returns None the
    program exits with status 0 without showing anything further. Otherwise
    every definition file found for the returned form is displayed.
    """
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions", default="definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()

    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        # Raise SystemExit directly instead of calling exit(): the latter is
        # injected by the `site` module and is not guaranteed to exist.
        raise SystemExit

    # get_definition_paths() returns an empty list when nothing is found, so
    # the loop simply does not run in that case.
    for d in get_definition_paths(lookup_res, args.definitions):
        show_definition(d)
def lookup(query, lexical_form_path):
def lookup(query, lexical_forms_path):
"""Return a form for which a definition might exist, else None.
If we are sure the lexical form does not have definitions, suggest similar
words to the user.
"""
with open(lexical_form_path, "rt") as lf_file:
forms = lf_file.readlines()
with open(lexical_forms_path, "rt") as lexical_forms_file:
forms = lexical_forms_file.readlines()
if query + "\n" in forms:
return query
print("Did you mean:")
print("Suggestions :")
suggestions = (
form for form in map(str.rstrip, forms)
if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
@ -61,24 +63,24 @@ def lookup(query, lexical_form_path):
return None
def get_definition_paths(query, definitions):
    """Return a list of definition file paths for this lexical form.

    Definition files are looked up in
    ``<definitions>/<FIRST LETTER>/<query>/`` where the first letter is the
    query's first character with combining marks stripped and upper-cased.
    Only gzip-compressed definitions (``*.txt.gz``) are returned, sorted so
    that the display order is stable across runs.

    Args:
        query: the lexical form to find definitions for.
        definitions: root directory of the definition files.

    Returns:
        A sorted list of Path objects; empty when the first letter cannot be
        determined or when no definition directory exists for the query (a
        message is printed in both cases).
    """
    # An empty query has no first character; query[0] would raise IndexError.
    if not query:
        print("Pas compris la première lettre")
        return []

    # Decompose the first character so that accented letters map to their
    # base letter (the first non-combining code point of the decomposition).
    nfkd = unicodedata.normalize("NFKD", query[0])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre")
        return []

    path = Path(definitions) / first_char.upper() / query
    try:
        # iterdir() yields entries in arbitrary order, hence the sort.
        return sorted(f for f in path.iterdir() if f.name.endswith(".txt.gz"))
    except (FileNotFoundError, NotADirectoryError):
        # Missing directory, or a regular file sitting where the directory
        # should be: either way there is no definition to show.
        print("Définition non trouvée.")
        return []
def show_definition(def_path):
"""Print a definition from a definition file."""
with open(def_path, "rt") as def_file:
with gzip.open(def_path, "rt") as def_file:
html = def_file.read()
soup = BeautifulSoup(html, "html.parser")
content = parse_tag(soup.div.div)
@ -104,6 +106,8 @@ def parse_tag(tag):
content = f"{t.red}{content}{t.normal}"
if "tlf_csyntagme" in classes:
content = f"{t.green}{content}{t.normal}"
if "tlf_cmot" in classes:
content = f"{t.reverse}{content}{t.normal}"
if tag.name == "b":
content = f"{t.bold}{content}{t.normal}"
if tag.name == "i":