tlfi: use compressed def files
This commit is contained in:
parent
5737895738
commit
f3d8f08578
38
tlfi.py
38
tlfi.py
|
@ -8,6 +8,7 @@ maybe colors!
|
||||||
import argparse
|
import argparse
|
||||||
import difflib
|
import difflib
|
||||||
import re
|
import re
|
||||||
|
import gzip
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -25,33 +26,34 @@ except ImportError:
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
ap = argparse.ArgumentParser(description="TLFi CLI")
|
ap = argparse.ArgumentParser(description="TLFi CLI")
|
||||||
ap.add_argument("query", help="word or words to search")
|
ap.add_argument("query", help="mot(s) à chercher")
|
||||||
ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
|
ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
|
||||||
help="lexical forms file")
|
help="fichier des formes lexicales")
|
||||||
ap.add_argument("-d", "--definitions", default="definitions",
|
ap.add_argument("-d", "--definitions", default="definitions",
|
||||||
help="definitions directory")
|
help="répertoire des définitions")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
lookup_result = lookup(args.query, args.lexical_forms)
|
|
||||||
if lookup_result is None:
|
lookup_res = lookup(args.query, args.lexical_forms)
|
||||||
|
if lookup_res is None:
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
if (definitions := get_definition_paths(lookup_result, args.definitions)):
|
if (defs := get_definition_paths(lookup_res, args.definitions)):
|
||||||
for d in definitions:
|
for d in defs:
|
||||||
show_definition(d)
|
show_definition(d)
|
||||||
|
|
||||||
|
|
||||||
def lookup(query, lexical_form_path):
|
def lookup(query, lexical_forms_path):
|
||||||
"""Return a form for which a definition might exist, else None.
|
"""Return a form for which a definition might exist, else None.
|
||||||
|
|
||||||
If we are sure the lexical form does not have definitions, suggest similar
|
If we are sure the lexical form does not have definitions, suggest similar
|
||||||
words to the user.
|
words to the user.
|
||||||
"""
|
"""
|
||||||
with open(lexical_form_path, "rt") as lf_file:
|
with open(lexical_forms_path, "rt") as lexical_forms_file:
|
||||||
forms = lf_file.readlines()
|
forms = lexical_forms_file.readlines()
|
||||||
if query + "\n" in forms:
|
if query + "\n" in forms:
|
||||||
return query
|
return query
|
||||||
|
|
||||||
print("Did you mean:")
|
print("Suggestions :")
|
||||||
suggestions = (
|
suggestions = (
|
||||||
form for form in map(str.rstrip, forms)
|
form for form in map(str.rstrip, forms)
|
||||||
if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
|
if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
|
||||||
|
@ -61,24 +63,24 @@ def lookup(query, lexical_form_path):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_definition_paths(query, defs):
|
def get_definition_paths(query, definitions):
|
||||||
"""Return a list of definition file paths for this lexical form."""
|
"""Return a list of definition file paths for this lexical form."""
|
||||||
nfkd = unicodedata.normalize("NFKD", query[0])
|
nfkd = unicodedata.normalize("NFKD", query[0])
|
||||||
first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
|
first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
|
||||||
if not first_char:
|
if not first_char:
|
||||||
print("Can't understand what the first char is…")
|
print("Pas compris la première lettre…")
|
||||||
return []
|
return []
|
||||||
path = Path(defs) / first_char.upper() / query
|
path = Path(definitions) / first_char.upper() / query
|
||||||
try:
|
try:
|
||||||
return [f for f in path.iterdir() if f.suffix == ".txt"]
|
return [f for f in path.iterdir() if str(f).endswith(".txt.gz")]
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print("Definition not found.")
|
print("Définition non trouvée.")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def show_definition(def_path):
|
def show_definition(def_path):
|
||||||
"""Print a definition from a definition file."""
|
"""Print a definition from a definition file."""
|
||||||
with open(def_path, "rt") as def_file:
|
with gzip.open(def_path, "rt") as def_file:
|
||||||
html = def_file.read()
|
html = def_file.read()
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
content = parse_tag(soup.div.div)
|
content = parse_tag(soup.div.div)
|
||||||
|
@ -104,6 +106,8 @@ def parse_tag(tag):
|
||||||
content = f"{t.red}{content}{t.normal}"
|
content = f"{t.red}{content}{t.normal}"
|
||||||
if "tlf_csyntagme" in classes:
|
if "tlf_csyntagme" in classes:
|
||||||
content = f"{t.green}{content}{t.normal}"
|
content = f"{t.green}{content}{t.normal}"
|
||||||
|
if "tlf_cmot" in classes:
|
||||||
|
content = f"{t.reverse}{content}{t.normal}"
|
||||||
if tag.name == "b":
|
if tag.name == "b":
|
||||||
content = f"{t.bold}{content}{t.normal}"
|
content = f"{t.bold}{content}{t.normal}"
|
||||||
if tag.name == "i":
|
if tag.name == "i":
|
||||||
|
|
Loading…
Reference in a new issue