2021-11-06 16:42:04 +01:00
|
|
|
#!/usr/bin/env python3

"""TLFi command-line interface.

If Blessings is installed on your system, you will get pretty colors and
formatting almost like in the TLFi.
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import difflib
|
2021-11-07 00:26:57 +01:00
|
|
|
import gzip
|
2021-11-08 10:57:39 +01:00
|
|
|
import re
|
2021-11-06 16:42:04 +01:00
|
|
|
import unicodedata
|
2022-01-17 10:57:13 +01:00
|
|
|
from functools import cache
|
|
|
|
from os import environ
|
2021-11-06 16:42:04 +01:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup, NavigableString
|
|
|
|
|
|
|
|
try:
    from blessings import Terminal

    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Drop-in for blessings' Terminal: every capability is an empty string."""

        def __getattr__(self, _name):
            # Any attribute (t.bold, t.yellow, ...) degrades to "" so the
            # formatting f-strings below work unchanged without blessings.
            return ""

    t = DummyTerminal()
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Parse CLI arguments, look the query up and print its definitions."""
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms",
                    default=get_root_path() / "lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions",
                    default=get_root_path() / "definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()

    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        # raise SystemExit instead of exit(): the exit() helper comes from the
        # `site` module and is not guaranteed to exist (python -S, frozen apps).
        raise SystemExit
    # Looping directly is enough: an empty list simply yields no iterations,
    # so the former walrus `if` guard was redundant.
    for d in get_definition_paths(lookup_res, args.definitions):
        show_definition(d)
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
@cache
def get_root_path():
    """Return the TLFi data directory ($TLFI_ROOT, defaulting to the CWD)."""
    root = environ.get("TLFI_ROOT", ".")
    return Path(root)
|
|
|
|
|
|
|
|
|
2021-11-07 00:26:57 +01:00
|
|
|
def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    If we are sure the lexical form does not have definitions, suggest similar
    words to the user.

    :param query: the word looked up by the user.
    :param lexical_forms_path: path of the newline-separated list of known
        lexical forms.
    """
    # Explicit encoding: the forms file is French text and must not depend on
    # the locale's default codec.
    with open(lexical_forms_path, "rt", encoding="utf-8") as lexical_forms_file:
        # Strip the trailing newline ourselves so the last line of the file
        # still matches even when it lacks a final "\n".
        forms = [line.rstrip("\n") for line in lexical_forms_file]
    if query in forms:
        return query

    print("Suggestions :")
    suggestions = (
        form for form in forms
        if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
    )
    for form in suggestions:
        print(f"- {form}")
    return None
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
def get_definition_paths(query, definitions) -> list:
    """Return a list of definition file paths for this lexical form.

    :param query: lexical form whose definition files are wanted.
    :param definitions: root of the definitions tree, laid out as
        <definitions>/<FIRST LETTER>/<form>/*.txt.gz.
    """
    if not query:
        # Guard: query[0] below would raise IndexError on an empty string.
        return []
    # NFKD-decompose the first character and skip combining marks so that
    # e.g. "é" is filed under the "E" directory.
    nfkd = unicodedata.normalize("NFKD", query[0])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []
    path = Path(definitions) / first_char.upper() / query
    try:
        return [f for f in path.iterdir() if f.name.endswith(".txt.gz")]
    # NotADirectoryError too: `path` may exist as a plain file.
    except (FileNotFoundError, NotADirectoryError):
        print("Définition non trouvée.")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
def show_definition(def_path):
    """Print a definition from a definition file."""
    # Definition files are gzip-compressed HTML; decompress to text first.
    with gzip.open(def_path, "rt") as def_file:
        markup = def_file.read()
    document = BeautifulSoup(markup, "html.parser")
    print(parse_tag(document.div.div))
|
|
|
|
|
|
|
|
|
|
|
|
# Collapses any run of whitespace (newlines included) into a single space
# when rendering HTML text nodes.
TAG_STRIP_RE = re.compile(r"\s+")
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
def parse_tag(tag) -> str:
    """Recursively render a BeautifulSoup node as terminal-formatted text."""
    if isinstance(tag, NavigableString):
        # Text node: collapse pretty-printing whitespace runs to one space.
        return TAG_STRIP_RE.sub(" ", tag)

    content = "".join(parse_tag(child) for child in tag.children)
    # A node has exactly one name, so these branches are mutually exclusive.
    if tag.name == "div":
        content += "\n"
    elif tag.name == "span":
        classes = tag.get("class") or []
        # Iteration order matters: later matches wrap earlier escape codes,
        # reproducing the original cdefinition/cdomaine/csyntagme/cmot nesting.
        for css_class, prefix in (
            ("tlf_cdefinition", t.yellow),
            ("tlf_cdomaine", t.red),
            ("tlf_csyntagme", t.green),
            ("tlf_cmot", t.reverse),
        ):
            if css_class in classes:
                content = f"{prefix}{content}{t.normal}"
    elif tag.name == "b":
        content = f"{t.bold}{content}{t.normal}"
    elif tag.name == "i":
        content = f"{t.italic}{content}{t.no_italic}"
    return content
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|