TLFi/tlfi/__main__.py

129 lines
3.6 KiB
Python

#!/usr/bin/env python3
"""TLFi command-line interface.
If Blessings is installed on your system, you will get pretty colors and
formatting almost like in the TLFi.
"""
import argparse
import difflib
import gzip
import re
import unicodedata
from functools import cache
from os import environ
from pathlib import Path
from bs4 import BeautifulSoup, NavigableString
# Terminal styling is optional: use Blessings when it can be imported,
# otherwise fall back to an object whose every attribute is an empty string.
try:
    from blessings import Terminal
    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Stand-in terminal used when Blessings cannot be imported."""

        def __getattr__(self, _name):
            # Any attribute lookup (yellow, bold, normal, ...) yields "" so
            # that f-strings built around the text leave it unstyled.
            return ""

    t = DummyTerminal()
def main():
    """Run the command-line interface.

    Parses the command line, looks the query up in the lexical forms file
    and prints every definition found for it.

    Command-line arguments:
        query: the word to look up.
        -f / --lexical-forms: path of the lexical forms file; defaults to
            ``lexical_forms.txt`` under the root returned by
            ``get_root_path()``.
        -d / --definitions: path of the definitions directory; defaults to
            ``definitions`` under the same root.

    Raises:
        SystemExit: when the query is not a known lexical form (after
            ``lookup`` has printed its suggestions), or when argparse
            rejects the command line.
    """
    # Function-scope import: ``sys`` is only needed for the early exit below.
    import sys

    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument(
        "-f", "--lexical-forms",
        default=get_root_path() / "lexical_forms.txt",
        help="fichier des formes lexicales",
    )
    ap.add_argument(
        "-d", "--definitions",
        default=get_root_path() / "definitions",
        help="répertoire des définitions",
    )
    args = ap.parse_args()

    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        # Use sys.exit() rather than the exit() helper: the latter is
        # injected by the ``site`` module for interactive use and does not
        # exist when the interpreter is started with -S.
        sys.exit()

    # An empty list simply means nothing is shown, so no emptiness check is
    # needed before looping.
    for d in get_definition_paths(lookup_res, args.definitions):
        show_definition(d)
@cache
def get_root_path():
    """Return the data root directory as a ``Path``.

    The directory comes from the ``TLFI_ROOT`` environment variable and
    falls back to ``"."`` when the variable is not set. The function is
    wrapped in ``functools.cache``, so the environment is only consulted on
    the first call.
    """
    root_dir = environ.get("TLFI_ROOT", ".")
    return Path(root_dir)
def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file is read as one form per line. When ``query`` is
    one of these forms it is returned unchanged. Otherwise the forms whose
    ``difflib.SequenceMatcher`` ratio with ``query`` is above 0.8 are
    printed as suggestions and None is returned.

    Args:
        query: lexical form to look for.
        lexical_forms_path: path of the text file listing the known forms.

    Returns:
        ``query`` if it is listed in the file, else None.

    Raises:
        OSError: if the lexical forms file cannot be opened.
    """
    # NOTE(review): the file is decoded with the locale's default encoding;
    # confirm the data file's encoding and pass it explicitly.
    with open(lexical_forms_path, "rt") as lexical_forms_file:
        # Strip only the line terminator, so that the last form still
        # matches when the file does not end with a newline.
        forms = [line.rstrip("\n") for line in lexical_forms_file]

    if query in forms:
        return query

    print("Suggestions :")
    for form in map(str.rstrip, forms):
        if difflib.SequenceMatcher(None, query, form).ratio() > 0.8:
            print(f"- {form}")
    return None
def get_definition_paths(query, definitions) -> list:
    """Return a list of definition file paths for this lexical form.

    Definitions are searched in ``<definitions>/<LETTER>/<query>/``, where
    LETTER is the first character of ``query`` with its combining marks
    removed (NFKD decomposition) and upper-cased.

    Args:
        query: lexical form whose definitions are wanted.
        definitions: path of the definitions root directory.

    Returns:
        The sorted paths of the ``*.txt.gz`` files found in that directory.
        The list is empty, and a message is printed, when the first letter
        cannot be determined or when the directory does not exist.
    """
    # Slice instead of indexing: an empty query then reaches the "first
    # letter not understood" branch instead of raising IndexError.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []

    path = Path(definitions) / first_char.upper() / query
    try:
        # iterdir() yields entries in arbitrary order; sort them so the
        # definitions are always returned in the same order.
        return sorted(f for f in path.iterdir() if f.name.endswith(".txt.gz"))
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: something exists at that path but it is a
        # regular file, which also means there is no definition directory.
        print("Définition non trouvée.")
        return []
def show_definition(def_path):
    """Print the definition stored in a gzip-compressed HTML file.

    The file is decompressed in text mode and parsed with BeautifulSoup's
    ``html.parser``. The ``div`` reached through ``soup.div.div`` is then
    rendered by ``parse_tag`` and printed.
    """
    with gzip.open(def_path, "rt") as stream:
        markup = stream.read()
    definition_root = BeautifulSoup(markup, "html.parser").div.div
    print(parse_tag(definition_root))
# Matches any run of whitespace; used to collapse it to a single space.
TAG_STRIP_RE = re.compile(r"\s+")


def parse_tag(tag) -> str:
    """Render a BeautifulSoup node as terminal text.

    Text nodes are returned with each run of whitespace collapsed to one
    space. For a tag, the renderings of its children are concatenated and
    then decorated according to the tag name:

    - ``div``: a newline is appended;
    - ``span``: the text is wrapped in a colour for each known TLFi class
      it carries, each wrap ending with ``t.normal``;
    - ``b``: wrapped in ``t.bold`` / ``t.normal``;
    - ``i``: wrapped in ``t.italic`` / ``t.no_italic``.

    Styling sequences come from the module-level terminal object ``t``.
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)

    rendered = "".join(map(parse_tag, tag.children))
    name = tag.name

    if name == "div":
        rendered += "\n"
    elif name == "span":
        css_classes = tag.get("class") or []
        # Order matters: when several classes are present the wraps nest in
        # this order, the last one ending up outermost.
        class_styles = (
            ("tlf_cdefinition", "yellow"),
            ("tlf_cdomaine", "red"),
            ("tlf_csyntagme", "green"),
            ("tlf_cmot", "reverse"),
        )
        for css_class, style in class_styles:
            if css_class in css_classes:
                rendered = f"{getattr(t, style)}{rendered}{t.normal}"
    elif name == "b":
        rendered = f"{t.bold}{rendered}{t.normal}"
    elif name == "i":
        rendered = f"{t.italic}{rendered}{t.no_italic}"

    return rendered
# Run the CLI only when this module is executed as the main program, not
# when it is imported.
if __name__ == "__main__":
    main()