TLFi/tlfi/__main__.py

129 lines
3.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""TLFi command-line interface.

If Blessings is installed on your system, you will get pretty colors and
formatting almost like in the TLFi.
"""
import argparse
import difflib
2021-11-07 00:26:57 +01:00
import gzip
2021-11-08 10:57:39 +01:00
import re
import unicodedata
from functools import cache
from os import environ
from pathlib import Path
from bs4 import BeautifulSoup, NavigableString
# Terminal styling is optional: use Blessings when it can be imported,
# otherwise fall back to an object whose every attribute is an empty string
# so that the formatting code below degrades to plain text.
try:
    from blessings import Terminal
    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Stand-in terminal used when Blessings is not installed."""

        def __getattr__(self, _name):
            # Any styling attribute (yellow, bold, normal, ...) is a no-op.
            return ""

    t = DummyTerminal()
def main():
    """Parse the command line, look the query up and print its definitions.

    The default locations of the lexical forms file and of the definitions
    directory are derived from ``get_root_path()``; both can be overridden
    with ``-f`` and ``-d``.

    Raises:
        SystemExit: with a success status when ``lookup`` returns ``None``,
            i.e. when the query is not a known lexical form.
    """
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms",
                    default=get_root_path() / "lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions",
                    default=get_root_path() / "definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()

    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        # Raise SystemExit directly instead of calling exit(): that helper is
        # injected by the site module for interactive sessions and is not
        # available when the interpreter runs with -S.
        raise SystemExit

    # An empty list of paths simply results in nothing being shown.
    for d in get_definition_paths(lookup_res, args.definitions):
        show_definition(d)
@cache
def get_root_path():
    """Return the TLFi data root directory as a ``Path``.

    The directory is read from the ``TLFI_ROOT`` environment variable and
    defaults to the current directory (``"."``).  The result is cached, so
    the environment is only consulted on the first call.
    """
    root = environ.get("TLFI_ROOT", ".")
    return Path(root)
2021-11-07 00:26:57 +01:00
def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file is read as one form per line.  If ``query`` is
    exactly one of those forms it is returned unchanged.  Otherwise a
    "Suggestions :" header and every form whose similarity ratio with the
    query exceeds 0.8 are printed, and None is returned.

    Args:
        query: the word(s) typed by the user.
        lexical_forms_path: path of the lexical forms file.

    Returns:
        ``query`` when it is a known form, else ``None``.
    """
    # NOTE(review): the forms file is assumed to be UTF-8; decoding it
    # explicitly avoids depending on the locale's default encoding.
    with open(lexical_forms_path, "rt", encoding="utf-8") as lexical_forms_file:
        # Strip only the line terminator so the exact-match test below also
        # works for a last line that has no trailing newline.
        forms = [line.rstrip("\n") for line in lexical_forms_file]

    if query in forms:
        return query

    print("Suggestions :")
    for form in forms:
        candidate = form.rstrip()
        if difflib.SequenceMatcher(None, query, candidate).ratio() > 0.8:
            print(f"- {candidate}")
    return None
def get_definition_paths(query, definitions) -> list:
    """Return a list of definition file paths for this lexical form.

    Definitions are looked up in ``<definitions>/<LETTER>/<query>/`` where
    ``LETTER`` is the upper-cased first character of the query with its
    combining marks (accents) removed.  Only entries whose name ends with
    ``.txt.gz`` are returned, in sorted order.

    Args:
        query: the lexical form to look up.
        definitions: path of the definitions directory.

    Returns:
        A possibly empty list of ``Path`` objects.  A message is printed and
        an empty list is returned when the first letter cannot be determined
        (including for an empty query) or when the entry directory does not
        exist.
    """
    # query[:1] rather than query[0] so that an empty query falls through to
    # the "first letter not understood" branch instead of raising IndexError.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    # After decomposition the base letter is the first non-combining char.
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []

    path = Path(definitions) / first_char.upper() / query
    try:
        # Sorted so the display order does not depend on the filesystem.
        return sorted(f for f in path.iterdir() if f.name.endswith(".txt.gz"))
    except (FileNotFoundError, NotADirectoryError):
        print("Définition non trouvée.")
        return []
def show_definition(def_path):
    """Print a definition from a definition file.

    The file is a gzip-compressed HTML document.  The second-level ``div``
    of the document (``soup.div.div``) is rendered with ``parse_tag`` and
    the resulting text is printed.

    Args:
        def_path: path of the ``.txt.gz`` definition file.
    """
    # NOTE(review): definition files are assumed to be UTF-8; decoding them
    # explicitly avoids depending on the locale's default encoding.
    with gzip.open(def_path, "rt", encoding="utf-8") as def_file:
        html = def_file.read()
    soup = BeautifulSoup(html, "html.parser")
    content = parse_tag(soup.div.div)
    print(content)
TAG_STRIP_RE = re.compile(r"\s+")

# (CSS class, terminal attribute) pairs applied, in this order, to <span>
# elements; a span carrying several of these classes is wrapped several times.
_SPAN_STYLES = (
    ("tlf_cdefinition", "yellow"),
    ("tlf_cdomaine", "red"),
    ("tlf_csyntagme", "green"),
    ("tlf_cmot", "reverse"),
)


def parse_tag(tag) -> str:
    """Render a parsed HTML node and its descendants as terminal text.

    Text nodes have every run of whitespace collapsed to a single space.
    For elements, the rendered children are concatenated, then decorated
    according to the element: a newline is appended after ``div``, ``span``
    is coloured according to its TLFi CSS classes, ``b`` is bold and ``i``
    is italic.
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)

    content = "".join(parse_tag(child) for child in tag.children)

    name = tag.name
    if name == "div":
        content += "\n"
    elif name == "span":
        classes = tag.get("class") or []
        for css_class, style in _SPAN_STYLES:
            if css_class in classes:
                content = f"{getattr(t, style)}{content}{t.normal}"
    elif name == "b":
        content = f"{t.bold}{content}{t.normal}"
    elif name == "i":
        content = f"{t.italic}{content}{t.no_italic}"
    return content
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()