2021-11-06 16:42:04 +01:00
|
|
|
#!/usr/bin/env python3

"""TLFi command-line interface.

If Blessings is installed on your system, you will get pretty colors and
formatting almost like in the TLFi.
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import difflib
|
2021-11-07 00:26:57 +01:00
|
|
|
import gzip
|
2021-11-08 10:57:39 +01:00
|
|
|
import re
|
2021-11-06 16:42:04 +01:00
|
|
|
import unicodedata
|
2022-01-17 10:57:13 +01:00
|
|
|
from functools import cache
|
|
|
|
from os import environ
|
2021-11-06 16:42:04 +01:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup, NavigableString
|
|
|
|
|
|
|
|
try:
    from blessings import Terminal

    t = Terminal()
except ImportError:
    class DummyTerminal:
        """Drop-in for blessings' Terminal: every capability is an empty string."""

        def __getattr__(self, _name):
            # Any attribute (t.bold, t.yellow, ...) degrades to "" so the
            # formatting f-strings below work unchanged without blessings.
            return ""

    t = DummyTerminal()
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Parse CLI arguments, look the query up and print its definitions."""
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms",
                    default=get_root_path() / "lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions",
                    default=get_root_path() / "definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()

    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        # raise SystemExit instead of exit(): the exit() helper comes from the
        # `site` module and is not guaranteed to exist (python -S, frozen apps).
        raise SystemExit
    # Looping directly is enough: an empty list simply yields no iterations,
    # so the former walrus `if` guard was redundant.
    for d in get_definition_paths(lookup_res, args.definitions):
        show_definition(d)
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
@cache
def get_root_path():
    """Return the TLFi data directory ($TLFI_ROOT, defaulting to the CWD)."""
    root = environ.get("TLFI_ROOT", ".")
    return Path(root)
|
|
|
|
|
|
|
|
|
2021-11-07 00:26:57 +01:00
|
|
|
def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    If we are sure the lexical form does not have definitions, suggest similar
    words to the user.

    :param query: the word looked up by the user.
    :param lexical_forms_path: path of the newline-separated list of known
        lexical forms.
    """
    # Explicit encoding: the forms file is French text and must not depend on
    # the locale's default codec.
    with open(lexical_forms_path, "rt", encoding="utf-8") as lexical_forms_file:
        # Strip the trailing newline ourselves so the last line of the file
        # still matches even when it lacks a final "\n".
        forms = [line.rstrip("\n") for line in lexical_forms_file]
    if query in forms:
        return query

    print("Suggestions :")
    suggestions = (
        form for form in forms
        if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
    )
    for form in suggestions:
        print(f"- {form}")
    return None
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
def get_definition_paths(query, definitions) -> list:
    """Return a list of definition file paths for this lexical form.

    :param query: lexical form whose definition files are wanted.
    :param definitions: root of the definitions tree, laid out as
        <definitions>/<FIRST LETTER>/<form>/*.txt.gz.
    """
    if not query:
        # Guard: query[0] below would raise IndexError on an empty string.
        return []
    # NFKD-decompose the first character and skip combining marks so that
    # e.g. "é" is filed under the "E" directory.
    nfkd = unicodedata.normalize("NFKD", query[0])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []
    path = Path(definitions) / first_char.upper() / query
    try:
        return [f for f in path.iterdir() if f.name.endswith(".txt.gz")]
    # NotADirectoryError too: `path` may exist as a plain file.
    except (FileNotFoundError, NotADirectoryError):
        print("Définition non trouvée.")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
def show_definition(def_path):
    """Print a definition from a definition file."""
    # Definition files are gzip-compressed HTML; decompress to text first.
    with gzip.open(def_path, "rt") as def_file:
        markup = def_file.read()
    document = BeautifulSoup(markup, "html.parser")
    print(parse_tag(document.div.div))
|
|
|
|
|
|
|
|
|
|
|
|
# Collapses any run of whitespace (newlines included) into a single space
# when rendering HTML text nodes.
TAG_STRIP_RE = re.compile(r"\s+")
|
|
|
|
|
|
|
|
|
2022-01-17 10:57:13 +01:00
|
|
|
def parse_tag(tag) -> str:
    """Recursively render a BeautifulSoup node as terminal-formatted text."""
    if isinstance(tag, NavigableString):
        # Text node: collapse pretty-printing whitespace runs to one space.
        return TAG_STRIP_RE.sub(" ", tag)

    content = "".join(parse_tag(child) for child in tag.children)
    # A node has exactly one name, so these branches are mutually exclusive.
    if tag.name == "div":
        content += "\n"
    elif tag.name == "span":
        classes = tag.get("class") or []
        # Iteration order matters: later matches wrap earlier escape codes,
        # reproducing the original cdefinition/cdomaine/csyntagme/cmot nesting.
        for css_class, prefix in (
            ("tlf_cdefinition", t.yellow),
            ("tlf_cdomaine", t.red),
            ("tlf_csyntagme", t.green),
            ("tlf_cmot", t.reverse),
        ):
            if css_class in classes:
                content = f"{prefix}{content}{t.normal}"
    elif tag.name == "b":
        content = f"{t.bold}{content}{t.normal}"
    elif tag.name == "i":
        content = f"{t.italic}{content}{t.no_italic}"
    return content
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|