Compare commits

...

3 Commits

@@ -1,6 +1,4 @@
import string
import sys
import time
import requests
from bs4 import BeautifulSoup

@@ -1,4 +1,3 @@
import os
import sys
import time
import urllib.parse
@@ -6,7 +5,6 @@ from pathlib import Path
import requests
from bs4 import BeautifulSoup
from requests import models
BASE_URL = "https://www.cnrtl.fr/definition/"

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""TLFi command-line interface.

If Blessings is installed on your system, you will get nice formatting and
maybe colors!
"""
import argparse
import difflib
import re
import gzip
import unicodedata
from pathlib import Path

from bs4 import BeautifulSoup, NavigableString

try:
    from blessings import Terminal
    t = Terminal()
except ImportError:
    # Fallback when Blessings is absent: every attribute lookup returns an
    # empty string, so the formatting markers below degrade to plain text.
    class DummyTerminal:
        def __getattr__(self, _):
            return ""
    t = DummyTerminal()


def main():
    ap = argparse.ArgumentParser(description="TLFi CLI")
    ap.add_argument("query", help="mot(s) à chercher")
    ap.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                    help="fichier des formes lexicales")
    ap.add_argument("-d", "--definitions", default="definitions",
                    help="répertoire des définitions")
    args = ap.parse_args()
    lookup_res = lookup(args.query, args.lexical_forms)
    if lookup_res is None:
        exit()
    if (defs := get_definition_paths(lookup_res, args.definitions)):
        for d in defs:
            show_definition(d)


def lookup(query, lexical_forms_path):
    """Return a form for which a definition might exist, else None.

    If we are sure the lexical form does not have definitions, suggest similar
    words to the user.
    """
    with open(lexical_forms_path, "rt") as lexical_forms_file:
        forms = lexical_forms_file.readlines()
    if query + "\n" in forms:
        return query
    print("Suggestions :")
    suggestions = (
        form for form in map(str.rstrip, forms)
        if difflib.SequenceMatcher(None, query, form).ratio() > 0.8
    )
    for form in suggestions:
        print(f"- {form}")
    return None


def get_definition_paths(query, definitions):
    """Return a list of definition file paths for this lexical form."""
    # Definitions are stored under the uppercased, accent-free first letter,
    # so decompose the first character (NFKD) and drop combining marks.
    nfkd = unicodedata.normalize("NFKD", query[0])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Pas compris la première lettre…")
        return []
    path = Path(definitions) / first_char.upper() / query
    try:
        return [f for f in path.iterdir() if str(f).endswith(".txt.gz")]
    except FileNotFoundError:
        print("Définition non trouvée.")
        return []


def show_definition(def_path):
    """Print a definition from a definition file."""
    with gzip.open(def_path, "rt") as def_file:
        html = def_file.read()
    soup = BeautifulSoup(html, "html.parser")
    content = parse_tag(soup.div.div)
    print(content)


TAG_STRIP_RE = re.compile(r"\s+")


def parse_tag(tag):
    """Recursively render a tag and its children as formatted text."""
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)
    content = ""
    for child in tag.children:
        content += parse_tag(child)
    if tag.name == "div":
        content += "\n"
    if tag.name == "span":
        classes = tag.get("class") or []
        if "tlf_cdefinition" in classes:
            content = f"{t.yellow}{content}{t.normal}"
        if "tlf_cdomaine" in classes:
            content = f"{t.red}{content}{t.normal}"
        if "tlf_csyntagme" in classes:
            content = f"{t.green}{content}{t.normal}"
        if "tlf_cmot" in classes:
            content = f"{t.reverse}{content}{t.normal}"
    if tag.name == "b":
        content = f"{t.bold}{content}{t.normal}"
    if tag.name == "i":
        content = f"{t.italic}{content}{t.no_italic}"
    return content


if __name__ == "__main__":
    main()
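
For reference, a minimal sketch of how the new helpers could be driven directly from Python, assuming the added file is importable as tlfi_cli (the module name is a guess; it is not shown in this diff) and that lexical_forms.txt and the definitions/ tree have already been produced by the existing scraping scripts:

# Hypothetical usage sketch, not part of the diff.
from tlfi_cli import lookup, get_definition_paths, show_definition

form = lookup("amour", "lexical_forms.txt")
if form is not None:
    for path in get_definition_paths(form, "definitions"):
        show_definition(path)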