tlfi: add lookup with suggestions, and definitions
This commit is contained in:
parent
f83c6085da
commit
5737895738
115
tlfi.py
Normal file
115
tlfi.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
#!/usr/bin/env python3
|
||||
"""TLFi command-line interface.
|
||||
|
||||
If Blessings is installed on your system, you will get nice formatting and
|
||||
maybe colors!
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import difflib
|
||||
import re
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
# Blessings is an optional dependency: when it cannot be imported, ``t`` is a
# stand-in object whose formatting attributes are all empty strings, so the
# f-strings built elsewhere simply come out unformatted.
try:
    from blessings import Terminal

    t = Terminal()
except ImportError:

    class DummyTerminal:
        """Fallback for Terminal used when Blessings is not installed."""

        def __getattr__(self, _name):
            # Called for any attribute not found by normal lookup; answering
            # "" turns every formatting code into a no-op.
            return ""

    t = DummyTerminal()
|
||||
|
||||
|
||||
def main():
    """Run the command-line interface.

    Parse the command-line arguments, look the query up in the lexical forms
    file and, when a form is found, print every definition file found for it
    in the definitions directory.
    """
    parser = argparse.ArgumentParser(description="TLFi CLI")
    parser.add_argument("query", help="word or words to search")
    parser.add_argument("-f", "--lexical-forms", default="lexical_forms.txt",
                        help="lexical forms file")
    parser.add_argument("-d", "--definitions", default="definitions",
                        help="definitions directory")
    args = parser.parse_args()

    form = lookup(args.query, args.lexical_forms)
    if form is None:
        # Nothing to display for an unknown form.  Returning ends the script
        # with the same (zero) status as before, without depending on the
        # exit() helper that the site module only installs for interactive
        # use.
        return

    # An empty list of paths simply results in no output.
    for definition_path in get_definition_paths(form, args.definitions):
        show_definition(definition_path)
|
||||
|
||||
|
||||
def lookup(query, lexical_form_path):
    """Return a form for which a definition might exist, else None.

    The lexical forms file is read as UTF-8 text, one form per line.  If
    ``query`` is exactly one of these forms, it is returned unchanged.

    Otherwise we are sure the lexical form does not have definitions: similar
    words (similarity ratio above 0.8) are suggested to the user on standard
    output, or a short notice is printed when there are none, and None is
    returned.
    """
    with open(lexical_form_path, "rt", encoding="utf-8") as lf_file:
        # Drop only the line terminator, so that the last form still matches
        # when the file does not end with a newline.
        forms = [line.rstrip("\n") for line in lf_file]
    if query in forms:
        return query

    suggestions = []
    for form in forms:
        candidate = form.rstrip()
        matcher = difflib.SequenceMatcher(None, query, candidate)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(): testing them first skips the expensive comparison for most
        # forms without changing which ones are kept.
        if (matcher.real_quick_ratio() > 0.8
                and matcher.quick_ratio() > 0.8
                and matcher.ratio() > 0.8):
            suggestions.append(candidate)

    if suggestions:
        print("Did you mean:")
        for suggestion in suggestions:
            print(f"- {suggestion}")
    else:
        # Avoid printing a dangling "Did you mean:" header with nothing below.
        print("No similar word found.")
    return None
|
||||
|
||||
|
||||
def get_definition_paths(query, defs):
    """Return a list of definition file paths for this lexical form.

    Definition files are searched in ``<defs>/<INITIAL>/<query>/``, where
    INITIAL is the first character of ``query`` with its combining marks
    removed and converted to upper case (so "éléphant" is looked up under
    "E").  Only files with a ``.txt`` suffix are returned, sorted by path so
    that the display order does not depend on the file system.

    A message is printed and an empty list is returned when the initial
    cannot be determined (including for an empty ``query``) or when the
    directory for this form does not exist.
    """
    # query[:1] rather than query[0]: an empty form yields "" instead of
    # raising IndexError, and is then reported like any unusable initial.
    nfkd = unicodedata.normalize("NFKD", query[:1])
    first_char = next((c for c in nfkd if not unicodedata.combining(c)), "")
    if not first_char:
        print("Can't understand what the first char is…")
        return []
    path = Path(defs) / first_char.upper() / query
    try:
        return sorted(f for f in path.iterdir() if f.suffix == ".txt")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: something exists at that path but it is a
        # regular file, which also means there is no definition to list.
        print("Definition not found.")
        return []
|
||||
|
||||
|
||||
def show_definition(def_path):
    """Print a definition from a definition file.

    The file is read as UTF-8 HTML.  The text that gets rendered and printed
    is the first ``<div>`` nested inside the first ``<div>`` of the document.
    If the file does not have that structure, a message is printed instead of
    raising, so that the other definitions of the same form can still be
    displayed.
    """
    with open(def_path, "rt", encoding="utf-8") as def_file:
        html = def_file.read()
    soup = BeautifulSoup(html, "html.parser")
    # Attribute-style navigation yields None when no such tag exists, so each
    # level has to be checked before going one step deeper.
    outer_div = soup.div
    definition_root = outer_div.div if outer_div is not None else None
    if definition_root is None:
        print(f"Can't find the definition content in {def_path}.")
        return
    print(parse_tag(definition_root))
|
||||
|
||||
|
||||
# Matches any run of whitespace.  parse_tag() uses it to replace each such run
# found in a text node with a single space.
TAG_STRIP_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def parse_tag(tag):
    """Return the text of an HTML node and its descendants, with formatting.

    Text nodes are returned with every run of whitespace replaced by a single
    space.  For any other node, the rendered children are concatenated, then:

    - a ``div`` gets a trailing newline;
    - a ``span`` is wrapped in a color for each known class it carries
      (``tlf_cdefinition``: yellow, ``tlf_cdomaine``: red,
      ``tlf_csyntagme``: green);
    - ``b`` is wrapped in bold and ``i`` in italic.

    Formatting codes are taken from the module-level terminal object ``t``.
    """
    if isinstance(tag, NavigableString):
        return TAG_STRIP_RE.sub(" ", tag)

    text = "".join(parse_tag(node) for node in tag.children)

    name = tag.name
    if name == "div":
        text += "\n"
    elif name == "span":
        span_classes = tag.get("class") or []
        # Applied in this fixed order; a span carrying several of these
        # classes is wrapped once per matching class.
        for css_class, color in (
            ("tlf_cdefinition", "yellow"),
            ("tlf_cdomaine", "red"),
            ("tlf_csyntagme", "green"),
        ):
            if css_class in span_classes:
                text = f"{getattr(t, color)}{text}{t.normal}"
    elif name == "b":
        text = f"{t.bold}{text}{t.normal}"
    elif name == "i":
        text = f"{t.italic}{text}{t.no_italic}"
    return text
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue