275 lines
8.6 KiB
Python
Executable file
275 lines
8.6 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Translate words from the terminal using WordReference. Licence WTFPLv2.
|
|
|
|
As the website frontend is rather stable now it should not break completely but
|
|
weird things could show up in some cases I missed; tell me if you find a bug!
|
|
|
|
Requires requests and beautifulsoup4 on your system; the Debian packages for
|
|
both are fine.
|
|
|
|
If colorama is installed on your system (it often is for some reason), the
|
|
output will be colored; else it will still properly display text.
|
|
"""
|
|
|
|
import argparse
|
|
import dataclasses
|
|
import enum
|
|
import urllib.parse
|
|
from shutil import which
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup, NavigableString
|
|
|
|
|
|
class DummyColorama:
    """Stand-in for colorama's ``Fore``/``Style`` objects when colors are off.

    Any regular attribute (``GREEN``, ``BRIGHT``, ``RESET``...) evaluates to
    an empty string, so formatted output stays free of escape codes.
    """

    def __getattr__(self, name):
        """Return "" for any color attribute; reject dunder probes.

        Raises:
            AttributeError: for ``__dunder__`` names, so that protocol
                lookups such as ``copy.deepcopy`` probing ``__deepcopy__``
                see a missing attribute instead of a non-callable "".
        """
        # __getattr__ only runs after normal lookup failed, so every dunder
        # reaching this point is genuinely absent.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return ""
|
|
|
|
|
|
# Color handles used by the printing code. They start as no-op stand-ins so
# the functions of this module work even when main() has not run yet; main()
# swaps in the real colorama objects when colors are available and enabled.
Fore = DummyColorama()
Style = DummyColorama()

# colorama is optional: remember whether it could be imported.
try:
    import colorama
except ImportError:
    HAS_COLORAMA = False
else:
    HAS_COLORAMA = True
|
|
|
|
# Base address of the website; every request URL is built from it.
URL = "https://www.wordreference.com"


class MeaningType(enum.Enum):
    """Kind of result table a meaning was read from."""

    MAIN = 1
    ADD = 2
    COMPOUND = 3
|
|
|
|
|
|
@dataclasses.dataclass
class Translation:
    """A single translation line attached to a meaning."""

    # Translated text.
    desc: str
    # Grammatical nature of the translated text.
    nature: str
    # Extra sense information specific to this translation, if any.
    precision: str = ""
|
|
|
|
|
|
@dataclasses.dataclass
class Meaning:
    """One meaning of the looked-up word, with translations and examples."""

    # Identifier of the row that opens this meaning.
    ident: str
    # Kind of table the meaning comes from.
    mtype: MeaningType

    # Original word or expression and its grammatical nature.
    original: str = ""
    nature: str = ""

    # Description fragments, example sentences and translations; each
    # instance gets its own fresh lists.
    desc: list[str] = dataclasses.field(default_factory=list)
    ex: list[str] = dataclasses.field(default_factory=list)
    trans: list[Translation] = dataclasses.field(default_factory=list)
|
|
|
|
|
|
def main():
    """Parse the command line, configure colors and run the lookup."""
    parser = argparse.ArgumentParser()
    parser.add_argument("lang", help="4-letter code, e.g. 'fren' or 'enfr'")
    parser.add_argument("words", nargs="+", help="word or words to translate")
    parser.add_argument(
        "-s", "--suggestions",
        action="store_true",
        help="show suggestions instead of translations",
    )
    parser.add_argument(
        "-C", "--no-color",
        action="store_true",
        help="disable colors",
    )
    options = parser.parse_args()

    # Colors need both the optional colorama package and the user's consent.
    global Fore, Style
    use_colors = HAS_COLORAMA and not options.no_color
    if use_colors:
        Fore, Style = colorama.Fore, colorama.Style
        colorama.init()
    else:
        Fore, Style = DummyColorama(), DummyColorama()

    # Several words on the command line form one space-separated query.
    query = " ".join(options.words)
    action = get_suggestions if options.suggestions else get_translations
    action(options.lang, query)
|
|
|
|
|
|
def get_translations(lang, words):
    """Fetch and print the translations of these words.

    Downloads the WordReference page for ``words`` in the ``lang``
    dictionary, collects the meanings found in the main, additional and
    compound-form tables, then prints each of them.

    Args:
        lang: dictionary code placed in the URL path, e.g. "enfr".
        words: word or expression to look up; it is URL-quoted here.

    Raises:
        SystemExit: if the page cannot be fetched or contains no
            translation article.
    """
    encoded_words = urllib.parse.quote(words)
    try:
        # A timeout keeps the tool from hanging forever on a stalled
        # connection.
        response = requests.get(f"{URL}/{lang}/{encoded_words}", timeout=10)
    except requests.RequestException:
        raise SystemExit("Could not connect to WordReference.") from None
    if response.status_code != 200:
        raise SystemExit("Could not connect to WordReference.")

    soup = BeautifulSoup(response.text, "html.parser")
    article = soup.find(id="articleWRD")
    if article is None:
        # Pages without the article container hold nothing to parse.
        raise SystemExit("No translation found.")

    meanings = []
    for table in article.find_all("table"):
        # Discard error tables (and tables without any class at all).
        if "WRD" not in (table.get("class") or []):
            continue

        # The placeholder span of the top row tells which table this is.
        top_row = table.find("tr", class_="wrtopsection")
        ph_span = top_row.find("span", class_="ph") if top_row else None
        section = ph_span.get("data-ph") if ph_span else None

        if section == "sMainMeanings":
            # Main meanings
            parse_rows(table, meanings, MeaningType.MAIN)
        elif section == "sAddTrans":
            # Additional translations
            parse_rows(table, meanings, MeaningType.ADD)

        # Compound forms are recognised by the table ID instead.
        if table.get("id") == "compound_forms":
            parse_rows(table, meanings, MeaningType.COMPOUND)

    for meaning in meanings:
        print_meaning(meaning)
|
|
|
|
|
|
def parse_rows(table, meanings, mtype):
    """Parse all good rows of this table and store results in meanings.

    Args:
        table: table element whose ``tr`` rows are scanned.
        meanings: list receiving the parsed ``Meaning`` objects, in order.
        mtype: meaning type given to every meaning created from this table.
    """
    meaning = None
    for row in table.find_all("tr"):
        # Discard rows that aren't meanings; a row may have no class at all.
        row_classes = row.get("class") or []
        if "even" not in row_classes and "odd" not in row_classes:
            continue

        # A new meaning starts with a row that has an ID.
        is_new_meaning_row = False
        if (meaning_id := row.get("id")):
            if meaning is not None:
                meanings.append(meaning)
            meaning = Meaning(ident=meaning_id, mtype=mtype)
            is_new_meaning_row = True

        # A continuation row seen before any meaning was opened has nothing
        # to attach to.
        if meaning is None:
            continue

        cells = row.find_all("td")

        # Rows with 3 cells are definitions or complementary meanings.
        if len(cells) == 3:
            parse_common_cells(cells, meaning, is_new_meaning_row)
        # Other non-empty rows (normally 2 cells) are examples.
        elif cells:
            parse_example_cells(cells, meaning)

    # Do not forget the meaning still being built when the table ends.
    if meaning is not None:
        meanings.append(meaning)
|
|
|
|
|
|
def parse_common_cells(cells, meaning, is_new_meaning):
    """Parse common cells: meaning, definition, translations, etc.

    Args:
        cells: the three cells (left, center, right) of a row.
        meaning: meaning being built; updated in place.
        is_new_meaning: True when this row opens the meaning, in which case
            the left cell provides the original word and its nature.
    """
    lcell, ccell, rcell = cells

    # For new meanings, use the left cell info (when the markup has it).
    if is_new_meaning:
        if lcell.strong is not None:
            meaning.original = lcell.strong.text
        if lcell.em is not None and lcell.em.contents:
            meaning.nature = lcell.em.contents[0]

    # Each 3-cell row is a translation: gather the text of the right cell,
    # leaving out "POS2" elements and empty fragments that would only add
    # stray spaces to the joined description.
    trans_desc = []
    for content in rcell.contents:
        if isinstance(content, NavigableString):
            text = content.strip()
        elif "POS2" not in (content.get("class") or []):
            text = content.text
        else:
            continue
        if text:
            trans_desc.append(text)

    # The nature is the first child of the last element of the right cell.
    # Text nodes have no children, so only elements are considered.
    nature = ""
    last = rcell.contents[-1] if rcell.contents else None
    if (
        last is not None
        and not isinstance(last, NavigableString)
        and last.contents
    ):
        nature = last.contents[0]
    translation = Translation(desc=" ".join(trans_desc), nature=nature)

    # Center cell mixes original description and translation info…
    for child in ccell.children:
        if not isinstance(child, NavigableString):
            # "dsense" classes are for this specific translation,
            # not the current "row-group" meaning.
            if "dsense" in (child.get("class") or []):
                translation.precision += child.text
            elif (text := child.text.strip()):
                meaning.desc.append(text)
        elif (text := str(child).strip()):
            meaning.desc.append(text)
    meaning.trans.append(translation)
|
|
|
|
|
|
def parse_example_cells(cells, meaning):
    """Parse cells of an example line (pretty much just the last one).

    Args:
        cells: cells of the row; only the last one is inspected.
        meaning: meaning whose ``ex`` list receives the example text.
    """
    # A row without cells carries no example.
    if not cells:
        return
    span = cells[-1].span
    if span is not None:
        meaning.ex.append(span.text)
|
|
|
|
|
|
def print_meaning(meaning):
    """Write this meaning to stdout: header, translations, then examples."""
    palette = {
        MeaningType.MAIN: Fore.GREEN,
        MeaningType.ADD: Fore.CYAN,
        MeaningType.COMPOUND: Fore.MAGENTA,
    }

    # Header: colored original word, optional nature, then the description.
    header_parts = [
        palette[meaning.mtype],
        f"{Style.BRIGHT}{meaning.original}{Style.NORMAL}{Fore.RESET} ",
    ]
    if meaning.nature:
        header_parts.append(f"{Style.DIM}({meaning.nature}){Style.NORMAL} ")
    header_parts.append(" ".join(meaning.desc))
    print("".join(header_parts))

    # One line per translation, with nature and precision when present.
    for translation in meaning.trans:
        pieces = [f"— {translation.desc}"]
        if translation.nature:
            pieces.append(f" {Style.DIM}({translation.nature}){Style.NORMAL}")
        if translation.precision:
            pieces.append(f" {Style.DIM}{translation.precision}{Style.NORMAL}")
        print("".join(pieces))

    # Examples come last, each on its own dimmed line.
    for sentence in meaning.ex:
        print(f" {Style.DIM}e.g. {sentence}{Style.NORMAL}")
|
|
|
|
|
|
# Endpoint queried for completion suggestions.
AUTOCOMP_URL = URL + "/2012/autocomplete/autocomplete.aspx"
|
|
|
|
|
|
def get_suggestions(lang, words):
    """Show completion suggestions for these words.

    When fzf is installed the user picks one suggestion and its
    translations are displayed; otherwise the suggestions are printed.

    Args:
        lang: dictionary code sent as the ``dict`` query parameter.
        words: partial word or expression to complete.

    Raises:
        SystemExit: if the suggestions cannot be fetched or fzf cannot be
            started.
    """
    params = {"dict": lang, "query": words}
    try:
        # A timeout keeps the tool from hanging forever on a stalled
        # connection.
        response = requests.get(AUTOCOMP_URL, params=params, timeout=10)
    except requests.RequestException:
        raise SystemExit("Could not connect to WordReference.") from None
    if response.status_code != 200:
        raise SystemExit("Could not connect to WordReference.")

    # The response is rows of tab-separated values. 1st record is the word
    # itself, 2nd is its language. The 3rd is an integer that I guess matches
    # the word popularity or a similarity score to the query… anyway it can be
    # used for sorting. 4th record is 0 or 1 if the word has conjugation
    # available. Rows that do not have exactly these 4 records are skipped.
    suggestions = []
    for line in response.text.splitlines():
        fields = line.rstrip().split("\t")
        if len(fields) == 4:
            suggestions.append(fields)
    if not suggestions:
        return

    # If FZF is available, let the user pick a word to perform the search.
    if (fzf := which("fzf")):
        import subprocess

        input_data = "\n".join(
            f"{word} [{wlang}, {pop}, {conj}]"
            for word, wlang, pop, conj in suggestions
        ).encode()
        try:
            completed = subprocess.run(
                [fzf], input=input_data, stdout=subprocess.PIPE
            )
        except OSError:
            raise SystemExit("Could not call FZF.") from None

        # A non-zero status or empty output means nothing was picked.
        selection = completed.stdout.decode().strip()
        if completed.returncode != 0 or not selection:
            return

        # Drop the " [lang, pop, conj]" suffix appended above; cutting at its
        # last occurrence keeps words that themselves contain a bracket.
        word, separator, _ = selection.rpartition(" [")
        get_translations(lang, word if separator else selection)
    # Else just display the suggestions with information.
    else:
        for word, wlang, pop, conj in suggestions:
            output = f"[{wlang}] {word} ({pop})"
            if conj == "1":
                output += " (conj.)"
            print(output)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|