#!/usr/bin/env python3
# Extracted from patch b76088ca77e879cfc3ed04cc00f1707d2a10ece7
# ("wordreference: add script", by dece, Sun, 31 Oct 2021 21:20:32 +0100),
# which creates wordreference.py.
"""Translate words from the terminal using WordReference. Licence WTFPLv2.

As the website frontend is rather stable now it should not break completely but
weird things could show up in some cases I missed; tell me if you find a bug!

Requires requests and beautifulsoup4 on your system; the Debian packages for
both are fine.

If colorama is installed on your system (it often is for some reason), the
output will be colored; else it will still properly display text.
"""

import argparse
import dataclasses
import enum
import subprocess
import sys
import urllib.parse
from shutil import which

import requests
from bs4 import BeautifulSoup, NavigableString


class DummyColorama:
    """Stand-in for colorama's Fore/Style: every attribute is an empty string."""

    def __getattr__(self, _):
        return ""


try:
    import colorama
except ImportError:
    colorama = None
    HAS_COLORAMA = False
else:
    HAS_COLORAMA = True

# Colorless by default so the printing code works even when main() has not
# run; main() swaps in the real colorama objects when colors are wanted.
Fore = DummyColorama()
Style = DummyColorama()

URL = "https://www.wordreference.com"
AUTOCOMP_URL = f"{URL}/2012/autocomplete/autocomplete.aspx"

# Seconds to wait for the website before giving up.
REQUEST_TIMEOUT = 10

MeaningType = enum.Enum("MeaningType", "MAIN ADD COMPOUND")

# Maps the "data-ph" attribute of a table's header span to a meaning type.
SECTION_TYPES = {
    "sMainMeanings": MeaningType.MAIN,
    "sAddTrans": MeaningType.ADD,
}


@dataclasses.dataclass
class Translation:
    """One translation line attached to a meaning."""

    # Text of the right-hand cell, without its "POS2" elements.
    desc: str
    # First content of the last element of the right-hand cell.
    nature: str
    # Text of the "dsense" elements found in the center cell.
    precision: str = ""


@dataclasses.dataclass
class Meaning:
    """A group of table rows that starts at a row carrying an ID."""

    # ID attribute of the row that opens the group.
    ident: str
    # Kind of table the group was found in.
    mtype: MeaningType
    # Text of the <strong> element of the left-hand cell.
    original: str = ""
    # First content of the <em> element of the left-hand cell.
    nature: str = ""
    # Text fragments of the center cells that are not "dsense" elements.
    desc: list[str] = dataclasses.field(default_factory=list)
    # Example sentences taken from the rows that do not have 3 cells.
    ex: list[str] = dataclasses.field(default_factory=list)
    trans: list[Translation] = dataclasses.field(default_factory=list)


def main():
    """Parse the command line, set up colors and run the requested lookup."""
    ap = argparse.ArgumentParser()
    ap.add_argument("lang", help="4-letter code, e.g. 'fren' or 'enfr'")
    ap.add_argument("words", nargs="+", help="word or words to translate")
    ap.add_argument("-s", "--suggestions", action="store_true",
                    help="show suggestions instead of translations")
    ap.add_argument("-C", "--no-color", action="store_true",
                    help="disable colors")
    args = ap.parse_args()

    global Fore, Style
    # Only touch colorama when it has been imported AND colors are wanted;
    # every other combination keeps (or restores) the colorless stand-ins.
    if HAS_COLORAMA and not args.no_color:
        colorama.init()
        Fore = colorama.Fore
        Style = colorama.Style
    else:
        Fore = DummyColorama()
        Style = DummyColorama()

    lang = args.lang
    words = " ".join(args.words)

    if args.suggestions:
        get_suggestions(lang, words)
    else:
        get_translations(lang, words)


def _fetch(url, params=None):
    """Return the response for a GET on url, or exit if it is not a 200."""
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        sys.exit("Could not connect to WordReference.")
    if response.status_code != 200:
        sys.exit("Could not connect to WordReference.")
    return response


def get_translations(lang, words):
    """Fetch the translations for these words and print them.

    Exits with an error message if the website cannot be reached or if the
    page does not contain any meaning.
    """
    encoded_words = urllib.parse.quote(words)
    response = _fetch(f"{URL}/{lang}/{encoded_words}")

    soup = BeautifulSoup(response.text, "html.parser")
    article = soup.find(id="articleWRD")

    meanings = []
    # The article element is missing when the page has no translation table.
    tables = article.find_all("table") if article is not None else []
    for table in tables:
        # Discard error tables (and tables without any class).
        if "WRD" not in (table.get("class") or []):
            continue

        top_row = table.find("tr", class_="wrtopsection")
        ph_span = None
        if top_row is not None:
            ph_span = top_row.find("span", class_="ph")

        # Main meanings and additional translations are told apart by the
        # "data-ph" attribute of the header span.
        if ph_span is not None:
            mtype = SECTION_TYPES.get(ph_span.get("data-ph"))
            if mtype is not None:
                parse_rows(table, meanings, mtype)

        # Compound forms are identified by the ID of the table itself.
        if table.get("id") == "compound_forms":
            parse_rows(table, meanings, MeaningType.COMPOUND)

    if not meanings:
        sys.exit("No translation found.")

    for meaning in meanings:
        print_meaning(meaning)


def _last_tag_first_content(cell):
    """Return the first content of the last tag child of cell, or ""."""
    for content in reversed(cell.contents):
        # Text nodes have no contents of their own; skip trailing ones.
        if isinstance(content, NavigableString):
            continue
        return str(content.contents[0]) if content.contents else ""
    return ""


def _parse_translation(rcell):
    """Build a Translation from the right-hand cell of a 3-cell row."""
    fragments = []
    for content in rcell.contents:
        if isinstance(content, NavigableString):
            text = content.strip()
        elif "POS2" not in (content.get("class") or []):
            text = content.text
        else:
            continue
        # Whitespace-only nodes would only add stray blanks to the result.
        if text:
            fragments.append(text)
    return Translation(
        desc=" ".join(fragments),
        nature=_last_tag_first_content(rcell),
    )


def parse_rows(table, meanings, mtype):
    """Parse all meaningful rows of this table and store results in meanings.

    Completed Meaning objects of type mtype are appended to the meanings
    list; nothing is returned.
    """
    meaning = None
    for row in table.find_all("tr"):
        # Discard rows that aren't meanings (no "even" nor "odd" class).
        row_classes = row.get("class") or []
        if "even" not in row_classes and "odd" not in row_classes:
            continue

        # A new meaning starts with a row that has an ID.
        new_meaning_row = False
        if (meaning_id := row.get("id")):
            if meaning:
                meanings.append(meaning)
            meaning = Meaning(ident=meaning_id, mtype=mtype)
            new_meaning_row = True

        cells = row.find_all("td")
        # A row that comes before any meaning, or that has no cell at all,
        # has nothing to be attached to.
        if meaning is None or not cells:
            continue

        # Rows with 3 cells are definitions or complementary meanings.
        if len(cells) == 3:
            lcell, ccell, rcell = cells

            # For new meanings, use the left cell info.
            if new_meaning_row:
                if lcell.strong is not None:
                    meaning.original = lcell.strong.text
                if lcell.em is not None and lcell.em.contents:
                    meaning.nature = str(lcell.em.contents[0])

            # Each 3-cell row is a translation.
            translation = _parse_translation(rcell)

            # Center cell mixes original description and translation info…
            for child in ccell.children:
                if isinstance(child, NavigableString):
                    if (text := str(child).strip()):
                        meaning.desc.append(text)
                # "dsense" classes are for this specific translation,
                # not the current "row-group" meaning.
                elif "dsense" in (child.get("class") or []):
                    translation.precision += child.text
                elif (text := child.text.strip()):
                    meaning.desc.append(text)
            meaning.trans.append(translation)

        # Other rows are examples, stored in a span of their last cell.
        else:
            example_span = cells[-1].span
            if example_span is not None:
                meaning.ex.append(example_span.text)

    if meaning:
        meanings.append(meaning)


def print_meaning(meaning):
    """Print a few formatted lines for this meaning."""
    meaning_colors = {
        MeaningType.MAIN: Fore.GREEN,
        MeaningType.ADD: Fore.CYAN,
        MeaningType.COMPOUND: Fore.MAGENTA,
    }

    # First line contains the original word and its definition.
    first_line = (
        meaning_colors[meaning.mtype]
        + f"{Style.BRIGHT}{meaning.original}{Style.NORMAL}{Fore.RESET} "
    )
    if meaning.nature:
        first_line += f"{Style.DIM}({meaning.nature}){Style.NORMAL} "
    first_line += " ".join(meaning.desc)
    print(first_line)
    # Each translation is on its own line.
    for trans in meaning.trans:
        print(
            f"— {trans.desc} "
            + f"{Style.DIM}({trans.nature}) {trans.precision}{Style.NORMAL}")
    # Show examples on different, dimmed lines.
    for example in meaning.ex:
        print(f" {Style.DIM}e.g. {example}{Style.NORMAL}")


def get_suggestions(lang, words):
    """Show completion suggestions for these words.

    If fzf is installed, the user picks one suggestion and its translations
    are displayed; otherwise the suggestions are simply listed.
    """
    params = {"dict": lang, "query": words}
    response = _fetch(AUTOCOMP_URL, params=params)

    # The response is rows of tab-separated values. 1st record is the word
    # itself, 2nd is its language. The 3rd is an integer that I guess matches
    # the word popularity or a similarity score to the query… anyway it can be
    # used for sorting. 4th record is 0 or 1 if the word has conjugation
    # available.
    # Blank or malformed rows are dropped so that unpacking cannot fail.
    suggestions = [
        fields
        for line in response.text.splitlines()
        if len(fields := line.rstrip().split("\t")) == 4
    ]
    if not suggestions:
        sys.exit("No suggestions found.")

    # If FZF is available, let the user pick a word to perform the search.
    if (fzf := which("fzf")):
        input_data = "\n".join(
            f"{word} [{wlang}, {pop}, {conj}]"
            for word, wlang, pop, conj in suggestions
        ).encode()
        try:
            process = subprocess.run(
                [fzf], input=input_data, stdout=subprocess.PIPE, check=False
            )
        except OSError:
            sys.exit("Could not call FZF.")
        # Keep what precedes the bracketed information added above.
        result = process.stdout.decode().rpartition(" [")[0].strip()
        # Nothing is written to stdout when the user cancels the selection.
        if not result:
            sys.exit("No suggestion selected.")
        get_translations(lang, result)
    # Else just display the suggestions with information.
    else:
        for word, wlang, pop, conj in suggestions:
            output = f"[{wlang}] {word} ({pop})"
            if conj == "1":
                output += " (conj.)"
            print(output)


if __name__ == "__main__":
    main()