get-definitions: handle ambiguous and missing defs
commit ba27b4cd9a
parent d4eb485f45
@@ -37,32 +37,55 @@ def get_definitions(form):
     print(f"Processing {url}")
     content = get_page(url)
     soup = BeautifulSoup(content, "html.parser")
-    process_definition_page(soup, form, 0)
-    # Check other definitions.
-    tabs_bar = soup.find("div", id="vtoolbar")
-    if not tabs_bar:
-        exit("No tabs bar!\n")
-    num_defs = len(tabs_bar("li"))
-    for variant in range(1, num_defs): # skip the first, we got it above.
-        variant_url = url + f"/{variant}"
-        print(f"Processing {variant_url}")
-        content = get_page(variant_url)
-        soup = BeautifulSoup(content, "html.parser")
-        process_definition_page(soup, form, variant)
+    success = process_definition_page(soup, form, 0)
+    if success:
+        # Check other definitions.
+        tabs_bar = soup.find("div", id="vtoolbar")
+        if not tabs_bar:
+            exit("No tabs bar!\n")
+        num_defs = len(tabs_bar("li"))
+        for variant in range(1, num_defs): # Skip the first, we got it above.
+            variant_url = url + f"/{variant}"
+            print(f"Processing {variant_url}")
+            content = get_page(variant_url)
+            soup = BeautifulSoup(content, "html.parser")
+            process_definition_page(soup, form, variant)
+    else:
+        # Might be an ambiguity?
+        contentbox = soup.find("div", id="contentbox")
+        if not contentbox:
+            exit("No contentbox!")
+        h2 = contentbox.find("h2")
+        if h2:
+            title = str(h2.string).strip()
+            if title == "Terme introuvable":
+                links = contentbox("a")
+                for link in links:
+                    alternative_form = str(link.string).strip()
+                    if alternative_form == form:
+                        continue
+                    get_definitions(alternative_form)
+            elif title == "Erreur":
+                print("Nothing there :O")
+            else:
+                exit(f"Unknown error title: {h2}")
+        else:
+            exit("Bork! no definition nor ambiguity list")


 def get_page(url):
     response = requests.get(url)
     if response.status_code != 200:
-        sys.stderr.write(f"ERROR {response.status_code} at {url}\n")
+        exit(f"ERROR {response.status_code} at {url}\n")
     return response.text


 def process_definition_page(soup, form, variant):
     definition = soup.find("div", id="lexicontent")
     if not definition:
-        exit("No definition!\n")
+        return False
     save_definition(form, variant, definition.prettify())
+    return True


 def save_definition(name, variant, content):
@@ -73,7 +96,7 @@ def save_definition(name, variant, content):
         with open(def_file_path, "wt") as def_file:
             def_file.write(content)
     except OSError as exc:
-        sys.stderr.write(f"Could not save definition at {def_file_path}: {exc}")
+        exit(f"Could not save definition at {def_file_path}: {exc}")


 if __name__ == "__main__":
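Note on the new disambiguation branch: process_definition_page() now returns False when no "lexicontent" div is present, and get_definitions() then inspects the "contentbox" div for a "Terme introuvable" heading and recurses into the alternative forms it links to. The standalone sketch below illustrates that branch against a made-up HTML snippet; the markup and the example form "chevaux" are assumptions for illustration only, not taken from the scraped site.

from bs4 import BeautifulSoup

# Hypothetical ambiguity page; the real site's markup may differ.
SAMPLE_HTML = """
<div id="contentbox">
  <h2>Terme introuvable</h2>
  <a href="/definition/cheval">cheval</a>
  <a href="/definition/chevaux">chevaux</a>
</div>
"""

form = "chevaux"  # hypothetical form that was originally queried
soup = BeautifulSoup(SAMPLE_HTML, "html.parser")
contentbox = soup.find("div", id="contentbox")
title = str(contentbox.find("h2").string).strip()
if title == "Terme introuvable":
    # Same walk as the new else-branch: calling a Tag is shorthand for
    # find_all, so contentbox("a") yields every alternative link.
    for link in contentbox("a"):
        alternative_form = str(link.string).strip()
        if alternative_form == form:
            continue  # skip the form we already tried
        print(f"Would recurse into get_definitions({alternative_form!r})")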