Edm0nd/edmond/plugins/taxref.py

246 lines
8.4 KiB
Python
Raw Normal View History

import random
2022-05-19 14:12:33 +02:00
import urllib.parse
from typing import cast, Optional
2022-05-19 14:12:33 +02:00
import requests
from edmond.plugin import Plugin
from edmond.plugins.shrlok import ShrlokPlugin
2022-05-19 14:12:33 +02:00
# Base URL of the TAXREF web API; the search endpoints used below are built by
# appending a path and query string to it.
BASE_URL = "https://taxref.mnhn.fr/api"
2022-07-10 18:25:40 +02:00
# HTML page template filled in with ``str.format``. The single positional
# placeholder (inside ``const urls = [...]``) receives a comma-separated list
# of JavaScript string literals, one per image URL. Every other brace is
# doubled so that ``format`` emits it literally. Once loaded, the page fetches
# each URL as a blob and appends it to the body as an <img> element.
IMG_FETCH_HTML = """\
<!doctype html>
<html>
<head>
<meta charset="UTF-8"/>
<style>img {{ display: block; max-width: 95%; }}</style>
</head>
<body></body>
<script>
const urls = [{}];
urls.forEach(url => {{
fetch(url)
.then(r => r.blob())
.then(blob => {{
let img = document.createElement("img");
img.src = window.URL.createObjectURL(blob);
document.body.appendChild(img);
}});
}});
</script>
</html>
"""
2022-05-19 14:12:33 +02:00
class TaxrefPlugin(Plugin):
# Configuration keys this plugin reads through ``self.config``.
# NOTE(review): presumably the Plugin base class checks that they are all
# present before enabling the plugin -- confirm against edmond.plugin.
REQUIRED_CONFIGS = [
    "commands",
    "not_found_reply",
    "reply",
    "ambiguous_reply",
    "unnamed_species",
]
def __init__(self, bot):
    """Set up the plugin by handing *bot* to the base-class constructor."""
    super().__init__(bot)
def on_pubmsg(self, event):
    """Dispatch a public message to the command it invokes, if any.

    Returns False when ``should_handle_command`` rejects the message text,
    and True once the message has been recognised as a command (whether or
    not its identifier matched one of the two configured commands).
    """
    message = event.arguments[0]
    if not self.should_handle_command(message):
        return False

    commands = self.config["commands"]

    # First configured command ("taxref"): lookup by scientific name.
    if self.command.ident == commands[0]:
        self.search_by_name(self.command.content, event.target)

    # Second configured command ("scientifize"): lookup by vernacular name.
    if self.command.ident == commands[1]:
        self.find_scientific_name(self.command.content, event.target)

    return True
def search_by_name(self, name: str, target: str) -> None:
    """Look up a taxon by scientific name and post its details to *target*.

    The TAXREF search endpoint is queried with the lower-cased name. When
    several taxa match, the result is narrowed down as follows:

    * if exactly one match has species rank (``rankId == "ES"``), use it;
    * otherwise, if exactly one species has a scientific name equal to the
      query, use it;
    * otherwise send an ambiguous reply listing the candidate species, or
      every match when none of them has species rank.

    After the main reply, a second message is sent when
    ``get_images_reply`` returns one.

    Network errors, non-200 responses and unparsable bodies are reported
    through ``signal_failure``.
    """
    name = name.lower()
    enc_name = urllib.parse.quote(name)
    url = (
        f"{BASE_URL}/taxa/search?scientificNames={enc_name}"
        "&page=1&size=100"
    )

    try:
        # Bound the wait so a stalled API cannot block the handler forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        self.signal_failure(target)
        return
    if response.status_code != 200:
        self.signal_failure(target)
        return

    try:
        data = response.json()
    except ValueError:
        self.signal_failure(target)
        return

    items = data.get("_embedded", {}).get("taxa", [])
    if not items:
        self.bot.say(target, self.config["not_found_reply"])
        return

    if len(items) == 1:
        # Only one result: use it.
        item_to_use = items[0]
    else:
        # More than one result: prefer a matching species if there is one.
        species_items = [
            item for item in items if item.get("rankId") == "ES"
        ]
        num_species = len(species_items)
        self.bot.log_d(f"{num_species} species.")
        if num_species == 1:
            item_to_use = species_items[0]
        else:
            # Several (or zero) species: accept one only if its name is
            # exactly the query, else show an ambiguous reply.
            species_with_same_name = [
                item
                for item in species_items
                if (item.get("scientificName") or "").lower() == name
            ]
            if len(species_with_same_name) != 1:
                # With no species at all, list every match (sub-species
                # etc.) rather than sending a reply with no names in it.
                reply = self.get_ambiguous_reply(species_items or items)
                self.bot.say(target, reply)
                return
            item_to_use = species_with_same_name[0]

    unnamed = self.config["unnamed_species"]
    reply = self.config["reply"].format(
        sci_name=item_to_use["scientificName"],
        # The vernacular name may be absent or null; use the placeholder.
        fr_name=item_to_use.get("frenchVernacularName") or unnamed,
        family=item_to_use["familyName"],
        cd_nom=item_to_use["id"],
        cd_ref=item_to_use["referenceId"],
    )
    self.bot.say(target, reply)

    if images_reply := self.get_images_reply(item_to_use):
        self.bot.say(target, images_reply)
def get_ambiguous_reply(self, items, max_shown: int = 5) -> str:
    """Build a reply listing candidate taxa for an ambiguous query.

    The reply is the configured ``ambiguous_reply`` prefix followed by the
    scientific names of at most *max_shown* items, separated by ", ".
    When more items exist than are shown, "… (+N)" is appended, where N is
    the number of names left out.
    """
    shown = items[: max(max_shown, 0)]
    reply = self.config["ambiguous_reply"] + ", ".join(
        item["scientificName"] for item in shown
    )
    hidden = len(items) - len(shown)
    if hidden > 0:
        # Report how many candidates were omitted, not the total count.
        reply += f"… (+{hidden})"
    return reply
def get_images_reply(self, item) -> Optional[str]:
    """Return a message pointing to media for *item*, or None.

    The media list is fetched from the item's ``_links.media.href`` URL.
    Entries without a file link are ignored.

    If the shrlok plugin is available, up to 10 randomly sampled image URLs
    are embedded in a page generated from IMG_FETCH_HTML, the page is posted
    through shrlok, and the value shrlok returns is used as the link. The
    page fetches and displays the images itself, so they do not have to be
    downloaded first to be seen.

    If shrlok is not available, one image URL is picked at random. Media
    links on TAXREF are downloaded by the browser rather than displayed,
    hence the benefit of having shrlok.

    None is returned when the item has no media link, the request fails,
    the response cannot be parsed, no usable image URL is found, or shrlok
    returns nothing.
    """
    import json  # Local import: only this method needs it.

    m_url = item.get("_links", {}).get("media", {}).get("href")
    if not m_url:
        self.bot.log_d("No media links.")
        return None

    try:
        # Bound the wait so a stalled API cannot block the handler forever.
        response = requests.get(m_url, timeout=10)
    except requests.RequestException as exc:
        self.bot.log_d(f"Failed to reach media link ({exc}).")
        return None
    if (code := response.status_code) != 200:
        self.bot.log_d(f"Failed to reach media link ({code}).")
        return None

    try:
        media_data = response.json()
    except ValueError:
        self.bot.log_d("Media response is not valid JSON.")
        return None

    items = media_data.get("_embedded", {}).get("media", [])
    if not items:
        self.bot.log_d("No media found in response.")
        return None

    # Drop entries without a file link so that neither the generated page
    # nor the random pick can end up with a missing URL.
    urls = [
        href
        for media in items
        if (href := media.get("_links", {}).get("file", {}).get("href"))
    ]
    if not urls:
        self.bot.log_d("No link found.")
        return None

    if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
        if len(urls) > 10:
            urls = random.sample(urls, 10)
        # json.dumps produces a properly escaped JavaScript string literal;
        # "</" is additionally broken up so that a URL cannot close the
        # surrounding <script> element.
        urls_text = ",".join(
            json.dumps(url).replace("</", "<\\/") for url in urls
        )
        html = IMG_FETCH_HTML.format(urls_text).encode()
        link = shrlok.post({"type": "raw", "ext": "html"}, html)
        if not link:
            self.bot.log_d("shrlok plugin returned an empty string.")
    else:
        link = random.choice(urls)

    if link:
        return "📷 " + link
    return None
def find_scientific_name(self, name: str, target: str) -> None:
    """Find the scientific name(s) matching a French vernacular name.

    The TAXREF search endpoint is queried with the lower-cased name and the
    outcome is posted to *target*:

    * no match: the configured ``not_found_reply``;
    * one match: its family and scientific name;
    * several matches: if the shrlok plugin is available, a text listing
      ("vernacular name → family scientific-name", one per line) is posted
      through it and the value it returns is sent; otherwise an ambiguous
      reply is sent.

    Network errors, non-200 responses and unparsable bodies are reported
    through ``signal_failure``.
    """
    name = name.lower()
    enc_name = urllib.parse.quote(name)
    url = (
        f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"
        "&page=1&size=100"
    )

    try:
        # Bound the wait so a stalled API cannot block the handler forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        self.signal_failure(target)
        return
    if response.status_code != 200:
        self.signal_failure(target)
        return

    try:
        data = response.json()
    except ValueError:
        self.signal_failure(target)
        return

    items = data.get("_embedded", {}).get("taxa", [])
    if not items:
        self.bot.say(target, self.config["not_found_reply"])
        return

    if len(items) == 1:
        # Only one result: use it.
        reply = TaxrefPlugin.item_to_full_name(items[0])
    elif shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
        # Several results: for simplicity's sake, share the whole list
        # through shrlok.
        unnamed = self.config["unnamed_species"]
        lines = [
            # The vernacular name may be absent or null; fall back to the
            # placeholder instead of failing on string concatenation.
            f"{item.get('frenchVernacularName') or unnamed}"
            f" → {TaxrefPlugin.item_to_full_name(item)}"
            for item in items
        ]
        text = "\n".join(lines) + "\n"
        reply = shrlok.post({"type": "txt"}, text.encode())
        if not reply:
            self.bot.log_d("shrlok plugin returned an empty string.")
            return
    else:
        # Several results and no shrlok: just show an ambiguous response.
        reply = self.get_ambiguous_reply(items)

    self.bot.say(target, reply)
2022-07-07 19:07:50 +02:00
@staticmethod
2022-11-29 13:00:10 +01:00
def item_to_full_name(item: dict) -> str:
2022-07-07 19:07:50 +02:00
family_name = item.get("familyName")
sci_name = item.get("scientificName")
return f"{family_name} {sci_name}"