2022-05-19 14:34:41 +02:00
|
|
|
import random
|
2022-05-19 14:12:33 +02:00
|
|
|
import urllib.parse
|
2022-09-03 20:43:56 +02:00
|
|
|
from typing import cast, Optional
|
2022-05-19 14:12:33 +02:00
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from edmond.plugin import Plugin
|
2022-09-03 20:43:56 +02:00
|
|
|
from edmond.plugins.shrlok import ShrlokPlugin
|
2022-05-19 14:12:33 +02:00
|
|
|
|
|
|
|
# Root URL of the TAXREF REST API; endpoint paths (e.g. "/taxa/search") are
# appended to it when building request URLs.
BASE_URL = "https://taxref.mnhn.fr/api"
|
2022-07-10 18:25:40 +02:00
|
|
|
IMG_FETCH_HTML = """\
|
|
|
|
<!doctype html>
|
|
|
|
<html>
|
2022-09-02 18:36:41 +02:00
|
|
|
<head>
|
|
|
|
<meta charset="UTF-8"/>
|
|
|
|
<style>img {{ display: block; max-width: 95%; }}</style>
|
|
|
|
</head>
|
2022-07-10 18:25:40 +02:00
|
|
|
<body></body>
|
|
|
|
<script>
|
|
|
|
const urls = [{}];
|
|
|
|
urls.forEach(url => {{
|
|
|
|
fetch(url)
|
|
|
|
.then(r => r.blob())
|
|
|
|
.then(blob => {{
|
|
|
|
let img = document.createElement("img");
|
|
|
|
img.src = window.URL.createObjectURL(blob);
|
|
|
|
document.body.appendChild(img);
|
|
|
|
}});
|
|
|
|
}});
|
|
|
|
</script>
|
|
|
|
</html>
|
|
|
|
"""
|
2022-05-19 14:12:33 +02:00
|
|
|
|
|
|
|
|
|
|
|
class TaxrefPlugin(Plugin):
    """Query the TAXREF API (taxref.mnhn.fr) from chat commands.

    The first entry of the "commands" config looks a taxon up by scientific
    name; the second one looks up scientific names from a French vernacular
    name.
    """

    REQUIRED_CONFIGS = [
        "commands",
        "not_found_reply",
        "reply",
        "ambiguous_reply",
        "unnamed_species",
    ]

    # Seconds to wait for the remote API before giving up, so that a stalled
    # server cannot block the bot indefinitely.
    REQUEST_TIMEOUT = 10
    # Maximum number of names listed in an ambiguous reply.
    MAX_AMBIGUOUS_NAMES = 5
    # Maximum number of images embedded in the page shared through shrlok.
    MAX_IMAGES = 10

    def __init__(self, bot):
        super().__init__(bot)

    def on_pubmsg(self, event):
        """Dispatch a public message to the matching command handler.

        Return False when should_handle_command() rejects the message, True
        otherwise.
        """
        if not self.should_handle_command(event.arguments[0]):
            return False

        commands = self.config["commands"]
        ident = self.command.ident
        # "taxref"
        if ident == commands[0]:
            self.search_by_name(self.command.content, event.target)
        # "scientifize" (guarded: the config may declare a single command)
        elif len(commands) > 1 and ident == commands[1]:
            self.find_scientific_name(self.command.content, event.target)
        return True

    def _fetch_json(self, url: str) -> Optional[dict]:
        """GET `url` and return the decoded JSON object, or None on failure.

        Failure covers network errors and timeouts, any status code other
        than 200, a body that is not valid JSON, and a JSON document that is
        not an object. The reason is written to the debug log.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            self.bot.log_d(f"Request failed: {exc}")
            return None
        if (code := response.status_code) != 200:
            self.bot.log_d(f"Unexpected status code ({code}).")
            return None
        try:
            data = response.json()
        except ValueError:
            self.bot.log_d("Response body is not valid JSON.")
            return None
        if not isinstance(data, dict):
            self.bot.log_d("Response JSON is not an object.")
            return None
        return data

    @staticmethod
    def _embedded(data: dict, key: str) -> list:
        """Return the list at data["_embedded"][key], or [] when absent."""
        return (data.get("_embedded") or {}).get(key) or []

    def search_by_name(self, name: str, target: str) -> None:
        """Get species data from a scientific name.

        Try to disambiguate the results by focusing on species only and their
        scientific name. The outcome (taxon description, ambiguous reply,
        not-found reply or failure signal) is sent to `target`.
        """
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?scientificNames={enc_name}"
            "&page=1&size=100"
        )
        data = self._fetch_json(url)
        if data is None:
            self.signal_failure(target)
            return

        items = self._embedded(data, "taxa")
        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            item_to_use = items[0]
        else:
            # More than one result: if the results contain a corresponding
            # species, use it, else return names for sub-species etc.
            species_items = [
                item for item in items if item.get("rankId") == "ES"
            ]
            num_species = len(species_items)
            self.bot.log_d(f"{num_species} species.")
            if num_species == 1:
                item_to_use = species_items[0]
            else:
                # If there are several species, check if one of them has the
                # exact same name; else show an ambiguous reply.
                species_with_same_name = [
                    item
                    for item in species_items
                    if (item.get("scientificName") or "").lower() == name
                ]
                if len(species_with_same_name) != 1:
                    # With no species at all, list every result rather than
                    # sending an ambiguous reply that names nothing.
                    reply = self.get_ambiguous_reply(species_items or items)
                    self.bot.say(target, reply)
                    return
                item_to_use = species_with_same_name[0]

        unnamed = self.config["unnamed_species"]
        reply = self.config["reply"].format(
            sci_name=item_to_use["scientificName"],
            fr_name=item_to_use.get("frenchVernacularName") or unnamed,
            family=item_to_use["familyName"],
            cd_nom=item_to_use["id"],
            cd_ref=item_to_use["referenceId"],
        )
        self.bot.say(target, reply)

        if images_reply := self.get_images_reply(item_to_use):
            self.bot.say(target, images_reply)

    def get_ambiguous_reply(self, items) -> str:
        """Show a reply with potential species.

        At most MAX_AMBIGUOUS_NAMES scientific names are listed after the
        "ambiguous_reply" config text; when more items exist, the number of
        omitted ones is appended as "… (+N)".
        """
        shown = items[: self.MAX_AMBIGUOUS_NAMES]
        reply = self.config["ambiguous_reply"]
        reply += ", ".join(item["scientificName"] for item in shown)
        omitted = len(items) - len(shown)
        if omitted > 0:
            reply += f"… (+{omitted})"
        return reply

    def get_images_reply(self, item) -> Optional[str]:
        """If there are media available, return one in a message.

        If shrlok is available, return a link to an HTML page shared by
        shrlok. The HTML page, whose source code is generated from the
        template IMG_FETCH_HTML, fetches a random sample of 1 to 10 images
        from the results and embeds the images directly into the page so it
        is not necessary to download the images before seeing them.

        If shrlok is not available, return a string with a URL to an image
        if one is available, or None if no image could be found or we
        encountered an error. The image is selected randomly. Yes, media
        links on TAXREF are downloaded by the browser and not shown directly,
        thus the benefits of having shrlok available.
        """
        m_url = item.get("_links", {}).get("media", {}).get("href")
        if not m_url:
            self.bot.log_d("No media links.")
            return None

        media_data = self._fetch_json(m_url)
        if media_data is None:
            self.bot.log_d("Failed to fetch media data.")
            return None

        media_items = self._embedded(media_data, "media")
        if not media_items:
            self.bot.log_d("No media found in response.")
            return None

        def get_img_url(media_item) -> Optional[str]:
            return media_item.get("_links", {}).get("file", {}).get("href")

        # Drop entries without a file link so that neither the generated page
        # nor the random pick ends up with a missing URL.
        urls = [url for url in map(get_img_url, media_items) if url]
        if not urls:
            self.bot.log_d("No link found.")
            return None

        if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
            if len(urls) > self.MAX_IMAGES:
                urls = random.sample(urls, self.MAX_IMAGES)
            urls_text = ",".join(f'"{url}"' for url in urls)
            html = IMG_FETCH_HTML.format(urls_text).encode()
            link = shrlok.post({"type": "raw", "ext": "html"}, html)
            if not link:
                self.bot.log_d("shrlok plugin returned an empty string.")
        else:
            link = random.choice(urls)

        return ("📷 " + link) if link else None

    def find_scientific_name(self, name: str, target: str) -> None:
        """Find a corresponding scientific name for a vernacular name.

        The outcome (full name, shrlok link, ambiguous reply, not-found reply
        or failure signal) is sent to `target`.
        """
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"
            "&page=1&size=100"
        )
        data = self._fetch_json(url)
        if data is None:
            self.signal_failure(target)
            return

        items = self._embedded(data, "taxa")
        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            reply = TaxrefPlugin.item_to_full_name(items[0])
        else:
            # More than one result? For simplicity sake, use the shrlok plugin
            # if available or just show an ambiguous response.
            reply = ""
            if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
                unnamed = self.config["unnamed_species"]
                lines = [
                    # The vernacular name may be empty for some taxa.
                    (item.get("frenchVernacularName") or unnamed)
                    + " → "
                    + TaxrefPlugin.item_to_full_name(item)
                    for item in items
                ]
                text = "\n".join(lines) + "\n"
                reply = shrlok.post({"type": "txt"}, text.encode())
                if not reply:
                    self.bot.log_d("shrlok plugin returned an empty string.")
            if not reply:
                # No shrlok, or sharing failed: fall back to a short listing.
                reply = self.get_ambiguous_reply(items)

        self.bot.say(target, reply)

    @staticmethod
    def item_to_full_name(item):
        """Return "<family name> <scientific name>", skipping missing parts."""
        parts = (item.get("familyName"), item.get("scientificName"))
        return " ".join(part for part in parts if part)
|