import random
import urllib.parse
from typing import cast, Optional

import requests

from edmond.plugin import Plugin
from edmond.plugins.shrlok import ShrlokPlugin

BASE_URL = "https://taxref.mnhn.fr/api"

# Seconds to wait for an HTTP response. `requests` has no default timeout, so
# an unresponsive server would otherwise block the caller indefinitely.
REQUEST_TIMEOUT = 10

# Maximum number of names listed in an ambiguous reply.
MAX_AMBIGUOUS_NAMES = 5

# Maximum number of image URLs embedded in the page shared through shrlok.
MAX_IMAGES = 10

# Template of the HTML page shared through shrlok. It is formatted with a
# single positional argument: a comma-separated list of double-quoted URLs.
# NOTE(review): the template body is empty in the reviewed source; confirm it
# was not stripped by accident.
IMG_FETCH_HTML = """\
"""


class TaxrefPlugin(Plugin):
    """Answer taxonomy questions using the TAXREF API.

    Two commands are handled, in the order they appear in the "commands"
    configuration entry: a lookup by scientific name and a lookup of the
    scientific name matching a French vernacular name.
    """

    REQUIRED_CONFIGS = [
        "commands",
        "not_found_reply",
        "reply",
        "ambiguous_reply",
        "unnamed_species",
    ]

    def __init__(self, bot):
        super().__init__(bot)

    def on_pubmsg(self, event):
        """Dispatch a public message to the matching command handler.

        Returns False when the message is not a command for this plugin, True
        otherwise.
        """
        if not self.should_handle_command(event.arguments[0]):
            return False
        commands = self.config["commands"]
        ident = self.command.ident
        content = self.command.content
        if ident == commands[0]:
            # "taxref"
            self.search_by_name(content, event.target)
        elif ident == commands[1]:
            # "scientifize"
            self.find_scientific_name(content, event.target)
        return True

    def _search_taxa(
        self, field: str, name: str, target: str
    ) -> Optional[list]:
        """Query the taxa search endpoint, filtering on `field`.

        Returns the list of taxa found (possibly empty), or None if the
        request failed; in that case the failure has already been signalled
        to `target`.
        """
        enc_name = urllib.parse.quote(name)
        url = f"{BASE_URL}/taxa/search?{field}={enc_name}&page=1&size=100"
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            self.bot.log_d(f"TAXREF request failed: {exc}")
            self.signal_failure(target)
            return None
        if response.status_code != 200:
            self.signal_failure(target)
            return None
        try:
            data = response.json()
        except ValueError:
            self.signal_failure(target)
            return None
        return (data.get("_embedded") or {}).get("taxa") or []

    def search_by_name(self, name: str, target: str) -> None:
        """Get species data from a scientific name.

        Try to disambiguate the results by focusing on species only and their
        scientific name. The outcome (result, not-found, ambiguous or failure
        message) is sent to `target`.
        """
        name = name.lower()
        items = self._search_taxa("scientificNames", name, target)
        if items is None:
            return
        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            item_to_use = items[0]
        else:
            # More than one result: if the results contain a corresponding
            # species, use it, else return names for sub-species etc.
            species_items = [
                item for item in items if item.get("rankId") == "ES"
            ]
            num_species = len(species_items)
            self.bot.log_d(f"{num_species} species.")
            if num_species == 1:
                item_to_use = species_items[0]
            else:
                # If there are several species, check if one of them has the
                # exact same name; else show an ambiguous reply.
                species_with_same_name = [
                    item
                    for item in species_items
                    if (item.get("scientificName") or "").lower() == name
                ]
                if len(species_with_same_name) != 1:
                    # Without any species, list every result instead of
                    # sending a reply that contains no name at all.
                    candidates = species_items or items
                    self.bot.say(target, self.get_ambiguous_reply(candidates))
                    return
                item_to_use = species_with_same_name[0]

        unnamed = self.config["unnamed_species"]
        reply = self.config["reply"].format(
            sci_name=item_to_use["scientificName"],
            fr_name=item_to_use.get("frenchVernacularName") or unnamed,
            family=item_to_use["familyName"],
            cd_nom=item_to_use["id"],
            cd_ref=item_to_use["referenceId"],
        )
        self.bot.say(target, reply)

        if images_reply := self.get_images_reply(item_to_use):
            self.bot.say(target, images_reply)

    def get_ambiguous_reply(self, items) -> str:
        """Build a reply listing the scientific names of potential matches.

        At most MAX_AMBIGUOUS_NAMES names are listed; when more are available
        the reply ends with the number of names left out.
        """
        shown = items[:MAX_AMBIGUOUS_NAMES]
        reply = self.config["ambiguous_reply"]
        reply += ", ".join(item["scientificName"] for item in shown)
        hidden = len(items) - len(shown)
        if hidden > 0:
            reply += f"… (+{hidden})"
        return reply

    @staticmethod
    def _get_img_url(media_item) -> Optional[str]:
        """Return the file URL of a media entry, or None if it has none."""
        return media_item.get("_links", {}).get("file", {}).get("href")

    def get_images_reply(self, item) -> Optional[str]:
        """If there are media available, return one in a message.

        If shrlok is available, return a link to an HTML page shared by
        shrlok. The HTML page, whose source code is generated from the
        template IMG_FETCH_HTML, fetches a random sample of 1 to 10 images
        from the results and embed the images directly into the page so it is
        not necessary to download the images before seeing them.

        If shrlok is not available, return a string with an URL to an image
        if one is available, or None if no image could be found or we
        encountered an error. The image is selected randomly.

        Yes, media links on TAXREF are downloaded by the browser and not shown
        directly, thus the benefits of having shrlok available.
        """
        m_url = item.get("_links", {}).get("media", {}).get("href")
        if not m_url:
            self.bot.log_d("No media links.")
            return None

        try:
            response = requests.get(m_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            self.bot.log_d(f"Failed to reach media link ({exc}).")
            return None
        if (code := response.status_code) != 200:
            self.bot.log_d(f"Failed to reach media link ({code}).")
            return None
        try:
            media_data = response.json()
        except ValueError:
            self.bot.log_d("Media response is not valid JSON.")
            return None

        items = (media_data.get("_embedded") or {}).get("media") or []
        if not items:
            self.bot.log_d("No media found in response.")
            return None

        # Keep only entries that expose a file link, so a missing link is
        # never rendered as the literal "None" nor picked at random.
        urls = [url for url in map(self._get_img_url, items) if url]
        if not urls:
            self.bot.log_d("No link found.")
            return None

        if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
            if len(urls) > MAX_IMAGES:
                urls = random.sample(urls, MAX_IMAGES)
            urls_text = ",".join(f'"{url}"' for url in urls)
            html = IMG_FETCH_HTML.format(urls_text).encode()
            link = shrlok.post({"type": "raw", "ext": "html"}, html)
            if not link:
                self.bot.log_d("shrlok plugin returned an empty string.")
                return None
        else:
            link = random.choice(urls)
        return "📷 " + link

    def find_scientific_name(self, name: str, target: str) -> None:
        """Find a corresponding scientific name for a vernacular name.

        The outcome (result, not-found, ambiguous or failure message) is sent
        to `target`.
        """
        name = name.lower()
        items = self._search_taxa("frenchVernacularNames", name, target)
        if items is None:
            return
        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            reply = TaxrefPlugin.item_to_full_name(items[0])
        else:
            # More than one result? For simplicity sake, use the shrlok plugin
            # if available or just show an ambiguous response.
            reply = None
            if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
                unnamed = self.config["unnamed_species"]
                lines = []
                for item in items:
                    fr_name = item.get("frenchVernacularName") or unnamed
                    full_name = TaxrefPlugin.item_to_full_name(item)
                    lines.append(f"{fr_name} → {full_name}")
                text = "\n".join(lines) + "\n"
                reply = shrlok.post({"type": "txt"}, text.encode())
            if not reply:
                # shrlok is missing or returned nothing usable.
                reply = self.get_ambiguous_reply(items)
        self.bot.say(target, reply)

    @staticmethod
    def item_to_full_name(item) -> str:
        """Return "<family name> <scientific name>" for a taxon.

        A missing or empty part is left out instead of being rendered as
        "None".
        """
        parts = (item.get("familyName"), item.get("scientificName"))
        return " ".join(part for part in parts if part)