Edm0nd/edmond/plugins/taxref.py

import json
import random
import urllib.parse
from typing import cast, Optional

import requests

from edmond.plugin import Plugin
from edmond.plugins.shrlok import ShrlokPlugin

# Root of the TAXREF REST API; request URLs are built by appending a path such
# as "/taxa/search" to it.
BASE_URL = "https://taxref.mnhn.fr/api"
# HTML page template meant to be filled in with str.format(): the single "{}"
# field receives a comma-separated list of quoted image URLs, which become the
# elements of the JavaScript `urls` array. Doubled braces ("{{" and "}}") are
# format() escapes for the literal braces of the CSS and JavaScript. The
# page's script fetches each URL as a blob and appends it to <body> as an
# <img> element.
IMG_FETCH_HTML = """\
<!doctype html>
<html>
  <head>
    <meta charset="UTF-8"/>
    <style>img {{ display: block; max-width: 95%; }}</style>
  </head>
  <body></body>
  <script>
    const urls = [{}];
    urls.forEach(url => {{
      fetch(url)
        .then(r => r.blob())
        .then(blob => {{
          let img = document.createElement("img");
          img.src = window.URL.createObjectURL(blob);
          document.body.appendChild(img);
        }});
    }});
  </script>
</html>
"""


class TaxrefPlugin(Plugin):
    """Query the TAXREF API and post the results to the channel.

    Two commands are handled, identified by their position in the "commands"
    configuration list: the first looks a taxon up by scientific name, the
    second finds scientific names matching a French vernacular name.
    """

    # Configuration keys read by the methods of this class.
    # NOTE(review): presumably validated by the Plugin base class — confirm.
    REQUIRED_CONFIGS = [
        "commands",
        "not_found_reply",
        "reply",
        "ambiguous_reply",
        "unnamed_species",
    ]

    def __init__(self, bot):
        """Set up the plugin by delegating to the Plugin base class."""
        super().__init__(bot)

    def on_pubmsg(self, event):
        if not self.should_handle_command(event.arguments[0]):
            return False

        # "taxref"
        if self.command.ident == self.config["commands"][0]:
            self.search_by_name(self.command.content, event.target)
        # "scientifize"
        if self.command.ident == self.config["commands"][1]:
            self.find_scientific_name(self.command.content, event.target)
        return True

    def search_by_name(self, name: str, target: str) -> None:
        """Get species data from a scientific name and reply on `target`.

        Try to disambiguate the results by focusing on species only and their
        scientific name:

        - a single result is used as is;
        - among several results, a single species (rankId "ES") is used;
        - among several species, the one whose scientific name equals `name`
          (case-insensitively) is used;
        - otherwise an ambiguous reply listing the candidates is sent.

        Failures to reach the API or to decode its answer are reported with
        signal_failure(). When a taxon is selected, a second message with a
        link to pictures is sent if get_images_reply() provides one.
        """
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?scientificNames={enc_name}"
            "&page=1&size=100"
        )
        try:
            # Without a timeout, a stalled connection would block the caller
            # indefinitely.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            self.bot.log_d(f"TAXREF request failed: {exc}")
            self.signal_failure(target)
            return
        if response.status_code != 200:
            self.signal_failure(target)
            return
        try:
            data = response.json()
        except ValueError:
            self.signal_failure(target)
            return

        items = data.get("_embedded", {}).get("taxa", [])

        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            item_to_use = items[0]
        else:
            # More than one result: if the results contain a corresponding
            # species, use it, else return names for sub-species etc.
            species_items = [item for item in items if item["rankId"] == "ES"]
            num_species = len(species_items)
            self.bot.log_d(f"{num_species} species.")
            if num_species == 1:
                item_to_use = species_items[0]
            else:
                # If there are several species, check if one of them has the
                # exact same name; else show an ambiguous reply.
                species_with_same_name = [
                    item
                    for item in species_items
                    if item["scientificName"].lower() == name
                ]
                if len(species_with_same_name) != 1:
                    # With no species at all among the results, list every
                    # result instead of sending a reply without any name.
                    candidates = species_items or items
                    reply = self.get_ambiguous_reply(candidates)
                    self.bot.say(target, reply)
                    return
                item_to_use = species_with_same_name[0]

        unnamed = self.config["unnamed_species"]
        reply = self.config["reply"].format(
            sci_name=item_to_use["scientificName"],
            fr_name=item_to_use["frenchVernacularName"] or unnamed,
            family=item_to_use["familyName"],
            cd_nom=item_to_use["id"],
            cd_ref=item_to_use["referenceId"],
        )
        self.bot.say(target, reply)

        if images_reply := self.get_images_reply(item_to_use):
            self.bot.say(target, images_reply)

    def get_ambiguous_reply(self, items) -> str:
        """Show a reply with potential species."""
        reply = self.config["ambiguous_reply"]
        append = ""
        if len(items) > 5:
            append = f"… (+{len(items)})"
            items = items[:5]
        reply += ", ".join(item["scientificName"] for item in items)
        if append:
            reply += append
        return reply

    def get_images_reply(self, item) -> Optional[str]:
        """If there are media available, return one in a message.

        If shrlok is available, return a link to an HTML page shared by shrlok.
        The HTML page, whose source code is generated from the template
        IMG_FETCH_HTML, fetches a random sample of 1 to 10 images from the
        results and embed the images directly into the page so it is not
        necessary to download the images before seeing them.

        If shrlok is not available, return a string with an URL to an image if
        one is available, or None if no image could be found or we encountered
        an error. The image is selected randomly. Yes, media links on TAXREF
        are downloaded by the browser and not shown directly, thus the benefits
        of having shrlok available.
        """
        m_url = item.get("_links", {}).get("media", {}).get("href")
        if not m_url:
            self.bot.log_d("No media links.")
            return None
        response = requests.get(m_url)
        if (code := response.status_code) != 200:
            self.bot.log_d(f"Failed to reach media link ({code}).")
            return None
        media_data = response.json()
        items = media_data.get("_embedded", {}).get("media", [])
        if not items:
            self.bot.log_d("No media found in response.")
            return None

        def get_img_url(item) -> Optional[str]:
            return item.get("_links", {}).get("file", {}).get("href")

        if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
            if len(items) > 10:
                items = random.sample(items, 10)
            urls = map(get_img_url, items)
            urls_text = ",".join(map(lambda url: f'"{url}"', urls))
            html = IMG_FETCH_HTML.format(urls_text).encode()
            link = shrlok.post({"type": "raw", "ext": "html"}, html)
            if not link:
                self.bot.log_d("shrlok plugin returned an empty string.")
        else:
            link = get_img_url(random.choice(items))
            if not link:
                self.bot.log_d("No link found.")

        if link:
            return "📷 " + link
        return None

    def find_scientific_name(self, name: str, target: str) -> None:
        """Find a corresponding scientific name for a vernacular name.

        A single result is sent as "<family> <scientific name>". With several
        results, the full list ("<vernacular name> → <full name>", one per
        line) is shared through the shrlok plugin when it is available and the
        returned link is sent; otherwise an ambiguous reply is sent.

        Failures to reach the API or to decode its answer are reported with
        signal_failure().
        """
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"
            "&page=1&size=100"
        )
        try:
            # Without a timeout, a stalled connection would block the caller
            # indefinitely.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            self.bot.log_d(f"TAXREF request failed: {exc}")
            self.signal_failure(target)
            return
        if response.status_code != 200:
            self.signal_failure(target)
            return
        try:
            data = response.json()
        except ValueError:
            self.signal_failure(target)
            return
        items = data.get("_embedded", {}).get("taxa", [])

        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            reply = TaxrefPlugin.item_to_full_name(items[0])
        elif shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):
            # More than one result? For simplicity sake, share the whole list
            # through the shrlok plugin.
            # The vernacular name may be null in the API answer; fall back to
            # the configured placeholder rather than concatenating None.
            unnamed = self.config["unnamed_species"]
            lines = [
                (item.get("frenchVernacularName") or unnamed)
                + " → "
                + TaxrefPlugin.item_to_full_name(item)
                for item in items
            ]
            text = "\n".join(lines) + "\n"
            reply = shrlok.post({"type": "txt"}, text.encode())
            if not reply:
                self.bot.log_d("shrlok plugin returned an empty string.")
                return
        else:
            # shrlok is not available: just show an ambiguous response.
            reply = self.get_ambiguous_reply(items)

        self.bot.say(target, reply)

    @staticmethod
    def item_to_full_name(item: dict) -> str:
        family_name = item.get("familyName")
        sci_name = item.get("scientificName")
        return f"{family_name} {sci_name}"
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`import random`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`import urllib.parse`
shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`from typing import cast, Optional`
taxref: new plugin! 2022-05-19 14:12:33 +02:00
			`import requests`

			`from edmond.plugin import Plugin`
shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`from edmond.plugins.shrlok import ShrlokPlugin`
taxref: new plugin! 2022-05-19 14:12:33 +02:00
			`BASE_URL = "https://taxref.mnhn.fr/api"`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`IMG_FETCH_HTML = """\`
			`<!doctype html>`
			`<html>`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`<head>`
			`<meta charset="UTF-8"/>`
			`<style>img {{ display: block; max-width: 95%; }}</style>`
			`</head>`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`<body></body>`
			`<script>`
			`const urls = [{}];`
			`urls.forEach(url => {{`
			`fetch(url)`
			`.then(r => r.blob())`
			`.then(blob => {{`
			`let img = document.createElement("img");`
			`img.src = window.URL.createObjectURL(blob);`
			`document.body.appendChild(img);`
			`}});`
			`}});`
			`</script>`
			`</html>`
			`"""`
taxref: new plugin! 2022-05-19 14:12:33 +02:00

			`class TaxrefPlugin(Plugin):`

			`REQUIRED_CONFIGS = [`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`"commands",`
			`"not_found_reply",`
			`"reply",`
			`"ambiguous_reply",`
			`"unnamed_species",`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`]`

			`def __init__(self, bot):`
			`super().__init__(bot)`

			`def on_pubmsg(self, event):`
			`if not self.should_handle_command(event.arguments[0]):`
			`return False`

taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`# "taxref"`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`if self.command.ident == self.config["commands"][0]:`
			`self.search_by_name(self.command.content, event.target)`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`# "scientifize"`
			`if self.command.ident == self.config["commands"][1]:`
			`self.find_scientific_name(self.command.content, event.target)`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`return True`

taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`def search_by_name(self, name: str, target: str) -> None:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""Get species data from a scientific name.`

			`Try to disambiguate the results by focusing on species only and their`
			`scientific name.`
			`"""`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`name = name.lower()`
			`enc_name = urllib.parse.quote(name)`
			`url = (`
			`f"{BASE_URL}/taxa/search?scientificNames={enc_name}"`
			`"&page=1&size=100"`
			`)`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`response = requests.get(url)`
			`if response.status_code != 200:`
			`self.signal_failure(target)`
			`return`
taxref: catch errors on JSON decoding 2022-09-12 12:37:32 +02:00			`try:`
			`data = response.json()`
			`except ValueError:`
			`self.signal_failure(target)`
			`return`

taxref: new plugin! 2022-05-19 14:12:33 +02:00			`items = data.get("_embedded", {}).get("taxa", [])`

			`if not items:`
			`self.bot.say(target, self.config["not_found_reply"])`
			`return`

			`if len(items) == 1:`
			`# Only one result: use it.`
			`item_to_use = items[0]`
			`else:`
			`# More than one result: if the results contain a corresponding`
			`# species, use it, else return names for sub-species etc.`
			`species_items = []`
			`for item in items:`
			`if item["rankId"] == "ES":`
			`species_items.append(item)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`num_species = len(species_items)`
			`self.bot.log_d(f"{num_species} species.")`
			`if num_species == 1:`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`item_to_use = species_items[0]`
			`else:`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`# If there are several species, check if one of them has the`
			`# exact same name; else show an ambiguous reply.`
			`species_with_same_name = [`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`item`
			`for item in species_items`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`if item["scientificName"].lower() == name`
			`]`
			`if len(species_with_same_name) != 1:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`reply = self.get_ambiguous_reply(species_items)`
			`self.bot.say(target, reply)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`return`
			`item_to_use = species_with_same_name[0]`

			`unnamed = self.config["unnamed_species"]`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`reply = self.config["reply"].format(`
			`sci_name=item_to_use["scientificName"],`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`fr_name=item_to_use["frenchVernacularName"] or unnamed,`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`family=item_to_use["familyName"],`
			`cd_nom=item_to_use["id"],`
			`cd_ref=item_to_use["referenceId"],`
			`)`
			`self.bot.say(target, reply)`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`if images_reply := self.get_images_reply(item_to_use):`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`self.bot.say(target, images_reply)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`def get_ambiguous_reply(self, items) -> str:`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`"""Show a reply with potential species."""`
			`reply = self.config["ambiguous_reply"]`
			`append = ""`
			`if len(items) > 5:`
			`append = f"… (+{len(items)})"`
			`items = items[:5]`
			`reply += ", ".join(item["scientificName"] for item in items)`
			`if append:`
			`reply += append`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return reply`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`def get_images_reply(self, item) -> Optional[str]:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""If there are media available, return one in a message.`

taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`If shrlok is available, return a link to an HTML page shared by shrlok.`
			`The HTML page, whose source code is generated from the template`
			`IMG_FETCH_HTML, fetches a random sample of 1 to 10 images from the`
			`results and embed the images directly into the page so it is not`
			`necessary to download the images before seeing them.`

			`If shrlok is not available, return a string with an URL to an image if`
			`one is available, or None if no image could be found or we encountered`
			`an error. The image is selected randomly. Yes, media links on TAXREF`
			`are downloaded by the browser and not shown directly, thus the benefits`
			`of having shrlok available.`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""`
			`m_url = item.get("_links", {}).get("media", {}).get("href")`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`if not m_url:`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`self.bot.log_d("No media links.")`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`response = requests.get(m_url)`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`if (code := response.status_code) != 200:`
			`self.bot.log_d(f"Failed to reach media link ({code}).")`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`media_data = response.json()`
			`items = media_data.get("_embedded", {}).get("media", [])`
			`if not items:`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`self.bot.log_d("No media found in response.")`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`def get_img_url(item) -> Optional[str]:`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`return item.get("_links", {}).get("file", {}).get("href")`

shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`if len(items) > 10:`
			`items = random.sample(items, 10)`
			`urls = map(get_img_url, items)`
			`urls_text = ",".join(map(lambda url: f'"{url}"', urls))`
shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`html = IMG_FETCH_HTML.format(urls_text).encode()`
			`link = shrlok.post({"type": "raw", "ext": "html"}, html)`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`if not link:`
			`self.bot.log_d("shrlok plugin returned an empty string.")`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`else:`
			`link = get_img_url(random.choice(items))`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`if not link:`
			`self.bot.log_d("No link found.")`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00
			`if link:`
			`return "📷 " + link`
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`return None`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00
taxref: complete showing image gallery if possible 2022-09-02 18:36:41 +02:00			`def find_scientific_name(self, name: str, target: str):`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""Find a corresponding scientific name for a vernacular name."""`
			`name = name.lower()`
			`enc_name = urllib.parse.quote(name)`
			`url = (`
			`f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"`
			`"&page=1&size=100"`
			`)`
			`response = requests.get(url)`
			`if response.status_code != 200:`
			`self.signal_failure(target)`
			`return`
taxref: catch errors on JSON decoding 2022-09-12 12:37:32 +02:00			`try:`
			`data = response.json()`
			`except ValueError:`
			`self.signal_failure(target)`
			`return`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`items = data.get("_embedded", {}).get("taxa", [])`

			`if not items:`
			`self.bot.say(target, self.config["not_found_reply"])`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`return`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00
			`if len(items) == 1:`
			`# Only one result: use it.`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00			`reply = TaxrefPlugin.item_to_full_name(items[0])`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`else:`
			`# More than one result? For simplicity sake, use the shrlok plugin`
			`# if available or just show an ambiguous response.`
shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`if shrlok := cast(ShrlokPlugin, self.bot.get_plugin("shrlok")):`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`text = (`
			`"\n".join(`
			`(`
			`item["frenchVernacularName"]`
			`+ " → "`
			`+ TaxrefPlugin.item_to_full_name(item)`
			`)`
			`for item in items`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00			`)`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`+ "\n"`
			`)`
shrlok: update and clean plugin (and taxref) 2022-09-03 20:43:56 +02:00			`reply = shrlok.post({"type": "txt"}, text.encode())`
taxref: fix shrlok issue 2022-11-29 13:00:10 +01:00			`if not reply:`
			`self.bot.log_d("shrlok plugin returned an empty string.")`
			`return`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`else:`
			`reply = self.get_ambiguous_reply(items)`

			`self.bot.say(target, reply)`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00
			`@staticmethod`
taxref: fix shrlok issue 2022-11-29 13:00:10 +01:00			`def item_to_full_name(item: dict) -> str:`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00			`family_name = item.get("familyName")`
			`sci_name = item.get("scientificName")`
			`return f"{family_name} {sci_name}"`