# Edm0nd/edmond/plugins/taxref.py

import json
import random
import urllib.parse

import requests

from edmond.plugin import Plugin

# Root of the TAXREF web API; the search endpoints used by the plugin
# ("/taxa/search?...") are appended to it.
BASE_URL = "https://taxref.mnhn.fr/api"
# Template of an HTML page, filled in with str.format(): the single "{}"
# placeholder receives a comma-separated list of double-quoted image URLs,
# which becomes the content of the JavaScript `urls` array. The doubled braces
# ("{{" and "}}") are literal braces escaped for str.format(). Once loaded,
# the page fetches every URL as a blob and appends it to the body as an <img>
# element.
IMG_FETCH_HTML = """\
<!doctype html>
<html>
  <head><meta charset="UTF-8"/></head>
  <body></body>
  <script>
    const urls = [{}];
    urls.forEach(url => {{
      fetch(url)
        .then(r => r.blob())
        .then(blob => {{
          let img = document.createElement("img");
          img.src = window.URL.createObjectURL(blob);
          document.body.appendChild(img);
        }});
    }});
  </script>
</html>
"""


class TaxrefPlugin(Plugin):

    # Configuration keys read by this plugin (presumably checked by the Plugin
    # base class when the plugin is loaded -- confirm in edmond.plugin):
    # - "commands": command identifiers; index 0 triggers search_by_name and
    #   index 1 triggers find_scientific_name.
    # - "not_found_reply": text said when a search returns no taxon.
    # - "reply": str.format() template for a found taxon; it is given the
    #   sci_name, fr_name, family, cd_nom and cd_ref keyword arguments.
    # - "ambiguous_reply": text put before the list of candidate names.
    # - "unnamed_species": used as fr_name when the taxon has no French
    #   vernacular name.
    REQUIRED_CONFIGS = [
        "commands",
        "not_found_reply",
        "reply",
        "ambiguous_reply",
        "unnamed_species",
    ]

    def __init__(self, bot):
        """Create the plugin for `bot`; all the setup is done by the parent."""
        super().__init__(bot)

    def on_pubmsg(self, event):
        if not self.should_handle_command(event.arguments[0]):
            return False

        # "taxref"
        if self.command.ident == self.config["commands"][0]:
            self.search_by_name(self.command.content, event.target)
        # "scientifize"
        if self.command.ident == self.config["commands"][1]:
            self.find_scientific_name(self.command.content, event.target)
        return True

    def search_by_name(self, name, target):
        """Get species data from a scientific name.

        Try to disambiguate the results by focusing on species only and their
        scientific name.
        """
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?scientificNames={enc_name}"
            "&page=1&size=100"
        )
        response = requests.get(url)
        if response.status_code != 200:
            self.signal_failure(target)
            return
        data = response.json()
        items = data.get("_embedded", {}).get("taxa", [])

        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            item_to_use = items[0]
        else:
            # More than one result: if the results contain a corresponding
            # species, use it, else return names for sub-species etc.
            species_items = []
            for item in items:
                if item["rankId"] == "ES":
                    species_items.append(item)
            num_species = len(species_items)
            self.bot.log_d(f"{num_species} species.")
            if num_species == 1:
                item_to_use = species_items[0]
            else:
                # If there are several species, check if one of them has the
                # exact same name; else show an ambiguous reply.
                species_with_same_name = [
                    item
                    for item in species_items
                    if item["scientificName"].lower() == name
                ]
                if len(species_with_same_name) != 1:
                    reply = self.get_ambiguous_reply(species_items)
                    self.bot.say(target, reply)
                    return
                item_to_use = species_with_same_name[0]

        unnamed = self.config["unnamed_species"]
        reply = self.config["reply"].format(
            sci_name=item_to_use["scientificName"],
            fr_name=item_to_use["frenchVernacularName"] or unnamed,
            family=item_to_use["familyName"],
            cd_nom=item_to_use["id"],
            cd_ref=item_to_use["referenceId"],
        )
        self.bot.say(target, reply)

        if images_reply := self.get_images_reply(item_to_use):
            self.bot.say(target, images_reply)

    def get_ambiguous_reply(self, items):
        """Show a reply with potential species."""
        reply = self.config["ambiguous_reply"]
        append = ""
        if len(items) > 5:
            append = f"… (+{len(items)})"
            items = items[:5]
        reply += ", ".join(item["scientificName"] for item in items)
        if append:
            reply += append
        return reply

    def get_images_reply(self, item):
        """If there are media available, return one in a message.

        The media list is downloaded from the "media" link of `item`. Only
        media having a file link are considered.

        If shrlok is available, return a link to an HTML page shared by shrlok.
        The HTML page, whose source code is generated from the template
        IMG_FETCH_HTML, fetches a random sample of 1 to 10 images from the
        results and embeds the images directly into the page so it is not
        necessary to download the images before seeing them.

        If shrlok is not available, return a string with a URL to an image
        selected randomly. Media links on TAXREF are downloaded by the browser
        and not shown directly, thus the benefits of having shrlok available.

        Return None if no image could be found or if an error was encountered
        (network error, non-200 status, invalid JSON, no link from shrlok).
        """
        m_url = ((item.get("_links") or {}).get("media") or {}).get("href")
        if not m_url:
            return None
        try:
            # The timeout keeps an unresponsive server from blocking the
            # caller forever (requests has no timeout by default).
            response = requests.get(m_url, timeout=10)
            if response.status_code != 200:
                return None
            media_data = response.json()
        except (requests.RequestException, ValueError):
            # Images are a bonus: on any network or decoding error, give up.
            return None
        if not isinstance(media_data, dict):
            return None
        media = (media_data.get("_embedded") or {}).get("media") or []

        # Keep only usable links, so that a medium without a file can neither
        # be picked nor end up as "None" in the generated page.
        urls = []
        for medium in media:
            href = ((medium.get("_links") or {}).get("file") or {}).get("href")
            if href:
                urls.append(href)
        if not urls:
            return None

        if shrlok := self.bot.get_plugin("shrlok"):
            if len(urls) > 10:
                urls = random.sample(urls, 10)
            # json.dumps produces properly escaped JavaScript string literals;
            # "</" is escaped as well so that a URL cannot close the <script>
            # element.
            urls_text = ",".join(
                json.dumps(url).replace("</", "<\\/") for url in urls
            )
            html = IMG_FETCH_HTML.format(urls_text)
            link = shrlok.post_html(html)
        else:
            link = random.choice(urls)

        if not link:
            return None
        return "📷 " + link

    def find_scientific_name(self, name, target):
        """Find a corresponding scientific name for a vernacular name."""
        name = name.lower()
        enc_name = urllib.parse.quote(name)
        url = (
            f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"
            "&page=1&size=100"
        )
        response = requests.get(url)
        if response.status_code != 200:
            self.signal_failure(target)
            return
        data = response.json()
        items = data.get("_embedded", {}).get("taxa", [])

        if not items:
            self.bot.say(target, self.config["not_found_reply"])
            return

        if len(items) == 1:
            # Only one result: use it.
            reply = TaxrefPlugin.item_to_full_name(items[0])
        else:
            # More than one result? For simplicity sake, use the shrlok plugin
            # if available or just show an ambiguous response.
            if shrlok := self.bot.get_plugin("shrlok"):
                text = (
                    "\n".join(
                        (
                            item["frenchVernacularName"]
                            + " → "
                            + TaxrefPlugin.item_to_full_name(item)
                        )
                        for item in items
                    )
                    + "\n"
                )
                reply = shrlok.post_text(text)
            else:
                reply = self.get_ambiguous_reply(items)

        self.bot.say(target, reply)

    @staticmethod
    def item_to_full_name(item):
        family_name = item.get("familyName")
        sci_name = item.get("scientificName")
        return f"{family_name} {sci_name}"
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`import random`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`import urllib.parse`

			`import requests`

			`from edmond.plugin import Plugin`

			`BASE_URL = "https://taxref.mnhn.fr/api"`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`IMG_FETCH_HTML = """\`
			`<!doctype html>`
			`<html>`
			`<head><meta charset="UTF-8"/></head>`
			`<body></body>`
			`<script>`
			`const urls = [{}];`
			`urls.forEach(url => {{`
			`fetch(url)`
			`.then(r => r.blob())`
			`.then(blob => {{`
			`let img = document.createElement("img");`
			`img.src = window.URL.createObjectURL(blob);`
			`document.body.appendChild(img);`
			`}});`
			`}});`
			`</script>`
			`</html>`
			`"""`
taxref: new plugin! 2022-05-19 14:12:33 +02:00

			`class TaxrefPlugin(Plugin):`

			`REQUIRED_CONFIGS = [`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`"commands",`
			`"not_found_reply",`
			`"reply",`
			`"ambiguous_reply",`
			`"unnamed_species",`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`]`

			`def __init__(self, bot):`
			`super().__init__(bot)`

			`def on_pubmsg(self, event):`
			`if not self.should_handle_command(event.arguments[0]):`
			`return False`

taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`# "taxref"`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`if self.command.ident == self.config["commands"][0]:`
			`self.search_by_name(self.command.content, event.target)`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`# "scientifize"`
			`if self.command.ident == self.config["commands"][1]:`
			`self.find_scientific_name(self.command.content, event.target)`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`return True`

			`def search_by_name(self, name, target):`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""Get species data from a scientific name.`

			`Try to disambiguate the results by focusing on species only and their`
			`scientific name.`
			`"""`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`name = name.lower()`
			`enc_name = urllib.parse.quote(name)`
			`url = (`
			`f"{BASE_URL}/taxa/search?scientificNames={enc_name}"`
			`"&page=1&size=100"`
			`)`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`response = requests.get(url)`
			`if response.status_code != 200:`
			`self.signal_failure(target)`
			`return`
			`data = response.json()`
			`items = data.get("_embedded", {}).get("taxa", [])`

			`if not items:`
			`self.bot.say(target, self.config["not_found_reply"])`
			`return`

			`if len(items) == 1:`
			`# Only one result: use it.`
			`item_to_use = items[0]`
			`else:`
			`# More than one result: if the results contain a corresponding`
			`# species, use it, else return names for sub-species etc.`
			`species_items = []`
			`for item in items:`
			`if item["rankId"] == "ES":`
			`species_items.append(item)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`num_species = len(species_items)`
			`self.bot.log_d(f"{num_species} species.")`
			`if num_species == 1:`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`item_to_use = species_items[0]`
			`else:`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`# If there are several species, check if one of them has the`
			`# exact same name; else show an ambiguous reply.`
			`species_with_same_name = [`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`item`
			`for item in species_items`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`if item["scientificName"].lower() == name`
			`]`
			`if len(species_with_same_name) != 1:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`reply = self.get_ambiguous_reply(species_items)`
			`self.bot.say(target, reply)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`return`
			`item_to_use = species_with_same_name[0]`

			`unnamed = self.config["unnamed_species"]`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`reply = self.config["reply"].format(`
			`sci_name=item_to_use["scientificName"],`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`fr_name=item_to_use["frenchVernacularName"] or unnamed,`
taxref: new plugin! 2022-05-19 14:12:33 +02:00			`family=item_to_use["familyName"],`
			`cd_nom=item_to_use["id"],`
			`cd_ref=item_to_use["referenceId"],`
			`)`
			`self.bot.say(target, reply)`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`if images_reply := self.get_images_reply(item_to_use):`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`self.bot.say(target, images_reply)`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`def get_ambiguous_reply(self, items):`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00			`"""Show a reply with potential species."""`
			`reply = self.config["ambiguous_reply"]`
			`append = ""`
			`if len(items) > 5:`
			`append = f"… (+{len(items)})"`
			`items = items[:5]`
			`reply += ", ".join(item["scientificName"] for item in items)`
			`if append:`
			`reply += append`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return reply`
taxref: do not show ambig. reply needlessly esp. if there is a matching species with the exact same name… 2022-06-16 16:59:58 +02:00
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`def get_images_reply(self, item):`
			`"""If there are media available, return one in a message.`

taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`If shrlok is available, return a link to an HTML page shared by shrlok.`
			`The HTML page, whose source code is generated from the template`
			`IMG_FETCH_HTML, fetches a random sample of 1 to 10 images from the`
			`results and embed the images directly into the page so it is not`
			`necessary to download the images before seeing them.`

			`If shrlok is not available, return a string with an URL to an image if`
			`one is available, or None if no image could be found or we encountered`
			`an error. The image is selected randomly. Yes, media links on TAXREF`
			`are downloaded by the browser and not shown directly, thus the benefits`
			`of having shrlok available.`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`"""`
			`m_url = item.get("_links", {}).get("media", {}).get("href")`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`if not m_url:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`response = requests.get(m_url)`
			`if response.status_code != 200:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`media_data = response.json()`
			`items = media_data.get("_embedded", {}).get("media", [])`
			`if not items:`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`return None`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`def get_img_url(item):`
			`return item.get("_links", {}).get("file", {}).get("href")`

style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`if shrlok := self.bot.get_plugin("shrlok"):`
taxref: use shrlok to show images 2022-07-10 18:25:40 +02:00			`if len(items) > 10:`
			`items = random.sample(items, 10)`
			`urls = map(get_img_url, items)`
			`urls_text = ",".join(map(lambda url: f'"{url}"', urls))`
			`html = IMG_FETCH_HTML.format(urls_text)`
			`link = shrlok.post_html(html)`
			`else:`
			`link = get_img_url(random.choice(items))`

			`if link:`
			`return "📷 " + link`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00
			`def find_scientific_name(self, name, target):`
			`"""Find a corresponding scientific name for a vernacular name."""`
			`name = name.lower()`
			`enc_name = urllib.parse.quote(name)`
			`url = (`
			`f"{BASE_URL}/taxa/search?frenchVernacularNames={enc_name}"`
			`"&page=1&size=100"`
			`)`
			`response = requests.get(url)`
			`if response.status_code != 200:`
			`self.signal_failure(target)`
			`return`
			`data = response.json()`
			`items = data.get("_embedded", {}).get("taxa", [])`

			`if not items:`
			`self.bot.say(target, self.config["not_found_reply"])`
taxref: show a random photo if any are available 2022-05-19 14:34:41 +02:00			`return`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00
			`if len(items) == 1:`
			`# Only one result: use it.`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00			`reply = TaxrefPlugin.item_to_full_name(items[0])`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`else:`
			`# More than one result? For simplicity sake, use the shrlok plugin`
			`# if available or just show an ambiguous response.`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`if shrlok := self.bot.get_plugin("shrlok"):`
			`text = (`
			`"\n".join(`
			`(`
			`item["frenchVernacularName"]`
			`+ " → "`
			`+ TaxrefPlugin.item_to_full_name(item)`
			`)`
			`for item in items`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00			`)`
style: run Black over the whole project 2022-08-09 23:47:28 +02:00			`+ "\n"`
			`)`
taxref: add function to get scientific name 2022-07-06 17:18:28 +02:00			`reply = shrlok.post_text(text)`
			`else:`
			`reply = self.get_ambiguous_reply(items)`

			`self.bot.say(target, reply)`
taxref: tell family name as well 2022-07-07 19:07:50 +02:00
			`@staticmethod`
			`def item_to_full_name(item):`
			`family_name = item.get("familyName")`
			`sci_name = item.get("scientificName")`
			`return f"{family_name} {sci_name}"`