journee_mondiale: restore website scraping

Official UN days are soooo boring…
2022-09-13 18:15:57 +02:00 · 2022-09-13 18:15:57 +02:00 · 7f2fa0817e
parent 92f0ab3fef
commit 7f2fa0817e
1 changed files with 52 additions and 7 deletions
--- a/edmond/plugins/journee_mondiale.py
+++ b/edmond/plugins/journee_mondiale.py
@ -1,14 +1,24 @@
 import datetime

+# BeautifulSoup is optional and only used for scraping journee-mondiale.com,
+# which is why we do not mark the dependencies flag here.
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    BeautifulSoup = None
+
 from edmond.plugin import Plugin
+from edmond.utils import http_get


 class JourneeMondialePlugin(Plugin):
    """This plugin shows today's international observance.

    It used to fetch data from the website journee-mondiale.com but it is
-    regularly broken so it is now loading a static list as a resource. This
-    list uses the format "MM-DD Name", one per line, e.g.:
+    regularly broken so it is now loading a static list as a resource, and the
+    website is fetched only if the user sets the "jmcom" config value to True.
+
+    The local list uses the format "MM-DD Name", one per line, e.g.:

    ```
    01-01 NYE
@ -29,6 +39,22 @@ class JourneeMondialePlugin(Plugin):
        if not self.should_handle_command(event.arguments[0], no_content=True):
            return False

+        main_reply = self.get_registered_days()
+        if main_reply:
+            self.bot.say(event.target, main_reply)
+
+        jmcom_reply = ""
+        if self.config.get("jmcom", False) is True:
+            jmcom_reply = self.get_jmcom_days()
+            if jmcom_reply:
+                self.bot.say(event.target, jmcom_reply)
+
+        if not (main_reply or jmcom_reply):
+            self.bot.say(event.target, self.config["no_entry_reply"])
+        return True
+
+    def get_registered_days(self) -> str:
+        """Get international days from the local list ('' if none today)."""
        now = datetime.datetime.now()
        date_tag = f"{now.month:02}-{now.day:02}"
        today_obs = map(
@ -37,8 +63,27 @ class JourneeMondialePlugin(Plugin):
                lambda line: line.startswith(date_tag), self.config["dates"]
            ),
        )
-        reply = ", ".join(today_obs)
-        if not reply:
-            reply = self.config["no_entry_reply"]
-        self.bot.say(event.target, reply)
-        return True
+        days = ", ".join(today_obs)
+        return days
+
+    def get_jmcom_days(self) -> str:
+        """Get international days from journee-mondiale.com ('' on failure)."""
+        if BeautifulSoup is None:  # optional bs4 dependency is not installed
+            return ""
+        response = http_get(self.config["url"])
+        if not response:
+            return ""
+
+        soup = BeautifulSoup(response, "html.parser")
+        entries = []
+        try:
+            items = soup.find("div", id="journeesDuJour").find_all("article")
+            for item in items:
+                # .href would find a child <href> tag, not the attribute.
+                url = item.find("a")["href"]
+                title = item.find("h2").get_text(strip=True)
+                entries.append({"url": url, "title": title})
+        except (AttributeError, TypeError, ValueError, KeyError):
+            return ""  # layout changed: find() returned None or no href
+        # TODO add plus plugin support
+        return ", ".join(entry["title"] for entry in entries)