From b34e1121c12f59d2a673d9d57272c3352f639e1d Mon Sep 17 00:00:00 2001
From: Adrien Abraham <aabraham@freebox.fr>
Date: Wed, 31 Aug 2022 11:41:29 +0200
Subject: [PATCH] emoji: generate a list file from emoji

This is much better than keeping the UCD around (12KB against ~8MB) and
also a lot faster. Unsure what the hell I was thinking about previously.
---
 emoji.sh | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/emoji.sh b/emoji.sh
index b557d84..274f2ec 100755
--- a/emoji.sh
+++ b/emoji.sh
@@ -7,7 +7,8 @@
 # Made with 💖 by dece. s/o to mon loulou, the bash samurai. License: WTFPLv2.
 
 UCD_URL="https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip"
-UCD="$HOME/.local/share/emoji/ucd.all.flat.zip"
+DIR="$HOME/.local/share/emoji"
+LIST="$DIR/emojis.txt.gz"
 GREP="rg"
 
 usage() {
@@ -18,15 +19,20 @@ usage() {
     echo "  -l LIMIT  limit number of output lines"
     echo "  -u        unique result (equals -n and -l 1), no new line"
     echo "  -c        show code point"
-    echo "  -d        download UCD zip (requires curl)"
+    echo "  -d        download UCD zip to create list file (requires curl)"
 }
 
 [ $# -eq 0 ] && usage && exit
 
 download_ucdxml() {
-    directory="$(dirname "$UCD")"
-    [ ! -d "$directory" ] && mkdir -p "$directory"
-    curl -L -o "$UCD" "$UCD_URL"
+    [ ! -d "$DIR" ] && mkdir -p "$DIR"
+    unc_list="${LIST%.gz}"
+    curl -L "$UCD_URL" | zcat | "$GREP" 'Emoji="Y"' | while read -r line; do
+        codepoint="$(echo "$line" | sed -E 's/.* cp="([0-9A-F]+)".*/\1/g')"
+        name="$(echo "$line" | sed -E 's/.* na="([^"]+)".*/\1/g')"
+        echo "$codepoint;$name" >> "$unc_list"
+    done
+    gzip "$unc_list"
 }
 
 HIDE_NAME=
@@ -47,22 +53,19 @@ done
 shift $(( OPTIND - 1 ))
 FILTER="$*"
 
-if [ ! -f "$UCD" ]; then
-    echo "Can't find UCD archive at $UCD. Use -d to download it!"
+if [ ! -f "$LIST" ]; then
+    echo "Can't find list file at $LIST. Use -d to download it!"
     exit 1
 fi
 
-search_chars() {
-    zcat "$UCD" | "$GREP" 'Emoji="Y"' | "$GREP" -i "na.?=\"[^\"]*$1[^\"]*\""
-}
-
 line_id=0
-search_chars "$FILTER" | while read -r line; do
+zcat "$LIST" | "$GREP" -i "$FILTER" | while read -r line; do
     [ -n "$LIMIT" ] && (( line_id >= LIMIT )) && break
-    codepoint="$(echo "$line" | sed -E 's/.* cp="([0-9A-F]+)".*/\1/g')"
+    readarray -d ";" -t elements <<< "$line"
+    codepoint="${elements[0]}"
     result="$(echo -e "\\U$codepoint")"
     if [ "$HIDE_NAME" != true ]; then
-        name="$(echo "$line" | sed -E 's/.* na="([^"]+)".*/\1/g')"
+        name="${elements[1]}"
         result="$result $(echo "$name" | tr '[:upper:]' '[:lower:]')"
     fi
     if [ "$SHOW_CP" = true ]; then