Compare commits
2 commits
2112e7c8b7
...
5a18b172cc
Author | SHA1 | Date
---|---|---
dece | 5a18b172cc |
dece | 08f2735f82 |
28
remove-orientation.sh
Executable file
28
remove-orientation.sh
Executable file
|
@@ -0,0 +1,28 @@
|
|||
#!/bin/bash

# Strip the EXIF orientation tag from photos WITHOUT touching the pixels.
# With -t, also drop the embedded thumbnail (IFD1): some viewers such as
# Ristretto are confused by a leftover thumbnail orientation tag.
# Handy when a photo carries wrong orientation metadata (e.g. from a buggy
# LineageOS camera app).

usage() {
    echo "Usage: $0 [-t] [FILE]"
    echo " -t remove thumbnail"
}

remove_thumbnail=
while getopts "ht" option; do
    case "$option" in
        h)
            usage
            exit 0
            ;;
        t)
            remove_thumbnail=true
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done
shift $(( OPTIND - 1 ))

for jpg in "$@"; do
    echo "Fixing \"$jpg\"…"
    # Clear the orientation tag in place (no backup copy kept).
    exiftool -Orientation= -overwrite_original "$jpg"
    if [[ "$remove_thumbnail" = true ]]; then
        # Wipe the whole thumbnail IFD, including its own orientation tag.
        exiftool -IFD1:all= -overwrite_original "$jpg"
    fi
done
|
74
scrape-interesting-breton-music-website.py
Executable file
74
scrape-interesting-breton-music-website.py
Executable file
|
@@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
# Ahem…
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
|
||||
def scrape_course(course_url: str, cookies: dict, output_dir: Path):
    """Scrape every video linked from a course page.

    The course page is expected to contain ``<a class="card-container-link">``
    anchors pointing at individual video pages; each href is resolved against
    *course_url* and handed to ``scrape_video``.
    """
    page = requests.get(course_url, cookies=cookies)
    page.raise_for_status()
    document = BeautifulSoup(page.text, features="lxml")
    for anchor in document.find_all("a", class_="card-container-link"):
        scrape_video(urljoin(course_url, anchor["href"]), cookies, output_dir)
|
||||
|
||||
def scrape_video(video_url: str, cookies: dict, output_dir: Path):
    """Download the media and attached files referenced by a video page.

    Two kinds of links are fetched:
      * the ``<a>`` inside each ``<div class="player-container">`` (the video
        file itself);
      * every anchor whose href starts with ``/scripts/files/`` (attached
        documents).

    Each file is saved into *output_dir* under the last path component of
    its URL.
    """
    print(f"Video {video_url}")
    response = requests.get(video_url, cookies=cookies)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")

    for container in soup.find_all("div", class_="player-container"):
        # href=True: an anchor without an href would otherwise be returned
        # and then raise KeyError on link["href"].
        link = container.find("a", href=True)
        if link is None:
            continue
        # Resolve against the page URL in case the href is relative
        # (urljoin is a no-op for already-absolute URLs).
        file_url = urljoin(video_url, link["href"])
        file_name = urlparse(file_url).path.split("/")[-1]
        output_path = output_dir / file_name
        download_file(file_url, cookies, output_path)

    # href=True guards against bare anchors here as well.
    for link in soup.find_all("a", href=True):
        file_url = link["href"]
        if not file_url.startswith("/scripts/files/"):
            continue
        file_url = urljoin(video_url, file_url)
        file_name = urlparse(file_url).path.split("/")[-1]
        output_path = output_dir / file_name
        download_file(file_url, cookies, output_path)
|
||||
|
||||
def download_file(url: str, cookies: dict, output_path: Path):
    """Stream the resource at *url* into *output_path*, 8 KiB at a time."""
    print(f'Downloading: "{url}"')
    print(f' → "{output_path}"')
    with requests.get(url, cookies=cookies, stream=True) as response:
        # Check the status before creating the output file, so a failed
        # request does not leave an empty file behind.
        response.raise_for_status()
        with open(output_path, 'wb') as sink:
            for block in response.iter_content(chunk_size=8192):
                sink.write(block)
|
||||
|
||||
def main():
    """CLI entry point: scrape either a whole course or a single video."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--course")
    parser.add_argument("--video")  # either video or course
    parser.add_argument("--phpsessid", help="PHPSESSID")
    parser.add_argument("-o", "--output")
    options = parser.parse_args()

    # The site authenticates through the PHP session cookie only.
    cookies = {"PHPSESSID": options.phpsessid}

    output_dir = Path(options.output) if options.output else Path.cwd()
    if not output_dir.exists():
        output_dir.mkdir(parents=True)

    if options.course:
        scrape_course(options.course, cookies, output_dir)
    elif options.video:
        scrape_video(options.video, cookies, output_dir)
    else:
        print("Nothing to do.")
|
||||
|
||||
if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue