#!/usr/bin/env python3
# Ahem…
"""Download the files linked from a course page or a single video page.

A course page is scanned for ``a.card-container-link`` anchors, each of which
is treated as a video page.  A video page is scanned for the first anchor in
every ``div.player-container`` and for every anchor whose ``href`` starts with
``/scripts/files/``; each of those targets is saved into the output directory.
Every request sends the ``PHPSESSID`` cookie given on the command line.
"""

import argparse
from pathlib import Path
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup
import requests

# (connect, read) timeout in seconds.  requests applies no timeout by default,
# so without one a stalled server would block the script indefinitely.
REQUEST_TIMEOUT = (10, 60)


def _file_name_from_url(url: str) -> str:
    """Return the last path segment of *url* (empty if the path ends in "/")."""
    return urlparse(url).path.split("/")[-1]


def _download_into(file_url: str, cookies: dict, output_dir: Path) -> None:
    """Download *file_url* into *output_dir*, named after its last path segment."""
    file_name = _file_name_from_url(file_url)
    # An empty name (or "." / "..") would make the target resolve to a
    # directory, and opening a directory for writing fails.
    if file_name in ("", ".", ".."):
        print(f'Skipping "{file_url}": cannot derive a file name')
        return
    download_file(file_url, cookies, output_dir / file_name)


def scrape_course(course_url: str, cookies: dict, output_dir: Path):
    """Scrape every video page linked from the course page at *course_url*.

    Args:
        course_url: URL of the course overview page.
        cookies: Cookies sent with every request.
        output_dir: Directory the downloaded files are written to.

    Raises:
        requests.HTTPError: If a page or file request returns an error status.
    """
    response = requests.get(
        course_url, cookies=cookies, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")

    # href=True skips anchors that carry the class but no link target.
    for link in soup.find_all("a", class_="card-container-link", href=True):
        video_url = urljoin(course_url, link["href"])
        scrape_video(video_url, cookies, output_dir)


def scrape_video(video_url: str, cookies: dict, output_dir: Path):
    """Download the files linked from the video page at *video_url*.

    Two kinds of links are followed: the first anchor inside each
    ``div.player-container``, and every anchor whose ``href`` starts with
    ``/scripts/files/``.

    Args:
        video_url: URL of the video page.
        cookies: Cookies sent with every request.
        output_dir: Directory the downloaded files are written to.

    Raises:
        requests.HTTPError: If a page or file request returns an error status.
    """
    print(f"Video {video_url}")
    response = requests.get(
        video_url, cookies=cookies, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")

    for container in soup.find_all("div", class_="player-container"):
        link = container.find("a")
        if link is None:
            continue
        href = link.get("href")
        if not href:
            continue
        # Resolve against the page URL so relative links work as well;
        # absolute URLs pass through urljoin unchanged.
        _download_into(urljoin(video_url, href), cookies, output_dir)

    # href=True: anchors without an href would raise KeyError on link["href"].
    for link in soup.find_all("a", href=True):
        href = link["href"]
        if not href.startswith("/scripts/files/"):
            continue
        _download_into(urljoin(video_url, href), cookies, output_dir)


def download_file(url: str, cookies: dict, output_path: Path):
    """Stream the resource at *url* to *output_path*.

    The response body is written in 8 KiB chunks, so the whole file is never
    held in memory.  An existing file at *output_path* is overwritten.

    Args:
        url: Absolute URL of the file to fetch.
        cookies: Cookies sent with the request.
        output_path: Destination file path.

    Raises:
        requests.HTTPError: If the server returns an error status.
    """
    print(f'Downloading: "{url}"')
    print(f' → "{output_path}"')
    with requests.get(
        url, cookies=cookies, stream=True, timeout=REQUEST_TIMEOUT
    ) as response:
        response.raise_for_status()
        with open(output_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)


def main():
    """Parse the command line and scrape the requested course or video."""
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--course")
    argparser.add_argument("--video")  # either video or course
    argparser.add_argument("--phpsessid", help="PHPSESSID")
    argparser.add_argument("-o", "--output")
    args = argparser.parse_args()

    cookies = {"PHPSESSID": args.phpsessid}
    output_dir = Path(args.output) if args.output else Path.cwd()
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    output_dir.mkdir(parents=True, exist_ok=True)

    # --course takes precedence when both options are given.
    if course_url := args.course:
        scrape_course(course_url, cookies, output_dir)
    elif video_url := args.video:
        scrape_video(video_url, cookies, output_dir)
    else:
        print("Nothing to do.")


if __name__ == "__main__":
    main()