1
0
Fork 0

scrape-djtracklists: WIP

This commit is contained in:
Adrien Abraham 2022-09-23 18:50:16 +02:00
parent ff39027205
commit 559646bc11

View file

@ -2,11 +2,23 @@
"""Download tracklists from djtracklists.com.""" """Download tracklists from djtracklists.com."""
import argparse import argparse
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@dataclass
class Track:
    """One entry of a scraped tracklist.

    Field order is part of the interface: the generated ``__init__`` accepts
    the fields positionally in the order they are declared below.
    """

    # Track title as shown on the tracklist row.
    title: str
    # Names of the credited artists, in page order.
    artists: list[str]
    # Text of the row's "release" link, or None when the row has none.
    mix: Optional[str]
    # Names of artists introduced by "remixed by", or None when there are none.
    mix_artists: Optional[list[str]]
    # Position of the track in the set, kept as the page's own text.
    timestamp: str
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-s", "--series", help="download this series") parser.add_argument("-s", "--series", help="download this series")
@ -14,31 +26,63 @@ def main():
args = parser.parse_args() args = parser.parse_args()
if tracklist_url := args.tracklist: if tracklist_url := args.tracklist:
download_tracklist(tracklist_url) tracklist = download_tracklist(tracklist_url)
name = tracklist_url.rstrip("/").rsplit("/", maxsplit=1)[-1]
file_name = Path.cwd() / (name + ".txt")
save_tracklist(tracklist, file_name)
def is_track_row(css_class: str) -> bool:
    """Return True when *css_class* is one of the track-row classes.

    Used as the ``class_`` filter for ``find_all``; rows are matched when
    their CSS class is exactly ``"on"`` or ``"off"``. Any other value
    (including ``None``) yields False.
    """
    return css_class in ("on", "off")
def _tag_text(tag):
    """Return ``tag.string`` as a plain ``str``, or None if there is none.

    Accepts None (e.g. a failed ``find``) so callers need no separate check.
    Converting to ``str`` detaches the value from the parse tree.
    """
    text = getattr(tag, "string", None)
    return None if text is None else str(text)


def download_tracklist(url: str) -> list[Track]:
    """Download the tracklist page at *url* and parse it into tracks.

    Args:
        url: Address of a tracklist page.

    Returns:
        One ``Track`` per parsed row, in page order. Rows for which no title
        can be found are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    tracklist = []
    for row in soup.find_all("div", class_=is_track_row):
        track_link = row.find("a", class_="track")
        release_link = row.find("a", class_="release")
        if track_link is not None and release_link is not None:
            title = _tag_text(track_link)
            mix = _tag_text(release_link)
        else:
            # Rows without both links carry the title in a <b> tag instead.
            title = _tag_text(row.find("b"))
            mix = None
            if title is None:
                title = _tag_text(track_link)
        if title is None:
            # Nothing usable to name this row; skip it rather than crash.
            continue

        # A missing time span becomes an empty string to honour the str field.
        timestamp = _tag_text(row.find("span", class_="index_time")) or ""

        artists = []
        mix_artists = []
        for artist in row.find_all("a", class_="artist"):
            name = _tag_text(artist)
            if name is None:
                continue
            # The text node right before the link says whether this artist
            # is the remixer; previous_sibling is None for a first child.
            label = _tag_text(artist.previous_sibling) or ""
            if label.strip() == "remixed by":
                mix_artists.append(name)
            else:
                artists.append(name)

        tracklist.append(
            Track(
                title=title,
                mix=mix,
                artists=artists,
                mix_artists=mix_artists or None,
                timestamp=timestamp,
            )
        )
    return tracklist
def _format_track(track: "Track") -> str:
    """Render one track as a single text line (without trailing newline)."""
    artists = " & ".join(track.artists)
    # Separate the fields so the line stays readable; omit the artist part
    # entirely when no artist is known.
    heading = f"{artists} - {track.title}" if artists else track.title
    line = f"{track.timestamp} {heading}".strip()
    if track.mix:
        line += f" ({track.mix})"
    if track.mix_artists:
        mix_artists = " & ".join(track.mix_artists)
        line += f" remixed by {mix_artists}"
    return line


def save_tracklist(tracklist: "list[Track]", file_name: Path):
    """Write *tracklist* to *file_name*, one track per line, as UTF-8.

    Each line has the form ``<timestamp> <artists> - <title>``, followed by
    `` (<mix>)`` and `` remixed by <artists>`` when those are present.
    Artist names are joined with `` & ``.

    An ``OSError`` while opening or writing the file is reported on standard
    output and not re-raised (best-effort save).
    """
    lines = [_format_track(track) + "\n" for track in tracklist]
    try:
        with open(file_name, "wt", encoding="utf8") as file:
            file.writelines(lines)
    except OSError as exc:
        print(f"Can't save tracklist: {exc}")
if __name__ == "__main__": if __name__ == "__main__":