1
0
Fork 0

scrape-djtracklists: WIP

This commit is contained in:
Adrien Abraham 2022-09-23 18:50:16 +02:00
parent ff39027205
commit 559646bc11

View file

@ -2,11 +2,23 @@
"""Download tracklists from djtracklists.com."""
import argparse
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import requests
from bs4 import BeautifulSoup
@dataclass
class Track:
    """One entry of a scraped tracklist."""

    # Track title as displayed on the page.
    title: str
    # Names of the credited artists, in page order.
    artists: list[str]
    # Text of the row's "release" link, or None when the row has none.
    mix: Optional[str]
    # Names of the artists credited after a "remixed by" label, or None.
    mix_artists: Optional[list[str]]
    # Position of the track within the set, kept as the raw page text.
    timestamp: str
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--series", help="download this series")
@ -14,31 +26,63 @@ def main():
args = parser.parse_args()
if tracklist_url := args.tracklist:
download_tracklist(tracklist_url)
tracklist = download_tracklist(tracklist_url)
name = tracklist_url.rstrip("/").rsplit("/", maxsplit=1)[-1]
file_name = Path.cwd() / (name + ".txt")
save_tracklist(tracklist, file_name)
def is_track_row(css_class: str) -> bool:
    """Return True when *css_class* marks a tracklist row.

    Used as the ``class_`` filter for ``find_all``; rows are recognised by
    the CSS class ``"on"`` or ``"off"``.
    """
    return css_class in ("on", "off")
def download_tracklist(url: str) -> list[Track]:
    """Fetch a tracklist page and parse it into ``Track`` records.

    Args:
        url: Address of the tracklist page to download.

    Returns:
        One ``Track`` per row matched by ``is_track_row``, in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        AttributeError: If a row lacks the expected title or timestamp
            elements.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    tracklist = []
    for row in soup.find_all("div", class_=is_track_row):
        try:
            title = row.find("a", class_="track").string
            mix = row.find("a", class_="release").string
        except AttributeError:
            # Rows without track/release links carry the title in a <b> tag.
            title = row.find("b").string
            mix = None
        timestamp = row.find("span", class_="index_time").string

        artists = []
        mix_artists = []
        for artist in row.find_all("a", class_="artist"):
            # The node right before an artist link tells whether the artist
            # is a remixer. It is missing when the link is the first child
            # of its parent, and its text is None when it is a tag with
            # several children.
            preceding = artist.previous_sibling
            label = preceding.string if preceding is not None else None
            if (label or "").strip() == "remixed by":
                mix_artists.append(artist.string)
            else:
                artists.append(artist.string)

        tracklist.append(
            Track(
                title=title,
                mix=mix,
                artists=artists,
                mix_artists=mix_artists or None,
                timestamp=timestamp,
            )
        )
    return tracklist
def _format_track(track: Track) -> str:
    """Render one track as a single human-readable line (no newline)."""
    credited = " & ".join(track.artists)
    # Skip the "artists - " prefix entirely when no artist was scraped.
    heading = f"{credited} - {track.title}" if credited else f"{track.title}"
    line = f"{track.timestamp} {heading}"
    if track.mix:
        line += f" ({track.mix})"
    if track.mix_artists:
        mix_artists = " & ".join(track.mix_artists)
        line += f" remixed by {mix_artists}"
    return line


def save_tracklist(tracklist: list[Track], file_name: Path):
    """Write *tracklist* to *file_name*, one track per line.

    Each line has the form
    ``<timestamp> <artists> - <title> (<mix>) remixed by <mix artists>``,
    where the mix and remixer parts appear only when present.

    Args:
        tracklist: Tracks to write, in output order.
        file_name: Destination file; it is created or overwritten.

    I/O failures are reported on standard output instead of being raised.
    """
    # Format before opening the file so a formatting error neither truncates
    # an existing file nor gets mistaken for an I/O problem.
    lines = [_format_track(track) + "\n" for track in tracklist]
    try:
        with open(file_name, "w", encoding="utf8") as file:
            file.writelines(lines)
    except OSError as exc:
        print(f"Can't save tracklist: {exc}")
if __name__ == "__main__":