2022-09-14 19:04:02 +02:00
|
|
|
#!/usr/bin/env python3
|
2022-09-28 19:04:09 +02:00
|
|
|
"""Download tracklists from djtracklists.com as CSV."""
|
2022-09-14 19:04:02 +02:00
|
|
|
|
|
|
|
import argparse
|
2022-09-28 19:04:09 +02:00
|
|
|
import csv
|
|
|
|
import time
|
2022-09-23 18:50:16 +02:00
|
|
|
from dataclasses import dataclass
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import Optional
|
2022-09-14 19:04:02 +02:00
|
|
|
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
2022-09-23 18:50:16 +02:00
|
|
|
@dataclass
class Track:
    """A single entry of a tracklist, as scraped from a tracklist page."""

    # Title of the track.
    title: str
    # Names of the track's artists.
    artists: list[str]
    # Text of the track's "release" link (the "Mix" CSV column), or None.
    mix: Optional[str]
    # Names of artists introduced by "remixed by", or None when there are none.
    mix_artists: Optional[list[str]]
    # Position of the track in the set, kept as the text found on the page.
    timestamp: str

    def format_artists(self) -> str:
        """Return all artist names joined with " & "."""
        separator = " & "
        return separator.join(self.artists)

    def format_mix_artists(self) -> str:
        """Return the remix artists joined with " & ", or "" if there are none."""
        if not self.mix_artists:
            return ""
        return " & ".join(self.mix_artists)
|
|
|
|
|
2022-09-23 18:50:16 +02:00
|
|
|
|
2022-09-14 19:04:02 +02:00
|
|
|
def main():
    """Parse the command line and run the requested action.

    Exactly one action is performed, chosen in this order of precedence:
    ``--pretty`` (pretty print a CSV file), ``--series`` (download a whole
    series), ``--tracklist`` (download a single tracklist).  When none of
    them is given, the usage help is printed instead of silently doing
    nothing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--series",
                        help="download this series (provide URL of 1st page)")
    parser.add_argument("-t", "--tracklist",
                        help="download this tracklist (provide URL)")
    parser.add_argument("--pretty", help="pretty print a CSV file.")
    args = parser.parse_args()

    if csv_file_name := args.pretty:
        pretty_print_csv(csv_file_name)
    elif series_url := args.series:
        download_series(series_url)
    elif tracklist_url := args.tracklist:
        download_tracklist(tracklist_url)
    else:
        # No action requested: tell the user what is available.
        parser.print_help()
|
|
|
|
|
|
|
|
|
|
|
|
def download_series(series_url: str):
    """Download every tracklist of a series, following its pagination.

    Starting at *series_url*, each page is fetched, every ``<a class="mix">``
    link on it is handed to ``download_tracklist`` (with a one second pause
    after each), and the ``<a class="pagenumber">`` link whose text contains
    "Next" is followed until no such link is left.

    Raises whatever ``requests`` raises for a failed or non-2xx request.
    """
    # Local import: urljoin is not among the file-level imports.
    from urllib.parse import urljoin

    # Pages already processed; stops a "Next" link that points back to a
    # page we have seen from looping forever.
    visited = set()

    while series_url and series_url not in visited:
        visited.add(series_url)
        print("Processing series URL", series_url)
        response = requests.get(series_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Download all tracklists.
        for tracklist_link in soup.find_all("a", class_="mix"):
            href = tracklist_link.get("href")
            if not href:
                # Anchor without a target: nothing to download.
                continue
            # Resolve relative links; absolute URLs pass through unchanged.
            tracklist_url = urljoin(series_url, href)
            print("Processing tracklist URL", tracklist_url)
            download_tracklist(tracklist_url)
            time.sleep(1)  # throttle

        # Look for the next page button.
        next_url = None
        for page_link in soup.find_all("a", class_="pagenumber"):
            href = page_link.get("href")
            # get_text() also works when the link wraps nested markup, where
            # .string would be None.
            if href and "Next" in page_link.get_text():
                next_url = urljoin(series_url, href)
                break
        series_url = next_url
|
|
|
|
|
|
|
|
|
|
|
|
def download_tracklist(url: str):
    """Fetch the tracklist at *url* and store it as a CSV file.

    The file is created in the current working directory and is named after
    the last path segment of *url* (trailing slashes ignored) plus ".csv".
    """
    tracks = get_tracklist_from_url(url)
    # Everything after the final "/" of the URL becomes the file stem.
    stem = url.rstrip("/").rpartition("/")[2]
    target = Path.cwd() / f"{stem}.csv"
    save_tracklist_as_csv(tracks, target)
|
2022-09-14 19:04:02 +02:00
|
|
|
|
|
|
|
|
|
|
|
def is_track_row(css_class: str) -> bool:
    """Return True when *css_class* is exactly "on" or "off".

    Used as the ``class_`` filter when searching a tracklist page for the
    ``<div>`` elements that hold one track each.
    """
    return css_class == "on" or css_class == "off"
|
2022-09-14 19:04:02 +02:00
|
|
|
|
|
|
|
|
2022-09-28 19:04:09 +02:00
|
|
|
def _tag_text(tag) -> Optional[str]:
    """Return the text inside *tag*, or None if *tag* is missing or empty."""
    if tag is None:
        return None
    return tag.get_text() or None


def get_tracklist_from_url(url: str) -> list[Track]:
    """Get tracklist from the Web and parse it into a list of Track objects.

    Every ``<div>`` whose CSS class satisfies ``is_track_row`` becomes one
    Track:

    * title: text of ``<a class="track">``, else of the first ``<b>``,
      else "(unknown title)";
    * mix: text of ``<a class="release">`` or None -- looked up on its own,
      so a missing release link no longer discards the title;
    * timestamp: text of ``<span class="index_time">`` or
      "(unknown timestamp)";
    * artists / mix_artists: text of each ``<a class="artist">``; a link
      directly preceded by the text "remixed by" is a remix artist.

    Raises whatever ``requests`` raises for a failed or non-2xx request.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    tracklist = []
    for row in soup.find_all("div", class_=is_track_row):
        title = (
            _tag_text(row.find("a", class_="track"))
            or _tag_text(row.find("b"))
            or "(unknown title)"
        )
        mix = _tag_text(row.find("a", class_="release"))
        timestamp = (
            _tag_text(row.find("span", class_="index_time"))
            or "(unknown timestamp)"
        )

        artists = []
        mix_artists = []
        for artist_link in row.find_all("a", class_="artist"):
            name = _tag_text(artist_link)
            if name is None:
                # An empty link carries no artist name worth recording.
                continue
            # previous_sibling is None when the link is the first child, and
            # its .string is None for a tag with mixed content; treat both
            # as "no label" instead of crashing.
            label = getattr(artist_link.previous_sibling, "string", None) or ""
            if label.strip() == "remixed by":
                mix_artists.append(name)
            else:
                artists.append(name)

        tracklist.append(
            Track(
                title=title,
                mix=mix,
                artists=artists,
                mix_artists=mix_artists or None,
                timestamp=timestamp,
            )
        )
    return tracklist
|
|
|
|
|
|
|
|
|
2022-09-28 19:04:09 +02:00
|
|
|
def save_tracklist_as_csv(tracklist: list[Track], file_name: Path):
    """Write *tracklist* to *file_name* as a UTF-8 CSV file.

    The first row is a header; each following row holds one track's
    timestamp, artists, title, mix and remix artists.  An OSError while
    opening or writing the file is reported on stdout, not raised.
    """
    header = ["Timestamp", "Artists", "Title", "Mix", "Remix artists"]
    try:
        with open(file_name, "wt", encoding="utf8", newline="") as csv_file:
            out = csv.writer(csv_file)
            out.writerow(header)
            # Rows are produced lazily, one track at a time, in list order.
            out.writerows(
                [
                    entry.timestamp,
                    entry.format_artists(),
                    entry.title,
                    entry.mix or "",
                    entry.format_mix_artists(),
                ]
                for entry in tracklist
            )
    except OSError as exc:
        print(f"Can't save tracklist: {exc}")
|
2022-09-14 19:04:02 +02:00
|
|
|
|
|
|
|
|
2022-09-28 19:04:09 +02:00
|
|
|
def _format_timestamp(ts: str) -> str:
    """Turn an "MM:SS" timestamp into "HH:MM:SS"; return anything else as is."""
    minutes, colon, seconds = ts.partition(":")
    if not colon or ":" in seconds:
        # No colon at all, or more than one: not in "MM:SS" form.
        return ts
    try:
        total_minutes = int(minutes)
    except ValueError:
        return ts
    hours, minutes_left = total_minutes // 60, total_minutes % 60
    return f"{hours:02}:{minutes_left:02}:{seconds}"


def pretty_print_csv(csv_file_name: str):
    """Print a tracklist CSV file as one human-readable line per track.

    The first row is treated as a header and skipped.  Each remaining row is
    read as timestamp, artists, title, mix, remix artists and printed as
    ``<timestamp> <artists> — <title> (<mix>) by <remix artists>``, leaving
    out the mix and remix parts when they are empty.  "MM:SS" timestamps are
    shown as "HH:MM:SS".

    Blank rows are skipped, short rows are padded with empty fields and
    extra columns are ignored, so a malformed line cannot abort the listing.
    An OSError while reading the file is reported on stdout, not raised.
    """
    try:
        with open(csv_file_name, "rt", encoding="utf8", newline="") as file:
            reader = csv.reader(file)
            next(reader, None)  # skip the header row, if there is one
            for row in reader:
                if not row:
                    # csv.reader yields an empty list for a blank line.
                    continue
                ts, artists, title, mix, remix_artists = (row + [""] * 5)[:5]
                line = (
                    f"{_format_timestamp(ts)} "
                    f"{artists or '(unknown)'} — {title}"
                )
                if mix:
                    line += f" ({mix})"
                if remix_artists:
                    line += f" by {remix_artists}"
                print(line)
    except OSError as exc:
        print(f"Can't read CSV: {exc}")
|
|
|
|
|
|
|
|
|
2022-09-14 19:04:02 +02:00
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|