scrape-djtracklists: WIP
This commit is contained in:
parent
bebc1e982a
commit
cb527be92a
45
scrape-djtracklists.py
Executable file
45
scrape-djtracklists.py
Executable file
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Download tracklists from djtracklists.com."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and run the requested download.

    ``-t/--tracklist URL`` downloads and prints a single tracklist.
    ``-s/--series`` is accepted by the parser but has no implementation yet.

    Raises:
        SystemExit: with status 2 (via ``parser.error``) when no tracklist
            was requested, instead of silently exiting without doing any
            work.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--series", help="download this series")
    parser.add_argument("-t", "--tracklist", help="download this tracklist")
    args = parser.parse_args()

    if tracklist_url := args.tracklist:
        download_tracklist(tracklist_url)
    elif args.series:
        # The option is parsed but nothing handles it; say so rather than
        # pretending the series was downloaded.
        parser.error("--series is not implemented yet")
    else:
        parser.error("nothing to do: pass --tracklist URL")
|
||||||
|
|
||||||
|
|
||||||
|
def is_track_row(css_class: str) -> bool:
    """Tell whether *css_class* is one of the two track-row classes.

    Used as the ``class_`` filter when searching the page for ``div`` rows;
    only the exact values ``"on"`` and ``"off"`` match.
    """
    return css_class in ("on", "off")
|
||||||
|
|
||||||
|
|
||||||
|
def download_tracklist(url: str, timeout: float = 30.0) -> None:
    """Fetch one tracklist page and print the tracks found on it.

    Every ``div`` whose CSS class satisfies ``is_track_row`` is treated as a
    track row. For each row the track title, the release (when linked) and
    all artists are printed, one ``label value`` pair per line.

    Args:
        url: Address of the tracklist page.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` may block indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    for row in soup.find_all("div", class_=is_track_row):
        print("*" * 80)

        # Look the links up explicitly instead of relying on AttributeError:
        # a row with a track link but no release link used to print the
        # title twice and could crash in the fallback.
        track_link = row.find("a", class_="track")
        if track_link is not None:
            print("track", track_link.string)
            release_link = row.find("a", class_="release")
            if release_link is not None:
                print("release", release_link.string)
        else:
            # Rows without a track link: fall back to the bold text.
            bold = row.find("b")
            if bold is not None:
                print("track", bold.string)

        for artist in row.find_all("a", class_="artist"):
            # The node right before the link tells remixers apart. It is
            # None when the link is the first child, and its ``.string`` is
            # None for a tag with several children; both count as "no text".
            preceding_text = getattr(artist.previous_sibling, "string", None) or ""
            if preceding_text.strip() == "remixed by":
                print("remixing artist", artist.string)
            else:
                print("artist", artist.string)

    # NOTE(review): indentation was lost in the reviewed source; this spacer
    # is assumed to run once per page, after all rows. Confirm.
    print("\n" * 10)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    raise SystemExit(main())
|
Loading…
Reference in a new issue