Compare commits

...

3 commits

Author SHA1 Message Date
dece d0d8c295ea setup: bump to 0.0.3 2021-01-27 15:16:25 +01:00
dece 87eb54286a readme: update with features 2021-01-27 15:16:07 +01:00
dece 212d15b3d2 api: fix _get_ratings_from_lists for some decades 2021-01-27 15:14:01 +01:00
3 changed files with 37 additions and 14 deletions

View file

@ -7,6 +7,13 @@ Piero Scaruffi has written a lot about rock music, jazz, classical, whether it
is reviews or history. It is a valuable resource for a variety of reasons and is reviews or history. It is a valuable resource for a variety of reasons and
this script aims to make data fetching easier for personal usage. this script aims to make data fetching easier for personal usage.
Features:
- Get a big list of musicians.
- Get best albums per decade, grouped by rating.
This is a work in progress; I would like to add more content to make it more useful!
Install Install

View file

@ -69,11 +69,11 @@ class ScaruffiApi:
if not soup: if not soup:
return None return None
ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text)) ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text))
num_lists = len(ratings_table("ul")) lists = ratings_table("ul")
if num_lists == 1: if len(lists) == 1:
return self._get_ratings_from_unique_list(ratings_table.ul) return self._get_ratings_from_unique_list(lists[0])
else: else:
return self._get_ratings_from_lists(ratings_table("ul")) return self._get_ratings_from_lists(lists)
def _get_ratings_from_unique_list(self, messy_list): def _get_ratings_from_unique_list(self, messy_list):
"""Get ratings from decades where one list contains all ratings.""" """Get ratings from decades where one list contains all ratings."""
@ -100,17 +100,33 @@ class ScaruffiApi:
return ratings return ratings
def _get_ratings_from_lists(self, lists): def _get_ratings_from_lists(self, lists):
"""Get ratings from several lists, one per rating.""" """Get ratings from several lists, one per rating.
For some decades, there are two "lists of lists": one for albums per
ratings and one for EP/mini albums per ratings.
"""
ratings = {} ratings = {}
rating = None
for ul in lists: for ul in lists:
rating_tag = ul.span for child in ul:
if rating_tag: tag = child.name
rating = self._match_rating(rating_tag.text) if not tag:
if rating is None: continue
self.log.critical("Failed to find rating tag in list.") if tag in ("p", "span"):
return None parsed_rating = self._match_rating(child.text)
releases = [self._parse_release(li.text) for li in ul("li")] if parsed_rating:
ratings[rating] = releases rating = parsed_rating
if rating not in ratings:
ratings[rating] = []
continue
if rating is None:
self.log.critical("Failed to find rating tag in list.")
return None
if tag != "li":
self.log.warning(f"Unused tag in ratings list: {tag}.")
continue
release = self._parse_release(child.text)
ratings[rating].append(release)
return ratings return ratings
RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*") RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*")

View file

@ -1,6 +1,6 @@
[metadata] [metadata]
name = scaruffi name = scaruffi
version = 0.0.2 version = 0.0.3
description = Get some data from scaruffi.com. description = Get some data from scaruffi.com.
long_description = file: README.md long_description = file: README.md
license = MIT license = MIT