Compare commits

..

No commits in common. "master" and "0.0.1" have entirely different histories.

4 changed files with 18 additions and 88 deletions

View file

@ -1,59 +1,8 @@
Scaruffi Scaruffi
======== ========
Get some data from scaruffi.com from Python. Get some data from scaruffi.com from Python. Work in progress (i.e. abandoned).
Piero Scaruffi has written a lot about rock music, jazz, classical, whether it Piero Scaruffi has written a lot about rock music, jazz, classical, whether it
is reviews or history. It is a valuable resource for a variety of reasons and is reviews or history. It is a valuable resource for a variety of reasons and
this script aims to make data fetching easier for personal usage. this script aims to make data fetching easier for personal usage.
Features:
- Get a big list of musicians.
- Get best albums per decade, grouped by rating.
This is a work in progress; I would like to add more content to make it more usable!
Install
-------
This library is available on PyPI:
```bash
pip install scaruffi
```
Usage
-----
Check out the `ScaruffiApi` for all available methods.
```python
from scaruffi.api import ScaruffiApi
api = ScaruffiApi()
api.get_ratings(1960)
# { 9.5: [ Release(title='Trout Mask Replica', ...
```
This module can also be used as a command-line tool:
```bash
scaruffi --help
scaruffi --musicians --offset 5555 --limit 5
# Mooseheart Faith
# Morbid Angel
# Morcheeba
# Morgan Fisher
# Morning 40 Federation
scaruffi --ratings 1960
# 9.5
# - Captain Beefheart - Trout Mask Replica (1969)
# 9.0
# - Bob Dylan - Blonde On Blonde (1966)
# - Captain Beefheart - Safe As Milk (1967)
# ...
```

View file

@ -4,7 +4,7 @@
import argparse import argparse
import logging import logging
from scaruffi.api import ScaruffiApi from scaruffi import api
def main(): def main():
@ -22,7 +22,8 @@ def main():
args = parser.parse_args() args = parser.parse_args()
log_level = logging.DEBUG if args.verbose else logging.WARNING log_level = logging.DEBUG if args.verbose else logging.WARNING
api = ScaruffiApi(log_level=log_level) global LOG
LOG = api.setup_logging("scaruffi", level=log_level)
if args.musicians: if args.musicians:
musicians = api.get_musicians(args.offset, args.limit) musicians = api.get_musicians(args.offset, args.limit)

View file

@ -69,11 +69,11 @@ class ScaruffiApi:
if not soup: if not soup:
return None return None
ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text)) ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text))
lists = ratings_table("ul") num_lists = len(ratings_table("ul"))
if len(lists) == 1: if num_lists == 1:
return self._get_ratings_from_unique_list(lists[0]) return self._get_ratings_from_unique_list(ratings_table.ul)
else: else:
return self._get_ratings_from_lists(lists) return self._get_ratings_from_lists(ratings_table("ul"))
def _get_ratings_from_unique_list(self, messy_list): def _get_ratings_from_unique_list(self, messy_list):
"""Get ratings from decades where one list contains all ratings.""" """Get ratings from decades where one list contains all ratings."""
@ -100,33 +100,17 @@ class ScaruffiApi:
return ratings return ratings
def _get_ratings_from_lists(self, lists): def _get_ratings_from_lists(self, lists):
"""Get ratings from several lists, one per rating. """Get ratings from several lists, one per rating."""
For some decades, there are two "lists of lists": one for albums per
ratings and one for EP/mini albums per ratings.
"""
ratings = {} ratings = {}
rating = None
for ul in lists: for ul in lists:
for child in ul: rating_tag = ul.span
tag = child.name if rating_tag:
if not tag: rating = self._match_rating(rating_tag.text)
continue if rating is None:
if tag in ("p", "span"): self.log.critical("Failed to find rating tag in list.")
parsed_rating = self._match_rating(child.text) return None
if parsed_rating: releases = [self._parse_release(li.text) for li in ul("li")]
rating = parsed_rating ratings[rating] = releases
if rating not in ratings:
ratings[rating] = []
continue
if rating is None:
self.log.critical("Failed to find rating tag in list.")
return None
if tag != "li":
self.log.warning(f"Unused tag in ratings list: {tag}.")
continue
release = self._parse_release(child.text)
ratings[rating].append(release)
return ratings return ratings
RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*") RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*")

View file

@ -1,6 +1,6 @@
[metadata] [metadata]
name = scaruffi name = scaruffi
version = 0.0.3 version = 0.0.1
description = Get some data from scaruffi.com. description = Get some data from scaruffi.com.
long_description = file: README.md long_description = file: README.md
license = MIT license = MIT
@ -17,10 +17,6 @@ classifiers =
packages = scaruffi packages = scaruffi
python_requires = >= 3.7 python_requires = >= 3.7
setup_requires = setuptools >= 38.3.0 setup_requires = setuptools >= 38.3.0
install_requires =
requests~=2.24
beautifulsoup4~=4.9
html5lib~=1.1
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =