Compare commits
7 commits
Author | SHA1 | Date | |
---|---|---|---|
|
d0d8c295ea | ||
|
87eb54286a | ||
|
212d15b3d2 | ||
|
df1b560afe | ||
|
03d4bb0015 | ||
|
3c534d9c25 | ||
|
f8439dee4a |
53
README.md
53
README.md
|
@ -1,8 +1,59 @@
|
||||||
Scaruffi
|
Scaruffi
|
||||||
========
|
========
|
||||||
|
|
||||||
Get some data from scaruffi.com from Python. Work in progress (i.e. abandoned).
|
Get some data from scaruffi.com from Python.
|
||||||
|
|
||||||
Piero Scaruffi has written a lot about rock music, jazz, classical, whether it
|
Piero Scaruffi has written a lot about rock music, jazz, classical, whether it
|
||||||
is reviews or history. It is a valuable resource for a variety of reasons and
|
is reviews or history. It is a valuable resource for a variety of reasons and
|
||||||
this script aims to make data fetching easier for personal usage.
|
this script aims to make data fetching easier for personal usage.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
|
||||||
|
- Get a big list of musicians.
|
||||||
|
- Get best albums per decade, grouped by rating.
|
||||||
|
|
||||||
|
This is a work in progress; I would like to add more content to make it more usable!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Install
|
||||||
|
-------
|
||||||
|
|
||||||
|
This library is available on PyPI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install scaruffi
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
|
||||||
|
Check out the `ScaruffiApi` for all available methods.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from scaruffi.api import ScaruffiApi
|
||||||
|
api = ScaruffiApi()
|
||||||
|
api.get_ratings(1960)
|
||||||
|
# { 9.5: [ Release(title='Trout Mask Replica', ...
|
||||||
|
```
|
||||||
|
|
||||||
|
This module can also be used as a command-line tool:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scaruffi --help
|
||||||
|
scaruffi --musicians --offset 5555 --limit 5
|
||||||
|
# Mooseheart Faith
|
||||||
|
# Morbid Angel
|
||||||
|
# Morcheeba
|
||||||
|
# Morgan Fisher
|
||||||
|
# Morning 40 Federation
|
||||||
|
scaruffi --ratings 1960
|
||||||
|
# 9.5
|
||||||
|
# - Captain Beefheart - Trout Mask Replica (1969)
|
||||||
|
# 9.0
|
||||||
|
# - Bob Dylan - Blonde On Blonde (1966)
|
||||||
|
# - Captain Beefheart - Safe As Milk (1967)
|
||||||
|
# ...
|
||||||
|
```
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from scaruffi import api
|
from scaruffi.api import ScaruffiApi
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -22,8 +22,7 @@ def main():
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
log_level = logging.DEBUG if args.verbose else logging.WARNING
|
log_level = logging.DEBUG if args.verbose else logging.WARNING
|
||||||
global LOG
|
api = ScaruffiApi(log_level=log_level)
|
||||||
LOG = api.setup_logging("scaruffi", level=log_level)
|
|
||||||
|
|
||||||
if args.musicians:
|
if args.musicians:
|
||||||
musicians = api.get_musicians(args.offset, args.limit)
|
musicians = api.get_musicians(args.offset, args.limit)
|
||||||
|
|
|
@ -69,11 +69,11 @@ class ScaruffiApi:
|
||||||
if not soup:
|
if not soup:
|
||||||
return None
|
return None
|
||||||
ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text))
|
ratings_table = max(soup.find_all("table"), key=lambda t: len(t.text))
|
||||||
num_lists = len(ratings_table("ul"))
|
lists = ratings_table("ul")
|
||||||
if num_lists == 1:
|
if len(lists) == 1:
|
||||||
return self._get_ratings_from_unique_list(ratings_table.ul)
|
return self._get_ratings_from_unique_list(lists[0])
|
||||||
else:
|
else:
|
||||||
return self._get_ratings_from_lists(ratings_table("ul"))
|
return self._get_ratings_from_lists(lists)
|
||||||
|
|
||||||
def _get_ratings_from_unique_list(self, messy_list):
|
def _get_ratings_from_unique_list(self, messy_list):
|
||||||
"""Get ratings from decades where one list contains all ratings."""
|
"""Get ratings from decades where one list contains all ratings."""
|
||||||
|
@ -100,17 +100,33 @@ class ScaruffiApi:
|
||||||
return ratings
|
return ratings
|
||||||
|
|
||||||
def _get_ratings_from_lists(self, lists):
|
def _get_ratings_from_lists(self, lists):
|
||||||
"""Get ratings from several lists, one per rating."""
|
"""Get ratings from several lists, one per rating.
|
||||||
|
|
||||||
|
For some decades, there are two "lists of lists": one for albums per
|
||||||
|
ratings and one for EP/mini albums per ratings.
|
||||||
|
"""
|
||||||
ratings = {}
|
ratings = {}
|
||||||
|
rating = None
|
||||||
for ul in lists:
|
for ul in lists:
|
||||||
rating_tag = ul.span
|
for child in ul:
|
||||||
if rating_tag:
|
tag = child.name
|
||||||
rating = self._match_rating(rating_tag.text)
|
if not tag:
|
||||||
if rating is None:
|
continue
|
||||||
self.log.critical("Failed to find rating tag in list.")
|
if tag in ("p", "span"):
|
||||||
return None
|
parsed_rating = self._match_rating(child.text)
|
||||||
releases = [self._parse_release(li.text) for li in ul("li")]
|
if parsed_rating:
|
||||||
ratings[rating] = releases
|
rating = parsed_rating
|
||||||
|
if rating not in ratings:
|
||||||
|
ratings[rating] = []
|
||||||
|
continue
|
||||||
|
if rating is None:
|
||||||
|
self.log.critical("Failed to find rating tag in list.")
|
||||||
|
return None
|
||||||
|
if tag != "li":
|
||||||
|
self.log.warning(f"Unused tag in ratings list: {tag}.")
|
||||||
|
continue
|
||||||
|
release = self._parse_release(child.text)
|
||||||
|
ratings[rating].append(release)
|
||||||
return ratings
|
return ratings
|
||||||
|
|
||||||
RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*")
|
RATING_RE = re.compile(r"\s*(\d(.\d)?)/10\s*")
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[metadata]
|
[metadata]
|
||||||
name = scaruffi
|
name = scaruffi
|
||||||
version = 0.0.1
|
version = 0.0.3
|
||||||
description = Get some data from scaruffi.com.
|
description = Get some data from scaruffi.com.
|
||||||
long_description = file: README.md
|
long_description = file: README.md
|
||||||
license = MIT
|
license = MIT
|
||||||
|
@ -17,6 +17,10 @@ classifiers =
|
||||||
packages = scaruffi
|
packages = scaruffi
|
||||||
python_requires = >= 3.7
|
python_requires = >= 3.7
|
||||||
setup_requires = setuptools >= 38.3.0
|
setup_requires = setuptools >= 38.3.0
|
||||||
|
install_requires =
|
||||||
|
requests~=2.24
|
||||||
|
beautifulsoup4~=4.9
|
||||||
|
html5lib~=1.1
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
|
|
Loading…
Reference in a new issue