#!/usr/bin/env python3
||
|
"""A simple library to get data from scaruffi.com."""
|
||
|
|
||
|
import logging
|
||
|
|
||
|
from bs4 import BeautifulSoup as Soup
|
||
|
|
||
|
import requests
|
||
|
|
||
|
import log
|
||
|
|
||
|
|
||
|
# Index page fetched by get_musicians().
GENERAL_INDEX_URL = "https://scaruffi.com/music/groups.html"

# Module-wide logger, requested at WARNING level from the local `log` helper.
LOG = log.get_logger(
    "scaruffi",
    level=logging.WARNING,
)
|
||
|
|
||
|
|
||
|
def main():
    """Script entry point: fetch the musicians list and print it."""
    musicians = get_musicians()
    print(musicians)
|
||
|
|
||
|
|
||
|
def _get_url(url, timeout=10):
    """Fetch a URL with HTTP GET and return the response body as text.

    Args:
        url: address to request.
        timeout: seconds to wait for the server before giving up; passed
            straight to requests.get. Without a timeout a stalled server
            would block the caller indefinitely.

    Returns:
        The body (response.text) when the server answers with status 200.
        None when the request raises requests.exceptions.RequestException
        (timeouts included) or when the status code is not 200; both
        failures are logged at error level.
    """
    LOG.debug(f"GET {url}")
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        LOG.error(f"An exception occurred during HTTP GET: {exc}")
        return None

    status = response.status_code
    if status != 200:
        LOG.error(f"Server returned HTTP response {status} to {url}.")
        return None
    return response.text
|
||
|
|
||
|
|
||
|
def get_musicians(offset=0, limit=20):
    """Get a list of musicians.

    Downloads the general index page, picks the table with the most text and
    returns the text of each link found in it (presumably one link per
    musician -- the page carries no semantic markup to confirm this).

    Args:
        offset: number of leading entries to skip; negative values are
            treated as 0.
        limit: maximum number of entries to return; negative values are
            treated as 0, and None returns everything from offset onwards.

    Returns:
        A list of link texts, or None when the page could not be fetched or
        contains no table at all.
    """
    html = _get_url(GENERAL_INDEX_URL)
    if not html:
        return None

    soup = Soup(html, 'html5lib')
    tables = soup.find_all('table')
    if not tables:
        # max() raises ValueError on an empty sequence; report the failure
        # the same way as a failed download instead of crashing.
        LOG.error(f"No table found in {GENERAL_INDEX_URL}.")
        return None

    # Semantic Web? Just find the fattest table.
    mu_table = max(tables, key=lambda t: len(t.text))
    musicians = [a_tag.text for a_tag in mu_table.find_all("a")]

    # Apply the requested window; clamp so a negative value cannot make the
    # slice count from the end of the list.
    start = max(offset, 0)
    end = None if limit is None else start + max(limit, 0)
    return musicians[start:end]
|
||
|
|
||
|
|
||
|
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|