This commit is contained in:
dece 2020-09-18 19:19:25 +02:00
commit a3ff6d8c4a
5 changed files with 312 additions and 0 deletions

14
Pipfile Normal file
View file

@ -0,0 +1,14 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
requests = "~=2.24"
beautifulsoup4 = "~=4.9"
html5lib = "~=1.1"
[requires]
python_version = "3.7"

99
Pipfile.lock generated Normal file
View file

@ -0,0 +1,99 @@
{
"_meta": {
"hash": {
"sha256": "38279dd8a59254b5d642ef1254d1f3d27e89ccc4b2dede749e4bd82a836c34d5"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"beautifulsoup4": {
"hashes": [
"sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7",
"sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8",
"sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c"
],
"index": "pypi",
"version": "==4.9.1"
},
"certifi": {
"hashes": [
"sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3",
"sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"
],
"version": "==2020.6.20"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"html5lib": {
"hashes": [
"sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d",
"sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"
],
"index": "pypi",
"version": "==1.1"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.10"
},
"requests": {
"hashes": [
"sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
"sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"
],
"index": "pypi",
"version": "==2.24.0"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"soupsieve": {
"hashes": [
"sha256:1634eea42ab371d3d346309b93df7870a88610f0725d47528be902a0d95ecc55",
"sha256:a59dc181727e95d25f781f0eb4fd1825ff45590ec8ff49eadfd7f1a537cc0232"
],
"markers": "python_version >= '3.5'",
"version": "==2.0.1"
},
"urllib3": {
"hashes": [
"sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a",
"sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.25.10"
},
"webencodings": {
"hashes": [
"sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
"sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
],
"version": "==0.5.1"
}
},
"develop": {}
}

0
__init__.py Normal file
View file

48
__main__.py Normal file
View file

@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""A simple library to get data from scaruffi.com."""
import logging
from bs4 import BeautifulSoup as Soup
import requests
import log
# Module-wide logger, created through the local ``log`` module; it is
# configured with the WARNING level.
LOG = log.get_logger("scaruffi", level=logging.WARNING)
# Address of the index page that get_musicians() downloads and parses.
GENERAL_INDEX_URL = "https://scaruffi.com/music/groups.html"
def main():
    """Script entry point: fetch the musician list and print it as-is."""
    musicians = get_musicians()
    print(musicians)
def _get_url(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: address to request.
        timeout: value handed to ``requests.get`` as its ``timeout``
            argument, so that an unresponsive server cannot block the
            caller forever.

    Returns:
        The decoded response body, or None when the request raised a
        ``requests`` exception (timeouts included) or when the server
        answered with a status code other than 200. Failures are logged
        at ERROR level.
    """
    LOG.debug("GET %s", url)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        LOG.error("An exception occured during HTTP GET: %s", exc)
        return None
    status = response.status_code
    if status != 200:
        LOG.error("Server returned HTTP response %s to %s.", status, url)
        return None
    return response.text
def get_musicians(offset=0, limit=20):
    """Get a list of musicians.

    The general index page is downloaded and the table holding the most
    text is taken to be the musician listing; the text of every link found
    in that table is collected in document order.

    Args:
        offset: number of leading entries to skip (negative values are
            treated as 0).
        limit: maximum number of entries to return; None disables the
            limit (negative values are treated as 0).

    Returns:
        A list of link texts, an empty list when the page contains no
        table, or None when the page could not be fetched.
    """
    html = _get_url(GENERAL_INDEX_URL)
    if not html:
        return None
    soup = Soup(html, 'html5lib')
    # Semantic Web? Just find the fattest table.
    # default=None keeps max() from raising ValueError on a table-less page.
    mu_table = max(
        soup.find_all('table'),
        key=lambda table: len(table.text),
        default=None,
    )
    if mu_table is None:
        return []
    names = [a_tag.text for a_tag in mu_table.find_all("a")]
    # Apply the requested window; a None stop means "up to the end".
    start = max(offset, 0)
    stop = None if limit is None else start + max(limit, 0)
    return names[start:stop]
# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

151
log.py Normal file
View file

@ -0,0 +1,151 @@
"""A cross-platform, package independant, colored stream/file logger."""
import logging
import platform
import ctypes
import ctypes.util
class _AnsiColorStreamHandler(logging.StreamHandler):
DEFAULT = '\x1b[0m'
RED = '\x1b[31m'
GREEN = '\x1b[32m'
YELLOW = '\x1b[33m'
CYAN = '\x1b[36m'
CRITICAL = RED
ERROR = RED
WARNING = YELLOW
INFO = GREEN
DEBUG = CYAN
def __init__(self, stream=None):
super().__init__(stream)
def format(self, record):
text = super().format(record)
color = self._get_color_code(record.levelno)
return color + text + self.DEFAULT
@classmethod
def _get_color_code(cls, level):
if level >= logging.CRITICAL:
return cls.CRITICAL
elif level >= logging.ERROR:
return cls.ERROR
elif level >= logging.WARNING:
return cls.WARNING
elif level >= logging.INFO:
return cls.INFO
elif level >= logging.DEBUG:
return cls.DEBUG
else:
return cls.DEFAULT
# Disable protected member access warning for MSVC functions.
# pylint: disable=W0212
class _WinColorStreamHandler(logging.StreamHandler):
STD_INPUT_HANDLE = -10
STD_OUTPUT_HANDLE = -11
STD_ERROR_HANDLE = -12
FOREGROUND_BLACK = 0x0000
FOREGROUND_BLUE = 0x0001
FOREGROUND_GREEN = 0x0002
FOREGROUND_CYAN = 0x0003
FOREGROUND_RED = 0x0004
FOREGROUND_MAGENTA = 0x0005
FOREGROUND_YELLOW = 0x0006
FOREGROUND_GREY = 0x0007
FOREGROUND_INTENSITY = 0x0008
FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED
BACKGROUND_BLACK = 0x0000
BACKGROUND_BLUE = 0x0010
BACKGROUND_GREEN = 0x0020
BACKGROUND_CYAN = 0x0030
BACKGROUND_RED = 0x0040
BACKGROUND_MAGENTA = 0x0050
BACKGROUND_YELLOW = 0x0060
BACKGROUND_GREY = 0x0070
BACKGROUND_INTENSITY = 0x0080
DEFAULT = FOREGROUND_WHITE
CRITICAL = FOREGROUND_RED | FOREGROUND_INTENSITY
ERROR = FOREGROUND_RED | FOREGROUND_INTENSITY
WARNING = FOREGROUND_YELLOW | FOREGROUND_INTENSITY
INFO = FOREGROUND_GREEN
DEBUG = FOREGROUND_CYAN
def __init__(self, stream=None):
super().__init__(stream)
self.output_handle = self._get_output_handle(stream)
@classmethod
def _get_output_handle(cls, stream):
if stream is None:
return ctypes.windll.kernel32.GetStdHandle(cls.STD_OUTPUT_HANDLE)
else:
msvcrt_loc = ctypes.util.find_msvcrt()
msvcrt_lib = ctypes.cdll.LoadLibrary(msvcrt_loc)
return msvcrt_lib._get_osfhandle(stream.fileno())
def emit(self, record):
color_code = self._get_color_code(record.levelno)
self._set_color_code(color_code)
super().emit(record)
self._set_color_code(self.FOREGROUND_WHITE)
@classmethod
def _get_color_code(cls, level):
if level >= logging.CRITICAL:
return cls.CRITICAL
elif level >= logging.ERROR:
return cls.ERROR
elif level >= logging.WARNING:
return cls.WARNING
elif level >= logging.INFO:
return cls.INFO
elif level >= logging.DEBUG:
return cls.DEBUG
else:
return cls.DEFAULT
def _set_color_code(self, code):
ctypes.windll.kernel32.SetConsoleTextAttribute(self.output_handle, code)
# Public handler alias: the console-API implementation on Windows, the ANSI
# escape-code implementation everywhere else.
ColorStreamHandler = (
    _WinColorStreamHandler
    if platform.system() == "Windows"
    else _AnsiColorStreamHandler
)
# Defaults used by get_logger() when the caller does not override them.
_LOG_LEVEL = logging.DEBUG
_FORMAT = "%(asctime)s %(levelname)-8s %(message)s"
_DATE_FORMAT = "%H:%M:%S"


def get_logger(name, level=_LOG_LEVEL, log_format=_FORMAT,
               date_format=_DATE_FORMAT, into_stderr=True, into_log_file=None):
    """Return the logger called *name*, configured with the given outputs.

    Calling this function again for the same name reuses the handlers that
    are already attached (updating their level and formatter) instead of
    stacking new ones, so records are never emitted more than once per
    output and the log file is not reopened.

    Args:
        name: logger name, as understood by ``logging.getLogger``.
        level: level set on the logger and on the handlers configured here.
        log_format: format string for ``logging.Formatter``.
        date_format: date format string for ``logging.Formatter``.
        into_stderr: when true, attach a ColorStreamHandler created with
            its default stream.
        into_log_file: path of a log file to write to, or None for no
            file output. A newly created file handler opens the file in
            "w" mode.

    Returns:
        The configured ``logging.Logger``.
    """
    import os  # local import: only needed to normalise the log file path

    logger = logging.getLogger(name)
    logger.setLevel(level)
    formatter = logging.Formatter(fmt=log_format, datefmt=date_format)

    if into_stderr:
        stream_handler = next(
            (h for h in logger.handlers if isinstance(h, ColorStreamHandler)),
            None,
        )
        is_new = stream_handler is None
        if is_new:
            stream_handler = ColorStreamHandler()
        stream_handler.setLevel(level)
        stream_handler.setFormatter(formatter)
        if is_new:
            logger.addHandler(stream_handler)

    if into_log_file is not None:
        # FileHandler stores the absolute path in baseFilename; compare
        # against the same normalised form to spot an existing handler.
        log_path = os.path.abspath(os.fspath(into_log_file))
        file_handler = next(
            (h for h in logger.handlers
             if isinstance(h, logging.FileHandler)
             and h.baseFilename == log_path),
            None,
        )
        is_new = file_handler is None
        if is_new:
            file_handler = logging.FileHandler(into_log_file, mode="w")
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        if is_new:
            logger.addHandler(file_handler)

    return logger