This repository has been archived on 2024-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
Bebop/bebop/protocol.py

302 lines
10 KiB
Python
Raw Permalink Normal View History

2021-03-11 19:16:15 +01:00
"""Gemini protocol implementation."""
import logging
2021-02-12 19:01:42 +01:00
import re
import socket
import ssl
from dataclasses import dataclass
from enum import IntEnum
2021-03-14 00:04:55 +01:00
from typing import Optional
2021-02-12 19:01:42 +01:00
2021-03-14 00:04:55 +01:00
from bebop.mime import DEFAULT_MIME_TYPE, MimeType
2021-06-05 01:27:12 +02:00
from bebop.navigation import parse_host_and_port
2021-04-19 02:04:18 +02:00
from bebop.tofu import CertStatus, validate_cert
2021-02-12 19:01:42 +01:00
GEMINI_URL_RE = re.compile(r"gemini://(?P<host>[^/]+)(?P<path>.*)")
LINE_TERM = b"\r\n"
class Request:
2021-03-11 19:16:15 +01:00
"""A Gemini request.
Details about the request itself can be found in the Gemini specification.
This class allows you to do a request in 2 times: first opening the
TLS connection to apply security checks, then aborting or proceeding by
sending the request header and receiving the response:
1. Instantiate a Request.
2021-11-15 09:51:20 +01:00
2. `connect` opens the connection and aborts it or leaves the caller free
to check stuff.
2021-03-11 19:16:15 +01:00
3. `proceed` or `abort` can be called.
2021-04-19 02:04:18 +02:00
Attributes:
- url: URL to open.
- cert_stash: certificate stash to use an possibly update.
- state: request state.
- hostname: hostname derived from url, stored when `connect` is called.
2021-11-15 09:51:20 +01:00
- payload: bytes object of the payload request; build during `connect`,
used during `proceed`.
2021-04-19 02:04:18 +02:00
- ssock: TLS-wrapped socket.
- cert_validation: validation results dict, set after certificate has been
reviewed.
- error: human-readable connection error, may be set during `connect`.
2021-03-11 19:16:15 +01:00
"""
2021-02-12 19:01:42 +01:00
# Initial state, connection is not established yet.
STATE_INIT = 0
# An error has occured during cert verification, connection is aborted.
STATE_ERROR_CERT = 1
# An invalid URL has been provided, connection is aborted.
STATE_INVALID_URL = 2
# Invalid cert: user should abort or temporarily trust the cert.
STATE_INVALID_CERT = 3
# Unknown cert: user should abort, temporarily or always trust the cert.
STATE_UNKNOWN_CERT = 4
# Untrusted cert: connection is aborted, manually edit the stash.
STATE_UNTRUSTED_CERT = 5
# Valid and trusted cert: proceed.
STATE_OK = 6
2021-02-15 19:57:49 +01:00
# Connection failed.
STATE_CONNECTION_FAILED = 7
2021-02-12 19:01:42 +01:00
def __init__(self, url, cert_stash, identity=None):
2021-02-12 19:01:42 +01:00
self.url = url
self.cert_stash = cert_stash
self.state = Request.STATE_INIT
2021-04-19 02:04:18 +02:00
self.hostname = ""
2021-02-12 19:01:42 +01:00
self.payload = b""
self.ssock = None
2021-04-19 02:04:18 +02:00
self.cert_validation = None
2021-02-15 19:57:49 +01:00
self.error = ""
self.identity = identity
2021-02-12 19:01:42 +01:00
2021-04-19 02:04:18 +02:00
def connect(self, timeout: int) -> bool:
2021-02-12 19:01:42 +01:00
"""Connect to a Gemini server and return a RequestEventType.
Return True if the connection is established. The caller has to verify
the request state and propose appropriate choices to the user if the
certificate status is not CertStatus.VALID (Request.STATE_OK).
2021-04-19 02:04:18 +02:00
If connect returns False, the secure socket is aborted before return so
2021-11-15 09:51:20 +01:00
there is no need to call `abort`. If connect returns True, it is up to
the caller to decide whether to continue (call `proceed`) the
connection or abort it (call `abort`).
2021-04-19 02:04:18 +02:00
2021-11-15 09:51:20 +01:00
The request `state` is updated to reflect the connection state after
the function returns. The following list describes states related to
2021-04-19 02:04:18 +02:00
connection failure (False returned):
- STATE_INVALID_URL: URL is not valid.
- STATE_CONNECTION_FAILED: connection failed, either TCP timeout or
local TLS failure. Additionally, the request `error` attribute is set
to an error string describing the issue.
For all request states from now on, the `cert_validation` attribute is
updated with the result of the certificate validation.
2021-11-15 09:51:20 +01:00
The following list describes states related to validation failure
(False returned):
2021-04-19 02:04:18 +02:00
- STATE_ERROR_CERT: server certificate could not be validated at all.
- STATE_UNTRUSTED_CERT: server certificate mismatched the known
certificate for that hostname. The user should be presented with
options to solve the matter.
For other states, the connection is not aborted (True returned):
- STATE_INVALID_CERT: the certificate has one or more issues, e.g.
mismatching hostname or it is expired.
- STATE_UNKNOWN_CERT: the certificate is valid but unknown.
- STATE_OK: the certificate is valid and matches the known certificate
of that hostname.
After this function returns, the request state cannot be STATE_INIT.
Additional notes:
- The DER hash is compared against the fingerprint for this hostname
*and port*; the specification does not tell much about that, but we
2021-11-15 09:51:20 +01:00
are slightly more restrictive here by adding the port in the
equation.
2021-04-19 02:04:18 +02:00
- The state STATE_INVALID_CERT is actually never used in Bebop because
of the current tendency to ignore any certificate fields and only
check the whole cert fingerprint. Here it is considered the same as a
valid certificate.
2021-02-12 19:01:42 +01:00
"""
# Get hostname and port from the URL.
2021-04-19 02:04:18 +02:00
url_parts = GEMINI_URL_RE.match(self.url)
2021-02-12 19:01:42 +01:00
if not url_parts:
self.state = Request.STATE_INVALID_URL
return False
2021-06-05 01:27:12 +02:00
host = url_parts.groupdict()["host"]
host_and_port = parse_host_and_port(host, 1965)
if host_and_port is None:
self.state = Request.STATE_INVALID_URL
return False
hostname, port = host_and_port
2021-04-19 02:04:18 +02:00
self.hostname = hostname
2021-02-12 19:01:42 +01:00
# Prepare the Gemini request.
2021-02-12 19:01:42 +01:00
try:
self.payload = self.url.encode()
except ValueError:
self.state = Request.STATE_INVALID_URL
return False
self.payload += LINE_TERM
# Connect to the server.
2021-02-15 19:57:49 +01:00
try:
sock = socket.create_connection((hostname, port), timeout=timeout)
2021-02-16 21:23:06 +01:00
except OSError as exc:
2021-02-15 19:57:49 +01:00
self.state = Request.STATE_CONNECTION_FAILED
self.error = exc.strerror
return False
# Setup TLS.
2021-02-12 19:01:42 +01:00
context = Request.get_ssl_context()
if self.identity:
try:
context.load_cert_chain(*self.identity)
except FileNotFoundError as exc:
sock.close()
self.state = Request.STATE_CONNECTION_FAILED
2021-06-04 15:35:53 +02:00
self.error = f"Could not load identity files ({exc})"
logging.error(f"Failed to load identity files {self.identity}")
return False
2021-02-15 19:57:49 +01:00
try:
self.ssock = context.wrap_socket(sock, server_hostname=hostname)
2021-02-15 19:57:49 +01:00
except OSError as exc:
2021-04-19 02:04:18 +02:00
sock.close()
2021-02-15 19:57:49 +01:00
self.state = Request.STATE_CONNECTION_FAILED
self.error = exc.strerror
return False
# Validate server certificate.
2021-02-12 19:01:42 +01:00
der = self.ssock.getpeercert(binary_form=True)
2021-04-19 02:04:18 +02:00
self.cert_validation = validate_cert(der, hostname, self.cert_stash)
cert_status = self.cert_validation["status"]
if cert_status == CertStatus.ERROR:
2021-02-12 19:01:42 +01:00
self.abort()
self.state = Request.STATE_ERROR_CERT
return False
2021-04-19 02:04:18 +02:00
if cert_status == CertStatus.WRONG_FINGERPRINT:
2021-02-12 19:01:42 +01:00
self.abort()
self.state = Request.STATE_UNTRUSTED_CERT
return False
2021-04-19 02:04:18 +02:00
if cert_status == CertStatus.VALID_NEW:
2021-02-12 19:01:42 +01:00
self.state = Request.STATE_UNKNOWN_CERT
2021-04-19 02:04:18 +02:00
else: # self.cert_status in (VALID, VALID_NEW, INVALID_CERT)
2021-02-12 19:01:42 +01:00
self.state = Request.STATE_OK
return True
def abort(self):
"""Close the connection."""
self.ssock.close()
def proceed(self):
"""Complete the request: send the payload and return received data."""
self.ssock.sendall(self.payload)
response = b""
while True:
try:
buf = self.ssock.recv(4096)
except socket.timeout:
buf = None
2021-02-12 19:01:42 +01:00
if not buf:
return response
response += buf
@staticmethod
def get_ssl_context():
"""Return a secure SSL context that is adequate for Gemini."""
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
context.options |= ssl.OP_NO_TLSv1
context.options |= ssl.OP_NO_TLSv1_1
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE
return context
class StatusCode(IntEnum):
UNKNOWN = 0
INPUT = 10
SENSITIVE_INPUT = 11
SUCCESS = 20
REDIRECT = 30
PERMANENT_REDIRECT = 31
TEMP_FAILURE = 40
SERVER_UNAVAILABLE = 41
CGI_ERROR = 42
PROXY_ERROR = 43
SLOW_DOWN = 44
PERM_FAILURE = 50
NOT_FOUND = 51
GONE = 52
PROXY_REQUEST_REFUSED = 53
BAD_REQUEST = 59
CERT_REQUIRED = 60
CERT_NOT_AUTHORISED = 61
CERT_NOT_VALID = 62
_missing_ = lambda _: StatusCode.UNKNOWN
@dataclass
class Response:
2021-04-17 22:59:54 +02:00
"""A Gemini response.
Response objects can be created only by parsing a Gemini response using the
static `parse` method, so you're guaranteed to have a valid object.
Attributes:
- code: the status code returned by the server.
- meta: optional meta content.
- content: bytes as returned by the server, only in successful requests.
"""
2021-02-12 19:01:42 +01:00
code: StatusCode
meta: str = ""
content: bytes = b""
HEADER_RE = re.compile(r"(\d{2}) (.*)")
MAX_META_LEN = 1024
2021-02-12 19:01:42 +01:00
@property
2021-03-14 00:04:55 +01:00
def generic_code(self) -> int:
"""See `Response.get_generic_code`."""
return Response.get_generic_code(self.code)
2021-03-14 00:04:55 +01:00
def get_mime_type(self) -> MimeType:
"""Return the MIME type if possible, else the default MIME type."""
return MimeType.from_str(self.meta) or DEFAULT_MIME_TYPE
2021-02-12 19:01:42 +01:00
@staticmethod
2021-03-14 00:04:55 +01:00
def parse(data: bytes) -> Optional["Response"]:
2021-02-12 19:01:42 +01:00
"""Parse a received response."""
try:
response_header_len = data.index(LINE_TERM)
response_header = data[:response_header_len].decode()
except ValueError:
return None
match = Response.HEADER_RE.match(response_header)
if not match:
return None
code, meta = match.groups()
if len(meta) > Response.MAX_META_LEN:
return None
response = Response(StatusCode(int(code)), meta=meta)
if response.generic_code == StatusCode.SUCCESS:
2021-02-12 19:01:42 +01:00
content_offset = response_header_len + len(LINE_TERM)
response.content = data[content_offset:]
elif response.code == StatusCode.UNKNOWN:
return None
return response
@staticmethod
2021-04-19 02:04:18 +02:00
def get_generic_code(code: int) -> int:
2021-02-12 19:01:42 +01:00
"""Return the generic version (x0) of this code."""
return code - (code % 10)