2021-03-28 18:28:35 +02:00
|
|
|
"""Gemini-related features of the browser."""
|
|
|
|
|
2021-05-13 01:25:50 +02:00
|
|
|
import logging
|
2021-04-17 22:59:54 +02:00
|
|
|
from pathlib import Path
|
2021-05-12 22:29:03 +02:00
|
|
|
from typing import Optional
|
2021-04-17 22:59:54 +02:00
|
|
|
|
2021-03-28 18:28:35 +02:00
|
|
|
from bebop.browser.browser import Browser
|
2021-04-19 00:28:20 +02:00
|
|
|
from bebop.command_line import CommandLine
|
2021-05-16 01:30:00 +02:00
|
|
|
from bebop.fs import get_downloads_path, get_identities_list_path
|
2021-05-12 22:29:03 +02:00
|
|
|
from bebop.identity import (
|
|
|
|
ClientCertificateException, create_certificate, get_cert_and_key,
|
|
|
|
get_identities_for_url, load_identities, save_identities
|
|
|
|
)
|
2021-03-28 18:28:35 +02:00
|
|
|
from bebop.navigation import set_parameter
|
|
|
|
from bebop.page import Page
|
|
|
|
from bebop.protocol import Request, Response
|
2021-04-19 02:04:18 +02:00
|
|
|
from bebop.tofu import trust_fingerprint, untrust_fingerprint, WRONG_FP_ALERT
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
|
2021-04-18 01:17:33 +02:00
|
|
|
MAX_URL_LEN = 1024
|
|
|
|
|
|
|
|
|
2021-05-12 22:29:03 +02:00
|
|
|
def open_gemini_url(
|
|
|
|
browser: Browser,
|
|
|
|
url: str,
|
|
|
|
redirects: int =0,
|
2021-05-14 23:15:22 +02:00
|
|
|
use_cache: bool =False,
|
2021-05-13 01:24:29 +02:00
|
|
|
cert_and_key=None
|
2021-05-12 22:29:03 +02:00
|
|
|
) -> Optional[str]:
|
2021-03-28 18:28:35 +02:00
|
|
|
"""Open a Gemini URL and set the formatted response as content.
|
|
|
|
|
2021-04-19 02:04:18 +02:00
|
|
|
While the specification is not set in stone, every client takes a slightly
|
|
|
|
different approach to enforcing TOFU. Read the `Request.connect` docs to
|
|
|
|
find about cases where connection is aborted without asking the user. What
|
|
|
|
interests us here is what happens when the user should decide herself? This
|
|
|
|
happens in several cases, matching the request possible states. Here is
|
|
|
|
what Bebop do (or want to do):
|
|
|
|
|
|
|
|
- STATE_INVALID_CERT: the certificate has non-fatal issues; we may
|
|
|
|
present the user the problems found and let her decide whether to trust
|
|
|
|
temporarily the certificate or not BUT we currently do not parse the
|
2021-05-12 22:29:03 +02:00
|
|
|
certificate's fields, not even the pubkey, so this state is never used.
|
2021-04-19 02:04:18 +02:00
|
|
|
- STATE_UNKNOWN_CERT: the certificate is valid but has not been seen before;
|
|
|
|
as we're doing TOFU here, we could automatically trust it or let the user
|
|
|
|
choose. For simplicity, we always trust it permanently.
|
|
|
|
|
2021-05-08 22:41:42 +02:00
|
|
|
Arguments:
|
2021-04-19 02:04:18 +02:00
|
|
|
- browser: Browser object making the request.
|
|
|
|
- url: a valid URL with Gemini scheme to open.
|
|
|
|
- redirects: current amount of redirections done to open the initial URL.
|
|
|
|
- use_cache: if true, look up if the page is cached before requesting it.
|
2021-05-13 01:24:29 +02:00
|
|
|
- cert_and_key: if not None, a tuple of paths to a client cert/key to use.
|
2021-05-08 22:41:42 +02:00
|
|
|
|
|
|
|
Returns:
|
2021-05-12 22:29:03 +02:00
|
|
|
The final successfully handled URL on success, None otherwise. Redirected
|
|
|
|
URLs are not returned.
|
2021-03-28 18:28:35 +02:00
|
|
|
"""
|
2021-04-18 01:17:33 +02:00
|
|
|
if len(url) >= MAX_URL_LEN:
|
2021-04-19 02:04:18 +02:00
|
|
|
browser.set_status_error("Request URL too long.")
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-04-18 01:17:33 +02:00
|
|
|
|
2021-05-12 22:29:03 +02:00
|
|
|
loading_message_verb = "Loading" if redirects == 0 else "Redirecting to"
|
|
|
|
loading_message = f"{loading_message_verb} {url}…"
|
|
|
|
browser.set_status(loading_message)
|
2021-03-28 18:28:35 +02:00
|
|
|
|
2021-05-13 01:24:29 +02:00
|
|
|
# If this URL used to request an identity, provide it.
|
|
|
|
if not cert_and_key:
|
|
|
|
url_identities = get_identities_for_url(browser.identities, url)
|
|
|
|
identity = select_identity(url_identities)
|
|
|
|
if identity:
|
|
|
|
cert_and_key = get_cert_and_key(identity["id"])
|
|
|
|
|
2021-03-28 18:28:35 +02:00
|
|
|
if use_cache and url in browser.cache:
|
|
|
|
browser.load_page(browser.cache[url])
|
|
|
|
browser.current_url = url
|
|
|
|
browser.set_status(url)
|
2021-05-12 22:29:03 +02:00
|
|
|
return url
|
2021-03-28 18:28:35 +02:00
|
|
|
|
2021-05-13 01:25:50 +02:00
|
|
|
logging.info(
|
|
|
|
f"Request {url}"
|
|
|
|
+ (f" using cert and key {cert_and_key}" if cert_and_key else "")
|
|
|
|
)
|
2021-05-13 01:24:29 +02:00
|
|
|
req = Request(url, browser.stash, identity=cert_and_key)
|
2021-04-18 02:27:05 +02:00
|
|
|
connect_timeout = browser.config["connect_timeout"]
|
|
|
|
connected = req.connect(connect_timeout)
|
2021-03-28 18:28:35 +02:00
|
|
|
if not connected:
|
|
|
|
if req.state == Request.STATE_ERROR_CERT:
|
|
|
|
error = f"Certificate was missing or corrupt ({url})."
|
|
|
|
elif req.state == Request.STATE_UNTRUSTED_CERT:
|
2021-04-19 02:04:18 +02:00
|
|
|
_handle_untrusted_cert(browser, req)
|
2021-03-28 18:28:35 +02:00
|
|
|
error = f"Certificate has been changed ({url})."
|
|
|
|
elif req.state == Request.STATE_CONNECTION_FAILED:
|
2021-04-19 00:28:20 +02:00
|
|
|
error_details = ": " + req.error if req.error else "."
|
2021-03-28 18:28:35 +02:00
|
|
|
error = f"Connection failed ({url})" + error_details
|
|
|
|
else:
|
|
|
|
error = f"Connection failed ({url})."
|
|
|
|
browser.set_status_error(error)
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
if req.state == Request.STATE_INVALID_CERT:
|
|
|
|
pass
|
|
|
|
elif req.state == Request.STATE_UNKNOWN_CERT:
|
2021-04-19 02:04:18 +02:00
|
|
|
# Certificate is valid but unknown: trust it permanently.
|
|
|
|
hostname = req.hostname
|
|
|
|
fingerprint = req.cert_validation["hash"]
|
|
|
|
trust_fingerprint(
|
|
|
|
browser.stash,
|
|
|
|
hostname,
|
|
|
|
"SHA-512",
|
|
|
|
fingerprint,
|
|
|
|
trust_always=True
|
|
|
|
)
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
data = req.proceed()
|
|
|
|
if not data:
|
|
|
|
browser.set_status_error(f"Server did not respond in time ({url}).")
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-03-28 18:28:35 +02:00
|
|
|
response = Response.parse(data)
|
|
|
|
if not response:
|
|
|
|
browser.set_status_error(f"Server response parsing failed ({url}).")
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-03-28 18:28:35 +02:00
|
|
|
|
2021-05-08 22:41:42 +02:00
|
|
|
return _handle_response(browser, response, url, redirects)
|
2021-04-19 00:28:20 +02:00
|
|
|
|
|
|
|
|
2021-04-19 02:04:18 +02:00
|
|
|
def _handle_untrusted_cert(browser: Browser, request: Request):
|
|
|
|
"""Handle a mismatch between known & server fingerprints.
|
|
|
|
|
|
|
|
This function formats an alert page to explain to the user what the hell is
|
|
|
|
going on and displays it.
|
|
|
|
"""
|
|
|
|
remote_fp = request.cert_validation["hash"]
|
|
|
|
local_fp = request.cert_validation["saved_hash"]
|
|
|
|
alert_page_source = WRONG_FP_ALERT.format(
|
|
|
|
hostname=request.hostname,
|
|
|
|
local_fp=local_fp,
|
|
|
|
remote_fp=remote_fp,
|
|
|
|
)
|
|
|
|
alert_page = Page.from_gemtext(
|
|
|
|
alert_page_source,
|
|
|
|
browser.config["text_width"]
|
|
|
|
)
|
|
|
|
browser.load_page(alert_page)
|
|
|
|
|
|
|
|
|
2021-05-12 22:29:03 +02:00
|
|
|
def _handle_response(
|
|
|
|
browser: Browser,
|
|
|
|
response: Response,
|
|
|
|
url: str,
|
|
|
|
redirects: int
|
|
|
|
) -> Optional[str]:
|
2021-05-08 22:41:42 +02:00
|
|
|
"""Handle a response from a Gemini server.
|
|
|
|
|
|
|
|
Returns:
|
2021-05-12 22:29:03 +02:00
|
|
|
The final URL on success, None otherwise.
|
2021-05-08 22:41:42 +02:00
|
|
|
"""
|
2021-05-13 01:25:50 +02:00
|
|
|
logging.info(f"Response {response.code} {response.meta}")
|
2021-03-28 18:28:35 +02:00
|
|
|
if response.code == 20:
|
2021-05-08 22:41:42 +02:00
|
|
|
return _handle_successful_response(browser, response, url)
|
2021-03-28 18:28:35 +02:00
|
|
|
elif response.generic_code == 30 and response.meta:
|
2021-05-12 22:29:03 +02:00
|
|
|
# On redirections, we go back to open_url as the redirection may be to
|
|
|
|
# another protocol. Discard the result of this request.
|
|
|
|
browser.open_url(
|
|
|
|
response.meta,
|
|
|
|
base_url=url,
|
|
|
|
redirects=redirects + 1
|
|
|
|
)
|
2021-03-28 18:28:35 +02:00
|
|
|
elif response.generic_code in (40, 50):
|
|
|
|
error = f"Server error: {response.meta or Response.code.name}"
|
|
|
|
browser.set_status_error(error)
|
|
|
|
elif response.generic_code == 10:
|
2021-05-12 22:29:03 +02:00
|
|
|
return _handle_input_request(browser, url, response.meta)
|
|
|
|
elif response.code == 60:
|
|
|
|
return _handle_cert_required(browser, response, url, redirects)
|
|
|
|
elif response.code in (61, 62):
|
|
|
|
details = response.meta or Response.code.name
|
|
|
|
error = f"Client certificate error: {details}"
|
|
|
|
browser.set_status_error(error)
|
2021-03-28 18:28:35 +02:00
|
|
|
else:
|
|
|
|
error = f"Unhandled response code {response.code}"
|
|
|
|
browser.set_status_error(error)
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
|
2021-05-08 22:41:42 +02:00
|
|
|
def _handle_successful_response(browser: Browser, response: Response, url: str):
|
2021-04-17 22:59:54 +02:00
|
|
|
"""Handle a successful response content from a Gemini server.
|
2021-03-28 18:28:35 +02:00
|
|
|
|
2021-04-17 22:59:54 +02:00
|
|
|
According to the MIME type received or inferred, the response is either
|
|
|
|
rendered by the browser, or saved to disk. If an error occurs, the browser
|
|
|
|
displays it.
|
2021-03-28 18:28:35 +02:00
|
|
|
|
2021-04-17 22:59:54 +02:00
|
|
|
Only text content is rendered. For Gemini, the encoding specified in the
|
|
|
|
response is used, if available on the Python distribution. For other text
|
|
|
|
formats, only UTF-8 is attempted.
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
Arguments:
|
2021-04-17 22:59:54 +02:00
|
|
|
- browser: Browser instance that made the initial request.
|
|
|
|
- url: original URL.
|
2021-03-28 18:28:35 +02:00
|
|
|
- response: a successful Response.
|
2021-05-08 22:41:42 +02:00
|
|
|
|
|
|
|
Returns:
|
2021-05-12 22:29:03 +02:00
|
|
|
The successfully handled URL on success, None otherwise.
|
2021-03-28 18:28:35 +02:00
|
|
|
"""
|
2021-05-08 22:41:42 +02:00
|
|
|
# Use appropriate response parser according to the MIME type.
|
2021-03-28 18:28:35 +02:00
|
|
|
mime_type = response.get_mime_type()
|
2021-04-17 22:59:54 +02:00
|
|
|
page = None
|
|
|
|
error = None
|
|
|
|
filepath = None
|
2021-03-28 18:28:35 +02:00
|
|
|
if mime_type.main_type == "text":
|
|
|
|
if mime_type.sub_type == "gemini":
|
|
|
|
encoding = mime_type.charset
|
|
|
|
try:
|
|
|
|
text = response.content.decode(encoding, errors="replace")
|
|
|
|
except LookupError:
|
2021-04-17 22:59:54 +02:00
|
|
|
error = f"Unknown encoding {encoding}."
|
|
|
|
else:
|
2021-04-18 02:27:05 +02:00
|
|
|
page = Page.from_gemtext(text, browser.config["text_width"])
|
2021-03-28 18:28:35 +02:00
|
|
|
else:
|
2021-04-16 19:56:56 +02:00
|
|
|
text = response.content.decode("utf-8", errors="replace")
|
2021-04-17 22:59:54 +02:00
|
|
|
page = Page.from_text(text)
|
|
|
|
else:
|
2021-05-09 23:02:56 +02:00
|
|
|
download_dir = browser.config["download_path"]
|
|
|
|
filepath = _get_download_path(url, download_dir=download_dir)
|
2021-04-17 22:59:54 +02:00
|
|
|
|
2021-05-08 22:41:42 +02:00
|
|
|
# If a page has been produced, load it. Else if a file has been retrieved,
|
|
|
|
# download it.
|
2021-04-17 22:59:54 +02:00
|
|
|
if page:
|
|
|
|
browser.load_page(page)
|
|
|
|
browser.current_url = url
|
|
|
|
browser.cache[url] = page
|
|
|
|
browser.set_status(url)
|
2021-05-12 22:29:03 +02:00
|
|
|
return url
|
2021-04-17 22:59:54 +02:00
|
|
|
elif filepath:
|
|
|
|
try:
|
|
|
|
with open(filepath, "wb") as download_file:
|
|
|
|
download_file.write(response.content)
|
|
|
|
except OSError as exc:
|
|
|
|
browser.set_status_error(f"Failed to save {url} ({exc})")
|
|
|
|
else:
|
|
|
|
browser.set_status(f"Downloaded {url} ({mime_type.short}).")
|
2021-05-09 01:39:33 +02:00
|
|
|
browser.last_download = mime_type, filepath
|
2021-05-12 22:29:03 +02:00
|
|
|
return url
|
2021-04-17 22:59:54 +02:00
|
|
|
elif error:
|
|
|
|
browser.set_status_error(error)
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-04-17 22:59:54 +02:00
|
|
|
|
|
|
|
|
2021-05-09 23:02:56 +02:00
|
|
|
def _get_download_path(url: str, download_dir: Optional[str] =None) -> Path:
|
2021-04-17 22:59:54 +02:00
|
|
|
"""Try to find the best download file path possible from this URL."""
|
2021-05-09 23:02:56 +02:00
|
|
|
download_path = Path(download_dir) if download_dir else get_downloads_path()
|
|
|
|
if not download_path.exists():
|
|
|
|
download_path.mkdir(parents=True)
|
2021-04-17 22:59:54 +02:00
|
|
|
url_parts = url.rsplit("/", maxsplit=1)
|
|
|
|
if url_parts:
|
|
|
|
filename = url_parts[-1]
|
2021-03-28 18:28:35 +02:00
|
|
|
else:
|
2021-04-17 22:59:54 +02:00
|
|
|
filename = url.split("://")[1] if "://" in url else url
|
|
|
|
filename = filename.replace("/", "_")
|
2021-05-09 23:02:56 +02:00
|
|
|
return download_path / filename
|
2021-03-28 18:28:35 +02:00
|
|
|
|
|
|
|
|
2021-05-12 22:29:03 +02:00
|
|
|
def _handle_input_request(
|
|
|
|
browser: Browser,
|
|
|
|
from_url: str,
|
|
|
|
message: str =None
|
|
|
|
) -> Optional[str]:
|
|
|
|
"""Focus command-line to pass input to the server.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
The result of `open_gemini_url` with the new request including user input.
|
|
|
|
"""
|
2021-03-28 18:28:35 +02:00
|
|
|
if message:
|
|
|
|
browser.set_status(f"Input needed: {message}")
|
|
|
|
else:
|
|
|
|
browser.set_status("Input needed:")
|
2021-04-19 00:28:20 +02:00
|
|
|
user_input = browser.command_line.focus(CommandLine.CHAR_TEXT)
|
2021-04-18 01:17:33 +02:00
|
|
|
if not user_input:
|
|
|
|
return
|
|
|
|
url = set_parameter(from_url, user_input)
|
2021-05-12 22:29:03 +02:00
|
|
|
return open_gemini_url(browser, url)
|
|
|
|
|
|
|
|
|
|
|
|
def _handle_cert_required(
|
|
|
|
browser: Browser,
|
|
|
|
response: Response,
|
|
|
|
url: str,
|
|
|
|
redirects: int
|
|
|
|
) -> Optional[str]:
|
|
|
|
"""Find a matching identity and resend the request with it.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
The result of `open_gemini_url` with the client certificate provided.
|
|
|
|
"""
|
|
|
|
identities = load_identities(get_identities_list_path())
|
2021-05-13 01:24:29 +02:00
|
|
|
if not identities:
|
2021-05-16 01:30:00 +02:00
|
|
|
browser.set_status_error("Can't load identities.")
|
2021-05-12 22:29:03 +02:00
|
|
|
return None
|
2021-05-13 01:24:29 +02:00
|
|
|
browser.identities = identities
|
2021-05-12 22:29:03 +02:00
|
|
|
|
2021-05-13 01:24:29 +02:00
|
|
|
url_identities = get_identities_for_url(browser.identities, url)
|
2021-05-12 22:29:03 +02:00
|
|
|
if not url_identities:
|
|
|
|
identity = create_identity(browser, url)
|
|
|
|
if not identity:
|
|
|
|
return None
|
2021-05-13 01:24:29 +02:00
|
|
|
browser.identities[url] = [identity]
|
|
|
|
save_identities(browser.identities, get_identities_list_path())
|
2021-05-12 22:29:03 +02:00
|
|
|
else:
|
2021-05-13 01:24:29 +02:00
|
|
|
identity = select_identity(url_identities)
|
2021-05-12 22:29:03 +02:00
|
|
|
|
|
|
|
cert_path, key_path = get_cert_and_key(identity["id"])
|
|
|
|
return open_gemini_url(
|
|
|
|
browser,
|
|
|
|
url,
|
|
|
|
redirects=redirects + 1,
|
2021-05-13 01:24:29 +02:00
|
|
|
cert_and_key=(cert_path, key_path)
|
2021-05-12 22:29:03 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2021-05-13 01:24:29 +02:00
|
|
|
def select_identity(identities: list):
|
|
|
|
"""Let user select the appropriate identity among candidates."""
|
|
|
|
# TODO support multiple identities; for now we just use the first available.
|
|
|
|
return identities[0] if identities else None
|
|
|
|
|
|
|
|
|
2021-05-12 22:29:03 +02:00
|
|
|
def create_identity(browser: Browser, url: str):
|
|
|
|
"""Walk the user through identity creation.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
The created identity on success (already registered in identities
|
|
|
|
"""
|
2021-05-16 00:40:21 +02:00
|
|
|
key = browser.prompt("Create client certificate?")
|
2021-05-12 22:29:03 +02:00
|
|
|
if key != "y":
|
|
|
|
browser.reset_status()
|
|
|
|
return None
|
|
|
|
|
|
|
|
common_name = browser.get_user_text_input(
|
|
|
|
"Name? The server will see this, you can leave it empty.",
|
|
|
|
CommandLine.CHAR_TEXT,
|
|
|
|
strip=True,
|
|
|
|
)
|
|
|
|
if not common_name:
|
|
|
|
browser.reset_status()
|
|
|
|
return None
|
|
|
|
|
|
|
|
browser.set_status("Generating certificate…")
|
|
|
|
try:
|
|
|
|
mangled_name = create_certificate(url, common_name)
|
|
|
|
except ClientCertificateException as exc:
|
|
|
|
browser.set_status_error(exc.message)
|
|
|
|
return None
|
|
|
|
|
|
|
|
browser.reset_status()
|
|
|
|
return {"name": common_name, "id": mangled_name}
|
2021-04-19 02:04:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
def forget_certificate(browser: Browser, hostname: str):
|
|
|
|
"""Remove the fingerprint associated to this hostname for the cert stash."""
|
2021-05-16 00:40:21 +02:00
|
|
|
key = browser.prompt(f"Remove fingerprint for {hostname}?")
|
2021-04-19 02:04:18 +02:00
|
|
|
if key != "y":
|
|
|
|
browser.reset_status()
|
|
|
|
return
|
|
|
|
if untrust_fingerprint(browser.stash, hostname):
|
|
|
|
browser.set_status(f"Known certificate for {hostname} removed.")
|
|
|
|
else:
|
|
|
|
browser.set_status_error(f"Known certificate for {hostname} not found.")
|