From 2712dfd79473745641c9b42410f5cf9d68e1df41 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Tue, 14 Feb 2023 23:44:51 +0100 Subject: use requests Session for tile requests This allows us to 1. Get better performance by re-using Keep-Alive connections instead of re-opening one for each single tile and 2. Better honor the usage policies of the tile servers by not hammering them with many concurrent connections --- fietsboek/__init__.py | 6 ++++ fietsboek/views/tileproxy.py | 81 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/fietsboek/__init__.py b/fietsboek/__init__.py index 5ca5a4b..98e7b99 100644 --- a/fietsboek/__init__.py +++ b/fietsboek/__init__.py @@ -89,6 +89,10 @@ def maintenance_mode( def main(_global_config, **settings): """This function returns a Pyramid WSGI application.""" + # Avoid a circular import by not importing at the top level + # pylint: disable=import-outside-toplevel,cyclic-import + from .views.tileproxy import TileRequester + parsed_config = mod_config.parse(settings) settings["jinja2.newstyle"] = True @@ -134,6 +138,8 @@ def main(_global_config, **settings): config.add_request_method(redis_, name="redis", reify=True) config.add_request_method(config_, name="config", reify=True) + config.registry.registerUtility(TileRequester()) + jinja2_env = config.get_jinja2_environment() jinja2_env.filters["format_decimal"] = mod_jinja2.filter_format_decimal jinja2_env.filters["format_datetime"] = mod_jinja2.filter_format_datetime diff --git a/fietsboek/views/tileproxy.py b/fietsboek/views/tileproxy.py index 87a742d..55d52cf 100644 --- a/fietsboek/views/tileproxy.py +++ b/fietsboek/views/tileproxy.py @@ -9,8 +9,9 @@ Additionally, this protects the users' IP, as only fietsboek can see it. import datetime import logging import random +import threading from itertools import chain -from typing import List +from typing import List, Optional import requests from pyramid.httpexceptions import HTTPBadRequest, HTTPGatewayTimeout @@ -18,6 +19,7 @@ from pyramid.request import Request from pyramid.response import Response from pyramid.view import view_config from requests.exceptions import ReadTimeout +from zope.interface import Interface, implementer from .. import __VERSION__ from ..config import Config, LayerAccess, LayerType, TileLayerConfig @@ -217,6 +219,71 @@ PUNISHMENT_TTL = datetime.timedelta(minutes=10) PUNISHMENT_THRESHOLD = 10 """Block a provider after that many requests have timed out.""" +MAX_CONCURRENT_CONNECTIONS = 2 +"""Maximum TCP connections per tile host.""" + +CONNECTION_CLOSE_TIMEOUT = datetime.timedelta(seconds=2) +"""Timeout after which keep-alive connections are killed. + +Note that new requests reset the timeout. +""" + + +class ITileRequester(Interface): # pylint: disable=inherit-non-class + """An interface to define the tile requester.""" + + def load_tile(self, url: str, headers: Optional[dict[str, str]] = None) -> requests.Response: + """Loads a tile at the given URL. + + :param url: The URL of the tile to load. + :param headers: Additional headers to send. + :return: The response. + """ + raise NotImplementedError() + + +@implementer(ITileRequester) +class TileRequester: # pylint: disable=too-few-public-methods + """Implementation of the tile requester using requests sessions. + + The benefit of this over doing ``requests.get`` is that we can re-use + connections, and we ensure that we comply with the use policy of the tile + servers by not hammering them with too many connections. + """ + + def __init__(self): + self.session = requests.Session() + adapter = requests.adapters.HTTPAdapter( + pool_maxsize=MAX_CONCURRENT_CONNECTIONS, + pool_block=True, + ) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + self.lock = threading.Lock() + self.closer = None + + def load_tile(self, url: str, headers: Optional[dict[str, str]] = None) -> requests.Response: + """Implementation of :meth:`ITileRequester.load_tile`.""" + response = self.session.get(url, headers=headers, timeout=TIMEOUT.total_seconds()) + response.raise_for_status() + self._schedule_session_close() + return response + + def _schedule_session_close(self): + with self.lock: + if self.closer: + self.closer.cancel() + self.closer = threading.Timer( + CONNECTION_CLOSE_TIMEOUT.total_seconds(), + self._close_session, + ) + self.closer.start() + + def _close_session(self): + with self.lock: + self.closer = None + self.session.close() + @view_config(route_name="tile-proxy", http_cache=3600) def tile_proxy(request): @@ -227,6 +294,7 @@ def tile_proxy(request): :return: The HTTP response. :rtype: pyramid.response.Response """ + # pylint: disable=too-many-locals if request.config.disable_tile_proxy: raise HTTPBadRequest("Tile proxying is disabled") @@ -262,19 +330,18 @@ def tile_proxy(request): if from_mail: headers["from"] = from_mail + loader: ITileRequester = request.registry.getUtility(ITileRequester) try: - resp = requests.get(url, headers=headers, timeout=TIMEOUT.total_seconds()) + resp = loader.load_tile(url, headers=headers) except ReadTimeout: LOGGER.debug("Proxy timeout when accessing %r", url) request.redis.incr(timeout_tracker) request.redis.expire(timeout_tracker, PUNISHMENT_TTL) raise HTTPGatewayTimeout(f"No response in time from {provider}") from None + except requests.HTTPError as exc: + LOGGER.info("Proxy request failed for %s: %s", provider, exc) + return Response(f"Failed to get tile from {provider}", status_code=exc.response.status_code) else: - try: - resp.raise_for_status() - except requests.HTTPError as exc: - LOGGER.info("Proxy request failed for %s: %s", provider, exc) - return Response(f"Failed to get tile from {provider}", status_code=resp.status_code) request.redis.set(cache_key, resp.content, ex=TTL) return Response(resp.content, content_type=resp.headers.get("Content-type", content_type)) -- cgit v1.2.3 From 604a37524552eadcaba6a1eda393a24558ad5472 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Tue, 14 Feb 2023 23:46:59 +0100 Subject: remove {abc} tile server prefix Those prefixes mainly exist to help browsers with their per-domain connection limit [1]: > Subdomains are used to help with browser parallel requests per domain limitation For us, we don't care about that limit, because we do not implement it (server side at least). It's even better to not have those domains, as otherwise our new connection limitation will get confused. We might think about adding them back if the tile proxy is disabled, but for now they seem to serve no purpose for us. [1]: https://wiki.openstreetmap.org/wiki/Raster_tile_providers --- fietsboek/views/tileproxy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fietsboek/views/tileproxy.py b/fietsboek/views/tileproxy.py index 55d52cf..5ed79a5 100644 --- a/fietsboek/views/tileproxy.py +++ b/fietsboek/views/tileproxy.py @@ -39,7 +39,7 @@ DEFAULT_TILE_LAYERS = [ TileLayerConfig( layer_id="osm", name="OSM", - url="https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png", + url="https://tile.openstreetmap.org/{z}/{x}/{y}.png", layer_type=LayerType.BASE, zoom=19, access=LayerAccess.PUBLIC, @@ -74,7 +74,7 @@ DEFAULT_TILE_LAYERS = [ TileLayerConfig( layer_id="osmde", name="OSMDE", - url="https://{s}.tile.openstreetmap.de/tiles/osmde/{z}/{x}/{y}.png", + url="https://tile.openstreetmap.de/tiles/osmde/{z}/{x}/{y}.png", layer_type=LayerType.BASE, zoom=19, access=LayerAccess.PUBLIC, @@ -91,7 +91,7 @@ DEFAULT_TILE_LAYERS = [ TileLayerConfig( layer_id="opentopo", name="Open Topo", - url="https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png", + url="https://tile.opentopomap.org/{z}/{x}/{y}.png", layer_type=LayerType.BASE, zoom=17, access=LayerAccess.PUBLIC, -- cgit v1.2.3