diff options
| -rw-r--r-- | fietsboek/models/track.py | 82 | ||||
| -rw-r--r-- | fietsboek/util.py | 40 | ||||
| -rw-r--r-- | fietsboek/views/detail.py | 20 | ||||
| -rw-r--r-- | tests/unit/test_util.py | 12 |
4 files changed, 99 insertions, 55 deletions
diff --git a/fietsboek/models/track.py b/fietsboek/models/track.py index 26de414..92ee978 100644 --- a/fietsboek/models/track.py +++ b/fietsboek/models/track.py @@ -17,12 +17,12 @@ meta information. import datetime import enum import gzip +import io import json import logging from itertools import chain from typing import TYPE_CHECKING, Optional -import gpxpy import sqlalchemy.types from babel.numbers import format_decimal from markupsafe import Markup @@ -435,34 +435,64 @@ class Track(Base): :return: The XML representation (a GPX file). """ - gpx = gpxpy.gpx.GPX() - gpx.description = self.description - gpx.name = self.title - segment = gpxpy.gpx.GPXTrackSegment() + # This is a cumbersome way to do it, as we're re-implementing XML + # serialization logic. However, recreating the track in gpxpy and + # letting it serialize it is much slower: + # For a track with around 50,000 points, the gpxpy method takes + # ~5.9 seconds here, while the "manual" buffer takes only ~2.4 seconds. + # This is a speed-up we're happy to take! + buf = io.BytesIO() + buf.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>') + buf.write(b'<gpx version="1.1" xmlns="http://www.topografix.com/GPX/1/1">') + + buf.write(b"<metadata>") + if self.title: + buf.write(b"<name>%s</name>" % util.xml_escape(self.title)) + if self.description: + buf.write(b"<desc>%s</desc>" % util.xml_escape(self.description)) + buf.write(b"</metadata>") + + # Cache for easy access, especially the date is important since it's a + # dynamic property + date = self.date + write = buf.write + + write(b"<trk>") + write(b"<trkseg>") for point in self.path().points: - segment.points.append( - gpxpy.gpx.GPXTrackPoint( - latitude=point.latitude, - longitude=point.longitude, - elevation=point.elevation, - time=self.date + datetime.timedelta(seconds=point.time_offset), - ) - ) - track = gpxpy.gpx.GPXTrack() - track.segments.append(segment) - gpx.tracks.append(track) + write(b'<trkpt lat="') + write(str(point.latitude).encode("ascii")) + write(b'" lon="') + write(str(point.longitude).encode("ascii")) + write(b'">') + write(b"<ele>") + write(str(point.elevation).encode("ascii")) + write(b"</ele>") + write(b"<time>") + write(str(date + datetime.timedelta(seconds=point.time_offset)).encode("ascii")) + write(b"</time>") + write(b"</trkpt>\n") + write(b"</trkseg>") + write(b"</trk>") + + # This loop is not as hot: for wpt in self.waypoints: - gpx.waypoints.append( - gpxpy.gpx.GPXWaypoint( - longitude=wpt.longitude, - latitude=wpt.latitude, - elevation=wpt.elevation, - name=wpt.name, - comment=wpt.description, - description=wpt.description, - ) + write( + b'<wpt lat="%s" lon="%s">' + % (util.xml_escape(str(wpt.latitude)), util.xml_escape(str(wpt.longitude))) ) - return gpx.to_xml(prettyprint=False).encode("utf-8") + if wpt.elevation is not None: + write(b"<ele>%s</ele>" % util.xml_escape(str(wpt.elevation))) + if wpt.name is not None: + write(b"<name>%s</name>" % util.xml_escape(wpt.name)) + if wpt.description is not None: + write(b"<cmt>%s</cmt>" % util.xml_escape(wpt.description)) + write(b"<desc>%s</desc>" % util.xml_escape(wpt.description)) + write(b"</wpt>") + + write(b"</gpx>") + + return buf.getvalue() @property def date(self): diff --git a/fietsboek/util.py b/fietsboek/util.py index 156b7d4..ecb3f43 100644 --- a/fietsboek/util.py +++ b/fietsboek/util.py @@ -1,7 +1,6 @@ """Various utility functions.""" import datetime -import html import importlib.resources import os import re @@ -64,6 +63,9 @@ _windows_device_files = ( ) +_valid_xml_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,-: ") + + def safe_markdown(md_source: str) -> Markup: """Transform a markdown document into a safe HTML document. @@ -406,22 +408,6 @@ def tile_url(request: Request, route_name: str, **kwargs: str) -> str: return route.replace("__x__", "{x}").replace("__y__", "{y}").replace("__z__", "{z}") -def encode_gpx(gpx: gpxpy.gpx.GPX) -> bytes: - """Encodes a GPX in-memory representation to the XML representation. - - This ensures that the resulting XML file is valid. - - Returns the contents of the XML as bytes, ready to be written to disk. - - :param gpx: The GPX file to encode. Might be modified! - :return: The encoded GPX content. - """ - for track in gpx.tracks: - if track.link: - track.link = html.escape(track.link) - return gpx.to_xml().encode("utf-8") - - def secure_filename(filename: str) -> str: r"""Pass it a filename and it will return a secure version of it. This filename can then safely be stored on a regular file system and passed @@ -480,6 +466,24 @@ def recursive_size(path: Path) -> int: return size +def xml_escape(value: str) -> bytes: + """Escapes and encodes a string to be embedded in a XML document. + + This replaces characters like < and > with their entities. + + :param value: The value. + :return: The escaped and encoded string. + """ + return b"".join( + ( + char.encode("ascii") + if char in _valid_xml_chars + else b"&#x%s;" % hex(ord(char))[2:].encode("ascii") + ) + for char in value + ) + + __all__ = [ "ALLOWED_TAGS", "ALLOWED_ATTRIBUTES", @@ -502,7 +506,7 @@ __all__ = [ "locale_display_name", "list_locales", "tile_url", - "encode_gpx", "secure_filename", "recursive_size", + "xml_escape", ] diff --git a/fietsboek/views/detail.py b/fietsboek/views/detail.py index 16c896a..ca3a0af 100644 --- a/fietsboek/views/detail.py +++ b/fietsboek/views/detail.py @@ -6,7 +6,6 @@ import io import logging from html.parser import HTMLParser -import brotli from markupsafe import Markup from pyramid.httpexceptions import ( HTTPFound, @@ -134,18 +133,17 @@ def gpx(request): wanted_filename = f"{track.id}.gpx" content_disposition = f'attachment; filename="{wanted_filename}"' gpx_data = track.gpx_xml() - # We can be nice to the client if they support it, and deliver the gzipped - # data straight. This saves decompression time on the server and saves a - # lot of bandwidth. - accepted = request.accept_encoding.acceptable_offers(["br", "gzip", "identity"]) + # We used to offer brotli compression here as well, but brotli is too + # inefficient to do on-the-fly. That was only useful while we had the data + # stored brotli-compressed. For comparison, a track with ~50k points: + # identity -- 2.4s -- 5.49 MiB + # gzip -- 2.5s -- 657.53 KiB + # brotli -- 12s -- 389.45 KiB + # So yes, brotli does give better compression than gzip, but the time is + # not worth paying for. + accepted = request.accept_encoding.acceptable_offers(["gzip", "identity"]) for encoding, _qvalue in accepted: - if encoding == "br": - data = brotli.compress(gpx_data) - response = Response(data, content_type="application/gpx+xml", content_encoding="br") - break if encoding == "gzip": - # gzip'ed GPX files are so much smaller than uncompressed ones, it - # is worth re-compressing them for the client data = gzip.compress(gpx_data) response = Response(data, content_type="application/gpx+xml", content_encoding="gzip") break diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py index 0ecfdb2..cc92058 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_util.py @@ -99,3 +99,15 @@ def test_tile_url(app_request): assert "{y}" in route_url assert "{z}" in route_url assert "bobby" in route_url + + +@pytest.mark.parametrize("value, expected", [ + ("", b""), + ("foo", b"foo"), + ("<foo>", b"<foo>"), + ("foo bar", b"foo bar"), + ("</gpx>", b"</gpx>"), + ("äÖß", b"äÖß"), +]) +def test_xml_escape(value, expected): + assert util.xml_escape(value) == expected |
