aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fietsboek/models/track.py82
-rw-r--r--fietsboek/util.py40
-rw-r--r--fietsboek/views/detail.py20
-rw-r--r--tests/unit/test_util.py12
4 files changed, 99 insertions, 55 deletions
diff --git a/fietsboek/models/track.py b/fietsboek/models/track.py
index 26de414..92ee978 100644
--- a/fietsboek/models/track.py
+++ b/fietsboek/models/track.py
@@ -17,12 +17,12 @@ meta information.
import datetime
import enum
import gzip
+import io
import json
import logging
from itertools import chain
from typing import TYPE_CHECKING, Optional
-import gpxpy
import sqlalchemy.types
from babel.numbers import format_decimal
from markupsafe import Markup
@@ -435,34 +435,64 @@ class Track(Base):
:return: The XML representation (a GPX file).
"""
- gpx = gpxpy.gpx.GPX()
- gpx.description = self.description
- gpx.name = self.title
- segment = gpxpy.gpx.GPXTrackSegment()
+ # This is a cumbersome way to do it, as we're re-implementing XML
+ # serialization logic. However, recreating the track in gpxpy and
+ # letting it serialize it is much slower:
+ # For a track with around 50,000 points, the gpxpy method takes
+ # ~5.9 seconds here, while the "manual" buffer takes only ~2.4 seconds.
+ # This is a speed-up we're happy to take!
+ buf = io.BytesIO()
+ buf.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>')
+ buf.write(b'<gpx version="1.1" xmlns="http://www.topografix.com/GPX/1/1">')
+
+ buf.write(b"<metadata>")
+ if self.title:
+ buf.write(b"<name>%s</name>" % util.xml_escape(self.title))
+ if self.description:
+ buf.write(b"<desc>%s</desc>" % util.xml_escape(self.description))
+ buf.write(b"</metadata>")
+
+ # Cache for easy access, especially the date is important since it's a
+ # dynamic property
+ date = self.date
+ write = buf.write
+
+ write(b"<trk>")
+ write(b"<trkseg>")
for point in self.path().points:
- segment.points.append(
- gpxpy.gpx.GPXTrackPoint(
- latitude=point.latitude,
- longitude=point.longitude,
- elevation=point.elevation,
- time=self.date + datetime.timedelta(seconds=point.time_offset),
- )
- )
- track = gpxpy.gpx.GPXTrack()
- track.segments.append(segment)
- gpx.tracks.append(track)
+ write(b'<trkpt lat="')
+ write(str(point.latitude).encode("ascii"))
+ write(b'" lon="')
+ write(str(point.longitude).encode("ascii"))
+ write(b'">')
+ write(b"<ele>")
+ write(str(point.elevation).encode("ascii"))
+ write(b"</ele>")
+ write(b"<time>")
+ write(str(date + datetime.timedelta(seconds=point.time_offset)).encode("ascii"))
+ write(b"</time>")
+ write(b"</trkpt>\n")
+ write(b"</trkseg>")
+ write(b"</trk>")
+
+ # This loop is not as hot:
for wpt in self.waypoints:
- gpx.waypoints.append(
- gpxpy.gpx.GPXWaypoint(
- longitude=wpt.longitude,
- latitude=wpt.latitude,
- elevation=wpt.elevation,
- name=wpt.name,
- comment=wpt.description,
- description=wpt.description,
- )
+ write(
+ b'<wpt lat="%s" lon="%s">'
+ % (util.xml_escape(str(wpt.latitude)), util.xml_escape(str(wpt.longitude)))
)
- return gpx.to_xml(prettyprint=False).encode("utf-8")
+ if wpt.elevation is not None:
+ write(b"<ele>%s</ele>" % util.xml_escape(str(wpt.elevation)))
+ if wpt.name is not None:
+ write(b"<name>%s</name>" % util.xml_escape(wpt.name))
+ if wpt.description is not None:
+ write(b"<cmt>%s</cmt>" % util.xml_escape(wpt.description))
+ write(b"<desc>%s</desc>" % util.xml_escape(wpt.description))
+ write(b"</wpt>")
+
+ write(b"</gpx>")
+
+ return buf.getvalue()
@property
def date(self):
diff --git a/fietsboek/util.py b/fietsboek/util.py
index 156b7d4..ecb3f43 100644
--- a/fietsboek/util.py
+++ b/fietsboek/util.py
@@ -1,7 +1,6 @@
"""Various utility functions."""
import datetime
-import html
import importlib.resources
import os
import re
@@ -64,6 +63,9 @@ _windows_device_files = (
)
+_valid_xml_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,-: ")
+
+
def safe_markdown(md_source: str) -> Markup:
"""Transform a markdown document into a safe HTML document.
@@ -406,22 +408,6 @@ def tile_url(request: Request, route_name: str, **kwargs: str) -> str:
return route.replace("__x__", "{x}").replace("__y__", "{y}").replace("__z__", "{z}")
-def encode_gpx(gpx: gpxpy.gpx.GPX) -> bytes:
- """Encodes a GPX in-memory representation to the XML representation.
-
- This ensures that the resulting XML file is valid.
-
- Returns the contents of the XML as bytes, ready to be written to disk.
-
- :param gpx: The GPX file to encode. Might be modified!
- :return: The encoded GPX content.
- """
- for track in gpx.tracks:
- if track.link:
- track.link = html.escape(track.link)
- return gpx.to_xml().encode("utf-8")
-
-
def secure_filename(filename: str) -> str:
r"""Pass it a filename and it will return a secure version of it. This
filename can then safely be stored on a regular file system and passed
@@ -480,6 +466,24 @@ def recursive_size(path: Path) -> int:
return size
+def xml_escape(value: str) -> bytes:
+ """Escapes and encodes a string to be embedded in a XML document.
+
+ This replaces characters like < and > with their entities.
+
+ :param value: The value.
+ :return: The escaped and encoded string.
+ """
+ return b"".join(
+ (
+ char.encode("ascii")
+ if char in _valid_xml_chars
+ else b"&#x%s;" % hex(ord(char))[2:].encode("ascii")
+ )
+ for char in value
+ )
+
+
__all__ = [
"ALLOWED_TAGS",
"ALLOWED_ATTRIBUTES",
@@ -502,7 +506,7 @@ __all__ = [
"locale_display_name",
"list_locales",
"tile_url",
- "encode_gpx",
"secure_filename",
"recursive_size",
+ "xml_escape",
]
diff --git a/fietsboek/views/detail.py b/fietsboek/views/detail.py
index 16c896a..ca3a0af 100644
--- a/fietsboek/views/detail.py
+++ b/fietsboek/views/detail.py
@@ -6,7 +6,6 @@ import io
import logging
from html.parser import HTMLParser
-import brotli
from markupsafe import Markup
from pyramid.httpexceptions import (
HTTPFound,
@@ -134,18 +133,17 @@ def gpx(request):
wanted_filename = f"{track.id}.gpx"
content_disposition = f'attachment; filename="{wanted_filename}"'
gpx_data = track.gpx_xml()
- # We can be nice to the client if they support it, and deliver the gzipped
- # data straight. This saves decompression time on the server and saves a
- # lot of bandwidth.
- accepted = request.accept_encoding.acceptable_offers(["br", "gzip", "identity"])
+ # We used to offer brotli compression here as well, but brotli is too
+ # inefficient to do on-the-fly. That was only useful while we had the data
+ # stored brotli-compressed. For comparison, a track with ~50k points:
+ # identity -- 2.4s -- 5.49 MiB
+ # gzip -- 2.5s -- 657.53 KiB
+ # brotli -- 12s -- 389.45 KiB
+ # So yes, brotli does give better compression than gzip, but the time is
+ # not worth paying for.
+ accepted = request.accept_encoding.acceptable_offers(["gzip", "identity"])
for encoding, _qvalue in accepted:
- if encoding == "br":
- data = brotli.compress(gpx_data)
- response = Response(data, content_type="application/gpx+xml", content_encoding="br")
- break
if encoding == "gzip":
- # gzip'ed GPX files are so much smaller than uncompressed ones, it
- # is worth re-compressing them for the client
data = gzip.compress(gpx_data)
response = Response(data, content_type="application/gpx+xml", content_encoding="gzip")
break
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index 0ecfdb2..cc92058 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -99,3 +99,15 @@ def test_tile_url(app_request):
assert "{y}" in route_url
assert "{z}" in route_url
assert "bobby" in route_url
+
+
+@pytest.mark.parametrize("value, expected", [
+ ("", b""),
+ ("foo", b"foo"),
+ ("<foo>", b"&#x3c;foo&#x3e;"),
+ ("foo bar", b"foo bar"),
+ ("</gpx>", b"&#x3c;&#x2f;gpx&#x3e;"),
+ ("äÖß", b"&#xe4;&#xd6;&#xdf;"),
+])
+def test_xml_escape(value, expected):
+ assert util.xml_escape(value) == expected