From 324fc8923b056bf985d4ae49a7de3ac028a89722 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Sat, 22 Nov 2025 19:43:34 +0100 Subject: speed up track xml serialization The comment explains it. --- fietsboek/models/track.py | 81 ++++++++++++++++++++++++++++++++--------------- fietsboek/util.py | 22 +++++++++++++ 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/fietsboek/models/track.py b/fietsboek/models/track.py index 3bf7eee..5f1e081 100644 --- a/fietsboek/models/track.py +++ b/fietsboek/models/track.py @@ -17,6 +17,7 @@ meta information. import datetime import enum import gzip +import io import json import logging from itertools import chain @@ -435,34 +436,64 @@ class Track(Base): :return: The XML representation (a GPX file). """ - gpx = gpxpy.gpx.GPX() - gpx.description = self.description - gpx.name = self.title - segment = gpxpy.gpx.GPXTrackSegment() + # This is a cumbersome way to do it, as we're re-implementing XML + # serialization logic. However, recreating the track in gpxpy and + # letting it serialize it is much slower: + # For a track with around 50,000 points, the gpxpy method takes + # ~5.9 seconds here, while the "manual" buffer takes only ~2.4 seconds. + # This is a speed-up we're happy to take! 
+ buf = io.BytesIO() + buf.write(b'') + buf.write(b'') + + buf.write(b"") + if self.title: + buf.write(b"%s" % util.xml_escape(self.title)) + if self.description: + buf.write(b"%s" % util.xml_escape(self.description)) + buf.write(b"") + + # Cache for easy access, especially the date is important since it's a + # dynamic property + date = self.date + write = buf.write + + write(b"") + write(b"") for point in self.path().points: - segment.points.append( - gpxpy.gpx.GPXTrackPoint( - latitude=point.latitude, - longitude=point.longitude, - elevation=point.elevation, - time=self.date + datetime.timedelta(seconds=point.time_offset), - ) - ) - track = gpxpy.gpx.GPXTrack() - track.segments.append(segment) - gpx.tracks.append(track) + write(b'') + write(b"") + write(str(point.elevation).encode("ascii")) + write(b"") + write(b"") + write(b"\n") + write(b"") + write(b"") + + # This loop is not as hot: for wpt in self.waypoints: - gpx.waypoints.append( - gpxpy.gpx.GPXWaypoint( - longitude=wpt.longitude, - latitude=wpt.latitude, - elevation=wpt.elevation, - name=wpt.name, - comment=wpt.description, - description=wpt.description, - ) + write( + b'' + % (util.xml_escape(str(wpt.latitude)), util.xml_escape(str(wpt.longitude))) ) - return gpx.to_xml(prettyprint=False).encode("utf-8") + if wpt.elevation is not None: + write(b"%s" % util.xml_escape(str(wpt.elevation))) + if wpt.name is not None: + write(b"%s" % util.xml_escape(wpt.name)) + if wpt.description is not None: + write(b"%s" % util.xml_escape(wpt.description)) + write(b"%s" % util.xml_escape(wpt.description)) + write(b"") + + write(b"") + + return buf.getvalue() @property def date(self): diff --git a/fietsboek/util.py b/fietsboek/util.py index 156b7d4..37e0943 100644 --- a/fietsboek/util.py +++ b/fietsboek/util.py @@ -64,6 +64,9 @@ _windows_device_files = ( ) +_valid_xml_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,-: ") + + def safe_markdown(md_source: str) -> Markup: """Transform a markdown 
# Characters that are copied into the output verbatim. Everything else is
# written as a numeric character reference, so markup-significant characters
# such as <, > and & can never end up unescaped in the document.
_valid_xml_chars = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,-: "
)

# Reference to U+FFFD (REPLACEMENT CHARACTER), emitted in place of code points
# that may not appear in an XML 1.0 document at all.
_xml_replacement_ref = b"&#xfffd;"


def _is_xml_char(codepoint: int) -> bool:
    """Checks whether a code point matches the XML 1.0 ``Char`` production.

    :param codepoint: The Unicode code point to check.
    :return: ``True`` if the code point may appear in an XML 1.0 document.
    """
    return (
        codepoint in (0x9, 0xA, 0xD)
        or 0x20 <= codepoint <= 0xD7FF
        or 0xE000 <= codepoint <= 0xFFFD
        or 0x10000 <= codepoint <= 0x10FFFF
    )


def xml_escape(value: str) -> bytes:
    """Escapes and encodes a string to be embedded in an XML document.

    Characters from a small whitelist (ASCII letters, digits, space and
    ``.,-:``) are emitted as-is. Every other character, including ``<``, ``>``
    and ``&``, is replaced by a hexadecimal character reference, so the result
    is pure ASCII.

    Code points that XML 1.0 does not allow (for example NUL, most control
    characters and lone surrogates) cannot be expressed even as a character
    reference: a parser rejects the whole document when it sees one. Those are
    replaced by a reference to U+FFFD so that the output stays well-formed.

    :param value: The value.
    :return: The escaped and encoded string.
    """
    parts = []
    for char in value:
        if char in _valid_xml_chars:
            parts.append(char.encode("ascii"))
            continue
        codepoint = ord(char)
        if _is_xml_char(codepoint):
            parts.append(b"&#x%x;" % codepoint)
        else:
            # A reference like &#x0; is not well-formed XML, substitute it.
            parts.append(_xml_replacement_ref)
    return b"".join(parts)