diff options
| author | Daniel Schadt <kingdread@gmx.de> | 2025-11-16 00:45:22 +0100 |
|---|---|---|
| committer | Daniel Schadt <kingdread@gmx.de> | 2025-11-16 00:45:22 +0100 |
| commit | 5b311986e47d580fb149085be2bb524952ead319 (patch) | |
| tree | 5a1f4db9edb99b5eaa923065af1feb92053649f5 | |
| parent | 510164a381d426050128769f58f8aae8daed9007 (diff) | |
| download | fietsboek-5b311986e47d580fb149085be2bb524952ead319.tar.gz fietsboek-5b311986e47d580fb149085be2bb524952ead319.tar.bz2 fietsboek-5b311986e47d580fb149085be2bb524952ead319.zip | |
It turns out that adding multiple thousands of track points via the ORM
is quite slow, which is especially noticeable in the browse_paged test
(as 50 tracks are added there).
This provides the fast_set_path method, which goes directly to the
database, utilizing the executemany() capability.
On the CI, the gpx-influx increased test runtime from ~4min to ~18min.
This change should make it much faster again.
| -rw-r--r-- | fietsboek/actions.py | 9 | ||||
| -rw-r--r-- | fietsboek/models/track.py | 32 | ||||
| -rw-r--r-- | fietsboek/views/edit.py | 2 | ||||
| -rw-r--r-- | tests/integration/test_browse.py | 19 |
4 files changed, 51 insertions, 11 deletions
diff --git a/fietsboek/actions.py b/fietsboek/actions.py index b4bced3..bb6cafd 100644 --- a/fietsboek/actions.py +++ b/fietsboek/actions.py @@ -73,6 +73,10 @@ def add_track( # pylint: disable=too-many-positional-arguments,too-many-locals,too-many-arguments LOGGER.debug("Inserting new track...") track = convert.smart_convert(gpx_data) + path = track.path() + # We set the path later using fast_set_path. By setting the empty list + # here, we avoid doing an insert of many thousand points prematurely. + track.points = [] track.owner = owner track.title = title track.visibility = visibility @@ -89,7 +93,6 @@ def add_track( # Save the GPX data LOGGER.debug("Creating a new data folder for %d", track.id) assert track.id is not None - path = track.path() with data_manager.initialize(track.id) as manager: LOGGER.debug("Saving backup to %s", manager.backup_path()) manager.compress_backup(gpx_data) @@ -101,7 +104,7 @@ def add_track( [tfm.identifier(), tfm.parameters.model_dump()] for tfm in transformers ] - track.set_path(path) + track.fast_set_path(path) # Best time to build the cache is right after the upload, but *after* the # transformers have been applied! @@ -222,7 +225,7 @@ def execute_transformers(request: Request, track: models.Track): LOGGER.debug("Running %s with %r", transformer, transformer.parameters) transformer.execute(path) - track.set_path(path) + track.fast_set_path(path) LOGGER.debug("Saving new transformers on %d", track.id) track.transformers = serialized diff --git a/fietsboek/models/track.py b/fietsboek/models/track.py index 6e132ea..ab9d698 100644 --- a/fietsboek/models/track.py +++ b/fietsboek/models/track.py @@ -47,6 +47,9 @@ from sqlalchemy import ( LargeBinary, Table, Text, + delete, + insert, + inspect, select, ) from sqlalchemy.orm import Mapped, mapped_column, relationship @@ -375,6 +378,9 @@ class Track(Base): def set_path(self, path: geo.Path): """Sets this track's represented path to the given path. 
+ If the track is in the database session, you can use + :meth:`fast_set_path`. + :param path: The new GPS path of this track. """ self.points = [ @@ -389,6 +395,32 @@ class Track(Base): for i, point in enumerate(path.points) ] + def fast_set_path(self, path: geo.Path): + """Sets this track's represented path to the given path. + + This method is faster than using :meth:`set_path`, as it does a bulk + insert of the path points. However, this requires the path to be in the + database and the session. + + :param path: The new GPS path of this track. + """ + session = inspect(self).session + assert session + points = [ + { + "track_id": self.id, + "index": index, + "longitude": p.longitude, + "latitude": p.latitude, + "elevation": p.elevation, + "time_offset": p.time_offset, + } + for index, p in enumerate(path.points) + ] + session.execute(delete(TrackPoint).where(TrackPoint.track_id == self.id)) + session.execute(insert(TrackPoint), points) + session.expire(self, ["points"]) + def path(self) -> geo.Path: """Returns the path of this track. 
diff --git a/fietsboek/views/edit.py b/fietsboek/views/edit.py index a40b8c2..d5e3a92 100644 --- a/fietsboek/views/edit.py +++ b/fietsboek/views/edit.py @@ -99,7 +99,7 @@ def do_edit(request): LOGGER.info("Could not parse gpx: %s", exc) return HTTPFound(request.route_url("edit", track_id=track.id)) data.compress_backup(gpx_bytes) - track.set_path(new_track.path()) + track.fast_set_path(new_track.path()) track.transformers = [] redo_cache = True diff --git a/tests/integration/test_browse.py b/tests/integration/test_browse.py index 4b38ddf..1994186 100644 --- a/tests/integration/test_browse.py +++ b/tests/integration/test_browse.py @@ -3,6 +3,8 @@ import zipfile from contextlib import contextmanager from datetime import datetime +from sqlalchemy import delete, inspect, text + from testutils import load_gpx_asset from fietsboek import convert, models from fietsboek.models.track import Visibility @@ -87,6 +89,7 @@ def a_lot_of_tracks(tm, dbsession, owner, data_manager): gpx_data = load_gpx_asset("MyTourbook_1.gpx.gz") skel = convert.smart_convert(gpx_data) + path = skel.path() tracks = [] track_ids = [] @@ -102,12 +105,12 @@ def a_lot_of_tracks(tm, dbsession, owner, data_manager): tagged_people=[], ) track.date = datetime(2022 - index, 3, 14, 9, 26, 59) - track.set_path(skel.path()) dbsession.add(track) dbsession.flush() - data_manager.initialize(track.id) + track.fast_set_path(path) tracks.append(track) track_ids.append(track.id) + data_manager.initialize(track.id) tm.begin() tm.doom() @@ -116,9 +119,11 @@ def a_lot_of_tracks(tm, dbsession, owner, data_manager): yield track_ids finally: tm.abort() + table = inspect(models.track.TrackPoint).tables[0] with tm: - for track in tracks: - dbsession.delete(track) + for track_id in track_ids: + dbsession.execute(delete(table).where(table.c.track_id == track_id)) + dbsession.execute(delete(models.Track).where(models.Track.id == track_id)) tm.begin() tm.doom() @@ -128,7 +133,7 @@ def test_browse(testapp, dbsession, route_path, 
logged_in, tm, data_manager): # Ensure there are some tracks in the database with added_tracks(tm, dbsession, logged_in, data_manager): # Now go to the browse page - browse = testapp.get(route_path('browse')) + browse = testapp.get(route_path("browse")) assert "Foobar" in browse.text assert "Barfoo" in browse.text @@ -164,13 +169,13 @@ def test_archive(testapp, dbsession, route_path, logged_in, tm, data_manager): with added_tracks(tm, dbsession, logged_in, data_manager) as tracks: archive = testapp.get( route_path( - 'track-archive', + "track-archive", _query=[("track_id[]", tracks[0]), ("track_id[]", tracks[1])], ) ) result = io.BytesIO(archive.body) - with zipfile.ZipFile(result, 'r') as zipped: + with zipfile.ZipFile(result, "r") as zipped: assert len(zipped.namelist()) == 2 assert f"track_{tracks[0]}.gpx" in zipped.namelist() assert f"track_{tracks[1]}.gpx" in zipped.namelist() |
