From cba02bdd0b8d7c10b9d25a3052e60ef9670eccf8 Mon Sep 17 00:00:00 2001
From: Daniel Schadt
Date: Wed, 4 Jan 2023 00:42:33 +0100
Subject: Add fietscron script for maintenance

---
 CHANGELOG.rst                   |  1 +
 doc/administration.rst          |  1 +
 doc/administration/cronjobs.rst | 29 +++++++++++++
 fietsboek/scripts/fietscron.py  | 91 +++++++++++++++++++++++++++++++++++++++++
 pyproject.toml                  |  1 +
 5 files changed, 123 insertions(+)
 create mode 100644 doc/administration/cronjobs.rst
 create mode 100644 fietsboek/scripts/fietscron.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index ecf48e4..8668b5c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,7 @@ Added
 - The ability to load external (third party) language packs.
 - GPX metadata (track title, description & author) are now embedded into the
   actual GPX file.
+- The ``fietscron`` maintenance script.
 
 Changed
 ^^^^^^^
diff --git a/doc/administration.rst b/doc/administration.rst
index 7aa2393..706a4cb 100644
--- a/doc/administration.rst
+++ b/doc/administration.rst
@@ -9,6 +9,7 @@ Administration Guide
     administration/configuration
     administration/upgrading
     administration/backup
+    administration/cronjobs
     administration/custom-pages
 
 This guide contains information pertaining to the administration of a Fietsboek
diff --git a/doc/administration/cronjobs.rst b/doc/administration/cronjobs.rst
new file mode 100644
index 0000000..a7d1a95
--- /dev/null
+++ b/doc/administration/cronjobs.rst
@@ -0,0 +1,29 @@
+Cronjobs
+========
+
+There are some maintenance jobs that should be done at regular intervals to
+keep the instance running smoothly:
+
+* Old, abandoned uploads should be deleted from the database to free up the
+  space of the temporary GPX files.
+* Tracks that are not present in the cache should have their cache entry
+  populated by pre-calculating their length/uphill/downhill/… values. This
+  makes accessing those values faster when a user requests the track's details.
+
+Fietsboek comes with a script to deal with those jobs: ``fietscron``. You can
+run it like this::
+
+    fietscron -c path_to_config.ini
+
+You can use any mechanism you like to run this command at pre-defined intervals
+(e.g. once an hour). You can find information about this in various sources:
+
+* https://wiki.archlinux.org/title/Systemd/Timers
+* https://wiki.archlinux.org/title/Cron
+* https://wiki.gentoo.org/wiki/Cron
+
+For example, the classic *cron*-way to run ``fietscron`` once per hour would be
+to add the following to your *crontab* (using ``crontab -e``)::
+
+    0 * * * * /path/to/venv/bin/fietscron -c /path/to/settings.ini
+
diff --git a/fietsboek/scripts/fietscron.py b/fietsboek/scripts/fietscron.py
new file mode 100644
index 0000000..3b22ac2
--- /dev/null
+++ b/fietsboek/scripts/fietscron.py
@@ -0,0 +1,91 @@
+"""Script to do maintenance actions for fietsboek."""
+import datetime
+import logging
+import logging.config
+
+import click
+import pyramid.paster
+from sqlalchemy import create_engine, delete, exists, not_, select
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session
+
+from .. import config as mod_config
+from .. import models
+from ..data import DataManager
+
+LOGGER = logging.getLogger(__name__)
+
+
+@click.command()
+@click.option(
+    "-c",
+    "--config",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+    help="Path to the Fietsboek configuration file",
+)
+def cli(config):
+    """Runs regular maintenance operations on the instance.
+
+    This does the following:
+
+    \b
+    * Deletes pending uploads that are older than 24 hours.
+    * Rebuilds the cache for missing tracks.
+    """
+    # The same ini file provides both the logging setup and the app settings.
+    logging.config.fileConfig(config)
+    settings = pyramid.paster.get_appsettings(config)
+
+    # Use a separate name so that the ``config`` path argument is not rebound.
+    parsed_config = mod_config.parse(settings)
+    engine = create_engine(parsed_config.sqlalchemy_url)
+    data_manager = DataManager(parsed_config.data_dir)
+
+    LOGGER.debug("Starting maintenance tasks")
+    remove_old_uploads(engine)
+    rebuild_cache(engine, data_manager)
+
+
+def remove_old_uploads(engine: Engine):
+    """Removes old uploads from the database.
+
+    An upload is considered old if its ``uploaded_at`` timestamp lies more than
+    24 hours in the past.
+
+    :param engine: The database engine to run the deletion on.
+    """
+    LOGGER.info("Deleting old uploads")
+    # NOTE(review): This is a naive local timestamp. Confirm that ``uploaded_at``
+    # is stored using the same convention.
+    limit = datetime.datetime.now() - datetime.timedelta(hours=24)
+    stmt = delete(models.Upload).where(models.Upload.uploaded_at < limit)
+    # The context manager closes the session even if the statement fails.
+    with Session(engine) as session:
+        session.execute(stmt)
+        session.commit()
+
+
+def rebuild_cache(engine: Engine, data_manager: DataManager):
+    """Rebuilds the cache entries that are currently missing.
+
+    :param engine: The database engine used to find and update the tracks.
+    :param data_manager: The data manager used to load the tracks' GPX data.
+    """
+    LOGGER.debug("Rebuilding caches")
+    # Select every track for which no TrackCache row exists.
+    has_cache = exists(
+        select(models.TrackCache).where(models.TrackCache.track_id == models.Track.id)
+    )
+    needed_rebuilds = select(models.Track).where(not_(has_cache))
+    with Session(engine) as session:
+        for track in session.execute(needed_rebuilds).scalars():
+            LOGGER.info("Rebuilding cache for track %d", track.id)
+            gpx_data = data_manager.open(track.id).decompress_gpx()
+            track.ensure_cache(gpx_data)
+            session.add(track)
+            # Committing inside the loop persists each rebuilt entry right away.
+            session.commit()
+
+
+__all__ = ["cli"]
diff --git a/pyproject.toml b/pyproject.toml
index 2ad5db5..01eb607 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,6 +92,7 @@ types-redis = "^4.3.21.6"
 
 [tool.poetry.scripts]
 fietsctl = "fietsboek.scripts.fietsctl:main"
+fietscron = "fietsboek.scripts.fietscron:cli"
 fietsupdate = "fietsboek.updater.cli:cli"
 
 [tool.poetry.plugins."paste.app_factory"]
-- 
cgit v1.2.3