From 253eb516875a34c14e2ca9f99614f8b9d1cfcc1a Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Mon, 27 Jan 2025 09:59:38 +0100 Subject: [PATCH 1/3] fix: forgejo-runner-service.sh misses LXC config When re-creating the LXC container, the INPUTS_LXC_CONFIG must be set otherwise it will always use the default. --- examples/lxc-systemd/forgejo-runner-service.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/lxc-systemd/forgejo-runner-service.sh b/examples/lxc-systemd/forgejo-runner-service.sh index fa76c7a..224ffbd 100755 --- a/examples/lxc-systemd/forgejo-runner-service.sh +++ b/examples/lxc-systemd/forgejo-runner-service.sh @@ -159,13 +159,14 @@ function inside() { local name=$(lxc_name) lxc-helpers.sh lxc_container_run $name -- sudo --user $LXC_USER_NAME \ - INPUTS_SERIAL=$INPUTS_SERIAL \ - INPUTS_TOKEN=$INPUTS_TOKEN \ - INPUTS_FORGEJO=$INPUTS_FORGEJO \ - INPUTS_LIFETIME=$INPUTS_LIFETIME \ - KILL_AFTER=$KILL_AFTER \ - VERBOSE=$VERBOSE \ - HOST=$HOST \ + INPUTS_SERIAL="$INPUTS_SERIAL" \ + INPUTS_LXC_CONFIG="$INPUTS_LXC_CONFIG" \ + INPUTS_TOKEN="$INPUTS_TOKEN" \ + INPUTS_FORGEJO="$INPUTS_FORGEJO" \ + INPUTS_LIFETIME="$INPUTS_LIFETIME" \ + KILL_AFTER="$KILL_AFTER" \ + VERBOSE="$VERBOSE" \ + HOST="$HOST" \ $SELF_FILENAME "$@" } From 0232fe12552102eb6c4bb012078e44ba548b16e6 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Mon, 27 Jan 2025 12:02:45 +0100 Subject: [PATCH 2/3] fix: forgejo-runner-service.sh config.yml must point to the cache It is not the default $HOME/.cache/actcache and must be set in config.yml otherwise it won't be used. 
--- .forgejo/workflows/example-lxc-systemd.yml | 4 ++++ examples/lxc-systemd/forgejo-runner-service.sh | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/.forgejo/workflows/example-lxc-systemd.yml b/.forgejo/workflows/example-lxc-systemd.yml index c0c5a9e..d1d3327 100644 --- a/.forgejo/workflows/example-lxc-systemd.yml +++ b/.forgejo/workflows/example-lxc-systemd.yml @@ -82,6 +82,10 @@ jobs: service=/etc/systemd/system/forgejo-runner@.service cat $service + cache=/var/lib/forgejo-runner/runner-$INPUTS_SERIAL-lxc/.cache/actcache + touch $cache/something + lxc-attach runner-$INPUTS_SERIAL-lxc -- test -f $cache/something + - name: forgejo-runner-service.sh start / stop run: | set -x diff --git a/examples/lxc-systemd/forgejo-runner-service.sh b/examples/lxc-systemd/forgejo-runner-service.sh index 224ffbd..8bdc73d 100755 --- a/examples/lxc-systemd/forgejo-runner-service.sh +++ b/examples/lxc-systemd/forgejo-runner-service.sh @@ -207,6 +207,11 @@ function ensure_configuration_and_registration() { .runner.labels = ["docker:docker://data.forgejo.org/oci/node:${NODEJS_VERSION}-${DEBIAN_RELEASE}","lxc:lxc://debian:${DEBIAN_RELEASE}"] EOF yq --inplace --from-file $TMPDIR/edit-config $etc/config.yml + cat >$TMPDIR/edit-config < Date: Mon, 27 Jan 2025 14:17:15 +0100 Subject: [PATCH 3/3] feat: forgejo-runner-service.sh safeguard to avoid overheating re-creating the LXC container from scratch is expensive. When rebooting or when multiple units start at the same time, it may cause an excessive load. Use a global lock to guard this operation so they happen in sequence and not in parallel. They typically take around one minute to complete which means that in the event of a reboot, it will take around 1 minute * the number of runners for the unlucky one to start. During this interval workflows will have to wait. 
--- examples/lxc-systemd/forgejo-runner-service.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/lxc-systemd/forgejo-runner-service.sh b/examples/lxc-systemd/forgejo-runner-service.sh index 8bdc73d..2ff51dc 100755 --- a/examples/lxc-systemd/forgejo-runner-service.sh +++ b/examples/lxc-systemd/forgejo-runner-service.sh @@ -31,6 +31,7 @@ SELF_FILENAME=$(basename "$SELF") ETC=/etc/forgejo-runner LIB=/var/lib/forgejo-runner LOG=/var/log/forgejo-runner +LOCK=/var/lock/forgejo-runner : ${HOST:=$(hostname)} LXC_IPV4_PREFIX="10.105.7" @@ -253,10 +254,19 @@ function daemon() { set -e } -function start() { +function destroy_and_create() { stop lxc-helpers.sh lxc_container_destroy $(lxc_name) lxc_create +} + +function start() { + # it should be more than + # (time it takes for one runner to be recreated) * (number of runners) + # because they will all start at the same time on boot + local timeout=3600 + + flock --timeout $timeout $LOCK $SELF destroy_and_create local log=$LOG/$INPUTS_SERIAL.log if test -f $log; then