From 5f92ffdbc6ee67ce710ec711809d10a25270acd5 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Sat, 25 Jan 2025 12:24:31 +0000 Subject: [PATCH] feat: systemd unit example for managing LXC containers (#451) Refs: https://code.forgejo.org/forgejo/runner/issues/450 Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/451 Reviewed-by: Michael Kriese Co-authored-by: Earl Warren Co-committed-by: Earl Warren --- .forgejo/workflows/example-lxc-systemd.yml | 132 ++++++++ examples/README.md | 4 +- examples/lxc-systemd/README.md | 66 ++++ .../lxc-systemd/forgejo-runner-service.sh | 318 ++++++++++++++++++ 4 files changed, 518 insertions(+), 2 deletions(-) create mode 100644 .forgejo/workflows/example-lxc-systemd.yml create mode 100644 examples/lxc-systemd/README.md create mode 100755 examples/lxc-systemd/forgejo-runner-service.sh diff --git a/.forgejo/workflows/example-lxc-systemd.yml b/.forgejo/workflows/example-lxc-systemd.yml new file mode 100644 index 0000000..c0c5a9e --- /dev/null +++ b/.forgejo/workflows/example-lxc-systemd.yml @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: MIT +on: + push: + branches: + - 'main' + pull_request: + +env: + SERIAL: "30" + LIFETIME: "60" + SYSTEMD_OPTIONS: "--no-pager --full" + +jobs: + example-lxc-systemd: + if: github.repository_owner != 'forgejo-integration' && github.repository_owner != 'forgejo-experimental' && github.repository_owner != 'forgejo-release' + runs-on: lxc-bookworm + steps: + - uses: https://data.forgejo.org/actions/checkout@v4 + + - name: forgejo-runner-service.sh dependencies + # run before setup-forgejo because it installs LXC and + # this would do nothing (false positive if a bug sneaks in) + run: | + set -x + cd examples/lxc-systemd + VERBOSE=true ./forgejo-runner-service.sh dependencies + lxc-ls + + - id: forgejo + uses: https://data.forgejo.org/actions/setup-forgejo@v2.0.7 + with: + user: root + password: admin1234 + binary: 
https://code.forgejo.org/forgejo/forgejo/releases/download/v7.0.12/forgejo-7.0.12-linux-amd64 + # must be the same as LXC_IPV4_PREFIX in examples/lxc-systemd/forgejo-runner-service.sh + lxc-ip-prefix: 10.105.7 + + - name: forgejo-runner-service.sh env + run: | + set -x + # this Forgejo instance needs to be reachable from within the LXC + # container created by forgejo-runner-service.sh + url=http://root:admin1234@${{ steps.forgejo.outputs.host-port }} + docker ps --all + export PATH=$(dirname /tmp/*/forgejocli):$PATH + token=$(su -c 'forgejocli -- actions generate-runner-token' forgejo) + cat > /tmp/env <> $env + + service=/etc/systemd/system/forgejo-runner@.service + cat $service + + - name: forgejo-runner-service.sh start / stop + run: | + set -x + serial=${{ env.SERIAL }} + all="${{ env.SYSTEMD_OPTIONS }}" + + systemctl start forgejo-runner@$serial + systemctl $all status forgejo-runner@$serial + started_running=/etc/forgejo-runner/$serial/started-running + killed_gracefully=/etc/forgejo-runner/$serial/killed-gracefully + stopped_gracefully=/etc/forgejo-runner/$serial/stopped-gracefully + retry --delay 5 --times 20 cp -a $started_running /tmp/first-run + retry --delay 1 --times 30 grep --quiet 'Starting runner daemon' /var/log/forgejo-runner/$serial.log + systemctl stop forgejo-runner@$serial + ! systemctl $all status forgejo-runner@$serial + ls -l /etc/forgejo-runner/$serial + test -f $killed_gracefully + test -f $stopped_gracefully + + systemctl start forgejo-runner@$serial + retry --delay 5 --times 20 cp -a $started_running /tmp/second-run + ! test -f $killed_gracefully + ! test -f $stopped_gracefully + lifetime=${{ env.LIFETIME }} + # give it time to restart at least once + ls -l /etc/forgejo-runner/$serial + sleep $lifetime ; sleep $lifetime + ls -l /etc/forgejo-runner/$serial + ! test -f $killed_gracefully + ! 
test -f $stopped_gracefully + retry --delay 5 --times 20 cp -a $started_running /tmp/third-run + systemctl stop forgejo-runner@$serial + ls -l /etc/forgejo-runner/$serial + test -f $killed_gracefully + test -f $stopped_gracefully + + ls -l /tmp/*-run + test /tmp/first-run -ot /tmp/second-run + test /tmp/second-run -ot /tmp/third-run + + - name: forgejo-runner-service.sh status & destroy + if: always() + run: | + eval $(cat /tmp/env) + set -x + cat /var/log/forgejo-runner/${{ env.SERIAL }}.log || true + journalctl ${{ env.SYSTEMD_OPTIONS }} --unit forgejo-runner@${{ env.SERIAL }} || true + ./examples/lxc-systemd/forgejo-runner-service.sh lxc_destroy diff --git a/examples/README.md b/examples/README.md index f9dd774..c29abb0 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,10 +1,10 @@ This directory contains a collection of usage and deployment examples. -Workflow examples can be found [in the documentation](https://forgejo.org/docs/next/user/actions/) -and in the [sources of the setup-forgejo](https://code.forgejo.org/actions/setup-forgejo/src/branch/main/testdata) action. +Workflow examples can be found [in the documentation](https://forgejo.org/docs/next/user/actions/). 
| Section | Description | |-----------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | [`docker`](docker) | using the host docker server by mounting the socket | +| [`LXC systemd`](lxc-systemd) | systemd unit managing LXC containers dedicated to a single runner | | [`docker-compose`](docker-compose) | all in one docker-compose with the Forgejo server, the runner and docker in docker | | [`kubernetes`](kubernetes) | a sample deployment for the Forgejo runner | diff --git a/examples/lxc-systemd/README.md b/examples/lxc-systemd/README.md new file mode 100644 index 0000000..b039ee8 --- /dev/null +++ b/examples/lxc-systemd/README.md @@ -0,0 +1,66 @@ +forgejo-runner-service.sh installs a [Forgejo runner](https://forgejo.org/docs/next/admin/runner-installation/) within an [LXC container](https://linuxcontainers.org/lxc/) and runs it from a systemd service. 
+ +## Quickstart + +- Install: `sudo wget -O /usr/local/bin/forgejo-runner-service.sh https://code.forgejo.org/forgejo/runner/raw/branch/main/examples/lxc-systemd/forgejo-runner-service.sh && sudo chmod +x /usr/local/bin/forgejo-runner-service.sh` +- Obtain a runner registration token ($TOKEN) +- Choose a serial number that is not already in use in `/etc/forgejo-runner` +- Create a runner `INPUTS_SERIAL=30 INPUTS_TOKEN=$TOKEN INPUTS_FORGEJO=https://code.forgejo.org forgejo-runner-service.sh` +- Start `systemctl enable --now forgejo-runner@$INPUTS_SERIAL` +- Monitor with: + - `systemctl status forgejo-runner@$INPUTS_SERIAL` + - `tail --follow=name /var/log/forgejo-runner/$INPUTS_SERIAL.log` + +## Installation or upgrade + +- `sudo wget -O /usr/local/bin/forgejo-runner-service.sh https://code.forgejo.org/forgejo/runner/raw/branch/main/examples/lxc-systemd/forgejo-runner-service.sh && sudo chmod +x /usr/local/bin/forgejo-runner-service.sh` + +## Description + +- Each runner is assigned a unique serial number (`$INPUTS_SERIAL`) +- The configuration is in `/etc/forgejo-runner/$INPUTS_SERIAL` +- The environment variables are in `/etc/forgejo-runner/$INPUTS_SERIAL/env` +- The cache is in `/var/lib/forgejo-runner/runner-$INPUTS_SERIAL-lxc` +- The systemd service unit is `forgejo-runner@$INPUTS_SERIAL` +- The logs of the runner daemon are in `/var/log/forgejo-runner/$INPUTS_SERIAL.log` + +## How it works + +- Creating a runner (for instance with `INPUTS_SERIAL=30 INPUTS_TOKEN=$TOKEN INPUTS_FORGEJO=https://code.forgejo.org forgejo-runner-service.sh`) will: + - use `$INPUTS_TOKEN` to register on `$INPUTS_FORGEJO` and save the result in the `/etc/forgejo-runner/$INPUTS_SERIAL/.runner` file + - generate a default configuration file in the `/etc/forgejo-runner/$INPUTS_SERIAL/config.yml` file which can then be manually edited +- Each runner is launched in a dedicated LXC container named `runner-$INPUTS_SERIAL-lxc` with the following bind mounts: + - 
`/etc/forgejo-runner/$INPUTS_SERIAL` + - `/var/lib/forgejo-runner/runner-$INPUTS_SERIAL-lxc/.cache/actcache` +- `systemctl start forgejo-runner@$INPUTS_SERIAL` will do the following when it starts and every `$INPUTS_LIFETIME` interval after that: + - attempt to gracefully stop (SIGTERM) the runner, waiting for all jobs to complete + - forcibly kill the runner if it does not stop within 6h + - shut down the LXC container and delete it (the bind-mounted volumes are preserved) + - create a brand new LXC container (with the specified `$INPUTS_LXC_CONFIG`) + - install and run a Forgejo runner daemon in the LXC container using `/etc/forgejo-runner/$INPUTS_SERIAL/config.yml` + - redirect the output of the runner to `/var/log/forgejo-runner/$INPUTS_SERIAL.log` +- `systemctl stop forgejo-runner@$INPUTS_SERIAL` will stop the runner but keep the LXC container running + +## Creation + +The creation of a new runner is driven by the following environment variables: + +- `INPUTS_SERIAL`: unique number in the range `[10-100]` (check `/etc/forgejo-runner`) +- `INPUTS_TOKEN`: a runner registration token obtained from the web UI +- `INPUTS_FORGEJO`: the Forgejo instance from which `INPUTS_TOKEN` was obtained (e.g. https://code.forgejo.org) +- `INPUTS_RUNNER_VERSION`: the version of the Forgejo runner as found in https://code.forgejo.org/forgejo/runner/releases (e.g. 6.2.0) +- `INPUTS_LXC_CONFIG`: the value of the `--config` argument of [lxc-helpers](https://code.forgejo.org/forgejo/lxc-helpers/#usage) used when creating the LXC container for the runner (e.g. `docker`) +- `INPUTS_LIFETIME`: the LXC container is re-created when its lifetime expires (e.g. 7d) + +## Hacking + +- An existing LXC configuration will not be modified. If `lxc-ls` exists, it is assumed that LXC is configured and ready to be used. 
#
# Install, on the host, every tool the rest of this script relies on.
# Each step is skipped when the tool is already found in the PATH, so
# the function can be run repeatedly:
#
#  - curl, jq and retry from the Debian packages
#  - lxc-helpers.sh and lxc-helpers-lib.sh from the lxc-helpers repository
#  - LXC itself, via lxc-helpers.sh, using $LXC_IPV4_PREFIX / $LXC_IPV6_PREFIX
#  - the yq release $YQ_VERSION matching the CPU architecture of the host
#  - a copy of this script in /usr/local/bin/$SELF_FILENAME
#
# Privileged commands are prefixed with $SUDO, which is empty when the
# script already runs as root.
#
# Returns non-zero if yq is needed and the host architecture is neither
# amd64 nor arm64.
#
function dependencies() {
  if ! which curl jq retry >/dev/null; then
    export DEBIAN_FRONTEND=noninteractive
    $SUDO apt-get update -qq
    $SUDO apt-get install -y -qq curl jq retry
  fi
  if ! which lxc-helpers.sh >/dev/null; then
    $SUDO curl --fail -sS -o /usr/local/bin/lxc-helpers-lib.sh https://code.forgejo.org/forgejo/lxc-helpers/raw/branch/main/lxc-helpers-lib.sh
    $SUDO curl --fail -sS -o /usr/local/bin/lxc-helpers.sh https://code.forgejo.org/forgejo/lxc-helpers/raw/branch/main/lxc-helpers.sh
    $SUDO chmod +x /usr/local/bin/lxc-helpers*.sh
  fi
  if ! which lxc-ls >/dev/null; then
    $SUDO lxc-helpers.sh lxc_install_lxc_inside $LXC_IPV4_PREFIX $LXC_IPV6_PREFIX
  fi
  if ! which yq >/dev/null; then
    # pick the release asset matching the host instead of hard-coding
    # one architecture: a binary built for another CPU cannot run here
    local machine
    local yq_arch
    machine=$(uname -m)
    case "$machine" in
      x86_64 | amd64) yq_arch=amd64 ;;
      aarch64 | arm64) yq_arch=arm64 ;;
      *)
        echo "unsupported architecture $machine: cannot choose a yq binary" >&2
        return 1
        ;;
    esac
    $SUDO curl -L --fail -sS -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_${yq_arch}
    $SUDO chmod +x /usr/local/bin/yq
  fi
  # refresh the installed copy whenever it is missing or differs from
  # the script being run; /usr/local/bin is not writable without $SUDO
  # when the caller is not root
  if ! cmp "$SELF" "/usr/local/bin/$SELF_FILENAME"; then
    $SUDO cp -a "$SELF" "/usr/local/bin/$SELF_FILENAME"
  fi
}
#
# Stop the forgejo-runner process and record how it went by creating
# exactly one marker file in $ETC/$INPUTS_SERIAL:
#
#  - killed-already    pkill found no forgejo-runner process to signal
#  - killed-gracefully the runner exited within $KILL_AFTER seconds of
#                      receiving the default signal sent by pkill
#  - killed-forcefully the runner had to be sent SIGKILL
#
# Stale killed-* markers and the started-running marker are removed
# first.
#
function kill_runner() {
  cd "$ETC/$INPUTS_SERIAL"
  rm -f killed-* started-running

  # the exit codes are captured with `|| status=$?` so that errexit is
  # never switched off and cannot be left disabled on an early return
  local status=0
  pkill --exact forgejo-runner || status=$?
  if test $status = 1; then
    # pkill exits 1 when no process matched
    touch killed-already
    return
  fi

  status=0
  timeout "$KILL_AFTER" pidwait --exact forgejo-runner || status=$?

  # pidwait will exit 1 if the process is already gone
  # pidwait will exit 0 if the process terminated gracefully before the timeout
  if test $status = 0 || test $status = 1; then
    touch killed-gracefully
    echo "forgejo-runner stopped gracefully"
  else
    # the runner may exit between the timeout and this signal: do not
    # let a "no process matched" status abort before the marker is written
    pkill --exact --signal=KILL forgejo-runner || true
    touch killed-forcefully
    echo "forgejo-runner stopped forcefully"
  fi
}