From acdb4840aa9868e61be9270e03b8f1c406ca09ad Mon Sep 17 00:00:00 2001
From: Thomas Hintz
Date: Thu, 7 May 2026 08:45:55 -0700
Subject: [PATCH] Initial version of instance control and snapshot triggering

---
Reviewer notes (text between "---" and the first "diff --git" is ignored by
git am).

Changed relative to the submitted patch:
- Makefile: app/.dirstamp now also depends on
  all-apps/instance-control-webhooks/hooks/*; the hook files live one
  directory below what the submitted wildcard matched.
- hooks/queue-restic-snapshot.sh: tag, request_id and path are validated
  before they are written as a tab separated line, and the script refuses to
  write unless the trigger path is a FIFO.
- hooks/restic-snapshot-status.sh: request_id is validated and quoted before
  it is used in a file name; a missing status file reports "unknown".
- restic-snapshot.sh: the FIFO is opened read-write so the loop does not end
  when a writer closes it; fields are parsed with "read -r" instead of a
  global IFS plus "set --"; status becomes "failed" when the restic container
  exits non-zero; the per-container stop/start stanzas are folded into loops.

Still open (not changed here):
- Both trigger-rule blocks in hooks.yaml are commented out while port 9000 is
  published, so the hooks can be called without the HMAC secret.
- src/nassella.scm reads 'instance-control without the '() default in the two
  apps.config writers, unlike the wizard handler.
- The index blob ids of the edited files were not regenerated.

 Makefile                                      | 12 ++-
 all-apps/docker-compose.yaml                  |  1 +
 .../docker-compose.yaml                       | 25 ++++++
 .../hooks/hooks.yaml                          | 33 +++++++
 .../hooks/queue-restic-snapshot.sh            | 38 ++++++++
 .../hooks/restic-snapshot-status.sh           | 23 +++++
 all-apps/restic-snapshot.service              | 12 +++
 cl.yaml                                       |  7 ++
 config/apps.config.tmpl                       |  3 +-
 make-caddyfile.sh                             | 15 ++++
 restic-snapshot.sh                            | 69 +++++++++++++++
 src/nassella.scm                              | 13 ++-
 12 files changed, 245 insertions(+), 6 deletions(-)
 create mode 100644 all-apps/instance-control-webhooks/docker-compose.yaml
 create mode 100644 all-apps/instance-control-webhooks/hooks/hooks.yaml
 create mode 100755 all-apps/instance-control-webhooks/hooks/queue-restic-snapshot.sh
 create mode 100755 all-apps/instance-control-webhooks/hooks/restic-snapshot-status.sh
 create mode 100644 all-apps/restic-snapshot.service
 create mode 100644 restic-snapshot.sh

diff --git a/Makefile b/Makefile
index 6efe585..1795989 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,11 @@ apps_config := $(config_dir)apps.config
 # .dirstamp plus && $@ is like make magic to get this rule
 # to only run if the contents of all-apps changes
 app/.dirstamp: all-apps/app.service all-apps/docker-compose.yaml all-apps/.env \
+all-apps/restic-snapshot.service \
+all-apps/instance-control-webhooks/webhook_secret \
 all-apps/lb/Caddyfile \
 $(wildcard all-apps/lb/*) \
+$(wildcard all-apps/instance-control-webhooks/* all-apps/instance-control-webhooks/hooks/*) \
 $(wildcard all-apps/nextcloud/*) \
 $(wildcard all-apps/wg-easy/*) \
 $(wildcard all-apps/ghost/*) \
@@ -19,8 +22,10 @@ $(wildcard all-apps/dozzle/*)
 	rm -Rf app/
 	mkdir app/
 	cp all-apps/app.service app/
+	cp all-apps/restic-snapshot.service app/
 	cp all-apps/docker-compose.yaml app/
 	cp all-apps/.env app/
+	cp -a all-apps/instance-control-webhooks app/ # TODO remove once this is added to DNS/LB/app-config
 	./copy-apps.sh $(apps_config) && touch $@
 
 # compose .env files
@@ -33,6 +38,9 @@ all-apps/lb/Caddyfile: $(apps_config) make-caddyfile.sh
 	mkdir -p all-apps/lb
 	./make-caddyfile.sh $(apps_config) > all-apps/lb/Caddyfile
 
+all-apps/instance-control-webhooks/webhook_secret: $(apps_config)
+	bash -c 'source $(apps_config); printf "%s\n" "$$INSTANCE_CONTROL_WEBHOOKS_SECRET" > $@'
+
 # Nextcloud
 all-apps/nextcloud/nextcloud_admin_user: $(apps_config)
 	bash -c 'source $(apps_config); printf "%s\n" "$$NEXTCLOUD_ADMIN_USER" > $@'
@@ -89,6 +97,7 @@ restic-password: $(apps_config) make-restic-password.sh
 
 ignition.json: cl.yaml app/.dirstamp \
 all-apps/lb/Caddyfile \
+all-apps/instance-control-webhooks/webhook_secret \
 all-apps/nextcloud/nextcloud_admin_user \
 all-apps/nextcloud/nextcloud_admin_password \
 all-apps/nextcloud/postgres_db \
@@ -112,6 +121,7 @@ all-apps/ghost/.compose-env \
 restic-env \
 restic-password \
 restic-restore.sh \
+restic-snapshot.sh \
 all-apps/.env \
 $(config_dir)ssh-keys
 	cat cl.yaml | docker run --rm --volume $(config_dir)/ssh-keys:/pwd/ssh-keys --volume ${PWD}:/pwd --workdir /pwd -i quay.io/coreos/butane:latest -d /pwd > ignition.json
@@ -148,7 +158,7 @@ restic-snapshots: $(apps_config) restic-password
 .PHONY: archive
 archive:
 	tar -cf nassella-latest.tar all-apps cl.yaml init-restic.sh main.tf make-caddyfile.sh Makefile \
-	make-generated.sh make-nextcloud-env.sh make-ghost-env.sh make-restic-generated.sh make-restic-password.sh restic-snapshots.sh copy-apps.sh restic-restore.sh \
+	make-generated.sh make-nextcloud-env.sh make-ghost-env.sh make-restic-generated.sh make-restic-password.sh restic-snapshots.sh copy-apps.sh restic-restore.sh restic-snapshot.sh \
 	make-nassella-authelia-config.sh make-nassella-lldap-config.sh .terraform.lock.hcl
 	cp nassella-latest.tar src/
 
diff --git a/all-apps/docker-compose.yaml b/all-apps/docker-compose.yaml
index 4fcbb9c..6f09a1d 100644
--- a/all-apps/docker-compose.yaml
+++ b/all-apps/docker-compose.yaml
@@ -7,6 +7,7 @@ services:
       - ./lb/:/etc/caddy
       - /nassella/lb/config:/config
       - /nassella/lb/data:/data
+      - /app/maintenance:/maintenance
     networks:
       - lb
     restart: unless-stopped
diff --git a/all-apps/instance-control-webhooks/docker-compose.yaml b/all-apps/instance-control-webhooks/docker-compose.yaml
new file mode 100644
index 0000000..2eb7952
--- /dev/null
+++ b/all-apps/instance-control-webhooks/docker-compose.yaml
@@ -0,0 +1,25 @@
+version: '3'
+
+secrets:
+  instance_control_webhooks_secret:
+    file: ./instance-control-webhooks/webhook_secret
+
+services:
+  node_webhooks:
+    image: almir/webhook
+    volumes:
+      - ./instance-control-webhooks/hooks/:/etc/webhook
+      - /tmp/restic:/tmp/restic
+    secrets:
+      - instance_control_webhooks_secret
+    command:
+      - -template
+      - "-hooks=/etc/webhook/hooks.yaml"
+      - -verbose
+    networks:
+      - lb
+    restart: unless-stopped
+    ports:
+      - 9000:9000
+networks:
+  lb:
diff --git a/all-apps/instance-control-webhooks/hooks/hooks.yaml b/all-apps/instance-control-webhooks/hooks/hooks.yaml
new file mode 100644
index 0000000..ee32d53
--- /dev/null
+++ b/all-apps/instance-control-webhooks/hooks/hooks.yaml
@@ -0,0 +1,33 @@
+- id: queue-restic-snapshot
+  pass-environment-to-command:
+    - source: payload
+      name: version
+    - source: payload
+      name: path
+    - source: payload
+      name: tag
+    - source: payload
+      name: request_id
+  trigger-rule:
+  # - match:
+  #     type: payload-hmac-sha256
+  #     secret: '{{ cat "/run/secrets/instance_control_webhooks_secret" }}'
+  #     parameter:
+  #       source: header
+  #       name: X-Nassella-Signature
+  execute-command: "/etc/webhook/queue-restic-snapshot.sh"
+- id: restic-snapshot-status
+  include-command-output-in-response: true
+  pass-environment-to-command:
+    - source: payload
+      name: version
+    - source: payload
+      name: request_id
+  # trigger-rule:
+  # - match:
+  #     type: payload-hmac-sha256
+  #     secret: '{{ cat "/run/secrets/instance_control_webhooks_secret" }}'
+  #     parameter:
+  #       source: header
+  #       name: X-Nassella-Signature
+  execute-command: "/etc/webhook/restic-snapshot-status.sh"
diff --git a/all-apps/instance-control-webhooks/hooks/queue-restic-snapshot.sh b/all-apps/instance-control-webhooks/hooks/queue-restic-snapshot.sh
new file mode 100755
index 0000000..2784e3a
--- /dev/null
+++ b/all-apps/instance-control-webhooks/hooks/queue-restic-snapshot.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# TODO the systemd unit should actually do this
+# touch /maintenance/maintenance.on
+# rm /maintenance/maintenance.on
+
+# for instance-control-webhooks docker compose setup:
+# make a directory in /tmp for these pipes and mount that as a volume
+# into the container
+
+# TODO read 'version' arg from request and make sure it
+# matches the version of this script
+
+pipe="/tmp/restic/snapshot_trigger_pipe"
+
+# request_id ends up in a status file name, so only accept safe characters
+case "$HOOK_request_id" in
+    ''|*[!A-Za-z0-9_-]*) echo "invalid request_id" >&2; exit 1 ;;
+esac
+
+# the message is one tab separated line: an empty tag or an embedded
+# tab/newline would shift or split the fields for the reader
+case "$HOOK_tag" in
+    ''|*[[:cntrl:]]*) echo "invalid tag" >&2; exit 1 ;;
+esac
+case "$HOOK_path" in
+    *[[:cntrl:]]*) echo "invalid path" >&2; exit 1 ;;
+esac
+
+# only write to an existing FIFO; redirecting to a missing path would
+# create a regular file there instead
+if [ ! -p "$pipe" ]; then
+    echo "snapshot trigger pipe is missing" >&2
+    exit 1
+fi
+
+# use a named pipe
+printf "%s\t%s\t%s\n" "$HOOK_tag" "$HOOK_request_id" "$HOOK_path" > "$pipe"
diff --git a/all-apps/instance-control-webhooks/hooks/restic-snapshot-status.sh b/all-apps/instance-control-webhooks/hooks/restic-snapshot-status.sh
new file mode 100755
index 0000000..b74b017
--- /dev/null
+++ b/all-apps/instance-control-webhooks/hooks/restic-snapshot-status.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+set -e
+
+# TODO read 'version' arg from request and make sure it
+# matches the version of this script
+
+# request_id is caller supplied and becomes part of a file name:
+# only accept characters that cannot leave /tmp/restic
+case "$HOOK_request_id" in
+    ''|*[!A-Za-z0-9_-]*) echo '{"status":"invalid_request_id"}'; exit 1 ;;
+esac
+
+status_file="/tmp/restic/snapshot_status_$HOOK_request_id"
+
+# the status file is only written once the snapshot loop picks a request up
+if [ -f "$status_file" ]; then
+    status=$(cat "$status_file")
+else
+    status="unknown"
+fi
+
+echo "{\"status\":\"$status\"}"
diff --git a/all-apps/restic-snapshot.service b/all-apps/restic-snapshot.service
new file mode 100644
index 0000000..0ce3069
--- /dev/null
+++ b/all-apps/restic-snapshot.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Restic Snapshotter
+After=app.service
+Requires=docker.service app.service
+[Service]
+EnvironmentFile=/restic-env
+TimeoutStartSec=0
+ExecStart=/restic-snapshot.sh
+Restart=always
+RestartSec=5s
+[Install]
+WantedBy=multi-user.target
diff --git a/cl.yaml b/cl.yaml
index c228d8c..1d79d55 100644
--- a/cl.yaml
+++ b/cl.yaml
@@ -37,6 +37,9 @@ systemd:
     - name: app.service
       enabled: true
       contents_local: app/app.service
+    - name: restic-snapshot.service
+      enabled: true
+      contents_local: app/restic-snapshot.service
     - name: restic-backup.service
       contents: |
         [Unit]
@@ -113,6 +116,10 @@ storage:
       mode: 0755
       contents:
         local: restic-restore.sh
+    - path: /restic-snapshot.sh
+      mode: 0755
+      contents:
+        local: restic-snapshot.sh
     - path: /etc/ssh/sshd_config.d/custom.conf
       overwrite: true
       mode: 0600
diff --git a/config/apps.config.tmpl b/config/apps.config.tmpl
index bbc7e69..72f9a10 100644
--- a/config/apps.config.tmpl
+++ b/config/apps.config.tmpl
@@ -16,4 +16,5 @@ SMTP_FROM=
 BACKBLAZE_KEY_ID= # the key ID for a application key created on backblaze that has permissions for the bucket in BACKBLAZE_BUCKET_URL
 BACKBLAZE_APPLICATION_KEY= # the application key for the application key created on backblaze
 BACKBLAZE_BUCKET_URL= # the full URL for the backblaze bucket, found on the backblaze UI for the bucket
-RESTIC_PASSWORD= # should be a secure, randomly generated, restic compatible password. Used for making encrypted backups of the application data
\ No newline at end of file
+RESTIC_PASSWORD= # should be a secure, randomly generated, restic compatible password. Used for making encrypted backups of the application data
+INSTANCE_CONTROL_WEBHOOKS_SECRET= # hmac secret used when calling webhooks
\ No newline at end of file
diff --git a/make-caddyfile.sh b/make-caddyfile.sh
index 7086dd7..bd12e3b 100755
--- a/make-caddyfile.sh
+++ b/make-caddyfile.sh
@@ -65,6 +65,14 @@ for config_string in ${APP_CONFIGS[@]}; do
     fulldomain="$subdomain.$ROOT_DOMAIN"
 
     echo "$fulldomain {"
+    # config for maintenance mode
+    echo "@maintenanceModeActive file /maintenance/maintenance.on {"
+    echo " root /"
+    echo "}"
+    echo "handle @maintenanceModeActive {"
+    echo " respond \"We are performing a maintenance, come back later\" 503"
+    echo "}"
+
     echo $body
     echo "}"
 
@@ -76,6 +84,13 @@ for config_string in ${APP_CONFIGS[@]}; do
     # domain with 'lldap'
     if [ "$app" = "nassella" ]; then
         echo "lldap.$subdomain.$ROOT_DOMAIN {"
+        # config for maintenance mode
+        echo "@maintenanceModeActive file /maintenance/maintenance.on {"
+        echo " root /"
+        echo "}"
+        echo "handle @maintenanceModeActive {"
+        echo " respond \"We are performing a maintenance, come back later\" 503"
+        echo "}"
         echo " reverse_proxy nassella_lldap:17170"
         echo "}"
     fi
diff --git a/restic-snapshot.sh b/restic-snapshot.sh
new file mode 100644
index 0000000..0af46dd
--- /dev/null
+++ b/restic-snapshot.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# curl -X POST -H "Content-Type: application/json" -d '{"path":"/","tag":"test3","request_id":"1","version":0}' http://127.0.0.1:9000/hooks/queue-restic-snapshot
+
+pipe="/tmp/restic/snapshot_trigger_pipe"
+maintenance_flag="/app/maintenance/maintenance.on"
+
+# containers that get stopped for the duration of a snapshot
+db_containers=(
+    app-ghost_db-1
+    app-nassella_lldap_db-1
+    app-nassella_authelia_db-1
+    app-nassella_db-1
+    app-nextcloud_db-1
+    app-nextcloud_redis-1
+)
+
+if [ ! -p "$pipe" ]; then
+    mkdir -p "/tmp/restic"
+    rm -f "$pipe" # mkfifo fails if something that is not a fifo already sits at this path
+    mkfifo "$pipe"
+    chmod 777 -R "/tmp/restic" # TODO fix this, the webhook web server runs as a user that can access this otherwise
+fi
+
+# keep pipe open: with a read-write descriptor read does not hit EOF when a
+# writer closes its end, so the loop keeps waiting for the next request
+exec 3<>"$pipe"
+
+# one request per line: tag <TAB> request_id <TAB> path (TODO path not currently used)
+while IFS=$'\t' read -r -u 3 tag request_id path; do
+    # request_id becomes part of a file name, skip anything unexpected
+    if [[ ! "$request_id" =~ ^[A-Za-z0-9_-]+$ ]]; then
+        echo "ignoring snapshot request with invalid request_id" >&2
+        continue
+    fi
+    status_file="/tmp/restic/snapshot_status_$request_id"
+    stopped=()
+
+    # update status for webhooks
+    printf "%s\n" "running" > "$status_file"
+
+    touch "$maintenance_flag"
+
+    # shut down databases so we can get a clean snapshot
+    for container in "${db_containers[@]}"; do
+        if docker ps --filter "name=^${container}\$" --filter "status=running" | grep -q "$container"; then
+            stopped+=("$container")
+            docker stop "$container"
+        fi
+    done
+
+    # perform the snapshot
+    if docker run --rm --volume /nassella:/nassella --volume /restic-password:/restic-password -e "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" -e "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" -i restic/restic:0.18.0 backup --verbose --repo "s3:${BACKBLAZE_BUCKET_URL}" --password-file /restic-password --tag "$tag" "/nassella"; then
+        result="complete"
+    else
+        result="failed"
+    fi
+
+    # restart databases (only the ones this run stopped)
+    for container in "${stopped[@]}"; do
+        docker start "$container"
+    done
+
+    rm -f "$maintenance_flag"
+
+    # update status for webhooks
+    printf "%s\n" "$result" > "$status_file"
+
+done
diff --git a/src/nassella.scm b/src/nassella.scm
index a81b504..113df2d 100644
--- a/src/nassella.scm
+++ b/src/nassella.scm
@@ -1237,7 +1237,10 @@ chmod -R 777 /opt/keys")))
                                   (smtp-port . ,(alist-ref 'smtp-port (current-params)))
                                   (smtp-auth-user . ,(alist-ref 'smtp-auth-user (current-params)))
                                   (smtp-auth-password . ,(alist-ref 'smtp-auth-password (current-params)))
-                                  (smtp-from . ,(alist-ref 'smtp-from (current-params))))))))))
+                                  (smtp-from . ,(alist-ref 'smtp-from (current-params)))))
+                         (instance-control . ((webhooks-secret . ,(or (alist-ref 'webhooks-secret
+                                                                                 (alist-ref 'instance-control config eq? '()))
+                                                                      (generate-jwt-secret))))))))))
      (redirect (conc "/config/wizard/machine/" instance-id))))
 
 (get/widgets
@@ -1432,6 +1435,7 @@ chmod -R 777 /opt/keys")))
                       ("BACKBLAZE_APPLICATION_KEY" . ,(alist-ref 'backblaze-application-key service-config))
                       ("BACKBLAZE_BUCKET_URL" . ,(alist-ref 'backblaze-bucket-url service-config))
                       ("RESTIC_PASSWORD" . ,restic-password)
+                      ("INSTANCE_CONTROL_WEBHOOKS_SECRET" . ,(alist-ref 'webhooks-secret (alist-ref 'instance-control config)))
                       ,@(if (and restic-snapshot-id (not (string=? restic-snapshot-id ""))) `(("RESTIC_SNAPSHOT_ID" . ,restic-snapshot-id)) '())))))
            (with-output-to-file (string-append dir "/config/production.tfvars")
              (lambda ()
@@ -1445,7 +1449,7 @@ chmod -R 777 /opt/keys")))
                         ("cluster_name" . "nassella")
                         ("datacenter" . ,(alist-ref 'digitalocean-region service-config))
                         ;; (source <(curl -sSfL https://stable.release.flatcar-linux.net/amd64-usr/current/version.txt); echo "${FLATCAR_VERSION_ID}")
-                        ("flatcar_stable_version" . "4459.2.4")))
+                        ("flatcar_stable_version" . "4593.2.0")))
                ;; remove the newline that generating the ssh key adds
                (display "ssh_keys=[\"") (display (string-drop-right ssh-pub-key 1)) (print "\"]"))))
          (let* ((instance-id (alist-ref "id" (current-params) equal?))
@@ -1689,7 +1693,8 @@ chmod -R 777 /opt/keys")))
                       ("BACKBLAZE_KEY_ID" . ,(alist-ref 'backblaze-key-id service-config))
                       ("BACKBLAZE_APPLICATION_KEY" . ,(alist-ref 'backblaze-application-key service-config))
                       ("BACKBLAZE_BUCKET_URL" . ,(alist-ref 'backblaze-bucket-url service-config))
-                      ("RESTIC_PASSWORD" . ,restic-password)))))
+                      ("RESTIC_PASSWORD" . ,restic-password)
+                      ("INSTANCE_CONTROL_WEBHOOKS_SECRET" . ,(alist-ref 'webhooks-secret (alist-ref 'instance-control config)))))))
            (with-output-to-file (string-append dir "/config/production.tfvars")
              (lambda ()
                (map (lambda (e)
@@ -1702,7 +1707,7 @@ chmod -R 777 /opt/keys")))
                         ("cluster_name" . "nassella")
                         ("datacenter" . ,(alist-ref 'digitalocean-region service-config))
                         ;; (source <(curl -sSfL https://stable.release.flatcar-linux.net/amd64-usr/current/version.txt); echo "${FLATCAR_VERSION_ID}")
-                        ("flatcar_stable_version" . "4459.2.4")))
+                        ("flatcar_stable_version" . "4593.2.0")))
                ;; remove the newline that generating the ssh key adds
                (display "ssh_keys=[\"") (display (string-drop-right ssh-pub-key 1)) (print "\"]")))
            ;; TODO need a new table to track destroying?