diff --git a/README.org b/README.org index d53672e..dcb72dd 100644 --- a/README.org +++ b/README.org @@ -128,6 +128,48 @@ homey-deploy-rpi-main NixOS activates the new config on the Pi immediately, with an automatic rollback if activation fails. +* Post-deploy setup + +Some services require manual one-time configuration after the first deploy. + +** Ntfy — push notifications + +Ntfy's admin user is created automatically from sops on first start. You +still need to create a phone token and subscribe to the alerts topic. + +1. Visit =https://ntfy.zakobar.com= and log in with the admin password + (=ntfy/admin_password= in =secrets/secrets.yaml=). +2. Go to *Account → Access Tokens → Create token* — give it a name (e.g. + "phone") and copy the token value. +3. In the [[https://ntfy.sh][Ntfy mobile app]]: + - *Server*: =https://ntfy.zakobar.com= + - *Access token*: the token you just created +4. Subscribe to the =alerts= topic in the app. + +** Uptime Kuma — notifications (two-deploy process) + +Uptime Kuma monitors are created automatically by the sync script on first +deploy, but notification channels must be configured in the UI before they +can be attached to monitors. This requires two deploys: + +*Deploy 1* — services are up, monitors exist, but no notifications assigned yet. + +Then, in the Uptime Kuma UI (=https://uptime.zakobar.com=): + +1. Go to *Settings → Notifications → Add Notification*. +2. Choose *ntfy* as the type and fill in: + - *Server URL*: =https://ntfy.zakobar.com= + - *Topic*: =alerts= + - *Token*: an access token belonging to the ntfy admin user (preferably a dedicated one created under *Account → Access Tokens* in ntfy) +3. Save — you do *not* need to manually assign it to any monitor. + +*Deploy 2* — run =homey-deploy-rpi-main= again. The sync script will detect +the newly configured notification channel and attach it to every monitor +automatically. + +Any notifications added to Uptime Kuma in the future will also be picked up +on the next deploy. 
+ * Backing up Backups use [[https://restic.net/][restic]] and run automatically via systemd on a daily schedule. diff --git a/modules/monitoring.nix b/modules/monitoring.nix index 020e3b3..85c628a 100644 --- a/modules/monitoring.nix +++ b/modules/monitoring.nix @@ -200,7 +200,7 @@ in environment.etc."grafana/dashboards/node-exporter-full.json" = { source = pkgs.fetchurl { url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; - hash = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="; + hash = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM="; }; mode = "0444"; }; diff --git a/modules/services/authelia.nix b/modules/services/authelia.nix index 4e6afa8..a20752c 100644 --- a/modules/services/authelia.nix +++ b/modules/services/authelia.nix @@ -192,6 +192,10 @@ in AUTHELIA_SESSION_SECRET_FILE = "/run/secrets/session_secret"; AUTHELIA_STORAGE_ENCRYPTION_KEY_FILE = "/run/secrets/storage_encryption_key"; AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE = "/run/secrets/ldap_ro_password"; + # Changing this forces a container restart when the config changes. + # NixOS bind-mounts resolve symlinks at container start, so the running + # container would otherwise keep the old nix-store config until restarted. + NIXOS_CONFIG_HASH = builtins.hashString "sha256" autheliaConfig; }; volumes = [ diff --git a/modules/services/nextcloud.nix b/modules/services/nextcloud.nix index 311a3e1..ab1f2f8 100644 --- a/modules/services/nextcloud.nix +++ b/modules/services/nextcloud.nix @@ -170,9 +170,13 @@ in # Uptime Kuma monitor for this service # ----------------------------------------------------------------------- homey.monitoring.monitors = [{ - name = "Nextcloud"; - url = "https://nextcloud.${domain}/status.php"; - interval = 60; + name = "Nextcloud"; + url = "https://nextcloud.${domain}/status.php"; + interval = 60; + keyword = "\"maintenance\":false"; + # Nightly maintenance is expected — only alert if stuck for 4+ hours. 
+ # 240 retries × 60s = 4 hours of consecutive failures before notifying. + maxretries = 240; }]; systemd.services."podman-nextcloud" = { diff --git a/modules/services/ntfy.nix b/modules/services/ntfy.nix index 8a44b9e..9d59066 100644 --- a/modules/services/ntfy.nix +++ b/modules/services/ntfy.nix @@ -75,6 +75,46 @@ in }; }; + # Minimal config for the `ntfy user` CLI — the NixOS module puts its + # generated config in the nix store under an unpredictable path, so we + # write a separate file just containing the auth-file path. The server + # ignores this file (it uses the module-generated one via -c flag). + environment.etc."ntfy-sh/user-cli.yml" = { + text = "auth-file: ${dataDir}/ntfy/auth.db\n"; + mode = "0444"; + }; + + # Create ntfy data directories on the external HD before ntfy starts. + # Runs as a separate root service (outside ntfy-sh's restricted namespace) + # so it can access /mnt/data without hitting ReadWritePaths restrictions. + systemd.services.ntfy-sh-mkdir = { + description = "Create Ntfy data directories on external HD"; + wantedBy = [ "ntfy-sh.service" ]; + before = [ "ntfy-sh.service" ]; + after = [ "mnt-data.mount" ]; + requires = [ "mnt-data.mount" ]; + + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = pkgs.writeShellScript "ntfy-mkdir" '' + set -euo pipefail + mkdir -p ${dataDir}/ntfy/attachments + chown -R ntfy-sh:ntfy-sh ${dataDir}/ntfy + chmod 0750 ${dataDir}/ntfy ${dataDir}/ntfy/attachments + ''; + }; + }; + + # Ensure ntfy-sh starts after the HD is mounted and dirs are ready. + # Also widen ReadWritePaths so ntfy-sh can write to the external HD path + # (the NixOS module restricts writes to /var/lib/ntfy-sh by default). 
+ systemd.services.ntfy-sh = { + after = lib.mkAfter [ "mnt-data.mount" "ntfy-sh-mkdir.service" ]; + requires = lib.mkAfter [ "mnt-data.mount" "ntfy-sh-mkdir.service" ]; + serviceConfig.ReadWritePaths = lib.mkAfter [ "${dataDir}/ntfy" ]; + }; + # ----------------------------------------------------------------------- # Create the admin user on first start (idempotent) # ----------------------------------------------------------------------- @@ -102,28 +142,21 @@ in PASS=$(cat "$CREDENTIALS_DIRECTORY/ntfy_admin_password") - # ntfy user commands need the config file to find the auth database. - # The NixOS ntfy-sh module writes config to /etc/ntfy-sh/server.yml. - NTFY="${pkgs.ntfy-sh}/bin/ntfy user --config /etc/ntfy-sh/server.yml" + # Use the minimal CLI config (just has auth-file path). + NTFY="${pkgs.ntfy-sh}/bin/ntfy user --config /etc/ntfy-sh/user-cli.yml" - # ntfy user list exits non-zero if the user DB is empty/doesn't exist; - # grep exits non-zero if the pattern is missing. Either means no admin. - if $NTFY list 2>/dev/null | grep -qE "^admin\b"; then + # ntfy user list prints one "user NAME (...)" line per user and may write it to stderr, so merge both streams and match the exact admin entry (not any name or role that merely contains "admin"). + # ntfy user add reads password + confirmation from stdin (two lines). 
+ if $NTFY list 2>&1 | grep -qE "^user admin "; then echo "ntfy-sh-setup: admin user already exists" else - echo "$PASS" | $NTFY add --role=admin admin + printf '%s\n%s\n' "$PASS" "$PASS" | $NTFY add --role=admin admin echo "ntfy-sh-setup: admin user created" fi ''; }; }; - # Ensure ntfy-sh starts after the external HD is mounted - systemd.services.ntfy-sh = { - after = lib.mkAfter [ "mnt-data.mount" ]; - requires = lib.mkAfter [ "mnt-data.mount" ]; - }; - # ----------------------------------------------------------------------- # Uptime Kuma monitor for this service # ----------------------------------------------------------------------- diff --git a/modules/services/phpldapadmin.nix b/modules/services/phpldapadmin.nix index 12776e9..6188b63 100644 --- a/modules/services/phpldapadmin.nix +++ b/modules/services/phpldapadmin.nix @@ -55,7 +55,7 @@ in # ----------------------------------------------------------------------- homey.monitoring.monitors = [{ name = "phpLDAPadmin"; - url = "http://localhost:${toString cfg.port}"; + url = "http://phpldapadmin:80"; interval = 60; }]; }; diff --git a/modules/services/uptime-kuma.nix b/modules/services/uptime-kuma.nix index 4d58ae1..d3afa0a 100644 --- a/modules/services/uptime-kuma.nix +++ b/modules/services/uptime-kuma.nix @@ -36,8 +36,16 @@ let monitorsJson = pkgs.writeText "uptime-kuma-monitors.json" (builtins.toJSON config.homey.monitoring.monitors); - # Python environment for the monitor-sync script - pythonEnv = pkgs.python3.withPackages (ps: [ ps."uptime-kuma-api" ]); + # Python environment for the monitor-sync script. + # uptime-kuma-api's transitive deps (requests, socketio, websocket-client) + # are listed explicitly because withPackages doesn't always pull propagated + # deps transitively in all nixpkgs versions. 
+ pythonEnv = pkgs.python3.withPackages (ps: [ + ps."uptime-kuma-api" + ps.requests + ps."python-socketio" + ps."websocket-client" + ]); # Monitor-sync script: idempotent, hash-gated, uses Socket.IO API syncScript = pkgs.writeText "uptime-kuma-sync.py" '' @@ -48,6 +56,9 @@ let Runs as a oneshot systemd service after podman-uptime-kuma.service. Tracks a hash of the monitor list so it only re-syncs when the NixOS config changes. + + Uptime Kuma v1 has no REST API — everything is Socket.IO. Initial admin + creation uses api.setup() which raises if already done (we ignore that). """ import hashlib import json @@ -62,10 +73,11 @@ let CREDS_DIR = os.environ.get("CREDENTIALS_DIRECTORY", "") def wait_for_kuma(timeout=120): + """Wait until Uptime Kuma HTTP responds (any non-5xx — just checks it's up).""" deadline = time.time() + timeout while time.time() < deadline: try: - with urllib.request.urlopen(KUMA_URL + "/", timeout=5) as r: + with urllib.request.urlopen(KUMA_URL, timeout=5) as r: if r.status < 500: return True except Exception: @@ -103,36 +115,58 @@ let api = UptimeKumaApi(KUMA_URL) - # Initial setup (creates admin user on first run; no-op if already done) + # Initial admin setup via Socket.IO — idempotent (raises if already done, ignore it) try: - info = api.info() - if not info.get("isSetup", True): - api.setup("admin", password) - print("uptime-kuma-sync: initial admin user created") + api.setup("admin", password) + print("uptime-kuma-sync: initial admin user created") except Exception as e: - print(f"uptime-kuma-sync: setup check: {e}", file=sys.stderr) + print(f"uptime-kuma-sync: setup skipped (already configured): {e}") # Login - result = api.login("admin", password) - if not result.get("ok"): - print(f"uptime-kuma-sync: login failed: {result}", file=sys.stderr) + try: + api.login("admin", password) + except Exception as e: + print(f"uptime-kuma-sync: login failed: {e}", file=sys.stderr) api.disconnect() sys.exit(1) - # Sync monitors (add missing; skip 
existing by name) + # Collect all configured notification IDs so every monitor gets them. + notification_ids = [n["id"] for n in api.get_notifications()] + if notification_ids: + print(f"uptime-kuma-sync: attaching notifications: {notification_ids}") + + # Sync monitors: add missing; update when url, interval, keyword, maxretries or the attached notification set differs (so channels configured after the first deploy reach existing monitors). try: - existing_names = {m["name"] for m in api.get_monitors()} + existing = {m["name"]: m for m in api.get_monitors()} for m in monitors: - if m["name"] in existing_names: - print(f"uptime-kuma-sync: monitor exists, skipping: {m['name']}") - continue - api.add_monitor( - type=MonitorType.HTTP, - name=m["name"], - url=m["url"], - interval=m.get("interval", 60), - ) - print(f"uptime-kuma-sync: created monitor: {m['name']}") + keyword = m.get("keyword") + maxretries = m.get("maxretries", 0) + monitor_type = MonitorType.KEYWORD if keyword else MonitorType.HTTP + extra = {"keyword": keyword} if keyword else {} + if m["name"] not in existing: + api.add_monitor( + type=monitor_type, + name=m["name"], + url=m["url"], + interval=m.get("interval", 60), + maxretries=maxretries, + notificationIDList=notification_ids, + **extra, + ) + print(f"uptime-kuma-sync: created monitor: {m['name']}") + elif (existing[m["name"]].get("url") != m["url"] or existing[m["name"]].get("interval") != m.get("interval", 60) + or existing[m["name"]].get("keyword") != keyword or existing[m["name"]].get("maxretries") != maxretries + or {int(i) for i in (existing[m["name"]].get("notificationIDList") or [])} != set(notification_ids)): + api.edit_monitor( + existing[m["name"]]["id"], + type=monitor_type, + url=m["url"], + interval=m.get("interval", 60), + maxretries=maxretries, + notificationIDList=notification_ids, + **extra, + ) + print(f"uptime-kuma-sync: updated monitor: {m['name']}") finally: api.disconnect() @@ -168,6 +202,16 @@ in default = 60; description = "Check interval in seconds."; }; + keyword = lib.mkOption { + type = lib.types.nullOr lib.types.str; + default = null; + description = "If set, use a keyword monitor that checks for this string in the response body."; + }; + maxretries = lib.mkOption 
{ + type = lib.types.ints.unsigned; + default = 0; + description = "Consecutive failures (must be >= 0) before a DOWN alert is sent. 0 = alert immediately."; + }; }; }); default = []; @@ -217,13 +261,14 @@ in "${dataDir}/uptime-kuma:/app/data" ]; - # uptime-kuma image expects /app/data to be writable; no extra network - # needed since we reach it from the host on localhost. + # Join the homey network so monitors can reach other containers by name + # (e.g. phpldapadmin:80) without going through the host loopback. + extraOptions = [ "--network=homey" ]; }; systemd.services."podman-uptime-kuma" = { - after = lib.mkAfter [ "mnt-data.mount" ]; - requires = lib.mkAfter [ "mnt-data.mount" ]; + after = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ]; + requires = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ]; }; # -----------------------------------------------------------------------