Monitoring primarily

This commit is contained in:
Aner Zakobar
2026-05-10 11:30:43 +03:00
parent 0e54760e34
commit af744e819c
20 changed files with 1269 additions and 43 deletions
+23 -36
View File
@@ -82,13 +82,25 @@ in
# Pre-backup hook: pg_dump + nextcloud maintenance mode
# -----------------------------------------------------------------------
systemd.services."homey-backup-pre" = {
description = "Pre-backup hooks (pg_dump, NC maintenance mode)";
description = "Pre-backup hooks (pg_dump, NC maintenance mode, secrets env)";
serviceConfig = {
Type = "oneshot";
ExecStart = pkgs.writeShellScript "backup-pre" ''
set -euo pipefail
podman="${pkgs.podman}/bin/podman"
# Write the S3 credentials env file now, before restic-backups-homey.service
# starts. systemd loads EnvironmentFile= before ExecStartPre runs, so the
# file must already exist when the restic unit activates.
install -m 0600 /dev/null /run/restic-homey-secrets.env
{
printf 'AWS_ACCESS_KEY_ID=%s\n' \
"$(cat ${config.sops.secrets."restic/s3_access_key_id".path})"
printf 'AWS_SECRET_ACCESS_KEY=%s\n' \
"$(cat ${config.sops.secrets."restic/s3_secret_access_key".path})"
printf 'RESTIC_CACHE_DIR=%s\n' "${dataDir}/restic-cache"
} >> /run/restic-homey-secrets.env
# Put Nextcloud into maintenance mode (if running)
if systemctl is-active --quiet podman-nextcloud.service; then
$podman exec nextcloud php occ maintenance:mode --on || true
@@ -105,19 +117,6 @@ in
};
};
systemd.services."homey-backup-post" = {
description = "Post-backup hooks (take NC out of maintenance mode)";
serviceConfig = {
Type = "oneshot";
ExecStart = pkgs.writeShellScript "backup-post" ''
set -euo pipefail
if systemctl is-active --quiet podman-nextcloud.service; then
${pkgs.podman}/bin/podman exec nextcloud php occ maintenance:mode --off || true
fi
'';
};
};
# -----------------------------------------------------------------------
# Restic backup service
# -----------------------------------------------------------------------
@@ -125,7 +124,7 @@ in
repository = cfg.repository;
passwordFile = config.sops.secrets."restic/password".path;
# Runtime env file written by ExecStartPre (see systemd override below)
# Runtime env file written by homey-backup-pre.service (which runs first)
environmentFile = "/run/restic-homey-secrets.env";
paths = [
@@ -137,6 +136,9 @@ in
"${dataDir}/jellyfin"
"${dataDir}/transmission"
# Deliberately excluded: media/* (large, can be re-downloaded)
# Monitoring — uptime-kuma has monitors/history, ntfy has user accounts
"${dataDir}/uptime-kuma"
"${dataDir}/ntfy"
];
# Exclude Nextcloud's raw DB directory in favour of the pg_dump file
@@ -157,36 +159,21 @@ in
];
};
# Wire the pre/post hooks around the restic job and inject secrets
# Wire the pre/post hooks around the restic job
systemd.services."restic-backups-homey" = {
requires = [ "homey-backup-pre.service" ];
after = [ "homey-backup-pre.service" ];
serviceConfig = {
# Write runtime env file with actual secret values (restic needs the
# raw values; it does not support _FILE suffix env vars).
ExecStartPre = [
(pkgs.writeShellScript "restic-inject-secrets" ''
install -m 0600 /dev/null /run/restic-homey-secrets.env
{
printf 'AWS_ACCESS_KEY_ID=%s\n' \
"$(cat ${config.sops.secrets."restic/s3_access_key_id".path})"
printf 'AWS_SECRET_ACCESS_KEY=%s\n' \
"$(cat ${config.sops.secrets."restic/s3_secret_access_key".path})"
printf 'RESTIC_CACHE_DIR=%s\n' "${dataDir}/restic-cache"
} >> /run/restic-homey-secrets.env
'')
];
ExecStopPost = [
(pkgs.writeShellScript "restic-cleanup-secrets" ''
(pkgs.writeShellScript "restic-post-hooks" ''
# Always runs on stop, success or failure
rm -f /run/restic-homey-secrets.env
if systemctl is-active --quiet podman-nextcloud.service; then
${pkgs.podman}/bin/podman exec nextcloud php occ maintenance:mode --off || true
fi
'')
];
};
};
systemd.services."homey-backup-post" = {
after = [ "restic-backups-homey.service" ];
wantedBy = [ "restic-backups-homey.service" ];
};
};
}
+52
View File
@@ -207,6 +207,58 @@ in
'';
};
# ------------------------------------------------------------------
# Uptime Kuma — two_factor, admins only (enforced by authelia policy)
# ------------------------------------------------------------------
"uptime.${domain}" = {
extraConfig = ''
${autheliaForwardAuth}
reverse_proxy localhost:3001
'';
};
"http://uptime.${domain}" = {
extraConfig = ''
${autheliaForwardAuth}
${cfProxy 3001}
'';
};
# ------------------------------------------------------------------
# Ntfy — no forward_auth; ntfy has its own token/password auth so the
# mobile app can connect without Authelia SSO complications.
# ------------------------------------------------------------------
"ntfy.${domain}" = {
extraConfig = ''
reverse_proxy localhost:2586
'';
};
"http://ntfy.${domain}" = {
extraConfig = cfProxy 2586;
};
# ------------------------------------------------------------------
# Grafana — two_factor, admins only (enforced by authelia policy).
# After Authelia verifies the user, Caddy maps the Remote-User header
# to X-WEBAUTH-USER so Grafana's proxy auth auto-signs the user in.
# ------------------------------------------------------------------
"grafana.${domain}" = {
extraConfig = ''
${autheliaForwardAuth}
reverse_proxy localhost:3002 {
header_up X-WEBAUTH-USER {http.request.header.Remote-User}
}
'';
};
"http://grafana.${domain}" = {
extraConfig = ''
${autheliaForwardAuth}
reverse_proxy localhost:3002 {
header_up X-Forwarded-Proto https
header_up X-WEBAUTH-USER {http.request.header.Remote-User}
}
'';
};
};
};
+3
View File
@@ -20,6 +20,9 @@
# ldapadmin.zakobar.com → https://localhost:443
# jellyfin.zakobar.com → https://localhost:443
# torrent.zakobar.com → https://localhost:443
# uptime.zakobar.com → https://localhost:443
# ntfy.zakobar.com → https://localhost:443
# grafana.zakobar.com → https://localhost:443
# Set "No TLS Verify" = true (Caddy's cert is from Let's Encrypt but
# the hostname seen by cloudflared is localhost, so hostname verification
# would fail without this flag).
+5
View File
@@ -21,6 +21,11 @@
"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
"nix-community.cachix.org-1:mB9FkXj6Q3Q4ohOcbM4FJ9Z1X2kCrVK4vZOqsDqqNqk="
];
# Trigger GC automatically when free space drops below 2 GB;
# stop once 5 GB is free. Prevents CI builds from filling the disk
# between weekly GC runs.
min-free = 2147483648; # 2 GiB
max-free = 5368709120; # 5 GiB
};
gc = {
automatic = true;
+217
View File
@@ -0,0 +1,217 @@
{ config, lib, pkgs, homeyConfig, ... }:
# Prometheus + Grafana — metrics collection and dashboarding.
#
# Uses native NixOS services (not containers) for tight integration with
# the host OS and declarative dashboard/datasource provisioning.
#
# Architecture:
# node_exporter → Prometheus ← systemd_exporter
# ↓
# Grafana (pre-provisioned dashboard: Node Exporter Full)
#
# Auth (Grafana):
# Authelia enforces two_factor + admins-only before any request reaches
# Grafana. Caddy then maps the Authelia Remote-User header to
# X-WEBAUTH-USER, and Grafana's proxy auth auto-signs the user in —
# no second login required.
#
# Prometheus is internal-only (127.0.0.1:9090); only Grafana reads it.
# Grafana is exposed at 127.0.0.1:3002 and reverse-proxied by Caddy.
#
# Data dirs:
# Prometheus: /var/lib/prometheus2 (system drive — metrics are ephemeral)
# Grafana: /var/lib/grafana (system drive — dashboards provisioned by Nix)
#
# Secrets consumed from sops:
# grafana/secret_key (session signing key)
# openldap/ro_password (for Grafana → LDAP auth, shared with other modules)
let
  # Shorthand for this module's own option values (homey.monitoring.*).
  cfg = config.homey.monitoring;
  # Base domain; the public Grafana hostname is grafana.<domain>.
  domain = homeyConfig.domain;
  # NOTE(review): the former `ldapBaseDN` binding was removed because nothing
  # in this module referenced it (no Grafana LDAP settings are configured).
in
{
options.homey.monitoring = with lib; {
  # Master switch for the whole Prometheus + Grafana stack.
  enable = mkEnableOption "Prometheus + Grafana monitoring stack";

  # Port Prometheus binds on the loopback interface.
  prometheusPort = mkOption {
    description = "Prometheus listen port (localhost only).";
    default = 9090;
    type = types.port;
  };

  # Port Grafana binds on the loopback interface (Caddy proxies to it).
  grafanaPort = mkOption {
    description = "Grafana listen port (localhost only, reverse-proxied by Caddy).";
    default = 3002;
    type = types.port;
  };
};
config = lib.mkIf cfg.enable {
# -----------------------------------------------------------------------
# Secrets consumed from sops
# -----------------------------------------------------------------------
sops.secrets = {
  # Session signing key, readable by the grafana user.
  "grafana/secret_key".owner = "grafana";
  # NOTE(review): declared here but not referenced elsewhere in this module.
  "openldap/ro_password".owner = "root";
};
# -----------------------------------------------------------------------
# Prometheus — server plus the node and systemd exporters it scrapes.
# Everything in this block listens on the loopback interface only.
# -----------------------------------------------------------------------
services.prometheus = {
  enable = true;
  listenAddress = "127.0.0.1";
  port = cfg.prometheusPort;

  globalConfig = {
    scrape_interval = "30s";
    evaluation_interval = "30s";
  };

  # Scrape node and systemd metrics from the local exporters. The target
  # ports are read back from the exporter options below so they cannot drift.
  scrapeConfigs = [
    {
      job_name = "node";
      static_configs = [{
        targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ];
      }];
    }
    {
      job_name = "systemd";
      static_configs = [{
        targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.systemd.port}" ];
      }];
    }
  ];

  exporters = {
    node = {
      enable = true;
      # Bind to loopback: Prometheus scrapes 127.0.0.1, and without this the
      # exporter would fall back to the module's default listen address.
      listenAddress = "127.0.0.1";
      port = 9100;
      # Collectors requested explicitly
      enabledCollectors = [
        "cpu"
        "diskstats"
        "filesystem"
        "loadavg"
        "meminfo"
        "netdev"
        "stat"
        "time"
        "uname"
        "pressure"  # CPU/memory/IO pressure stall info (Linux PSI)
        "hwmon"     # temperature sensors (RPi4 has a CPU temp sensor)
      ];
    };

    systemd = {
      enable = true;
      # Same reasoning as the node exporter: only the local Prometheus reads it.
      listenAddress = "127.0.0.1";
      port = 9558;
    };
  };
};
# -----------------------------------------------------------------------
# Grafana — loopback listener, proxy-header auth, provisioned datasource
# and dashboard provider.
# -----------------------------------------------------------------------
services.grafana = {
  enable = true;

  settings = {
    # Listen on loopback; root_url is the public https hostname.
    server = {
      http_addr = "127.0.0.1";
      http_port = cfg.grafanaPort;
      domain = "grafana.${domain}";
      root_url = "https://grafana.${domain}";
    };

    security = {
      # Session signing key — Grafana reads the file at runtime through its
      # $__file{} interpolation, so the value is not embedded in this string.
      secret_key = "$__file{${config.sops.secrets."grafana/secret_key".path}}";
      # Initial admin creation is explicitly left enabled (false = do not
      # disable it). The login form itself is turned off under `auth` below.
      disable_initial_admin_creation = false;
    };

    # Proxy auth: trust the X-WEBAUTH-USER header as the username and create
    # unknown users on first sight. The file header states that Caddy sets
    # this header after Authelia has verified the user.
    "auth.proxy" = {
      enabled = true;
      header_name = "X-WEBAUTH-USER";
      header_property = "username";
      auto_sign_up = true;
      # No additional header-to-attribute mappings.
      headers = "";
    };

    # Disable Grafana's own login UI — all auth goes via Authelia.
    # Set to false to keep a fallback login form (useful for recovery).
    auth.disable_login_form = true;

    # Every auto-created proxy-auth user receives the Admin org role; the
    # file header states Authelia restricts access to the admins group.
    users.auto_assign_org_role = "Admin";

    analytics = {
      reporting_enabled = false;
    };
  };

  # ---------------------------------------------------------------------
  # Provisioning: Prometheus datasource + file-based dashboard provider
  # ---------------------------------------------------------------------
  provision = {
    enable = true;

    datasources.settings.datasources = [
      {
        name = "Prometheus";
        type = "prometheus";
        access = "proxy";
        isDefault = true;
        url = "http://127.0.0.1:${toString cfg.prometheusPort}";
      }
    ];

    # Dashboards are loaded from JSON files placed under this directory.
    dashboards.settings.providers = [
      {
        name = "default";
        options.path = "/etc/grafana/dashboards";
      }
    ];
  };
};
# -----------------------------------------------------------------------
# Node Exporter Full dashboard (grafana.com ID 1860, revision 37), fetched
# at build time and placed where the dashboard provider looks for files.
#
# The hash below is a placeholder: the build prints the expected value on
# mismatch. To obtain it up front, run
#   nix store prefetch-file --hash-type sha256 \
#     https://grafana.com/api/dashboards/1860/revisions/37/download
# and paste the result here.
# -----------------------------------------------------------------------
environment.etc."grafana/dashboards/node-exporter-full.json" =
  let
    dashboardJson = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
      hash = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
    };
  in
  {
    mode = "0444";
    source = dashboardJson;
  };
# -----------------------------------------------------------------------
# Register Grafana in the shared Uptime Kuma monitor list
# -----------------------------------------------------------------------
homey.monitoring.monitors = [
  {
    name = "Grafana";
    interval = 60;
    url = "https://grafana.${domain}";
  }
];
};
}
+30
View File
@@ -107,6 +107,27 @@ let
- domain:
- "jellyfin.${domain}"
policy: "one_factor"
- domain:
- "uptime.${domain}"
subject:
- "group:admins"
policy: "two_factor"
- domain:
- "uptime.${domain}"
policy: "deny"
- domain:
- "grafana.${domain}"
subject:
- "group:admins"
policy: "two_factor"
- domain:
- "grafana.${domain}"
policy: "deny"
# ntfy: bypass — ntfy enforces its own token/password auth;
# the mobile app must be able to connect without Authelia SSO.
- domain:
- "ntfy.${domain}"
policy: "bypass"
notifier:
filesystem:
@@ -196,5 +217,14 @@ in
after = lib.mkAfter [ "mnt-data.mount" "podman-openldap.service" "podman-homey-network.service" ];
requires = lib.mkAfter [ "mnt-data.mount" "podman-openldap.service" "podman-homey-network.service" ];
};
# -----------------------------------------------------------------------
# Uptime Kuma monitor for this service
# -----------------------------------------------------------------------
homey.monitoring.monitors = [{
name = "Authelia";
url = "https://auth.${domain}/api/health";
interval = 60;
}];
};
}
+68
View File
@@ -0,0 +1,68 @@
{ config, lib, pkgs, homeyConfig, ... }:
# Gitea Actions Runner — executes CI/CD jobs triggered by Gitea Actions.
#
# Uses the NixOS native services.gitea-actions-runner module (act runner).
# Jobs run directly on the host ("host" executor) — no container isolation.
# This is appropriate for a trusted home server and avoids the overhead of
# nested containers on a Pi 4.
#
# The service uses DynamicUser=true so there is no persistent system user.
# nix/git/bash are available in jobs via the system PATH inherited from the
# service environment.
#
# Setup (one-time):
# 1. In Gitea: Site Administration → Actions → Runners → Create Runner Token
# 2. Store the token in sops with KEY=VALUE format:
# gitea/runner_token: "TOKEN=<your-token-here>"
# 3. Enable homey.giteaRunner in the host config and deploy.
#
# After first start the runner registers itself and stores credentials in
# /var/lib/gitea-runner/<name>/.runner — the token file is only needed for
# (re-)registration.
#
# Secrets consumed from sops:
# gitea/runner_token (must contain: TOKEN=<value>)
let
  # Base domain; the runner registers against git.<domain>.
  inherit (homeyConfig) domain;
  # Shorthand for this module's option values (homey.giteaRunner.*).
  cfg = config.homey.giteaRunner;
in
{
options.homey.giteaRunner = with lib; {
  enable = mkEnableOption "Gitea Actions runner";

  # Also used as the instance key under services.gitea-actions-runner.instances.
  name = mkOption {
    description = "Runner name as shown in Gitea's runner list.";
    default = config.networking.hostName;
    type = types.str;
  };

  # Each entry has the form <label>:<executor>; every default uses "host".
  labels = mkOption {
    description = ''
      Labels advertised to Gitea. The "host" executor runs jobs directly on
      this machine. Workflow files targeting any of these labels will be
      picked up by this runner.
    '';
    default = [ "native:host" "ubuntu-latest:host" "debian-latest:host" "nix:host" ];
    type = types.listOf types.str;
  };
};
config = lib.mkIf cfg.enable {
# Registration token. The file must contain: TOKEN=<registration-token>
# It is handed to the runner module as tokenFile; root-only ownership and
# mode 0400 are used because, per this module's header, it is consumed as a
# systemd EnvironmentFile before the service drops privileges.
sops.secrets."gitea/runner_token" = {
  owner = "root";
  mode = "0400";
};

services.gitea-actions-runner.instances.${cfg.name} = {
  enable = true;
  url = "https://git.${domain}";
  tokenFile = config.sops.secrets."gitea/runner_token".path;
  name = cfg.name;
  labels = cfg.labels;

  # Tools visible to jobs run by the "host" executor. The unit does not get
  # /run/current-system/sw/bin on its PATH; the upstream module builds the
  # job PATH from this list instead. The first group repeats the upstream
  # default (NOTE(review): confirm against the nixpkgs revision in use), and
  # nix is appended because the default labels advertise "nix:host".
  hostPackages =
    (with pkgs; [ bash coreutils curl gawk gitMinimal gnused nodejs wget ])
    ++ [ config.nix.package ];
};
};
}
+12
View File
@@ -143,6 +143,9 @@ in
# [oauth2]
GITEA__oauth2__ENABLED = "false";
# [actions]
GITEA__actions__ENABLED = "true";
};
# Secret env vars written at runtime by ExecStartPre — never in store.
@@ -185,6 +188,15 @@ in
requires = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ];
};
# -----------------------------------------------------------------------
# Uptime Kuma monitor for this service
# -----------------------------------------------------------------------
homey.monitoring.monitors = [{
name = "Gitea";
url = "https://git.${domain}";
interval = 60;
}];
# -----------------------------------------------------------------------
# Ensure the Gitea admin user exists with the correct password after start.
# Runs as a oneshot after podman-gitea; idempotent (create or update).
+9
View File
@@ -166,6 +166,15 @@ in
];
};
# -----------------------------------------------------------------------
# Uptime Kuma monitor for this service
# -----------------------------------------------------------------------
homey.monitoring.monitors = [{
name = "Nextcloud";
url = "https://nextcloud.${domain}/status.php";
interval = 60;
}];
systemd.services."podman-nextcloud" = {
serviceConfig = {
LoadCredential = [
+136
View File
@@ -0,0 +1,136 @@
{ config, lib, pkgs, homeyConfig, ... }:
# Ntfy — self-hosted push notification server.
#
# Mobile app (Android/iOS) connects to https://ntfy.zakobar.com with a token
# and subscribes to the "alerts" topic. Uptime Kuma and Grafana send alerts
# to that topic when services go down.
#
# Auth model:
# - Web UI: public-facing but ntfy enforces its own auth (deny-all by default)
# - Caddy does NOT put forward_auth here; ntfy has native token/password auth
# so the mobile app can connect without Authelia SSO complications.
#
# Setup after first deploy:
# 1. Visit https://ntfy.zakobar.com — log in with the admin password from sops.
# 2. Create an access token for your phone (Admin → Users & Tokens).
# 3. In the Ntfy app: server = https://ntfy.zakobar.com, token = <your-token>.
# 4. Subscribe to the "alerts" topic.
#
# Volume layout:
# <dataDir>/ntfy/auth.db ← user/token database
# <dataDir>/ntfy/cache.db ← message cache (for missed messages)
# <dataDir>/ntfy/attachments/ ← file attachments
#
# Secrets consumed from sops:
# ntfy/admin_password
let
  # Base domain; ntfy is published as ntfy.<domain>.
  inherit (homeyConfig) domain;
  # Shorthand for this module's option values (homey.ntfy.*).
  cfg = config.homey.ntfy;
  # Mount point of the data volume; ntfy state lives under <dataDir>/ntfy.
  dataDir = config.homey.storage.mountPoint;
in
{
options.homey.ntfy = with lib; {
  enable = mkEnableOption "Ntfy push notification server";

  # Used for listen-http and for the setup unit's health probe.
  port = mkOption {
    description = "Host port ntfy listens on (bound to 127.0.0.1).";
    default = 2586;
    type = types.port;
  };
};
config = lib.mkIf cfg.enable {
# -----------------------------------------------------------------------
# Secrets consumed from sops
# -----------------------------------------------------------------------
# Admin password; passed to the ntfy-sh-setup unit via LoadCredential.
sops.secrets."ntfy/admin_password".owner = "root";
# -----------------------------------------------------------------------
# ntfy-sh native NixOS service
# -----------------------------------------------------------------------
services.ntfy-sh = {
  enable = true;
  settings = {
    # Bind to localhost; Caddy reverse-proxies it
    listen-http = "127.0.0.1:${toString cfg.port}";
    base-url = "https://ntfy.${domain}";

    # Require auth on all topics — deny unauthenticated access entirely
    auth-default-access = "deny-all";

    # Persistent state on external HD
    auth-file = "${dataDir}/ntfy/auth.db";
    cache-file = "${dataDir}/ntfy/cache.db";
    # ntfy's server option for the attachment directory is
    # `attachment-cache-dir`; attachments stay disabled unless it is set.
    attachment-cache-dir = "${dataDir}/ntfy/attachments";

    # Keep messages for 12 hours so the app catches up if offline
    cache-duration = "12h";

    # Attachment limits
    attachment-total-size-limit = "5G";
    attachment-file-size-limit = "15M";
    attachment-expiry-duration = "3h";
  };
};
# -----------------------------------------------------------------------
# Create the admin user on first start (idempotent).
#
# Oneshot that runs after ntfy-sh.service: waits for the HTTP health
# endpoint, then adds an "admin" user with the sops-provided password
# unless the user listing already contains one.
# -----------------------------------------------------------------------
systemd.services.ntfy-sh-setup = {
  description = "Create Ntfy admin user";
  wantedBy = [ "multi-user.target" ];
  after = [ "ntfy-sh.service" "mnt-data.mount" ];
  requires = [ "ntfy-sh.service" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    LoadCredential = "ntfy_admin_password:${config.sops.secrets."ntfy/admin_password".path}";
    ExecStart = pkgs.writeShellScript "ntfy-create-admin" ''
      set -euo pipefail

      # Run `ntfy user ...` against the same auth database the server is
      # configured with, so this does not depend on where the generated
      # server.yml is installed.
      ntfy_user() {
        ${pkgs.ntfy-sh}/bin/ntfy user --auth-file "${dataDir}/ntfy/auth.db" "$@"
      }

      # Wait until the ntfy HTTP endpoint is ready (30 tries x 2 s = 60 s max)
      ready=0
      for _ in $(seq 1 30); do
        if ${pkgs.curl}/bin/curl -sf http://127.0.0.1:${toString cfg.port}/v1/health > /dev/null 2>&1; then
          ready=1
          break
        fi
        sleep 2
      done
      if [ "$ready" -ne 1 ]; then
        echo "ntfy-sh-setup: ntfy did not become ready within 60 s" >&2
        exit 1
      fi

      # Capture the listing first (stdout and stderr, since ntfy may print it
      # on either) so an early grep exit cannot trip pipefail. Entries look
      # like "user <name> (role: ...)". A failing `list` is treated as
      # "no admin yet".
      users="$(ntfy_user list 2>&1 || true)"

      if grep -qE '^user admin( |$)' <<<"$users"; then
        echo "ntfy-sh-setup: admin user already exists"
      else
        # NTFY_PASSWORD is ntfy's non-interactive way to supply the password;
        # the interactive prompt would ask for it twice on stdin.
        NTFY_PASSWORD="$(cat "$CREDENTIALS_DIRECTORY/ntfy_admin_password")"
        export NTFY_PASSWORD
        ntfy_user add --role=admin admin
        echo "ntfy-sh-setup: admin user created"
      fi
    '';
  };
};
# ntfy-sh keeps its databases on the external HD, so order it after (and
# make it require) the data mount. mkAfter appends to any existing lists.
systemd.services.ntfy-sh =
  let
    dataMount = lib.mkAfter [ "mnt-data.mount" ];
  in
  {
    requires = dataMount;
    after = dataMount;
  };
# -----------------------------------------------------------------------
# Register ntfy's health endpoint in the shared Uptime Kuma monitor list
# -----------------------------------------------------------------------
homey.monitoring.monitors = [
  {
    name = "Ntfy";
    interval = 60;
    url = "https://ntfy.${domain}/v1/health";
  }
];
};
}
+9
View File
@@ -49,5 +49,14 @@ in
after = lib.mkAfter [ "podman-openldap.service" "podman-homey-network.service" ];
wants = lib.mkAfter [ "podman-openldap.service" "podman-homey-network.service" ];
};
# -----------------------------------------------------------------------
# Uptime Kuma monitor for this service
# -----------------------------------------------------------------------
homey.monitoring.monitors = [{
name = "phpLDAPadmin";
url = "http://localhost:${toString cfg.port}";
interval = 60;
}];
};
}
+259
View File
@@ -0,0 +1,259 @@
{ config, lib, pkgs, homeyConfig, ... }:
# Uptime Kuma — endpoint uptime monitoring with a status-page UI.
#
# This module does two things:
#
# 1. Declares the shared homey.monitoring.monitors option that any service
# module can contribute to. Adding your service's URL there means it
# automatically appears in Uptime Kuma — no manual UI work needed.
#
# 2. Runs Uptime Kuma as an OCI container and syncs the monitor list via
# the Socket.IO API on startup using the uptime-kuma-api Python library.
#
# Example (in nextcloud.nix):
# homey.monitoring.monitors = [{
# name = "Nextcloud";
# url = "https://nextcloud.zakobar.com/status.php";
# interval = 60;
# }];
#
# Auth: Authelia two_factor, admins-only (enforced in authelia.nix + caddy.nix).
#
# Volume layout:
# <dataDir>/uptime-kuma/ → /app/data (SQLite DB, config)
#
# Secrets consumed from sops:
# uptime-kuma/admin_password
let
cfg = config.homey.uptimeKuma;
# Base domain; Uptime Kuma is published as uptime.<domain>.
inherit (homeyConfig) domain;
# Mount point of the data volume; the container's /app/data lives under it.
dataDir = config.homey.storage.mountPoint;

# The merged monitor list from every module, rendered to a JSON store file
# at build time. The sync script reads it (via /etc) at runtime.
monitorsJson =
  let
    rendered = builtins.toJSON config.homey.monitoring.monitors;
  in
  pkgs.writeText "uptime-kuma-monitors.json" rendered;

# Interpreter for the sync script, bundled with the uptime-kuma-api library.
pythonEnv = pkgs.python3.withPackages (pyPkgs: [ pyPkgs."uptime-kuma-api" ]);
# Monitor-sync script: idempotent, hash-gated, uses the Socket.IO API.
# The Kuma URL is derived from cfg.port so it follows the configured host port.
syncScript = pkgs.writeText "uptime-kuma-sync.py" ''
  #!/usr/bin/env python3
  """
  Sync monitors declared in /etc/uptime-kuma/monitors.json into Uptime Kuma.
  Runs as a oneshot systemd service after podman-uptime-kuma.service.
  Tracks a hash of the monitor list so it only re-syncs when the NixOS
  config changes.
  """
  import hashlib
  import json
  import os
  import sys
  import time
  import urllib.request

  MONITORS_PATH = "/etc/uptime-kuma/monitors.json"
  HASH_PATH = "/var/lib/uptime-kuma-setup/last-hash"
  KUMA_URL = "http://localhost:${toString cfg.port}"
  CREDS_DIR = os.environ.get("CREDENTIALS_DIRECTORY", "")


  def wait_for_kuma(timeout=120):
      """Poll KUMA_URL until it answers with a non-5xx status or `timeout` seconds pass."""
      deadline = time.time() + timeout
      while time.time() < deadline:
          try:
              with urllib.request.urlopen(KUMA_URL + "/", timeout=5) as r:
                  if r.status < 500:
                      return True
          except Exception:
              # Not up yet (connection refused, HTTP error, ...): retry.
              pass
          time.sleep(3)
      return False


  def main():
      with open(MONITORS_PATH) as f:
          monitors = json.load(f)

      config_hash = hashlib.sha256(
          json.dumps(monitors, sort_keys=True).encode()
      ).hexdigest()

      # Skip sync if config hasn't changed
      try:
          with open(HASH_PATH) as f:
              if f.read().strip() == config_hash:
                  print("uptime-kuma-sync: config unchanged, skipping")
                  return
      except FileNotFoundError:
          pass

      password_file = os.path.join(CREDS_DIR, "uptime_kuma_password")
      with open(password_file) as f:
          password = f.read().strip()

      print("uptime-kuma-sync: waiting for Uptime Kuma to be ready...")
      if not wait_for_kuma():
          print("uptime-kuma-sync: timed out waiting for Uptime Kuma", file=sys.stderr)
          sys.exit(1)

      from uptime_kuma_api import UptimeKumaApi, MonitorType

      api = UptimeKumaApi(KUMA_URL)
      # Everything after connecting runs under one try/finally so the
      # Socket.IO connection is closed on every exit path.
      try:
          # Initial setup: create the admin user only while the server
          # reports that it has not been set up yet. Best effort, as before:
          # a failure here is logged and the login below decides the outcome.
          try:
              if api.need_setup():
                  api.setup("admin", password)
                  print("uptime-kuma-sync: initial admin user created")
          except Exception as e:
              print(f"uptime-kuma-sync: setup check: {e}", file=sys.stderr)

          # Login. A failed login surfaces as an exception from the library;
          # an explicit ok=False in the reply is also treated as a failure.
          try:
              result = api.login("admin", password)
          except Exception as e:
              print(f"uptime-kuma-sync: login failed: {e}", file=sys.stderr)
              sys.exit(1)
          if isinstance(result, dict) and result.get("ok") is False:
              print(f"uptime-kuma-sync: login failed: {result}", file=sys.stderr)
              sys.exit(1)

          # Sync monitors (add missing; skip existing by name)
          existing_names = {m["name"] for m in api.get_monitors()}
          for m in monitors:
              if m["name"] in existing_names:
                  print(f"uptime-kuma-sync: monitor exists, skipping: {m['name']}")
                  continue
              api.add_monitor(
                  type=MonitorType.HTTP,
                  name=m["name"],
                  url=m["url"],
                  interval=m.get("interval", 60),
              )
              print(f"uptime-kuma-sync: created monitor: {m['name']}")
      finally:
          api.disconnect()

      # Persist hash so we don't re-sync on every boot
      os.makedirs(os.path.dirname(HASH_PATH), exist_ok=True)
      with open(HASH_PATH, "w") as f:
          f.write(config_hash)
      print("uptime-kuma-sync: done")


  if __name__ == "__main__":
      main()
'';
in
{
# ---------------------------------------------------------------------------
# Shared monitor list. The option is declared outside the `enable` guard so
# other service modules can add entries whether or not Uptime Kuma is on.
# ---------------------------------------------------------------------------
options.homey.monitoring.monitors =
  let
    # One HTTP(S) check: display name, target URL and polling interval.
    monitorEntry = lib.types.submodule {
      options = {
        name = lib.mkOption {
          description = "Display name shown in Uptime Kuma.";
          type = lib.types.str;
        };
        url = lib.mkOption {
          description = "URL to check (HTTP/HTTPS).";
          type = lib.types.str;
        };
        interval = lib.mkOption {
          description = "Check interval in seconds.";
          default = 60;
          type = lib.types.int;
        };
      };
    };
  in
  lib.mkOption {
    description = ''
      List of HTTP endpoints to monitor in Uptime Kuma.
      Each service module contributes its own entries here.
    '';
    default = [];
    type = lib.types.listOf monitorEntry;
  };
options.homey.uptimeKuma = {
  enable = lib.mkEnableOption "Uptime Kuma uptime monitoring";

  image = lib.mkOption {
    type = lib.types.str;
    default = "docker.io/louislam/uptime-kuma:1";
    # Added: the option previously had no description.
    description = "OCI image reference used for the Uptime Kuma container.";
  };

  port = lib.mkOption {
    type = lib.types.port;
    default = 3001;
    description = "Host port Uptime Kuma listens on (bound to 127.0.0.1).";
  };
};
config = lib.mkIf cfg.enable {
# -----------------------------------------------------------------------
# Secrets consumed from sops
# -----------------------------------------------------------------------
# Admin password; passed to the uptime-kuma-sync unit via LoadCredential.
sops.secrets."uptime-kuma/admin_password".owner = "root";

# -----------------------------------------------------------------------
# Publish the build-time monitor list at /etc/uptime-kuma/monitors.json,
# the path the sync script reads.
# -----------------------------------------------------------------------
environment.etc."uptime-kuma/monitors.json" = {
  mode = "0444";
  source = monitorsJson;
};
# -----------------------------------------------------------------------
# Uptime Kuma container
# -----------------------------------------------------------------------
virtualisation.oci-containers.containers.uptime-kuma = {
  inherit (cfg) image;
  # /app/data must be writable by the image; it is backed by the data volume.
  volumes = [ "${dataDir}/uptime-kuma:/app/data" ];
  # Published on host loopback only; container port 3001 -> cfg.port.
  # No extra network is attached: the host reaches it on localhost.
  ports = [ "127.0.0.1:${toString cfg.port}:3001" ];
};

# The bind-mounted directory lives on the data volume, so the container unit
# is ordered after (and requires) the mount. mkAfter appends to the lists
# the oci-containers module already generates.
systemd.services."podman-uptime-kuma" =
  let
    dataMount = lib.mkAfter [ "mnt-data.mount" ];
  in
  {
    requires = dataMount;
    after = dataMount;
  };
# -----------------------------------------------------------------------
# Monitor-sync service: runs after Uptime Kuma is up, syncs monitors.
#
# Oneshot with RemainAfterExit, so it runs once per activation of the unit.
# The script reads the monitor list from /etc rather than from a store path
# in its command line, so the unit text would not change when monitors are
# added. restartTriggers ties the unit to the rendered list so that a
# `nixos-rebuild switch` with a changed list re-runs the sync.
# -----------------------------------------------------------------------
systemd.services."uptime-kuma-sync" = {
  description = "Sync Uptime Kuma monitors from NixOS config";
  wantedBy = [ "multi-user.target" ];
  after = [ "podman-uptime-kuma.service" ];
  requires = [ "podman-uptime-kuma.service" ];
  restartTriggers = [ monitorsJson ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    # Exposed to the script as $CREDENTIALS_DIRECTORY/uptime_kuma_password.
    LoadCredential = "uptime_kuma_password:${config.sops.secrets."uptime-kuma/admin_password".path}";
    ExecStart = pkgs.writeShellScript "uptime-kuma-sync-runner" ''
      set -euo pipefail
      exec ${pythonEnv}/bin/python3 ${syncScript}
    '';
  };
};
# -----------------------------------------------------------------------
# Uptime Kuma registers its own public URL in the shared monitor list
# -----------------------------------------------------------------------
homey.monitoring.monitors = [
  {
    name = "Uptime Kuma";
    interval = 60;
    url = "https://uptime.${domain}";
  }
];
};
}
+8
View File
@@ -29,6 +29,11 @@
# complete/
# transmission/
# config/
# uptime-kuma/ ← /app/data in uptime-kuma container (SQLite DB, config)
# ntfy/
# auth.db ← user/token auth database
# cache.db ← message cache
# attachments/ ← file attachments
# restic-cache/ ← restic local cache (not the backup destination)
let
@@ -102,6 +107,9 @@ in
"d ${cfg.mountPoint}/media/complete 0755 root root -"
"d ${cfg.mountPoint}/transmission 0750 root root -"
"d ${cfg.mountPoint}/transmission/config 0750 root root -"
"d ${cfg.mountPoint}/uptime-kuma 0750 root root -"
"d ${cfg.mountPoint}/ntfy 0750 ntfy-sh ntfy-sh -"
"d ${cfg.mountPoint}/ntfy/attachments 0750 ntfy-sh ntfy-sh -"
"d ${cfg.mountPoint}/restic-cache 0700 root root -"
];
};