Better monitoring, bug fixes.
This commit is contained in:
+42
@@ -128,6 +128,48 @@ homey-deploy-rpi-main
|
|||||||
NixOS activates the new config on the Pi immediately, with an automatic
|
NixOS activates the new config on the Pi immediately, with an automatic
|
||||||
rollback if activation fails.
|
rollback if activation fails.
|
||||||
|
|
||||||
|
* Post-deploy setup
|
||||||
|
|
||||||
|
Some services require manual one-time configuration after the first deploy.
|
||||||
|
|
||||||
|
** Ntfy — push notifications
|
||||||
|
|
||||||
|
Ntfy's admin user is created automatically from sops on first start. You
|
||||||
|
still need to create a phone token and subscribe to the alerts topic.
|
||||||
|
|
||||||
|
1. Visit =https://ntfy.zakobar.com= and log in with the admin password
|
||||||
|
(=ntfy/admin_password= in =secrets/secrets.yaml=).
|
||||||
|
2. Go to *Account → Access Tokens → Create token* — give it a name (e.g.
|
||||||
|
"phone") and copy the token value.
|
||||||
|
3. In the [[https://ntfy.sh][Ntfy mobile app]]:
|
||||||
|
- *Server*: =https://ntfy.zakobar.com=
|
||||||
|
- *Access token*: the token you just created
|
||||||
|
4. Subscribe to the =alerts= topic in the app.
|
||||||
|
|
||||||
|
** Uptime Kuma — notifications (two-deploy process)
|
||||||
|
|
||||||
|
Uptime Kuma monitors are created automatically by the sync script on first
|
||||||
|
deploy, but notification channels must be configured in the UI before they
|
||||||
|
can be attached to monitors. This requires two deploys:
|
||||||
|
|
||||||
|
*Deploy 1* — services are up, monitors exist, but no notifications assigned yet.
|
||||||
|
|
||||||
|
Then, in the Uptime Kuma UI (=https://uptime.zakobar.com=):
|
||||||
|
|
||||||
|
1. Go to *Settings → Notifications → Add Notification*.
|
||||||
|
2. Choose *ntfy* as the type and fill in:
|
||||||
|
- *Server URL*: =https://ntfy.zakobar.com=
|
||||||
|
- *Topic*: =alerts=
|
||||||
|
- *Token*: an access token belonging to the ntfy admin user (e.g. the one created in the Ntfy steps above), or create a dedicated one in ntfy
|
||||||
|
3. Save — you do *not* need to manually assign it to any monitor.
|
||||||
|
|
||||||
|
*Deploy 2* — run =homey-deploy-rpi-main= again. The sync script will detect
|
||||||
|
the newly configured notification channel and attach it to every monitor
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
Any notifications added to Uptime Kuma in the future will also be picked up
|
||||||
|
on the next deploy.
|
||||||
|
|
||||||
* Backing up
|
* Backing up
|
||||||
|
|
||||||
Backups use [[https://restic.net/][restic]] and run automatically via systemd on a daily schedule.
|
Backups use [[https://restic.net/][restic]] and run automatically via systemd on a daily schedule.
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ in
|
|||||||
environment.etc."grafana/dashboards/node-exporter-full.json" = {
|
environment.etc."grafana/dashboards/node-exporter-full.json" = {
|
||||||
source = pkgs.fetchurl {
|
source = pkgs.fetchurl {
|
||||||
url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
|
url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
|
||||||
hash = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
|
hash = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM=";
|
||||||
};
|
};
|
||||||
mode = "0444";
|
mode = "0444";
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -192,6 +192,10 @@ in
|
|||||||
AUTHELIA_SESSION_SECRET_FILE = "/run/secrets/session_secret";
|
AUTHELIA_SESSION_SECRET_FILE = "/run/secrets/session_secret";
|
||||||
AUTHELIA_STORAGE_ENCRYPTION_KEY_FILE = "/run/secrets/storage_encryption_key";
|
AUTHELIA_STORAGE_ENCRYPTION_KEY_FILE = "/run/secrets/storage_encryption_key";
|
||||||
AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE = "/run/secrets/ldap_ro_password";
|
AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE = "/run/secrets/ldap_ro_password";
|
||||||
|
# Changing this forces a container restart when the config changes.
|
||||||
|
# NixOS bind-mounts resolve symlinks at container start, so the running
|
||||||
|
# container would otherwise keep the old nix-store config until restarted.
|
||||||
|
NIXOS_CONFIG_HASH = builtins.hashString "sha256" autheliaConfig;
|
||||||
};
|
};
|
||||||
|
|
||||||
volumes = [
|
volumes = [
|
||||||
|
|||||||
@@ -170,9 +170,13 @@ in
|
|||||||
# Uptime Kuma monitor for this service
|
# Uptime Kuma monitor for this service
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
homey.monitoring.monitors = [{
|
homey.monitoring.monitors = [{
|
||||||
name = "Nextcloud";
|
name = "Nextcloud";
|
||||||
url = "https://nextcloud.${domain}/status.php";
|
url = "https://nextcloud.${domain}/status.php";
|
||||||
interval = 60;
|
interval = 60;
|
||||||
|
keyword = "\"maintenance\":false";
|
||||||
|
# Nightly maintenance is expected — only alert if stuck for 4+ hours.
|
||||||
|
# 240 retries × 60s = 4 hours of consecutive failures before notifying.
|
||||||
|
maxretries = 240;
|
||||||
}];
|
}];
|
||||||
|
|
||||||
systemd.services."podman-nextcloud" = {
|
systemd.services."podman-nextcloud" = {
|
||||||
|
|||||||
+46
-13
@@ -75,6 +75,46 @@ in
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Minimal config for the `ntfy user` CLI — the NixOS module puts its
|
||||||
|
# generated config in the nix store under an unpredictable path, so we
|
||||||
|
# write a separate file just containing the auth-file path. The server
|
||||||
|
# ignores this file (it uses the module-generated one via -c flag).
|
||||||
|
environment.etc."ntfy-sh/user-cli.yml" = {
|
||||||
|
text = "auth-file: ${dataDir}/ntfy/auth.db\n";
|
||||||
|
mode = "0444";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Create ntfy data directories on the external HD before ntfy starts.
|
||||||
|
# Runs as a separate root service (outside ntfy-sh's restricted namespace)
|
||||||
|
# so it can access /mnt/data without hitting ReadWritePaths restrictions.
|
||||||
|
systemd.services.ntfy-sh-mkdir = {
|
||||||
|
description = "Create Ntfy data directories on external HD";
|
||||||
|
wantedBy = [ "ntfy-sh.service" ];
|
||||||
|
before = [ "ntfy-sh.service" ];
|
||||||
|
after = [ "mnt-data.mount" ];
|
||||||
|
requires = [ "mnt-data.mount" ];
|
||||||
|
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
ExecStart = pkgs.writeShellScript "ntfy-mkdir" ''
|
||||||
|
set -euo pipefail
|
||||||
|
mkdir -p ${dataDir}/ntfy/attachments
|
||||||
|
chown -R ntfy-sh:ntfy-sh ${dataDir}/ntfy
|
||||||
|
chmod 0750 ${dataDir}/ntfy ${dataDir}/ntfy/attachments
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Ensure ntfy-sh starts after the HD is mounted and dirs are ready.
|
||||||
|
# Also widen ReadWritePaths so ntfy-sh can write to the external HD path
|
||||||
|
# (the NixOS module restricts writes to /var/lib/ntfy-sh by default).
|
||||||
|
systemd.services.ntfy-sh = {
|
||||||
|
after = lib.mkAfter [ "mnt-data.mount" "ntfy-sh-mkdir.service" ];
|
||||||
|
requires = lib.mkAfter [ "mnt-data.mount" "ntfy-sh-mkdir.service" ];
|
||||||
|
serviceConfig.ReadWritePaths = lib.mkAfter [ "${dataDir}/ntfy" ];
|
||||||
|
};
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Create the admin user on first start (idempotent)
|
# Create the admin user on first start (idempotent)
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
@@ -102,28 +142,21 @@ in
|
|||||||
|
|
||||||
PASS=$(cat "$CREDENTIALS_DIRECTORY/ntfy_admin_password")
|
PASS=$(cat "$CREDENTIALS_DIRECTORY/ntfy_admin_password")
|
||||||
|
|
||||||
# ntfy user commands need the config file to find the auth database.
|
# Use the minimal CLI config (just has auth-file path).
|
||||||
# The NixOS ntfy-sh module writes config to /etc/ntfy-sh/server.yml.
|
NTFY="${pkgs.ntfy-sh}/bin/ntfy user --config /etc/ntfy-sh/user-cli.yml"
|
||||||
NTFY="${pkgs.ntfy-sh}/bin/ntfy user --config /etc/ntfy-sh/server.yml"
|
|
||||||
|
|
||||||
# ntfy user list exits non-zero if the user DB is empty/doesn't exist;
|
# ntfy user list outputs a Unicode table; grep for admin in it.
|
||||||
# grep exits non-zero if the pattern is missing. Either means no admin.
|
# ntfy user add reads password + confirmation from stdin (two lines).
|
||||||
if $NTFY list 2>/dev/null | grep -qE "^admin\b"; then
|
if $NTFY list 2>/dev/null | grep -qE "admin"; then
|
||||||
echo "ntfy-sh-setup: admin user already exists"
|
echo "ntfy-sh-setup: admin user already exists"
|
||||||
else
|
else
|
||||||
echo "$PASS" | $NTFY add --role=admin admin
|
printf '%s\n%s\n' "$PASS" "$PASS" | $NTFY add --role=admin admin
|
||||||
echo "ntfy-sh-setup: admin user created"
|
echo "ntfy-sh-setup: admin user created"
|
||||||
fi
|
fi
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# Ensure ntfy-sh starts after the external HD is mounted
|
|
||||||
systemd.services.ntfy-sh = {
|
|
||||||
after = lib.mkAfter [ "mnt-data.mount" ];
|
|
||||||
requires = lib.mkAfter [ "mnt-data.mount" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Uptime Kuma monitor for this service
|
# Uptime Kuma monitor for this service
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ in
|
|||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
homey.monitoring.monitors = [{
|
homey.monitoring.monitors = [{
|
||||||
name = "phpLDAPadmin";
|
name = "phpLDAPadmin";
|
||||||
url = "http://localhost:${toString cfg.port}";
|
url = "http://phpldapadmin:80";
|
||||||
interval = 60;
|
interval = 60;
|
||||||
}];
|
}];
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -36,8 +36,16 @@ let
|
|||||||
monitorsJson = pkgs.writeText "uptime-kuma-monitors.json"
|
monitorsJson = pkgs.writeText "uptime-kuma-monitors.json"
|
||||||
(builtins.toJSON config.homey.monitoring.monitors);
|
(builtins.toJSON config.homey.monitoring.monitors);
|
||||||
|
|
||||||
# Python environment for the monitor-sync script
|
# Python environment for the monitor-sync script.
|
||||||
pythonEnv = pkgs.python3.withPackages (ps: [ ps."uptime-kuma-api" ]);
|
# uptime-kuma-api's transitive deps (requests, socketio, websocket-client)
|
||||||
|
# are listed explicitly because withPackages doesn't always pull propagated
|
||||||
|
# deps transitively in all nixpkgs versions.
|
||||||
|
pythonEnv = pkgs.python3.withPackages (ps: [
|
||||||
|
ps."uptime-kuma-api"
|
||||||
|
ps.requests
|
||||||
|
ps."python-socketio"
|
||||||
|
ps."websocket-client"
|
||||||
|
]);
|
||||||
|
|
||||||
# Monitor-sync script: idempotent, hash-gated, uses Socket.IO API
|
# Monitor-sync script: idempotent, hash-gated, uses Socket.IO API
|
||||||
syncScript = pkgs.writeText "uptime-kuma-sync.py" ''
|
syncScript = pkgs.writeText "uptime-kuma-sync.py" ''
|
||||||
@@ -48,6 +56,9 @@ let
|
|||||||
Runs as a oneshot systemd service after podman-uptime-kuma.service.
|
Runs as a oneshot systemd service after podman-uptime-kuma.service.
|
||||||
Tracks a hash of the monitor list so it only re-syncs when the NixOS
|
Tracks a hash of the monitor list so it only re-syncs when the NixOS
|
||||||
config changes.
|
config changes.
|
||||||
|
|
||||||
|
Uptime Kuma v1 has no REST API — everything is Socket.IO. Initial admin
|
||||||
|
creation uses api.setup() which raises if already done (we ignore that).
|
||||||
"""
|
"""
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
@@ -62,10 +73,11 @@ let
|
|||||||
CREDS_DIR = os.environ.get("CREDENTIALS_DIRECTORY", "")
|
CREDS_DIR = os.environ.get("CREDENTIALS_DIRECTORY", "")
|
||||||
|
|
||||||
def wait_for_kuma(timeout=120):
|
def wait_for_kuma(timeout=120):
|
||||||
|
"""Wait until Uptime Kuma HTTP responds (any non-5xx — just checks it's up)."""
|
||||||
deadline = time.time() + timeout
|
deadline = time.time() + timeout
|
||||||
while time.time() < deadline:
|
while time.time() < deadline:
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(KUMA_URL + "/", timeout=5) as r:
|
with urllib.request.urlopen(KUMA_URL, timeout=5) as r:
|
||||||
if r.status < 500:
|
if r.status < 500:
|
||||||
return True
|
return True
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -103,36 +115,58 @@ let
|
|||||||
|
|
||||||
api = UptimeKumaApi(KUMA_URL)
|
api = UptimeKumaApi(KUMA_URL)
|
||||||
|
|
||||||
# Initial setup (creates admin user on first run; no-op if already done)
|
# Initial admin setup via Socket.IO — idempotent (raises if already done, ignore it)
|
||||||
try:
|
try:
|
||||||
info = api.info()
|
api.setup("admin", password)
|
||||||
if not info.get("isSetup", True):
|
print("uptime-kuma-sync: initial admin user created")
|
||||||
api.setup("admin", password)
|
|
||||||
print("uptime-kuma-sync: initial admin user created")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"uptime-kuma-sync: setup check: {e}", file=sys.stderr)
|
print(f"uptime-kuma-sync: setup skipped (already configured): {e}")
|
||||||
|
|
||||||
# Login
|
# Login
|
||||||
result = api.login("admin", password)
|
try:
|
||||||
if not result.get("ok"):
|
api.login("admin", password)
|
||||||
print(f"uptime-kuma-sync: login failed: {result}", file=sys.stderr)
|
except Exception as e:
|
||||||
|
print(f"uptime-kuma-sync: login failed: {e}", file=sys.stderr)
|
||||||
api.disconnect()
|
api.disconnect()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Sync monitors (add missing; skip existing by name)
|
# Collect all configured notification IDs so every monitor gets them.
|
||||||
|
notification_ids = [n["id"] for n in api.get_notifications()]
|
||||||
|
if notification_ids:
|
||||||
|
print(f"uptime-kuma-sync: attaching notifications: {notification_ids}")
|
||||||
|
|
||||||
|
# Sync monitors: add missing, update changed
|
||||||
try:
|
try:
|
||||||
existing_names = {m["name"] for m in api.get_monitors()}
|
existing = {m["name"]: m for m in api.get_monitors()}
|
||||||
for m in monitors:
|
for m in monitors:
|
||||||
if m["name"] in existing_names:
|
keyword = m.get("keyword")
|
||||||
print(f"uptime-kuma-sync: monitor exists, skipping: {m['name']}")
|
maxretries = m.get("maxretries", 0)
|
||||||
continue
|
monitor_type = MonitorType.KEYWORD if keyword else MonitorType.HTTP
|
||||||
api.add_monitor(
|
extra = {"keyword": keyword} if keyword else {}
|
||||||
type=MonitorType.HTTP,
|
if m["name"] not in existing:
|
||||||
name=m["name"],
|
api.add_monitor(
|
||||||
url=m["url"],
|
type=monitor_type,
|
||||||
interval=m.get("interval", 60),
|
name=m["name"],
|
||||||
)
|
url=m["url"],
|
||||||
print(f"uptime-kuma-sync: created monitor: {m['name']}")
|
interval=m.get("interval", 60),
|
||||||
|
maxretries=maxretries,
|
||||||
|
notification_id_list={str(nid): True for nid in notification_ids},
|
||||||
|
**extra,
|
||||||
|
)
|
||||||
|
print(f"uptime-kuma-sync: created monitor: {m['name']}")
|
||||||
|
elif (existing[m["name"]].get("url") != m["url"]
|
||||||
|
or existing[m["name"]].get("keyword") != keyword
|
||||||
|
or existing[m["name"]].get("maxretries") != maxretries):
|
||||||
|
api.edit_monitor(
|
||||||
|
existing[m["name"]]["id"],
|
||||||
|
type=monitor_type,
|
||||||
|
url=m["url"],
|
||||||
|
interval=m.get("interval", 60),
|
||||||
|
maxretries=maxretries,
|
||||||
|
notification_id_list={str(nid): True for nid in notification_ids},
|
||||||
|
**extra,
|
||||||
|
)
|
||||||
|
print(f"uptime-kuma-sync: updated monitor: {m['name']}")
|
||||||
finally:
|
finally:
|
||||||
api.disconnect()
|
api.disconnect()
|
||||||
|
|
||||||
@@ -168,6 +202,16 @@ in
|
|||||||
default = 60;
|
default = 60;
|
||||||
description = "Check interval in seconds.";
|
description = "Check interval in seconds.";
|
||||||
};
|
};
|
||||||
|
keyword = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.str;
|
||||||
|
default = null;
|
||||||
|
description = "If set, use a keyword monitor that checks for this string in the response body.";
|
||||||
|
};
|
||||||
|
maxretries = lib.mkOption {
|
||||||
|
type = lib.types.int;
|
||||||
|
default = 0;
|
||||||
|
description = "Consecutive failures before a DOWN alert is sent. 0 = alert immediately.";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = [];
|
default = [];
|
||||||
@@ -217,13 +261,14 @@ in
|
|||||||
"${dataDir}/uptime-kuma:/app/data"
|
"${dataDir}/uptime-kuma:/app/data"
|
||||||
];
|
];
|
||||||
|
|
||||||
# uptime-kuma image expects /app/data to be writable; no extra network
|
# Join the homey network so monitors can reach other containers by name
|
||||||
# needed since we reach it from the host on localhost.
|
# (e.g. phpldapadmin:80) without going through the host loopback.
|
||||||
|
extraOptions = [ "--network=homey" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
systemd.services."podman-uptime-kuma" = {
|
systemd.services."podman-uptime-kuma" = {
|
||||||
after = lib.mkAfter [ "mnt-data.mount" ];
|
after = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ];
|
||||||
requires = lib.mkAfter [ "mnt-data.mount" ];
|
requires = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user