Better monitoring, bug fixes.

This commit is contained in:
Aner Zakobar
2026-05-10 13:44:27 +03:00
parent af744e819c
commit 09052e8aec
7 changed files with 174 additions and 46 deletions
+73 -28
View File
@@ -36,8 +36,16 @@ let
monitorsJson = pkgs.writeText "uptime-kuma-monitors.json"
(builtins.toJSON config.homey.monitoring.monitors);
# Python environment for the monitor-sync script
pythonEnv = pkgs.python3.withPackages (ps: [ ps."uptime-kuma-api" ]);
# Python environment for the monitor-sync script.
# uptime-kuma-api's transitive deps (requests, socketio, websocket-client)
# are listed explicitly because withPackages doesn't always pull propagated
# deps transitively in all nixpkgs versions.
pythonEnv = pkgs.python3.withPackages (ps: [
ps."uptime-kuma-api"
ps.requests
ps."python-socketio"
ps."websocket-client"
]);
# Monitor-sync script: idempotent, hash-gated, uses Socket.IO API
syncScript = pkgs.writeText "uptime-kuma-sync.py" ''
@@ -48,6 +56,9 @@ let
Runs as a oneshot systemd service after podman-uptime-kuma.service.
Tracks a hash of the monitor list so it only re-syncs when the NixOS
config changes.
Uptime Kuma v1 has no REST API — everything is Socket.IO. Initial admin
creation uses api.setup(), which raises if already done (we ignore that).
"""
import hashlib
import json
@@ -62,10 +73,11 @@ let
CREDS_DIR = os.environ.get("CREDENTIALS_DIRECTORY", "")
def wait_for_kuma(timeout=120):
"""Wait until Uptime Kuma HTTP responds (any non-5xx — just checks it's up)."""
deadline = time.time() + timeout
while time.time() < deadline:
try:
with urllib.request.urlopen(KUMA_URL + "/", timeout=5) as r:
with urllib.request.urlopen(KUMA_URL, timeout=5) as r:
if r.status < 500:
return True
except Exception:
@@ -103,36 +115,58 @@ let
api = UptimeKumaApi(KUMA_URL)
# Initial setup (creates admin user on first run; no-op if already done)
# Initial admin setup via Socket.IO — idempotent (raises if already done, ignore it)
try:
info = api.info()
if not info.get("isSetup", True):
api.setup("admin", password)
print("uptime-kuma-sync: initial admin user created")
api.setup("admin", password)
print("uptime-kuma-sync: initial admin user created")
except Exception as e:
print(f"uptime-kuma-sync: setup check: {e}", file=sys.stderr)
print(f"uptime-kuma-sync: setup skipped (already configured): {e}")
# Login
result = api.login("admin", password)
if not result.get("ok"):
print(f"uptime-kuma-sync: login failed: {result}", file=sys.stderr)
try:
api.login("admin", password)
except Exception as e:
print(f"uptime-kuma-sync: login failed: {e}", file=sys.stderr)
api.disconnect()
sys.exit(1)
# Sync monitors (add missing; skip existing by name)
# Collect all configured notification IDs so every monitor gets them.
notification_ids = [n["id"] for n in api.get_notifications()]
if notification_ids:
print(f"uptime-kuma-sync: attaching notifications: {notification_ids}")
# Sync monitors: add missing, update changed
try:
existing_names = {m["name"] for m in api.get_monitors()}
existing = {m["name"]: m for m in api.get_monitors()}
for m in monitors:
if m["name"] in existing_names:
print(f"uptime-kuma-sync: monitor exists, skipping: {m['name']}")
continue
api.add_monitor(
type=MonitorType.HTTP,
name=m["name"],
url=m["url"],
interval=m.get("interval", 60),
)
print(f"uptime-kuma-sync: created monitor: {m['name']}")
keyword = m.get("keyword")
maxretries = m.get("maxretries", 0)
monitor_type = MonitorType.KEYWORD if keyword else MonitorType.HTTP
extra = {"keyword": keyword} if keyword else {}
if m["name"] not in existing:
api.add_monitor(
type=monitor_type,
name=m["name"],
url=m["url"],
interval=m.get("interval", 60),
maxretries=maxretries,
notification_id_list={str(nid): True for nid in notification_ids},
**extra,
)
print(f"uptime-kuma-sync: created monitor: {m['name']}")
elif (existing[m["name"]].get("url") != m["url"]
or existing[m["name"]].get("keyword") != keyword
or existing[m["name"]].get("maxretries") != maxretries):
api.edit_monitor(
existing[m["name"]]["id"],
type=monitor_type,
url=m["url"],
interval=m.get("interval", 60),
maxretries=maxretries,
notification_id_list={str(nid): True for nid in notification_ids},
**extra,
)
print(f"uptime-kuma-sync: updated monitor: {m['name']}")
finally:
api.disconnect()
@@ -168,6 +202,16 @@ in
default = 60;
description = "Check interval in seconds.";
};
keyword = lib.mkOption {
type = lib.types.nullOr lib.types.str;
default = null;
description = "If set, use a keyword monitor that checks for this string in the response body.";
};
maxretries = lib.mkOption {
type = lib.types.int;
default = 0;
description = "Consecutive failures before a DOWN alert is sent. 0 = alert immediately.";
};
};
});
default = [];
@@ -217,13 +261,14 @@ in
"${dataDir}/uptime-kuma:/app/data"
];
# uptime-kuma image expects /app/data to be writable; no extra network
# needed since we reach it from the host on localhost.
# Join the homey network so monitors can reach other containers by name
# (e.g. phpldapadmin:80) without going through the host loopback.
extraOptions = [ "--network=homey" ];
};
systemd.services."podman-uptime-kuma" = {
after = lib.mkAfter [ "mnt-data.mount" ];
requires = lib.mkAfter [ "mnt-data.mount" ];
after = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ];
requires = lib.mkAfter [ "mnt-data.mount" "podman-homey-network.service" ];
};
# -----------------------------------------------------------------------