{ config, lib, pkgs, homeyConfig, ... }:

# Prometheus + Grafana — metrics collection and dashboarding.
#
# Uses native NixOS services (not containers) for tight integration with
# the host OS and declarative dashboard/datasource provisioning.
#
# Architecture:
#   node_exporter → Prometheus ← systemd_exporter
#                       ↓
#                    Grafana (pre-provisioned dashboard: Node Exporter Full)
#
# Auth (Grafana):
#   Authelia enforces two_factor + admins-only before any request reaches
#   Grafana. Caddy then maps the Authelia Remote-User header to
#   X-WEBAUTH-USER, and Grafana's proxy auth auto-signs the user in —
#   no second login required.
#
# Network exposure:
#   Everything binds to loopback. Prometheus (127.0.0.1:9090 by default) is
#   read only by Grafana; the exporters are read only by Prometheus; Grafana
#   (127.0.0.1:3002 by default) is reverse-proxied by Caddy.
#
# Data dirs:
#   Prometheus: /var/lib/prometheus2 (system drive — metrics are ephemeral)
#   Grafana:    /var/lib/grafana     (system drive — dashboards provisioned by Nix)
#
# Secrets consumed from sops:
#   grafana/secret_key    (session signing key)
#   openldap/ro_password  (declared here, shared with other modules;
#                          NOTE(review): not referenced by any setting in
#                          this module — confirm it is still needed here)

let
  cfg = config.homey.monitoring;
  domain = homeyConfig.domain;

  # Every listener in this module binds to loopback only.
  loopback = "127.0.0.1";

  # Public hostname under which Caddy serves Grafana.
  grafanaHost = "grafana.${domain}";

  # Directory (relative to /etc) holding the provisioned dashboard JSON.
  # Shared by the Grafana dashboard provider and the environment.etc entry
  # so the two cannot drift apart.
  dashboardEtcDir = "grafana/dashboards";
in
{
  options.homey.monitoring = {
    enable = lib.mkEnableOption "Prometheus + Grafana monitoring stack";

    prometheusPort = lib.mkOption {
      type = lib.types.port;
      default = 9090;
      description = "Prometheus listen port (localhost only).";
    };

    grafanaPort = lib.mkOption {
      type = lib.types.port;
      default = 3002;
      description = "Grafana listen port (localhost only, reverse-proxied by Caddy).";
    };
  };

  config = lib.mkIf cfg.enable {
    # -----------------------------------------------------------------------
    # Secrets
    # -----------------------------------------------------------------------
    sops.secrets."grafana/secret_key" = {
      owner = "grafana";
    };
    sops.secrets."openldap/ro_password" = {
      owner = "root";
    };

    # -----------------------------------------------------------------------
    # Prometheus
    # -----------------------------------------------------------------------
    services.prometheus = {
      enable = true;
      listenAddress = loopback;
      port = cfg.prometheusPort;

      globalConfig = {
        scrape_interval = "30s";
        evaluation_interval = "30s";
      };

      # Scrape node and systemd metrics from the local exporters. Targets
      # are derived from the exporter options so the ports stay in sync.
      scrapeConfigs = [
        {
          job_name = "node";
          static_configs = [{
            targets = [
              "${loopback}:${toString config.services.prometheus.exporters.node.port}"
            ];
          }];
        }
        {
          job_name = "systemd";
          static_configs = [{
            targets = [
              "${loopback}:${toString config.services.prometheus.exporters.systemd.port}"
            ];
          }];
        }
      ];

      exporters = {
        node = {
          enable = true;
          # Bind to loopback: the NixOS exporter default is 0.0.0.0, but
          # only the local Prometheus ever scrapes this endpoint.
          listenAddress = loopback;
          port = 9100;
          # Collectors passed explicitly to node_exporter.
          enabledCollectors = [
            "cpu"
            "diskstats"
            "filesystem"
            "loadavg"
            "meminfo"
            "netdev"
            "stat"
            "time"
            "uname"
            "pressure" # CPU/memory/IO pressure stall info (Linux PSI)
            "hwmon"    # temperature sensors (RPi4 has a CPU temp sensor)
          ];
        };

        systemd = {
          enable = true;
          # Same reasoning as the node exporter: local scrape only.
          listenAddress = loopback;
          port = 9558;
        };
      };
    };

    # -----------------------------------------------------------------------
    # Grafana
    # -----------------------------------------------------------------------
    services.grafana = {
      enable = true;

      settings = {
        server = {
          http_addr = loopback;
          http_port = cfg.grafanaPort;
          domain = grafanaHost;
          root_url = "https://${grafanaHost}";
        };

        security = {
          # Session signing key — read from sops at runtime via Grafana's
          # $__file{} interpolation syntax, so it never enters the Nix store.
          secret_key = "$__file{${config.sops.secrets."grafana/secret_key".path}}";

          # false = Grafana still creates its built-in admin account on
          # first start. This does NOT affect the login form (that is
          # auth.disable_login_form below).
          # NOTE(review): no admin_password is set in this module, so that
          # account presumably keeps Grafana's default credentials —
          # consider setting this to true or provisioning a password.
          disable_initial_admin_creation = false;
        };

        # Proxy auth: trust the X-WEBAUTH-USER header set by Caddy after
        # Authelia verifies the user's identity and TOTP.
        "auth.proxy" = {
          enabled = true;
          header_name = "X-WEBAUTH-USER";
          header_property = "username";
          auto_sign_up = true;
          # No additional header mappings (name/email/groups/role): every
          # user that reaches Grafana is already a confirmed admin
          # (Authelia enforces the admins group + two_factor policy), and
          # the role comes from users.auto_assign_org_role below.
          headers = "";
        };

        # Disable Grafana's own login UI — all auth goes via Authelia.
        # Set to false to keep a fallback login form (useful for recovery).
        auth = {
          disable_login_form = true;
        };

        # Assign all proxy-auth users the Admin role automatically.
        # Safe because Authelia already restricts access to the admins group.
        users = {
          auto_assign_org_role = "Admin";
        };

        analytics.reporting_enabled = false;
      };

      # ---------------------------------------------------------------------
      # Provisioning: Prometheus datasource + dashboard directory
      # ---------------------------------------------------------------------
      provision = {
        enable = true;

        datasources.settings.datasources = [{
          name = "Prometheus";
          type = "prometheus";
          url = "http://${loopback}:${toString cfg.prometheusPort}";
          isDefault = true;
          access = "proxy";
        }];

        # Load every dashboard JSON found in /etc/grafana/dashboards. The
        # Node Exporter Full community dashboard (ID 1860) is placed there
        # by the environment.etc entry below.
        dashboards.settings.providers = [{
          name = "default";
          options.path = "/etc/${dashboardEtcDir}";
        }];
      };
    };

    # -----------------------------------------------------------------------
    # Download the Node Exporter Full dashboard JSON at build time.
    #
    # The hash below is a placeholder. If the hash is wrong, `nix build`
    # will print the correct one. Alternatively run:
    #   nix store prefetch-file --hash-type sha256 \
    #     https://grafana.com/api/dashboards/1860/revisions/37/download
    # and replace the hash below.
    # -----------------------------------------------------------------------
    environment.etc."${dashboardEtcDir}/node-exporter-full.json" = {
      source = pkgs.fetchurl {
        url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
        hash = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
      };
      mode = "0444";
    };

    # -----------------------------------------------------------------------
    # Uptime Kuma monitor for Grafana
    #
    # NOTE(review): the `monitors` option is not declared in this file —
    # presumably it is declared by the Uptime Kuma module; confirm.
    # -----------------------------------------------------------------------
    homey.monitoring.monitors = [{
      name = "Grafana";
      url = "https://${grafanaHost}";
      interval = 60;
    }];
  };
}