Files
homey/modules/monitoring.nix
2026-06-07 00:59:22 +03:00

250 lines
8.8 KiB
Nix

{ config, lib, pkgs, homeyConfig, ... }:
# Prometheus + Grafana — metrics collection and dashboarding.
#
# Uses native NixOS services (not containers) for tight integration with
# the host OS and declarative dashboard/datasource provisioning.
#
# Architecture:
#   node_exporter → Prometheus ← systemd_exporter
#                       ↓
#                    Grafana (pre-provisioned dashboard: Node Exporter Full)
#
# Auth (Grafana):
# Authelia enforces two_factor + admins-only before any request reaches
# Grafana. Caddy then maps the Authelia Remote-User header to
# X-WEBAUTH-USER, and Grafana's proxy auth auto-signs the user in —
# no second login required.
#
# Prometheus is internal-only (127.0.0.1:9090); only Grafana reads it.
# Grafana is exposed at 127.0.0.1:3002 and reverse-proxied by Caddy.
#
# Data dirs:
# Prometheus: /var/lib/prometheus2 (system drive — metrics are ephemeral)
# Grafana: /var/lib/grafana (system drive — dashboards provisioned by Nix)
#
# Secrets consumed from sops:
# grafana/secret_key (session signing key)
# openldap/ro_password (for Grafana → LDAP auth, shared with other modules;
#   NOTE: no Grafana setting in this module currently references it)
let
  # Shorthand for this module's own option values (homey.monitoring.*).
  cfg = config.homey.monitoring;

  # Base domain taken from the shared homeyConfig argument; every Grafana
  # hostname in this module is built as "grafana.${domain}".
  inherit (homeyConfig) domain;
in
{
options.homey.monitoring =
  let
    inherit (lib) mkEnableOption mkOption types;

    # Build a port-typed option from its default value and description.
    portOption = default: description:
      mkOption {
        type = types.port;
        inherit default description;
      };
  in
  {
    # mkEnableOption defaults to false; the stack is switched on by default
    # by overriding that default here.
    enable =
      mkEnableOption "Prometheus + Grafana monitoring stack"
      // { default = true; };

    prometheusPort =
      portOption 9090 "Prometheus listen port (localhost only).";

    grafanaPort =
      portOption 3002 "Grafana listen port (localhost only, reverse-proxied by Caddy).";
  };
config = lib.mkIf cfg.enable {
# -----------------------------------------------------------------------
# Secrets (declared via sops)
# -----------------------------------------------------------------------
sops.secrets = {
  # Read by Grafana through security.secret_key, hence owned by the
  # grafana user.
  "grafana/secret_key".owner = "grafana";

  # NOTE(review): nothing in this module reads this secret's path —
  # presumably declared here because it is shared with other modules; confirm.
  "openldap/ro_password".owner = "root";
};
# -----------------------------------------------------------------------
# Prometheus
# -----------------------------------------------------------------------
services.prometheus = {
  enable = true;

  # Loopback only: Grafana (same host) is the sole consumer.
  listenAddress = "127.0.0.1";
  port = cfg.prometheusPort;

  globalConfig = {
    scrape_interval = "30s";
    evaluation_interval = "30s";
  };

  # Scrape node and systemd metrics from the local exporters. The target
  # ports are read back from the exporter options so the scrape config
  # cannot drift from the exporter configuration below.
  scrapeConfigs = [
    {
      job_name = "node";
      static_configs = [{
        targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ];
      }];
    }
    {
      job_name = "systemd";
      static_configs = [{
        targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.systemd.port}" ];
      }];
    }
  ];

  exporters = {
    node = {
      enable = true;
      # The exporter is only ever scraped via 127.0.0.1 (see scrapeConfigs),
      # so bind it to loopback instead of the exporter module's default of
      # all interfaces.
      listenAddress = "127.0.0.1";
      port = 9100;
      # Collectors requested explicitly.
      # NOTE(review): these look like collectors node_exporter already
      # enables by default — confirm whether the list is still needed.
      enabledCollectors = [
        "cpu"
        "diskstats"
        "filesystem"
        "loadavg"
        "meminfo"
        "netdev"
        "stat"
        "time"
        "uname"
        "pressure" # CPU/memory/IO pressure stall info (Linux PSI)
        "hwmon" # temperature sensors (RPi4 has a CPU temp sensor)
      ];
    };

    systemd = {
      enable = true;
      # Same reasoning as the node exporter: scraped via loopback only.
      listenAddress = "127.0.0.1";
      port = 9558;
    };
  };
};
# -----------------------------------------------------------------------
# Grafana
# -----------------------------------------------------------------------
services.grafana = {
  enable = true;

  # Listen on loopback only; the public URL is the Caddy-fronted hostname.
  settings.server = {
    http_addr = "127.0.0.1";
    http_port = cfg.grafanaPort;
    domain = "grafana.${domain}";
    root_url = "https://grafana.${domain}";
  };

  settings.security = {
    # Session signing key — read from the sops secret file at runtime via
    # Grafana's $__file{} interpolation syntax, so the value itself is not
    # written into this configuration.
    secret_key = "$__file{${config.sops.secrets."grafana/secret_key".path}}";
    # Initial admin account creation stays enabled. This key is unrelated
    # to the login form, which is controlled by auth.disable_login_form.
    # NOTE(review): confirm the initial admin account is actually wanted
    # given that all sign-ins arrive via proxy auth.
    disable_initial_admin_creation = false;
  };

  # Proxy auth: trust the X-WEBAUTH-USER header set by Caddy after
  # Authelia has verified the user's identity and TOTP.
  settings."auth.proxy" = {
    enabled = true;
    header_name = "X-WEBAUTH-USER";
    header_property = "username";
    # Unknown usernames are created on first sight.
    auto_sign_up = true;
    # No additional headers are mapped onto user attributes.
    headers = "";
  };

  # Hide Grafana's own login UI — all auth goes via Authelia.
  # Set to false to keep a fallback login form (useful for recovery).
  settings.auth.disable_login_form = true;

  # Every auto-created user gets the Admin role. Per the Authelia rules
  # for this host, only members of the admins group passing two_factor
  # are let through.
  settings.users.auto_assign_org_role = "Admin";

  settings.analytics.reporting_enabled = false;

  # ---------------------------------------------------------------------
  # Provisioning: Prometheus datasource + file-based dashboards
  # ---------------------------------------------------------------------
  provision.enable = true;

  provision.datasources.settings.datasources = [
    {
      name = "Prometheus";
      type = "prometheus";
      access = "proxy";
      isDefault = true;
      url = "http://127.0.0.1:${toString cfg.prometheusPort}";
    }
  ];

  # Dashboards are loaded from /etc/grafana/dashboards, which is where the
  # Node Exporter Full community dashboard (ID 1860) JSON is installed via
  # environment.etc.
  provision.dashboards.settings.providers = [
    {
      name = "default";
      options.path = "/etc/grafana/dashboards";
    }
  ];
};
# -----------------------------------------------------------------------
# Node Exporter Full dashboard JSON, fetched at build time and installed
# under /etc/grafana/dashboards.
#
# The fetch is pinned by hash. After changing the revision in the URL,
# obtain the new hash with:
#   nix store prefetch-file --hash-type sha256 \
#     https://grafana.com/api/dashboards/1860/revisions/37/download
# (a build with a stale hash also prints the expected one).
# -----------------------------------------------------------------------
environment.etc."grafana/dashboards/node-exporter-full.json" =
  let
    nodeExporterFullJson = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
      hash = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM=";
    };
  in
  {
    mode = "0444";
    source = nodeExporterFullJson;
  };
# -----------------------------------------------------------------------
# Authelia access control for the Grafana host:
#   1. members of group "admins" must pass two_factor
#   2. every other subject is denied
# NOTE(review): presumably the lower priority number is matched first —
# confirm against the homey.authelia module.
# -----------------------------------------------------------------------
homey.authelia.accessControlRules =
  let
    grafanaHosts = [ "grafana.${domain}" ];
  in
  [
    {
      priority = 35;
      domain = grafanaHosts;
      subject = [ "group:admins" ];
      policy = "two_factor";
    }
    {
      priority = 36;
      domain = grafanaHosts;
      policy = "deny";
    }
  ];
# -----------------------------------------------------------------------
# Caddy virtual host for grafana.<domain>.
#
# Both snippets proxy to the local Grafana port and copy the incoming
# Remote-User request header into X-WEBAUTH-USER, the header Grafana's
# proxy auth is configured to trust. The HTTP variant additionally sets
# X-Forwarded-Proto to https.
# NOTE(review): how auth/extraConfig/extraHttpConfig are assembled into the
# final Caddyfile is defined by the homey.caddy module — confirm there.
# -----------------------------------------------------------------------
homey.caddy.virtualHosts =
  let
    grafanaUpstream = "localhost:${toString cfg.grafanaPort}";
    webauthUserHeader = "header_up X-WEBAUTH-USER {http.request.header.Remote-User}";
  in
  [
    {
      subdomain = "grafana";
      port = cfg.grafanaPort;
      auth = true;
      extraConfig = ''
        reverse_proxy ${grafanaUpstream} {
        ${webauthUserHeader}
        }
      '';
      extraHttpConfig = ''
        reverse_proxy ${grafanaUpstream} {
        header_up X-Forwarded-Proto https
        ${webauthUserHeader}
        }
      '';
    }
  ];
# Grafana and Prometheus use system state dirs (/var/lib/grafana,
# /var/lib/prometheus2) — no extraDirs or backup entries needed.
# -----------------------------------------------------------------------
# Uptime Kuma monitor for Grafana
#
# NOTE(review): homey.monitoring.monitors is not declared in this module's
# options block — presumably declared by another module; confirm. The unit
# of `interval` is likewise defined there (presumably seconds).
# -----------------------------------------------------------------------
homey.monitoring.monitors = lib.singleton {
  name = "Grafana";
  url = "https://grafana.${domain}";
  interval = 60;
};
};
}