---
# Prometheus rule file: one group of recording rules and one group of
# alerting rules, all built on the `up` series.
groups:
  # Per-job aggregates of `up`, stored under the names given in `record`.
  - name: recording_rules
    rules:
      # Sum of `up` across all series, grouped by `job`.
      - record: job:up:sum
        expr: sum(up) by (job)

      # Number of `up` series, grouped by `job`.
      - record: job:up:count
        expr: count(up) by (job)

  # Availability alerts. Each rule requires its expression to keep matching
  # for the whole `for` duration before the alert fires.
  - name: alerting_rules
    rules:
      # Generic alert: any series with `up == 0`, regardless of job.
      # NOTE(review): this selector also matches the traefik and authelia
      # jobs, so those targets can raise both this alert and their
      # job-specific one below; confirm that overlap is intended.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Traefik-specific alert with a shorter 2m window and warning severity.
      - alert: TraefikDown
        expr: up{job="traefik"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Traefik is down"
          description: "Traefik has been down for more than 2 minutes."

      # Authelia-specific alert; same 5m window and severity as InstanceDown.
      - alert: AutheliaDown
        expr: up{job="authelia"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Authelia is down"
          description: "Authelia authentication service is unavailable."