# Source listing header: YAML, 36 lines, 1018 B
---
# Prometheus-style rule file: one group of recording rules and one group of
# alerting rules, all derived from the `up` metric.
groups:
  # Pre-aggregated views of `up`, one output series per `job` label value.
  - name: recording_rules
    rules:
      # Sum of `up` across all series that share a job.
      - record: job:up:sum
        expr: sum(up) by (job)
      # Number of `up` series per job.
      - record: job:up:count
        expr: count(up) by (job)

  # Availability alerts. Each rule's `for` is how long its expression must
  # keep matching before the alert fires.
  - name: alerting_rules
    rules:
      # No job matcher, so this applies to every `up` series, including the
      # traefik and authelia jobs that also have dedicated alerts below.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Scoped to job="traefik"; shorter hold time and lower severity than
      # the generic InstanceDown rule.
      - alert: TraefikDown
        expr: up{job="traefik"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Traefik is down"
          description: "Traefik has been down for more than 2 minutes."

      # Scoped to job="authelia".
      - alert: AutheliaDown
        expr: up{job="authelia"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Authelia is down"
          description: "Authelia authentication service is unavailable."