Files
launchpad-gateway/rules.yml

36 lines
1018 B
YAML

---
# Prometheus rule groups: per-job aggregates of the `up` series, plus alerts
# that fire when scrape targets report `up == 0`.
groups:
  # Recording rules: precompute per-job aggregates of `up`.
  - name: recording_rules
    rules:
      # Sum of `up` across each job's targets (targets currently reporting 1).
      - record: job:up:sum
        expr: sum(up) by (job)
      # Number of `up` series per job (one per target, whatever its value).
      - record: job:up:count
        expr: count(up) by (job)

  # Alerting rules: each rule fires once its expression has held for `for`.
  - name: alerting_rules
    rules:
      # Generic catch-all: any target, of any job, whose `up` is 0 for 5m.
      # NOTE(review): there is no job filter, so traefik and authelia targets
      # match this rule as well as their dedicated alerts below -- confirm the
      # duplicate notifications are intended.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
      # Targets of the `traefik` job; shorter hold time (2m), lower severity.
      - alert: TraefikDown
        expr: up{job="traefik"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Traefik is down"
          description: "Traefik has been down for more than 2 minutes."
      # Targets of the `authelia` job.
      # NOTE(review): like the rules above, this only matches while an `up`
      # series exists for the job; presumably a target removed from the scrape
      # config would not fire it -- verify whether an absent() check is wanted.
      - alert: AutheliaDown
        expr: up{job="authelia"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Authelia is down"
          description: "Authelia authentication service is unavailable."