Add Prometheus and Grafana services with alerting configuration

This commit is contained in:
elfateh4
2025-12-01 19:18:08 +01:00
parent 47e640b969
commit a924adee27
4 changed files with 170 additions and 0 deletions

36
rules.yml Normal file
View File

@@ -0,0 +1,36 @@
# Prometheus rule file: a list of rule groups.
groups:
# Recording rules that pre-aggregate the `up` series per scrape job.
- name: 'recording_rules'
  rules:
  # Sum of the `up` samples grouped by the `job` label.
  - record: 'job:up:sum'
    expr: 'sum(up) by (job)'
  # Number of `up` series grouped by the `job` label.
  - record: 'job:up:count'
    expr: 'count(up) by (job)'
# Availability alerts driven by the `up` series.
- name: alerting_rules
  rules:
  # Catch-all: fires for any target, in any job, whose `up` value has been 0
  # for 5 minutes.
  # NOTE(review): this selector has no job filter, so it also matches the
  # traefik and authelia targets covered by the dedicated alerts below; both
  # alerts fire for those jobs unless deduplicated downstream (e.g. with an
  # Alertmanager inhibit rule) — confirm that is intended.
  - alert: InstanceDown
    expr: up == 0
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
  # Traefik-specific alert with a shorter 2-minute hold time.
  # `up{...} == 0` yields no result when the job has no `up` series at all
  # (target removed, job renamed), so the alert would silently never fire;
  # `or absent(...)` covers that case.
  - alert: TraefikDown
    expr: 'up{job="traefik"} == 0 or absent(up{job="traefik"})'
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "Traefik is down"
      description: "Traefik has been down for more than 2 minutes."
  # Authelia-specific alert; same "failed scrape or missing series" logic as
  # TraefikDown, held for 5 minutes before firing.
  - alert: AutheliaDown
    expr: 'up{job="authelia"} == 0 or absent(up{job="authelia"})'
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Authelia is down"
      description: "Authelia authentication service is unavailable."