Add Prometheus and Grafana services with alerting configuration
This commit is contained in:
36
rules.yml
Normal file
36
rules.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
# Prometheus rule file: one group of recording rules and one group of
# alerting rules, all derived from the built-in `up` scrape-health metric.
groups:
  # Pre-aggregated per-job views of `up`.
  - name: recording_rules
    rules:
      # Sum of `up` per job, i.e. how many targets were scraped successfully.
      - record: job:up:sum
        expr: sum(up) by (job)
      # Number of `up` series per job, i.e. how many targets are known.
      - record: job:up:count
        expr: count(up) by (job)

  - name: alerting_rules
    rules:
      # Generic catch-all: any target, in any job, whose scrape has been
      # failing continuously for 5 minutes.
      # NOTE(review): `up == 0` only matches series that exist; a target that
      # disappears from service discovery yields no series and will not fire.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Job-specific alert for Traefik with a shorter 2-minute hold.
      # NOTE(review): InstanceDown has no job filter, so a Traefik outage also
      # fires InstanceDown (critical) after 5m - confirm the overlap is wanted.
      - alert: TraefikDown
        expr: up{job="traefik"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Traefik is down"
          description: "Traefik has been down for more than 2 minutes."

      # Job-specific alert for Authelia.
      # NOTE(review): same expression shape, hold time and severity as
      # InstanceDown, so both fire together for this job - confirm intended.
      - alert: AutheliaDown
        expr: up{job="authelia"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Authelia is down"
          description: "Authelia authentication service is unavailable."
|
||||
Reference in New Issue
Block a user