blob: 22e16cf4f3872df9c35c8aaa7ddd41700920d7b6 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# Alerting rules for the "prometheus" rule group.
# Both rules carry `severity: page`.
groups:
  - name: prometheus
    rules:
      # Fires when a target's `up` series has been 0 for a continuous
      # 5 minutes.
      - alert: prometheus/instance-down
        expr: up == 0
        for: 5m
        labels:
          severity: page
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          # Folded scalar: the line break below becomes a single space.
          description: >-
            {{ $labels.instance }} of job {{ $labels.job }} has been down
            for more than 5 minutes.

      # Fires as soon as prometheus_config_last_reload_successful is 0;
      # `for: 0m` means there is no pending period before the alert fires.
      - alert: prometheus/config-failed
        expr: prometheus_config_last_reload_successful == 0
        for: 0m
        labels:
          severity: page
        annotations:
          summary: 'Prometheus config reload in pod {{ $labels.kubernetes_pod_name }} has failed'
          # Folded scalar: the line break below becomes a single space.
          description: >-
            Prometheus instance {{ $labels.kubernetes_pod_name }}
            (`{{ $labels.instance }}`) has failed to reload its config.
|