about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
index 25e555d..6442b13 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
@@ -11,3 +11,12 @@ groups:
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+
+ - alert: PrometheusConfigFailed  # matches series whose last config reload is reported as unsuccessful (value 0)
+ expr: prometheus_config_last_reload_successful == 0
+ for: 0m  # zero hold time: no pending period before the alert fires
+ labels:
+ severity: page  # presumably routed to on-call paging by Alertmanager; confirm against the routing config
+ annotations:  # NOTE(review): templates read the kubernetes_pod_name and instance labels; assumes the scrape config attaches kubernetes_pod_name - confirm
+ summary: "Prometheus config reload in pod {{ $labels.kubernetes_pod_name }} has failed"
+ description: "Prometheus instance {{ $labels.kubernetes_pod_name }} (`{{ $labels.instance }}`) has failed to reload its config."