about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Joe Banks <[email protected]> 2024-06-09 16:59:38 +0100
committer: jchristgit <[email protected]> 2024-06-10 17:36:08 +0200
commit: 839e01de7f055550bc963dfc154694c650953c85 (patch)
tree: 03a057d2686dbe8ff383c1cc8d081a3336c36dd1
parent: Enable scraping of Prometheus pods (diff)
Add Alert for Prometheus config reload failure
-rw-r--r-- kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml 9
1 file changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
index 25e555d..6442b13 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
@@ -11,3 +11,12 @@ groups:
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+
+ - alert: PrometheusConfigFailed
+ expr: prometheus_config_last_reload_successful == 0
+ for: 0m
+ labels:
+ severity: page
+ annotations:
+ summary: "Prometheus config reload in pod {{ $labels.kubernetes_pod_name }} has failed"
+ description: "Prometheus instance {{ $labels.kubernetes_pod_name }} (`{{ $labels.instance }}`) has failed to reload its config."