diff options
author | 2024-06-09 16:59:38 +0100 | |
---|---|---|
committer | 2024-06-10 17:36:08 +0200 | |
commit | 839e01de7f055550bc963dfc154694c650953c85 (patch) | |
tree | 03a057d2686dbe8ff383c1cc8d081a3336c36dd1 | |
parent | Enable scraping of Prometheus pods (diff) |
Add Alert for Prometheus config reload failure
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml | 9 |
1 file changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
index 25e555d..6442b13 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
@@ -11,3 +11,12 @@ groups:
     annotations:
       summary: "Instance {{ $labels.instance }} down"
       description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+
+  - alert: PrometheusConfigFailed
+    expr: prometheus_config_last_reload_successful == 0
+    for: 0m
+    labels:
+      severity: page
+    annotations:
+      summary: "Prometheus config reload in pod {{ $labels.kubernetes_pod_name }} has failed"
+      description: "Prometheus instance {{ $labels.kubernetes_pod_name }} (`{{ $labels.instance }}`) has failed to reload its config."