diff options
| -rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml | 9 | 
1 file changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
index 25e555d..6442b13 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/prometheus.yaml
@@ -11,3 +11,12 @@ groups:
     annotations:
       summary: "Instance {{ $labels.instance }} down"
       description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+
+  - alert: PrometheusConfigFailed
+    expr: prometheus_config_last_reload_successful == 0
+    for: 0m
+    labels:
+      severity: page
+    annotations:
+      summary: "Prometheus config reload in pod {{ $labels.kubernetes_pod_name }} has failed"
+      description: "Prometheus instance {{ $labels.kubernetes_pod_name }} (`{{ $labels.instance }}`) has failed to reload its config."
