From 276645b02d53b1b9833e969ddecdd0d30a30d35a Mon Sep 17 00:00:00 2001
From: Johannes Christ
Date: Sat, 31 Aug 2024 20:16:32 +0200
Subject: Raise time threshold for 4xx alerts

At present we get plenty of unactionable, flapping alarms. So far, they
have shown us nothing of value. Raise the time consecutive errors need to
be seen before we alert.
---
 kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kubernetes')

diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
index bf177c3..8573d67 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
@@ -4,7 +4,7 @@ groups:
 
   - alert: nginx/4xx-requests
     expr: sum by (service, status) (rate(nginx_ingress_controller_requests{service!="pixels",status!~"404|444",status=~"^4.."}[1m])) / sum by (service, status) (rate(nginx_ingress_controller_requests[1m])) > 0.5
-    for: 1m
+    for: 10m
     labels:
       severity: page
     annotations:
-- 
cgit v1.2.3