aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Johannes Christ <[email protected]>2024-08-27 19:11:06 +0200
committerGravatar Johannes Christ <[email protected]>2024-08-29 17:32:17 +0200
commit6bf0b6cd861fa002003af72a332fae89e156a9d4 (patch)
tree1e56d7a91eec1f4f6fab3e4d95cc2bd9b4889934
parentCompile prevent-duplicates.sieve script to sieve-after filter directory (diff)
Show status code in nginx alerts
-rw-r--r--kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml8
1 files changed, 4 insertions, 4 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
index 317a4fe..bf177c3 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml
@@ -3,22 +3,22 @@ groups:
rules:
- alert: nginx/4xx-requests
- expr: sum by (service) (rate(nginx_ingress_controller_requests{service!="pixels",status!~"404|444",status=~"^4.."}[1m])) / sum by (service) (rate(nginx_ingress_controller_requests[1m])) > 0.5
+ expr: sum by (service, status) (rate(nginx_ingress_controller_requests{service!="pixels",status!~"404|444",status=~"^4.."}[1m])) / on (service) group_left sum by (service) (rate(nginx_ingress_controller_requests[1m])) > 0.5  # share of the service's total traffic per 4xx status; the total must not be split by status or the ratio is always 1
for: 1m
labels:
severity: page
annotations:
summary: "High rate of 4XX requests for inbound requests"
- description: "Rate of 4XX errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`"
+ description: "Rate of {{ $labels.status }} errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`"
- alert: nginx/5xx-requests
- expr: sum(rate(nginx_ingress_controller_requests{status=~"^5.."}[1m])) by (service) / sum(rate(nginx_ingress_controller_requests{}[1m])) by (service) > 0.5
+ expr: sum(rate(nginx_ingress_controller_requests{status=~"^5.."}[1m])) by (service, status) / on (service) group_left sum(rate(nginx_ingress_controller_requests{}[1m])) by (service) > 0.5  # share of the service's total traffic per 5xx status; the total must not be split by status or the ratio is always 1
for: 1m
labels:
severity: page
annotations:
summary: "High rate of 5XX requests for inbound requests"
- description: "Rate of 5XX errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`"
+ description: "Rate of {{ $labels.status }} errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`"
- alert: nginx/p99-timing
expr: histogram_quantile(0.99, sum by(host, service, le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{service!~"(grafana|metabase|prestashop-svc)"}[5m]))) > 3 and on(service) increase(nginx_ingress_controller_requests[5m]) > 10