diff options
author | 2024-08-27 19:11:06 +0200 | |
---|---|---|
committer | 2024-08-29 17:32:17 +0200 | |
commit | 6bf0b6cd861fa002003af72a332fae89e156a9d4 (patch) | |
tree | 1e56d7a91eec1f4f6fab3e4d95cc2bd9b4889934 | |
parent | Compile prevent-duplicates.sieve script to sieve-after filter directory (diff) |
Show status code in nginx alerts
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml index 317a4fe..bf177c3 100644 --- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml +++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nginx.yaml @@ -3,22 +3,22 @@ groups: rules: - alert: nginx/4xx-requests - expr: sum by (service) (rate(nginx_ingress_controller_requests{service!="pixels",status!~"404|444",status=~"^4.."}[1m])) / sum by (service) (rate(nginx_ingress_controller_requests[1m])) > 0.5 + expr: sum by (service, status) (rate(nginx_ingress_controller_requests{service!="pixels",status!~"404|444",status=~"^4.."}[1m])) / sum by (service, status) (rate(nginx_ingress_controller_requests[1m])) > 0.5 for: 1m labels: severity: page annotations: summary: "High rate of 4XX requests for inbound requests" - description: "Rate of 4XX errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`" + description: "Rate of {{ $labels.status }} errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`" - alert: nginx/5xx-requests - expr: sum(rate(nginx_ingress_controller_requests{status=~"^5.."}[1m])) by (service) / sum(rate(nginx_ingress_controller_requests{}[1m])) by (service) > 0.5 + expr: sum(rate(nginx_ingress_controller_requests{status=~"^5.."}[1m])) by (service, status) / sum(rate(nginx_ingress_controller_requests{}[1m])) by (service, status) > 0.5 for: 1m labels: severity: page annotations: summary: "High rate of 5XX requests for inbound requests" - description: "Rate of 5XX errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`" + description: "Rate of {{ $labels.status }} errors is {{ $value | humanizePercentage }} on service `{{ $labels.service }}`" - alert: nginx/p99-timing expr: histogram_quantile(0.99, sum by(host, service, le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{service!~"(grafana|metabase|prestashop-svc)"}[5m]))) > 3 and 
on(service) increase(nginx_ingress_controller_requests[5m]) > 10 |