diff options
author | 2024-04-17 17:08:29 +0100 | |
---|---|---|
committer | 2024-04-17 17:08:29 +0100 | |
commit | 3afc7efa3f578661ef1a92636dfe8dbeb5a11b00 (patch) | |
tree | 78b2200d7d4aa43247c803d26358715c3675fd3c /kubernetes/namespaces/monitoring | |
parent | Move mongodb to databases namespace (diff) |
Update ContainerOOMEvent alert
Diffstat (limited to 'kubernetes/namespaces/monitoring')
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
index c5aa3c2..dff5352 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
@@ -12,10 +12,10 @@ groups:
       description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}'
 
   - alert: ContainerOOMEvent
-    expr: container_oom_events_total{pod=~".+", container_name!="POD", image!=""} > 0
-    for: 30s
+    expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
+    for: 0m
     labels:
       severity: page
     annotations:
-      summary: "Container {{ $labels.container_name }} in {{ $labels.pod }} was OOM killed"
-      description: "{{ $labels.container_name }} inside {{ $labels.pod }} has been OOM killed"
+      summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
+      description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
```