about summary refs log tree commit diff stats
path: root/kubernetes/namespaces/monitoring
diff options
context:
space:
mode:
author: Joe Banks <[email protected]> 2024-04-17 17:08:29 +0100
committer: GitHub <[email protected]> 2024-04-17 17:08:29 +0100
commit: 3afc7efa3f578661ef1a92636dfe8dbeb5a11b00 (patch)
tree: 78b2200d7d4aa43247c803d26358715c3675fd3c /kubernetes/namespaces/monitoring
parent: Move mongodb to databases namespace (diff)
Update ContainerOOMEvent alert
Diffstat (limited to 'kubernetes/namespaces/monitoring')
-rw-r--r-- kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
index c5aa3c2..dff5352 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
@@ -12,10 +12,10 @@ groups:
description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}'
- alert: ContainerOOMEvent
- expr: container_oom_events_total{pod=~".+", container_name!="POD", image!=""} > 0
- for: 30s
+ expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
+ for: 0m
labels:
severity: page
annotations:
- summary: "Container {{ $labels.container_name }} in {{ $labels.pod }} was OOM killed"
- description: "{{ $labels.container_name }} inside {{ $labels.pod }} has been OOM killed"
+ summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
+ description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"