aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Joe Banks <[email protected]>2024-04-01 00:57:18 +0100
committerGravatar Joe Banks <[email protected]>2024-04-01 19:54:53 +0100
commitedde0ad9952b0a626c973a3242fae88dccac5213 (patch)
tree3fa0d57a2202e1f259762309c17015f317d7d5fa
parentAdd etcd alerts for increase in failed requests (diff)
Add alerts for container OOMs
-rw-r--r--kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml9
1 files changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
index d53da5e..f847e1c 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
@@ -10,3 +10,12 @@ groups:
annotations:
summary: "Node {{ $labels.kubernetes_node }} has RAM usage >80% for 5 minutes"
description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}'
+
+ - alert: ContainerOOMEvent  # page when a container records a new OOM event
+   expr: increase(container_oom_events_total{pod=~".+", container_name!="POD", image!=""}[5m]) > 0
+   for: 30s  # expr uses increase(): the raw counter stays > 0 after one OOM, so "> 0" alone would never resolve
+   labels:
+     severity: page
+   annotations:  # NOTE(review): kubelet cAdvisor (k8s >= 1.16) exposes "container", not "container_name" - confirm relabelling
+     summary: "Container {{ $labels.container_name }} in {{ $labels.pod }} has OOM killed"
+     description: "{{ $labels.container_name }} inside {{ $labels.pod }} has been OOM killed"
\ No newline at end of file