diff options
author | 2024-04-01 00:57:18 +0100 | |
---|---|---|
committer | 2024-04-01 19:54:53 +0100 | |
commit | edde0ad9952b0a626c973a3242fae88dccac5213 (patch) | |
tree | 3fa0d57a2202e1f259762309c17015f317d7d5fa | |
parent | Add etcd alerts for increase in failed requests (diff) |
Add alerts for container OOMs
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml | 9 |
1 file changed, 9 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
index d53da5e..f847e1c 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml
@@ -10,3 +10,12 @@ groups:
     annotations:
       summary: "Node {{ $labels.kubernetes_node }} has RAM usage >80% for 5 minutes"
       description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}'
+
+  - alert: ContainerOOMEvent
+    expr: container_oom_events_total{pod=~".+", container_name!="POD", image!=""} > 0
+    for: 30s
+    labels:
+      severity: page
+    annotations:
+      summary: "Container {{ $labels.container_name }} in {{ $labels.pod }} has OOM killed"
+      description: "{{ $labels.container_name }} inside {{ $labels.pod }} has been OOM killed"
\ No newline at end of file