diff options
Diffstat (limited to 'kubernetes/namespaces/monitoring')
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml index c5aa3c2..dff5352 100644 --- a/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml +++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/memory.yaml @@ -12,10 +12,10 @@ groups: description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}' - alert: ContainerOOMEvent - expr: container_oom_events_total{pod=~".+", container_name!="POD", image!=""} > 0 - for: 30s + expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1 + for: 0m labels: severity: page annotations: - summary: "Container {{ $labels.container_name }} in {{ $labels.pod }} was OOM killed" - description: "{{ $labels.container_name }} inside {{ $labels.pod }} has been OOM killed" + summary: Kubernetes Container oom killer (instance {{ $labels.instance }}) + description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" |