blob: b7de612f69326055717f17bd8cb30a637fd1e9a4 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
# Prometheus alerting rules for memory pressure: one node-level rule and one
# container-level (OOM kill) rule. Both carry `severity: page`.
groups:
  - name: memory
    rules:
      # Fires when the ratio of active memory to total memory on a node stays
      # above 0.8 for the whole `for` window.
      - alert: node/high-memory-usage
        expr: node_memory_Active_bytes / node_memory_MemTotal_bytes > 0.8
        # The summary text below quotes this duration; keep the two in sync.
        for: 30s
        labels:
          severity: page
        annotations:
          # NOTE(review): assumes the series carry a `kubernetes_node` label
          # (typically added by relabeling) - confirm against the scrape config.
          summary: 'Node {{ $labels.kubernetes_node }} has RAM usage >80% for 30 seconds'
          # $value is the 0-1 ratio produced by `expr`; humanizePercentage
          # renders it as a percentage.
          description: 'RAM usage is currently {{ $value | humanizePercentage }} on {{ $labels.kubernetes_node }}'

      # Fires immediately (`for: 0m`) when a container's restart counter is at
      # least 1 higher than it was 10 minutes ago AND its last-terminated-reason
      # series for reason="OOMKilled" had a minimum of 1 over those 10 minutes.
      # `ignoring (reason)` lets the two sides match even though only the
      # right-hand side carries the `reason` label.
      - alert: container/oom
        # Folded scalar: the lines are joined with single spaces, giving the
        # same one-line expression as before.
        expr: >-
          (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1)
          and ignoring (reason)
          min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
          == 1
        for: 0m
        labels:
          severity: page
        annotations:
          summary: 'Kubernetes Container oom killer (instance {{ $labels.instance }})'
          # $value comes from the left-hand side of `and`, i.e. the increase in
          # the restart counter over the last 10 minutes.
          description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|