about | summary | refs | log | tree | commit | diff | stats
path: root/kubernetes/namespaces/monitoring
diff options
context:
space:
mode:
author: Joe Banks <[email protected]> 2024-04-01 00:57:38 +0100
committer: Joe Banks <[email protected]> 2024-04-01 19:54:53 +0100
commit: 7798278204630df1e90e65957393290a8f7a60a6 (patch)
tree: 7e3707747e10a6e8c5ef2c5bf0aeee24b0ddd04a /kubernetes/namespaces/monitoring
parent: Add alerts for container OOMs (diff)
Add a sanity alert for when a node becomes unschedulable
Diffstat (limited to 'kubernetes/namespaces/monitoring')
-rw-r--r-- kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml | 11
1 files changed, 10 insertions, 1 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
index 6bfa6d1..08873ea 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
@@ -41,9 +41,18 @@ groups:
- alert: KubernetesNodeReady
expr: kube_node_status_condition{condition="Ready",status="true"} == 0
- for: 5m
+ for: 1m
labels:
severity: page
annotations:
summary: Kubernetes node ({{ $labels.kubernetes_node }} ) is marked as unready
description: "Node {{ $labels.kubernetes_node }} has been unready for a long time"
+
+ - alert: KubernetesNodeCordoned
+ expr: kube_node_spec_unschedulable == 1
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: Kubernetes node ({{ $labels.kubernetes_node }}) is cordoned
+ description: "Node {{ $labels.kubernetes_node }} has been cordoned"
\ No newline at end of file