diff options
author | 2024-04-01 00:57:38 +0100 | |
---|---|---|
committer | 2024-04-01 19:54:53 +0100 | |
commit | 7798278204630df1e90e65957393290a8f7a60a6 (patch) | |
tree | 7e3707747e10a6e8c5ef2c5bf0aeee24b0ddd04a | |
parent | Add alerts for container OOMs (diff) |
Add a sanity alert for when a node becomes unschedulable
-rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml | 11 |
1 file changed, 10 insertions, 1 deletion
```diff
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
index 6bfa6d1..08873ea 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
@@ -41,9 +41,18 @@ groups:
 
   - alert: KubernetesNodeReady
     expr: kube_node_status_condition{condition="Ready",status="true"} == 0
-    for: 5m
+    for: 1m
     labels:
       severity: page
     annotations:
       summary: Kubernetes node ({{ $labels.kubernetes_node }} ) is marked as unready
       description: "Node {{ $labels.kubernetes_node }} has been unready for a long time"
+
+  - alert: KubernetesNodeCordoned
+    expr: kube_node_spec_unschedulable == 1
+    for: 1m
+    labels:
+      severity: page
+    annotations:
+      summary: Kubernetes node ({{ $labels.kubernetes_node }}) is cordoned
+      description: "Node {{ $labels.kubernetes_node }} has been cordoned"
\ No newline at end of file
```