diff options
| author | 2024-04-01 00:57:38 +0100 | |
|---|---|---|
| committer | 2024-04-01 19:54:53 +0100 | |
| commit | 7798278204630df1e90e65957393290a8f7a60a6 (patch) | |
| tree | 7e3707747e10a6e8c5ef2c5bf0aeee24b0ddd04a /kubernetes/namespaces | |
| parent | Add alerts for container OOMs (diff) | |
Add a sanity alert for when a node becomes unschedulable
Diffstat (limited to 'kubernetes/namespaces')
| -rw-r--r-- | kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml | 11 | 
1 file changed, 10 insertions, 1 deletion
```diff
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
index 6bfa6d1..08873ea 100644
--- a/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/nodes.yaml
@@ -41,9 +41,18 @@ groups:
 
   - alert: KubernetesNodeReady
     expr: kube_node_status_condition{condition="Ready",status="true"} == 0
-    for: 5m
+    for: 1m
     labels:
       severity: page
     annotations:
       summary: Kubernetes node ({{ $labels.kubernetes_node }} ) is marked as unready
       description: "Node {{ $labels.kubernetes_node }} has been unready for a long time"
+
+  - alert: KubernetesNodeCordoned
+    expr: kube_node_spec_unschedulable == 1
+    for: 1m
+    labels:
+      severity: page
+    annotations:
+      summary: Kubernetes node ({{ $labels.kubernetes_node }}) is cordoned
+      description: "Node {{ $labels.kubernetes_node }} has been cordoned"
\ No newline at end of file
```
