---
# Prometheus alerting rules for Kubernetes pod health.
# The metric names (kube_pod_status_phase, kube_pod_container_status_restarts_total)
# look like kube-state-metrics series -- confirm against the scrape configuration.
groups:
  - name: pods
    rules:
      # Fires for a (namespace, pod) pair whose summed Pending/Unknown/Failed
      # phase series stayed above zero at every 1-minute subquery step of the
      # last 3 minutes, and that condition then held for a further 3 minutes.
      - alert: kubernetes/pod-not-healthy
        # Folded scalar: the line break below is folded into a single space,
        # so the resulting PromQL string is one line.
        expr: >-
          min_over_time(sum by (namespace, pod)
          (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[3m:1m]) > 0
        for: 3m
        labels:
          severity: page
        annotations:
          summary: "Kubernetes Pod not healthy: {{ $labels.namespace }}/{{ $labels.pod }}"
          # The expression checks pod phase, not container readiness, hence
          # "non-running" rather than "non-ready".
          description: "Pod has been in a non-running state for longer than 3 minutes."

      # Fires when a container's restart counter grew by more than 3 within
      # 5 minutes and the condition held for 1 minute.
      - alert: kubernetes/pod-crash-looping
        expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
        for: 1m
        labels:
          severity: warning
        annotations:
          # Uses the `namespace` label, the same pod-namespace label the
          # pod-not-healthy rule in this group aggregates by and reports.
          summary: "Kubernetes pod crash looping: {{ $labels.namespace }}/{{ $labels.pod }}"
          description: "Pod {{ $labels.pod }} is crash looping"