---
# Prometheus alerting rules that watch the Alertmanager cluster itself.
# Each rule's expression must hold for 1m before the alert fires, and each
# alert carries the label `severity: page`.
groups:
  - name: alertmanager
    rules:
      # Fires when an instance reports a non-zero value for
      # alertmanager_cluster_failed_peers.
      - alert: alert-manager/cluster-failed-peers
        expr: alertmanager_cluster_failed_peers > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "An Alertmanager node is reporting failed peers"
          description: >-
            AM {{ $labels.instance }} is reporting that {{ $value }} of its
            peers is invalid.

      # Fires when an instance reports a non-zero value for
      # alertmanager_cluster_health_score (per the description text below,
      # 0 is the healthy value).
      - alert: alert-manager/health-score
        expr: alertmanager_cluster_health_score > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "An Alertmanager node is reporting an unhealthy cluster"
          description: >-
            AM {{ $labels.instance }} is reporting that the cluster has a
            health score of {{ $value }} (where 0 is healthy.)