aboutsummaryrefslogtreecommitdiffstats
path: root/kubernetes/namespaces/monitoring/alerts/alerts.d/alertmanager.yaml
blob: e1e986343935c9e58c6ef75720370fa87f94a718 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Alerting rules for the Alertmanager cluster.
# Each entry under `rules` is one alert, evaluated from an Alertmanager
# cluster metric (see the `expr` of each rule).
groups:
- name: alertmanager
  rules:

  # Pages when an Alertmanager instance has reported one or more failed
  # cluster peers (alertmanager_cluster_failed_peers > 0) for a full minute.
  # `for: 1m` means the condition must hold continuously before firing.
  - alert: alert-manager/cluster-failed-peers
    expr: alertmanager_cluster_failed_peers > 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "An Alertmanager node is reporting failed peers"
      # {{ $labels.instance }} is the reporting instance; {{ $value }} is the
      # current value of the expression, i.e. the failed-peer count.
      description: "AM {{ $labels.instance }} is reporting that {{ $value }} of its peers have failed."

  # Pages when an Alertmanager instance has reported a non-zero cluster
  # health score for a full minute. Per the description below, a score of 0
  # is the healthy state, so any value above 0 is treated as unhealthy.
  - alert: alert-manager/health-score
    expr: alertmanager_cluster_health_score > 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "An Alertmanager node is reporting an unhealthy cluster"
      # {{ $value }} is the current health score reported by the instance.
      description: "AM {{ $labels.instance }} is reporting that the cluster has a health score of {{ $value }} (where 0 is healthy.)"