aboutsummaryrefslogtreecommitdiffstats
path: root/kubernetes/namespaces/monitoring/alerts/alerts.d/mail.yaml
blob: 39f6a3e187e3e3f74993df5cea93c52bdf94888e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Alerting rule groups (layout follows the Prometheus rule-file format:
# groups -> rules -> alert/expr/for/labels/annotations).
groups:
  # Group "postfix": every entry under `rules` defines one alert.
  - name: postfix
    rules:
      # Warning raised once `postfix_up != 1` has held for 5 minutes.
      # The summary annotation is templated with the series' `instance` label.
      - alert: 'postfix/down'
        expr: 'postfix_up != 1'
        for: 5m
        labels:
          severity: 'warning'
        annotations:
          summary: 'Postfix is down (instance {{ $labels.instance }})'
      # Warning raised as soon as (`for: 0m`) the 15-minute rate of
      # postfix_smtpd_messages_rejected_total is above zero for series whose
      # `code` label matches the regex ^4.* .
      # The annotations use `>-` folded scalars: line breaks fold to single
      # spaces and the trailing newline is stripped, so each value is one line.
      - alert: 'postfix/smtp-temporary-errors'
        expr: >-
          rate(postfix_smtpd_messages_rejected_total{code=~"^4.*"}[15m]) > 0
        for: 0m
        labels:
          severity: 'warning'
        annotations:
          summary: >-
            Postfix is rejecting messages due to errors
            (instance {{ $labels.instance }})
          description: >-
            Postfix has seen code {{ $labels.code }} errors recently
            and temporarily rejected emails.
            https://en.wikipedia.org/wiki/List_of_SMTP_server_return_codes and
            `sudo journalctl -xeu postfix@-` may provide more information on
            the current issue.