aboutsummaryrefslogtreecommitdiffstats
path: root/kubernetes/namespaces/monitoring/alerts
diff options
context:
space:
mode:
Diffstat (limited to 'kubernetes/namespaces/monitoring/alerts')
-rw-r--r--kubernetes/namespaces/monitoring/alerts/alerts.d/mail.yaml22
1 files changed, 22 insertions, 0 deletions
diff --git a/kubernetes/namespaces/monitoring/alerts/alerts.d/mail.yaml b/kubernetes/namespaces/monitoring/alerts/alerts.d/mail.yaml
new file mode 100644
index 0000000..39f6a3e
--- /dev/null
+++ b/kubernetes/namespaces/monitoring/alerts/alerts.d/mail.yaml
@@ -0,0 +1,22 @@
+groups:
+ - name: postfix
+ rules:
+ - alert: postfix/down
+ expr: postfix_up != 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: Postfix is down (instance {{ $labels.instance }})
+ - alert: postfix/smtp-temporary-errors
+ expr: rate(postfix_smtpd_messages_rejected_total{code=~"^4.*"}[15m]) > 0
+ for: 0m
+ labels:
+ severity: warning
+ annotations:
+ summary: Postfix is rejecting messages due to errors (instance {{ $labels.instance }})
+ description: Postfix has seen code {{ $labels.code }} errors recently
+ and temporarily rejected emails.
+ https://en.wikipedia.org/wiki/List_of_SMTP_server_return_codes and
+ `sudo journalctl -xeu postfix@-` may provide more information on
+ the current issue.