diff options
| author | 2024-05-08 10:18:18 +0200 | |
|---|---|---|
| committer | 2024-05-08 10:18:18 +0200 | |
| commit | a131791e16de17b24df55473a95b7194a85dafda (patch) | |
| tree | b3f3decc55b2b91dccdeb5e45a9eb3a4d2214598 | |
| parent | Correct scheme configuration for Alertmanager (diff) | |
Configure Prometheus alerting for failed systemd units (#278)
The two services that I would normally exclude (openipmi.service and
nvmf-autoconnect.service) are intentionally not excluded right now, in
order to test the Alertmanager setup. If all goes well, we should
receive a notification on Discord.
| -rw-r--r-- | ansible/host_vars/lovelace/prometheus.yml | 15 | ||||
| -rw-r--r-- | ansible/roles/prometheus/tasks/main.yml | 2 | 
2 files changed, 16 insertions, 1 deletion
```diff
diff --git a/ansible/host_vars/lovelace/prometheus.yml b/ansible/host_vars/lovelace/prometheus.yml
index 41c5a97..d079986 100644
--- a/ansible/host_vars/lovelace/prometheus.yml
+++ b/ansible/host_vars/lovelace/prometheus.yml
@@ -73,3 +73,18 @@ prometheus_configuration:
         - source_labels: [__param_target]
           target_label: instance
         - target_label: __address__
+
+prometheus_rules: |
+  {% raw %}
+  groups:
+    - name: node
+      rules:
+        - alert: node/systemd-unit-failed
+          # expr: node_systemd_unit_state{state="failed", name!="openipmi.service", name!="nvmf-autoconnect.service"} != 0
+          expr: node_systemd_unit_state{state="failed"} != 0
+          for: 15m
+          labels:
+            severity: warning
+          annotations:
+            summary: Node systemd unit {{ $labels.name }} has failed (instance {{ $labels.instance }})
+  {% endraw %}
diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml
index 9333c57..7fd32c4 100644
--- a/ansible/roles/prometheus/tasks/main.yml
+++ b/ansible/roles/prometheus/tasks/main.yml
@@ -37,7 +37,7 @@
   copy:
     content: |
       # Ansible managed
-      {{ prometheus_rules | to_nice_yaml }}
+      {{ prometheus_rules }}
     dest: /etc/prometheus/rules.yml
     owner: prometheus
     group: prometheus
```
