[root@prometheus ~]# cd /usr/local/alertmanager/alert-config/
[root@prometheus alert-config]# ls
alert_rules prometheus.yml targets
[root@prometheus alert-config]# cat prometheus.yml
# my global config
# Author: MageEdu <mage@magedu.com>
# Repo: http://gitlab.magedu.com/MageEdu/prometheus-configs/
global:
  scrape_interval: 15s  # Scrape targets every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. Default is every 1 minute.
  # scrape_timeout is not set here, so the global default (10s) applies.
# Alertmanager configuration: the Alertmanager endpoints are discovered from
# target files (file-based service discovery) instead of being listed inline.
alerting:
  alertmanagers:
    - file_sd_configs:
        - files:
            - "targets/alertmanagers*.yaml"
# Rule files matching these globs are loaded once and then evaluated
# periodically, at the global 'evaluation_interval'.
# NOTE(review): no rules/ directory appears next to this file in the listing
# shown; confirm the first glob is still needed.
rule_files:
  - 'rules/*.yaml'
  - 'alert_rules/*.yaml'
# Scrape configuration: three jobs (Prometheus itself, the node exporters and
# Alertmanager). Each job discovers its targets from files under targets/
# with a refresh interval of 2 minutes.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    file_sd_configs:
      - files:
          - targets/prometheus-*.yaml
        refresh_interval: 2m

  # All nodes
  - job_name: 'nodes'
    file_sd_configs:
      - files:
          - targets/nodes-*.yaml
        refresh_interval: 2m

  # Alertmanager instances
  - job_name: 'alertmanagers'
    file_sd_configs:
      - files:
          - targets/alertmanagers*.yaml
        refresh_interval: 2m
[root@prometheus alert-config]#
[root@prometheus alert-config]# ls
alert_rules prometheus.yml targets
[root@prometheus alert-config]# cd targets/
[root@prometheus targets]# cat alertmanagers.yaml
# Target group for Alertmanager; matched by the targets/alertmanagers*.yaml
# glob in prometheus.yml.
- targets:
    - '192.168.30.7:9093'
  # Labels attached to every target in this group.
  labels:
    app: alertmanager
[root@prometheus targets]# cat nodes-linux.yaml
# Target group for the Linux node exporters; matched by the
# targets/nodes-*.yaml glob in prometheus.yml.
- targets:
    - '192.168.30.8:9100'
    - '192.168.30.9:9100'
  # Labels attached to every target in this group.
  labels:
    app: node-exporter
    # NOTE(review): this value differs from job_name 'nodes' in prometheus.yml;
    # confirm which job label value rules and dashboards expect.
    job: node
[root@prometheus targets]# cat prometheus-servers.yaml
# Target group for the Prometheus server itself; matched by the
# targets/prometheus-*.yaml glob in prometheus.yml.
- targets:
    - '192.168.30.7:9090'
  # Labels attached to every target in this group.
  labels:
    app: prometheus
    job: prometheus
[root@prometheus targets]#
[root@prometheus targets]# cd ..
[root@prometheus alert-config]# cd alert_rules/
[root@prometheus alert_rules]# cat instance_down.yaml
# Alerting rules that apply to every scraped instance.
groups:
  - name: AllInstances
    rules:
      - alert: InstanceDown
        # Condition for alerting: the `up` series of a target equals 0.
        expr: up == 0
        # The condition must hold for this long before the alert fires.
        for: 20s
        # Annotations - additional informational fields that describe the alert.
        annotations:
          title: 'Instance down'
          description: 'Instance has been down for more than 20 seconds.'
        # Labels - additional labels to be attached to the alert.
        labels:
          severity: 'critical'
[root@prometheus alert_rules]#