1、安装mysql_exporter
[root@mysqld ~]# wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.12.1/mysqld_exporter-0.12.1.linux-amd64.tar.gz [root@mysqld ~]# tar xf mysqld_exporter-0.12.1.linux-amd64.tar.gz -C /usr/local/ [root@mysqld ~]# ln -sv /usr/local/mysqld_exporter-0.12.1.linux-amd64/ /usr/local/prometheus_mysqld
2、在MySQL中创建用户并授权
mysql> CREATE USER 'mysqld_exporter'@'%' IDENTIFIED BY 'mysqld_exporter'; mysql> GRANT REPLICATION CLIENT,PROCESS ON *.* TO 'mysqld_exporter'@'%' identified by 'mysqld_exporter'; mysql> GRANT SELECT ON *.* TO 'mysqld_exporter'@'%';
3、写一个mysqld_exporter的配置文件
[root@mysqld ~]# cat /usr/local/prometheus_mysqld/.my.cnf [client] user=mysqld_exporter password=mysqld_exporter port=3306
4、配置systemd启动mysqld_exporter
[root@mysqld ~]# cat /lib/systemd/system/mysqld_exporter.service [Unit] Description=Mysqld_exporter After=network.target [Service] ExecStart=/usr/local/prometheus_mysqld/mysqld_exporter --config.my-cnf=/usr/local/prometheus_mysqld/.my.cnf [Install] WantedBy=multi-user.target systemctl daemon-reload systemctl start mysqld_exporter systemctl enable mysqld_exporter ss -lnpt|grep 9104 LISTEN 0 128 [::]:9104 [::]:* users:(("mysqld_exporter",pid=1853,fd=3))
5、确认监控指标正常
[root@mysqld ~]# curl http://localhost:9104/metrics|grep mysql_up % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 130k 0 130k 0 0 6920k 0 --:--:-- --:--:-- --:--:-- 7269k # HELP mysql_up Whether the MySQL server is up. # TYPE mysql_up gauge mysql_up 1
6、在Prometheus的server端添加job任务
[root@prometheus prometheus]# vim /usr/local/prometheus/prometheus.yml - job_name: 'mysql_db' static_configs: - targets: ['10.0.0.52:9104'] labels: group: mysql_db [root@prometheus prometheus]# systemctl restart prometheus
在浏览器中访问 http://10.0.0.51:9090/targets
7、在grafana中导入MySQL监控图表
点击import,在弹出界面中输入7362,数据源选择Prometheus
在grafana中看到采集到的MySQL数据库的相关参数
8、配置mysql_exporter告警规则
[root@prometheus rules]# pwd /usr/local/prometheus/rules [root@prometheus rules]# cat mysql_rules.yml groups: - name: mysql.rules rules: - alert: MysqlDown expr: up == 0 for: 0m labels: severity: critical annotations: title: 'MySQL down' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL instance is down" - alert: MysqlRestarted expr: mysql_global_status_uptime < 60 for: 0m labels: severity: info annotations: title: 'MySQL Restarted' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL has just been restarted, less than one minute ago" - alert: MysqlTooManyConnections(>80%) expr: avg by (instance) (rate(mysql_global_status_threads_connected[1m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80 for: 2m labels: severity: warning annotations: title: 'MySQL too many connections (> 80%)' description: "Mysql实例: 【{{ $labels.instance }}】, More than 80% of MySQL connections are in use, Current Value: {{ $value }}%" - alert: MysqlThreadsRunningHigh expr: mysql_global_status_threads_running > 40 for: 2m labels: severity: warning annotations: title: 'MySQL Threads_Running High' description: "Mysql实例: 【{{ $labels.instance }}】, Threads_Running above the threshold(40), Current Value: {{ $value }}" - alert: MysqlQpsHigh expr: sum by (instance) (rate(mysql_global_status_queries[2m])) > 500 for: 2m labels: severity: warning annotations: title: 'MySQL QPS High' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL QPS above 500" - alert: MysqlSlowQueries expr: increase(mysql_global_status_slow_queries[1m]) > 0 for: 2m labels: severity: warning annotations: title: 'MySQL slow queries' description: "Mysql实例: 【{{ $labels.instance }}】, has some new slow query." - alert: MysqlTooManyAbortedConnections expr: round(increase(mysql_global_status_aborted_connects[5m])) > 20 for: 2m labels: severity: warning annotations: title: 'MySQL too many Aborted connections in 2 minutes' description: "Mysql实例: 【{{ $labels.instance }}】, {{ $value }} Aborted connections within 2 minutes" - alert: MysqlTooManyAbortedClients expr: round(increase(mysql_global_status_aborted_clients[120m])) > 10 for: 2m labels: severity: warning annotations: title: 'MySQL too many Aborted connections in 2 hours' description: "Mysql实例: 【{{ $labels.instance }}】, {{ $value }} Aborted Clients within 2 hours" - alert: MysqlSlaveIoThreadNotRunning expr: mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_io_running == 0 for: 0m labels: severity: critical annotations: title: 'MySQL Slave IO thread not running' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL Slave IO thread not running" - alert: MysqlSlaveSqlThreadNotRunning expr: mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_sql_running == 0 for: 0m labels: severity: critical annotations: title: 'MySQL Slave SQL thread not running' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL Slave SQL thread not running" - alert: MysqlSlaveReplicationLag expr: mysql_slave_status_master_server_id > 0 and ON (instance) (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) > 30 for: 1m labels: severity: critical annotations: title: 'MySQL Slave replication lag' description: "Mysql实例: 【{{ $labels.instance }}】, MySQL replication lag" - alert: MysqlInnodbLogWaits expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10 for: 0m labels: severity: warning annotations: title: 'MySQL InnoDB log waits' description: "Mysql实例: 【{{ $labels.instance }}】, innodb log writes stalling"
9、将告警规则集成到Prometheus
[root@prometheus prometheus]# vim /usr/local/prometheus/prometheus.yml rule_files: - "/usr/local/prometheus/rules/*.yml" [root@prometheus prometheus]# systemctl restart prometheus