root@k8s-master-01:~# wget https://github.com/prometheus/node_exporter/releases/download/v1.2.2/node_exporter-1.2.2.linux-amd64.tar.gz
root@k8s-master-01:~# tar xf node_exporter-1.2.2.linux-amd64.tar.gz -C /usr/local/
root@k8s-master-01:~# ln -sv /usr/local/node_exporter-1.2.2.linux-amd64/ /usr/local/node_exporter
'/usr/local/node_exporter' -> '/usr/local/node_exporter-1.2.2.linux-amd64/'
root@k8s-master-01:~# /usr/local/node_exporter/node_exporter --version
node_exporter, version 1.2.2 (branch: HEAD, revision: 26645363b486e12be40af7ce4fc91e731a33104e)
  build user:       root@b9cb4aa2eb17
  build date:       20210806-13:44:18
  go version:       go1.16.7
  platform:         linux/amd64
root@k8s-master-01:~# cat /lib/systemd/system/node-exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io
After=network.target

[Service]
Type=simple
ExecStart=/usr/local/node_exporter/node_exporter --collector.ntp --collector.mountstats --collector.systemd --collector.tcpstat
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=multi-user.target
root@k8s-master-01:~# systemctl enable node-exporter
Created symlink /etc/systemd/system/multi-user.target.wants/node-exporter.service → /lib/systemd/system/node-exporter.service.
root@k8s-master-01:~# systemctl start node-exporter
root@k8s-master-01:~# systemctl status node-exporter
● node-exporter.service - node_exporter
     Loaded: loaded (/lib/systemd/system/node-exporter.service; enabled; vendor preset: enabled)
     Active: active (running) since Tue 2021-11-16 14:54:04 CST; 2s ago
       Docs: https://prometheus.io
   Main PID: 270390 (node_exporter)
      Tasks: 4 (limit: 2245)
     Memory: 2.6M
     CGroup: /system.slice/node-exporter.service
             └─270390 /usr/local/node_exporter/node_exporter --collector.ntp --collector.mountstats --collector.systemd --collector.tcpstat

Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=thermal_zone
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=time
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=timex
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=udp_queues
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=uname
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=vmstat
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=xfs
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:115 collector=zfs
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.882Z caller=node_exporter.go:199 msg="Listening on" address=:9100
Nov 16 14:54:04 k8s-master-01 node_exporter[270390]: level=info ts=2021-11-16T06:54:04.883Z caller=tls_config.go:191 msg="TLS is disabled." http2=false
root@prometheus-01:~# cat /usr/local/prometheus/prometheus.yml
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["192.168.174.103:9090"]
  - job_name: "prometheus-node"
    static_configs:
      - targets: ['192.168.174.100:9100']
root@prometheus-01:~# systemctl restart prometheus
GitHub地址:https://github.com/prometheus/node_exporter
/usr/local/node_exporter/node_exporter --collector.ntp --collector.mountstats --collector.systemd --collector.tcpstat
/usr/local/node_exporter/node_exporter --no-collector.ntp --no-collector.mountstats --no-collector.systemd --no-collector.tcpstat
每台主机 CPU 在 5 分钟内的平均使用率: (1 - avg(irate(node_cpu_seconds_total{mode='idle'}[5m])) by (instance)) * 100
跟踪 CPU 的平均负载就能获取到相关主机的 CPU 饱和度。实际上,它是将主机上的 CPU 数量考虑在内的一段时间内的平均运行队列长度。
平均负载低于 CPU 的数量是正常情况,而长时间超过 CPU 数量则表示 CPU 已然饱和;