最近数据库莫名卡顿, 通过排查发现连接拉满
mysql> show processlist; # 查看线程情况 +-----+-----------------+-----------------+-----------+---------+------+------------------------+------------------+ | Id | User | Host | db | Command | Time | State | Info | +-----+-----------------+-----------------+-----------+---------+------+------------------------+------------------+ | 5 | event_scheduler | localhost | NULL | Daemon | 1757 | Waiting on empty queue | NULL | | 12 | root | localhost:43514 | infosecqa | Sleep | 538 | | NULL | | 49 | root | localhost:43540 | infosecqa | Sleep | 480 | | NULL | | 51 | root | localhost:43544 | infosecqa | Sleep | 479 |
mysql> show variables like "%max_connection%"; # 查看最大连接数 +------------------------+-------+ | Variable_name | Value | +------------------------+-------+ | max_connections | 151 | # mysql最大允许连接数 | mysqlx_max_connections | 100 | +------------------------+-------+ 2 rows in set (0.00 sec)
mysql> show status like "Thread%"; # 查看连接情况 +-------------------+-------+ | Variable_name | Value | +-------------------+-------+ | Threads_cached | 4 | | Threads_connected | 143 | # 已建立的连接数 | Threads_created | 148 | # 打开的连接数 | Threads_running | 2 | # 正在运行的连接数 +-------------------+-------+ 4 rows in set (0.00 sec)
根据show processlist中的某个连接
比如这个看一下他在干什么: localhost:45686
[root@infosec ~]# ss -tp|grep mysql # 发现说的连接都是2817538这个进程创建出来的 ESTAB 0 0 [::ffff:127.0.0.1]:mysql [::ffff:127.0.0.1]:44620 users:(("mysqld",pid=2817538,fd=165)) ESTAB 0 0 [::ffff:127.0.0.1]:mysql [::ffff:127.0.0.1]:44352 users:(("mysqld",pid=2817538,fd=117)) ESTAB 0 0 [::ffff:127.0.0.1]:mysql [::ffff:127.0.0.1]:44540 users:(("mysqld",pid=2817538,fd=150)) ESTAB 0 0 [::ffff:127.0.0.1]:mysql [::ffff:127.0.0.1]:44910 users:(("mysqld",pid=2817538,fd=205))
[root@infosec ~]# ps -ef|grep 281753[8] # 查看当前进程是什么, 发现是mysql的后台的进程, 然我我就不知道该咋们办 mysql 2817538 2817234 1 11:29 ? 00:01:38 /usr/local/mysql/bin/mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data --plugin-dir=/usr/local/mysql/lib/plugin --user=mysql --log-error=xxx.err --pid-file=/usr/local/mysql/data/xxxx.pid --socket=/tmp/mysql.sock --port=xxxx
突然发现ss查看到后面还有一个fd文件描述符
我也不知道有了文件描述符我可以干嘛, 所有随便搜了一下linux 根据文件描述符它找到正在操作的文件, 这不就是我想要的么?
[root@infosec ~]# lsof -d 150 # ok根据这里看到同事写的代码有问题, 因为你已经知道是哪个文件哪个进程命令在操作, 直接让同事修bug吧 COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME uwsgi 2510 root 150w REG 8,2 33390455 1626717201 /XXX/base_xxx/base_xxx_httprunner.log (deleted) uwsgi 2443362 root 150w REG 8,2 1140192047 1626762311 /XXX/base_test1/base_test1_httprunner_error.log mysqld 2817538 mysql 150u IPv6 2663696291 0t0 TCP localhost:mysql->localhost:44540 (ESTABLISHED) uwsgi 2840143 root 150w REG 8,2 1316132498 1626762312 /XXX/base_test1/base_test1_httprunner.log uwsgi 3911893 root 150w REG 8,2 388836144 544195539 /XXX/base_NettAuthlite/base_NettAuthlite_httprunner.log (deleted) uwsgi 3911894 root 150w REG 8,2 388836144 544195539 /XXXXXX/base_NettAuthlite/base_NettAuthlite_httprunner.log (deleted)
[root@infosec ~]# tail -fn 30 # 看看日志发生了什么, 直接定位到了错误 /XXX/base_test1/base_test1_httprunner.log 2022-04-13 10:40:06.850 | INFO | HttpApi.HttprunDemo.runner:__run_step:324 - run step end: 参数错误_get <<<<<< 2022-04-13 10:40:06.850 | INFO | HttpApi.HttprunDemo.runner:__run_step:324 - run step end: 参数错误_get <<<<<< 2022-04-13 10:40:06.850 | INFO | HttpApi.HttprunDemo.runner:__run_step:324 - run step end: 参数错误_get <<<<<< 2022-04-13 10:40:06.850 | INFO | HttpApi.HttprunDemo.runner:__run_step:324 - run step end: 参数错误_get <<<<<< 2022-04-13 10:40:06.850 | INFO | HttpApi.HttprunDemo.runner:__run_step:324 - run step end: 参数错误_get <<<<<<
先把占满的线程杀掉(mysql中)
select concat("kill ", id, ";") from information_schema.processlist where COMMAND="Sleep" into outfile "/temp/a.txt"; # 拼接kill语句 如果不能into 到文件就把结果复制到一个文件中使用awk手动切割, 按照|和空格切割文件, 并将结果重新写到b.txt awk -F '[\|| ]' '{print $3,$4}' a.txt > b.txt mysql中执行 source /tmp/b.txt