优化前
mysql> use test_old; Database changed mysql> set profiling = 'ON'; Query OK, 0 rows affected mysql> show variables like 'profiling'; +---------------+-------+ | Variable_name | Value | +---------------+-------+ | profiling | ON | +---------------+-------+ 1 row in set mysql> show profiles; +----------+------------+---------------------------------------------------------------------------------------------+ | Query_ID | Duration | Query | +----------+------------+---------------------------------------------------------------------------------------------+ | 1 | 0.00419 | show variables like 'profiling' | | 2 | 1.78590175 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') | +----------+------------+---------------------------------------------------------------------------------------------+ 2 rows in set mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824'); +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ | 1 | SIMPLE | table_test | NULL | range | test_id | test_id | 33 | NULL | 170496 | 100 | Using index condition; Using MRR | +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ 1 row in set
优化后
mysql> use test; Database changed mysql> set profiling = 'ON'; Query OK, 0 rows affected mysql> show variables like 'profiling'; +---------------+-------+ | Variable_name | Value | +---------------+-------+ | profiling | ON | +---------------+-------+ 1 row in set mysql> show profiles; +----------+-----------+---------------------------------------------------------------------------------------------+ | Query_ID | Duration | Query | +----------+-----------+---------------------------------------------------------------------------------------------+ | 1 | 0.0060565 | show variables like 'profiling' | | 2 | 0.1755525 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') | +----------+-----------+---------------------------------------------------------------------------------------------+ 2 rows in set mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824'); +----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+ | 1 | SIMPLE | table_test | p20221126,p20221127,p20221128,p20221129,p20221130,p20221201,p20221202,p20221203,pmax | range | PRIMARY,test_id | PRIMARY | 32 | NULL | 185501 | 100 | Using where | +----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+ 1 row in set
table_test表信息
-- ---------------------------- -- Table structure for table_test -- ---------------------------- DROP TABLE IF EXISTS `table_test`; CREATE TABLE `table_test` ( `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id', `test_id` varchar(10) DEFAULT NULL COMMENT '场景id', `data_time` timestamp NULL DEFAULT NULL COMMENT '数据时间', `service_id` varchar(50) DEFAULT NULL COMMENT '流量类型', `total_traffic` varchar(50) DEFAULT NULL COMMENT '总流量', `ul_traffic` varchar(50) DEFAULT NULL COMMENT '上行流量', `dl_traffic` varchar(50) DEFAULT NULL COMMENT '下行流量', `tcp_conn_req_times` varchar(50) DEFAULT NULL COMMENT 'TCP连接请求次数', `tcp_conn_succ_times` varchar(50) DEFAULT NULL COMMENT 'TCP连接成功次数', `tcp_conn_succ_rat` varchar(50) DEFAULT NULL COMMENT 'TCP连接成功次率', `tcp_conn_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP连接建立总时长', `tcp_conn_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP连接建立平均时延', `tcp_ul_rtt_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP上行RTT总时延', `tcp_dl_rtt_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP下行RTT总时延', `tcp_ul_rtt_stat_times` varchar(50) DEFAULT NULL COMMENT 'TCP上行RTT总次数', `tcp_dl_rtt_stat_times` varchar(50) DEFAULT NULL COMMENT 'TCP下行RTT总次数', `tcp_ul_rtt_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP上行RTT平均时延', `tcp_dl_rtt_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP下行RTT平均时延', `day_id` varchar(50) DEFAULT NULL COMMENT 'day_id', PRIMARY KEY (`id`) USING BTREE, KEY `test_id` (`test_id`) USING BTREE, KEY `data_time` (`data_time`) USING BTREE ) ENGINE=InnoDB AUTO_INCREMENT=19671082 DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC COMMENT='热门app';
开启slow_query_log
mysql> set global slow_query_log = 'ON'; Query OK, 0 rows affected mysql> show variables like 'slow_query_log%'; +---------------------+------------------------------------------+ | Variable_name | Value | +---------------------+------------------------------------------+ | slow_query_log | ON | | slow_query_log_file | /usr/local/mysql/data/hadoop102-slow.log | +---------------------+------------------------------------------+ 2 rows in set
修改long_query_time阈值
mysql> set global long_query_time = 0.5; Query OK, 0 rows affected -- 经过测试,发现设置global时,只针对新的会话有效,对当前会话无效。 -- 所以还需要针对当前会话设置一次。 mysql> set long_query_time = 0.5; Query OK, 0 rows affected mysql> show variables like 'long_query_time'; +-----------------+----------+ | Variable_name | Value | +-----------------+----------+ | long_query_time | 0.500000 | +-----------------+----------+ 1 row in set mysql>
[realeo@hadoop102 ~]$ cd /usr/local/mysql/data/ [realeo@hadoop102 data]$ sudo cat hadoop102-slow.log /usr/sbin/mysqld, Version: 5.7.27 (MySQL Community Server (GPL)). started with: Tcp port: 3306 Unix socket: /var/lib/mysql/mysql.sock Time Id Command Argument # Time: 2023-04-03T14:46:51.877547Z # User@Host: root[root] @ [192.168.10.1] Id: 118 # Query_time: 1.284210 Lock_time: 0.000317 Rows_sent: 91767 Rows_examined: 91767 use test; SET timestamp=1680533211; SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824');
开启show profile
-- 仅对当前会话开启 mysql> set profiling = 'ON'; Query OK, 0 rows affected mysql> show variables like 'profiling'; +---------------+-------+ | Variable_name | Value | +---------------+-------+ | profiling | ON | +---------------+-------+ 1 row in set
查看会话中sql执行情况
mysql> show profiles; +----------+------------+---------------------------------------------------------------------------------------------+ | Query_ID | Duration | Query | +----------+------------+---------------------------------------------------------------------------------------------+ | 1 | 0.00193025 | show variables like 'profiling' | | 2 | 2.095192 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') | +----------+------------+---------------------------------------------------------------------------------------------+ 2 rows in set
查看当前会话某条sql执行记录的资源消耗情况
mysql> show profile cpu, block io for query 2; +----------------------+----------+----------+------------+--------------+---------------+ | Status | Duration | CPU_user | CPU_system | Block_ops_in | Block_ops_out | +----------------------+----------+----------+------------+--------------+---------------+ | starting | 0.000125 | 6.8E-5 | 4.5E-5 | 0 | 0 | | checking permissions | 1E-5 | 5E-6 | 3E-6 | 0 | 0 | | Opening tables | 0.00114 | 0.001145 | 0 | 0 | 0 | | init | 5.1E-5 | 3.1E-5 | 1.6E-5 | 0 | 0 | | System lock | 1E-5 | 6E-6 | 4E-6 | 0 | 0 | | optimizing | 1E-5 | 6E-6 | 4E-6 | 0 | 0 | | statistics | 0.000519 | 0.000521 | 0 | 0 | 0 | | preparing | 1.8E-5 | 1.6E-5 | 0 | 0 | 0 | | executing | 3E-6 | 2E-6 | 0 | 0 | 0 | | Sending data | 2.093149 | 1.301995 | 0.998561 | 0 | 384 | | end | 4.5E-5 | 0 | 1.9E-5 | 0 | 0 | | query end | 1.2E-5 | 0 | 1.2E-5 | 0 | 0 | | closing tables | 1.7E-5 | 0 | 1.7E-5 | 0 | 0 | | freeing items | 2E-5 | 0 | 2E-5 | 0 | 0 | | logging slow query | 5.1E-5 | 0 | 5.1E-5 | 0 | 0 | | cleaning up | 1.5E-5 | 0 | 1.4E-5 | 0 | 0 | +----------------------+----------+----------+------------+--------------+---------------+ 16 rows in set
show profile常用查询参数:
- all:显示所有的开销信息。
- block io:显示块io开销。
- context switches: 上下文切换开销。
- cpu:显示cpu开销信息。
- ipc:显示发送和接受开销信息。
- memory:显示内存开销信息。
- page faults:显示页面错误开销信息。
- source:显示和 source_function,source_file,source_line 相关的开销信息。
- swaps:显示交换次数开销信息。
Hash索引与B+树索引的区别
不能进行范围性
的一个查找,因为hash指向的数据是无序
的,而B+树的叶子节点是个有序的链表。Hash索引仅能满足(=、<>)和in查询。如果进行范围查询,哈希型索引,时间复杂化会退化为O(n)而树型的有序特性,依然能保持O(log2n)的高效率不支持联合索引的最左侧原则
(即联合索引的部分索引无法使用),而B+树可以。对于联合索引来说,Hash索引在计算Hash值得时候将索引键合并后再一起计算Hash值,所以不会针对每个索引单独计算hash值。因此如果用到联合索引的一个或者多个索引时,无法被利用。模糊查找
,而B+树使用模糊查询的方式时,like后面后模糊查询的话就可以起到优化作用。mysql> show profiles; +----------+------------+---------------------------------------------------------------------------------------------+ | Query_ID | Duration | Query | +----------+------------+---------------------------------------------------------------------------------------------+ | 1 | 0.00187275 | show variables like 'profiling' | | 2 | 1.63446275 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') | +----------+------------+---------------------------------------------------------------------------------------------+ 2 rows in set mysql> show index from `table_test`; +----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | table_test | 0 | PRIMARY | 1 | id | A | 1302941 | NULL | NULL | | BTREE | | | | table_test | 1 | test_id | 1 | test_id | A | 94 | NULL | NULL | YES | BTREE | | | | table_test | 1 | data_time | 1 | data_time | A | 75 | NULL | NULL | YES | BTREE | | | +----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ 3 rows in set
mysql> SELECT max(LENGTH(test_id)) FROM `table_test`; +-----------------------+ | max(LENGTH(test_id)) | +-----------------------+ | 8 | +-----------------------+ 1 row in set mysql> show profiles; +----------+------------+---------------------------------------------------------------------------------------------+ | Query_ID | Duration | Query | +----------+------------+---------------------------------------------------------------------------------------------+ | 1 | 0.00324825 | show variables like 'profiling' | | 2 | 12.979875 | alter table table_test modify test_id char(8) | | 3 | 1.683254 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') | +----------+------------+---------------------------------------------------------------------------------------------+ 3 rows in set
查询语句的优化对于MySQL大数据查询速度的提升非常重要。应该避免使用SELECT *,因为这会导致MySQL检索整个表的所有列,从而降低查询速度。应该只查询需要的列,并使用WHERE子句限制检索的行数。
MySQL组合索引(复合索引)的最左优先原则。最左优先就是说组合索引的第一个字段必须出现在查询组句中,这个索引才会被用到。只要组合索引最左边第一个字段出现在Where中,那么不管后面的字段出现与否或者出现顺序如何,MySQL引擎都会自动调用索引来优化查询效率。
在创建多列索引时,要根据业务需求,where 子句中使用最频繁的一列放在最左边。
-- 查看当前表信息 show create table table_test; -- 创建新增索引 ALTER TABLE table_test ADD INDEX test_id_idx (test_id(8));
含义可参考:https://blog.csdn.net/jibaole/article/details/121293188
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824'); +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ | 1 | SIMPLE | table_test | NULL | range | test_id | test_id | 33 | NULL | 163326 | 100 | Using index condition; Using MRR | +----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+ 1 row in set
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in (99863885543,99863900221,99821363824); +----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+ | 1 | SIMPLE | table_test | NULL | ALL | test_id | NULL | NULL | NULL | 1397231 | 30 | Using where | +----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+ 1 row in set
分区表是将大表分成小表的一种方法。在处理大数据时,使用分区表可以大大提高查询速度。分区表将数据分成多个分区,每个分区可以独立地进行查询。当进行查询时,MySQL只需要扫描需要的分区,而不是整个表。
在进行自动增加分区前一定得先对表手动分几个区
-- 创建复合主键 alter table table_test drop primary key,add primary key(`test_id`,`data_time`,`id`); ALTER TABLE table_test PARTITION BY RANGE (UNIX_TIMESTAMP(data_time))( PARTITION p20221126 VALUES LESS THAN ( UNIX_TIMESTAMP('2022-11-27') ), PARTITION p20221127 VALUES LESS THAN ( UNIX_TIMESTAMP('2022-11-28') ), PARTITION p20221128 VALUES LESS THAN ( UNIX_TIMESTAMP('2022-11-29') ), PARTITION p20221129 VALUES LESS THAN ( UNIX_TIMESTAMP('2022-11-30') ) ) -- 如果有大于分区上限的值想插入表中,系统会返还错误,为了兼容这种情况,我们可以新增一个分区,上限为maxvalue。所有大于当前上限的值都会放入这个分区: alter table table_test add partition(partition pmax values less than(maxvalue)); ALTER TABLE table_test ADD PARTITION (PARTITION p20221130 VALUES LESS THAN (TO_DAYS ('2022-11-30'))) -- 删除分区,同时清除历史数据 alter table table_test drop partition p20221127;
查询表分区信息
mysql> SELECT PARTITION_NAME,PARTITION_METHOD,PARTITION_EXPRESSION,PARTITION_DESCRIPTION, TABLE_ROWS,SUBPARTITION_NAME,SUBPARTITION_METHOD,SUBPARTITION_EXPRESSION FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA=SCHEMA() AND TABLE_NAME='table_test'; +----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+ | PARTITION_NAME | PARTITION_METHOD | PARTITION_EXPRESSION | PARTITION_DESCRIPTION | TABLE_ROWS | SUBPARTITION_NAME | SUBPARTITION_METHOD | SUBPARTITION_EXPRESSION | +----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+ | p20221126 | RANGE | UNIX_TIMESTAMP(data_time) | 1669536000 | 470450 | NULL | NULL | NULL | | p20221127 | RANGE | UNIX_TIMESTAMP(data_time) | 1669622400 | 378562 | NULL | NULL | NULL | | p20221128 | RANGE | UNIX_TIMESTAMP(data_time) | 1669708800 | 419724 | NULL | NULL | NULL | | p20221129 | RANGE | UNIX_TIMESTAMP(data_time) | 1669795200 | 135171 | NULL | NULL | NULL | +----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+ 4 rows in set -- 查询指定分区数据 SELECT * FROM `table_test` PARTITION(p20221129) where test_id in ('99863885543', '99863900221', '99821363824');
按天自动分区存储过程
DELIMITER $$ -- 切换数据库test USE `test`$$ DROP PROCEDURE IF EXISTS `create_partition_by_day`$$ CREATE DEFINER=`root`@`%` PROCEDURE `create_partition_by_day`() BEGIN /* 事务回滚,其实放这里没什么作用,ALTER TABLE是隐式提交,回滚不了的。*/ DECLARE EXIT HANDLER FOR SQLEXCEPTION ROLLBACK; START TRANSACTION; /* 到系统表查出这个表的倒数第二大分区,得到分区的日期。在创建分区的时候,名称就以日期格式存放,方便后面维护 */ SELECT REPLACE(partition_name,'p','') INTO @P12_Name FROM INFORMATION_SCHEMA.PARTITIONS WHERE table_name='table_test' ORDER BY partition_ordinal_position DESC LIMIT 1,1; SET @Max_date= DATE(DATE_ADD(@P12_Name+0, INTERVAL 1 DAY))+0; /* 修改表,在最大分区的后面增加一个分区,时间范围加1天 */ SET @s1=CONCAT('ALTER TABLE table_test REORGANIZE PARTITION pmax INTO (PARTITION p',@Max_date,' VALUES LESS THAN (UNIX_TIMESTAMP (''',DATE(@Max_date+1),''')),partition pmax values less than(maxvalue))'); /* 输出查看增加分区语句*/ SELECT @s1; PREPARE stmt2 FROM @s1; EXECUTE stmt2; DEALLOCATE PREPARE stmt2; /* 取出最小的分区的名称,并删除掉 。 注意:删除分区会同时删除分区内的数据,慎重 */ /*select partition_name into @P0_Name from INFORMATION_SCHEMA.PARTITIONS where table_name='table_test' order by partition_ordinal_position limit 1; SET @s=concat('ALTER TABLE table_test DROP PARTITION ',@P0_Name); PREPARE stmt1 FROM @s; EXECUTE stmt1; DEALLOCATE PREPARE stmt1; */ /* 提交 */ COMMIT ; END$$ DELIMITER ;
增加事件执行
-- 开启任务定时器 mysql> SET GLOBAL event_scheduler = ON; Query OK, 0 rows affected mysql> SHOW VARIABLES LIKE 'event_scheduler'; +-----------------+-------+ | Variable_name | Value | +-----------------+-------+ | event_scheduler | ON | +-----------------+-------+ 1 row in set -- 事件定义 DELIMITER || CREATE EVENT Partition_by_day_event ON SCHEDULE EVERY 1 day STARTS '2022-11-29 07:00:00' DO BEGIN CALL test.`create_partition_by_day`; END || DELIMITER ;
配置可参考:https://www.bbsmax.com/A/gAJG7rZJZR/
性能可参考:https://www.cnblogs.com/mzhaox/p/11201715.html
优化服务器硬件可以提高MySQL大数据查询速度。应该使用更快的CPU、更大的内存和更快的硬盘。MySQL可以更快地读取和处理数据。