作者:IT王小二
博客:https://itwxe.com
不知不觉过年半个多月没更新啦,不得不说老家真冷啊,完全不想伸出小二滴爪子,回到深圳开始更新啦,还是深圳暖和!
上一篇 Explain执行计划详解 中提到执行计划分析查询语句时 possible_keys
列中存在可以使用的索引,但是实际最后 key
列中却并没有使用索引,走的全表扫描,这是为啥捏?
这就涉及小二说的 MySQL 执行查询语句时需要进行的查询成本分析了,而 MySQL 提供的 trace
工具就可以让我们看到 MySQL 计算的查询成本,以及选择索引的大致分析过程。
-- 示例表 CREATE TABLE `employees` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(24) NOT NULL DEFAULT '' COMMENT '姓名', `age` int(11) NOT NULL DEFAULT '0' COMMENT '年龄', `position` varchar(20) NOT NULL DEFAULT '' COMMENT '职位', `hire_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入职时间', PRIMARY KEY (`id`), KEY `idx_name_age_position` (`name`,`age`,`position`) USING BTREE ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='员工记录表'; INSERT INTO employees(name,age,position,hire_time) VALUES('itwxe',22,'manager',NOW()); INSERT INTO employees(name,age,position,hire_time) VALUES('weiwei', 23,'test',NOW()); INSERT INTO employees(name,age,position,hire_time) VALUES('leilei',23,'dev',NOW()); -- 插入10w条测试数据 drop procedure if exists insert_employees; delimiter $$ create procedure insert_employees() begin declare i int; set i = 1; while(i <= 100000)do insert into employees(name, age, position) values(CONCAT('itwxe', i), rand() * 42 + 18, 'dev'); set i = i + 1; end while; end$$ delimiter ; call insert_employees();
首先需要说明的是开启 trace 工具会影响 MySQL 性能,所以只能临时分析 SQL 使用,用完之后立即关闭。
使用 trace 工具步骤:
-- 首先开启trace mysql> set session optimizer_trace="enabled=on", end_markers_in_json=on; -- 执行查询SQL mysql> select * from employees where name > 'wei' order by position; -- 查询trace字段 mysql> SELECT * FROM information_schema.OPTIMIZER_TRACE;
使用完成后关闭 trace 工具:
-- 当分析完SQL,关闭trace mysql> set session optimizer_trace="enabled=off";
例如这条语句:
explain select * from employees where name > 'itwxe' order by position; explain select * from employees where name > 'wei' order by position;
可以看到第一条 SQL 中 possible_keys
有可以使用的索引 idx_name_age_position
,但是最后实际上 MySQL 没有使用 idx_name_age_position
,而是使用了全表扫描;而第二条 SQL 中使用到了 idx_name_age_position
中的 name 列,这是为什么呢?
如果非常了解 MySQL 索引底层数据结构的小伙伴应该就知道因为第二条 SQL name > 'itwxe'
扫描的联合索引 idx_name_age_position
的结果太多,回表成本太大,所以 MySQL 选择了全表扫描。要想看到 MySQL 计算的全表扫描成本和使用 idx_name_age_position
成本就得使用咱们这篇的主角 trace 工具了,以第二条SQL为例。
-- 首先开启trace mysql> set session optimizer_trace="enabled=on", end_markers_in_json=on; -- 执行查询SQL mysql> select * from employees where name > 'wei' order by position; -- 查询trace字段 mysql> SELECT * FROM information_schema.OPTIMIZER_TRACE;
执行后可以得到如下的示例结果,重要字段注释如下:
| select * from employees where name > 'wei' order by position | { "steps": [ { "join_preparation": { -- 第一阶段:SQL准备阶段,格式化SQL "select#": 1, "steps": [ { "expanded_query": "/* select#1 */ select `employees`.`id` AS `id`,`employees`.`name` AS `name`,`employees`.`age` AS `age`,`employees`.`position` AS `position`,`employees`.`hire_time` AS `hire_time` from `employees` where (`employees`.`name` > 'wei') order by `employees`.`position`" } ] /* steps */ } /* join_preparation */ }, { "join_optimization": { -- 第二阶段:SQL优化阶段 "select#": 1, "steps": [ { "condition_processing": { -- 条件处理,where,1=1之类的SQL优化 "condition": "WHERE", "original_condition": "(`employees`.`name` > 'wei')", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "(`employees`.`name` > 'wei')" }, { "transformation": "constant_propagation", "resulting_condition": "(`employees`.`name` > 'wei')" }, { "transformation": "trivial_condition_removal", "resulting_condition": "(`employees`.`name` > 'wei')" } ] /* steps */ } /* condition_processing */ }, { "substitute_generated_columns": { } /* substitute_generated_columns */ }, { "table_dependencies": [ -- 表依赖详情 { "table": "`employees`", "row_may_be_null": false, "map_bit": 0, "depends_on_map_bits": [ ] /* depends_on_map_bits */ } ] /* table_dependencies */ }, { "ref_optimizer_key_uses": [ ] /* ref_optimizer_key_uses */ }, { "rows_estimation": [ -- 预估表的访问成本 { "table": "`employees`", "range_analysis": { "table_scan": { -- 全表扫描情况 "rows": 100075, -- 扫描行数,innodb中这个是预估值,MyISAM中时准确值 "cost": 20306 -- 查询成本 } /* table_scan */, "potential_range_indexes": [ -- 查询可能使用的索引 { "index": "PRIMARY", -- 主键索引 "usable": false, "cause": "not_applicable" }, { "index": "idx_name_age_position", -- 联合索引 "usable": true, "key_parts": [ "name", "age", "position", "id" ] /* key_parts */ } ] /* potential_range_indexes */, "setup_range_conditions": [ ] /* setup_range_conditions */, "group_index_range": { "chosen": false, "cause": "not_group_by_or_distinct" } /* group_index_range */, "analyzing_range_alternatives": { -- 分析各个索引使用成本 "range_scan_alternatives": [ { "index": "idx_name_age_position", "ranges": [ "wei < name" -- 索引使用范围 ] /* ranges */, "index_dives_for_eq_ranges": true, "rowid_ordered": false, -- 使用该索引获取的记录是否按照主键排序 "using_mrr": false, -- 是否使用mrr "index_only": false, -- 是否使用覆盖索引 "rows": 1, -- 索引扫描行数,也是个预估值 "cost": 2.21, -- 索引使用成本 "chosen": true -- 是否选择该索引 } ] /* range_scan_alternatives */, "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" } /* analyzing_roworder_intersect */ } /* analyzing_range_alternatives */, "chosen_range_access_summary": { "range_access_plan": { "type": "range_scan", "index": "idx_name_age_position", "rows": 1, "ranges": [ "wei < name" ] /* ranges */ } /* range_access_plan */, "rows_for_plan": 1, "cost_for_plan": 2.21, "chosen": true } /* chosen_range_access_summary */ } /* range_analysis */ } ] /* rows_estimation */ }, { "considered_execution_plans": [ { "plan_prefix": [ ] /* plan_prefix */, "table": "`employees`", "best_access_path": { -- 最优访问路径 "considered_access_paths": [ -- 最终访问路径 { "rows_to_scan": 1, "access_type": "range", -- 访问类型 range:访问扫描;scan:全表扫描 "range_details": { "used_index": "idx_name_age_position" } /* range_details */, "resulting_rows": 1, "cost": 2.41, "chosen": true, -- 确定选择 "use_tmp_table": true } ] /* considered_access_paths */ } /* best_access_path */, "condition_filtering_pct": 100, "rows_for_plan": 1, "cost_for_plan": 2.41, "sort_cost": 1, "new_cost_for_plan": 3.41, "chosen": true } ] /* considered_execution_plans */ }, { "attaching_conditions_to_tables": { "original_condition": "(`employees`.`name` > 'wei')", "attached_conditions_computation": [ ] /* attached_conditions_computation */, "attached_conditions_summary": [ { "table": "`employees`", "attached": "(`employees`.`name` > 'wei')" } ] /* attached_conditions_summary */ } /* attaching_conditions_to_tables */ }, { "clause_processing": { "clause": "ORDER BY", "original_clause": "`employees`.`position`", "items": [ { "item": "`employees`.`position`" } ] /* items */, "resulting_clause_is_simple": true, "resulting_clause": "`employees`.`position`" } /* clause_processing */ }, { "reconsidering_access_paths_for_index_ordering": { "clause": "ORDER BY", "steps": [ ] /* steps */, "index_order_summary": { "table": "`employees`", "index_provides_order": false, "order_direction": "undefined", "index": "idx_name_age_position", "plan_changed": false } /* index_order_summary */ } /* reconsidering_access_paths_for_index_ordering */ }, { "refine_plan": [ { "table": "`employees`", "pushed_index_condition": "(`employees`.`name` > 'wei')", "table_condition_attached": null } ] /* refine_plan */ } ] /* steps */ } /* join_optimization */ }, { "join_execution": { -- 第三阶段:SQL执行阶段 "select#": 1, "steps": [ { "filesort_information": [ { "direction": "asc", "table": "`employees`", "field": "position" } ] /* filesort_information */, "filesort_priority_queue_optimization": { "usable": false, "cause": "not applicable (no LIMIT)" } /* filesort_priority_queue_optimization */, "filesort_execution": [ ] /* filesort_execution */, "filesort_summary": { "rows": 1, "examined_rows": 1, "number_of_tmp_files": 0, "sort_buffer_size": 262056, "sort_mode": "<sort_key, packed_additional_fields>" } /* filesort_summary */ } ] /* steps */ } /* join_execution */ } ] /* steps */ } | 0 | 0 |
可以看到预估表的访问成本计算过程中,MySQL 会计算全表扫描和可以使用的索引成本,比较各个可以使用的索引和全表扫描的查询成本,最终选择最优的一种方式去查询。例如这个例子中全表扫描成本为20306,使用 idx_name_age_position
联合索引成本为2.21,所以最终 MySQL 选择使用 idx_name_age_position
索引去执行查询。
当然 trace 工具主要看的是 MySQL 大致的优化和计算成本过程,那么 MySQL 是通过什么去计算出的这个成本呢?小二先在这里挖个坑吧,看下啥时候填上,哈哈哈~~~