分表
1.Postgresql分表与优化
2.PostgreSQL分区表的性能损耗验证
3.PostgreSQL 创建分区表,SQL优化之PostgreSQL Table Partitioning
4.PostgreSQL 9.5+ 高效分区表实现 - pg_pathman
5.PostgreSql的Explain命令详解
EXPLAIN ANALYZE
1.Postgresql生成大量测试数据
-- 范例 select (select array_agg(i::date) from generate_series('2015-12-01'::date, '2015-12-30'::date, '1 day'::interval) as t(i) )[floor(random()*4)+1] as date_key, floor(random()*24) as hour_key, floor(random()*1000000)+1 as client_key, floor(random()*100000)+1 as item_key, floor(random()*20)+1 as account, floor(random()*10000)+1 as expense from generate_series(1,30,1);
分表[触发器&索引]:
-- 添加主表--------------------------------------------------------------------------------------------------- create sequence seq_auth_detail cycle; create table tb_auth_detail ( auth_detail_id integer not null, person_id varchar(48) not null, dept_id varchar(48) not null, dev_id varchar(48) not null, door_id varchar(48) not null, auth_status integer not null, finger_status integer not null, face_status integer not null, del_card_number varchar(1024), auth_person_schedule_id varchar(48), auth_dept_schedule_id varchar(48), auth_person_grp_schedule_id varchar(48), config_time timestamp not null, download_time timestamp, dev_group_id integer not null, download_code varchar(48), constraint pk_auth_detail primary key (auth_detail_id) ); comment on table tb_auth_detail is '权限记录表'; comment on column tb_auth_detail.auth_detail_id is '主键'; comment on column tb_auth_detail.person_id is '人员id'; comment on column tb_auth_detail.dept_id is '组织id'; comment on column tb_auth_detail.dev_id is '设备id'; comment on column tb_auth_detail.door_id is '门id'; comment on column tb_auth_detail.auth_status is '权限状态1:待下载;2:待删除;3:已下载;4:已删除;5:正在下载;6:正在删除'; comment on column tb_auth_detail.finger_status is '指纹权限状态1:待下载;2:待删除;3:已下载;4:已禁用;5:正在下载;6:正在删除'; comment on column tb_auth_detail.face_status is '人脸权限状态1:待下载;2:待删除;3:已下载;4:已禁用;5:正在下载;6:正在删除'; comment on column tb_auth_detail.del_card_number is '待删除的卡号'; comment on column tb_auth_detail.auth_person_schedule_id is '人员计划模板'; comment on column tb_auth_detail.auth_dept_schedule_id is '组织计划模板'; comment on column tb_auth_detail.auth_person_grp_schedule_id is '人组计划模板'; comment on column tb_auth_detail.config_time is '配置时间'; comment on column tb_auth_detail.download_time is '下载时间'; comment on column tb_auth_detail.dev_group_id is '设备组id'; comment on column tb_auth_detail.download_code is '下载标识'; -- 添加触发器--------------------------------------------------------------------------------------------------- -- tb_auth_detail权限记录表进行表分区设置:begin create or replace function func_tri_auth_detail() returns trigger as $$ declare my_tbname varchar(128); declare my_dev_grp_id varchar(64); declare sql_str text; begin my_tbname = TG_TABLE_NAME || '_' || NEW.dev_group_id::text; sql_str = 'INSERT INTO '||my_tbname ||' SELECT $1.* '; execute sql_str using NEW; return null; exception when undefined_table then begin my_dev_grp_id = NEW.dev_group_id::text; execute 'create table ' ||my_tbname || '(constraint ck_'|| my_tbname ||' check (dev_group_id =' || my_dev_grp_id || ')) inherits ('|| tg_table_name || ')'; execute 'alter table '||my_tbname ||' add constraint pk_'||my_tbname||' primary key (auth_detail_id)'; execute 'create index idx_'||my_tbname ||'_pid on ' || my_tbname || ' (person_id)'; execute 'create index idx_'||my_tbname ||'_did on ' || my_tbname || ' (dept_id)'; execute 'create index idx_'||my_tbname ||'_devid on ' || my_tbname || ' (dev_id)'; execute 'create index idx_'||my_tbname ||'_doorid on ' || my_tbname || ' (door_id)'; execute 'create index idx_'||my_tbname ||'_sid on ' || my_tbname || ' (dev_group_id)'; execute 'create index idx_'||my_tbname ||'_conf on ' || my_tbname || ' (config_time)'; execute sql_str using NEW; return null; exception when others then execute sql_str using NEW; return null; end; end; $$ language plpgsql; create trigger tri_ins_auth_detail BEFORE insert on tb_auth_detail for each row EXECUTE PROCEDURE func_tri_auth_detail(); -- tb_auth_detail权限记录表进行表分区设置:end -- 定义ID自增序列--------------------------------------------------------------------------------------------------- alter table tb_auth_detail alter column auth_detail_id set default nextval('seq_auth_detail'); -- 添加索引--------------------------------------------------------------------------------------------------- create index idx_auth_detail_person_id on tb_auth_detail (person_id); create index idx_auth_detail_dept_id on tb_auth_detail (dept_id); create index idx_auth_detail_dev_id on tb_auth_detail (dev_id); create index idx_auth_detail_door_id on tb_auth_detail (door_id); create index idx_auth_detail_sdev_group_id on tb_auth_detail (dev_group_id); create index idx_auth_detail_config_time on tb_auth_detail (config_time);
模拟数据构造:
-- N:数据量 -- 随机截取: N=1 select substr('cc3483bf-9d1f-4eaa-a6e2-a376ba312ff1',1,(random()*26)::integer); -- 随机复制: N=1 select repeat('cc3483bf-9d1f-4eaa-a6e2-a376ba312ff1',(random()*40)::integer); -- 生成序列 SELECT * FROM generate_series(1,5); select id from generate_series(1,10) t(id); -- 自定义column -- 生成随机数 select (random()*100)::int from generate_series(1,10); -- 序列、随机字符串、时间戳 select generate_series(1,100000),md5(random()::text),clock_timestamp(); -- 生成时间 SELECT date(generate_series(now(), now() + '1 week', '1 day')); -- 模拟随机数据 select md5(random()::text) as person_id, md5(random()::text) as dept_id, md5(random()::text) as dev_id, md5(random()::text) as door_id, floor(random()*2)+1 as auth_status, floor(random()*3)+1 as finger_status, floor(random()*4)+1 as face_status, floor(random()*5)+1 as del_card_number, md5(random()::text) as auth_person_schedule_id, md5(random()::text) as auth_dept_schedule_id, md5(random()::text) as auth_person_grp_schedule_id, clock_timestamp() as config_time, clock_timestamp() as download_time, md5(random()::text) as dev_group_id, md5(random()::text) as download_code from generate_series(1,10,1);
数据插入Example
explain analyze insert into tb_auth_detail( "person_id", "dept_id", "dev_id", "door_id", "auth_status", "finger_status", "face_status", "del_card_number", "auth_person_schedule_id", "auth_dept_schedule_id", "auth_person_grp_schedule_id", "config_time", "download_time", "dev_group_id", "download_code" ) select floor(random()*1000000)+1 as person_id, floor(random()*1000000)+1 as dept_id, floor(random()*1000000)+1 as dev_id, floor(random()*1000000)+1 as door_id, floor(random()*2)+1 as auth_status, floor(random()*3)+1 as finger_status, floor(random()*4)+1 as face_status, floor(random()*5)+1 as del_card_number, floor(random()*1000000)+1 as auth_person_schedule_id, floor(random()*1000000)+1 as auth_dept_schedule_id, floor(random()*1000000)+1 as auth_person_grp_schedule_id, clock_timestamp() as config_time, now() as download_time, floor(random()*200)+1 as dev_group_id, floor(random()*1000000)+1 as download_code from generate_series(1,100000,1);
1.创建相同表结构的tb_auth_detail_all不进行分表 2.分别插入10W、100W、1000W数据进行测试
10W 数据
插入性能分析
-- 0数据[初次建分区表耗时] Insert on tb_auth_detail (cost=0.00..242.50 rows=1000 width=120) (actual time=118005.833..118005.833 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=18.495..7147.848 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.280..461.711 rows=100000 loops=1) Planning time: 0.248 ms Trigger tri_ins_auth_detail: time=110770.614 calls=100000 Execution time: 118008.532 ms
-- 已有10W Insert on tb_auth_detail (cost=0.00..242.50 rows=1000 width=120) (actual time=33649.149..33649.149 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.439..1652.186 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=13.396..411.380 rows=100000 loops=1) Planning time: 0.187 ms Trigger tri_ins_auth_detail: time=31926.839 calls=100000 Execution time: 33651.926 ms
-- 0数据 Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=14424.540..14424.540 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=20.354..1273.261 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=19.205..273.179 rows=100000 loops=1) Planning time: 0.552 ms Execution time: 14429.240 ms
-- 已有10W Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=25942.760..25942.760 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=14.394..1307.630 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.282..283.456 rows=100000 loops=1) Planning time: 0.281 ms Execution time: 25948.271 ms
搜索性能分析
explain analyze select * from tb_auth_detail_all; Seq Scan on tb_auth_detail_all (cost=0.00..2635.00 rows=100000 width=86) (actual time=0.007..6.387 rows=100000 loops=1) Planning time: 2.292 ms Execution time: 7.462 ms
explain analyze select * from tb_auth_detail; -> Seq Scan on tb_auth_detail_172 (cost=0.00..14.22 rows=522 width=86) (actual time=0.004..0.172 rows=522 loops=1) .... -> Seq Scan on tb_auth_detail_100 (cost=0.00..14.06 rows=506 width=86) (actual time=0.004..0.099 rows=506 loops=1) -> Seq Scan on tb_auth_detail_72 (cost=0.00..12.80 rows=480 width=86) (actual time=0.005..0.197 rows=480 loops=1) Planning time: 419.074 ms Execution time: 79.472 ms
配置约束
官网解释 (避免扫描 PostgreSQL 分区表所有分区 )
constraint_exclusion 的含义是:当PG生产执行计划时是否考虑表上的约束,这个参数有三个选项 "off,on ,partition" ,默认参数为 off, 意思不使用表上的 constraint 来生成计划,如果设置成 on ,则对所有表生效,生成 PLAN 时会考虑表上的 constraint, 建议设置成 partition,只对分区表 生效,从而避免扫描分区表所有分区。
-- constraint_exclusion = partition # on, off, or partition set constraint_exclusion = off;
10W数据
插入
-- off Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=7993.545..7993.545 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.112..1516.601 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=12.765..287.764 rows=100000 loops=1) Planning time: 0.402 ms Execution time: 7997.900 ms
-- on Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=8746.615..8746.615 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.932..1342.997 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=13.331..275.213 rows=100000 loops=1) Planning time: 0.732 ms Execution time: 8751.193 ms
-- partition Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=10885.670..10885.670 rows=0 loops=1) -> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=14.209..1342.818 rows=100000 loops=1) -> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.165..290.517 rows=100000 loops=1) Planning time: 0.124 ms Execution time: 10888.549 ms
搜索
explain analyze select * from tb_auth_detail where dev_group_id = 21;
-- off Append (cost=0.00..882.51 rows=1176 width=87) (actual time=10.694..13.909 rows=976 loops=1) -> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1) Filter: (dev_group_id = 21) -> Index Scan using idx_tb_auth_detail_192_sid on tb_auth_detail_192 (cost=0.28..4.29 rows=1 width=86) (actual time=0.215..0.215 rows=0 loops=1) Index Cond: (dev_group_id = 21) ... -> Index Scan using idx_tb_auth_detail_39_sid on tb_auth_detail_39 (cost=0.28..4.29 rows=1 width=86) (actual time=0.041..0.041 rows=0 loops=1) Index Cond: (dev_group_id = 21) Planning time: 652.183 ms Execution time: 17.949 ms
-- on Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.110..1.498 rows=976 loops=1) -> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1) Filter: (dev_group_id = 21) -> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.105..1.472 rows=976 loops=1) Filter: (dev_group_id = 21) Planning time: 536.421 ms Execution time: 2.257 ms
-- partition Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.009..0.170 rows=976 loops=1) -> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.001..0.001 rows=0 loops=1) Filter: (dev_group_id = 21) -> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.007..0.153 rows=976 loops=1) Filter: (dev_group_id = 21) Planning time: 409.550 ms Execution time: 0.841 ms
explain analyze select * from tb_auth_detail_all where dev_group_id = 20;
-- off Bitmap Heap Scan on tb_auth_detail_all (cost=68.16..2746.17 rows=999 width=86) (actual time=1.079..48.217 rows=993 loops=1) Recheck Cond: (dev_group_id = 20) Heap Blocks: exact=876 -> Bitmap Index Scan on idx_auth_detail_all_sdev_group_id (cost=0.00..67.91 rows=999 width=0) (actual time=0.901..0.901 rows=993 loops=1) Index Cond: (dev_group_id = 20) Planning time: 2.386 ms Execution time: 48.690 ms
-- on Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.110..1.498 rows=976 loops=1) -> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1) Filter: (dev_group_id = 21) -> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.105..1.472 rows=976 loops=1) Filter: (dev_group_id = 21) Planning time: 536.421 ms Execution time: 2.257 ms
-- partition Bitmap Heap Scan on tb_auth_detail_all (cost=68.16..2746.17 rows=999 width=86) (actual time=0.195..0.751 rows=993 loops=1) Recheck Cond: (dev_group_id = 20) Heap Blocks: exact=876 -> Bitmap Index Scan on idx_auth_detail_all_sdev_group_id (cost=0.00..67.91 rows=999 width=0) (actual time=0.112..0.112 rows=993 loops=1) Index Cond: (dev_group_id = 20) Planning time: 0.138 ms Execution time: 0.889 ms
分析约束对于搜索的影响
CREATE RULE almart_rule_2015_12_31 AS ON INSERT TO almart WHERE date_key = DATE '2015-12-31' DO INSTEAD INSERT INTO almart_2015_12_31 VALUES (NEW.*);
与Trigger相比,Rule会带来更大的额外开销,但每个请求只造成一次开销而非每条数据都引入一次开销,所以该方法对大批量的数据插入操作更具优势。然而,实际上在绝大部分场景下,Trigger比Rule的效率更高。同时,COPY操作会忽略Rule,而可以正常触发Trigger。另外,如果使用Rule方式,没有比较简单的方法处理没有被Rule覆盖到的插入操作。此时该数据会被插入到主表中而不会报错,从而无法有效利用表分区的优势。除了使用表继承外,还可使用UNION ALL的方式达到表分区的效果。
CREATE VIEW almart AS SELECT * FROM almart_2015_12_10 UNION ALL SELECT * FROM almart_2015_12_11 UNION ALL SELECT * FROM almart_2015_12_12 ... UNION ALL SELECT * FROM almart_2015_12_30;
当有新的分区表时,需要更新该View。实践中,与使用表继承相比,一般不推荐使用该方法。
1.PostgreSQL 创建分区表,SQL优化之PostgreSQL Table Partitioning
2.PostgreSQL wiki 表分区
扫码关注或搜索架构探险之道
获取最新文章,坚持每周一更,坚持技术分享的我和你们一起成长 ^_^ !