PostgreSQL的分区表建立

jopen 9年前

在数据库日渐庞大的时候,为了方便对数据库数据的管理,比如按时间,按地区去统计一些数据时,基数过于庞大,多有不便。很多商业数据库都提供分区的概念,按不同的维度去存放数据,便于后期的管理,PG也不例外。下面是分区表创建步骤:

1.建立主表

-- Parent (master) table: defines the column layout that the monthly
-- child tables pick up through INHERITS.
CREATE TABLE parent_table (
    id          integer,
    name        varchar(20),
    create_time timestamp
);

2.建立子表,继承于主表

-- January 2012 partition: accepts rows with create_time in
-- [2012-01-01, 2012-02-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_01 (
    check (create_time >= date '2012-01-01' and create_time < date '2012-02-01')
) inherits (parent_table);

-- February 2012 partition: accepts rows with create_time in
-- [2012-02-01, 2012-03-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_02 (
    check (create_time >= date '2012-02-01' and create_time < date '2012-03-01')
) inherits (parent_table);

-- March 2012 partition: accepts rows with create_time in
-- [2012-03-01, 2012-04-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_03 (
    check (create_time >= date '2012-03-01' and create_time < date '2012-04-01')
) inherits (parent_table);

-- April 2012 partition: accepts rows with create_time in
-- [2012-04-01, 2012-05-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_04 (
    check (create_time >= date '2012-04-01' and create_time < date '2012-05-01')
) inherits (parent_table);

-- May 2012 partition: accepts rows with create_time in
-- [2012-05-01, 2012-06-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_05 (
    check (create_time >= date '2012-05-01' and create_time < date '2012-06-01')
) inherits (parent_table);

-- June 2012 partition: accepts rows with create_time in
-- [2012-06-01, 2012-07-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_06 (
    check (create_time >= date '2012-06-01' and create_time < date '2012-07-01')
) inherits (parent_table);

-- July 2012 partition: accepts rows with create_time in
-- [2012-07-01, 2012-08-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_07 (
    check (create_time >= date '2012-07-01' and create_time < date '2012-08-01')
) inherits (parent_table);

-- August 2012 partition: accepts rows with create_time in
-- [2012-08-01, 2012-09-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_08 (
    check (create_time >= date '2012-08-01' and create_time < date '2012-09-01')
) inherits (parent_table);

-- September 2012 partition: accepts rows with create_time in
-- [2012-09-01, 2012-10-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_09 (
    check (create_time >= date '2012-09-01' and create_time < date '2012-10-01')
) inherits (parent_table);

-- October 2012 partition: accepts rows with create_time in
-- [2012-10-01, 2012-11-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_10 (
    check (create_time >= date '2012-10-01' and create_time < date '2012-11-01')
) inherits (parent_table);

-- November 2012 partition: accepts rows with create_time in
-- [2012-11-01, 2012-12-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_11 (
    check (create_time >= date '2012-11-01' and create_time < date '2012-12-01')
) inherits (parent_table);

-- December 2012 partition: accepts rows with create_time in
-- [2012-12-01, 2013-01-01). The half-open range keeps adjacent
-- partitions from overlapping on the boundary date.
create table parent_table_2012_12 (
    check (create_time >= date '2012-12-01' and create_time < date '2013-01-01')
) inherits (parent_table);

3.创建触发器函数

-- Row-level trigger function that routes a row being inserted into the
-- parent table to the monthly child table matching NEW.create_time.
--
-- Returns: NULL, so that (when fired as a BEFORE INSERT row trigger) the
--          row is not additionally stored in the parent table itself.
-- Raises:  an exception when create_time falls outside 2012 or is NULL
--          (a NULL makes every range comparison NULL, so the ELSE branch
--          is taken).
CREATE OR REPLACE FUNCTION test.tri_parent_tab_insert()
RETURNS TRIGGER AS $$
-- author: kenyon
-- created: 2012-05-24
BEGIN
    -- Each branch covers one half-open month range [first day, first day of next month).
    IF (NEW.create_time >= DATE '2012-01-01' AND NEW.create_time < DATE '2012-02-01') THEN
        INSERT INTO test.parent_table_2012_01 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-02-01' AND NEW.create_time < DATE '2012-03-01') THEN
        INSERT INTO test.parent_table_2012_02 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-03-01' AND NEW.create_time < DATE '2012-04-01') THEN
        INSERT INTO test.parent_table_2012_03 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-04-01' AND NEW.create_time < DATE '2012-05-01') THEN
        INSERT INTO test.parent_table_2012_04 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-05-01' AND NEW.create_time < DATE '2012-06-01') THEN
        INSERT INTO test.parent_table_2012_05 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-06-01' AND NEW.create_time < DATE '2012-07-01') THEN
        INSERT INTO test.parent_table_2012_06 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-07-01' AND NEW.create_time < DATE '2012-08-01') THEN
        INSERT INTO test.parent_table_2012_07 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-08-01' AND NEW.create_time < DATE '2012-09-01') THEN
        INSERT INTO test.parent_table_2012_08 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-09-01' AND NEW.create_time < DATE '2012-10-01') THEN
        INSERT INTO test.parent_table_2012_09 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-10-01' AND NEW.create_time < DATE '2012-11-01') THEN
        INSERT INTO test.parent_table_2012_10 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-11-01' AND NEW.create_time < DATE '2012-12-01') THEN
        INSERT INTO test.parent_table_2012_11 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-12-01' AND NEW.create_time < DATE '2013-01-01') THEN
        INSERT INTO test.parent_table_2012_12 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSE
        -- No partition covers this value; fail loudly instead of dropping the row.
        RAISE EXCEPTION 'Date out of range.Fix the test.tri_parent_tab_insert() function!';
    END IF;

    -- NULL tells PostgreSQL to skip the insert into the table the trigger fired on.
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;

4.创建触发器

-- Invoke the routing function once per row, before each INSERT on the parent table.
CREATE TRIGGER tri_insert_parent_table
    BEFORE INSERT
    ON test.parent_table
    FOR EACH ROW
    EXECUTE PROCEDURE test.tri_parent_tab_insert();

5.测试
至此就OK了。前端插入时只要插入主表就可以自动将数据按时间分类分插到子表里去。
插入一定的测试数据,来看看效果

kenyon=# select count(1) from test.parent_table_2012_03;

count

---------

2293760

(1 row)

kenyon=# select count(1) from test.parent_table;

count

---------

2293761

(1 row)

kenyon=# select pg_size_pretty(pg_relation_size('test.parent_table_2012_03'));

pg_size_pretty

----------------

106 MB

(1 row)

kenyon=# select pg_size_pretty(pg_relation_size('test.parent_table'));

pg_size_pretty

----------------

8192 bytes

(1 row)

6.总结:
a.可以看到实际的数据是存放在子表里去了,父表本身基本不存数据。注意:直接查询父表得到的count是父表及其所有子表的合计;若只想统计父表自身的行数,可使用 select count(1) from only test.parent_table。
b.这么做前端开发会省去不少工作,但是后端DB会增加不少压力,可以后端建好分区表,前端直接按时间插入分区表中去,可减少因触发器带来的DB压力。
c.可以单独对分区表进行DML或者DDL操作,如truncate。
d.通过explain查看查询是否走了分区,如果未走分区,检查SQL语法和与之相关的系统参数,如constraint_exclusion是否设置为partition(或on)。