GBase 8c Study Notes 008: GBase 8c Advanced Syntax
Environment Preparation
```
# Linux commands: on the installation node (the node where gha_ctl start/stop is run),
# use gs_guc to modify the database parameters.
gs_guc reload -Z coordinator -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z datanode -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z gtm -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z coordinator -N all -I all -c "enable_gbase_mot=on"
gs_guc reload -Z datanode -N all -I all -c "enable_gbase_mot=on"
gs_guc reload -Z gtm -N all -I all -c "enable_gbase_mot=on"

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z coordinator -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z coordinator -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for coordinators.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z datanode -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z datanode -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 3, Popen success count is 3, Popen failure count is 0.
Begin to perform gs_guc for datanodes.
Command count is 3, Command success count is 3, Command failure count is 0.
Total instances: 3.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z gtm -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z gtm -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for gtms.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z coordinator -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z coordinator -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for coordinators.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z datanode -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z datanode -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 3, Popen success count is 3, Popen failure count is 0.
Begin to perform gs_guc for datanodes.
Command count is 3, Command success count is 3, Command failure count is 0.
Total instances: 3.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z gtm -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z gtm -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for gtms.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!
```
```
# Linux commands: restart the database so that the new parameter values take effect.
[gbase@gbase8c_5_150 script]$ gha_ctl stop all -l http://192.168.254.150:2379
{
    "ret":0,
    "msg":"Success"
}
[gbase@gbase8c_5_150 script]$ gha_ctl start all -l http://192.168.254.150:2379
{
    "ret":0,
    "msg":"Success"
}
[gbase@gbase8c_5_150 script]$ gha_ctl monitor all -H -l http://192.168.254.150:2379
+----+-------------+-----------------+-------+---------+--------+
| No | name        | host            | port  | state   | leader |
+----+-------------+-----------------+-------+---------+--------+
| 0  | gha_server1 | 192.168.254.150 | 20001 | running | True   |
+----+-------------+-----------------+-------+---------+--------+
+----+------+-----------------+------+---------------------------+---------+---------+
| No | name | host            | port | work_dir                  | state   | role    |
+----+------+-----------------+------+---------------------------+---------+---------+
| 0  | gtm1 | 192.168.254.150 | 6666 | /home/gbase/data/gtm/gtm1 | running | primary |
| 1  | gtm2 | 192.168.254.151 | 6666 | /home/gbase/data/gtm/gtm2 | running | standby |
+----+------+-----------------+------+---------------------------+---------+---------+
+----+------+-----------------+------+----------------------------+---------+---------+
| No | name | host            | port | work_dir                   | state   | role    |
+----+------+-----------------+------+----------------------------+---------+---------+
| 0  | cn1  | 192.168.254.151 | 5432 | /home/gbase/data/coord/cn1 | running | primary |
| 1  | cn2  | 192.168.254.152 | 5432 | /home/gbase/data/coord/cn2 | running | primary |
+----+------+-----------------+------+----------------------------+---------+---------+
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
| No | group | name  | host            | port  | work_dir                   | state   | role    |
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
| 0  | dn1   | dn1_1 | 192.168.254.151 | 15432 | /home/gbase/data/dn1/dn1_1 | running | primary |
| 1  | dn2   | dn2_1 | 192.168.254.152 | 20010 | /home/gbase/data/dn2/dn2_1 | running | primary |
| 2  | dn2   | dn2_2 | 192.168.254.150 | 20010 | /home/gbase/data/dn2/dn2_2 | running | standby |
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
+----+-----------------------------+--------+---------+----------+
| No | url                         | name   | state   | isLeader |
+----+-----------------------------+--------+---------+----------+
| 0  | http://192.168.254.150:2379 | node_2 | healthy | True     |
| 1  | http://192.168.254.151:2379 | node_0 | healthy | False    |
| 2  | http://192.168.254.152:2379 | node_1 | healthy | False    |
+----+-----------------------------+--------+---------+----------+
[gbase@gbase8c_5_150 script]$
```
```
[gbase@gbase8c_5_151 ~]$ gsql -d postgres -r
gsql ((multiple_nodes GBase8cV5 3.0.0B76 build 47948f99) compiled at 2023-02-27 16:04:20 commit 0 last mr 1232 )
Non-SSL connection (SSL connection is recommended when requiring high-security)
Type "help" for help.

postgres=# create database test;
CREATE DATABASE
postgres=#
postgres=# \c test
Non-SSL connection (SSL connection is recommended when requiring high-security)
You are now connected to database "test" as user "gbase".
test=#
test=# show enable_incremental_checkpoint;
 enable_incremental_checkpoint
-------------------------------
 off
(1 row)

test=# show enable_gbase_mot;
 enable_gbase_mot
------------------
 on
(1 row)

test=#
```
Storage Engines
- Currently supported storage engines:
  - USTORE: the table uses the in-place-update storage engine.
  - ASTORE: the table uses the append-only storage engine.
- Three storage formats are supported:
  - ROW: table data is stored row by row.
    - Row storage suits OLTP workloads, i.e. scenarios dominated by point queries and frequent insert/update/delete operations.
  - COLUMN: table data is stored column by column.
    - Column storage suits data-warehouse workloads, where tables undergo heavy aggregation but relatively few columns are touched.
  - MOT: table data is kept in memory.
    - MOT suits high-throughput transaction processing, accelerating performance bottlenecks, eliminating mid-tier caches, and large-scale streaming data ingestion (a usage sketch follows the example block below).
```
drop table if exists test_astore_row;
drop table if exists test_astore_column;
drop foreign table if exists test_astore_mot;

-- Specify orientation=row at creation time to get a row-store table.
test=# create table test_astore_row(col text) with (orientation=row);
CREATE TABLE
-- Specify orientation=column at creation time to get a column-store table.
test=# create table test_astore_column(col text) with (orientation=column);
CREATE TABLE
-- Use create foreign table ... server mot_server to create an in-memory (MOT) table.
test=# create foreign table test_astore_mot(col int) server mot_server;
CREATE FOREIGN TABLE

-- Specify storage_type=ustore at creation time to get a USTORE table.
drop table if exists test_ustore;
test=# create table test_ustore(col text) with (storage_type=ustore);
CREATE TABLE

-- When the SQL statement specifies nothing, the default is the ASTORE engine with ROW storage.
test=# \d+
                                                  List of relations
 Schema |        Name        |     Type      | Owner |  Size   |                       Storage                         | Description
--------+--------------------+---------------+-------+---------+-------------------------------------------------------+-------------
 public | dual               | view          | gbase | 0 bytes |                                                       |
 public | test_astore_column | table         | gbase | 48 kB   | {orientation=column,compression=low}                  |
 public | test_astore_mot    | foreign table | gbase | 16 kB   |                                                       |
 public | test_astore_row    | table         | gbase | 16 kB   | {orientation=row,compression=no}                      |
 public | test_ustore        | table         | gbase | 16 kB   | {orientation=row,storage_type=ustore,compression=no}  |
(5 rows)

test=# \d+ test_astore_column
              Table "public.test_astore_column"
 Column | Type | Modifiers | Storage  | Stats target | Description
--------+------+-----------+----------+--------------+-------------
 col    | text |           | extended |              |
Has OIDs: no
Distribute By: HASH(col)
Location Nodes: ALL DATANODES
Options: orientation=column, compression=low

test=# \d+ test_ustore
                 Table "public.test_ustore"
 Column | Type | Modifiers | Storage  | Stats target | Description
--------+------+-----------+----------+--------------+-------------
 col    | text |           | extended |              |
Has OIDs: no
Distribute By: HASH(col)
Location Nodes: ALL DATANODES
Options: orientation=row, storage_type=ustore, compression=no, toast.storage_type=ustore
```
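Once created, a MOT table is read and written through ordinary SQL. Below is a minimal sketch reusing the test_astore_mot table from the example above; the statements are an assumption about typical usage, not output captured from the cluster.

```sql
-- Hedged sketch: a MOT foreign table is used like a regular table once created.
insert into test_astore_mot values (1), (2);
select * from test_astore_mot where col = 1;
```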
Partitioned Tables
GBase 8c distributes data across the data nodes according to a fixed rule; currently hash sharding is supported.
On top of this, the partitioned-table feature supports second-level partitioning (9 partition-type combinations) and interval partitioning: that is, sharding plus partitioning.
```
-- Table sharding
-- GBase 8c's data sharding strategy is set at table creation time with the
-- distribute/replication keywords.
-- distribute by hash creates a hash-sharded table: rows are spread evenly
-- across the data nodes by a hash algorithm.
test=# CREATE TABLE t1_dis(c1 int, c2 int) DISTRIBUTE BY hash(c1);
CREATE TABLE
-- distribute by replication creates a replicated table: every data node
-- stores a full copy of the data.
test=# CREATE TABLE t1_rep(c1 int, c2 int) DISTRIBUTE BY replication;
CREATE TABLE
-- Note: when not specified, hash sharding is used by default, and the
-- distribution key is the first column (in declaration order) that the hash
-- algorithm supports.
test=# \d+ t1_dis
                   Table "public.t1_dis"
 Column |  Type   | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
 c1     | integer |           | plain   |              |
 c2     | integer |           | plain   |              |
Has OIDs: no
Distribute By: HASH(c1)
Location Nodes: ALL DATANODES
Options: orientation=row, compression=no

test=# \d+ t1_rep
                   Table "public.t1_rep"
 Column |  Type   | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
 c1     | integer |           | plain   |              |
 c2     | integer |           | plain   |              |
Has OIDs: no
Distribute By: REPLICATION
Location Nodes: ALL DATANODES
Options: orientation=row, compression=no
```
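To see where rows actually land under the two strategies, one option is to count them on each data node directly. This is a sketch only: the EXECUTE DIRECT syntax is an assumption based on the Postgres-XC lineage, and the node names dn1/dn2 are taken from the cluster layout shown earlier.

```sql
-- Hedged sketch: spread rows into the hash-sharded table, then count per node.
insert into t1_dis select v, v from generate_series(1, 1000) v;
execute direct on (dn1) 'select count(*) from t1_dis';
execute direct on (dn2) 'select count(*) from t1_dis';
-- For t1_dis the per-node counts should sum to 1000; a replicated table such
-- as t1_rep would return the full 1000 on every node.
```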
Interval partitioning: partitions are created automatically at a fixed interval, e.g. 1 day or 1 month. (Primary/standby deployments.)
```sql
CREATE TABLE sales
(prod_id NUMBER(6),
 cust_id NUMBER,
 time_id DATE,
 channel_id CHAR(1),
 promo_id NUMBER(6),
 quantity_sold NUMBER(3),
 amount_sold NUMBER(10,2))
PARTITION BY RANGE (time_id)
INTERVAL('1 day')
( PARTITION p1 VALUES LESS THAN ('2019-02-01 00:00:00'),
  PARTITION p2 VALUES LESS THAN ('2019-02-02 00:00:00')
);
```
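With interval partitioning, inserting a row beyond the highest existing boundary should trigger automatic creation of a new 1-day partition. A hedged sketch against the sales table above, inspecting the result through the pg_partition catalog that also appears later in this section:

```sql
-- Hedged sketch: a row past the highest boundary ('2019-02-02') should cause
-- a new 1-day interval partition to be created automatically.
INSERT INTO sales VALUES (1, 100, DATE '2019-02-05', 'a', 1, 1, 10.00);
-- Inspect the partitions via pg_partition (parttype 'p' marks partition
-- entries, as in the recursive query shown later in this section).
SELECT relname, boundaries FROM pg_partition
 WHERE parentid = 'sales'::regclass AND parttype = 'p';
```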
Automatic partition management: partitions are created automatically at a fixed interval, e.g. 1 day or 1 month. (Distributed deployments.)
Syntax: add WITH (PERIOD='1 day') when creating the table, or enable it later via ALTER TABLE xxx SET (PERIOD='1 day'); PERIOD may range from 1 hour to 1 year. Disable it via ALTER TABLE xxx RESET (PERIOD); see the sketch after the example block below.
Restrictions: only single-level RANGE-partitioned tables are supported (other partition types, and tables with second-level partitions, are not). Effect: starting from the time the command is issued (rounded down to the hour), enough partitions are created to cover 30 PERIODs, each partition one PERIOD wide; if an existing partition already covers a range that would be added, that new partition is skipped. The same procedure is then re-run after every PERIOD.
```
-- Automatic partition management: creation-time syntax
create table range_auto_1hour_tb01(id int, ip text, time timestamp)
with (PERIOD='1 hour')
partition by range(time)
( partition p1 values less than ('2022-11-23 14:00:00'));

-- Partitioned tables
-- Single-level hash partitioning
drop table if exists mea_hash cascade;
test=# create table mea_hash (city_id int, logdate timestamp, id int)
partition by hash(id)
( partition p1,
  partition p2 );
CREATE TABLE

-- Two-level partitioning: hash-list, hash-hash, hash-range
drop table if exists mea_hash_list cascade;
test=# create table mea_hash_list (city_id int, logdate timestamp, id int)
partition by hash(id) subpartition by list(city_id)
( partition p1
  ( subpartition p12 values (10),
    subpartition p13 values (20) ));
CREATE TABLE

drop table if exists mea_hash_hash cascade;
test=# create table mea_hash_hash (city_id int, logdate timestamp, id int)
partition by hash(id) subpartition by hash(city_id)
( partition id_1
  ( subpartition p12,
    subpartition p13 ));
CREATE TABLE

drop table if exists mea_hash_range cascade;
postgres=# create table mea_hash_range (city_id int, logdate timestamp, id int)
partition by hash(id) subpartition by range(logdate)
( partition meas_y2021
  ( subpartition p12 values less than ('2021-02-04 12:00:00'),
    subpartition p13 values less than ('2021-02-04 20:00:00') ));
CREATE TABLE

-- Single-level range partitioning
drop table if exists mea_range cascade;
postgres=# create table mea_range (city_id int, logdate timestamp)
partition by range(logdate)
( partition meas_y2021 values less than ('2021-01-01') );
CREATE TABLE

-- Two-level partitioning: range-range, range-hash, range-list
drop table if exists mea_range_range cascade;
postgres=# create table mea_range_range (city_id int, logdate timestamp, id int)
partition by range(logdate) subpartition by range(id)
( partition meas_y2021 values less than ('2021-02-04 21:00:00')
  ( subpartition p12 values less than (1),
    subpartition p13 values less than (10) ));
CREATE TABLE

drop table if exists mea_range_hash cascade;
postgres=# create table mea_range_hash (city_id int, logdate timestamp, id int)
partition by range(logdate) subpartition by hash(city_id)
( partition id_1 values less than ('2021-02-01 01:00:00')
  ( subpartition p12,
    subpartition p13 ));
CREATE TABLE

drop table if exists mea_range_list cascade;
postgres=# create table mea_range_list (city_id int, logdate timestamp, id int)
partition by range(logdate) subpartition by list(city_id)
( partition p1 values less than ('2021-02-01 01:00:00')
  ( subpartition p12 values (1),
    subpartition p13 values (20) ));
CREATE TABLE

-- Single-level list partitioning
drop table if exists mea_list cascade;
postgres=# create table mea_list (city_id int, logdate timestamp, id int)
partition by list(id)
( partition p1 values (1),
  partition p2 values (2) );
CREATE TABLE
-- Expected to be supported; executed successfully.

-- Two-level partitioning: list-list, list-range, list-hash
drop table if exists mea_list_list cascade;
postgres=# create table mea_list_list (city_id int, logdate timestamp, id int)
partition by list(id) subpartition by list(city_id)
( partition p1 values (1)
  ( subpartition p12 values (10),
    subpartition p13 values (20) ));
CREATE TABLE

drop table if exists mea_list_range cascade;
postgres=# create table mea_list_range (city_id int, logdate timestamp, id int)
partition by list(id) subpartition by range(logdate)
( partition meas_y2021 values ('202102')
  ( subpartition p12 values less than ('2021-02-04 12:00:00'),
    subpartition p13 values less than ('2021-02-04 20:00:00') ));
CREATE TABLE

drop table if exists mea_list_hash cascade;
postgres=# create table mea_list_hash (city_id int, logdate timestamp, id int)
partition by list(id) subpartition by hash(city_id)
( partition id_1 values (2021)
  ( subpartition p12,
    subpartition p13 ));
CREATE TABLE

------------ Viewing a table's partitions
postgres=# with RECURSIVE temp_partition as(
postgres(# SELECT t1.oid, t1.relname, partstrategy, boundaries, t1.parttype FROM pg_partition t1, pg_class t2 WHERE t1.parentid = t2.oid AND t2.relname = 'mea_hash_hash'
postgres(# union
postgres(# SELECT t1.oid, t1.relname, t1.partstrategy, t1.boundaries, t1.parttype FROM pg_partition t1 join temp_partition t2 on t1.parentid = t2.oid where t2.parttype='p')
postgres-# select t1.relname, t1.partstrategy, t1.boundaries, t1.parttype from temp_partition t1;
    relname    | partstrategy | boundaries | parttype
---------------+--------------+------------+----------
 mea_hash_hash | h            |            | r
 id_1          | h            | {0}        | p
 p12           | h            | {0}        | s
 p13           | h            | {1}        | s
(4 rows)

--- Automatic partitioning. Note: when creating the table, there must be a
--- partition whose upper bound is more than 2 hours past the current time.
--- Adjust the timestamps to the actual time.
postgres=# create table range_auto_1hour_tb01(id int, ip text, time timestamp)
with (PERIOD='1 hour')
partition by range(time)
( partition p1 values less than ('2023-02-27 23:00'));
CREATE TABLE
insert into range_auto_1hour_tb01 values(1,'12','2023-2-18 09:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-18 10:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-19 18:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-19 19:00:00');
postgres=# select * from dba_tab_partitions where table_name = 'RANGE_AUTO_1HOUR_TB01';
 table_owner |      table_name       |  partition_name  |     high_value      | high_value_length | partition_position | tablespace_name | logging | compression | segment_managed | num_rows | blocks | last_analyzed
-------------+-----------------------+------------------+---------------------+-------------------+--------------------+-----------------+---------+-------------+-----------------+----------+--------+---------------
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | P1               | 2023-02-27 23:00    |                16 |                  1 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022800 | 2023-02-28 00:00:00 |                19 |                  2 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022801 | 2023-02-28 01:00:00 |                19 |                  3 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022802 | 2023-02-28 02:00:00 |                19 |                  4 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022803 | 2023-02-28 03:00:00 |                19 |                  5 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022804 | 2023-02-28 04:00:00 |                19 |                  6 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022805 | 2023-02-28 05:00:00 |                19 |                  7 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022806 | 2023-02-28 06:00:00 |                19 |                  8 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022807 | 2023-02-28 07:00:00 |                19 |                  9 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022808 | 2023-02-28 08:00:00 |                19 |                 10 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022809 | 2023-02-28 09:00:00 |                19 |                 11 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022810 | 2023-02-28 10:00:00 |                19 |                 12 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022811 | 2023-02-28 11:00:00 |                19 |                 13 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022812 | 2023-02-28 12:00:00 |                19 |                 14 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022813 | 2023-02-28 13:00:00 |                19 |                 15 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022814 | 2023-02-28 14:00:00 |                19 |                 16 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022815 | 2023-02-28 15:00:00 |                19 |                 17 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 ……
```
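The WITH (PERIOD=...) example above enables automatic management at creation time; the ALTER forms described earlier can toggle it on an existing table. A minimal sketch of that syntax:

```sql
-- Minimal sketch of the ALTER syntax described above: enable automatic
-- partition creation on an existing range-partitioned table, then turn it off.
ALTER TABLE range_auto_1hour_tb01 SET (PERIOD = '1 day');
ALTER TABLE range_auto_1hour_tb01 RESET (PERIOD);
```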
Index Types
GBase 8c provides several index types: hash, B-tree, GiST, and GIN. Each type uses a different algorithm, suited to a different kind of query.

- B-tree: works with all data types, supports sorting, and handles greater-than, less-than, equality, greater-or-equal, and less-or-equal searches.
- hash: stores the hash of the indexed column's VALUE; supports equality lookups only.
- GiST (Generalized Search Tree): a generalized search tree; handles geometric data, images, spatial data, tree structures, and similar cases.
- GIN: an inverted index; its main application is accelerating full-text search.

By default, create index builds a B-tree index, which fits most situations.

hash index: handles simple equality comparisons only.
Syntax: create index … using hash(column) specifies a hash index.

B-tree index: commonly used for operations such as greater-than, less-than, and equality.
Syntax: create index … using btree(column) specifies a btree index.

GiST index:
Geometric data and images: for example, checking whether a given point lies in some area, i.e. geometric "containment".
For spatial data, a GiST index can use an R-tree to support relative-position operators (left of, right of, contains, and so on).
For tree structures, an R-tree can support intersection and containment operators.
Syntax: create index … using gist(column) specifies a GiST index.

GIN index: suited to multi-valued types such as arrays, full-text documents, and tokens, whenever you need to search for VALUEs inside the multi-valued column. (Depending on the type, it supports intersection, containment, greater-than, left-of, right-of, and other searches; see the array sketch after the example block below.)
Syntax: create index … using gin(column) specifies a GIN index.
```
-- Index types
-- hash index
postgres=# create table rw_split(col int, name text);
CREATE TABLE
postgres=# insert into rw_split select generate_series(1,50000), md5(random()::text)::text;
INSERT 0 50000
postgres=# create index rw_split_col_hash on rw_split using hash(col);
CREATE INDEX
postgres=# explain select * from rw_split where col = 2;
                                        QUERY PLAN
----------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=36)
   Node/s: dn1
   ->  Bitmap Heap Scan on rw_split  (cost=5.22..197.48 rows=125 width=36)
         Recheck Cond: (col = 2)
         ->  Bitmap Index Scan on rw_split_col_hash  (cost=0.00..5.19 rows=125 width=0)
               Index Cond: (col = 2)
(6 rows)

----------------- btree index
drop index rw_split_col_hash;
postgres=# create index rw_split_col_btree on rw_split using btree(col);
CREATE INDEX
postgres=# explain select * from rw_split where col = 2;
                                        QUERY PLAN
-----------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=36)
   Node/s: dn1
   ->  Bitmap Heap Scan on rw_split  (cost=5.22..197.48 rows=125 width=36)
         Recheck Cond: (col = 2)
         ->  Bitmap Index Scan on rw_split_col_btree  (cost=0.00..5.19 rows=125 width=0)
               Index Cond: (col = 2)
(6 rows)

--------------- gist index
drop table if exists t_gist;
postgres=# create table t_gist(id int, p point);
CREATE TABLE
postgres=# insert into t_gist select generate_series(1,10000), point(round((random()*1000)::numeric,2), round((random()*1000)::numeric,2));
INSERT 0 10000
postgres=# select * from t_gist limit 2;
 id |        p
----+-----------------
  1 | (947.99,748.17)
  2 | (410.15,357.92)
(2 rows)

postgres=# create index on t_gist using gist(p);
CREATE INDEX
-- Points within radius 1 of the point (100,100).
postgres=# explain (analyze,buffers) select * from t_gist where circle '((100,100) 1)' @> p order by p <-> '(100,100)' limit 10;
                                                             QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=8.29..8.29 rows=1 width=20) (actual time=2.769..2.769 rows=0 loops=1)
   ->  Sort  (cost=8.29..8.29 rows=1 width=20) (actual time=2.767..2.767 rows=0 loops=1)
         Sort Key: ((p <-> '(100,100)'::point))
         Sort Method: quicksort  Memory: 33kB
         (Buffers: shared hit=3)
         ->  Streaming(type: GATHER)  (cost=0.00..8.28 rows=1 width=20) (actual time=2.723..2.723 rows=0 loops=1)
               Spawn on: All datanodes
               ->  Limit  (cost=0.00..8.27 rows=2 width=20) (Actual time: never executed)
                     ->  Index Scan using t_gist_p_idx on t_gist  (cost=0.00..8.27 rows=1 width=20) (Actual time: never executed)
                           Index Cond: ('<(100,100),1>'::circle @> p)
                           Order By: (p <-> '(100,100)'::point)
 Total runtime: 4.654 ms
(12 rows)

--------------- gin index
drop table if exists t_gin;
create table t_gin(doc text, doc_tsv tsvector);
insert into t_gin(doc) values
  ('Can a sheet slitter slit sheets?'),
  ('How many sheets could a sheet slitter slit?'),
  ('I slit a sheet, a sheet I slit.'),
  ('Upon a slitted sheet I sit.'),
  ('Whoever slit the sheets is a good sheet slitter.'),
  ('I am a sheet slitter.'),
  ('I slit sheets.'),
  ('I am the sleekest sheet slitter that ever slit sheets.'),
  ('She slits the sheet she sits on.');
update t_gin set doc_tsv = to_tsvector(doc);
create index on t_gin using gin(doc_tsv);

postgres=# create table t_gin(doc text, doc_tsv tsvector);
CREATE TABLE
postgres=# insert into t_gin(doc) values
postgres-# ('Can a sheet slitter slit sheets?'),
postgres-# ('How many sheets could a sheet slitter slit?'),
postgres-# ('I slit a sheet, a sheet I slit.'),
postgres-# ('Upon a slitted sheet I sit.'),
postgres-# ('Whoever slit the sheets is a good sheet slitter.'),
postgres-# ('I am a sheet slitter.'),
postgres-# ('I slit sheets.'),
postgres-# ('I am the sleekest sheet slitter that ever slit sheets.'),
postgres-# ('She slits the sheet she sits on.');
INSERT 0 9
postgres=# select * from t_gin;
                          doc                           | doc_tsv
--------------------------------------------------------+---------
 Can a sheet slitter slit sheets?                       |
 I slit sheets.                                         |
 How many sheets could a sheet slitter slit?            |
 I slit a sheet, a sheet I slit.                        |
 Upon a slitted sheet I sit.                            |
 Whoever slit the sheets is a good sheet slitter.       |
 I am a sheet slitter.                                  |
 I am the sleekest sheet slitter that ever slit sheets. |
 She slits the sheet she sits on.                       |
(9 rows)

postgres=# update t_gin set doc_tsv = to_tsvector(doc);
UPDATE 9
postgres=# select * from t_gin;
                          doc                           |                         doc_tsv
--------------------------------------------------------+----------------------------------------------------------
 How many sheets could a sheet slitter slit?            | 'could':4 'mani':2 'sheet':3,6 'slit':8 'slitter':7
 I slit a sheet, a sheet I slit.                        | 'sheet':4,6 'slit':2,8
 Upon a slitted sheet I sit.                            | 'sheet':4 'sit':6 'slit':3 'upon':1
 Whoever slit the sheets is a good sheet slitter.       | 'good':7 'sheet':4,8 'slit':2 'slitter':9 'whoever':1
 I am a sheet slitter.                                  | 'sheet':4 'slitter':5
 I am the sleekest sheet slitter that ever slit sheets. | 'ever':8 'sheet':5,10 'sleekest':4 'slit':9 'slitter':6
 She slits the sheet she sits on.                       | 'sheet':4 'sit':6 'slit':2
 Can a sheet slitter slit sheets?                       | 'sheet':3,6 'slit':5 'slitter':4
 I slit sheets.                                         | 'sheet':3 'slit':2
(9 rows)

postgres=# create index on t_gin using gin(doc_tsv);
CREATE INDEX
--- Turn off sequential scans manually to show the effect.
postgres=# set enable_seqscan=off;
SET
postgres=# explain(costs off) select doc from t_gin where doc_tsv @@ to_tsquery('many & slitter');
                                QUERY PLAN
--------------------------------------------------------------------------
 Data Node Scan
   Node/s: All datanodes
   ->  Bitmap Heap Scan on t_gin
         Recheck Cond: (doc_tsv @@ '''mani'' & ''slitter'''::tsquery)
         ->  Bitmap Index Scan on t_gin_doc_tsv_idx
               Index Cond: (doc_tsv @@ '''mani'' & ''slitter'''::tsquery)
(6 rows)

-------------- Expression index
postgres=# create table test_expression(col int, name varchar(64));
CREATE TABLE
postgres=# insert into test_expression select 1,'ASDD';
insert into test_expression select 2,'ASDD';
insert into test_expression select 3,'AS';
insert into test_expression select 4,'ASsda';
insert into test_expression select 5,'ASdssa';
insert into test_expression select 6,'Asds';
insert into test_expression select 7,'Assa';
insert into test_expression select 8,'as';
INSERT 0 1
postgres=# insert into test_expression select 2,'ASDD';
INSERT 0 1
postgres=# insert into test_expression select 3,'AS';
INSERT 0 1
postgres=# insert into test_expression select 4,'ASsda';
INSERT 0 1
postgres=# insert into test_expression select 5,'ASdssa';
INSERT 0 1
postgres=# insert into test_expression select 6,'Asds';
INSERT 0 1
postgres=# insert into test_expression select 7,'Assa';
INSERT 0 1
postgres=# insert into test_expression select 8,'as';
INSERT 0 1
postgres=# explain select * from test_expression where lower(name) ='as';
                                          QUERY PLAN
---------------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=150)
   Node/s: All datanodes
   ->  Seq Scan on test_expression  (cost=10000000000.00..1000000001681.00 rows=2 width=150)
         Filter: (lower((name)::text) = 'as'::text)
(4 rows)

postgres=# create index on test_expression (name);
CREATE INDEX
postgres=# create index test_expression_lower on test_expression (lower(name));
CREATE INDEX
postgres=# explain select * from test_expression where lower(name) ='as';
                                              QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=150)
   Node/s: All datanodes
   ->  [Bypass]
       Index Scan using test_expression_lower on test_expression  (cost=0.00..8.27 rows=1 width=150)
         Index Cond: (lower((name)::text) = 'as'::text)
(5 rows)
```
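As noted above, GIN also covers multi-valued types beyond tsvector, such as arrays. A minimal sketch with a hypothetical table t_gin_arr (not part of the original walkthrough), using the standard array containment operator:

```sql
-- Hedged sketch: GIN over an integer array, queried with containment (@>).
create table t_gin_arr(id int, tags int[]);
create index on t_gin_arr using gin(tags);
insert into t_gin_arr values (1, array[1,2,3]), (2, array[4,5]);
select * from t_gin_arr where tags @> array[2];  -- rows whose tags contain 2
```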
Data Compression
For row-store tables, data compression can be enabled at table creation time with the compresstype keyword, reducing disk usage.
```sql
create table test_row_compress(col text) with (compresstype = 2, compress_level = 3);
```
- COMPRESSTYPE
  Row-store table parameter; selects the row-store compression algorithm: 1 means pglz, 2 means zstd; by default no compression. (Supported only for ordinary ASTORE tables; a pglz sketch follows this list.)
  Valid range: 0 to 2; default 0.
- COMPRESS_LEVEL
  Row-store table parameter; sets the compression level, effective only when COMPRESSTYPE is 2. Higher levels compress the table better but make table access slower. (Supported only for ordinary ASTORE tables.)
  Valid range: -31 to 31; default 0.
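For comparison, a minimal sketch of the pglz alternative; COMPRESS_LEVEL is omitted because, as stated above, it only takes effect with zstd:

```sql
-- Hedged sketch: pglz-compressed row-store table (compresstype = 1);
-- compress_level is not set, since it only applies when compresstype = 2.
create table test_row_compress_pglz(col text) with (compresstype = 1);
```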
For column-store tables, data compression is controlled at table creation time with the compression keyword, likewise reducing disk usage.
```sql
-- Example
create table test_compress(col text) with (orientation=column, compression=high);
```
- COMPRESSION
  Sets the compression level of the table data, which determines both the compression ratio and the compression time. Generally, a higher level gives a better ratio but takes longer, and vice versa; the actual ratio depends on the distribution of the loaded data. Row-store tables do not support this parameter (they use COMPRESSTYPE instead, as described above).
  Valid values:
  For column-store tables: YES/NO/LOW/MIDDLE/HIGH; default LOW.
```
---- Row-store compression
drop table if exists test_row_compress;
postgres=# create table test_row_compress(col text);
CREATE TABLE
postgres=# insert into test_row_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_row_compress'));
 pg_size_pretty
----------------
 66 MB
(1 row)

postgres=# drop table if exists test_row_compress;
DROP TABLE
postgres=# create table test_row_compress(col text) with (compresstype = 2, compress_level = 3);
CREATE TABLE
postgres=# insert into test_row_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_row_compress'));
 pg_size_pretty
----------------
 29 MB
(1 row)

---- Column-store compression
postgres=# drop table test_column_compress;
ERROR:  table "test_column_compress" does not exist
postgres=# create table test_column_compress(col text) with (orientation=column, compression=no);
CREATE TABLE
postgres=# insert into test_column_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_column_compress'));
 pg_size_pretty
----------------
 34 MB
(1 row)

postgres=# drop table test_column_compress;
DROP TABLE
postgres=# create table test_column_compress(col text) with (orientation=column, compression=high);
CREATE TABLE
postgres=# insert into test_column_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_column_compress'));
 pg_size_pretty
----------------
 19 MB
(1 row)
```
Subtransactions
Subtransactions let you roll back part of the work already completed within a transaction. A subtransaction can be started with SAVEPOINT, an EXCEPTION block, or an autonomous transaction.
Autonomous Transactions
An autonomous transaction is an independent transaction started inside a running main transaction. Its commit or rollback does not affect data already committed by the main transaction, and the autonomous transaction is likewise unaffected by the main transaction.
Autonomous transactions are defined in stored procedures, functions, and anonymous blocks, and are declared with the PRAGMA AUTONOMOUS_TRANSACTION keyword.
```
---------------------- Subtransactions
----- Autonomous transaction
postgres=# create table t2(a int, b int);
insert into t2 values(1,2);
select * from t2;
CREATE TABLE
INSERT 0 1
 a | b
---+---
 1 | 2
(1 row)

postgres=# create or replace procedure autonomus_4(a int, b int) as
postgres$# declare
postgres$# num3 int = a; num4 int = b;
postgres$# pragma autonomous_transaction;
postgres$# begin insert into t2 values (num3, num4); end;
postgres$# /
CREATE PROCEDURE
postgres=# create or replace procedure autonomus_5(a int, b int) as
postgres$# declare
postgres$# begin insert into t2 values (444,55); autonomus_4(a,b); rollback; end;
postgres$# /
CREATE PROCEDURE
-- The rollback in autonomus_5 undoes its own insert (444,55), but the insert
-- made by the autonomous transaction in autonomus_4 survives.
postgres=# select autonomus_5(11,22);
 autonomus_5
-------------

(1 row)

postgres=# select * from t2;
 a  | b
----+----
  1 |  2
 11 | 22
(2 rows)

----------- savepoint: set a save point
-- Create the test table
postgres=# drop table if exists savepoint_test;
NOTICE:  table "savepoint_test" does not exist, skipping
DROP TABLE
postgres=# create table savepoint_test(a int primary key, b int);
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "savepoint_test_pkey" for table "savepoint_test"
CREATE TABLE
-- Start a transaction
postgres=# start transaction;
START TRANSACTION
-- Insert a row
postgres=# insert into savepoint_test values(1,2);
INSERT 0 1
-- Create a savepoint
postgres=# savepoint test_savepoint;
SAVEPOINT
-- Insert another row
postgres=# insert into savepoint_test values(2,2);
INSERT 0 1
-- Check the data
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
 2 | 2
(2 rows)

-- Roll back to the savepoint
postgres=# rollback to savepoint test_savepoint;
ROLLBACK
-- Check the data: only (1,2) remains; (2,2) has been rolled back
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
(1 row)

-- Insert a row
postgres=# insert into savepoint_test values(3,3);
INSERT 0 1
-- Commit the transaction
postgres=# commit;
COMMIT
-- Check the data: (1,2) and (3,3) are visible; (2,2) stayed rolled back
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
 3 | 3
(2 rows)

-- Drop the test table
postgres=# drop table savepoint_test;
DROP TABLE

-------- exception
create type type_test as (a int, b int);
create or replace procedure p1
as
c int;
begin
    select a into c from type_test;
exception
    when others then
        raise 'NULL';
end;
/
call p1();

postgres=# create type type_test as (a int, b int);
CREATE TYPE
postgres=# create or replace procedure p1
postgres-# as
postgres$# c int;
postgres$# begin
postgres$# select a into c from type_test;
postgres$# exception
postgres$# when others then
postgres$# raise 'NULL' ;
postgres$# end;
postgres$# /
CREATE PROCEDURE
postgres=# call p1();
ERROR:  NULL
```
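The EXCEPTION example above shows the error being raised and caught. The following sketch (with a hypothetical table exc_demo and procedure p_exc) illustrates the subtransaction behavior itself: changes made inside a block that has an EXCEPTION clause are rolled back when the handler fires.

```sql
-- Hedged sketch: the insert happens inside the block, the error fires, and
-- entering the handler rolls the insert back, so the table stays empty.
create table exc_demo(a int);
create or replace procedure p_exc
as
begin
    insert into exc_demo values (1);
    raise exception 'boom';
exception
    when others then
        raise notice 'caught: the insert above was rolled back';
end;
/
call p_exc();
select count(*) from exc_demo;   -- expected: 0
```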