Doris Basics

Connection

mysql -h fe_server -P query_port -uroot
mysql -h 192.168.0.158 -P 9030 -uroot

Create

User
create user bi identified by '1234';
-- set password for bi = password('abcd1234');
grant all on bi to bi;
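
To verify the account, a quick sketch (the host and port reuse the example FE above; SHOW GRANTS is standard Doris syntax):

show grants for bi;

mysql -h 192.168.0.158 -P 9030 -ubi -p1234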

Data Model

Aggregate

Aggregate model

  • Value columns are aggregated according to their AggregationType, e.g. sum, max, replace
  • AGGREGATE KEY() names the key columns; every column not listed there must be given an AggregationType, e.g. `cost` BIGINT SUM DEFAULT "0"
  • Merge on read, so some aggregate queries perform poorly (see the sketch after the table definition)
create table test_a
(
   ky   int,
   name varchar(10),
   val  int sum default "0" 
)
aggregate key(ky, name)
distributed by hash(`ky`) buckets 1
properties (
   "replication_allocation" = "tag.location.default: 1"
);
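
A minimal sketch of the merge behavior, with made-up sample rows: two inserts sharing the same key are merged on read, and val is summed.

insert into test_a values (1, 'a', 10);
insert into test_a values (1, 'a', 5);
-- both rows share the key (1, 'a'), so they merge into one: 1 | a | 15
select * from test_a;
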
Unique

Unique model

  • Keeps the key columns unique; a new value replaces the old one (see the sketch after the table definition)
  • Merge on write
  • Adding the config item disable_storage_page_cache=false to be.conf may improve data import performance
create table test_u
(
   ky   int,
   name varchar(10),
   val  int
)
unique key(ky, name)
distributed by hash(ky) buckets 1
properties (
   "replication_allocation" = "tag.location.default: 1",
   "enable_unique_key_merge_on_write" = "true"
);
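
A minimal sketch of the replace behavior (sample rows are made up): re-inserting an existing key keeps only the newest value.

insert into test_u values (1, 'a', 10);
insert into test_u values (1, 'a', 20);
-- the key (1, 'a') stays unique; the result is a single row: 1 | a | 20
select * from test_u;
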
Duplicate

Duplicate model

  • Imported data is stored as-is, with no aggregation or deduplication (see the sketch after the table definition)
  • The DUPLICATE KEY in the CREATE TABLE statement only specifies which columns the underlying data is sorted by (a more fitting name would be "Sorted Column")
create table test
(
   ky   int,
   name varchar(10),
   val  int
)
distributed by hash(ky) buckets 1
properties (
   "replication_allocation" = "tag.location.default: 1",
   "enable_duplicate_without_keys_by_default" = "true"
);
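
For contrast, a sketch with the same made-up rows as above: the duplicate model stores both copies untouched.

insert into test values (1, 'a', 10);
insert into test values (1, 'a', 10);
-- both identical rows are kept; the query returns two rows
select * from test;
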
Partitioning and Bucketing
  • list

10 partitions, 6 buckets, 3 replicas

create table test_p
(
   part tinyint not null,
   ky   int,
   name varchar(10),
   val  int
)
duplicate key(part, ky)
partition by list(part)
(
   partition p_0 values in(0),
   partition p_1 values in(1),
   partition p_2 values in(2),
   partition p_3 values in(3),
   partition p_4 values in(4),
   partition p_5 values in(5),
   partition p_6 values in(6),
   partition p_7 values in(7),
   partition p_8 values in(8),
   partition p_9 values in(9)
   -- partition p_0 values in(2,4,6,8,0),
   -- partition p_1 values in(1,3,5,7,9)
)
distributed by hash(ky) buckets 6
properties (
   "replication_allocation" = "tag.location.default: 3"
);
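
A sketch of how list routing works (values are illustrative): each row lands in the partition whose value list contains its part value; a value with no matching partition is rejected.

insert into test_p values (3, 1, 'a', 10);   -- routed to p_3
insert into test_p values (10, 2, 'b', 20);  -- fails: no partition lists 10
show partitions from test_p;
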
  • range
create table test1
(
   part tinyint not null,
   ky   int,
   name varchar(10),
   val  int
)
duplicate key(part, ky)
partition by range(part)
(
   partition p_0 values less than (5),
   partition p_1 values less than (10),
   partition p_9 values less than maxvalue
   -- partition p_0 values [(0), (5)),
   -- partition p_1 values [(6), (10))
)
distributed by hash(ky) buckets 6
properties (
   "replication_allocation" = "tag.location.default: 3"
);
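
A sketch of range routing with illustrative values: part < 5 lands in p_0, 5 <= part < 10 in p_1, and everything else is caught by the maxvalue partition p_9.

insert into test1 values (4, 1, 'a', 10);    -- p_0: part < 5
insert into test1 values (7, 2, 'b', 20);    -- p_1: 5 <= part < 10
insert into test1 values (99, 3, 'c', 30);   -- p_9: caught by maxvalue
show partitions from test1;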