Iceberg 基础:修订间差异

来自牛奶河Wiki
跳到导航 跳到搜索
(创建页面,内容为“=== hive -> iceberg === ==== hive ==== <small><nowiki># 分隔符为空格,字符串中有空格用双引号引起来 CREATE TABLE test1 ( col1 STRING, col2 INT, col3 STRING, col4 STRING, col5 STRING ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES ( "separatorChar" = " ", "quoteChar" = "\"", "serialization.encoding"="UTF-8" ) STORED AS TEXTFILE; # hdfs dfs -put test1.csv /tmp/data/ LOAD DATA INPATH…”)
 
第43行: 第43行:
STORED by iceberg;
STORED by iceberg;


insert into test_ice(col1, col2, col3, col4, col5) select * from test1;
insert into test_ice(col1, col2, col3, col4, col5) select col1, col2, col3, col4, col5 from test1;


select col4, count(*) cs from test_ice group by col4 limit 10;  
select col4, count(*) cs from test_ice group by col4 limit 10;  

2024年12月24日 (二) 11:18的版本

hive -> iceberg

hive

-- Staging table over a text file whose fields are separated by a single space;
-- a field that itself contains spaces must be wrapped in double quotes.
-- NOTE(review): the Hive CSV SerDe documentation says OpenCSVSerde exposes every
-- column as STRING, so the INT declared for col2 may not be honored — confirm
-- with DESCRIBE test1.
CREATE TABLE test1 (
  col1 STRING,
  col2 INT,
  col3 STRING,
  col4 STRING,
  col5 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = " ",
    "quoteChar" = "\"",
    "serialization.encoding"="UTF-8"
)
STORED AS TEXTFILE;

-- Upload the file to HDFS first (shell command, not HiveQL):
--   hdfs dfs -put test1.csv /tmp/data/
-- Without the LOCAL keyword the path is resolved on the cluster filesystem and
-- (per the Hive DML manual) the file is moved into the table location rather
-- than copied. OVERWRITE replaces whatever the table already contains.
LOAD DATA INPATH '/tmp/data/test1.csv' OVERWRITE INTO TABLE test1;

-- Row count per distinct col4 value in the text-backed table, capped at 10 groups.
-- There is no ORDER BY, so which groups are returned is not defined.
SELECT
    col4,
    COUNT(*) AS cs
FROM test1
GROUP BY col4
LIMIT 10;
----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED  
----------------------------------------------------------------------------------------------
Map 1 .......... container     SUCCEEDED     30         30        0        0       0       0  
Reducer 2 ...... container     SUCCEEDED      1          1        0        0       0       0  
----------------------------------------------------------------------------------------------
VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 99.62 s    
----------------------------------------------------------------------------------------------
INFO  : Completed executing command(queryId=hdfs_20241224111257_4f4363b0-d489-4bac-99d8-86451fa0a45c); Time taken: 99.725 seconds

iceberg

-- Iceberg table with the same five columns as test1, partitioned by col3.
-- The partition column is declared only in PARTITIONED BY; it must not be
-- repeated in the regular column list.
CREATE TABLE test_ice (
  col1 STRING,
  col2 INT,
  col4 STRING,
  col5 STRING
)
PARTITIONED BY (col3 STRING)
STORED BY ICEBERG;

-- Copy all rows of test1 into test_ice. Both column lists are spelled out so the
-- mapping does not depend on the physical column order of either table.
INSERT INTO test_ice (col1, col2, col3, col4, col5)
SELECT
    col1,
    col2,
    col3,
    col4,
    col5
FROM test1;

-- Row count per distinct col4 value in the Iceberg table, capped at 10 groups.
-- There is no ORDER BY, so which groups are returned is not defined.
SELECT
    col4,
    COUNT(*) AS cs
FROM test_ice
GROUP BY col4
LIMIT 10;
----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED  
----------------------------------------------------------------------------------------------
Map 1 .......... container     SUCCEEDED      4          4        0        0       0       0  
Reducer 2 ...... container     SUCCEEDED      1          1        0        0       0       0  
----------------------------------------------------------------------------------------------
VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 22.68 s    
----------------------------------------------------------------------------------------------
INFO  : Completed executing command(queryId=hdfs_20241224111143_d436171c-f9fe-4e9a-87e1-0e2ea9b48b1b); Time taken: 26.945 seconds
+----------------+-----------+
|      col4      |    cs     |
+----------------+-----------+
| xeron x5 3708  | 40075712  |
+----------------+-----------+