Iceberg 基础
跳到导航
跳到搜索
hive -> iceberg
hive
-- Staging table over a space-delimited text file.
-- The field separator is a single space; string values that themselves
-- contain spaces must be wrapped in double quotes in the data file.
-- NOTE: OpenCSVSerde exposes every column as STRING regardless of the
-- declared type, so col2 is read back as a string from this table.
CREATE TABLE test1 (
    col1 STRING,
    col2 INT,
    col3 STRING,
    col4 STRING,
    col5 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = " ",
    "quoteChar" = "\"",
    "serialization.encoding" = "UTF-8"
)
STORED AS TEXTFILE;

-- Upload the data file to HDFS first (shell command, not HiveQL):
--   hdfs dfs -put test1.csv /tmp/data/
-- LOAD DATA INPATH moves the file out of /tmp/data/ into the table location.
LOAD DATA INPATH '/tmp/data/test1.csv' OVERWRITE INTO TABLE test1;

-- Baseline aggregation against the plain text table.
SELECT
    col4,
    COUNT(*) AS cs
FROM test1
GROUP BY col4
LIMIT 10;

-- Recorded output of the query above (text table):
-- ----------------------------------------------------------------------------------------------
--         VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED
-- ----------------------------------------------------------------------------------------------
-- Map 1 .......... container     SUCCEEDED     30         30        0        0       0       0
-- Reducer 2 ...... container     SUCCEEDED      1          1        0        0       0       0
-- ----------------------------------------------------------------------------------------------
-- VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 99.62 s
-- ----------------------------------------------------------------------------------------------
-- INFO  : Completed executing command(queryId=hdfs_20241224111257_4f4363b0-d489-4bac-99d8-86451fa0a45c); Time taken: 99.725 seconds
iceberg
-- Iceberg copy of test1, partitioned by col3.
-- The partition column must NOT also be listed in the regular column list;
-- it is declared only in PARTITIONED BY.
CREATE TABLE test_ice (
    col1 STRING,
    col2 INT,
    col4 STRING,
    col5 STRING
)
PARTITIONED BY (col3 STRING)
STORED BY ICEBERG;

-- Copy all rows from the text table. Columns are matched through the
-- explicit column list, so the partition column col3 can stay in its
-- original position. col2 is cast explicitly because the OpenCSVSerde
-- source table exposes it as STRING while the target column is INT.
INSERT INTO test_ice (col1, col2, col3, col4, col5)
SELECT
    col1,
    CAST(col2 AS INT) AS col2,
    col3,
    col4,
    col5
FROM test1;

-- Same aggregation as on test1, now against the Iceberg table.
SELECT
    col4,
    COUNT(*) AS cs
FROM test_ice
GROUP BY col4
LIMIT 10;

-- Recorded output of the query above (Iceberg table):
-- ----------------------------------------------------------------------------------------------
--         VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED
-- ----------------------------------------------------------------------------------------------
-- Map 1 .......... container     SUCCEEDED      4          4        0        0       0       0
-- Reducer 2 ...... container     SUCCEEDED      1          1        0        0       0       0
-- ----------------------------------------------------------------------------------------------
-- VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 22.68 s
-- ----------------------------------------------------------------------------------------------
-- INFO  : Completed executing command(queryId=hdfs_20241224111143_d436171c-f9fe-4e9a-87e1-0e2ea9b48b1b); Time taken: 26.945 seconds
-- +----------------+-----------+
-- |      col4      |    cs     |
-- +----------------+-----------+
-- | xeron x5 3708  | 40075712  |
-- +----------------+-----------+