一.安装使用
ClickHouse是Yandex提供的一个开源的列式存储数据库管理系统,多用于联机分析(OLAP)场景,可提供海量数据的存储和分析,同时利用其数据压缩和向量化引擎的特性,能提供快速的数据搜索。
Ⅰ).安装
1
2
3
4
5
6
|
sudo yum install yum-utils sudo rpm -- import https: //repo .yandex.ru /clickhouse/CLICKHOUSE-KEY .GPG sudo yum-config-manager --add-repo https: //repo .yandex.ru /clickhouse/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client sudo /etc/init .d /clickhouse-server start clickhouse-client |
Ⅱ).配置
a).clickhouse-server
1
2
3
4
5
6
|
CLICKHOUSE_USER=username CLICKHOUSE_LOGDIR=${CLICKHOUSE_HOME} /log/clickhoue-server CLICKHOUSE_LOGDIR_USER=username CLICKHOUSE_DATADIR_OLD=${CLICKHOUSE_HOME} /data/old CLICKHOUSE_DATADIR=${CLICKHOUSE_HOME} /data |
b).config.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
... ... <!-- 配置日志参数 --> <logger> <level>info< /level > <log>${CLICKHOUSE_HOME} /log/clickhoue-server/clickhoue-server .log< /log > <errorlog>${CLICKHOUSE_HOME} /log/clickhoue-server/clickhoue-server-error .log< /errorlog > <size>100M< /size > <count>5< /count > < /logger > <!-- 配置数据保存路径 --> <path>${CLICKHOUSE_HOME}</> <tmp_path>${CLICKHOUSE_HOME} /tmp </> <user_files_path>${CLICKHOUSE_HOME} /user_files </> <!-- 配置监听 --> <listen_host>::< /listen_host > <!-- 配置时区 --> <timezone>Asiz /Shanghai < /timezone > ... ... |
Ⅲ).启停服务
1
2
3
4
|
#### a).启动服务 sudo service clickhouse-server start #### b).停止服务 sudo service clickhouse-server stop |
Ⅳ).客户端访问
1
|
clickhouse-client |
二.常用命令
Ⅰ).创建表
1
2
3
4
5
6
7
8
9
10
11
|
CREATE TABLE IF NOT EXISTS database .table_name ON cluster cluster_shardNum_replicasNum( 'id' UInt64, 'name' String, 'time' UInt64, 'age' UInt8, 'flag' UInt8 ) ENGINE = MergeTree PARTITION BY toDate( time /1000) ORDER BY (id, name ) SETTINGS index_granularity = 8192 |
Ⅱ).创建物化视图
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
CREATE MATERIALIZED VIEW database .view_name ON cluster cluster_shardNum_replicasNum ENGINE = AggregatingMergeTree PARTITION BY toYYYYMMDD( time ) ORDER BY (id, name ) AS SELECT toStartOfHour(toDateTime( time /1000)) as time , id, name , sumState( if (flag = 1, 1, 0)) AS successCount, sumState( if (flag = 0, 1, 0)) AS faildCount, sumState( if ((age < 10), 1, 0)) AS rang1Age, sumState( if ((age > 10) AND (age < 20), 2, 0)) AS rang2Age, sumState( if ((age > 20), 3, 0)) AS rang3Age, maxState(age) AS maxAge, minState(age) AS minAge FROM datasource.table_name GROUP BY time ,id, name |
Ⅲ).插入数据
a).普通数据插入
1
|
INSERT INTO database .table_name(id, name , age, flag) VALUES (1, 'test' , 15, 0) |
b).Json数据插入
1
|
INSERT INTO database .table_name FORMAT JSONEachRow{ "id" : "1" , "name" : "test" , "age" : "11" , "flag" : "1" } |
Ⅳ).查询数据
a).表数据查询
1
|
SELECT * FROM database .table_name WHERE id=1 |
b).物化视图查询
1
2
3
4
|
SELECT id, name , sumMerge(successCount), sumMerge(faildCount), sumMerge(rang1Age), sumMerge(rang2Age), maxMerge(maxAge), minMerge(minAge) FROM database .view_name WHERE id=1 GROUP BY id, name |
Ⅴ).创建NESTED表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
CREATE TABLE IF NOT EXISTS database .table_name( 'id' UInt64, 'name' String, 'time' UInt64, 'age' UInt8, 'flag' UInt8 nested_table_name Nested ( sequence UInt32, id UInt64, name String, time UInt64, age UInt8, flag UInt8 socketAddr String, socketRemotePort UInt32, socketLocalPort UInt32, eventTime UInt64, exceptionClassName String, hashCode Int32, nextSpanId UInt64 )) ENGINE = MergeTree PARTITION BY toDate ( time / 1000) ORDER BY (id, name , time ) SETTINGS index_granularity = 8192 |
Ⅵ).NESTED表数据查询
1
|
SELECT table1.*,table1.id FROM nest.table_name AS table1 array JOIN nested_table_name AS table2 |
Ⅶ).配置字典项
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
<dictionaries> <dictionary> <name>url</name> <source> <clickhouse> <host>hostname</host> <port> 9000 </port> <user> default </user> <password/> <db>dict</db> <table>url_dict</table> </clickhouse> </source> <lifetime> <min> 30 </min> <max> 36 </max> </lifetime> <layout> <hashed/> </layout> <structure> <id> <name>id</name> </id> <attribute> <name>hash_code</name> <type>String</type> <null_value/> </attribute> <attribute> <name>url</name> <type>String</type> <null_value/> </attribute> </structure> </dictionary> <dictionary> <name>url_hash</name> <source> <clickhouse> <host>hostname</host> <port> 9000 </port> <user> default </user> <password/> <db>dict</db> <table>url_hash</table> </clickhouse> </source> <lifetime> <min> 30 </min> <max> 36 </max> </lifetime> <layout> <complex_key_hashed/> </layout> <structure> <key> <attribute> <name>hash_code</name> <type>String</type> </attribute> </key> <attribute> <name>url</name> <type>String</type> <null_value/> </attribute> </structure> </dictionary> </dictionaries> |
Ⅷ).字典查询
1
2
3
4
5
|
SELECT id, dictGet( 'name' , 'name' , toUInt64( name )) AS name , dictGetString( 'url' , 'url' , tuple(url)) AS url FROM table_name |
Ⅸ).导入数据
1
|
clickhouse-client --query= "INSERT INTO database.table_name FORMAT CSVWithNames" < /path/import_filename .csv |
Ⅹ).导出数据
1
|
clickhouse-client --query= "SELECT * FROM database.table_name FORMAT CSV" sed 's/"//g' > /path/export_filename .csv |
Ⅺ).查看partition状态
1
|
SELECT table , name , partition,active FROM system.parts WHERE database = 'database_name' |
Ⅻ).清理partition
1
|
ALTER TABLE database .table_name ON cluster cluster_shardNum_replicasNum detach partition 'partition_id' |
XIII).查看列的压缩率
1
2
3
4
5
6
7
8
9
10
11
|
SELECT database , table , name , formatReadableSize( sum (data_compressed_bytes) AS c) AS comp, formatReadableSize( sum (data_uncompressed_bytes) AS r) AS raw, c/r AS comp_ratio FROM system.columns WHERE database = 'database_name' AND table = 'table_name' GROUP BY name |
XIV).查看物化视图的磁盘占用
1
|
clickhouse-client --query= "SELECT partition,count(*) AS partition_num, formatReadableSize(sum(bytes)) AS disk_size FROM system.columns WHERE database='database_name' " --external --? le =***.sql --name=parts --structure= 'table String, name String, partition UInt64, engine String' -h hostname |
到此这篇关于clickhouse 批量插入数据及ClickHouse常用命令的文章就介绍到这了,更多相关clickhouse 批量插入内容请搜索服务器之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持服务器之家!
原文链接:https://blog.csdn.net/weixin_30444625/article/details/112520469