Spark 写入 ClickHouse 后,查询到的数据量翻了一倍
发布于 3 个月前 作者 Asura7969 1416 次浏览 来自 问答

配置:metrika.xml

<yandex>
    <!-- Cluster definitions -->
    <clickhouse_remote_servers>
        <!-- cluster_argus: four shards, one replica per shard -->
        <cluster_argus>
            <!-- Data shard 1 -->
            <shard>
                <weight>1</weight>
                <!-- internal_replication is commented out for this shard: -->
                <!--<internal_replication>true</internal_replication>-->
                <replica>
                    <host>host1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>

            <!-- Data shard 2 -->
            <shard>
                <weight>1</weight>
                <replica>
                    <host>host2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>

            <!-- Data shard 3 -->
            <shard>
                <weight>1</weight>
                <replica>
                    <host>host3</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>

            <!-- Data shard 4 -->
            <shard>
                <weight>1</weight>
                <replica>
                    <host>host4</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>
        </cluster_argus>

        <!-- rep_cluster_argus: two shards, two replicas per shard,
             internal_replication enabled on both shards -->
        <rep_cluster_argus>
            <!-- Shard 1: host1 + host2 -->
            <shard>
                <weight>1</weight>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>host1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
                <replica>
                    <host>host2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>

            <!-- Shard 2: host3 + host4 -->
            <shard>
                <weight>1</weight>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>host3</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
                <replica>
                    <host>host4</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>xxxxx</password>
                </replica>
            </shard>
        </rep_cluster_argus>
    </clickhouse_remote_servers>

    <!-- Replica name of this node (not used here) -->
    <macros>
        <layer>rep_cluster_argus</layer>
        <shard>shard01</shard>
        <replica>host1</replica>
    </macros>

    <!-- Listening networks (appears to be redundant) -->
    <networks>
        <ip>::/0</ip>
    </networks>

    <!-- ZooKeeper ensemble -->
    <zookeeper-servers>
        <node index="1">
            <host>host1</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>host2</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>host3</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>

    <!-- Data compression algorithm -->
    <clickhouse_compression>
        <case>
            <min_part_size>10000000000</min_part_size>
            <min_part_size_ratio>0.01</min_part_size_ratio>
            <method>lz4</method>
        </case>
    </clickhouse_compression>
</yandex>

建表语句:

1、本地表:

-- Local MergeTree table, created through ON CLUSTER cluster_argus.
-- Rows are ordered by processTime; the partition key is derived from processTime.
-- NOTE(review): toHour() presumably yields the hour of day (0-23), so rows from
-- different days would share a partition -- confirm this is intended.
CREATE TABLE IF NOT EXISTS default.metricsuserGwtcpSql_local ON CLUSTER cluster_argus
(
    processTime UInt32,
    logTime     UInt32,
    logLevel    String
)
ENGINE = MergeTree()
PARTITION BY toHour(toDateTime(processTime))
ORDER BY (processTime)
SETTINGS index_granularity = 8192

2、分布式表:

-- Distributed table over default.metricsuserGwtcpSql_local on cluster_argus,
-- copying that table's column definitions (AS ...) and using rand() as the
-- sharding expression.
-- IF NOT EXISTS mirrors the local-table DDL so the script can be re-run
-- without failing on an already-existing table.
CREATE TABLE IF NOT EXISTS default.metricsuserGwtcpSql ON CLUSTER cluster_argus
AS default.metricsuserGwtcpSql_local
ENGINE = Distributed(cluster_argus, default, metricsuserGwtcpSql_local, rand())

Spark Streaming 的 insert 代码见:https://github.com/housepower/ClickHouse-Native-JDBC/issues/83

有没有大佬帮忙看一下?

1 回复

问题已经解决,是 clickhouse-native-jdbc 驱动包的问题。

回到顶部