Last updated: 2024-09-02 15:22:07
1. Kafka 0.11.0.
2. Flume 1.7.
3. KMR 4.0.1.
4. The KSC KMR security group must open ports 8020 and 50010.
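Before configuring Flume, it can help to confirm network reachability from the Flume host. A minimal sketch, assuming nc is available and using the Kafka broker (10.0.0.139) and KMR master (10.0.88.30) addresses that appear later in this guide; adjust to your environment:

# Run on the Flume host to verify the security-group rules:
nc -zv 10.0.0.139 6667   # Kafka broker port
nc -zv 10.0.88.30 8020   # HDFS NameNode RPC port
nc -zv 10.0.88.30 50010  # HDFS DataNode data-transfer port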
1. Create the Flume configuration file, for example at /root/apache-flume-1.7.0-bin/conf/flume-hdfs.conf.
# ------------------- define data source ----------------------
# source alias
agent.sources = source_from_kafka
# channels alias
agent.channels = mem_channel
# sink alias
agent.sinks = s3_sink
# define kafka source
agent.sources.source_from_kafka.type = org.apache.flume.source.kafka.KafkaSource
agent.sources.source_from_kafka.batchSize = 10
# set kafka broker address
## Make sure the security group of the Kafka cluster allows access from the Flume host on port 6667
## Replace with the address of your managed Kafka broker, and copy the Kafka server's /etc/hosts entries to the Flume machine
agent.sources.source_from_kafka.kafka.bootstrap.servers = 10.0.0.139:6667
# set kafka topic
agent.sources.source_from_kafka.kafka.topics = jacktest
# set kafka groupid
agent.sources.source_from_kafka.kafka.consumer.group.id = flumeTest1
# define hdfs sink
agent.sinks.s3_sink.type = hdfs
# set store hdfs path
## Make sure the security group of the KMR cluster allows access from the Flume host on port 8020
## Replace with the address of the KMR master1 node; the port is 8020
agent.sinks.s3_sink.hdfs.path = hdfs://10.0.88.30:8020/tmp
# roll files by time only: size- and count-based rolling are disabled below
agent.sinks.s3_sink.hdfs.rollSize = 0
agent.sinks.s3_sink.hdfs.rollCount = 0
agent.sinks.s3_sink.hdfs.rollInterval = 5
#agent.sinks.s3_sink.hdfs.threadsPoolSize = 30
agent.sinks.s3_sink.hdfs.fileType = DataStream
agent.sinks.s3_sink.hdfs.writeFormat = Text
# define channel from kafka source to hdfs sink
agent.channels.mem_channel.type = memory
# channel store size
agent.channels.mem_channel.capacity = 1000
# transaction size
agent.channels.mem_channel.transactionCapacity = 1000
agent.channels.mem_channel.byteCapacity = 800000
agent.channels.mem_channel.byteCapacityBufferPercentage = 20
agent.channels.mem_channel.keep-alive = 60
# specify the channel the sink should use
agent.sources.source_from_kafka.channels = mem_channel
agent.sinks.s3_sink.channel = mem_channel
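The source consumes from the topic jacktest; if it does not exist yet, create it on the Kafka cluster before starting Flume. A minimal sketch, assuming the standard Kafka 0.11 CLI and a hypothetical ZooKeeper address of 10.0.0.139:2181; adjust to your deployment:

# Run from the Kafka installation directory to create the topic:
bin/kafka-topics.sh --create --zookeeper 10.0.0.139:2181 \
  --replication-factor 1 --partitions 1 --topic jacktest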
2. Copy hadoop-auth-2.7.3.2.6.1.0-129.jar, commons-configuration-1.6.jar, hadoop-common-2.7.3.2.6.1.0-129.jar, hadoop-hdfs-2.7.3.2.6.1.0-129.jar, htrace-core-3.1.0-incubating.jar, and commons-io-2.4.jar into /root/apache-flume-1.7.0-bin/lib, or set the ${HADOOP_HOME} environment variable so Flume can pick up the Hadoop jars (a copy sketch follows the export line below).
export HADOOP_HOME=/hadoop/hadoop-2.7.3.2.6.1.0-129
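A minimal sketch of the copy approach, assuming the jars live somewhere under the HADOOP_HOME shown above; the exact layout varies by distribution, so locate the jars on your own cluster:

# Find each required jar under the Hadoop install and copy it into Flume's lib directory:
FLUME_LIB=/root/apache-flume-1.7.0-bin/lib
for jar in hadoop-auth-2.7.3.2.6.1.0-129.jar commons-configuration-1.6.jar \
           hadoop-common-2.7.3.2.6.1.0-129.jar hadoop-hdfs-2.7.3.2.6.1.0-129.jar \
           htrace-core-3.1.0-incubating.jar commons-io-2.4.jar; do
  find ${HADOOP_HOME} -name "${jar}" -exec cp {} ${FLUME_LIB}/ \;
done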
3. Start Flume.
bin/flume-ng agent --conf conf/ --name agent --conf-file conf/flume-hdfs.conf -Dflume.root.logger=WARN,console
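To verify the pipeline end to end, produce a few test messages and check that files appear under /tmp on HDFS. A minimal sketch, assuming the Kafka console producer is run from the Kafka installation directory and an HDFS client is available on a KMR node; adjust paths and addresses as needed:

# On the Kafka host: type a few lines to send test messages to the jacktest topic
bin/kafka-console-producer.sh --broker-list 10.0.0.139:6667 --topic jacktest

# On a KMR node: confirm that Flume is rolling files into HDFS
hdfs dfs -ls /tmp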