I. Prepare the environment
Download apache-flume-1.8.0-bin.tar.gz, kafka_2.11-0.11.0.3.tgz, flink-1.6.1-bin-hadoop26-scala_2.11.tgz and zookeeper-3.4.13.tar.gz.
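These old releases are kept on the Apache archive rather than the main mirrors; a possible fetch sketch, assuming the standard archive layout (verify the URLs before relying on them):

cd /usr/local
wget https://archive.apache.org/dist/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
wget https://archive.apache.org/dist/kafka/0.11.0.3/kafka_2.11-0.11.0.3.tgz
wget https://archive.apache.org/dist/flink/flink-1.6.1/flink-1.6.1-bin-hadoop26-scala_2.11.tgz
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.13/zookeeper-3.4.13.tar.gz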
II. Configure the environment
1. Install Flume
cd /usr/local
tar -zxvf apache-flume-1.8.0-bin.tar.gz
vim apache-flume-1.8.0/conf/kafka.properties

agent.sources = s1
agent.channels = c1
agent.sinks = k1
# Read data from the given file (exec source tailing the log)
agent.sources.s1.type = exec
agent.sources.s1.command = tail -f /usr/local/test.log
agent.sources.s1.channels = c1
# Configure the transfer channel
agent.channels.c1.type = memory
agent.channels.c1.capacity = 10000
agent.channels.c1.transactionCapacity = 100
# Configure the Kafka sink that receives the data
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.brokerList = 192.168.56.101:9092
agent.sinks.k1.topic = test
agent.sinks.k1.serializer.class = kafka.serializer.StringEncoder
agent.sinks.k1.channel = c1

2. Install Kafka
cd /usr/local
tar -zxvf kafka_2.11-0.11.0.3.tgz
vim kafka_2.11-0.11.0.3/config/server.properties

broker.id=1
advertised.listeners=PLAINTEXT://192.168.56.101:9092
zookeeper.connect=192.168.56.101:2181

3. Install Flink
cd /usr/local
tar -zxvf flink-1.6.1-bin-hadoop26-scala_2.11.tgz

4. Install ZooKeeper
cd /usr/local
tar -zxvf zookeeper-3.4.13.tar.gz
vim zookeeper-3.4.13/conf/zoo.cfg

tickTime=2000
initLimit=10
syncLimit=5
dataDir=/tmp/zookeeper
clientPort=2181
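Note that the ZooKeeper tarball ships only a conf/zoo_sample.cfg; if conf/zoo.cfg does not exist yet, create it from the sample first and then apply the settings above:

cd /usr/local/zookeeper-3.4.13
cp conf/zoo_sample.cfg conf/zoo.cfg
# then edit conf/zoo.cfg as shown above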
III. Start the services
1. Start ZooKeeper
cd /usr/local/zookeeper-3.4.13/bin
./zkServer.sh start &
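To confirm ZooKeeper actually came up, zkServer.sh can report its mode (standalone here, since this is a single node):

./zkServer.sh status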
2. Start Kafka
cd /usr/local/kafka_2.11-0.11.0.3
./bin/kafka-server-start.sh config/server.properties &
# optional: a console producer for sending test messages by hand
./bin/kafka-console-producer.sh --broker-list 192.168.56.101:9092 --topic test
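Kafka 0.11 auto-creates topics by default, but creating the test topic explicitly avoids depending on that; a sketch with the stock 0.11 tools (ZooKeeper and broker addresses as configured above):

./bin/kafka-topics.sh --create --zookeeper 192.168.56.101:2181 --replication-factor 1 --partitions 1 --topic test
# in a separate terminal, watch what arrives on the topic
./bin/kafka-console-consumer.sh --bootstrap-server 192.168.56.101:9092 --topic test --from-beginning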
3. Start Flume
cd /usr/local/apache-flume-1.8.0
./bin/flume-ng agent --conf-file ./conf/kafka.properties -c conf/ --name agent -Dflume.root.logger=DEBUG,console
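To check the Flume-to-Kafka leg end to end, append a line to the tailed file and watch it show up in the console consumer started earlier (file path from the Flume config above):

echo "flume-smoke-test" >> /usr/local/test.log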
4. Start Flink
cd /usr/local/flink-1.6.1
./bin/start-cluster.sh &
Open http://192.168.56.101:8081 in a browser; the Flink web UI should appear.
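If the UI does not come up, jps is a quick sanity check; on a Flink 1.6 standalone cluster the two daemons typically show up as below (exact class names vary across Flink versions):

jps
# expected, roughly:
#   StandaloneSessionClusterEntrypoint   (the JobManager)
#   TaskManagerRunner                    (the TaskManager)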
5. Develop the program in IDEA: create a new Maven project with the two files below.
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.tonytaotao</groupId>
    <artifactId>flink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <description>flink in action</description>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.6.1</version>
            <!-- <scope>provided</scope> -->
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/log4j/log4j -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compile for Java 8 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- copy dependencies into target/classes/lib so the jar's Class-Path entries resolve -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>test</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>target/classes/lib</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- make the jar runnable with the main class below -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>com.tonytaotao.flink.FlinkKafka</mainClass>
                            <classpathPrefix>lib/</classpathPrefix>
                        </manifest>
                        <manifestEntries>
                            <Class-Path>.</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
FlinkKafka.java
package com.tonytaotao.flink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

import javax.annotation.Nullable;
import java.util.Properties;

public class FlinkKafka {

    public static void main(String[] args) throws Exception {
        // Obtain the Flink streaming execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enable checkpointing every 1000 ms (the docs call this "state & checkpoints")
        env.enableCheckpointing(1000);

        // Configure the Kafka/ZooKeeper addresses and the consumer group
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.56.101:9092");
        properties.setProperty("zookeeper.connect", "192.168.56.101:2181");
        properties.setProperty("group.id", "test");

        // Create the Kafka consumer for topic "test" with the configuration above
        FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>("test", new SimpleStringSchema(), properties);
        consumer.assignTimestampsAndWatermarks(new LineSplitter());

        // Turn the Kafka source into a Flink DataStream and print it
        DataStream<String> stream = env.addSource(consumer);
        stream.print();

        env.execute("WordCount from kafka data");
    }

    // Punctuated watermark assigner: expects lines of the form "<epoch-millis>,<payload>"
    public static final class LineSplitter implements AssignerWithPunctuatedWatermarks<String> {

        private static final long serialVersionUID = 1L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(String line, long extractedTimestamp) {
            if (line != null && line.contains(",")) {
                String[] parts = line.split(",");
                return new Watermark(Long.parseLong(parts[0]));
            }
            return null;
        }

        @Override
        public long extractTimestamp(String line, long previousElementTimestamp) {
            if (line != null && line.contains(",")) {
                String[] parts = line.split(",");
                return Long.parseLong(parts[0]);
            }
            return 0;
        }
    }
}
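Note that LineSplitter only extracts a timestamp and emits a watermark for lines shaped like <epoch-millis>,<payload>; anything else gets timestamp 0 and no watermark. A quick way to push one well-formed line through the pipeline, assuming GNU date (for millisecond precision via %N):

echo "$(date +%s%3N),watermark-test" >> /usr/local/test.log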
6. Package the program as a jar and upload it to the server (a build sketch follows).
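A minimal build sketch, run from the project root; the jar name follows the artifactId and version in the pom above, and the host and target path in the scp line are assumptions for this setup:

mvn clean package
# copy the result next to the Flink distribution, e.g.:
scp target/flink-1.0-SNAPSHOT.jar root@192.168.56.101:/usr/local/flink-1.6.1/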
cd /usr/local/flink-1.6.1
./bin/flink run -c com.tonytaotao.flink.FlinkKafka flink-1.0-SNAPSHOT.jar
7. Create a test.log file under /usr/local/, and create a script file kafkaoutput.sh with the following content:
#!/bin/bash
for ((i=0; i<=1000; i++));
do
  echo "kafka_test-$i" >> /usr/local/test.log;
done
(The original used echo "kafka_test-"+$i, which prints a literal "+" in shell; string concatenation needs no operator here.)
Then grant execute permission: chmod 777 kafkaoutput.sh
8. Run the script. If everything is working, the lines it generates appear in /usr/local/flink-1.6.1/log/flink-root-taskexecutor-0-master.out.
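Run and watch in one go; the .out file name embeds the hostname, assumed to be master here:

cd /usr/local
./kafkaoutput.sh
tail -f /usr/local/flink-1.6.1/log/flink-root-taskexecutor-0-master.out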