1. Flink 从 Kafka 中获取数据

2. 对数据进行处理

3. 将处理结果写入 Kafka

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer010, FlinkKafkaProducer010}

object Test {
  /**
   * Flink streaming job: reads pipe-separated records from Kafka topic
   * "cstest1", keeps only the phone number, start time and app name
   * fields, and writes the reduced records to Kafka topic "cstest2".
   *
   * Input record format (pipe-separated), e.g.:
   *   18646198431|2018-07-31 17:04:40.8998680|6636|WeChat|IM|Taobao|Shopping
   * Output record format:
   *   18646198431|2018-07-31 17:04:40.8998680|WeChat
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the streaming environment with event-time semantics and
    //    exactly-once checkpointing every 1000 ms.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.enableCheckpointing(1000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // Kafka connection properties, shared by the consumer and the producer.
    // NOTE(review): "zookeeper.connect" is only read by the Kafka 0.8
    // consumer, and the key/value deserializer entries are ignored by
    // FlinkKafkaConsumer010 (it uses the DeserializationSchema passed to its
    // constructor) — kept here unchanged, but they could be dropped.
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", "cs-test-kafka")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    // Create a Kafka 0.10.x source consumer reading plain strings
    // from topic "cstest1".
    val kafkaConsumer = new FlinkKafkaConsumer010[String]("cstest1", new SimpleStringSchema(), properties)
    val originalStream: DataStream[String] = env.addSource(kafkaConsumer)

    // 2. Business logic: for each record keep field 0 (phone number),
    //    field 1 (start time) and field 3 (app name), re-joined with "|".
    val resultStream = originalStream.map { record =>
      // e.g. record = 18646198431|2018-07-31 17:04:40.8998680|6636|WeChat|IM|Taobao|Shopping
      val fields       = record.split("\\|")
      val mobileNumber = fields(0) // phone number
      val startTime    = fields(1) // start time
      val appName      = fields(3) // app name
      // e.g. result = 18646198431|2018-07-31 17:04:40.8998680|WeChat
      mobileNumber + "|" + startTime + "|" + appName
    }

    // 3. Create a Kafka 0.10.x sink producer writing to topic "cstest2".
    val myProducer = new FlinkKafkaProducer010[String]("cstest2", new SimpleStringSchema(), properties)

    // Kafka 0.10+ allows attaching the records' event timestamp when writing
    // them to Kafka; this method is not available for earlier Kafka versions.
    myProducer.setWriteTimestampToKafka(true)

    // Attach the sink and run the pipeline.
    resultStream.addSink(myProducer)
    env.execute("Kafka 0.10 Example")
  }
}

 

Logo

Kafka开源项目指南提供详尽教程,助开发者掌握其架构、配置和使用,实现高效数据流管理和实时处理。它高性能、可扩展,适合日志收集和实时数据处理,通过持久化保障数据安全,是企业大数据生态系统的核心。

更多推荐