import kafka.api.OffsetRequest

import org.apache.spark.SparkConf

import org.apache.spark.SparkContext

import org.apache.spark.streaming.kafka.KafkaUtils

import org.apache.spark.streaming.Seconds

import org.apache.spark.streaming.StreamingContext

import kafka.serializer.StringDecoder

import scala.collection.immutable.HashMap

import org.apache.log4j.Level

import org.apache.log4j.Logger

/**
 * Spark Streaming job that reads text records from the Kafka topic `kafkatext`
 * through the direct (receiver-less) Kafka stream, extracts three
 * space-separated tokens from every tab-separated segment of each record, and
 * prints a sample of the resulting tuples once per 3-second batch.
 */
object Nginx {

  // Positions of the tokens emitted for each segment, in output order.
  // NOTE(review): what these columns mean in the log format is not visible
  // here -- confirm against the producer's log layout.
  private val FirstIndex = 2
  private val SecondIndex = 29
  private val ThirdIndex = 27

  // A segment needs at least this many tokens for every index above to exist.
  private val MinTokens = Seq(FirstIndex, SecondIndex, ThirdIndex).max + 1

  /**
   * Entry point: configures logging, builds the streaming pipeline, and blocks
   * until the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Quieten Spark's and Jetty's own logging so the printed batches stay readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR)

    val conf = new SparkConf().setAppName("Nginx").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(3))

    val topics = Set("kafkatext")
    val groupId = "groupid"
    val brokers = "spark:9092"
    val kafkaParams = Map[String, String](
      "metadata.broker.list" -> brokers,
      "group.id" -> groupId
    )

    // NOTE(review): a java.lang.NoSuchMethodError on
    // kafka.api.TopicMetadata.errorCode raised from this call most likely means
    // the Kafka jar on the runtime classpath is not the version the
    // spark-streaming-kafka artifact was compiled against -- verify that the
    // build pulls a single, matching Kafka dependency.
    val kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topics)

    // Each stream element is a (key, value) pair; only the value is used.
    val lines = kafkaStream.map(_._2)

    // Split every record into tab-separated segments and each segment into
    // space-separated tokens, tokenising each segment only once. Segments with
    // too few tokens are skipped: indexing them would throw
    // ArrayIndexOutOfBoundsException inside the task and fail the whole batch.
    val fields = lines.flatMap { record =>
      record
        .split("\\t")
        .map(_.split(" "))
        .collect {
          case tokens if tokens.length >= MinTokens =>
            (tokens(FirstIndex), tokens(SecondIndex), tokens(ThirdIndex))
        }
    }

    fields.print()

    ssc.start()
    ssc.awaitTermination()
  }

}

warning: there was one deprecation warning; re-run with -deprecation for details

java.lang.NoSuchMethodError: kafka.api.TopicMetadata.errorCode()S

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$getPartitionMetadata$1$$anonfun$4.apply(KafkaCluster.scala:135)

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$getPartitionMetadata$1$$anonfun$4.apply(KafkaCluster.scala:135)

at scala.collection.TraversableLike$$anonfun$filterImpl$1.apply(TraversableLike.scala:248)

at scala.collection.Iterator$class.foreach(Iterator.scala:893)

at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)

at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)

at scala.collection.AbstractIterable.foreach(Iterable.scala:54)

at scala.collection.TraversableLike$class.filterImpl(TraversableLike.scala:247)

at scala.collection.TraversableLike$class.filter(TraversableLike.scala:259)

at scala.collection.AbstractTraversable.filter(Traversable.scala:104)

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$getPartitionMetadata$1.apply(KafkaCluster.scala:135)

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$getPartitionMetadata$1.apply(KafkaCluster.scala:133)

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$org$apache$spark$streaming$kafka$KafkaCluster$$withBrokers$1.apply(KafkaCluster.scala:366)

at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$org$apache$spark$streaming$kafka$KafkaCluster$$withBrokers$1.apply(KafkaCluster.scala:362)

at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)

at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)

at org.apache.spark.streaming.kafka.KafkaCluster.org$apache$spark$streaming$kafka$KafkaCluster$$withBrokers(KafkaCluster.scala:362)

at org.apache.spark.streaming.kafka.KafkaCluster.getPartitionMetadata(KafkaCluster.scala:133)

at org.apache.spark.streaming.kafka.KafkaCluster.getPartitions(KafkaCluster.scala:120)

at org.apache.spark.streaming.kafka.KafkaUtils$.getFromOffsets(KafkaUtils.scala:212)

at org.apache.spark.streaming.kafka.KafkaUtils$.createDirectStream(KafkaUtils.scala:485)

Logo

Kafka开源项目指南提供详尽教程,助开发者掌握其架构、配置和使用,实现高效数据流管理和实时处理。它高性能、可扩展,适合日志收集和实时数据处理,通过持久化保障数据安全,是企业大数据生态系统的核心。

更多推荐