StreamKafkaProducer1
package org.hnsw

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamKafkaProducer1 {

  def streamingkafka(): Unit = {
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[4]")
    // .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    val streamRdd = ssc.socketTextStream("192.168.3.66", 8888)

    // Split each line into words and pair every word with an initial count of 1
    val words = streamRdd.flatMap(_.split(" ")).map((_, 1))

    // Step 1: count each word over a 30-second window, sliding every 10 seconds
    val wordscount = words.reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, Seconds(30), Seconds(10))
    wordscount.print()

    // Step 2: publish the word counts to Kafka
    wordscount.foreachRDD { rdd =>
      rdd.foreachPartition { partRdd =>
        // Kafka connection settings
        val props = new util.HashMap[String, Object]()
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.3.66:9092")
        // Key/value serializers
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        // Create a Kafka producer for this partition
        val producer = new KafkaProducer[String, String](props)
        // Target topic for the word counts
        val topic = "kafka-lt"
        partRdd.foreach { line =>
          // Message payload has the form "word count"
          val str = line._1 + " " + line._2
          // Wrap the payload in a record with a null key
          val message = new ProducerRecord[String, String](topic, null, str)
          producer.send(message)
        }
        // Flush pending records and release the producer's resources
        producer.close()
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }

  def main(args: Array[String]): Unit = {
    streamingkafka()
  }
}
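One caveat with the pattern above: a new KafkaProducer is constructed and closed for every partition of every micro-batch, which re-establishes broker connections on each 10-second slide. A common refinement is to keep a single producer per executor JVM and reuse it across batches. The sketch below is a minimal version of that idea; ProducerSingleton is a hypothetical helper name, not part of the original code.

package org.hnsw

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
import org.apache.kafka.common.serialization.StringSerializer

// Hypothetical helper: lazily creates one KafkaProducer per executor JVM
// and shares it across all tasks and micro-batches running in that JVM.
object ProducerSingleton {
  private var producer: KafkaProducer[String, String] = _

  def getOrCreate(brokers: String): KafkaProducer[String, String] = synchronized {
    if (producer == null) {
      val props = new util.HashMap[String, Object]()
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
      props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
      props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
      producer = new KafkaProducer[String, String](props)
      // Flush buffered records and release resources when the JVM exits
      sys.addShutdownHook(producer.close())
    }
    producer
  }
}

With this helper, the body of foreachPartition reduces to val producer = ProducerSingleton.getOrCreate("192.168.3.66:9092") followed by the send loop, and the per-partition producer.close() goes away.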
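To verify end to end that the counts are arriving, Kafka's bundled console consumer can be pointed at the topic: bin/kafka-console-consumer.sh --bootstrap-server 192.168.3.66:9092 --topic kafka-lt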