
Integrating Spark Streaming with Kafka

This article walks through integrating Spark Streaming with Kafka: a streaming word-count job reads text from a socket, computes windowed counts, and publishes the results to a Kafka topic. If you are solving a similar problem, follow along!

The IntelliJ project below implements message receiving and processing by combining Spark Streaming with Kafka; the entry point is the StreamKafkaProducer1 object.
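
Before the code, here is a minimal build.sbt sketch of the dependencies this example relies on. The version numbers and project name are my own assumptions for illustration; match them to the Spark and Kafka versions actually installed in your environment.

build.sbt

name := "spark-streaming-kafka-demo" // hypothetical project name
scalaVersion := "2.11.12" // assumed; use the Scala version your Spark build targets

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % "2.4.8", // assumed Spark version
  "org.apache.spark" %% "spark-streaming" % "2.4.8",
  "org.apache.kafka" %  "kafka-clients"   % "2.0.0"  // assumed Kafka client version
)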

StreamKafkaProducer1

package org.hnsw

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamKafkaProducer1 {

  def streamingkafka() = {
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[4]")
//      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    val streamRdd = ssc.socketTextStream("192.168.3.66", 8888) // read lines from a TCP socket (feed it with e.g. nc -lk 8888)
    // split each line into words and pair each word with an initial count of 1
    val words = streamRdd.flatMap(_.split(" ")).map((_, 1))
    // Step 1: sum the counts per word over a 30-second window, sliding every 10 seconds
    val wordscount = words.reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, Seconds(30), Seconds(10))
    wordscount.print()
    // Step 2: publish the windowed counts to Kafka
    wordscount.foreachRDD((rdd)=>{
      rdd.foreachPartition((partRdd)=>{
        // Kafka producer configuration
        val props = new util.HashMap[String, Object]()
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.3.66:9092")
        // serialize keys and values as strings
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        // create one producer per partition: KafkaProducer is not serializable,
        // so it cannot be built on the driver and shipped to the executors
        val producer = new KafkaProducer[String, String](props)
        // topic the word counts are published to
        val topic = "kafka-lt"
        partRdd.foreach((line) => {
          val str = line._1 + " " + line._2 // message format: "word count"
          val message = new ProducerRecord[String, String](topic, null, str) // null key; the value carries the payload
          producer.send(message)
        })
        producer.close() // flush buffered messages and release the producer
      })
    })
    ssc.start()
    ssc.awaitTermination()
  }

  def main(args: Array[String]): Unit = {
    streamingkafka()
  }
}
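
To run the pipeline end to end, first start a text source on the socket host (for example, nc -lk 8888 on 192.168.3.66), then submit the job and type some words; the job prints and publishes the windowed counts. To verify what actually lands in Kafka, here is a minimal consumer sketch of my own (not part of the original article); it assumes the same broker address, the kafka-lt topic, and the Kafka 2.x poll(Duration) API.

StreamKafkaConsumerCheck

package org.hnsw

import java.time.Duration
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._

object StreamKafkaConsumerCheck {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.3.66:9092") // same broker as the producer
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-check") // hypothetical consumer group id
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") // read from the beginning on first run

    val consumer = new KafkaConsumer[String, String](props)
    consumer.subscribe(Collections.singletonList("kafka-lt")) // topic the producer writes to
    try {
      while (true) {
        val records = consumer.poll(Duration.ofMillis(1000))
        for (record <- records.asScala) {
          println(record.value()) // expected format: "word count"
        }
      }
    } finally {
      consumer.close()
    }
  }
}

One note on the producer side: because KafkaProducer is not serializable, the job creates (and now closes) a producer inside foreachPartition on each executor rather than on the driver. For a long-running job, a lazily initialized singleton producer per executor would avoid the per-batch setup cost.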

 

That concludes this article on integrating Spark Streaming with Kafka. I hope it proves helpful, and thanks as always for supporting 为之网!