16.1.1 transform 操作
transform 原语允许在 DStream 上执行任意的 RDD-to-RDD 函数。
它可以用来执行一些 RDD 操作，即使这些操作并没有在 Spark Streaming 的 DStream API 中暴露出来。
该函数每一批次调度一次，其实也就是对 DStream 中的每个 RDD 应用转换。
package com.atguigu.streaming
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
* Author lzc
* Date 2019/4/28 6:51 PM
*/
object TransformDemo {
  /**
   * Streaming word count that demonstrates `DStream.transform`.
   *
   * Reads text lines from a socket at `hadoop201:10000` in 3-second batches,
   * applies an RDD-level word count to each batch's RDD through `transform`,
   * prints the per-batch counts, and then blocks until the streaming context
   * terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Practice").setMaster("local[2]")
    val sctx = new StreamingContext(conf, Seconds(3))
    val dstream: ReceiverInputDStream[String] = sctx.socketTextStream("hadoop201", 10000)

    // `transform` hands over the RDD behind each batch, so any RDD-to-RDD
    // function can be used here.
    val resultDStream = dstream.transform { rdd =>
      rdd
        // Split on runs of non-word characters so that adjacent delimiters
        // (e.g. ", ") do not yield empty tokens.
        .flatMap(_.split("\\W+"))
        // A line that starts with a delimiter still produces one leading
        // empty token; drop it so it is not counted as a word.
        .filter(_.nonEmpty)
        .map((_, 1))
        .reduceByKey(_ + _)
    }

    // Side-effecting calls keep their parentheses.
    resultDStream.print()
    sctx.start()
    sctx.awaitTermination()
  }
}