This analysis is based on Spark version 3.1.2.

In local mode, the default minimum number of partitions never exceeds 2.
Consider the following driver program:

```scala
val conf = new SparkConf()
conf.setAppName("my-spark-01")
conf.setMaster("local")
// Uncomment the following line and the partition count becomes 2; otherwise it is 1
// conf.set("spark.default.parallelism", "3")
val sc = new SparkContext(conf)
val lines = sc.textFile("./data/words")
```
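A quick way to confirm the claim is to print the partition count directly; the snippet below is a minimal sketch that assumes a readable text file exists at ./data/words:

```scala
// RDD.getNumPartitions reports how many partitions the RDD actually has
println(s"partitions = ${lines.getNumPartitions}")
// master "local", spark.default.parallelism unset -> partitions = 1
// master "local", spark.default.parallelism = 3   -> partitions = 2
sc.stop()
```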
The chain starts in SparkContext: when the caller does not pass minPartitions, textFile falls back to defaultMinPartitions:

```scala
def textFile(
    path: String,
    minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {...}
```
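Note that the default only matters when the second argument is omitted; passing minPartitions explicitly bypasses the lookup entirely, as in this hypothetical call:

```scala
// Request at least 4 input splits, ignoring defaultMinPartitions
val lines4 = sc.textFile("./data/words", minPartitions = 4)
```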
```scala
// The default minimum number of partitions never exceeds 2
def defaultMinPartitions: Int = math.min(defaultParallelism, 2)
```
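Whatever defaultParallelism resolves to is therefore capped at 2:

```scala
math.min(1, 2) // = 1: master "local", spark.default.parallelism unset
math.min(3, 2) // = 2: spark.default.parallelism set to 3
```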
defaultParallelism in SparkContext simply delegates to the task scheduler:

```scala
def defaultParallelism: Int = {
  ...
  taskScheduler.defaultParallelism
}
```
TaskSchedulerImpl in turn delegates to the scheduler backend:

```scala
override def defaultParallelism(): Int = backend.defaultParallelism()
```
In local mode the backend is LocalSchedulerBackend, where the chain ends:

```scala
// totalCores is set to 1 at initialization when the master is "local"
// If spark.default.parallelism is not set when the program starts, this method returns 1
override def defaultParallelism(): Int =
  scheduler.conf.getInt("spark.default.parallelism", totalCores)
```
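Putting the chain together: textFile → defaultMinPartitions → SparkContext.defaultParallelism → TaskSchedulerImpl → LocalSchedulerBackend. The self-contained sketch below (again assuming a readable text file at ./data/words) compares three local configurations; the expected counts follow from math.min(defaultParallelism, 2). Keep in mind that minPartitions is only a lower bound, so a large multi-block file can still produce more splits.

```scala
import org.apache.spark.{SparkConf, SparkContext}

object MinPartitionsDemo {
  def main(args: Array[String]): Unit = {
    // (master, spark.default.parallelism) pairs to compare
    val cases = Seq(
      ("local", None),      // totalCores = 1 -> min(1, 2) = 1 partition
      ("local", Some("3")), // parallelism = 3 -> min(3, 2) = 2 partitions
      ("local[4]", None)    // totalCores = 4 -> min(4, 2) = 2 partitions
    )
    for ((master, parallelism) <- cases) {
      val conf = new SparkConf().setAppName("min-partitions-demo").setMaster(master)
      parallelism.foreach(conf.set("spark.default.parallelism", _))
      val sc = new SparkContext(conf)
      val lines = sc.textFile("./data/words") // assumed sample input file
      println(s"master=$master, parallelism=$parallelism -> ${lines.getNumPartitions} partition(s)")
      sc.stop()
    }
  }
}
```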