/**
 * Small demo entry point that queries an existing Hive table through Spark SQL.
 *
 * Starts a local Spark session with Hive support enabled, switches to the
 * `gmall` database and prints the row count of `ads_uv_count`.
 */
object HiveRead {

  /**
   * Runs the demo.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HiveRead")
      .enableHiveSupport()
      .getOrCreate()

    // Release the session even when one of the queries fails (e.g. the
    // database or table does not exist).
    try {
      // NOTE(review): the result of this statement is never displayed or used;
      // append `.show()` if listing the databases was the intent.
      spark.sql("show databases")
      spark.sql("use gmall")
      spark.sql("select count(*) from ads_uv_count").show()
    } finally {
      spark.close()
    }
  }
}
/**
 * Small demo entry point that writes Spark SQL query results back to Hive tables.
 *
 * Reads table `a` from the `spark1016` database, then saves
 *  - every row of `a` into table `a1`, and
 *  - the per-name sum of `age` into table `a2` (overwriting any existing `a2`).
 */
object HiveWrite2 {

  /**
   * Runs the demo.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Set before the session is created; presumably this makes the Hadoop
    // client act as this user on HDFS — confirm against the cluster setup.
    System.setProperty("HADOOP_USER_NAME", "xingmeng")

    // NOTE(review): the app name is "HiveRead" although this object is
    // HiveWrite2 — confirm whether that is intentional.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HiveRead")
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop102:9000/user/hive/warehouse")
      .getOrCreate()

    // Release the session even when a query or a write fails.
    try {
      // First create a database (one-time setup, kept for reference):
      // spark.sql("create database spark1016")
      // spark.sql("use spark1016")
      // spark.sql("create table user1(id int, name string)").show()
      // spark.sql("insert into table user1 VALUES(10,'lisi')")

      spark.sql("use spark1016")
      val allRows = spark.sql("select * from a")
      val ageSumByName = spark.sql("select sum(age) sum_age from a group by name")

      // NOTE(review): no save mode is set here, unlike the `a2` write below;
      // with Spark's default mode this fails if `a1` already exists — confirm
      // that this is the desired behavior on re-runs.
      allRows.write.saveAsTable("a1")

      // After the aggregation the result has 200 partitions (original author's
      // note), so collapse it to a single partition before writing.
      ageSumByName.coalesce(1).write.mode("overwrite").saveAsTable("a2")
    } finally {
      spark.close()
    }
  }
}