DataFrame的sql操作。
Spark SQL支持用sql语句查询DataFrame中的数据,需要两步操作:第一步,调用createOrReplaceTempView()将DataFrame注册为一张临时表;第二步,通过SparkSession的sql()方法对临时表执行sql查询。
下面来看一个案例,scala代码如下:
package com.simoniu.scalademo.sql
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
/**
* 需求:使用sql操作DataFrame
* Created by simoniu
*/
object DataFrameSqlScalaDemo {

  /**
   * Demo: query a DataFrame with plain SQL.
   *
   * Reads a JSON file into a DataFrame, registers it as a temporary
   * view, then runs a group-by SQL query against that view.
   */
  def main(args: Array[String]): Unit = {
    // Run with a single-threaded local master for this demo.
    val sparkConf = new SparkConf().setMaster("local")

    // SparkSession bundles the SparkContext and SqlContext.
    val session = SparkSession
      .builder()
      .appName("DataFrameSqlScala")
      .config(sparkConf)
      .getOrCreate()

    // Load the JSON file into a DataFrame.
    val students = session.read.json("D:\\uploadFiles\\students.json")

    // Expose the DataFrame as a temporary view so SQL can reference it by name.
    students.createOrReplaceTempView("student")

    // Count rows per gender via SQL and print the result table.
    session
      .sql("select gender,count(*) as num from student group by gender")
      .show()

    session.stop()
  }
}
java代码实现:
package com.simoniu.sparkdemo.javademo.sql;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
* 需求:使用sql操作DataFrame
* Created by simoniu
*/
public class DataFrameSqlJavaDemo {

    /**
     * Demo: query a Dataset&lt;Row&gt; with plain SQL.
     *
     * Reads a JSON file into a Dataset&lt;Row&gt;, registers it as a
     * temporary view, then runs a group-by SQL query against that view.
     */
    public static void main(String[] args) {
        // Run with a single-threaded local master for this demo.
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");

        // SparkSession bundles the SparkContext and SqlContext.
        SparkSession session = SparkSession.builder()
                .appName("DataFrameSqlJava")
                .config(sparkConf)
                .getOrCreate();

        // Load the JSON file into a Dataset<Row>.
        Dataset<Row> students = session.read().json("D:\\uploadFiles\\students.json");

        // Expose the Dataset as a temporary view so SQL can reference it by name.
        students.createOrReplaceTempView("student");

        // Count rows per gender via SQL and print the result table.
        session.sql("select gender,count(*) as num from student group by gender")
                .show();

        session.stop();
    }
}
运行结果:
+------+---+
|gender|num|
+------+---+
|female| 2|
| male| 4|
+------+---+