import org.apache.spark.sql.types._

// Explicit schema for the diamonds CSV, declared as (column name, column type)
// pairs in file order. Every column is marked nullable.
val schema = StructType(
  Seq[(String, DataType)](
    "_c0"     -> IntegerType,
    "carat"   -> DoubleType,
    "cut"     -> StringType,
    "color"   -> StringType,
    "clarity" -> StringType,
    "depth"   -> DoubleType,
    "table"   -> DoubleType,
    "price"   -> IntegerType,
    "x"       -> DoubleType,
    "y"       -> DoubleType,
    "z"       -> DoubleType
  ).map { case (columnName, columnType) =>
    StructField(columnName, columnType, nullable = true)
  }
)

// Read the CSV with the schema above instead of letting Spark infer one.
// The "header" option is set to "true" for this read.
val diamonds_with_schema = {
  val csvReader = spark.read
    .format("csv")
    .schema(schema)
    .option("header", "true")

  csvReader.load("/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv")
}
import org.apache.spark.sql.types._
schema: org.apache.spark.sql.types.StructType = StructType(StructField(_c0,IntegerType,true), StructField(carat,DoubleType,true), StructField(cut,StringType,true), StructField(color,StringType,true), StructField(clarity,StringType,true), StructField(depth,DoubleType,true), StructField(table,DoubleType,true), StructField(price,IntegerType,true), StructField(x,DoubleType,true), StructField(y,DoubleType,true), StructField(z,DoubleType,true))
diamonds_with_schema: org.apache.spark.sql.DataFrame = [_c0: int, carat: double ... 9 more fields]
// Print the DataFrame's column names, types and nullability as a tree.
diamonds_with_schema.printSchema()
root
|-- _c0: integer (nullable = true)
|-- carat: double (nullable = true)
|-- cut: string (nullable = true)
|-- color: string (nullable = true)
|-- clarity: string (nullable = true)
|-- depth: double (nullable = true)
|-- table: double (nullable = true)
|-- price: integer (nullable = true)
|-- x: double (nullable = true)
|-- y: double (nullable = true)
|-- z: double (nullable = true)