read-parquet-files(Scala)

%scala 
 
// Case class with scalar, array, and map fields to exercise Parquet's complex types.
case class MyCaseClass(key: String, group: String, value: Int, someints: Seq[Int], somemap: Map[String, Int])

val dataframe = sc.parallelize(Array(
  MyCaseClass("a", "vowels", 1, Array(1), Map("a" -> 1)),
  MyCaseClass("b", "consonants", 2, Array(2, 2), Map("b" -> 2)),
  MyCaseClass("c", "consonants", 3, Array(3, 3, 3), Map("c" -> 3)),
  MyCaseClass("d", "consonants", 4, Array(4, 4, 4, 4), Map("d" -> 4)),
  MyCaseClass("e", "vowels", 5, Array(5, 5, 5, 5, 5), Map("e" -> 5))
)).toDF()

// Write the DataFrame to disk as Parquet, replacing any existing files at the path.
dataframe.write.mode("overwrite").parquet("/tmp/testParquet")
defined class MyCaseClass
dataframe: org.apache.spark.sql.DataFrame = [key: string, group: string ... 3 more fields]
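If downstream queries usually filter on one column, a partitioned layout can also be worth trying. The cell below is a minimal sketch, not part of the original notebook; the /tmp/testParquetPartitioned path is just an example.

%scala

// Sketch (assumed path): write the same data partitioned by "group".
// Each distinct value becomes a subdirectory, e.g. group=vowels/, group=consonants/.
dataframe.write
  .mode("overwrite")
  .partitionBy("group")
  .parquet("/tmp/testParquetPartitioned")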
%scala
 
val data = spark.read.parquet("/tmp/testParquet")
 
display(data)
 
key | group      | value | someints        | somemap
----|------------|-------|-----------------|----------
d   | consonants | 4     | [4, 4, 4, 4]    | {"d": 4}
c   | consonants | 3     | [3, 3, 3]       | {"c": 3}
b   | consonants | 2     | [2, 2]          | {"b": 2}
e   | vowels     | 5     | [5, 5, 5, 5, 5] | {"e": 5}
a   | vowels     | 1     | [1]             | {"a": 1}

Showing all 5 rows.
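Parquet stores the schema alongside the data, so the array and map columns come back fully typed. Because the format is columnar, it often pays to filter and project before any further work. The cell below is a minimal sketch along those lines; the consonants value name is purely illustrative.

%scala

// Sketch: filter and project early so Spark reads only the
// row groups and columns it actually needs from the Parquet files.
val consonants = spark.read.parquet("/tmp/testParquet")
  .where("group = 'consonants'")
  .select("key", "value")

consonants.printSchema()
display(consonants)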

%r 
library(SparkR)
 
data <- read.df("/tmp/testParquet", "parquet")
 
display(data)
 
key | group      | value | someints        | somemap
----|------------|-------|-----------------|----------
d   | consonants | 4     | [4, 4, 4, 4]    | {"d": 4}
c   | consonants | 3     | [3, 3, 3]       | {"c": 3}
b   | consonants | 2     | [2, 2]          | {"b": 2}
e   | vowels     | 5     | [5, 5, 5, 5, 5] | {"e": 5}
a   | vowels     | 1     | [1]             | {"a": 1}

Showing all 5 rows.

%python
 
data = spark.read.parquet("/tmp/testParquet")
 
display(data)
 
key | group      | value | someints        | somemap
----|------------|-------|-----------------|----------
d   | consonants | 4     | [4, 4, 4, 4]    | {"d": 4}
c   | consonants | 3     | [3, 3, 3]       | {"c": 3}
b   | consonants | 2     | [2, 2]          | {"b": 2}
e   | vowels     | 5     | [5, 5, 5, 5, 5] | {"e": 5}
a   | vowels     | 1     | [1]             | {"a": 1}

Showing all 5 rows.

%sql 
CREATE TABLE scalaTable
USING parquet
OPTIONS (path "/tmp/testParquet")
OK
%sql
SELECT * FROM scalaTable
 
key | group      | value | someints        | somemap
----|------------|-------|-----------------|----------
d   | consonants | 4     | [4, 4, 4, 4]    | {"d": 4}
c   | consonants | 3     | [3, 3, 3]       | {"c": 3}
b   | consonants | 2     | [2, 2]          | {"b": 2}
e   | vowels     | 5     | [5, 5, 5, 5, 5] | {"e": 5}
a   | vowels     | 1     | [1]             | {"a": 1}

Showing all 5 rows.
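Since CREATE TABLE registered the Parquet path in the metastore, the table is also reachable from the DataFrame API in the other languages. A minimal Scala sketch, assuming the scalaTable name created above:

%scala

// Sketch: the table created in the %sql cell is visible to spark.sql as well.
val fromTable = spark.sql("SELECT key, value FROM scalaTable WHERE group = 'vowels'")
display(fromTable)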