read-avro-files(Python)
Loading...
%scala

// Sample movie ratings; each tuple is (year, month, title, rating).
val df = {
  val sampleRows = Seq(
    (2012, 8, "Batman", 9.8),
    (2012, 8, "Hero", 8.7),
    (2012, 7, "Robot", 5.5),
    (2011, 7, "Git", 2.0)
  )
  sampleRows.toDF("year", "month", "title", "rating")
}

// Persist the rows as Avro, partitioned by year and month.
// "overwrite" replaces any existing output so the cell can be re-run.
df.write
  .format("avro")
  .partitionBy("year", "month")
  .mode("overwrite")
  .save("/tmp/test_dataset")

// List the top level of the output directory.
display(dbutils.fs.ls("/tmp/test_dataset"))
path | name | size
dbfs:/tmp/test_dataset/_SUCCESS | _SUCCESS | 0
dbfs:/tmp/test_dataset/year=2011/ | year=2011/ | 0
dbfs:/tmp/test_dataset/year=2012/ | year=2012/ | 0
%scala

// Load the Avro dataset written to /tmp/test_dataset back into a DataFrame.
val data = spark.read
  .format("avro")
  .load("/tmp/test_dataset")

// Render the loaded rows in the notebook.
display(data)
title | rating | year | month
Batman | 9.8 | 2012 | 8
Robot | 5.5 | 2012 | 7
Hero | 8.7 | 2012 | 8
Git | 2 | 2011 | 7
%r

library(SparkR)

# Read the Avro dataset at /tmp/test_dataset into a SparkDataFrame.
data <- read.df(path = "/tmp/test_dataset", source = "avro")

# Render the loaded rows in the notebook.
display(data)
title | rating | year | month
Batman | 9.8 | 2012 | 8
Robot | 5.5 | 2012 | 7
Hero | 8.7 | 2012 | 8
Git | 2 | 2011 | 7
%python

# Read the Avro dataset at /tmp/test_dataset into a DataFrame.
data = spark.read.load("/tmp/test_dataset", format="avro")

# Render the loaded rows in the notebook.
display(data)
title | rating | year | month
Batman | 9.8 | 2012 | 8
Robot | 5.5 | 2012 | 7
Hero | 8.7 | 2012 | 8
Git | 2 | 2011 | 7
%sql
-- Expose the Avro dataset at /tmp/test_dataset as the temporary view avroTable.
-- OR REPLACE lets this cell be re-run in the same session without failing
-- because the view already exists.
CREATE OR REPLACE TEMPORARY VIEW avroTable
USING avro
OPTIONS (path "/tmp/test_dataset")
OK
%sql
-- Return every row and column of the avroTable temporary view.
SELECT *
FROM avroTable
title | rating | year | month
Batman | 9.8 | 2012 | 8
Robot | 5.5 | 2012 | 7
Hero | 8.7 | 2012 | 8
Git | 2 | 2011 | 7