// Write a small newline-delimited JSON sample to /tmp/test.json.
// Each record sits on its own line so that a line-delimited JSON reader
// (Spark's default JSON mode) sees three separate records. The third record
// carries an additional "extra_key" field inside "dict".
// The payload is 243 bytes including the leading and trailing newlines.
// The final argument (true) asks put to overwrite an existing file.
dbutils.fs.put("/tmp/test.json", """
{"string":"string1","int":1,"array":[1,2,3],"dict": {"key": "value1"}}
{"string":"string2","int":2,"array":[2,4,6],"dict": {"key": "value2"}}
{"string":"string3","int":3,"array":[3,6,9],"dict": {"key": "value3", "extra_key": "extra_value3"}}
""", true)
Wrote 243 bytes.
res3: Boolean = true
// Write a JSON array to /tmp/multi-line.json in which the third record is
// pretty-printed across several lines. This is the input for the
// "multiline" read below: the file is one JSON document, not one record
// per line. With this layout (4-space indentation, no trailing newline)
// the payload is 385 bytes.
// The final argument (true) asks put to overwrite an existing file.
dbutils.fs.put("/tmp/multi-line.json", """[
    {"string":"string1","int":1,"array":[1,2,3],"dict": {"key": "value1"}},
    {"string":"string2","int":2,"array":[2,4,6],"dict": {"key": "value2"}},
    {
        "string": "string3",
        "int": 3,
        "array": [
            3,
            6,
            9
        ],
        "dict": {
            "key": "value3",
            "extra_key": "extra_value3"
        }
    }
]""", true)
Wrote 385 bytes.
res6: Boolean = true
// Load the JSON array written to /tmp/multi-line.json. The "multiline"
// option tells the reader that a single record may span several lines,
// so the file is parsed as a whole document rather than line by line.
val mldf = spark.read.option("multiline", "true").json("/tmp/multi-line.json")

// Display the rows without truncating long cell values.
mldf.show(truncate = false)
+---------+----------------------+---+-------+
|array    |dict                  |int|string |
+---------+----------------------+---+-------+
|[1, 2, 3]|[, value1]            |1  |string1|
|[2, 4, 6]|[, value2]            |2  |string2|
|[3, 6, 9]|[extra_value3, value3]|3  |string3|
+---------+----------------------+---+-------+
mldf: org.apache.spark.sql.DataFrame = [array: array<bigint>, dict: struct<extra_key: string, key: string> ... 2 more fields]