read-csv-corrupt-record(Scala)
Loading...

PERMISSIVE mode (default)

import org.apache.spark.sql.types._

// User-specified schema for the diamonds CSV: one nullable field per column, in file order,
// followed by the extra _corrupt_record column.
val schema = StructType(Seq(
  StructField("_c0", IntegerType, nullable = true),
  StructField("carat", DoubleType, nullable = true),
  StructField("cut", StringType, nullable = true),
  StructField("color", StringType, nullable = true),
  StructField("clarity", StringType, nullable = true),
  // The depth field is defined wrongly: the actual data contains floating point numbers,
  // while the schema specifies an integer.
  StructField("depth", IntegerType, nullable = true),
  StructField("table", DoubleType, nullable = true),
  StructField("price", IntegerType, nullable = true),
  StructField("x", DoubleType, nullable = true),
  StructField("y", DoubleType, nullable = true),
  StructField("z", DoubleType, nullable = true),
  // Special column that does not exist in the data; it captures the raw text of rows
  // that did not parse correctly.
  StructField("_corrupt_record", StringType, nullable = true)
))

// Read the diamonds CSV with the user-specified schema. No "mode" option is set, so the
// reader uses its default parse mode (PERMISSIVE).
val diamonds_with_wrong_schema = {
  val csvPath = "/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv"
  val readerOptions = Map("header" -> "true")
  spark.read
    .format("csv")
    .schema(schema)
    .options(readerOptions)
    .load(csvPath)
}
import org.apache.spark.sql.types._ schema: org.apache.spark.sql.types.StructType = StructType(StructField(_c0,IntegerType,true), StructField(carat,DoubleType,true), StructField(cut,StringType,true), StructField(color,StringType,true), StructField(clarity,StringType,true), StructField(depth,IntegerType,true), StructField(table,DoubleType,true), StructField(price,IntegerType,true), StructField(x,DoubleType,true), StructField(y,DoubleType,true), StructField(z,DoubleType,true), StructField(_corrupt_record,StringType,true)) diamonds_with_wrong_schema: org.apache.spark.sql.DataFrame = [_c0: int, carat: double ... 10 more fields]
// The mistake in the user-specified schema causes the depth field to be set to null in every row whose depth value is not an integer.
// There are some rows where the value of depth is a whole number, e.g. 64. They are parsed and converted successfully.
// The _corrupt_record column shows the string with the original row data, which helps find the issue.
display(diamonds_with_wrong_schema)
10.23IdealESI2null553263.953.982.43"1",0.23,"Ideal","E","SI2",61.5,55,326,3.95,3.98,2.43
20.21PremiumESI1null613263.893.842.31"2",0.21,"Premium","E","SI1",59.8,61,326,3.89,3.84,2.31
30.23GoodEVS1null653274.054.072.31"3",0.23,"Good","E","VS1",56.9,65,327,4.05,4.07,2.31
40.29PremiumIVS2null583344.24.232.63"4",0.29,"Premium","I","VS2",62.4,58,334,4.2,4.23,2.63
50.31GoodJSI2null583354.344.352.75"5",0.31,"Good","J","SI2",63.3,58,335,4.34,4.35,2.75
60.24Very GoodJVVS2null573363.943.962.48"6",0.24,"Very Good","J","VVS2",62.8,57,336,3.94,3.96,2.48
70.24Very GoodIVVS1null573363.953.982.47"7",0.24,"Very Good","I","VVS1",62.3,57,336,3.95,3.98,2.47
80.26Very GoodHSI1null553374.074.112.53"8",0.26,"Very Good","H","SI1",61.9,55,337,4.07,4.11,2.53
90.22FairEVS2null613373.873.782.49"9",0.22,"Fair","E","VS2",65.1,61,337,3.87,3.78,2.49
100.23Very GoodHVS1null6133844.052.39"10",0.23,"Very Good","H","VS1",59.4,61,338,4,4.05,2.39
110.3GoodJSI164553394.254.282.73null
120.23IdealJVS1null563403.933.92.46"12",0.23,"Ideal","J","VS1",62.8,56,340,3.93,3.9,2.46
130.22PremiumFSI1null613423.883.842.33"13",0.22,"Premium","F","SI1",60.4,61,342,3.88,3.84,2.33
140.31IdealJSI2null543444.354.372.71"14",0.31,"Ideal","J","SI2",62.2,54,344,4.35,4.37,2.71
150.2PremiumESI2null623453.793.752.27"15",0.2,"Premium","E","SI2",60.2,62,345,3.79,3.75,2.27
160.32PremiumEI1null583454.384.422.68"16",0.32,"Premium","E","I1",60.9,58,345,4.38,4.42,2.68
170.3IdealISI262543484.314.342.68null
180.3GoodJSI1null543514.234.292.7"18",0.3,"Good","J","SI1",63.4,54,351,4.23,4.29,2.7
190.3GoodJSI1null563514.234.262.71"19",0.3,"Good","J","SI1",63.8,56,351,4.23,4.26,2.71
200.3Very GoodJSI1null593514.214.272.66"20",0.3,"Very Good","J","SI1",62.7,59,351,4.21,4.27,2.66
210.3GoodISI2null563514.264.32.71"21",0.3,"Good","I","SI2",63.3,56,351,4.26,4.3,2.71
220.23Very GoodEVS2null553523.853.922.48"22",0.23,"Very Good","E","VS2",63.8,55,352,3.85,3.92,2.48
230.23Very GoodHVS161573533.943.962.41null
240.31Very GoodJSI1null623534.394.432.62"24",0.31,"Very Good","J","SI1",59.4,62,353,4.39,4.43,2.62
250.31Very GoodJSI1null623534.444.472.59"25",0.31,"Very Good","J","SI1",58.1,62,353,4.44,4.47,2.59
260.23Very GoodGVVS2null583543.974.012.41"26",0.23,"Very Good","G","VVS2",60.4,58,354,3.97,4.01,2.41
270.24PremiumIVS1null573553.973.942.47"27",0.24,"Premium","I","VS1",62.5,57,355,3.97,3.94,2.47
280.3Very GoodJVS2null573574.284.32.67"28",0.3,"Very Good","J","VS2",62.2,57,357,4.28,4.3,2.67
290.23Very GoodDVS2null613573.963.972.4"29",0.23,"Very Good","D","VS2",60.5,61,357,3.96,3.97,2.4
300.23Very GoodFVS1null573573.963.992.42"30",0.23,"Very Good","F","VS1",60.9,57,357,3.96,3.99,2.42
310.23Very GoodFVS1605740244.032.41null
320.23Very GoodFVS1null574024.044.062.42"32",0.23,"Very Good","F","VS1",59.8,57,402,4.04,4.06,2.42
330.23Very GoodEVS1null594023.974.012.42"33",0.23,"Very Good","E","VS1",60.7,59,402,3.97,4.01,2.42
340.23Very GoodEVS1null584024.014.062.4"34",0.23,"Very Good","E","VS1",59.5,58,402,4.01,4.06,2.4
350.23Very GoodDVS1null584023.923.962.44"35",0.23,"Very Good","D","VS1",61.9,58,402,3.92,3.96,2.44
360.23GoodFVS1null594024.064.082.37"36",0.23,"Good","F","VS1",58.2,59,402,4.06,4.08,2.37
370.23GoodEVS1null594023.833.852.46"37",0.23,"Good","E","VS1",64.1,59,402,3.83,3.85,2.46
380.31GoodHSI164544024.294.312.75null
390.26Very GoodDVS2null594034.134.162.52"39",0.26,"Very Good","D","VS2",60.8,59,403,4.13,4.16,2.52
400.33IdealISI2null554034.494.512.78"40",0.33,"Ideal","I","SI2",61.8,55,403,4.49,4.51,2.78
410.33IdealISI2null564034.494.52.75"41",0.33,"Ideal","I","SI2",61.2,56,403,4.49,4.5,2.75
420.33IdealJSI1null564034.494.552.76"42",0.33,"Ideal","J","SI1",61.1,56,403,4.49,4.55,2.76
430.26GoodDVS2null564033.994.022.61"43",0.26,"Good","D","VS2",65.2,56,403,3.99,4.02,2.61
440.26GoodDVS1null634034.194.242.46"44",0.26,"Good","D","VS1",58.4,63,403,4.19,4.24,2.46
450.32GoodHSI2null564034.344.372.75"45",0.32,"Good","H","SI2",63.1,56,403,4.34,4.37,2.75
460.29PremiumFSI1null584034.244.262.65"46",0.29,"Premium","F","SI1",62.4,58,403,4.24,4.26,2.65
470.32Very GoodHSI2null554034.354.422.71"47",0.32,"Very Good","H","SI2",61.8,55,403,4.35,4.42,2.71
480.32GoodHSI2null564034.364.382.79"48",0.32,"Good","H","SI2",63.8,56,403,4.36,4.38,2.79
490.25Very GoodEVS2null6040444.032.54"49",0.25,"Very Good","E","VS2",63.3,60,404,4,4.03,2.54
500.29Very GoodHSI2null604044.334.372.64"50",0.29,"Very Good","H","SI2",60.7,60,404,4.33,4.37,2.64
510.24Very GoodFSI1null614044.024.032.45"51",0.24,"Very Good","F","SI1",60.9,61,404,4.02,4.03,2.45
520.23IdealGVS1null544043.933.952.44"52",0.23,"Ideal","G","VS1",61.9,54,404,3.93,3.95,2.44
530.32IdealISI1null554044.454.482.72"53",0.32,"Ideal","I","SI1",60.9,55,404,4.45,4.48,2.72
540.22PremiumEVS2null584043.933.892.41"54",0.22,"Premium","E","VS2",61.6,58,404,3.93,3.89,2.41
550.22PremiumDVS2null624043.913.882.31"55",0.22,"Premium","D","VS2",59.3,62,404,3.91,3.88,2.31
560.3IdealISI261594054.34.332.63null
570.3PremiumJSI2null614054.434.382.61"57",0.3,"Premium","J","SI2",59.3,61,405,4.43,4.38,2.61
580.3Very GoodISI1null574054.254.282.67"58",0.3,"Very Good","I","SI1",62.6,57,405,4.25,4.28,2.67
590.3Very GoodISI163574054.284.322.71null
600.3GoodISI1null554054.254.292.7"60",0.3,"Good","I","SI1",63.2,55,405,4.25,4.29,2.7
610.35IdealIVS1null575524.544.592.78"61",0.35,"Ideal","I","VS1",60.9,57,552,4.54,4.59,2.78
620.3PremiumDSI1null595524.234.272.66"62",0.3,"Premium","D","SI1",62.6,59,552,4.23,4.27,2.66
630.3IdealDSI1null575524.294.322.69"63",0.3,"Ideal","D","SI1",62.5,57,552,4.29,4.32,2.69
640.3IdealDSI1null565524.34.332.68"64",0.3,"Ideal","D","SI1",62.1,56,552,4.3,4.33,2.68
650.42PremiumISI2null595524.784.842.96"65",0.42,"Premium","I","SI2",61.5,59,552,4.78,4.84,2.96
660.28IdealGVVS2null565534.194.222.58"66",0.28,"Ideal","G","VVS2",61.4,56,553,4.19,4.22,2.58
670.32IdealIVVS16255.35534.394.422.73null
680.31Very GoodGSI1null575534.334.32.73"68",0.31,"Very Good","G","SI1",63.3,57,553,4.33,4.3,2.73
690.31PremiumGSI1null585534.354.322.68"69",0.31,"Premium","G","SI1",61.8,58,553,4.35,4.32,2.68
700.24PremiumEVVS1null585534.014.032.44"70",0.24,"Premium","E","VVS1",60.7,58,553,4.01,4.03,2.44
710.24Very GoodDVVS1null605533.9742.45"71",0.24,"Very Good","D","VVS1",61.5,60,553,3.97,4,2.45
720.3Very GoodHSI1null565544.294.272.7"72",0.3,"Very Good","H","SI1",63.1,56,554,4.29,4.27,2.7
730.3PremiumHSI1null595544.284.242.68"73",0.3,"Premium","H","SI1",62.9,59,554,4.28,4.24,2.68
740.3PremiumHSI1null575544.294.252.67"74",0.3,"Premium","H","SI1",62.5,57,554,4.29,4.25,2.67
750.3GoodHSI1null575544.284.262.72"75",0.3,"Good","H","SI1",63.7,57,554,4.28,4.26,2.72
760.26Very GoodFVVS2null605544.194.222.49"76",0.26,"Very Good","F","VVS2",59.2,60,554,4.19,4.22,2.49
770.26Very GoodEVVS2null585544.154.232.51"77",0.26,"Very Good","E","VVS2",59.9,58,554,4.15,4.23,2.51
780.26Very GoodDVVS2null545544.084.132.56"78",0.26,"Very Good","D","VVS2",62.4,54,554,4.08,4.13,2.56
790.26Very GoodDVVS2null605544.014.052.53"79",0.26,"Very Good","D","VVS2",62.8,60,554,4.01,4.05,2.53
800.26Very GoodEVVS1null595544.064.092.55"80",0.26,"Very Good","E","VVS1",62.6,59,554,4.06,4.09,2.55
810.26Very GoodEVVS1null5955444.042.55"81",0.26,"Very Good","E","VVS1",63.4,59,554,4,4.04,2.55
820.26Very GoodDVVS1null605544.034.122.53"82",0.26,"Very Good","D","VVS1",62.1,60,554,4.03,4.12,2.53
830.26IdealEVVS2null585544.024.062.54"83",0.26,"Ideal","E","VVS2",62.9,58,554,4.02,4.06,2.54
840.38IdealISI2null565544.654.672.87"84",0.38,"Ideal","I","SI2",61.6,56,554,4.65,4.67,2.87
850.26GoodEVVS1null605544.224.252.45"85",0.26,"Good","E","VVS1",57.9,60,554,4.22,4.25,2.45
860.24PremiumGVVS1null595543.953.922.45"86",0.24,"Premium","G","VVS1",62.3,59,554,3.95,3.92,2.45
870.24PremiumHVVS1null585544.013.962.44"87",0.24,"Premium","H","VVS1",61.2,58,554,4.01,3.96,2.44
880.24PremiumHVVS1null595544.0242.44"88",0.24,"Premium","H","VVS1",60.8,59,554,4.02,4,2.44
890.24PremiumHVVS2null585544.074.042.46"89",0.24,"Premium","H","VVS2",60.7,58,554,4.07,4.04,2.46
900.32PremiumISI1null585544.354.332.73"90",0.32,"Premium","I","SI1",62.9,58,554,4.35,4.33,2.73
910.7IdealESI1null5727575.75.723.57"91",0.7,"Ideal","E","SI1",62.5,57,2757,5.7,5.72,3.57
920.86FairESI2null6927576.456.333.52"92",0.86,"Fair","E","SI2",55.1,69,2757,6.45,6.33,3.52
930.7IdealGVS2null5627575.75.673.5"93",0.7,"Ideal","G","VS2",61.6,56,2757,5.7,5.67,3.5
940.71Very GoodEVS2null5727595.685.733.56"94",0.71,"Very Good","E","VS2",62.4,57,2759,5.68,5.73,3.56
950.78Very GoodGSI2null5627595.815.853.72"95",0.78,"Very Good","G","SI2",63.8,56,2759,5.81,5.85,3.72
960.7GoodEVS2null5827595.855.93.38"96",0.7,"Good","E","VS2",57.5,58,2759,5.85,5.9,3.38
970.7GoodFVS1null6227595.715.763.4"97",0.7,"Good","F","VS1",59.4,62,2759,5.71,5.76,3.4
980.96FairFSI2null6227596.275.954.07"98",0.96,"Fair","F","SI2",66.3,62,2759,6.27,5.95,4.07
990.73Very GoodESI1null5927605.775.783.56"99",0.73,"Very Good","E","SI1",61.6,59,2760,5.77,5.78,3.56
1000.8PremiumHSI1null5827605.975.933.66"100",0.8,"Premium","H","SI1",61.5,58,2760,5.97,5.93,3.66

Showing the first 1000 rows.

// Since Spark 2.3, queries on raw JSON/CSV files are disallowed when the referenced columns only include
// the internal corrupt record column (named _corrupt_record by default).
// For example: spark.read.schema(schema).csv(file).filter($"_corrupt_record".isNotNull).count() and spark.read.schema(schema).csv(file).select("_corrupt_record").show().
// Instead, cache or save the parsed results first and then send the same query against them.

// Cache the parsed DataFrame itself (before filtering), which is the pattern the Spark migration
// guide shows, so the corrupt-record query runs against the cached parse results rather than the raw file.
diamonds_with_wrong_schema.cache()
val badRows = diamonds_with_wrong_schema.filter($"_corrupt_record".isNotNull)
// Count the rows that failed to parse; the cache is released even if the count throws.
val numBadRows = try badRows.count() finally diamonds_with_wrong_schema.unpersist()
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 167.0 failed 4 times, most recent failure: Lost task 0.3 in stage 167.0 (TID 877, 10.97.245.56, executor 0): com.databricks.sql.io.FileReadException: Error while reading file dbfs:/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv.

DROPMALFORMED mode

// Same read as the PERMISSIVE example, but with the parse mode set to DROPMALFORMED.
val diamonds_with_wrong_schema_drop_malformed = {
  val csvPath = "/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv"
  val readerOptions = Map(
    "mode" -> "DROPMALFORMED",
    "header" -> "true"
  )
  spark.read
    .format("csv")
    .schema(schema)
    .options(readerOptions)
    .load(csvPath)
}
diamonds_with_wrong_schema_drop_malformed: org.apache.spark.sql.DataFrame = [_c0: int, carat: double ... 10 more fields]
// In DROPMALFORMED mode the rows that do not match the schema are left out of the result:
// only rows with a whole-number depth remain, and _corrupt_record is null for each of them.
display(diamonds_with_wrong_schema_drop_malformed)
110.3GoodJSI164553394.254.282.73null
170.3IdealISI262543484.314.342.68null
230.23Very GoodHVS161573533.943.962.41null
310.23Very GoodFVS1605740244.032.41null
380.31GoodHSI164544024.294.312.75null
560.3IdealISI261594054.34.332.63null
590.3Very GoodISI163574054.284.322.71null
670.32IdealIVVS16255.35534.394.422.73null
1070.8PremiumGSI1635927605.95.813.69null
1100.59IdealEVVS2625527615.385.433.35null
1130.9PremiumIVS2635827616.166.123.87null
1310.77IdealHVS2625627635.895.863.64null
1440.7Very GoodFVS1605727675.85.873.5null
1670.8Very GoodFSI2615727726.016.033.67null
1720.7PremiumDVS2586227735.875.783.38null
1940.7PremiumESI1605927775.795.753.46null
1970.7PremiumESI1615727775.735.683.48null
1980.7PremiumESI1615827775.785.723.51null
2310.72Very GoodFVS2635427845.695.733.6null
2340.51IdealFVVS1625727875.115.153.18null
2520.81GoodGSI2616127895.945.993.64null
2810.72PremiumDSI2626027955.735.693.54null
2820.72PremiumIIF635727955.725.73.6null
2830.81PremiumHVS2585927956.176.133.57null
2980.8PremiumFSI2615727976.036.013.67null
3020.83Very GoodESI2586227996.196.253.61null
3150.76FairGVS1597028005.895.83.46null
3280.72PremiumEVS2635528025.795.613.59null
3360.71PremiumFVS2586228035.855.813.38null
3440.71Very GoodEVS2645728045.665.683.63null
3500.7IdealDSI1615928045.685.73.47null
3610.82GoodGVS2645728055.925.893.78null
3660.71Very GoodFVS1605728075.845.93.52null
3690.7Very GoodFVS1625728085.645.713.52null
3870.7Very GoodGVS1636028125.575.643.53null
3940.32PremiumISI161595544.394.362.67null
4120.34IdealIVS162565554.54.532.8null
4190.3IdealGVS262565564.284.32.66null
4240.99FairJSI1556128126.726.673.68null
4260.51IdealFVVS1625728125.155.113.18null
4400.9FairJVS2655628156.086.043.94null
4410.95FairFSI2566028156.626.533.68null
4650.7PremiumEVS2636028185.645.63.54null
4680.8IdealHSI1615728186.0763.68null
4790.75IdealESI1625728215.85.783.59null
4930.6IdealFVVS2625528225.375.43.34null
5000.9GoodJVS2646128226.046.033.86null
5230.83PremiumHSI1605928256.086.053.64null
5240.73Very GoodGVS1625728255.755.83.58null
5380.72GoodDVS2645428275.685.73.64null
5460.59IdealEVVS1625628295.365.383.33null
5650.81PremiumGSI1636028325.875.813.68null
5690.8Very GoodIVS2625828335.865.953.66null
5700.56Very GoodEIF615928335.285.343.24null
5720.7IdealDVS2615728335.745.763.51null
5880.72IdealESI1615728355.785.83.53null
6050.7PremiumGVS1636028385.645.573.53null
6060.74Very GoodESI1605728395.855.893.52null
6110.71PremiumFVS1596028395.825.83.43null
6210.77Very GoodHVS1616028405.95.873.59null
6230.7IdealFSI1615628405.755.83.52null
6490.72IdealEVS2625728435.715.743.55null
6530.7Very GoodEVS1625928445.655.683.51null
6800.7Very GoodEVS2616028505.745.773.51null
6860.87Very GoodFSI2616328516.226.073.75null
6880.74IdealFSI1615728515.855.813.56null
6970.79PremiumDSI2606028536.076.033.63null
7030.73PremiumEVS2625728545.865.763.6null
7141.02FairISI1536328566.846.773.66null
7340.31IdealIVVS162545574.374.42.72null
7440.33PremiumFSI263585574.424.42.78null
7480.33IdealISI163575574.394.372.76null
7530.77PremiumFSI1615828565.945.93.61null
7620.71PremiumEVS2616028585.765.693.49null
7730.7IdealDVS2615728595.765.743.51null
7880.9PremiumISI1635828616.096.013.81null
7910.66PremiumDVS1615828615.675.573.43null
7990.79PremiumHVS1606028626.075.993.64null
8080.71PremiumDSI1616128635.825.753.53null
8200.71IdealEVS2615528635.795.753.52null
8240.56Very GoodDVVS1625628665.255.33.27null
8430.79PremiumESI2615828685.965.93.62null
8670.7Very GoodGSI2596228725.795.813.42null
8760.76Very GoodFVS2625828735.85.863.62null
8830.9GoodJSI16461287365.963.83null
8890.7PremiumFVS1595928745.795.773.41null
8920.7IdealFVS1615528745.775.733.51null
9170.84IdealGSI2615628796.136.13.73null
9210.72IdealFVS1625628795.765.733.56null
9240.7IdealHVVS1625528815.745.713.55null
9250.71Very GoodEVS2605928815.845.833.5null
9261.05PremiumHI1625928816.56.474.02null
9340.9FairHSI2656128836.015.963.89null
10140.81IdealESI2635629015.955.93.73null
10170.73PremiumEVS2626029025.765.733.56null
10270.79Very GoodFSI1635429045.915.943.73null
10300.62IdealEVVS2625629045.485.523.41null
10750.31PremiumHSI161615584.394.332.66null
10830.7PremiumDVS2616029095.755.73.49null
10981FairISI1665629126.316.244.13null

Showing the first 1000 rows.

FAILFAST mode

// Same read as the PERMISSIVE example, but with the parse mode set to FAILFAST.
val diamonds_with_wrong_schema_fail_fast = {
  val csvPath = "/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv"
  val readerOptions = Map(
    "mode" -> "FAILFAST",
    "header" -> "true"
  )
  spark.read
    .format("csv")
    .schema(schema)
    .options(readerOptions)
    .load(csvPath)
}
diamonds_with_wrong_schema_fail_fast: org.apache.spark.sql.DataFrame = [_c0: int, carat: double ... 10 more fields]
// Defining the DataFrame succeeds; the failure only surfaces when an action reads the file.
// In FAILFAST mode the job aborts on the first malformed record (here the depth value "61.5",
// which cannot be parsed as an integer).
display(diamonds_with_wrong_schema_fail_fast)
SparkException: Job aborted due to stage failure: Task 0 in stage 170.0 failed 4 times, most recent failure: Lost task 0.3 in stage 170.0 (TID 882, 10.97.240.8, executor 1): com.databricks.sql.io.FileReadException: Error while reading file dbfs:/databricks-datasets/Rdatasets/data-001/csv/ggplot2/diamonds.csv. at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:340) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:319) at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:406) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:259) at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80) at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:187) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144) at org.apache.spark.scheduler.Task.run(Task.scala:117) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:639) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1559) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:642) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.spark.SparkException: Malformed records are detected in record parsing. Parse Mode: FAILFAST. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'. 
at org.apache.spark.sql.catalyst.util.FailureSafeParser.parse(FailureSafeParser.scala:77) at org.apache.spark.sql.catalyst.csv.UnivocityParser$.$anonfun$parseIterator$2(UnivocityParser.scala:411) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490) at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31) at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:292) ... 15 more Caused by: org.apache.spark.sql.catalyst.util.BadRecordException: java.lang.NumberFormatException: For input string: "61.5" at org.apache.spark.sql.catalyst.csv.UnivocityParser.org$apache$spark$sql$catalyst$csv$UnivocityParser$$convert(UnivocityParser.scala:308) at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$parse$2(UnivocityParser.scala:253) at org.apache.spark.sql.catalyst.csv.UnivocityParser$.$anonfun$parseIterator$1(UnivocityParser.scala:404) at org.apache.spark.sql.catalyst.util.FailureSafeParser.parse(FailureSafeParser.scala:64) ... 
21 more Caused by: java.lang.NumberFormatException: For input string: "61.5" at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) at java.lang.Integer.parseInt(Integer.java:580) at java.lang.Integer.parseInt(Integer.java:615) at scala.collection.immutable.StringLike.toInt(StringLike.scala:304) at scala.collection.immutable.StringLike.toInt$(StringLike.scala:304) at scala.collection.immutable.StringOps.toInt(StringOps.scala:33) at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$makeConverter$6(UnivocityParser.scala:156) at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$makeConverter$6$adapted(UnivocityParser.scala:156) at org.apache.spark.sql.catalyst.csv.UnivocityParser.nullSafeDatum(UnivocityParser.scala:237) at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$makeConverter$5(UnivocityParser.scala:156) at org.apache.spark.sql.catalyst.csv.UnivocityParser.org$apache$spark$sql$catalyst$csv$UnivocityParser$$convert(UnivocityParser.scala:290) ... 24 more Driver stacktrace: