timestamp-conversion(Scala)

Convert a date string into TimestampType in Spark SQL

Convert a date string into a timestamp by parsing it with unix_timestamp and casting the result to TimestampType. Note that unix_timestamp interprets the string in the session time zone (spark.sql.session.timeZone), so you may need to set a specific time zone before parsing.

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
val df = Seq(
  (1, "2014/01/01 23:00:01"), 
  (1, "2014/11/31 12:40:32"), 
  (1, "2016/12/29 09:54:00"), 
  (1, "2016/05/09 10:12:43")).toDF("id", "date")
df: org.apache.spark.sql.DataFrame = [id: int, date: string]
val res = df.select(
  $"id",
  $"date",
  unix_timestamp($"date", "yyyy/MM/dd HH:mm:ss").cast(TimestampType).as("timestamp"),
  current_timestamp(),
  current_date())
res: org.apache.spark.sql.DataFrame = [id: int, date: string ... 3 more fields]
res.printSchema
root
 |-- id: integer (nullable = false)
 |-- date: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- current_timestamp(): timestamp (nullable = false)
 |-- current_date(): date (nullable = false)
res.show(false)
+---+-------------------+-------------------+-----------------------+--------------+
|id |date               |timestamp          |current_timestamp()    |current_date()|
+---+-------------------+-------------------+-----------------------+--------------+
|1  |2014/01/01 23:00:01|2014-01-01 23:00:01|2020-05-20 23:29:16.471|2020-05-20    |
|1  |2014/11/31 12:40:32|null               |2020-05-20 23:29:16.471|2020-05-20    |
|1  |2016/12/29 09:54:00|2016-12-29 09:54:00|2020-05-20 23:29:16.471|2020-05-20    |
|1  |2016/05/09 10:12:43|2016-05-09 10:12:43|2020-05-20 23:29:16.471|2020-05-20    |
+---+-------------------+-------------------+-----------------------+--------------+
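
The second row parses to null because 2014/11/31 is not a valid calendar date, so unix_timestamp cannot match it against the pattern. If the input strings were recorded in a different time zone than the Spark session, a minimal sketch (assuming Spark 2.2+ and a UTC source, which is an example value rather than something from this notebook) is to set spark.sql.session.timeZone before parsing; to_timestamp can also be used to get a TimestampType column directly without the explicit cast.

// Sketch: parse the same strings as UTC instead of the session default.
// "UTC" is an assumed example time zone, not part of the original notebook.
spark.conf.set("spark.sql.session.timeZone", "UTC")

val resUtc = df.select(
  $"id",
  $"date",
  // to_timestamp (Spark 2.2+) returns TimestampType directly, no cast needed
  to_timestamp($"date", "yyyy/MM/dd HH:mm:ss").as("timestamp_utc"))

resUtc.show(false)

Like unix_timestamp, to_timestamp returns null for strings that do not match the pattern or are not valid dates, so the 2014/11/31 row stays null either way.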