printSchema

printSchema prints out the schema for a SparkDataFrame in a tree format.

Syntax:

  • printSchema(SparkDataFrame)

Parameters:

  • SparkDataFrame: Any SparkDataFrame

Output:

  • Schema in tree format
# Example: print the schema of SparkDataFrames, before and after a join.
#
# library() is used instead of require(): require() only returns FALSE (with a
# warning) when the package is missing, which would defer the failure to the
# first SparkR call below.
library(SparkR)

# Local R data frames that serve as the source data ----
authors <- data.frame(
  surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", 4))
)

books <- data.frame(
  name = c(
    "Tukey", "Venables", "Tierney", "Ripley", "Ripley", "McNeil", "R Core"
  ),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics",
    "Stochastic Simulation",
    "Interactive Data Analysis",
    "An Introduction to R"
  ),
  other.author = c(NA, "Ripley", NA, NA, NA, NA, "Venables & Smith")
)

# Convert the local data frames to SparkDataFrames ----
# NOTE(review): assumes a Spark session is already active (as in a notebook
# environment); otherwise one must be started first -- confirm for your setup.
authorsDF <- createDataFrame(authors)
booksDF <- createDataFrame(books)

# Print the schema of each SparkDataFrame ----
printSchema(authorsDF)
printSchema(booksDF)

# Join on author surname, then print the schema of the joined result ----
joinDF <- join(authorsDF, booksDF, authorsDF$surname == booksDF$name)
printSchema(joinDF)