from pyspark.sql import Row
# Sample sales data used by the visualizations in this notebook: one Row per
# sales entry, with a category, a product, a year, and a sales amount.
# The list of Rows is passed straight to createDataFrame; wrapping it in
# sc.parallelize() first is not needed for a small in-memory dataset.
salesEntryDataFrame = spark.createDataFrame([
    # --- fruits_and_vegetables ---
    Row(category="fruits_and_vegetables", product="apples", year=2012, salesAmount=100.50),
    Row(category="fruits_and_vegetables", product="oranges", year=2012, salesAmount=100.75),
    Row(category="fruits_and_vegetables", product="apples", year=2013, salesAmount=200.25),
    Row(category="fruits_and_vegetables", product="oranges", year=2013, salesAmount=300.65),
    Row(category="fruits_and_vegetables", product="apples", year=2014, salesAmount=300.65),
    Row(category="fruits_and_vegetables", product="oranges", year=2015, salesAmount=100.35),
    # --- butcher_shop ---
    Row(category="butcher_shop", product="beef", year=2012, salesAmount=200.50),
    Row(category="butcher_shop", product="chicken", year=2012, salesAmount=200.75),
    Row(category="butcher_shop", product="pork", year=2013, salesAmount=400.25),
    Row(category="butcher_shop", product="beef", year=2013, salesAmount=600.65),
    Row(category="butcher_shop", product="beef", year=2014, salesAmount=600.65),
    Row(category="butcher_shop", product="chicken", year=2015, salesAmount=200.35),
    # --- misc ---
    Row(category="misc", product="gum", year=2012, salesAmount=400.50),
    Row(category="misc", product="cleaning_supplies", year=2012, salesAmount=400.75),
    Row(category="misc", product="greeting_cards", year=2013, salesAmount=800.25),
    Row(category="misc", product="kitchen_utensils", year=2013, salesAmount=1200.65),
    Row(category="misc", product="cleaning_supplies", year=2014, salesAmount=1200.65),
    Row(category="misc", product="cleaning_supplies", year=2015, salesAmount=400.35),
])

# Make the DataFrame queryable from SQL as "test_sales_table".
# createOrReplaceTempView is the replacement for registerTempTable, which has
# been deprecated since Spark 2.0.
salesEntryDataFrame.createOrReplaceTempView("test_sales_table")

# Query the full table and hand the result to display() so the notebook can
# render it as a table or chart.
display(spark.sql("select * from test_sales_table"))
Chart and Graph Types with Python
This notebook covers the various charts and graphs that are built into Databricks.
Although Python is used to generate the test data displayed in the visualizations in this notebook, the information about how to configure these charts and graphs applies to all notebooks.