Change data feed demo

# Seed rows for the silver table: (country, number vaccinated, available doses).
countries = [
    ("USA", 10000, 20000),
    ("India", 1000, 1500),
    ("UK", 7000, 10000),
    ("Canada", 500, 700),
]
columns = ["Country", "NumVaccinated", "AvailableDoses"]

# Materialise the rows as a Delta table, replacing any existing silverTable.
(
    spark.createDataFrame(data=countries, schema=columns)
    .write.format("delta")
    .mode("overwrite")
    .saveAsTable("silverTable")
)

%sql
-- Show the initial contents of the silver table.
SELECT *
FROM silverTable

import pyspark.sql.functions as F

# Build the gold table from silver: keep Country and the derived
# VaccinationRate (NumVaccinated / AvailableDoses), dropping the raw counts.
(
    spark.read.format("delta")
    .table("silverTable")
    .withColumn(
        "VaccinationRate",
        F.col("NumVaccinated") / F.col("AvailableDoses"),
    )
    .drop("NumVaccinated", "AvailableDoses")
    .write.format("delta")
    .mode("overwrite")
    .saveAsTable("goldTable")
)

%sql
-- Show the initial contents of the gold table.
SELECT *
FROM goldTable

%sql
-- Turn on the change data feed for the silver table.
ALTER TABLE silverTable
SET TBLPROPERTIES (delta.enableChangeDataFeed = true)

OK

# Insert new records
new_countries = [("Australia", 100, 3000)]

# Append to the existing silver table, reusing the column names defined
# when the table was first created.
(
    spark.createDataFrame(data=new_countries, schema=columns)
    .write.format("delta")
    .mode("append")
    .saveAsTable("silverTable")
)

%sql
-- Update a record.
-- NumVaccinated is populated from Python integers when the table is created,
-- so assign a numeric literal instead of the string '11000'; this avoids
-- depending on an implicit string-to-number cast.
UPDATE silverTable
SET NumVaccinated = 11000
WHERE Country = 'USA'

%sql
-- Delete a record.
DELETE FROM silverTable
WHERE Country = 'UK'

%sql
-- Show the silver table after the insert, update and delete.
SELECT *
FROM silverTable

%sql
-- View the changes.
-- Only the starting version is given, so the feed is read from version 2
-- through the latest commit. This matches the PySpark read in this notebook,
-- which also sets only a starting version, and avoids a hard-coded end
-- version that may lie past the table's last commit.
-- NOTE(review): assumes version 2 is the first commit after the change data
-- feed was enabled; confirm against the table history.
SELECT *
FROM table_changes('silverTable', 2)
ORDER BY _commit_timestamp

# Read the silver table's change data from version 2 onwards and render it.
changes_df = (
    spark.read.format("delta")
    .option("readChangeData", True)
    .option("startingVersion", 2)
    .table("silverTable")
)
display(changes_df)

%sql
-- Collect only the latest version for each country.
-- Rows are ranked per Country by descending _commit_version and only the
-- top-ranked row is kept. update_preimage rows are excluded so that an
-- update contributes its post-update values only.
-- The change feed is read from version 2 with no end version, so the view
-- always reflects the most recent commit instead of stopping at a
-- hard-coded version that may not exist.
CREATE OR REPLACE TEMPORARY VIEW silverTable_latest_version AS
SELECT *
FROM (
  SELECT
    *,
    rank() OVER (PARTITION BY Country ORDER BY _commit_version DESC) AS rank
  FROM table_changes('silverTable', 2)
  WHERE _change_type != 'update_preimage'
)
WHERE rank = 1

OK

%sql
-- Merge the changes to gold.
--   * update_postimage on an existing country -> recompute VaccinationRate
--   * delete on an existing country           -> remove it from gold, so a
--     country deleted from silver does not linger in gold
--   * country not yet in gold                 -> insert it, unless its latest
--     change is a delete (nothing to insert in that case)
MERGE INTO goldTable t
USING silverTable_latest_version s
  ON s.Country = t.Country
WHEN MATCHED AND s._change_type = 'update_postimage' THEN
  UPDATE SET VaccinationRate = s.NumVaccinated / s.AvailableDoses
WHEN MATCHED AND s._change_type = 'delete' THEN
  DELETE
WHEN NOT MATCHED AND s._change_type != 'delete' THEN
  INSERT (Country, VaccinationRate)
  VALUES (s.Country, s.NumVaccinated / s.AvailableDoses)

%sql
-- Show the gold table after the changes were merged in.
SELECT *
FROM goldTable

%sql
-- Clean up the demo tables.
-- IF EXISTS keeps the cleanup re-runnable when a table was never created or
-- has already been dropped.
DROP TABLE IF EXISTS silverTable;
DROP TABLE IF EXISTS goldTable;

OK

Change data feed demo (Python)

Demo of Delta Lake change data feed

Create a silver table that tracks the absolute number of vaccinations and available doses by country

Generate gold table showing vaccination rate by country

Enable change data feed on silver table

Update silver table daily

Explore the change data in SQL and PySpark

Propagate changes from silver to gold table

Clean up tables