%scala
// Build the left side: (name, date, duration) rows.
val events = Seq(("bob", "2015-01-13", 4), ("alice", "2015-04-23", 10))
val left = events.toDF("name", "date", "duration")

// Build the right side: (name, upload) rows.
val right = Seq(("alice", 100), ("bob", 23)).toDF("name", "upload")

// Join via a column-equality expression; the result keeps BOTH "name"
// columns (one from each side) — this is the problem being demonstrated.
val df = left.join(right, left.col("name") === right.col("name"))
display(df)
%python llist = [('bob', '2015-01-13', 4), ('alice', '2015-04-23',10)] left = spark.createDataFrame(llist, ['name','date','duration']) right = spark.createDataFrame([('alice', 100),('bob', 23)],['name','upload']) df = left.join(right, left.name == right.name) display(df)
%r
library(SparkR)
sparkR.session()

# Load both sides of the join from pre-registered tables.
left <- sql("SELECT * FROM left_test_table")
right <- sql("SELECT * FROM right_test_table")

# Join via a column-equality expression; the result carries a "name"
# column from each side — this is the problem being demonstrated.
head(join(left, right, left$name == right$name))
  name   date        duration  name   upload
1 alice  2015-04-23  10        alice  100
2 bob    2015-01-13  4         bob    23
Problem
If you join two DataFrames using a column-equality expression (rather than a list of column names), the joined DataFrame retains the join column from both sides, producing duplicate columns — note the two `name` columns in the output above.
Last refresh: Never