bikeStations: org.apache.spark.sql.DataFrame = [id: int, name: string ... 5 more fields]
tripData: org.apache.spark.sql.DataFrame = [id: int, duration: int ... 9 more fields]
root
|-- id: integer (nullable = true)
|-- name: string (nullable = true)
|-- lat: double (nullable = true)
|-- long: double (nullable = true)
|-- dock_count: integer (nullable = true)
|-- city: string (nullable = true)
|-- installation_date: string (nullable = true)
root
|-- id: integer (nullable = true)
|-- duration: integer (nullable = true)
|-- start_date: string (nullable = true)
|-- start_station_name: string (nullable = true)
|-- start_station_id: integer (nullable = true)
|-- end_date: string (nullable = true)
|-- end_station_name: string (nullable = true)
|-- end_station_id: integer (nullable = true)
|-- bike_id: integer (nullable = true)
|-- subscription_type: string (nullable = true)
|-- zip_code: string (nullable = true)
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.graphframes._
stationVertices: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [id: int, name: string ... 5 more fields]
tripEdges: org.apache.spark.sql.DataFrame = [id: int, duration: int ... 9 more fields]
stationGraph: org.graphframes.GraphFrame = GraphFrame(v:[id: int, name: string ... 5 more fields], e:[src: string, dst: string ... 9 more fields])
res7: stationVertices.type = [id: int, name: string ... 5 more fields]
Total Number of Stations: 70
Total Number of Trips in Graph: 669959
Total Number of Trips in Original Data: 669959
5 rows
5 rows