DBU usage per user (Python)
  1. Follow the instructions in Usage to download a usage data CSV file, upload the file to Databricks, and create a table named usage.
  2. Replace the <token> and <databricks-instance> placeholders in the code below with your API token and your Databricks workspace hostname.
  3. Attach the notebooks to a cluster and click Run All.
%sql
-- Preview every row and column of the uploaded usage table.
SELECT *
FROM usage
import requests

# NOTE(review): the token is embedded in the notebook source; consider reading
# it from a secret store instead of pasting it here.
token = "<token>"
request_header = {'Authorization': 'Bearer {0}'.format(token)}
users_endpoint = "https://<databricks-instance>/api/2.0/preview/scim/v2/Users"

# Fetch the user list from the SCIM Users endpoint.
# A timeout keeps the cell from hanging forever on an unreachable host, and
# raise_for_status() surfaces auth/host errors here instead of as a confusing
# failure further down the notebook.
# NOTE(review): SCIM list responses can be paginated; confirm that a single
# request returns every user for large workspaces.
response = requests.get(users_endpoint, headers=request_header, timeout=60)
response.raise_for_status()

# 'Resources' may be absent from the payload; fall back to an empty list so
# later cells can always iterate over `users`.
users = response.json().get('Resources') or []
import json 

# Write one JSON object per line (email, id, groups) for every fetched user,
# then load that file with Spark and expose it to SQL as the temp view "users".
dbutils.fs.mkdirs("/home/users/")
with open("/dbfs/home/users/user_list.json", "w") as fp:
    # `users` may be None when the API response carried no 'Resources' key.
    for x in users or []:
        # Some users have no 'emails' entry (or an empty one); record a null
        # email for them rather than failing on the [0] lookup.
        emails = x.get('emails') or []
        user_info = {
            'email': emails[0].get('value') if emails else None,
            'id': x.get('id'),
            'groups': x.get('groups'),
        }
        fp.write(json.dumps(user_info) + "\n")

# The file was written through the local /dbfs path above; Spark reads the
# same file using the path without the /dbfs prefix.
df_user = spark.read.json("/home/users/user_list.json")
df_user.createOrReplaceTempView("users")
%sql
-- Total DBUs per cluster owner, with the owner's email, largest consumers first.
-- Only owners whose id appears in the users view are included (inner join).
SELECT
  SUM(u.dbus) AS total_dbus,
  u.clusterOwnerUserId,
  usr.email
FROM usage AS u
INNER JOIN users AS usr
  ON usr.id = u.clusterOwnerUserId
GROUP BY
  u.clusterOwnerUserId,
  usr.email
ORDER BY total_dbus DESC