import json, requests, time
token = dbutils.secrets.get(scope=secret_configuration['scope'], key=secret_configuration['key'])
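For reference, secret_configuration (and api_url and scan_running_clusters_only, used further down) come from the notebook's configuration section; a hypothetical example, with placeholder scope, key, and workspace values:

# Hypothetical configuration values; substitute your own scope, key, and workspace URL
secret_configuration = {'scope': 'security_analysis', 'key': 'admin_pat'}
api_url = 'https://myworkspace.cloud.databricks.com'
scan_running_clusters_only = False  # set True to scan only clusters in the RUNNING state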
def renderURL(api_url, version, endpoint):
    return api_url + '/api/{0}'.format(version) + endpoint

def renderToken(token):
    return {'Authorization': 'Bearer {0}'.format(token)}
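A quick illustration of what these two helpers produce, assuming a hypothetical workspace URL and token:

# renderURL('https://myworkspace.cloud.databricks.com', '2.0', '/clusters/list')
#   -> 'https://myworkspace.cloud.databricks.com/api/2.0/clusters/list'
# renderToken('dapi...') -> {'Authorization': 'Bearer dapi...'}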
def get(endpoint, api_url, token, json_params=None, printJson=False, version='2.0', debug=False):
    # requests ignores params=None, so a single call covers both cases
    resp = requests.get(renderURL(api_url, version, endpoint), headers=renderToken(token), params=json_params)
    if resp.status_code == 403:
        if debug:
            print("Invalid Token", renderToken(token))
        raise PermissionError("Authentication Failed")
    if debug:
        print("get DEBUG", resp.text)
    results = resp.json()
    if printJson:
        print(json.dumps(results, indent=4, sort_keys=True))
    return results
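A usage sketch for get: json_params is passed through as query-string parameters, as in this call to the Clusters API 2.0 get endpoint (the cluster_id value is a placeholder):

# Sketch: fetch one cluster's details; the cluster_id here is hypothetical
info = get("/clusters/get", api_url, token, json_params={"cluster_id": "0123-456789-abcdefg0"}, printJson=True)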
def post(endpoint, api_url, token, json_params=None, printJson=False, version='2.0'):
    if not json_params:
        print("Must have a payload in json_params param.")
        return {}
    try:
        raw_results = requests.post(renderURL(api_url, version, endpoint), headers=renderToken(token), json=json_params)
        results = raw_results.json()
    except (ValueError, requests.RequestException):
        # Retry with a manually serialized body if the first attempt fails
        raw_results = requests.post(renderURL(api_url, version, endpoint), headers=renderToken(token), data=json.dumps(json_params))
        results = raw_results.json()
    if printJson:
        print(json.dumps(results, indent=4, sort_keys=True))
    # If the response body is empty, return just the HTTP status
    if results:
        results['http_status_code'] = raw_results.status_code
        return results
    else:
        return {'http_status_code': raw_results.status_code}
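A usage sketch for post, assuming the Clusters API 2.0 pin endpoint (the cluster_id value is a placeholder):

# Sketch: pin a cluster so it is kept after termination; the cluster_id here is hypothetical
resp = post("/clusters/pin", api_url, token, json_params={"cluster_id": "0123-456789-abcdefg0"})
print(resp['http_status_code'])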
clusters = get("/clusters/list", api_url, token)
count = 0
for cluster in clusters['clusters']:
    # Optionally limit the scan to clusters that are currently running
    if not scan_running_clusters_only or cluster['state'] == "RUNNING":
        spark_conf = cluster.get("spark_conf", None)
        enableProcessIsolation = None
        allowedLanguages = None
        dfAclsEnabled = None
        data_security_mode = cluster.get("data_security_mode", None)
        access_mode = cluster.get("access_mode", None)
        single_user_name = cluster.get("single_user_name", None)
        if spark_conf is not None:
            enableProcessIsolation = spark_conf.get("spark.databricks.pyspark.enableProcessIsolation", None)
            allowedLanguages = spark_conf.get("spark.databricks.repl.allowedLanguages", None)
            dfAclsEnabled = spark_conf.get("spark.databricks.acl.dfAclsEnabled", None)
        # Check whether isolation is set:
        # A single user name is set
        single_user_name_is_set = (single_user_name is not None)
        # A security mode is set via either data_security_mode or access_mode (new!)
        isolated_security_mode = (
            (data_security_mode is not None and data_security_mode != "NONE")
            or (access_mode is not None and access_mode != "NO_ISOLATION")
        )
        # It's a Table ACLs cluster
        dfacl = (dfAclsEnabled == "true")
        # Process isolation is enabled
        process_isolation = (enableProcessIsolation == "true")
        # Neither Scala nor R is allowed; both can be used to bypass isolation
        no_disallowed_languages = (allowedLanguages is not None
                                   and "scala" not in allowedLanguages.lower()
                                   and "r" not in allowedLanguages.lower())
        # The cluster counts as isolated if any of the checks above passes
        isolation_enabled = single_user_name_is_set or isolated_security_mode or ((dfacl or process_isolation) and no_disallowed_languages)
        # Print clusters without isolation
        if not isolation_enabled:
            count += 1
            print(f"{count}, Cluster ID {cluster['cluster_id']}, name {cluster['cluster_name']}, creator user name {cluster['creator_user_name']}")
Identify No-Isolation-Shared clusters
This notebook queries the Databricks API for all live clusters and lists shared clusters that run without user isolation. Running it requires administrative rights.
Configure your credentials
To use this notebook, you need an admin-level personal access token. Following secure coding practices, the token should not be hardcoded in a notebook; instead, store it using the Databricks Secrets capability or a third-party secret manager. Configure Databricks Secrets using the databricks CLI or the API. (Docs for CLI and API)
Example using the CLI (after it has been configured):
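(The scope and key names below are placeholders, matching the hypothetical configuration above.)

databricks secrets create-scope --scope security_analysis
databricks secrets put --scope security_analysis --key admin_pat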
The scope name and key name can be whatever you wish; just provide them below in the configuration section.
After each put, the CLI will open the vi text editor. Press 'i' to switch to "insert" mode and type in the token value. To get out of "insert" mode, press Escape, then save and quit by typing ':wq'. Databricks will automatically remove any leading or trailing whitespace. Confused by vi? You're not alone: vi for beginners