%md
# Quickstart: Build, test, and deploy an agent using Mosaic AI Agent Framework

This quickstart notebook demonstrates how to build, test, and deploy a generative AI agent ([AWS](https://docs.databricks.com/aws/en/generative-ai/guide/introduction-generative-ai-apps#what-are-gen-ai-apps) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/guide/introduction-generative-ai-apps#what-are-gen-ai-apps) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/guide/introduction-generative-ai-apps)) using Mosaic AI Agent Framework ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/build-genai-apps#-mosaic-ai-agent-framework) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/build-genai-apps#-mosaic-ai-agent-framework) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/build-genai-apps#-mosaic-ai-agent-framework)) on Databricks.
%md
## Define and test an agent

This section defines and tests a simple agent with the following attributes:

- The agent uses an LLM served on the Databricks Foundation Model API ([AWS](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/machine-learning/foundation-model-apis/) | [GCP](https://docs.databricks.com/gcp/en/machine-learning/foundation-model-apis)).
- The agent has access to a single tool: the built-in Python code interpreter tool in Databricks Unity Catalog ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/code-interpreter-tools#built-in-python-executor-tool) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/code-interpreter-tools) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/code-interpreter-tools)). The agent can use this tool to run LLM-generated code in order to respond to user questions.

We use the `databricks_openai` SDK ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/author-agent#requirements) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/author-agent#requirements) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/author-agent#requirements)) to query the LLM endpoint.
%pip install -U -qqqq mlflow databricks-openai databricks-agents
dbutils.library.restartPython()
# The snippet below picks the first available LLM endpoint in your Databricks
# workspace from a set of candidates. You can override and simplify it
# to just specify LLM_ENDPOINT_NAME.
LLM_ENDPOINT_NAME = None

from databricks.sdk import WorkspaceClient


def is_endpoint_available(endpoint_name):
    try:
        client = WorkspaceClient().serving_endpoints.get_open_ai_client()
        client.chat.completions.create(
            model=endpoint_name,
            messages=[{"role": "user", "content": "What is AI?"}],
        )
        return True
    except Exception:
        return False


for candidate_endpoint_name in [
    "databricks-claude-3-7-sonnet",
    "databricks-meta-llama-3-3-70b-instruct",
]:
    if is_endpoint_available(candidate_endpoint_name):
        LLM_ENDPOINT_NAME = candidate_endpoint_name
        break  # stop at the first available candidate

assert LLM_ENDPOINT_NAME is not None, "Please specify LLM_ENDPOINT_NAME"
import json

import mlflow
from databricks.sdk import WorkspaceClient
from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient

# Automatically log traces from LLM calls for ease of debugging
mlflow.openai.autolog()

# Get an OpenAI client configured to talk to Databricks model serving endpoints.
# We'll use this to query an LLM in our agent
openai_client = WorkspaceClient().serving_endpoints.get_open_ai_client()

# Load Databricks built-in tools (a stateless Python code interpreter tool)
client = DatabricksFunctionClient()
builtin_tools = UCFunctionToolkit(
    function_names=["system.ai.python_exec"], client=client
).tools
for tool in builtin_tools:
    del tool["function"]["strict"]


def call_tool(tool_name, parameters):
    if tool_name == "system__ai__python_exec":
        return DatabricksFunctionClient().execute_function(
            "system.ai.python_exec", parameters=parameters
        )
    raise ValueError(f"Unknown tool: {tool_name}")


def run_agent(prompt):
    """
    Send a user prompt to the LLM and return a list of LLM response messages.
    The LLM is allowed to call the code interpreter tool, if needed, to respond to the user.
    """
    result_msgs = []
    response = openai_client.chat.completions.create(
        model=LLM_ENDPOINT_NAME,
        messages=[{"role": "user", "content": prompt}],
        tools=builtin_tools,
    )
    msg = response.choices[0].message
    result_msgs.append(msg.to_dict())

    # If the model requested a tool call, execute it and append the result
    if msg.tool_calls:
        call = msg.tool_calls[0]
        tool_result = call_tool(call.function.name, json.loads(call.function.arguments))
        result_msgs.append(
            {
                "role": "tool",
                "content": tool_result.value,
                "name": call.function.name,
                "tool_call_id": call.id,
            }
        )
    return result_msgs
answer = run_agent("What is the square root of 429?")
for message in answer:
    print(f'{message["role"]}: {message["content"]}')
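%md To see what the built-in code interpreter tool does on its own, you can also execute it directly through the Unity Catalog function client, outside of the agent loop. The cell below is a minimal sketch, assuming `system.ai.python_exec` takes a single `code` string parameter (as in the toolkit loaded above) and that you have EXECUTE permission on the function.
# A minimal sketch of calling the built-in code interpreter tool directly.
# Assumption: system.ai.python_exec takes a single `code` string parameter
# and your workspace grants you EXECUTE on it.
from databricks_openai import DatabricksFunctionClient

direct_result = DatabricksFunctionClient().execute_function(
    "system.ai.python_exec",
    parameters={"code": "print(429 ** 0.5)"},
)
print(direct_result.value)  # stdout captured from the executed code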
%md
## Prepare agent code for logging

Wrap your agent definition in MLflow’s [ChatAgent interface](https://mlflow.org/docs/latest/api_reference/python_api/mlflow.pyfunc.html#mlflow.pyfunc.ChatAgent) to prepare your code for logging. By using MLflow’s standard agent authoring interface, you get built-in UIs for chatting with your agent and for sharing it with others after deployment. ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/author-agent#-use-chatagent-to-author-agents) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/author-agent) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/author-agent))
import uuid
from typing import Any, Optional

import mlflow
from mlflow.pyfunc import ChatAgent
from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext

mlflow.openai.autolog()


class QuickstartAgent(ChatAgent):
    def predict(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> ChatAgentResponse:
        # 1. Extract the last user prompt from the input messages
        prompt = messages[-1].content

        # 2. Call run_agent to get back a list of response messages
        raw_msgs = run_agent(prompt)

        # 3. Map each response message into a ChatAgentMessage and return the response
        out = []
        for m in raw_msgs:
            out.append(ChatAgentMessage(id=uuid.uuid4().hex, **m))
        return ChatAgentResponse(messages=out)
AGENT = QuickstartAgent()

for response_message in AGENT.predict(
    {"messages": [{"role": "user", "content": "What's the square root of 429?"}]}
).messages:
    print(f"role: {response_message.role}, content: {response_message.content}")
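%md MLflow’s `ChatAgent` interface also supports streaming through an optional `predict_stream` method. The cell below is a hypothetical sketch of what that could look like for this agent; it naively computes the full response and then emits each message as a `ChatAgentChunk`, and it is not required for the rest of this quickstart.
# A minimal sketch of adding streaming support, assuming ChatAgentChunk's
# `delta` field carries one ChatAgentMessage per chunk. This subclass is
# hypothetical and not used elsewhere in this notebook.
from typing import Any, Generator, Optional

from mlflow.types.agent import ChatAgentChunk


class StreamingQuickstartAgent(QuickstartAgent):
    def predict_stream(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> Generator[ChatAgentChunk, None, None]:
        # Naive streaming: compute the full response, then yield each
        # response message as its own chunk.
        for message in self.predict(messages, context, custom_inputs).messages:
            yield ChatAgentChunk(delta=message)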
%md
## Log the agent

Log the agent and register it to Unity Catalog as a model ([AWS](https://docs.databricks.com/aws/en/machine-learning/manage-model-lifecycle/) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/machine-learning/manage-model-lifecycle/) | [GCP](https://docs.databricks.com/gcp/en/machine-learning/manage-model-lifecycle/)). This step packages the agent code and its dependencies into a single artifact so it can be deployed to a serving endpoint.

The following code cells do the following:

1. Copy the agent code from above and combine it into a single cell.
1. Add the `%%writefile` cell magic command at the top of the cell to save the agent code to a file called `quickstart_agent.py`.
1. Add an [mlflow.models.set_model()](https://mlflow.org/docs/latest/model#models-from-code) call at the bottom of the cell. This tells MLflow which Python agent object to use for making predictions when your agent is deployed.
1. Log the agent code in the `quickstart_agent.py` file using MLflow APIs ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/log-agent) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/log-agent) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/log-agent)).
%%writefile quickstart_agent.py
import json
import uuid
from typing import Any, Optional

import mlflow
from databricks.sdk import WorkspaceClient
from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient
from mlflow.pyfunc import ChatAgent
from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext

# Get an OpenAI client configured to talk to Databricks model serving endpoints.
# We'll use this to query an LLM in our agent
openai_client = WorkspaceClient().serving_endpoints.get_open_ai_client()

# The snippet below picks the first available LLM endpoint in your Databricks
# workspace from a set of candidates. You can override and simplify it
# to just specify LLM_ENDPOINT_NAME.
LLM_ENDPOINT_NAME = None


def is_endpoint_available(endpoint_name):
    try:
        client = WorkspaceClient().serving_endpoints.get_open_ai_client()
        client.chat.completions.create(
            model=endpoint_name,
            messages=[{"role": "user", "content": "What is AI?"}],
        )
        return True
    except Exception:
        return False


for candidate_endpoint_name in [
    "databricks-claude-3-7-sonnet",
    "databricks-meta-llama-3-3-70b-instruct",
]:
    if is_endpoint_available(candidate_endpoint_name):
        LLM_ENDPOINT_NAME = candidate_endpoint_name
        break  # stop at the first available candidate

assert LLM_ENDPOINT_NAME is not None, "Please specify LLM_ENDPOINT_NAME"

# Enable automatic tracing of LLM calls
mlflow.openai.autolog()

# Load Databricks built-in tools (a stateless Python code interpreter tool)
client = DatabricksFunctionClient()
builtin_tools = UCFunctionToolkit(
    function_names=["system.ai.python_exec"], client=client
).tools
for tool in builtin_tools:
    del tool["function"]["strict"]


def call_tool(tool_name, parameters):
    if tool_name == "system__ai__python_exec":
        return DatabricksFunctionClient().execute_function(
            "system.ai.python_exec", parameters=parameters
        )
    raise ValueError(f"Unknown tool: {tool_name}")


def run_agent(prompt):
    """
    Send a user prompt to the LLM and return a list of LLM response messages.
    The LLM is allowed to call the code interpreter tool, if needed, to respond to the user.
    """
    result_msgs = []
    response = openai_client.chat.completions.create(
        model=LLM_ENDPOINT_NAME,
        messages=[{"role": "user", "content": prompt}],
        tools=builtin_tools,
    )
    msg = response.choices[0].message
    result_msgs.append(msg.to_dict())

    # If the model requested a tool call, execute it and append the result
    if msg.tool_calls:
        call = msg.tool_calls[0]
        tool_result = call_tool(call.function.name, json.loads(call.function.arguments))
        result_msgs.append(
            {
                "role": "tool",
                "content": tool_result.value,
                "name": call.function.name,
                "tool_call_id": call.id,
            }
        )
    return result_msgs


class QuickstartAgent(ChatAgent):
    def predict(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> ChatAgentResponse:
        prompt = messages[-1].content
        raw_msgs = run_agent(prompt)
        out = []
        for m in raw_msgs:
            out.append(ChatAgentMessage(id=uuid.uuid4().hex, **m))
        return ChatAgentResponse(messages=out)


AGENT = QuickstartAgent()
mlflow.models.set_model(AGENT)
dbutils.library.restartPython()
import mlflow
from mlflow.models.resources import DatabricksFunction, DatabricksServingEndpoint
from pkg_resources import get_distribution

from quickstart_agent import LLM_ENDPOINT_NAME

# Register the model to the workspace default catalog.
# Specify a catalog (e.g. "main") and schema name (e.g. "custom_schema")
# if needed, in order to register the agent to a different location.
catalog_name = spark.sql("SELECT current_catalog()").collect()[0][0]
schema_name = "default"
registered_model_name = f"{catalog_name}.{schema_name}.quickstart_agent"

# Specify the Databricks product resources that the agent needs access to (the built-in
# Python code interpreter tool and the LLM serving endpoint), so that Databricks can
# automatically configure authentication for the agent to access these resources
# when it's deployed.
resources = [
    DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
    DatabricksFunction(function_name="system.ai.python_exec"),
]

mlflow.set_registry_uri("databricks-uc")

with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(
        artifact_path="agent",
        python_model="quickstart_agent.py",
        extra_pip_requirements=[
            f"databricks-connect=={get_distribution('databricks-connect').version}"
        ],
        resources=resources,
        registered_model_name=registered_model_name,
    )
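%md Before deploying, it can help to confirm that the logged artifact loads and answers queries. The cell below is an optional sanity check, assuming it runs in the same notebook session so `logged_agent_info` is still in scope.
# Optional sanity check (assumption: same session, so logged_agent_info
# is still defined): load the logged agent back as a generic MLflow
# pyfunc model and query it before deploying.
loaded_agent = mlflow.pyfunc.load_model(logged_agent_info.model_uri)
print(
    loaded_agent.predict(
        {"messages": [{"role": "user", "content": "What is 17 squared?"}]}
    )
)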
%md
## Deploy the agent

Run the cell below to deploy the agent ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-framework/deploy-agent) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-framework/deploy-agent) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-framework/deploy-agent)). Once the agent endpoint starts, you can chat with it via AI Playground ([AWS](https://docs.databricks.com/aws/en/large-language-models/ai-playground) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/large-language-models/ai-playground) | [GCP](https://docs.databricks.com/gcp/en/large-language-models/ai-playground)), or share it with stakeholders ([AWS](https://docs.databricks.com/aws/en/generative-ai/agent-evaluation/review-app) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/generative-ai/agent-evaluation/review-app) | [GCP](https://docs.databricks.com/gcp/en/generative-ai/agent-evaluation/review-app)) for initial feedback, before sharing it more broadly.
from databricks import agents

deployment_info = agents.deploy(
    model_name=registered_model_name,
    model_version=logged_agent_info.registered_model_version,
)
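%md After `agents.deploy` returns, the endpoint can take several minutes to provision. Once it is ready, one way to query it programmatically is through the MLflow deployments client. The cell below is a sketch, assuming `deployment_info.endpoint_name` holds the serving endpoint name and that the deployed agent accepts the same `messages` payload used earlier in this notebook.
# A minimal sketch of querying the deployed agent once the serving endpoint
# is ready (provisioning can take several minutes after agents.deploy returns).
# Assumption: deployment_info.endpoint_name holds the endpoint name.
from mlflow.deployments import get_deploy_client

deploy_client = get_deploy_client("databricks")
result = deploy_client.predict(
    endpoint=deployment_info.endpoint_name,
    inputs={"messages": [{"role": "user", "content": "What is the square root of 429?"}]},
)
print(result)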