    Agent Quickstart Notebook

    Quickstart: Build, test, and deploy an agent using Mosaic AI Agent Framework

    This quickstart notebook demonstrates how to build, test, and deploy a generative AI agent (AWS | Azure | GCP) using Mosaic AI Agent Framework (AWS | Azure | GCP) on Databricks.

    Define and test an agent

    This section defines and tests a simple agent with the following attributes:

    • The agent uses an LLM served on Databricks Foundation Model API (AWS | Azure | GCP)
    • The agent has access to a single tool: the built-in Python code interpreter tool in Databricks Unity Catalog (AWS | Azure | GCP). The agent can use this tool to run LLM-generated code to answer user questions.

    We will use the databricks-openai SDK (AWS | Azure | GCP) to query the LLM endpoint.

    %pip install -U -qqqq mlflow databricks-openai databricks-agents
    dbutils.library.restartPython()
    # The snippet below tries to pick the first LLM API available in your Databricks workspace
    # from a set of candidates. You can override and simplify it
    # to just specify LLM_ENDPOINT_NAME.
    LLM_ENDPOINT_NAME = None
    
    from databricks.sdk import WorkspaceClient
    def is_endpoint_available(endpoint_name):
        try:
            client = WorkspaceClient().serving_endpoints.get_open_ai_client()
            client.chat.completions.create(model=endpoint_name, messages=[{"role": "user", "content": "What is AI?"}])
            return True
        except Exception:
            return False

    for candidate_endpoint_name in ["databricks-claude-3-7-sonnet", "databricks-meta-llama-3-3-70b-instruct"]:
        if is_endpoint_available(candidate_endpoint_name):
            LLM_ENDPOINT_NAME = candidate_endpoint_name
            break

    assert LLM_ENDPOINT_NAME is not None, "Please specify LLM_ENDPOINT_NAME"
    import json
    import mlflow
    from databricks.sdk import WorkspaceClient
    from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient
    
    # Automatically log traces from LLM calls for ease of debugging
    mlflow.openai.autolog()
    
    # Get an OpenAI client configured to talk to Databricks model serving endpoints
    # We'll use this to query an LLM in our agent
    openai_client = WorkspaceClient().serving_endpoints.get_open_ai_client()
    
    # Load Databricks built-in tools (a stateless Python code interpreter tool)
    client = DatabricksFunctionClient()
    builtin_tools = UCFunctionToolkit(
        function_names=["system.ai.python_exec"], client=client
    ).tools
    # Remove the "strict" property from each tool spec, since some serving endpoints do not support it
    for tool in builtin_tools:
        del tool["function"]["strict"]
    
    
    def call_tool(tool_name, parameters):
        if tool_name == "system__ai__python_exec":
            return DatabricksFunctionClient().execute_function(
                "system.ai.python_exec", parameters=parameters
            )
        raise ValueError(f"Unknown tool: {tool_name}")
    
    
    def run_agent(prompt):
        """
        Send a user prompt to the LLM, and return a list of LLM response messages
        The LLM is allowed to call the code interpreter tool if needed, to respond to the user
        """
        result_msgs = []
        response = openai_client.chat.completions.create(
            model=LLM_ENDPOINT_NAME,
            messages=[{"role": "user", "content": prompt}],
            tools=builtin_tools,
        )
        msg = response.choices[0].message
        result_msgs.append(msg.to_dict())
    
        # If the model requested a tool call, execute it and append the result
        if msg.tool_calls:
            call = msg.tool_calls[0]
            tool_result = call_tool(call.function.name, json.loads(call.function.arguments))
            result_msgs.append(
                {
                    "role": "tool",
                    "content": tool_result.value,
                    "name": call.function.name,
                    "tool_call_id": call.id,
                }
            )
        return result_msgs
    answer = run_agent("What is the square root of 429?")
    for message in answer:
        print(f'{message["role"]}: {message["content"]}')
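
    Because mlflow.openai.autolog() is enabled, the LLM and tool calls above are captured as MLflow traces. If you want to inspect them programmatically, the following is a minimal optional sketch (not part of the original quickstart), assuming a recent MLflow 2.x release that includes mlflow.search_traces():

    # Optional: fetch recently autologged traces as a pandas DataFrame for debugging
    # (assumes an MLflow version with tracing support)
    traces = mlflow.search_traces(max_results=5)
    display(traces)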

    Prepare agent code for logging

    Wrap your agent definition in MLflow’s ChatAgent interface to prepare your code for logging.

    By using MLflow’s standard agent authoring interface, you get built-in UIs for chatting with your agent and sharing it with others after deployment. (AWS | Azure | GCP)

    import uuid
    import mlflow
    from typing import Any, Optional
    
    from mlflow.pyfunc import ChatAgent
    from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext
    
    mlflow.openai.autolog()
    
    class QuickstartAgent(ChatAgent):
        def predict(
            self,
            messages: list[ChatAgentMessage],
            context: Optional[ChatContext] = None,
            custom_inputs: Optional[dict[str, Any]] = None,
        ) -> ChatAgentResponse:
            # 1. Extract the last user prompt from the input messages
            prompt = messages[-1].content
    
            # 2. Call run_agent to get back a list of response messages
            raw_msgs = run_agent(prompt)
    
            # 3. Map each response message into a ChatAgentMessage and return
            # the response
            out = []
            for m in raw_msgs:
                out.append(ChatAgentMessage(id=uuid.uuid4().hex, **m))
    
            return ChatAgentResponse(messages=out)
    AGENT = QuickstartAgent()
    for response_message in AGENT.predict(
        {"messages": [{"role": "user", "content": "What's the square root of 429?"}]}
    ).messages:
        print(f"role: {response_message.role}, content: {response_message.content}")

    Log the agent

    Log the agent and register it to Unity Catalog as a model (AWS | Azure | GCP). This step packages the agent code and its dependencies into a single artifact that can be deployed to a serving endpoint.

    The code cells below do the following:

    1. Copy the agent code from above and combine it into a single cell.
    2. Add the %%writefile cell magic command at the top of the cell to save the agent code to a file called quickstart_agent.py.
    3. Add a mlflow.models.set_model() call to the bottom of the cell. This tells MLflow which Python agent object to use for making predictions when your agent is deployed.
    4. Log the agent code in the quickstart_agent.py file using MLflow APIs (AWS | Azure | GCP).
    %%writefile quickstart_agent.py
    
    import json
    import uuid
    from databricks.sdk import WorkspaceClient
    from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient
    from typing import Any, Optional
    
    import mlflow
    from mlflow.pyfunc import ChatAgent
    from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext
    
    # Get an OpenAI client configured to talk to Databricks model serving endpoints
    # We'll use this to query an LLM in our agent
    openai_client = WorkspaceClient().serving_endpoints.get_open_ai_client()
    
    # The snippet below tries to pick the first LLM API available in your Databricks workspace
    # from a set of candidates. You can override and simplify it
    # to just specify LLM_ENDPOINT_NAME.
    LLM_ENDPOINT_NAME = None
    
    def is_endpoint_available(endpoint_name):
        try:
            client = WorkspaceClient().serving_endpoints.get_open_ai_client()
            client.chat.completions.create(model=endpoint_name, messages=[{"role": "user", "content": "What is AI?"}])
            return True
        except Exception:
            return False

    for candidate_endpoint_name in ["databricks-claude-3-7-sonnet", "databricks-meta-llama-3-3-70b-instruct"]:
        if is_endpoint_available(candidate_endpoint_name):
            LLM_ENDPOINT_NAME = candidate_endpoint_name
            break

    assert LLM_ENDPOINT_NAME is not None, "Please specify LLM_ENDPOINT_NAME"
    
    # Enable automatic tracing of LLM calls
    mlflow.openai.autolog()
    
    # Load Databricks built-in tools (a stateless Python code interpreter tool)
    client = DatabricksFunctionClient()
    builtin_tools = UCFunctionToolkit(function_names=["system.ai.python_exec"], client=client).tools
    # Remove the "strict" property from each tool spec, since some serving endpoints do not support it
    for tool in builtin_tools:
        del tool["function"]["strict"]
    
    def call_tool(tool_name, parameters):
        if tool_name == "system__ai__python_exec":
            return DatabricksFunctionClient().execute_function("system.ai.python_exec", parameters=parameters)
        raise ValueError(f"Unknown tool: {tool_name}")
    
    def run_agent(prompt):
        """
        Send a user prompt to the LLM, and return a list of LLM response messages
        The LLM is allowed to call the code interpreter tool if needed, to respond to the user
        """
        result_msgs = []
        response = openai_client.chat.completions.create(
            model=LLM_ENDPOINT_NAME,
            messages=[{"role": "user", "content": prompt}],
            tools=builtin_tools,
        )
        msg = response.choices[0].message
        result_msgs.append(msg.to_dict())
    
        # If the model requested a tool call, execute it and append the result
        if msg.tool_calls:
            call = msg.tool_calls[0]
            tool_result = call_tool(call.function.name, json.loads(call.function.arguments))
            result_msgs.append({"role": "tool", "content": tool_result.value, "name": call.function.name, "tool_call_id": call.id})
        return result_msgs
    
    class QuickstartAgent(ChatAgent):
        def predict(
            self,
            messages: list[ChatAgentMessage],
            context: Optional[ChatContext] = None,
            custom_inputs: Optional[dict[str, Any]] = None,
        ) -> ChatAgentResponse:
            prompt = messages[-1].content
            raw_msgs = run_agent(prompt)
            out = []
            for m in raw_msgs:
                out.append(ChatAgentMessage(
                    id=uuid.uuid4().hex,
                    **m
                ))
    
            return ChatAgentResponse(messages=out)
    
    AGENT = QuickstartAgent()
    mlflow.models.set_model(AGENT)
    dbutils.library.restartPython()
    import mlflow
    from mlflow.models.resources import DatabricksFunction, DatabricksServingEndpoint
    from pkg_resources import get_distribution
    from quickstart_agent import LLM_ENDPOINT_NAME
    
    # Register the model to the workspace default catalog.
    # Specify a catalog (e.g. "main") and schema name (e.g. "custom_schema") if needed,
    # in order to register the agent to a different location
    catalog_name = spark.sql("SELECT current_catalog()").collect()[0][0]
    schema_name = "default"
    registered_model_name = f"{catalog_name}.{schema_name}.quickstart_agent"
    
    # Specify Databricks product resources that the agent needs access to (our builtin python
    # code interpreter tool and LLM serving endpoint), so that Databricks can automatically
    # configure authentication for the agent to access these resources when it's deployed.
    resources = [
        DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
        DatabricksFunction(function_name="system.ai.python_exec"),
    ]
    
    mlflow.set_registry_uri("databricks-uc")
    with mlflow.start_run():
        logged_agent_info = mlflow.pyfunc.log_model(
            artifact_path="agent",
            python_model="quickstart_agent.py",
            extra_pip_requirements=[
                f"databricks-connect=={get_distribution('databricks-connect').version}"
            ],
            resources=resources,
            registered_model_name=registered_model_name,
        )
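
    Optionally, before deploying, you can smoke-test the logged agent in an isolated environment. This is a minimal sketch, not part of the original quickstart, assuming an MLflow version that provides mlflow.models.predict() for pre-deployment validation:

    # Optional: validate the logged agent before deployment
    # (assumes mlflow.models.predict is available in your MLflow version)
    mlflow.models.predict(
        model_uri=logged_agent_info.model_uri,
        input_data={"messages": [{"role": "user", "content": "What is the square root of 429?"}]},
        env_manager="virtualenv",
    )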

    Deploy the agent

    Run the cell below to deploy the agent (AWS | Azure | GCP). Once the agent endpoint starts, you can chat with it via AI Playground (AWS | Azure | GCP) or share it with stakeholders (AWS | Azure | GCP) for initial feedback before sharing it more broadly.

    from databricks import agents
    
    deployment_info = agents.deploy(
        model_name=registered_model_name,
        model_version=logged_agent_info.registered_model_version,
    )
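
    Endpoint provisioning can take several minutes. Once the endpoint is ready, you can also query the deployed agent programmatically. The sketch below is optional and assumes the returned deployment_info object exposes an endpoint_name attribute and that the deployed ChatAgent endpoint accepts OpenAI chat-completions-style requests:

    # Optional: query the deployed agent endpoint once it is ready
    # (endpoint_name attribute and chat-completions compatibility are assumptions here)
    from databricks.sdk import WorkspaceClient

    deployed_client = WorkspaceClient().serving_endpoints.get_open_ai_client()
    response = deployed_client.chat.completions.create(
        model=deployment_info.endpoint_name,
        messages=[{"role": "user", "content": "What is the square root of 429?"}],
    )
    print(response.choices[0].message.content)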