# =============================================================================
# Databricks Container Services for standard compute — Minimal Base Image (client.5.x)
#
# This Dockerfile builds the base image for running Python code in the
# Databricks Container Services for standard compute REPL sandbox on the
# client.5.x line. Customers can customize it by adding packages in the
# marked sections below.
#
# Build:
#   docker build -t my-dcs-base .
#
# Quick test:
#   docker run --rm --cpus 2 my-dcs-base /databricks/python3/bin/python -c \
#     "import pandas, numpy, pyarrow; print('All imports OK')"
#
# WARNING — The following are required by the Databricks platform and must
# not be changed:
#   - /databricks/python3 virtualenv path
#   - /databricks/python -> /databricks/python3 symlink
#   - /databricks/IMAGE_KEY, /databricks/DBR_VERSION files
#   - /etc/environment variables (see comments at the bottom)
#   - databricks-connect and its registration hook
# =============================================================================
FROM ubuntu:24.04

# Override at build time if you have a private pip mirror:
#   docker build --build-arg PIP_INDEX_URL=https://your-mirror/simple .
# ARG only (no ENV bridge) — the override is scoped to build steps; pip in
# the runtime image falls back to its default index, so a build-only mirror
# doesn't leak into clusters that can't reach it.
ARG PIP_INDEX_URL=https://pypi.org/simple

WORKDIR /databricks

# Timezone (prevents interactive tzdata prompt during apt-get).
# The expansions are quoted so the shell never word-splits or globs the value.
ENV TZ=Etc/UTC
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone

# Workaround for Ubuntu 24.04 FIPS detection bug (LP#2066990)
ENV OPENSSL_FORCE_FIPS_MODE=0

# System packages + Python 3.12
# The -dev packages are needed if you pip-install packages with C extensions.
# DEBIAN_FRONTEND is set inline (not via ENV) so package configuration never
# waits on a debconf prompt during the build, without leaking the setting into
# the runtime environment.
# NOTE(review): --no-install-recommends is intentionally not added here; confirm
# that no recommended package is relied on before introducing it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        cmake \
        file \
        git \
        iproute2 \
        iputils-ping \
        locales \
        procps \
        psmisc \
        software-properties-common \
        sudo \
        unzip \
        wget \
        zstd \
        libbz2-dev \
        libffi8 \
        libffi-dev \
        liblzma-dev \
        libpq-dev \
        libsqlite3-0 \
        libsqlite3-dev \
        libssl3t64 \
        libssl-dev \
        libtinfo6 \
        python-is-python3 \
        python3.12 \
        python3.12-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# === ADD YOUR CUSTOM SYSTEM PACKAGES HERE ===
# Example (clean up in the same RUN so the package lists do not stay in the layer):
#   RUN apt-get update \
#       && DEBIAN_FRONTEND=noninteractive apt-get install -y your-package \
#       && apt-get clean \
#       && rm -rf /var/lib/apt/lists/*

# Locale
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure locales \
    && locale-gen en_US.UTF-8 \
    && update-locale LC_ALL=en_US.UTF-8 LANGUAGE=en_US.UTF-8
ENV LANG=en_US.UTF-8

# Pip — Ubuntu 24.04 marks system Python as externally-managed (PEP 668).
# --break-system-packages is forwarded by get-pip.py to pip install,
# telling pip to ignore the EXTERNALLY-MANAGED marker and install system-wide.
# setuptools is pinned to 78.1.1 to pick up the CVE-2025-47273 fix
# (matches the upstream client.5.x base image; see PATCHSCA-715859).
# NOTE(review): get-pip.py itself is downloaded unpinned and without a checksum;
# consider verifying it if build reproducibility matters.
RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
    && /usr/bin/python3.12 get-pip.py --break-system-packages \
        pip==25.0.1 setuptools==78.1.1 wheel==0.45.1 \
    && rm -rf get-pip.py

# uv — fast Python package manager, used by the Databricks platform's
# cluster-scoped library install flow to create child virtualenvs offline.
# Installs the binary to /usr/local/bin/uv and pre-populates /opt/uv-cache
# with a seeded pip package so `uv venv --seed` works without network.
# UV_INDEX_URL is set for the cache-seeding RUN only — uv ignores PIP_INDEX_URL.
RUN /usr/local/bin/pip3.12 install --break-system-packages --no-cache-dir uv==0.6.10

# Seed /opt/uv-cache with pip via a throwaway venv. UV_INDEX_URL is exported
# inside this RUN only, so the build-time index does not persist in the image.
RUN export UV_INDEX_URL="${PIP_INDEX_URL}" \
    && mkdir -p /opt/uv-cache \
    && /usr/local/bin/uv --cache-dir /opt/uv-cache venv /tmp/uv-seed-venv \
    && /usr/local/bin/uv pip install --cache-dir /opt/uv-cache --python /tmp/uv-seed-venv pip==25.0.1 \
    && rm -rf /tmp/uv-seed-venv

# Virtualenv (--system-site-packages lets the venv see apt-installed libs;
# removing this flag will break package visibility inside /databricks/python3)
ENV VIRTUALENV_NO_PERIODIC_UPDATE=1
RUN /usr/local/bin/pip3.12 install --break-system-packages --no-cache-dir \
        virtualenv==20.29.3 distlib==0.3.9 filelock==3.18.0 platformdirs==4.3.7
RUN virtualenv --python=/usr/bin/python3.12 /databricks/python3 \
        --system-site-packages --no-download

# Install pinned Python packages (see requirements.txt for what each section is)
COPY requirements.txt /databricks/.virtualenv-def/standard-requirements.txt
RUN /databricks/python3/bin/python -m pip install --no-cache-dir \
        --requirement /databricks/.virtualenv-def/standard-requirements.txt

# === ADD YOUR CUSTOM PYTHON PACKAGES HERE ===
# Example: RUN /databricks/python3/bin/python -m pip install --no-cache-dir your-package

# Registration hook sets up PySpark entrypoints for remote Spark execution.
RUN /databricks/python3/bin/_databricks-connect-register-pyspark-installation

# Purge any pip cache left by the steps above, then record the final package
# set of the virtualenv in freeze format.
RUN /databricks/python3/bin/pip cache purge
RUN /databricks/python3/bin/pip list --format=freeze > /databricks/.virtualenv-def/environment.txt

# ---------------------------------------------------------------------------
# Environment setup (required by Databricks platform — do not modify)
# ---------------------------------------------------------------------------
# Symlink first so MLFLOW_PYTHON_EXECUTABLE resolves at build time.
# -n replaces an existing /databricks/python link instead of descending into
# the directory it points at, so the step stays idempotent.
RUN ln -sfn /databricks/python3 /databricks/python

RUN echo "client.5.x-base" > /databricks/IMAGE_KEY

# Update DBR_VERSION when targeting a newer DBR snapshot.
# See the Databricks Runtime release notes for the snapshot value that matches
# your target DBR.
# NOTE(review): the same client.5.6 value also appears as
# DATABRICKS_RUNTIME_VERSION in /etc/environment below — presumably the two
# must be kept in sync; confirm before changing either.
RUN echo "client.5.6" > /databricks/DBR_VERSION

# /etc/environment — read by the Databricks system at container start.
# These variables are required by the Databricks platform; do not modify or remove.
# printf '%s\n' writes each quoted argument on its own line, and the single
# redirect overwrites any existing /etc/environment with exactly these entries.
RUN printf '%s\n' \
    'DB_HOME=/databricks' \
    'CLUSTER_DB_HOME=/databricks' \
    'DATABRICKS_RUNTIME_VERSION=client.5.6' \
    'DEFAULT_DATABRICKS_ROOT_VIRTUALENV_ENV=/databricks/python3' \
    'IS_CLIENT_IMAGE=true' \
    'MLFLOW_TRACKING_URI=databricks' \
    'MLFLOW_GATEWAY_URI=databricks' \
    'MLFLOW_DEPLOYMENTS_TARGET=databricks' \
    'MLFLOW_PYTHON_EXECUTABLE=/databricks/python/bin/python' \
    'OPENSSL_FORCE_FIPS_MODE=0' \
    > /etc/environment

# === ADD YOUR CUSTOM ENV VARS HERE ===
# Append with >> so the required entries written above are preserved.
# Example: RUN echo 'MY_VAR=my_value' >> /etc/environment