Removes source compilation of nixl dependency (#24874)

Signed-off-by: bbartels <benjamin@bartels.dev>
Signed-off-by: Benjamin Bartels <benjamin@bartels.dev>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Daniele <36171005+dtrifiro@users.noreply.github.com>
This commit is contained in:
Benjamin Bartels
2025-09-17 02:33:18 +01:00
committed by GitHub
parent cef32104b4
commit 64ad551878
5 changed files with 77 additions and 116 deletions

View File

@@ -283,6 +283,10 @@ WORKDIR /vllm-workspace
ENV DEBIAN_FRONTEND=noninteractive
ARG TARGETPLATFORM
ARG GDRCOPY_CUDA_VERSION=12.8
# Keep in line with FINAL_BASE_IMAGE
ARG GDRCOPY_OS_VERSION=Ubuntu22_04
SHELL ["/bin/bash", "-c"]
ARG DEADSNAKES_MIRROR_URL
@@ -441,13 +445,21 @@ COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
RUN --mount=type=cache,target=/root/.cache/uv \
VLLM_DOCKER_BUILD_CONTEXT=1 /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"}
# Install EP kernels(pplx-kernels and DeepEP), NixL
# Install gdrcopy via tools/install_gdrcopy.sh. TARGETPLATFORM is mapped to the
# arch name the script expects (linux/arm64 -> aarch64, linux/amd64 -> x64); any
# other platform fails the build. The script is removed once it has run.
COPY tools/install_gdrcopy.sh install_gdrcopy.sh
RUN set -eux; \
case "${TARGETPLATFORM}" in \
linux/arm64) UUARCH="aarch64" ;; \
linux/amd64) UUARCH="x64" ;; \
*) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; \
esac; \
./install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"; \
rm ./install_gdrcopy.sh
# Install EP kernels(pplx-kernels and DeepEP)
COPY tools/ep_kernels/install_python_libraries.sh install_python_libraries.sh
COPY tools/install_nixl.sh install_nixl.sh
ENV CUDA_HOME=/usr/local/cuda
RUN export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a+PTX}" \
&& bash install_python_libraries.sh \
&& bash install_nixl.sh --force
&& bash install_python_libraries.sh
#################### vLLM installation IMAGE ####################

View File

@@ -10,7 +10,7 @@ Before using EP, you need to install the necessary dependencies. We are actively
1. **Install DeepEP and pplx-kernels**: Set up host environment following vLLM's guide for EP kernels [here](gh-file:tools/ep_kernels).
2. **Install DeepGEMM library**: Follow the [official instructions](https://github.com/deepseek-ai/DeepGEMM#installation).
3. **For disaggregated serving**: Install UCX and NIXL following the [script](gh-file:tools/install_nixl.sh).
3. **For disaggregated serving**: Install `gdrcopy` by running the [`install_gdrcopy.sh`](gh-file:tools/install_gdrcopy.sh) script (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`, where `GDRCOPY_OS_VERSION` is an OS identifier such as `Ubuntu22_04`). The available OS versions are listed [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/).
### Backend Selection Guide
@@ -191,7 +191,7 @@ For production deployments requiring strict SLA guarantees for time-to-first-tok
### Setup Steps
1. **Install KV Connector**: Install NIXL using the [installation script](gh-file:tools/install_nixl.sh)
1. **Install gdrcopy/ucx/nixl**: For maximum performance, run the [install_gdrcopy.sh](gh-file:tools/install_gdrcopy.sh) script to install `gdrcopy` (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`). You can find available OS versions [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/). If `gdrcopy` is not installed, NIXL still works after a plain `pip install nixl`, just with lower performance. `nixl` and `ucx` are installed as dependencies via pip.
2. **Configure Both Instances**: Add this flag to both prefill and decode instances: `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'`

View File

@@ -1 +1,2 @@
lmcache
lmcache
nixl >= 0.5.1 # Required for disaggregated prefill

57
tools/install_gdrcopy.sh Executable file
View File

@@ -0,0 +1,57 @@
#!/usr/bin/env bash
#
# Download and install NVIDIA's libgdrapi (gdrcopy) Debian package with apt-get.
#
# Usage: install_gdrcopy.sh <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch>
#   GDRCOPY_OS_VERSION    OS identifier used in the .deb file name (e.g. Ubuntu22_04);
#                         its lower-cased form is used as the URL directory
#   GDRCOPY_CUDA_VERSION  CUDA version selecting the "CUDA <version>" URL directory (e.g. 12.8)
#   uuarch                must be "x64" or "aarch64" (x86_64/amd64 and arm64 are accepted aliases)
# Optional: set GDRCOPY_VERSION to override the libgdrapi package version (default: 2.5.1-1)
# Requires: curl, apt-get, root privileges
set -euo pipefail

# Print a message to stderr and abort with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $(id -u) -ne 0 ]]; then
  die "Must be run as root"
fi

if [[ $# -ne 3 ]]; then
  die "Usage: $0 <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch(x64|aarch64)>"
fi

# Fail early with a clear message instead of a bare "command not found".
for required_tool in curl apt-get; do
  command -v "${required_tool}" >/dev/null 2>&1 \
    || die "Required command not found: ${required_tool}"
done

OS_VER="$1"
CUDA_VER="$2"
UUARCH_RAW="$3"

# Empty values would only surface later as a confusing 404 on a malformed URL.
if [[ -z "${OS_VER}" || -z "${CUDA_VER}" ]]; then
  die "GDRCOPY_OS_VERSION and GDRCOPY_CUDA_VERSION must not be empty"
fi

# Normalize/validate arch: the download URL and the .deb file name use
# different spellings for the same architecture.
case "${UUARCH_RAW,,}" in
  aarch64 | arm64)
    URL_ARCH="aarch64"
    DEB_ARCH="arm64"
    ;;
  x64 | x86_64 | amd64)
    URL_ARCH="x64"
    DEB_ARCH="amd64"
    ;;
  *)
    die "Unsupported uuarch: ${UUARCH_RAW}. Use 'x64' or 'aarch64'."
    ;;
esac

# The URL directory is the lower-cased OS version, while the file name keeps
# the caller's original casing.
OS_VER_LOWER="${OS_VER,,}"
GDRCOPY_PKG_VER="${GDRCOPY_VERSION:-2.5.1-1}"

DEB_NAME="libgdrapi_${GDRCOPY_PKG_VER}_${DEB_ARCH}.${OS_VER}.deb"
BASE_URL="https://developer.download.nvidia.com/compute/redist/gdrcopy"
URL="${BASE_URL}/CUDA%20${CUDA_VER}/${OS_VER_LOWER}/${URL_ARCH}/${DEB_NAME}"

echo "Downloading: ${URL}"

# Use a private variable for the scratch directory: TMPDIR is a well-known
# environment variable and reassigning it would change where child processes
# place their own temporary files.
work_dir="$(mktemp -d)"
# ':?' makes the cleanup refuse to run if the variable is ever empty.
trap 'rm -rf -- "${work_dir:?}"' EXIT

# -f turns HTTP errors into a non-zero exit; --retry covers transient network failures.
curl -fSL --retry 3 --retry-delay 2 "${URL}" -o "${work_dir}/${DEB_NAME}"

export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get install -y "${work_dir}/${DEB_NAME}"

# Drop package caches and index files after the install.
apt-get clean
rm -rf /var/lib/apt/lists/*

echo "Installed ${DEB_NAME}"

View File

@@ -1,109 +0,0 @@
#!/bin/bash
# Builds gdrcopy, UCX and NIXL from source tarballs and installs each one
# under its own prefix below /usr/local.
# Usage: ./install_nixl.sh [--force]
#   --force  build and install every component even when the corresponding
#            "already installed" check below would skip it
FORCE=false
if [ "$1" == "--force" ]; then
FORCE=true
fi
# SUDO becomes true only when sudo exists and `sudo -n true` succeeds, i.e. it
# can be used without prompting for a password.
SUDO=false
if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
SUDO=true
fi
# ARCH is used below to build the NIXL library directory name.
ARCH=$(uname -m)
# Install prefixes for the three components.
ROOT_DIR="/usr/local"
mkdir -p "$ROOT_DIR"
GDR_HOME="$ROOT_DIR/gdrcopy"
UCX_HOME="$ROOT_DIR/ucx"
NIXL_HOME="$ROOT_DIR/nixl"
CUDA_HOME=/usr/local/cuda
# Put the freshly installed binaries and libraries ahead of existing entries
# for the remainder of this script.
export PATH="$GDR_HOME/bin:$UCX_HOME/bin:$NIXL_HOME/bin:$PATH"
export LD_LIBRARY_PATH="$GDR_HOME/lib:$UCX_HOME/lib:$NIXL_HOME/lib/$ARCH-linux-gnu:$LD_LIBRARY_PATH"
# Sources are downloaded and built in ./nixl_installer, relative to the
# caller's working directory; this script does not remove it afterwards.
TEMP_DIR="nixl_installer"
mkdir -p "$TEMP_DIR"
cd "$TEMP_DIR"
# Python-side build tools; meson and ninja are invoked in the NIXL step below.
pip install meson ninja pybind11
# --- gdrcopy: built unless the /dev/gdrdrv device node exists (or --force) ---
if [ ! -e "/dev/gdrdrv" ] || [ "$FORCE" = true ]; then
# NOTE(review): without -e, bash's builtin echo prints the "\n" literally.
echo "Installing gdrcopy\n"
wget https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v2.5.tar.gz
tar xzf v2.5.tar.gz; rm v2.5.tar.gz
cd gdrcopy-2.5
make prefix=$GDR_HOME CUDA=$CUDA_HOME all install
# insmod.sh is only run when passwordless sudo is available; otherwise the
# user is told to run it manually.
if $SUDO; then
echo "Running insmod.sh with sudo"
sudo ./insmod.sh
else
echo "Skipping insmod.sh - sudo not available"
echo "Please run 'sudo ./gdrcopy-2.5/insmod.sh' manually if needed"
fi
cd ..
else
echo "Found /dev/gdrdrv. Skipping gdrcopy installation"
fi
# --- UCX: built unless ucx_info is already on PATH (or --force) ---
if ! command -v ucx_info &> /dev/null || [ "$FORCE" = true ]; then
echo "Installing UCX"
wget https://github.com/openucx/ucx/releases/download/v1.18.0/ucx-1.18.0.tar.gz
tar xzf ucx-1.18.0.tar.gz; rm ucx-1.18.0.tar.gz
cd ucx-1.18.0
# Checking Mellanox NICs
# Extra configure flags are added when lspci lists a Mellanox device or the
# ibstat command is present.
MLX_OPTS=""
if lspci | grep -i mellanox > /dev/null || command -v ibstat > /dev/null; then
echo "Mellanox NIC detected, adding Mellanox-specific options"
MLX_OPTS="--with-rdmacm \
--with-mlx5-dv \
--with-ib-hw-tm"
fi
# $MLX_OPTS is left unquoted on purpose so that it splits into separate
# configure arguments (and vanishes when empty).
./configure --prefix=$UCX_HOME \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-cuda=$CUDA_HOME \
--with-dm \
--with-gdrcopy=$GDR_HOME \
--with-verbs \
--enable-mt \
$MLX_OPTS
# NOTE(review): `make -j` without a number puts no limit on parallel jobs.
make -j
make -j install-strip
# ldconfig is only run when passwordless sudo is available.
if $SUDO; then
echo "Running ldconfig with sudo"
sudo ldconfig
else
echo "Skipping ldconfig - sudo not available"
echo "Please run 'sudo ldconfig' manually if needed"
fi
cd ..
else
echo "Found existing UCX. Skipping UCX installation"
fi
# --- NIXL: built unless nixl_test is already on PATH (or --force) ---
if ! command -v nixl_test &> /dev/null || [ "$FORCE" = true ]; then
echo "Installing NIXL"
wget https://github.com/ai-dynamo/nixl/archive/refs/tags/0.2.0.tar.gz
tar xzf 0.2.0.tar.gz; rm 0.2.0.tar.gz
cd nixl-0.2.0
# Point the meson build at the UCX prefix used above.
meson setup build --prefix=$NIXL_HOME -Ducx_path=$UCX_HOME
cd build
ninja
ninja install
# Leave nixl-0.2.0/build and return to the installer directory.
cd ../..
else
echo "Found existing NIXL. Skipping NIXL installation"
fi