# Mirror of https://github.com/vllm-project/vllm.git
# Synced 2025-12-06 06:53:12 +08:00 (original listing: 152 lines, 7.9 KiB, YAML)
steps:
# aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
- label: 'Build arm64 wheel - CUDA 12.9'
  # Explicit null dependency, consistent with the other unconditional build
  # steps in this pipeline, so the step is never implicitly gated.
  depends_on: null
  id: build-wheel-arm64-cuda-12-9
  agents:
    queue: arm64_cpu_queue_postmerge
  commands:
  # NOTE: torch_cuda_arch_list is derived from the upstream PyTorch build files here:
  # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.9.1
    --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0'
    --tag vllm-ci:build-image
    --target build
    --progress plain
    -f docker/Dockerfile .
  - mkdir artifacts
  # Copy dist/ from the build image into the host-mounted artifacts directory
  # and make the copied files readable/writable for all users.
  - >-
    docker run --rm
    -v $(pwd)/artifacts:/artifacts_host
    vllm-ci:build-image
    bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'
  - bash .buildkite/scripts/upload-wheels.sh
  env:
    DOCKER_BUILDKIT: '1'
# Manual gate referenced by the "Build wheel - CUDA 12.8" step via its key.
# NOTE(review): unlike the other block steps in this pipeline, this one does
# not set depends_on - confirm whether that is intentional.
- block: 'Build CUDA 12.8 wheel'
  key: block-build-cu128-wheel
# CUDA 12.8.1 wheel build; starts only after block-build-cu128-wheel.
# No torch_cuda_arch_list build arg is passed for this variant.
- label: 'Build wheel - CUDA 12.8'
  depends_on: block-build-cu128-wheel
  id: build-wheel-cuda-12-8
  agents:
    queue: cpu_queue_postmerge
  commands:
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.8.1
    --tag vllm-ci:build-image
    --target build
    --progress plain
    -f docker/Dockerfile .
  - mkdir artifacts
  # Copy dist/ from the build image into the host-mounted artifacts directory
  # and make the copied files readable/writable for all users.
  - >-
    docker run --rm
    -v $(pwd)/artifacts:/artifacts_host
    vllm-ci:build-image
    bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'
  - bash .buildkite/scripts/upload-wheels.sh
  env:
    DOCKER_BUILDKIT: '1'
# Manual gate referenced by the "Build wheel - CUDA 12.6" step via its key.
# The explicit null dependency means the gate itself waits on no other step.
- block: 'Build CUDA 12.6 wheel'
  key: block-build-cu126-wheel
  depends_on: null
# CUDA 12.6.3 wheel build; starts only after block-build-cu126-wheel.
- label: 'Build wheel - CUDA 12.6'
  depends_on: block-build-cu126-wheel
  id: build-wheel-cuda-12-6
  agents:
    queue: cpu_queue_postmerge
  commands:
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.6.3
    --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX'
    --tag vllm-ci:build-image
    --target build
    --progress plain
    -f docker/Dockerfile .
  - mkdir artifacts
  # Copy dist/ from the build image into the host-mounted artifacts directory
  # and make the copied files readable/writable for all users.
  - >-
    docker run --rm
    -v $(pwd)/artifacts:/artifacts_host
    vllm-ci:build-image
    bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'
  - bash .buildkite/scripts/upload-wheels.sh
  env:
    DOCKER_BUILDKIT: '1'
# x86 + CUDA builds
# CUDA 12.9.1 wheel build; not gated by a block step (explicit null dependency).
- label: 'Build wheel - CUDA 12.9'
  depends_on: null
  id: build-wheel-cuda-12-9
  agents:
    queue: cpu_queue_postmerge
  commands:
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.9.1
    --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX'
    --tag vllm-ci:build-image
    --target build
    --progress plain
    -f docker/Dockerfile .
  - mkdir artifacts
  # Copy dist/ from the build image into the host-mounted artifacts directory
  # and make the copied files readable/writable for all users.
  - >-
    docker run --rm
    -v $(pwd)/artifacts:/artifacts_host
    vllm-ci:build-image
    bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'
  - bash .buildkite/scripts/upload-wheels.sh
  env:
    DOCKER_BUILDKIT: '1'
# Builds the vllm-openai target with CUDA 12.8.1 and pushes it to the public
# ECR release repo under the tag <commit>-<machine arch>, then also under the
# plain <commit> tag.
- label: 'Build release image (x86)'
  depends_on: null
  id: build-release-image-x86
  agents:
    queue: cpu_queue_postmerge
  commands:
  # Log in to the public ECR registry before building/pushing.
  - >-
    aws ecr-public get-login-password --region us-east-1
    | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.8.1
    --build-arg FLASHINFER_AOT_COMPILE=true
    --build-arg INSTALL_KV_CONNECTORS=true
    --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)
    --target vllm-openai
    --progress plain
    -f docker/Dockerfile .
  - 'docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)'
  # Re-tag to the default image tag and push, just in case the arm64 build fails.
  - >-
    docker tag
    public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)
    public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT
  - 'docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT'
# PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
# Builds the vllm-openai target on the arm64 queue and pushes it to the public
# ECR release repo under the tag <commit>-<machine arch>.
- label: 'Build release image (arm64)'
  depends_on: null
  id: build-release-image-arm64
  agents:
    queue: arm64_cpu_queue_postmerge
  commands:
  # Log in to the public ECR registry before building/pushing.
  - >-
    aws ecr-public get-login-password --region us-east-1
    | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --build-arg CUDA_VERSION=12.9.1
    --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0'
    --build-arg INSTALL_KV_CONNECTORS=true
    --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)
    --target vllm-openai
    --progress plain
    -f docker/Dockerfile .
  - 'docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)'
# Job that creates the multi-arch manifest.
# Runs after both release image builds and publishes a manifest under the
# plain <commit> tag that references the -x86_64 and -aarch64 image tags.
- label: 'Create multi-arch manifest'
  depends_on:
  - build-release-image-x86
  - build-release-image-arm64
  id: create-multi-arch-manifest
  agents:
    queue: cpu_queue_postmerge
  commands:
  # Log in to the public ECR registry before creating/pushing the manifest.
  - >-
    aws ecr-public get-login-password --region us-east-1
    | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7
  - >-
    docker manifest create
    public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT
    public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64
    public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64
    --amend
  - 'docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT'
# Runs the release annotation script once the multi-arch manifest and the
# three x86 CUDA wheel builds have finished.
# NOTE(review): build-wheel-arm64-cuda-12-9 is not listed in depends_on -
# confirm whether the annotation should also wait for the arm64 wheel.
- label: 'Annotate release workflow'
  depends_on:
  - create-multi-arch-manifest
  - build-wheel-cuda-12-8
  - build-wheel-cuda-12-6
  - build-wheel-cuda-12-9
  id: annotate-release-workflow
  agents:
    queue: cpu_queue_postmerge
  commands:
  - bash .buildkite/scripts/annotate-release.sh
# Builds docker/Dockerfile.tpu and pushes it to vllm/vllm-tpu under both the
# `nightly` tag and the commit tag. Only runs when the build's NIGHTLY
# environment variable equals "1".
- label: 'Build and publish TPU release image'
  depends_on: null
  if: 'build.env("NIGHTLY") == "1"'
  agents:
    queue: tpu_queue_postmerge
  commands:
  # Use the built-in --force flag instead of piping `yes` into the confirmation
  # prompt; it does not rely on stdin and is safe under `set -o pipefail`.
  - docker system prune -a -f
  - git fetch --all
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg USE_SCCACHE=1
    --build-arg GIT_REPO_CHECK=1
    --tag vllm/vllm-tpu:nightly
    --tag vllm/vllm-tpu:$BUILDKITE_COMMIT
    --progress plain
    -f docker/Dockerfile.tpu .
  - 'docker push vllm/vllm-tpu:nightly'
  - 'docker push vllm/vllm-tpu:$BUILDKITE_COMMIT'
  plugins:
  # Docker Hub login as vllmbot; the password is read from the DOCKERHUB_TOKEN
  # environment variable.
  - docker-login#v3.0.0:
      username: vllmbot
      password-env: DOCKERHUB_TOKEN
  env:
    DOCKER_BUILDKIT: '1'
# Prompts for the release version; the answer is stored under the
# `release-version` key.
- input: 'Provide Release version here'
  id: input-release-version
  fields:
  - text: 'What is the release version?'
    key: release-version
# Manual gate referenced by the "Build and publish CPU release image" step via
# its key. The explicit null dependency means the gate waits on no other step.
- block: 'Build CPU release image'
  key: block-cpu-release-image-build
  depends_on: null
# Builds the vllm-openai target of docker/Dockerfile.cpu and pushes it to the
# public ECR CPU release repo under `latest` and under the release version.
# NOTE(review): the version tag is read from the `release-version` meta-data
# key (the key of the field in the input-release-version step), but this step
# only depends on block-cpu-release-image-build - confirm the value is always
# provided before that gate is unblocked.
- label: 'Build and publish CPU release image'
  depends_on: block-cpu-release-image-build
  agents:
    queue: cpu_queue_postmerge
  commands:
  # Log in to the public ECR registry before building/pushing.
  - >-
    aws ecr-public get-login-password --region us-east-1
    | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7
  - >-
    DOCKER_BUILDKIT=1 docker build
    --build-arg max_jobs=16
    --build-arg GIT_REPO_CHECK=1
    --build-arg VLLM_CPU_AVX512BF16=true
    --build-arg VLLM_CPU_AVX512VNNI=true
    --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)
    --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
    --progress plain
    --target vllm-openai
    -f docker/Dockerfile.cpu .
  - 'docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest'
  - 'docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)'
  env:
    DOCKER_BUILDKIT: '1'