From 6366c098d7c76120b6a55a6829a2649c727a2862 Mon Sep 17 00:00:00 2001 From: Noa Neria Date: Thu, 4 Dec 2025 12:04:43 +0200 Subject: [PATCH] Validating Runai Model Streamer Integration with S3 Object Storage (#29320) Signed-off-by: Noa Neria --- docker/Dockerfile | 2 +- requirements/nightly_torch_test.txt | 2 +- requirements/rocm.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 6 +-- setup.py | 2 +- .../__init__.py | 0 .../runai_streamer_loader/conftest.py | 39 ++++++++++++++ .../test_runai_model_streamer_loader.py | 0 .../test_runai_model_streamer_s3.py | 52 +++++++++++++++++++ .../test_runai_utils.py | 0 .../test_weight_utils.py | 0 vllm/transformers_utils/runai_utils.py | 4 +- 13 files changed, 100 insertions(+), 11 deletions(-) rename tests/model_executor/model_loader/{runai_model_streamer => runai_streamer_loader}/__init__.py (100%) create mode 100644 tests/model_executor/model_loader/runai_streamer_loader/conftest.py rename tests/model_executor/model_loader/{runai_model_streamer => runai_streamer_loader}/test_runai_model_streamer_loader.py (100%) create mode 100644 tests/model_executor/model_loader/runai_streamer_loader/test_runai_model_streamer_s3.py rename tests/model_executor/model_loader/{runai_model_streamer => runai_streamer_loader}/test_runai_utils.py (100%) rename tests/model_executor/model_loader/{runai_model_streamer => runai_streamer_loader}/test_weight_utils.py (100%) diff --git a/docker/Dockerfile b/docker/Dockerfile index 73cb4d7e0dc..0d50d97e54c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -580,7 +580,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="0.46.1"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.0' + uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3' ENV 
VLLM_USAGE_SOURCE production-docker-image diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 53b012372be..7b2c665448a 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -42,6 +42,6 @@ tritonclient==2.51.0 numba == 0.61.2 # Required for N-gram speculative decoding numpy -runai-model-streamer[s3,gcs]==0.15.0 +runai-model-streamer[s3,gcs]==0.15.3 fastsafetensors>=0.1.10 pydantic>=2.12 # 2.11 leads to error on python 3.13 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index abbd33d6e12..05b9a21791c 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -12,7 +12,7 @@ tensorizer==2.10.1 packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -runai-model-streamer[s3,gcs]==0.15.0 +runai-model-streamer[s3,gcs]==0.15.3 conch-triton-kernels==1.2.1 timm>=1.0.17 fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459 diff --git a/requirements/test.in b/requirements/test.in index da7a7db1f00..dfae5b75821 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -51,7 +51,7 @@ tritonclient==2.51.0 arctic-inference == 0.1.1 # Required for suffix decoding test numba == 0.61.2 # Required for N-gram speculative decoding numpy -runai-model-streamer[s3,gcs]==0.15.0 +runai-model-streamer[s3,gcs]==0.15.3 fastsafetensors>=0.1.10 pydantic>=2.12 # 2.11 leads to error on python 3.13 decord==0.6.0 diff --git a/requirements/test.txt b/requirements/test.txt index c5f103b8b0d..571194e05c1 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -965,11 +965,11 @@ rsa==4.9.1 # via google-auth rtree==1.4.0 # via torchgeo -runai-model-streamer==0.15.0 +runai-model-streamer==0.15.3 # via -r requirements/test.in -runai-model-streamer-gcs==0.15.0 +runai-model-streamer-gcs==0.15.3 # via runai-model-streamer -runai-model-streamer-s3==0.15.0 +runai-model-streamer-s3==0.15.3 # via runai-model-streamer 
s3transfer==0.10.3 # via boto3 diff --git a/setup.py b/setup.py index af7282d4f74..6fcb6653bc4 100644 --- a/setup.py +++ b/setup.py @@ -797,7 +797,7 @@ setup( "bench": ["pandas", "matplotlib", "seaborn", "datasets"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], - "runai": ["runai-model-streamer[s3,gcs] >= 0.15.0"], + "runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"], "audio": [ "librosa", "soundfile", diff --git a/tests/model_executor/model_loader/runai_model_streamer/__init__.py b/tests/model_executor/model_loader/runai_streamer_loader/__init__.py similarity index 100% rename from tests/model_executor/model_loader/runai_model_streamer/__init__.py rename to tests/model_executor/model_loader/runai_streamer_loader/__init__.py diff --git a/tests/model_executor/model_loader/runai_streamer_loader/conftest.py b/tests/model_executor/model_loader/runai_streamer_loader/conftest.py new file mode 100644 index 00000000000..9a022f6bbd9 --- /dev/null +++ b/tests/model_executor/model_loader/runai_streamer_loader/conftest.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port +from vllm.v1.executor import UniProcExecutor +from vllm.v1.worker.worker_base import WorkerWrapperBase + + +# This is a dummy executor for patching in test_runai_model_streamer_s3.py. +# We cannot use vllm_runner fixture here, because it spawns worker process. +# The worker process reimports the patched entities, and the patch is not applied. 
class RunaiDummyExecutor(UniProcExecutor):
    """Single-process executor for smoke-testing the Run:ai streamer loader.

    The ``vllm_runner`` fixture spawns a separate worker process, which
    re-imports the modules under test and therefore loses any monkeypatching
    applied in the test process.  This executor keeps the driver worker
    in-process so patched entry points stay patched.
    """

    def _init_executor(self) -> None:
        """Create and initialize a single in-process driver worker."""
        distributed_init_method = get_distributed_init_method(
            get_ip(), get_open_port()
        )

        local_rank = 0
        rank = 0
        is_driver_worker = True

        # Device strings look like "cuda:0"; recover the index when present.
        # (Use str() rather than calling __str__() directly.)
        device_info = str(self.vllm_config.device_config.device).split(":")
        if len(device_info) > 1:
            local_rank = int(device_info[1])

        worker_rpc_kwargs = dict(
            vllm_config=self.vllm_config,
            local_rank=local_rank,
            rank=rank,
            distributed_init_method=distributed_init_method,
            is_driver_worker=is_driver_worker,
        )

        wrapper_kwargs = {
            "vllm_config": self.vllm_config,
        }

        self.driver_worker = WorkerWrapperBase(**wrapper_kwargs)

        # init_worker expects a list of per-worker kwargs (one entry here).
        self.collective_rpc("init_worker", args=([worker_rpc_kwargs],))
        self.collective_rpc("init_device")
def test_runai_model_loader_download_files_s3_mocked_with_patch(
    vllm_runner,
    tmp_path: Path,
    monkeypatch,
):
    """Smoke-test S3 model loading via the Run:ai streamer with mocked storage.

    A local HF snapshot backs a fake ``s3://`` bucket: the streamer's S3
    entry points are monkeypatched to serve files from that snapshot, and
    the model is then loaded through the dummy in-process executor.
    """
    # NOTE(review): the vllm_runner fixture is requested but never used here
    # (see conftest for why it cannot be) -- confirm it is still needed.
    streamer_patcher = StreamerPatcher(str(tmp_path))

    test_mock_s3_model = "s3://my-mock-bucket/gpt2/"

    # Materialize the model locally so the mocked S3 calls have real files.
    mock_model_dir = f"{tmp_path}/gpt2"
    snapshot_download(repo_id=test_model, local_dir=mock_model_dir)

    # Redirect every S3-facing entry point at the mock implementations.
    patch_targets = (
        (
            "vllm.transformers_utils.runai_utils.runai_list_safetensors",
            streamer_patcher.shim_list_safetensors,
        ),
        (
            "vllm.transformers_utils.runai_utils.runai_pull_files",
            streamer_patcher.shim_pull_files,
        ),
        (
            "vllm.model_executor.model_loader.weight_utils.SafetensorsStreamer",
            streamer_patcher.create_mock_streamer,
        ),
    )
    for target, replacement in patch_targets:
        monkeypatch.setattr(target, replacement)

    vllm_config = EngineArgs(
        model=test_mock_s3_model,
        load_format=load_format,
        tensor_parallel_size=1,
    ).create_engine_config()

    executor = RunaiDummyExecutor(vllm_config)
    executor.driver_worker.load_model()
try: from runai_model_streamer import list_safetensors as runai_list_safetensors from runai_model_streamer import pull_files as runai_pull_files -except (ImportError, OSError): - # see https://github.com/run-ai/runai-model-streamer/issues/26 - # OSError will be raised on arm64 platform +except ImportError: runai_model_streamer = PlaceholderModule("runai_model_streamer") # type: ignore[assignment] runai_pull_files = runai_model_streamer.placeholder_attr("pull_files") runai_list_safetensors = runai_model_streamer.placeholder_attr("list_safetensors")