Mirror of https://github.com/vllm-project/vllm.git, synced 2025-12-06 06:53:12 +08:00.
Validating Runai Model Streamer Integration with S3 Object Storage (#29320)
Signed-off-by: Noa Neria <noa@run.ai>
This commit is contained in:
@@ -580,7 +580,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
else \
|
||||
BITSANDBYTES_VERSION="0.46.1"; \
|
||||
fi; \
|
||||
uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.0'
|
||||
uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3'
|
||||
|
||||
ENV VLLM_USAGE_SOURCE production-docker-image
|
||||
|
||||
|
||||
@@ -42,6 +42,6 @@ tritonclient==2.51.0
|
||||
|
||||
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||
numpy
|
||||
runai-model-streamer[s3,gcs]==0.15.0
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
fastsafetensors>=0.1.10
|
||||
pydantic>=2.12 # 2.11 leads to error on python 3.13
|
||||
|
||||
@@ -12,7 +12,7 @@ tensorizer==2.10.1
|
||||
packaging>=24.2
|
||||
setuptools>=77.0.3,<80.0.0
|
||||
setuptools-scm>=8
|
||||
runai-model-streamer[s3,gcs]==0.15.0
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
conch-triton-kernels==1.2.1
|
||||
timm>=1.0.17
|
||||
fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
|
||||
|
||||
@@ -51,7 +51,7 @@ tritonclient==2.51.0
|
||||
arctic-inference == 0.1.1 # Required for suffix decoding test
|
||||
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||
numpy
|
||||
runai-model-streamer[s3,gcs]==0.15.0
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
fastsafetensors>=0.1.10
|
||||
pydantic>=2.12 # 2.11 leads to error on python 3.13
|
||||
decord==0.6.0
|
||||
|
||||
@@ -965,11 +965,11 @@ rsa==4.9.1
|
||||
# via google-auth
|
||||
rtree==1.4.0
|
||||
# via torchgeo
|
||||
runai-model-streamer==0.15.0
|
||||
runai-model-streamer==0.15.3
|
||||
# via -r requirements/test.in
|
||||
runai-model-streamer-gcs==0.15.0
|
||||
runai-model-streamer-gcs==0.15.3
|
||||
# via runai-model-streamer
|
||||
runai-model-streamer-s3==0.15.0
|
||||
runai-model-streamer-s3==0.15.3
|
||||
# via runai-model-streamer
|
||||
s3transfer==0.10.3
|
||||
# via boto3
|
||||
|
||||
2
setup.py
2
setup.py
@@ -797,7 +797,7 @@ setup(
|
||||
"bench": ["pandas", "matplotlib", "seaborn", "datasets"],
|
||||
"tensorizer": ["tensorizer==2.10.1"],
|
||||
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
|
||||
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.0"],
|
||||
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"],
|
||||
"audio": [
|
||||
"librosa",
|
||||
"soundfile",
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port
|
||||
from vllm.v1.executor import UniProcExecutor
|
||||
from vllm.v1.worker.worker_base import WorkerWrapperBase
|
||||
|
||||
|
||||
# This is a dummy executor for patching in test_runai_model_streamer_s3.py.
# We cannot use vllm_runner fixture here, because it spawns worker process.
# The worker process reimports the patched entities, and the patch is not applied.
class RunaiDummyExecutor(UniProcExecutor):
    """Single-process executor that keeps the driver worker in the current
    process, so monkeypatched runai-streamer entry points remain in effect
    when the worker loads the model."""

    def _init_executor(self) -> None:
        """Create and initialize a single in-process driver worker."""
        distributed_init_method = get_distributed_init_method(get_ip(), get_open_port())

        local_rank = 0
        rank = 0
        is_driver_worker = True

        # Honor an explicit device index (e.g. "cuda:1") from the config;
        # str(...) is the idiomatic spelling of device.__str__().
        device_info = str(self.vllm_config.device_config.device).split(":")
        if len(device_info) > 1:
            local_rank = int(device_info[1])

        worker_rpc_kwargs = dict(
            vllm_config=self.vllm_config,
            local_rank=local_rank,
            rank=rank,
            distributed_init_method=distributed_init_method,
            is_driver_worker=is_driver_worker,
        )

        wrapper_kwargs = {
            "vllm_config": self.vllm_config,
        }

        self.driver_worker = WorkerWrapperBase(**wrapper_kwargs)

        # init_worker expects a list of per-worker kwargs dicts.
        self.collective_rpc("init_worker", args=([worker_rpc_kwargs],))
        self.collective_rpc("init_device")
|
||||
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
from runai_model_streamer.safetensors_streamer.streamer_mock import StreamerPatcher
|
||||
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
|
||||
from .conftest import RunaiDummyExecutor
|
||||
|
||||
# Load format under test, and a small public HF model to mirror into the
# mocked S3 bucket.
load_format = "runai_streamer"
test_model = "openai-community/gpt2"
|
||||
|
||||
|
||||
def test_runai_model_loader_download_files_s3_mocked_with_patch(
    vllm_runner,
    tmp_path: Path,
    monkeypatch,
):
    """Load a model via the runai_streamer load format against a mocked S3
    bucket: the streamer's list/pull/stream entry points are patched to serve
    files from a locally downloaded HF snapshot instead of real S3."""
    # NOTE(review): vllm_runner appears unused in this test body (the dummy
    # in-process executor is used instead) — confirm before removing it.
    patcher = StreamerPatcher(str(tmp_path))

    test_mock_s3_model = "s3://my-mock-bucket/gpt2/"

    # Download the real model from HF; the patched streamer serves these
    # files as if they lived in the mock S3 bucket.
    mock_model_dir = tmp_path / "gpt2"
    snapshot_download(repo_id=test_model, local_dir=mock_model_dir)

    # Redirect the three runai entry points vLLM uses to the mock shims.
    monkeypatch.setattr(
        "vllm.transformers_utils.runai_utils.runai_list_safetensors",
        patcher.shim_list_safetensors,
    )
    monkeypatch.setattr(
        "vllm.transformers_utils.runai_utils.runai_pull_files",
        patcher.shim_pull_files,
    )
    monkeypatch.setattr(
        "vllm.model_executor.model_loader.weight_utils.SafetensorsStreamer",
        patcher.create_mock_streamer,
    )

    engine_args = EngineArgs(
        model=test_mock_s3_model,
        load_format=load_format,
        tensor_parallel_size=1,
    )

    vllm_config = engine_args.create_engine_config()

    # In-process executor so the monkeypatches above apply to the worker
    # (a spawned worker process would reimport the unpatched entities).
    executor = RunaiDummyExecutor(vllm_config)
    executor.driver_worker.load_model()
|
||||
@@ -18,9 +18,7 @@ SUPPORTED_SCHEMES = ["s3://", "gs://"]
|
||||
try:
|
||||
from runai_model_streamer import list_safetensors as runai_list_safetensors
|
||||
from runai_model_streamer import pull_files as runai_pull_files
|
||||
except (ImportError, OSError):
|
||||
# see https://github.com/run-ai/runai-model-streamer/issues/26
|
||||
# OSError will be raised on arm64 platform
|
||||
except ImportError:
|
||||
runai_model_streamer = PlaceholderModule("runai_model_streamer") # type: ignore[assignment]
|
||||
runai_pull_files = runai_model_streamer.placeholder_attr("pull_files")
|
||||
runai_list_safetensors = runai_model_streamer.placeholder_attr("list_safetensors")
|
||||
|
||||
Reference in New Issue
Block a user