Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-24 21:34:55 +08:00)
Compare commits: custom-mod ... lavinal712 (56 commits)
Commit SHA1s (author and date columns were empty in the mirror view):

c92cf75dfe 218b17040f a7c7a270f6 5455dd58e8 07084ef036 9e15c576cb 8ddba1e082 d1b8202e42
f7439c30c9 b53bd8372b a73981fe17 d738ec4141 03d1751cce cd71418052 58559ecc7e 48eeeae1f7
2223722e5b 4d1e8912d6 dfad05625e 9d94c377ef 1e8221ce39 00a26cd8dd a2eff1c668 1c902725b0
59a42b23d3 4a64d64407 c6c13b6717 af8255e934 d3a07558cf 23cba1804f 53a06cc969 0a5bd74931
d752992831 39e9254208 c134bca767 63bafc88cd 8f7fc0ada0 6fff794e59 ab9eeff757 6a1ff82d08
ce2b34bba7 2de1505e6e 81eed41b74 0719c20f5e 7c25a06591 280cf7fd38 33288e667f dd24464065
523967f396 10daac7e19 de61226385 39b3b84acc 2453e149d2 9cf8ad7a73 e9d91e156d 18de3adad1
.github/workflows/pr_modular_tests.yml (vendored, 78 changed lines)
@@ -77,47 +77,63 @@ jobs:
   run_fast_tests:
     needs: [check_code_quality, check_repository_consistency]
-    name: Fast PyTorch Modular Pipeline CPU tests
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - name: Fast PyTorch Modular Pipeline CPU tests
+            framework: pytorch_pipelines
+            runner: aws-highmemory-32-plus
+            image: diffusers/diffusers-pytorch-cpu
+            report: torch_cpu_modular_pipelines
+
+    name: ${{ matrix.config.name }}
+
     runs-on:
-      group: aws-highmemory-32-plus
+      group: ${{ matrix.config.runner }}
+
     container:
-      image: diffusers/diffusers-pytorch-cpu
+      image: ${{ matrix.config.image }}
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+
     defaults:
       run:
         shell: bash
+
     steps:
       - name: Checkout diffusers
         uses: actions/checkout@v3
         with:
           fetch-depth: 2

       - name: Install dependencies
         run: |
           uv pip install -e ".[quality]"
           #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
           uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
           uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps

       - name: Environment
         run: |
           python utils/print_env.py

       - name: Run fast PyTorch Pipeline CPU tests
+        if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
         run: |
           pytest -n 8 --max-worker-restart=0 --dist=loadfile \
-            -s -v \
-            --make-reports=tests_torch_cpu_modular_pipelines \
+            -k "not Flax and not Onnx" \
+            --make-reports=tests_${{ matrix.config.report }} \
             tests/modular_pipelines

       - name: Failure short reports
         if: ${{ failure() }}
-        run: cat reports/tests_torch_cpu_modular_pipelines_failures_short.txt
+        run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt

       - name: Test suite reports artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: pr_pytorch_pipelines_torch_cpu_modular_pipelines_test_reports
+          name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
           path: reports
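In short, the hardcoded job is generalized into a one-entry matrix so more configurations can be added later without duplicating the job body. A sketch of how the single matrix entry substitutes into the templated values (a hypothetical Python rendering for illustration only; this script is not part of the workflow):

# Hypothetical rendering of the one matrix entry into the templated job values.
matrix_config = {
    "name": "Fast PyTorch Modular Pipeline CPU tests",
    "framework": "pytorch_pipelines",
    "runner": "aws-highmemory-32-plus",
    "image": "diffusers/diffusers-pytorch-cpu",
    "report": "torch_cpu_modular_pipelines",
}

# ${{ matrix.config.report }} -> "torch_cpu_modular_pipelines", so the test step runs:
pytest_cmd = (
    "pytest -n 8 --max-worker-restart=0 --dist=loadfile "
    '-k "not Flax and not Onnx" '
    f"--make-reports=tests_{matrix_config['report']} "
    "tests/modular_pipelines"
)
artifact_name = f"pr_{matrix_config['framework']}_{matrix_config['report']}_test_reports"
print(pytest_cmd)
print(artifact_name)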
@@ -14,7 +14,6 @@
 import functools
 import math
-from math import prod
 from typing import Any, Dict, List, Optional, Tuple, Union

 import numpy as np
@@ -364,13 +363,7 @@ class QwenDoubleStreamAttnProcessor2_0:
 @maybe_allow_in_graph
 class QwenImageTransformerBlock(nn.Module):
     def __init__(
-        self,
-        dim: int,
-        num_attention_heads: int,
-        attention_head_dim: int,
-        qk_norm: str = "rms_norm",
-        eps: float = 1e-6,
-        zero_cond_t: bool = False,
+        self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6
     ):
         super().__init__()
@@ -410,43 +403,10 @@ class QwenImageTransformerBlock(nn.Module):
         self.txt_norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps)
         self.txt_mlp = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")

-        self.zero_cond_t = zero_cond_t
-
-    def _modulate(self, x, mod_params, index=None):
+    def _modulate(self, x, mod_params):
         """Apply modulation to input tensor"""
         # x: b l d, shift: b d, scale: b d, gate: b d
         shift, scale, gate = mod_params.chunk(3, dim=-1)
-
-        if index is not None:
-            # Assuming mod_params batch dim is 2*actual_batch (chunked into 2 parts)
-            # So shift, scale, gate have shape [2*actual_batch, d]
-            actual_batch = shift.size(0) // 2
-            shift_0, shift_1 = shift[:actual_batch], shift[actual_batch:]  # each: [actual_batch, d]
-            scale_0, scale_1 = scale[:actual_batch], scale[actual_batch:]
-            gate_0, gate_1 = gate[:actual_batch], gate[actual_batch:]
-
-            # index: [b, l] where b is actual batch size
-            # Expand to [b, l, 1] to match feature dimension
-            index_expanded = index.unsqueeze(-1)  # [b, l, 1]
-
-            # Expand chunks to [b, 1, d] then broadcast to [b, l, d]
-            shift_0_exp = shift_0.unsqueeze(1)  # [b, 1, d]
-            shift_1_exp = shift_1.unsqueeze(1)  # [b, 1, d]
-            scale_0_exp = scale_0.unsqueeze(1)
-            scale_1_exp = scale_1.unsqueeze(1)
-            gate_0_exp = gate_0.unsqueeze(1)
-            gate_1_exp = gate_1.unsqueeze(1)
-
-            # Use torch.where to select based on index
-            shift_result = torch.where(index_expanded == 0, shift_0_exp, shift_1_exp)
-            scale_result = torch.where(index_expanded == 0, scale_0_exp, scale_1_exp)
-            gate_result = torch.where(index_expanded == 0, gate_0_exp, gate_1_exp)
-        else:
-            shift_result = shift.unsqueeze(1)
-            scale_result = scale.unsqueeze(1)
-            gate_result = gate.unsqueeze(1)
-
-        return x * (1 + scale_result) + shift_result, gate_result
+        return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1), gate.unsqueeze(1)

     def forward(
         self,
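The deleted branch selected, per token, between two banks of modulation parameters stacked along the batch dimension. A minimal self-contained sketch of that selection logic (tensor sizes here are made up for illustration; the real block derives them from temb):

# Standalone sketch of the per-token modulation selection removed above.
import torch

b, l, d = 2, 6, 4
x = torch.randn(b, l, d)                # token features
mod_params = torch.randn(2 * b, 3 * d)  # two banks stacked on the batch dim
index = torch.randint(0, 2, (b, l))     # per token: 0 -> bank 0, 1 -> bank 1

shift, scale, gate = mod_params.chunk(3, dim=-1)  # each [2b, d]
idx = index.unsqueeze(-1)                          # [b, l, 1], broadcasts against [b, 1, d]
shift_sel = torch.where(idx == 0, shift[:b].unsqueeze(1), shift[b:].unsqueeze(1))
scale_sel = torch.where(idx == 0, scale[:b].unsqueeze(1), scale[b:].unsqueeze(1))
gate_sel = torch.where(idx == 0, gate[:b].unsqueeze(1), gate[b:].unsqueeze(1))

out = x * (1 + scale_sel) + shift_sel  # modulated features, [b, l, d]
print(out.shape, gate_sel.shape)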
@@ -456,13 +416,9 @@ class QwenImageTransformerBlock(nn.Module):
         temb: torch.Tensor,
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-        modulate_index: Optional[List[int]] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         # Get modulation parameters for both streams
         img_mod_params = self.img_mod(temb)  # [B, 6*dim]
-
-        if self.zero_cond_t:
-            temb = torch.chunk(temb, 2, dim=0)[0]
         txt_mod_params = self.txt_mod(temb)  # [B, 6*dim]

         # Split modulation parameters for norm1 and norm2
@@ -471,7 +427,7 @@ class QwenImageTransformerBlock(nn.Module):

         # Process image stream - norm1 + modulation
         img_normed = self.img_norm1(hidden_states)
-        img_modulated, img_gate1 = self._modulate(img_normed, img_mod1, modulate_index)
+        img_modulated, img_gate1 = self._modulate(img_normed, img_mod1)

         # Process text stream - norm1 + modulation
         txt_normed = self.txt_norm1(encoder_hidden_states)
@@ -501,7 +457,7 @@ class QwenImageTransformerBlock(nn.Module):

         # Process image stream - norm2 + MLP
         img_normed2 = self.img_norm2(hidden_states)
-        img_modulated2, img_gate2 = self._modulate(img_normed2, img_mod2, modulate_index)
+        img_modulated2, img_gate2 = self._modulate(img_normed2, img_mod2)
         img_mlp_output = self.img_mlp(img_modulated2)
         hidden_states = hidden_states + img_gate2 * img_mlp_output
@@ -577,7 +533,6 @@ class QwenImageTransformer2DModel(
         joint_attention_dim: int = 3584,
         guidance_embeds: bool = False,  # TODO: this should probably be removed
         axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
-        zero_cond_t: bool = False,
     ):
         super().__init__()
         self.out_channels = out_channels or in_channels
@@ -598,7 +553,6 @@ class QwenImageTransformer2DModel(
                 dim=self.inner_dim,
                 num_attention_heads=num_attention_heads,
                 attention_head_dim=attention_head_dim,
-                zero_cond_t=zero_cond_t,
             )
             for _ in range(num_layers)
         ]
@@ -608,7 +562,6 @@ class QwenImageTransformer2DModel(
         self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)

         self.gradient_checkpointing = False
-        self.zero_cond_t = zero_cond_t

     def forward(
         self,
@@ -665,17 +618,6 @@ class QwenImageTransformer2DModel(
         hidden_states = self.img_in(hidden_states)

         timestep = timestep.to(hidden_states.dtype)
-
-        if self.zero_cond_t:
-            timestep = torch.cat([timestep, timestep * 0], dim=0)
-            modulate_index = torch.tensor(
-                [[0] * prod(sample[0]) + [1] * sum([prod(s) for s in sample[1:]]) for sample in img_shapes],
-                device=timestep.device,
-                dtype=torch.int,
-            )
-        else:
-            modulate_index = None

         encoder_hidden_states = self.txt_norm(encoder_hidden_states)
         encoder_hidden_states = self.txt_in(encoder_hidden_states)
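For context on the removed branch: it doubled the timestep batch with an all-zero copy (so one modulation bank was conditioned on t and the other on t = 0) and tagged the first image's tokens in each sample with 0 and every following image's tokens with 1. A minimal sketch of that index construction (the img_shapes values below are hypothetical patch grids, not real model sizes):

# Sketch of the removed modulate_index construction.
from math import prod

import torch

timestep = torch.tensor([0.7, 0.3])  # one timestep per sample
img_shapes = [
    [(1, 2, 2), (1, 2, 2)],  # sample 0: target image + one reference image
    [(1, 2, 2), (1, 2, 2)],  # sample 1
]

# Bank 0 is conditioned on t, bank 1 on t = 0.
timestep = torch.cat([timestep, timestep * 0], dim=0)

# Token index: 0 for the first image's tokens, 1 for all following images' tokens.
modulate_index = torch.tensor(
    [[0] * prod(sample[0]) + [1] * sum(prod(s) for s in sample[1:]) for sample in img_shapes],
    dtype=torch.int,
)
print(modulate_index)  # [[0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]]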
@@ -699,8 +641,6 @@ class QwenImageTransformer2DModel(
                     encoder_hidden_states_mask,
                     temb,
                     image_rotary_emb,
                     attention_kwargs,
-                    modulate_index,
                 )

             else:
@@ -711,7 +651,6 @@ class QwenImageTransformer2DModel(
                     temb=temb,
                     image_rotary_emb=image_rotary_emb,
                     joint_attention_kwargs=attention_kwargs,
-                    modulate_index=modulate_index,
                 )

         # controlnet residual
@@ -720,8 +659,6 @@ class QwenImageTransformer2DModel(
                 interval_control = int(np.ceil(interval_control))
                 hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control]

-        if self.zero_cond_t:
-            temb = temb.chunk(2, dim=0)[0]
         # Use only the image part (hidden_states) from the dual-stream blocks
         hidden_states = self.norm_out(hidden_states, temb)
         output = self.proj_out(hidden_states)
@@ -32,8 +32,6 @@ warnings.simplefilter(action="ignore", category=FutureWarning)

 def pytest_configure(config):
     config.addinivalue_line("markers", "big_accelerator: marks tests as requiring big accelerator resources")
-    config.addinivalue_line("markers", "slow: mark test as slow")
-    config.addinivalue_line("markers", "nightly: mark test as nightly")


 def pytest_addoption(parser):
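The two removed lines registered the slow and nightly marks; without registration here or in a config file, pytest emits PytestUnknownMarkWarning for uses of those marks. A hypothetical minimal sketch of the register-then-use pattern, condensed into one snippet (in practice the hook lives in conftest.py and the test in a test module):

import pytest

def pytest_configure(config):
    # Registration: without this, @pytest.mark.slow triggers an unknown-mark warning.
    config.addinivalue_line("markers", "slow: mark test as slow")

@pytest.mark.slow
def test_example_slow():
    # Deselect at the command line with: pytest -m "not slow"
    assert 1 + 1 == 2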
@@ -1,272 +0,0 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import tempfile
from collections import deque
from typing import List

import numpy as np
import torch

from diffusers import FluxTransformer2DModel
from diffusers.modular_pipelines import (
    ComponentSpec,
    InputParam,
    ModularPipelineBlocks,
    OutputParam,
    PipelineState,
    WanModularPipeline,
)

from ..testing_utils import nightly, require_torch, slow


class DummyCustomBlockSimple(ModularPipelineBlocks):
    def __init__(self, use_dummy_model_component=False):
        self.use_dummy_model_component = use_dummy_model_component
        super().__init__()

    @property
    def expected_components(self):
        if self.use_dummy_model_component:
            return [ComponentSpec("transformer", FluxTransformer2DModel)]
        else:
            return []

    @property
    def inputs(self) -> List[InputParam]:
        return [InputParam("prompt", type_hint=str, required=True, description="Prompt to use")]

    @property
    def intermediate_inputs(self) -> List[InputParam]:
        return []

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(
                "output_prompt",
                type_hint=str,
                description="Modified prompt",
            )
        ]

    def __call__(self, components, state: PipelineState) -> PipelineState:
        block_state = self.get_block_state(state)

        old_prompt = block_state.prompt
        block_state.output_prompt = "Modular diffusers + " + old_prompt
        self.set_block_state(state, block_state)

        return components, state


CODE_STR = """
from diffusers.modular_pipelines import (
    ComponentSpec,
    InputParam,
    ModularPipelineBlocks,
    OutputParam,
    PipelineState,
    WanModularPipeline,
)
from typing import List

class DummyCustomBlockSimple(ModularPipelineBlocks):
    def __init__(self, use_dummy_model_component=False):
        self.use_dummy_model_component = use_dummy_model_component
        super().__init__()

    @property
    def expected_components(self):
        if self.use_dummy_model_component:
            return [ComponentSpec("transformer", FluxTransformer2DModel)]
        else:
            return []

    @property
    def inputs(self) -> List[InputParam]:
        return [InputParam("prompt", type_hint=str, required=True, description="Prompt to use")]

    @property
    def intermediate_inputs(self) -> List[InputParam]:
        return []

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(
                "output_prompt",
                type_hint=str,
                description="Modified prompt",
            )
        ]

    def __call__(self, components, state: PipelineState) -> PipelineState:
        block_state = self.get_block_state(state)

        old_prompt = block_state.prompt
        block_state.output_prompt = "Modular diffusers + " + old_prompt
        self.set_block_state(state, block_state)

        return components, state
"""


class TestModularCustomBlocks:
    def _test_block_properties(self, block):
        assert not block.expected_components
        assert not block.intermediate_inputs

        actual_inputs = [inp.name for inp in block.inputs]
        actual_intermediate_outputs = [out.name for out in block.intermediate_outputs]
        assert actual_inputs == ["prompt"]
        assert actual_intermediate_outputs == ["output_prompt"]

    def test_custom_block_properties(self):
        custom_block = DummyCustomBlockSimple()
        self._test_block_properties(custom_block)

    def test_custom_block_output(self):
        custom_block = DummyCustomBlockSimple()
        pipe = custom_block.init_pipeline()
        prompt = "Diffusers is nice"
        output = pipe(prompt=prompt)

        actual_inputs = [inp.name for inp in custom_block.inputs]
        actual_intermediate_outputs = [out.name for out in custom_block.intermediate_outputs]
        assert sorted(output.values) == sorted(actual_inputs + actual_intermediate_outputs)

        output_prompt = output.values["output_prompt"]
        assert output_prompt.startswith("Modular diffusers + ")

    def test_custom_block_saving_loading(self):
        custom_block = DummyCustomBlockSimple()

        with tempfile.TemporaryDirectory() as tmpdir:
            custom_block.save_pretrained(tmpdir)
            assert any("modular_config.json" in k for k in os.listdir(tmpdir))

            with open(os.path.join(tmpdir, "modular_config.json"), "r") as f:
                config = json.load(f)
            auto_map = config["auto_map"]
            assert auto_map == {"ModularPipelineBlocks": "test_modular_pipelines_custom_blocks.DummyCustomBlockSimple"}

            # For now, the Python script that implements the custom block has to be manually pushed to the Hub.
            # This is why we have to save the Python script separately here.
            code_path = os.path.join(tmpdir, "test_modular_pipelines_custom_blocks.py")
            with open(code_path, "w") as f:
                f.write(CODE_STR)

            loaded_custom_block = ModularPipelineBlocks.from_pretrained(tmpdir, trust_remote_code=True)

            pipe = loaded_custom_block.init_pipeline()
            prompt = "Diffusers is nice"
            output = pipe(prompt=prompt)

            actual_inputs = [inp.name for inp in loaded_custom_block.inputs]
            actual_intermediate_outputs = [out.name for out in loaded_custom_block.intermediate_outputs]
            assert sorted(output.values) == sorted(actual_inputs + actual_intermediate_outputs)

            output_prompt = output.values["output_prompt"]
            assert output_prompt.startswith("Modular diffusers + ")

    def test_custom_block_supported_components(self):
        custom_block = DummyCustomBlockSimple(use_dummy_model_component=True)
        pipe = custom_block.init_pipeline("hf-internal-testing/tiny-flux-kontext-pipe")
        pipe.load_components()

        assert len(pipe.components) == 1
        assert pipe.component_names[0] == "transformer"

    def test_custom_block_loads_from_hub(self):
        repo_id = "hf-internal-testing/tiny-modular-diffusers-block"
        block = ModularPipelineBlocks.from_pretrained(repo_id, trust_remote_code=True)
        self._test_block_properties(block)

        pipe = block.init_pipeline()

        prompt = "Diffusers is nice"
        output = pipe(prompt=prompt)
        output_prompt = output.values["output_prompt"]
        assert output_prompt.startswith("Modular diffusers + ")


@slow
@nightly
@require_torch
class TestKreaCustomBlocksIntegration:
    repo_id = "krea/krea-realtime-video"

    def test_loading_from_hub(self):
        blocks = ModularPipelineBlocks.from_pretrained(self.repo_id, trust_remote_code=True)
        block_names = sorted(blocks.sub_blocks)

        assert block_names == sorted(["text_encoder", "before_denoise", "denoise", "decode"])

        pipe = WanModularPipeline(blocks, self.repo_id)
        pipe.load_components(
            trust_remote_code=True,
            device_map="cuda",
            torch_dtype={"default": torch.bfloat16, "vae": torch.float16},
        )
        assert len(pipe.components) == 7
        assert sorted(pipe.components) == sorted(
            ["text_encoder", "tokenizer", "guider", "scheduler", "vae", "transformer", "video_processor"]
        )

    def test_forward(self):
        blocks = ModularPipelineBlocks.from_pretrained(self.repo_id, trust_remote_code=True)
        pipe = WanModularPipeline(blocks, self.repo_id)
        pipe.load_components(
            trust_remote_code=True,
            device_map="cuda",
            torch_dtype={"default": torch.bfloat16, "vae": torch.float16},
        )

        num_frames_per_block = 2
        num_blocks = 2

        state = PipelineState()
        state.set("frame_cache_context", deque(maxlen=pipe.config.frame_cache_len))

        prompt = ["a cat sitting on a boat"]

        for block in pipe.transformer.blocks:
            block.self_attn.fuse_projections()

        for block_idx in range(num_blocks):
            state = pipe(
                state,
                prompt=prompt,
                num_inference_steps=2,
                num_blocks=num_blocks,
                num_frames_per_block=num_frames_per_block,
                block_idx=block_idx,
                generator=torch.manual_seed(42),
            )
            current_frames = np.array(state.values["videos"][0])
            current_frames_flat = current_frames.flatten()
            actual_slices = np.concatenate([current_frames_flat[:4], current_frames_flat[-4:]]).tolist()

            if block_idx == 0:
                assert current_frames.shape == (5, 480, 832, 3)
                expected_slices = np.array([211, 229, 238, 208, 195, 180, 188, 193])
            else:
                assert current_frames.shape == (8, 480, 832, 3)
                expected_slices = np.array([179, 203, 214, 176, 194, 181, 187, 191])

            assert np.allclose(actual_slices, expected_slices)
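The deleted tests double as documentation for the custom-block API. A condensed usage sketch, assuming the DummyCustomBlockSimple class from the removed file is importable alongside a diffusers install with modular pipelines:

# Condensed usage sketch of the custom-block API exercised by the deleted tests.
block = DummyCustomBlockSimple()
pipe = block.init_pipeline()               # wrap the single block in a runnable pipeline
output = pipe(prompt="Diffusers is nice")  # returns a state whose .values is dict-like
print(output.values["output_prompt"])      # "Modular diffusers + Diffusers is nice"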
@@ -29,7 +29,6 @@ from diffusers import (
 )

 from ...testing_utils import (
-    Expectations,
     backend_empty_cache,
     enable_full_determinism,
     numpy_cosine_similarity_distance,
@@ -336,14 +335,7 @@ class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
         image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images

         image_slice = image[0, -3:, -3:, -1]

-        expected_slices = Expectations(
-            {
-                ("xpu", 3): np.array([0.0417, 0.0388, 0.0061, 0.0618, 0.0517, 0.0420, 0.1038, 0.1055, 0.1257]),
-                ("cuda", None): np.array([0.0479, 0.0378, 0.0217, 0.0942, 0.064, 0.0791, 0.2073, 0.1975, 0.2017]),
-            }
-        )
-        expected_slice = expected_slices.get_expectation()
+        expected_slice = np.array([0.0479, 0.0378, 0.0217, 0.0942, 0.064, 0.0791, 0.2073, 0.1975, 0.2017])

         max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice)
         self.assertLessEqual(max_diff, 1e-4)
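The removed Expectations helper keyed expected outputs by (device type, major version). A simplified stand-in showing the idea (an assumption-laden sketch; this dict-based lookup is not diffusers' actual Expectations implementation):

# Simplified stand-in for the removed device-keyed expectation lookup.
import numpy as np

EXPECTED = {
    ("xpu", 3): np.array([0.0417, 0.0388, 0.0061, 0.0618, 0.0517, 0.0420, 0.1038, 0.1055, 0.1257]),
    ("cuda", None): np.array([0.0479, 0.0378, 0.0217, 0.0942, 0.064, 0.0791, 0.2073, 0.1975, 0.2017]),
}

def get_expectation(device="cuda", major=None):
    # Try the exact (device, major) key first, then fall back to the generic CUDA entry.
    return EXPECTED.get((device, major), EXPECTED[("cuda", None)])

print(get_expectation("xpu", 3)[:3])  # device-specific expected slice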