Compare commits

..

7 Commits

Author SHA1 Message Date
Sayak Paul
c30acad259 Merge branch 'main' into video-processor-accept-imagelike-inputs 2026-02-05 10:46:04 +05:30
Sayak Paul
a3dcd9882f [core] make qwen hidden states contiguous to make torchao happy. (#13081)
make qwen hidden states contiguous to make torchao happy.
2026-02-05 09:02:32 +05:30
Sayak Paul
9fe0a9cac4 [core] make flux hidden states contiguous (#13068)
* make flux hidden states contiguous

* make fix-copies
2026-02-05 08:39:44 +05:30
Daniel Gu
4ebcdb6ecb Allow VideoProcessor.preprocess_video to accept single-image inputs 2026-02-05 03:39:06 +01:00
Daniel Gu
8a913577d9 Forward kwargs from preprocess/postprocess_video to preprocess/postprocess 2026-02-05 02:52:24 +01:00
David El Malih
03af690b60 docs: improve docstring scheduling_dpmsolver_multistep_inverse.py (#13083)
Improve docstring scheduling dpmsolver multistep inverse
2026-02-04 09:21:57 -08:00
Sayak Paul
90818e82b3 [docs] Fix syntax error in quantization configuration (#13076)
Fix syntax error in quantization configuration
2026-02-04 08:31:03 -08:00
26 changed files with 270 additions and 199 deletions

View File

@@ -66,7 +66,7 @@ from diffusers import DiffusionPipeline, PipelineQuantizationConfig, TorchAoConf
from torchao.quantization import Int4WeightOnlyConfig
pipeline_quant_config = PipelineQuantizationConfig(
quant_mapping={"transformer": TorchAoConfig(Int4WeightOnlyConfig(group_size=128)))}
quant_mapping={"transformer": TorchAoConfig(Int4WeightOnlyConfig(group_size=128))}
)
pipeline = DiffusionPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",

View File

@@ -125,9 +125,9 @@ class BriaFiboAttnProcessor:
encoder_hidden_states, hidden_states = hidden_states.split_with_sizes(
[encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1
)
hidden_states = attn.to_out[0](hidden_states)
hidden_states = attn.to_out[0](hidden_states.contiguous())
hidden_states = attn.to_out[1](hidden_states)
encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous())
return hidden_states, encoder_hidden_states
else:

View File

@@ -130,9 +130,9 @@ class FluxAttnProcessor:
encoder_hidden_states, hidden_states = hidden_states.split_with_sizes(
[encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1
)
hidden_states = attn.to_out[0](hidden_states)
hidden_states = attn.to_out[0](hidden_states.contiguous())
hidden_states = attn.to_out[1](hidden_states)
encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous())
return hidden_states, encoder_hidden_states
else:

View File

@@ -561,11 +561,11 @@ class QwenDoubleStreamAttnProcessor2_0:
img_attn_output = joint_hidden_states[:, seq_txt:, :] # Image part
# Apply output projections
img_attn_output = attn.to_out[0](img_attn_output)
img_attn_output = attn.to_out[0](img_attn_output.contiguous())
if len(attn.to_out) > 1:
img_attn_output = attn.to_out[1](img_attn_output) # dropout
txt_attn_output = attn.to_add_out(txt_attn_output)
txt_attn_output = attn.to_add_out(txt_attn_output.contiguous())
return img_attn_output, txt_attn_output

View File

@@ -302,7 +302,7 @@ class FluxTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("prompt_2"),
InputParam("max_sequence_length", type_hint=int, default=512, required=False),
InputParam("joint_attention_kwargs"),

View File

@@ -80,7 +80,7 @@ class Flux2TextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("max_sequence_length", type_hint=int, default=512, required=False),
InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(10, 20, 30), required=False),
]
@@ -99,7 +99,7 @@ class Flux2TextEncoderStep(ModularPipelineBlocks):
@staticmethod
def check_inputs(block_state):
prompt = block_state.prompt
if not isinstance(prompt, str) and not isinstance(prompt, list):
if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
@staticmethod
@@ -193,7 +193,7 @@ class Flux2RemoteTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
]
@property
@@ -210,7 +210,7 @@ class Flux2RemoteTextEncoderStep(ModularPipelineBlocks):
@staticmethod
def check_inputs(block_state):
prompt = block_state.prompt
if not isinstance(prompt, str) and not isinstance(prompt, list):
if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(block_state.prompt)}")
@torch.no_grad()
@@ -270,7 +270,7 @@ class Flux2KleinTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("max_sequence_length", type_hint=int, default=512, required=False),
InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(9, 18, 27), required=False),
]
@@ -290,7 +290,7 @@ class Flux2KleinTextEncoderStep(ModularPipelineBlocks):
def check_inputs(block_state):
prompt = block_state.prompt
if not isinstance(prompt, str) and not isinstance(prompt, list):
if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
@staticmethod
@@ -405,7 +405,7 @@ class Flux2KleinBaseTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("max_sequence_length", type_hint=int, default=512, required=False),
InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(9, 18, 27), required=False),
]
@@ -431,7 +431,7 @@ class Flux2KleinBaseTextEncoderStep(ModularPipelineBlocks):
def check_inputs(block_state):
prompt = block_state.prompt
if not isinstance(prompt, str) and not isinstance(prompt, list):
if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
@staticmethod

View File

@@ -56,7 +56,52 @@ logger = logging.get_logger(__name__)
# ====================
# 1. VAE ENCODER
# 1. TEXT ENCODER
# ====================
# auto_docstring
class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
"""
Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
The tokenizer to use guider (`ClassifierFreeGuidance`)
Inputs:
prompt (`str`, *optional*):
The prompt or prompts to guide image generation.
negative_prompt (`str`, *optional*):
The prompt or prompts not to guide the image generation.
max_sequence_length (`int`, *optional*, defaults to 1024):
Maximum sequence length for prompt encoding.
Outputs:
prompt_embeds (`Tensor`):
The prompt embeddings.
prompt_embeds_mask (`Tensor`):
The encoder attention mask.
negative_prompt_embeds (`Tensor`):
The negative prompt embeddings.
negative_prompt_embeds_mask (`Tensor`):
The negative prompt embeddings mask.
"""
model_name = "qwenimage"
block_classes = [QwenImageTextEncoderStep()]
block_names = ["text_encoder"]
block_trigger_inputs = ["prompt"]
@property
def description(self) -> str:
return "Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block."
" - `QwenImageTextEncoderStep` (text_encoder) is used when `prompt` is provided."
" - if `prompt` is not provided, step will be skipped."
# ====================
# 2. VAE ENCODER
# ====================
@@ -204,7 +249,7 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
# ====================
# 2. DENOISE (input -> prepare_latents -> set_timesteps -> prepare_rope_inputs -> denoise -> after_denoise)
# 3. DENOISE (input -> prepare_latents -> set_timesteps -> prepare_rope_inputs -> denoise -> after_denoise)
# ====================
@@ -966,7 +1011,7 @@ class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
# ====================
# 3. DECODE
# 4. DECODE
# ====================
@@ -1051,11 +1096,11 @@ class QwenImageAutoDecodeStep(AutoPipelineBlocks):
# ====================
# 4. AUTO BLOCKS & PRESETS
# 5. AUTO BLOCKS & PRESETS
# ====================
AUTO_BLOCKS = InsertableDict(
[
("text_encoder", QwenImageTextEncoderStep()),
("text_encoder", QwenImageAutoTextEncoderStep()),
("vae_encoder", QwenImageAutoVaeEncoderStep()),
("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
("denoise", QwenImageAutoCoreDenoiseStep()),

View File

@@ -244,7 +244,7 @@ class StableDiffusionXLTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("prompt_2"),
InputParam("negative_prompt"),
InputParam("negative_prompt_2"),

View File

@@ -179,7 +179,7 @@ class WanTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("negative_prompt"),
InputParam("max_sequence_length", default=512),
]

View File

@@ -149,7 +149,7 @@ class ZImageTextEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam("prompt", required=True),
InputParam("prompt"),
InputParam("negative_prompt"),
InputParam("max_sequence_length", default=512),
]

View File

@@ -545,7 +545,9 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -867,7 +867,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -245,13 +245,26 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
):
if self.config.use_beta_sigmas and not is_scipy_available():
raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
if (
sum(
[
self.config.use_beta_sigmas,
self.config.use_exponential_sigmas,
self.config.use_karras_sigmas,
]
)
> 1
):
raise ValueError(
"Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
)
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
deprecate(
"algorithm_types dpmsolver and sde-dpmsolver",
"1.0.0",
deprecation_message,
)
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -259,7 +272,15 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
self.betas = (
torch.linspace(
beta_start**0.5,
beta_end**0.5,
num_train_timesteps,
dtype=torch.float32,
)
** 2
)
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -287,7 +308,12 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self.init_noise_sigma = 1.0
# settings for DPM-Solver
if algorithm_type not in ["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"]:
if algorithm_type not in [
"dpmsolver",
"dpmsolver++",
"sde-dpmsolver",
"sde-dpmsolver++",
]:
if algorithm_type == "deis":
self.register_to_config(algorithm_type="dpmsolver++")
else:
@@ -724,7 +750,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self,
model_output: torch.Tensor,
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
"""
@@ -738,7 +764,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from the learned diffusion model.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
Returns:
@@ -822,7 +848,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self,
model_output: torch.Tensor,
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -832,8 +858,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from the learned diffusion model.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
noise (`torch.Tensor`, *optional*):
The noise tensor.
Returns:
`torch.Tensor`:
@@ -860,7 +888,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
"Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
)
sigma_t, sigma_s = self.sigmas[self.step_index + 1], self.sigmas[self.step_index]
sigma_t, sigma_s = (
self.sigmas[self.step_index + 1],
self.sigmas[self.step_index],
)
alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s)
lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
@@ -891,7 +922,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self,
model_output_list: List[torch.Tensor],
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -901,7 +932,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output_list (`List[torch.Tensor]`):
The direct outputs from learned diffusion model at current and latter timesteps.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
Returns:
@@ -1014,7 +1045,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
self,
model_output_list: List[torch.Tensor],
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -1024,8 +1055,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output_list (`List[torch.Tensor]`):
The direct outputs from learned diffusion model at current and latter timesteps.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by diffusion process.
noise (`torch.Tensor`, *optional*):
The noise tensor.
Returns:
`torch.Tensor`:
@@ -1106,7 +1139,9 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
return x_t
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.
@@ -1216,7 +1251,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
sample = sample.to(torch.float32)
if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None:
noise = randn_tensor(
model_output.shape, generator=generator, device=model_output.device, dtype=torch.float32
model_output.shape,
generator=generator,
device=model_output.device,
dtype=torch.float32,
)
elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]:
noise = variance_noise.to(device=model_output.device, dtype=torch.float32)

View File

@@ -141,6 +141,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
use_beta_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
use_flow_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use flow sigmas for step sizes in the noise schedule during the sampling process.
flow_shift (`float`, *optional*, defaults to 1.0):
The flow shift factor. Valid only when `use_flow_sigmas=True`.
lambda_min_clipped (`float`, defaults to `-inf`):
Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
cosine (`squaredcos_cap_v2`) noise schedule.
@@ -163,15 +167,15 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
beta_schedule: str = "linear",
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
solver_order: int = 2,
prediction_type: str = "epsilon",
prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon",
thresholding: bool = False,
dynamic_thresholding_ratio: float = 0.995,
sample_max_value: float = 1.0,
algorithm_type: str = "dpmsolver++",
solver_type: str = "midpoint",
algorithm_type: Literal["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"] = "dpmsolver++",
solver_type: Literal["midpoint", "heun"] = "midpoint",
lower_order_final: bool = True,
euler_at_final: bool = False,
use_karras_sigmas: Optional[bool] = False,
@@ -180,19 +184,32 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
use_flow_sigmas: Optional[bool] = False,
flow_shift: Optional[float] = 1.0,
lambda_min_clipped: float = -float("inf"),
variance_type: Optional[str] = None,
timestep_spacing: str = "linspace",
variance_type: Optional[Literal["learned", "learned_range"]] = None,
timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace",
steps_offset: int = 0,
):
if self.config.use_beta_sigmas and not is_scipy_available():
raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
if (
sum(
[
self.config.use_beta_sigmas,
self.config.use_exponential_sigmas,
self.config.use_karras_sigmas,
]
)
> 1
):
raise ValueError(
"Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
)
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
deprecate(
"algorithm_types dpmsolver and sde-dpmsolver",
"1.0.0",
deprecation_message,
)
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -200,7 +217,15 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
self.betas = (
torch.linspace(
beta_start**0.5,
beta_end**0.5,
num_train_timesteps,
dtype=torch.float32,
)
** 2
)
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -219,7 +244,12 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self.init_noise_sigma = 1.0
# settings for DPM-Solver
if algorithm_type not in ["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"]:
if algorithm_type not in [
"dpmsolver",
"dpmsolver++",
"sde-dpmsolver",
"sde-dpmsolver++",
]:
if algorithm_type == "deis":
self.register_to_config(algorithm_type="dpmsolver++")
else:
@@ -250,7 +280,11 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
"""
return self._step_index
def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
def set_timesteps(
self,
num_inference_steps: Optional[int] = None,
device: Optional[Union[str, torch.device]] = None,
):
"""
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
@@ -382,7 +416,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
return sample
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray:
"""
Convert sigma values to corresponding timestep values through interpolation.
@@ -419,7 +453,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
return t
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t
def _sigma_to_alpha_sigma_t(self, sigma):
def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Convert sigma values to alpha_t and sigma_t values.
@@ -441,7 +475,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
return alpha_t, sigma_t
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
@@ -567,7 +601,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self,
model_output: torch.Tensor,
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
"""
@@ -581,7 +615,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from the learned diffusion model.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
Returns:
@@ -666,7 +700,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self,
model_output: torch.Tensor,
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -676,8 +710,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from the learned diffusion model.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
noise (`torch.Tensor`, *optional*):
The noise tensor.
Returns:
`torch.Tensor`:
@@ -704,7 +740,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
"Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
)
sigma_t, sigma_s = self.sigmas[self.step_index + 1], self.sigmas[self.step_index]
sigma_t, sigma_s = (
self.sigmas[self.step_index + 1],
self.sigmas[self.step_index],
)
alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s)
lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
@@ -736,7 +775,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self,
model_output_list: List[torch.Tensor],
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -746,7 +785,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output_list (`List[torch.Tensor]`):
The direct outputs from learned diffusion model at current and latter timesteps.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by the diffusion process.
Returns:
@@ -860,7 +899,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self,
model_output_list: List[torch.Tensor],
*args,
sample: torch.Tensor = None,
sample: Optional[torch.Tensor] = None,
noise: Optional[torch.Tensor] = None,
**kwargs,
) -> torch.Tensor:
@@ -870,8 +909,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output_list (`List[torch.Tensor]`):
The direct outputs from learned diffusion model at current and latter timesteps.
sample (`torch.Tensor`):
sample (`torch.Tensor`, *optional*):
A current instance of a sample created by diffusion process.
noise (`torch.Tensor`, *optional*):
The noise tensor.
Returns:
`torch.Tensor`:
@@ -951,7 +992,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
)
return x_t
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[int, torch.Tensor]):
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
@@ -975,7 +1016,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
model_output: torch.Tensor,
timestep: Union[int, torch.Tensor],
sample: torch.Tensor,
generator=None,
generator: Optional[torch.Generator] = None,
variance_noise: Optional[torch.Tensor] = None,
return_dict: bool = True,
) -> Union[SchedulerOutput, Tuple]:
@@ -1027,7 +1068,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None:
noise = randn_tensor(
model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype
model_output.shape,
generator=generator,
device=model_output.device,
dtype=model_output.dtype,
)
elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]:
noise = variance_noise
@@ -1074,6 +1118,21 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
"""
Add noise to the clean `original_samples` using the scheduler's equivalent function.
Args:
original_samples (`torch.Tensor`):
The original samples to add noise to.
noise (`torch.Tensor`):
The noise tensor.
timesteps (`torch.IntTensor`):
The timesteps at which to add noise.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
@@ -1103,5 +1162,5 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
noisy_samples = alpha_t * original_samples + sigma_t * noise
return noisy_samples
def __len__(self):
def __len__(self) -> int:
return self.config.num_train_timesteps

View File

@@ -1120,7 +1120,9 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -662,7 +662,9 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -1122,7 +1122,9 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -1083,7 +1083,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
def index_for_timestep(
self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
self,
timestep: Union[int, torch.Tensor],
schedule_timesteps: Optional[torch.Tensor] = None,
) -> int:
"""
Find the index for a given timestep in the schedule.

View File

@@ -16,7 +16,7 @@ import warnings
from typing import List, Optional, Tuple, Union
import numpy as np
import PIL
import PIL.Image
import torch
import torch.nn.functional as F
@@ -26,9 +26,11 @@ from .image_processor import VaeImageProcessor, is_valid_image, is_valid_image_i
class VideoProcessor(VaeImageProcessor):
r"""Simple video processor."""
def preprocess_video(self, video, height: Optional[int] = None, width: Optional[int] = None) -> torch.Tensor:
def preprocess_video(
self, video, height: Optional[int] = None, width: Optional[int] = None, **kwargs
) -> torch.Tensor:
r"""
Preprocesses input video(s).
Preprocesses input video(s). Keyword arguments will be forwarded to `VaeImageProcessor.preprocess`.
Args:
video (`List[PIL.Image]`, `List[List[PIL.Image]]`, `torch.Tensor`, `np.array`, `List[torch.Tensor]`, `List[np.array]`):
@@ -50,6 +52,10 @@ class VideoProcessor(VaeImageProcessor):
width (`int`, *optional*`, defaults to `None`):
The width in preprocessed frames of the video. If `None`, will use get_default_height_width()` to get
the default width.
Returns:
`torch.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`:
A 5D tensor holding the batched channels-first video(s).
"""
if isinstance(video, list) and isinstance(video[0], np.ndarray) and video[0].ndim == 5:
warnings.warn(
@@ -67,20 +73,47 @@ class VideoProcessor(VaeImageProcessor):
video = torch.cat(video, axis=0)
# ensure the input is a list of videos:
# - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray)
# - if it is a single video, it is converted to a list of one video.
# - if it is a batched array of videos (5d torch.Tensor or np.ndarray), it is converted to a list of video
# arrays (a list of 4d torch.Tensor or np.ndarray). `VaeImageProcessor.preprocess` will then treat the first
# (frame) dim as a batch dim.
# - if it is a single video, it is converted to a list of one video. (A single video is a list of images or a
# single imagelist.)
# - if it is a list of imagelists, it will be kept as is (already a list of videos).
# - if it is a single image, it is expanded to a single frame video and then to a list of one video. The
# expansion will depend on the image type:
# - PIL.Image.Image --> one element list of PIL.Image.Image
# - 3D np.ndarray --> interpret as (H, W, C), expand to (F=1, H, W, C)
# - 3D torch.Tensor --> interpret as (C, H, W), expand to (F=1, C, H, W)
if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5:
video = list(video)
elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video):
video = [video]
elif isinstance(video, list) and is_valid_image_imagelist(video[0]):
video = video
elif is_valid_image(video):
if isinstance(video, PIL.Image.Image):
video = [video]
elif isinstance(video, np.ndarray):
if video.ndim == 2:
video = np.expand_dims(video, axis=-1) # Unsqueeze channel dim in last axis
if video.ndim == 3:
video = np.expand_dims(video, axis=0)
else:
raise ValueError(f"Input numpy.ndarray is expected to have 2 or 3 dims but got {video.ndim} dims")
elif isinstance(video, torch.Tensor):
if video.ndim == 2:
video = torch.unsqueeze(video, dim=0) # Unsqueeze channel dim in first dim
if video.ndim == 3:
video = torch.unsqueeze(video, dim=0)
else:
raise ValueError(f"Input torch.Tensor is expected to have 2 or 3 dims but got {video.ndim} dims")
video = [video]
else:
raise ValueError(
"Input is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Image"
)
video = torch.stack([self.preprocess(img, height=height, width=width) for img in video], dim=0)
video = torch.stack([self.preprocess(img, height=height, width=width, **kwargs) for img in video], dim=0)
# move the number of channels before the number of frames.
video = video.permute(0, 2, 1, 3, 4)
@@ -88,10 +121,11 @@ class VideoProcessor(VaeImageProcessor):
return video
def postprocess_video(
self, video: torch.Tensor, output_type: str = "np"
self, video: torch.Tensor, output_type: str = "np", **kwargs
) -> Union[np.ndarray, torch.Tensor, List[PIL.Image.Image]]:
r"""
Converts a video tensor to a list of frames for export.
Converts a video tensor to a list of frames for export. Keyword arguments will be forwarded to
`VaeImageProcessor.postprocess`.
Args:
video (`torch.Tensor`): The video as a tensor.
@@ -101,7 +135,7 @@ class VideoProcessor(VaeImageProcessor):
outputs = []
for batch_idx in range(batch_size):
batch_vid = video[batch_idx].permute(1, 0, 2, 3)
batch_output = self.postprocess(batch_vid, output_type)
batch_output = self.postprocess(batch_vid, output_type, **kwargs)
outputs.append(batch_output)
if output_type == "np":

View File

@@ -37,7 +37,6 @@ class TestFluxModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxModularPipeline
pipeline_blocks_class = FluxAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
default_repo_id = "hf-internal-testing/tiny-flux-pipe"
params = frozenset(["prompt", "height", "width", "guidance_scale"])
batch_params = frozenset(["prompt"])
@@ -64,7 +63,6 @@ class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxModularPipeline
pipeline_blocks_class = FluxAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
default_repo_id = "hf-internal-testing/tiny-flux-pipe"
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])
@@ -131,7 +129,6 @@ class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxKontextModularPipeline
pipeline_blocks_class = FluxKontextAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-kontext-pipe"
default_repo_id = "hf-internal-testing/tiny-flux-kontext-pipe"
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])

View File

@@ -32,8 +32,6 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2ModularPipeline
pipeline_blocks_class = Flux2AutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-modular"
default_repo_id = "black-forest-labs/FLUX.2-dev"
default_repo_id = "hf-internal-testing/tiny-flux2"
params = frozenset(["prompt", "height", "width", "guidance_scale"])
batch_params = frozenset(["prompt"])
@@ -62,7 +60,6 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2ModularPipeline
pipeline_blocks_class = Flux2AutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-modular"
default_repo_id = "hf-internal-testing/tiny-flux2"
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])

View File

@@ -32,7 +32,6 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2KleinModularPipeline
pipeline_blocks_class = Flux2KleinAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-modular"
default_repo_id = None # TODO
params = frozenset(["prompt", "height", "width"])
batch_params = frozenset(["prompt"])
@@ -60,7 +59,6 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2KleinModularPipeline
pipeline_blocks_class = Flux2KleinAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-modular"
default_repo_id = None # TODO
params = frozenset(["prompt", "height", "width", "image"])
batch_params = frozenset(["prompt", "image"])

View File

@@ -32,7 +32,7 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2KleinModularPipeline
pipeline_blocks_class = Flux2KleinBaseAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-base-modular"
default_repo_id = "hf-internal-testing/tiny-flux2-klein"
params = frozenset(["prompt", "height", "width"])
batch_params = frozenset(["prompt"])
@@ -59,7 +59,6 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2KleinModularPipeline
pipeline_blocks_class = Flux2KleinBaseAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-base-modular"
default_repo_id = "hf-internal-testing/tiny-flux2-klein"
params = frozenset(["prompt", "height", "width", "image"])
batch_params = frozenset(["prompt", "image"])

View File

@@ -34,7 +34,6 @@ class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuider
pipeline_class = QwenImageModularPipeline
pipeline_blocks_class = QwenImageAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-modular"
default_repo_id = "Qwen/Qwen-Image"
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
@@ -61,7 +60,6 @@ class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGu
pipeline_class = QwenImageEditModularPipeline
pipeline_blocks_class = QwenImageEditAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-modular"
default_repo_id = "Qwen/Qwen-Image-Edit"
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
@@ -88,7 +86,6 @@ class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, Modul
pipeline_class = QwenImageEditPlusModularPipeline
pipeline_blocks_class = QwenImageEditPlusAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-plus-modular"
default_repo_id = "Qwen/Qwen-Image-Edit-2509"
# No `mask_image` yet.
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"])

View File

@@ -279,8 +279,6 @@ class TestSDXLModularPipelineFast(
pipeline_class = StableDiffusionXLModularPipeline
pipeline_blocks_class = StableDiffusionXLAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
params = frozenset(
[
"prompt",
@@ -328,7 +326,6 @@ class TestSDXLImg2ImgModularPipelineFast(
pipeline_class = StableDiffusionXLModularPipeline
pipeline_blocks_class = StableDiffusionXLAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
params = frozenset(
[
"prompt",
@@ -382,7 +379,6 @@ class SDXLInpaintingModularPipelineFastTests(
pipeline_class = StableDiffusionXLModularPipeline
pipeline_blocks_class = StableDiffusionXLAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
params = frozenset(
[
"prompt",

View File

@@ -37,8 +37,6 @@ class ModularPipelineTesterMixin:
optional_params = frozenset(["num_inference_steps", "num_images_per_prompt", "latents", "output_type"])
# this is modular specific: generator needs to be a intermediate input because it's mutable
intermediate_params = frozenset(["generator"])
# prompt is required for most pipeline, with exceptions like qwen-image layer
required_params = frozenset(["prompt"])
def get_generator(self, seed=0):
generator = torch.Generator("cpu").manual_seed(seed)
@@ -57,12 +55,6 @@ class ModularPipelineTesterMixin:
"You need to set the attribute `pretrained_model_name_or_path` in the child test class. See existing pipeline tests for reference."
)
@property
def default_repo_id(self) -> str:
raise NotImplementedError(
"You need to set the attribute `default_repo_id` in the child test class. See existing pipeline tests for reference."
)
@property
def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]:
raise NotImplementedError(
@@ -129,7 +121,6 @@ class ModularPipelineTesterMixin:
pipe = self.get_pipeline()
input_parameters = pipe.blocks.input_names
optional_parameters = pipe.default_call_parameters
required_parameters = pipe.blocks.required_inputs
def _check_for_parameters(parameters, expected_parameters, param_type):
remaining_parameters = {param for param in parameters if param not in expected_parameters}
@@ -139,98 +130,6 @@ class ModularPipelineTesterMixin:
_check_for_parameters(self.params, input_parameters, "input")
_check_for_parameters(self.optional_params, optional_parameters, "optional")
_check_for_parameters(self.required_params, required_parameters, "required")
def test_loading_from_default_repo(self):
if self.default_repo_id is None:
return
try:
pipe = ModularPipeline.from_pretrained(self.default_repo_id)
assert pipe.blocks.__class__ == self.pipeline_blocks_class
except Exception as e:
assert False, f"Failed to load pipeline from default repo: {e}"
def test_modular_inference(self):
# run the pipeline to get the base output for comparison
pipe = self.get_pipeline()
pipe.to(torch_device, torch.float32)
inputs = self.get_dummy_inputs()
standard_output = pipe(**inputs, output="images")
# create text, denoise, decoder (and optional vae encoder) nodes
blocks = self.pipeline_blocks_class()
assert "text_encoder" in blocks.sub_blocks, "`text_encoder` block is not present in the pipeline"
assert "denoise" in blocks.sub_blocks, "`denoise` block is not present in the pipeline"
assert "decode" in blocks.sub_blocks, "`decode` block is not present in the pipeline"
# manually set the components in the sub_pipe
# a hack to workaround the fact the default pipeline properties are often incorrect for testing cases,
# #e.g. vae_scale_factor is ususally not 8 because vae is configured to be smaller for testing
def manually_set_all_components(pipe: ModularPipeline, sub_pipe: ModularPipeline):
for n, comp in pipe.components.items():
setattr(sub_pipe, n, comp)
# Initialize all nodes
text_node = blocks.sub_blocks["text_encoder"].init_pipeline(self.pretrained_model_name_or_path)
text_node.load_components(torch_dtype=torch.float32)
text_node.to(torch_device)
manually_set_all_components(pipe, text_node)
denoise_node = blocks.sub_blocks["denoise"].init_pipeline(self.pretrained_model_name_or_path)
denoise_node.load_components(torch_dtype=torch.float32)
denoise_node.to(torch_device)
manually_set_all_components(pipe, denoise_node)
decoder_node = blocks.sub_blocks["decode"].init_pipeline(self.pretrained_model_name_or_path)
decoder_node.load_components(torch_dtype=torch.float32)
decoder_node.to(torch_device)
manually_set_all_components(pipe, decoder_node)
if "vae_encoder" in blocks.sub_blocks:
vae_encoder_node = blocks.sub_blocks["vae_encoder"].init_pipeline(self.pretrained_model_name_or_path)
vae_encoder_node.load_components(torch_dtype=torch.float32)
vae_encoder_node.to(torch_device)
manually_set_all_components(pipe, vae_encoder_node)
else:
vae_encoder_node = None
def filter_inputs(available: dict, expected_keys) -> dict:
return {k: v for k, v in available.items() if k in expected_keys}
# prepare inputs for each node
inputs = self.get_dummy_inputs()
# 1. Text encoder: takes from inputs
text_inputs = filter_inputs(inputs, text_node.blocks.input_names)
text_output = text_node(**text_inputs)
text_output_dict = text_output.get_by_kwargs("denoiser_input_fields")
# 2. VAE encoder (optional): takes from inputs + text_output
if vae_encoder_node is not None:
vae_available = {**inputs, **text_output_dict}
vae_encoder_inputs = filter_inputs(vae_available, vae_encoder_node.blocks.input_names)
vae_encoder_output = vae_encoder_node(**vae_encoder_inputs)
vae_output_dict = vae_encoder_output.values
else:
vae_output_dict = {}
# 3. Denoise: takes from inputs + text_output + vae_output
denoise_available = {**inputs, **text_output_dict, **vae_output_dict}
denoise_inputs = filter_inputs(denoise_available, denoise_node.blocks.input_names)
denoise_output = denoise_node(**denoise_inputs)
latents = denoise_output.latents
# 4. Decoder: takes from inputs + denoise_output
decode_available = {**inputs, "latents": latents}
decode_inputs = filter_inputs(decode_available, decoder_node.blocks.input_names)
modular_output = decoder_node(**decode_inputs).images
assert modular_output.shape == standard_output.shape, (
f"Modular output should have same shape as standard output {standard_output.shape}, but got {modular_output.shape}"
)
def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True):
pipe = self.get_pipeline().to(torch_device)