u pup

does not need to specify the params:
fix more
2026-03-05 16:20:39 +08:00 · 2026-02-05 09:06:00 +00:00 · 2026-02-05 09:05:25 +00:00 · 2026-02-05 08:40:52 +00:00 · 2026-02-04 19:48:56 +00:00 · 2026-02-04 19:48:11 +00:00
36 changed files with 196 additions and 1810 deletions
--- a/docs/source/en/api/pipelines/z_image.md
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -53,41 +53,6 @@ image = pipe(
 image.save("zimage_img2img.png")
 ```

-## Inpainting
-
-Use [`ZImageInpaintPipeline`] to inpaint specific regions of an image based on a text prompt and mask.
-
-```python
-import torch
-import numpy as np
-from PIL import Image
-from diffusers import ZImageInpaintPipeline
-from diffusers.utils import load_image
-
-pipe = ZImageInpaintPipeline.from_pretrained("Tongyi-MAI/Z-Image-Turbo", torch_dtype=torch.bfloat16)
-pipe.to("cuda")
-
-url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
-init_image = load_image(url).resize((1024, 1024))
-
-# Create a mask (white = inpaint, black = preserve)
-mask = np.zeros((1024, 1024), dtype=np.uint8)
-mask[256:768, 256:768] = 255  # Inpaint center region
-mask_image = Image.fromarray(mask)
-
-prompt = "A beautiful lake with mountains in the background"
-image = pipe(
-    prompt,
-    image=init_image,
-    mask_image=mask_image,
-    strength=1.0,
-    num_inference_steps=9,
-    guidance_scale=0.0,
-    generator=torch.Generator("cuda").manual_seed(42),
-).images[0]
-image.save("zimage_inpaint.png")
-```
-
 ## ZImagePipeline

 [[autodoc]] ZImagePipeline
@@ -99,9 +64,3 @@ image.save("zimage_inpaint.png")
 [[autodoc]] ZImageImg2ImgPipeline
 	- all
 	- __call__
-
-## ZImageInpaintPipeline
-
-[[autodoc]] ZImageInpaintPipeline
-	- all
-	- __call__
--- a/docs/source/en/quantization/torchao.md
+++ b/docs/source/en/quantization/torchao.md
@@ -66,7 +66,7 @@ from diffusers import DiffusionPipeline, PipelineQuantizationConfig, TorchAoConf
 from torchao.quantization import Int4WeightOnlyConfig

 pipeline_quant_config = PipelineQuantizationConfig(
-    quant_mapping={"transformer": TorchAoConfig(Int4WeightOnlyConfig(group_size=128))}
+    quant_mapping={"transformer": TorchAoConfig(Int4WeightOnlyConfig(group_size=128)))}
 )
 pipeline = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
--- a/src/diffusers/init.py
+++ b/src/diffusers/init.py
@@ -696,7 +696,6 @@ else:
            "ZImageControlNetInpaintPipeline",
            "ZImageControlNetPipeline",
            "ZImageImg2ImgPipeline",
-            "ZImageInpaintPipeline",
            "ZImageOmniPipeline",
            "ZImagePipeline",
        ]
@@ -1429,7 +1428,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            ZImageControlNetInpaintPipeline,
            ZImageControlNetPipeline,
            ZImageImg2ImgPipeline,
-            ZImageInpaintPipeline,
            ZImageOmniPipeline,
            ZImagePipeline,
        )
--- a/src/diffusers/models/auto_model.py
+++ b/src/diffusers/models/auto_model.py
@@ -31,128 +31,10 @@ class AutoModel(ConfigMixin):
    def __init__(self, *args, **kwargs):
        raise EnvironmentError(
            f"{self.__class__.__name__} is designed to be instantiated "
-            f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)`, "
-            f"`{self.__class__.__name__}.from_config(config)`, or "
+            f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
            f"`{self.__class__.__name__}.from_pipe(pipeline)` methods."
        )

-    @classmethod
-    def from_config(
-        cls, pretrained_model_name_or_path_or_dict: Optional[Union[str, os.PathLike, dict]] = None, **kwargs
-    ):
-        r"""
-        Instantiate a model from a config dictionary or a pretrained model configuration file with random weights (no
-        pretrained weights are loaded).
-
-        Parameters:
-            pretrained_model_name_or_path_or_dict (`str`, `os.PathLike`, or `dict`):
-                Can be either:
-
-                    - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model
-                      configuration hosted on the Hub.
-                    - A path to a *directory* (for example `./my_model_directory`) containing a model configuration
-                      file.
-                    - A config dictionary.
-
-            cache_dir (`Union[str, os.PathLike]`, *optional*):
-                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
-                is not used.
-            force_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to force the (re-)download of the model configuration, overriding the cached version if
-                it exists.
-            proxies (`Dict[str, str]`, *optional*):
-                A dictionary of proxy servers to use by protocol or endpoint.
-            local_files_only(`bool`, *optional*, defaults to `False`):
-                Whether to only load local model configuration files or not.
-            token (`str` or *bool*, *optional*):
-                The token to use as HTTP bearer authorization for remote files.
-            revision (`str`, *optional*, defaults to `"main"`):
-                The specific model version to use.
-            trust_remote_code (`bool`, *optional*, defaults to `False`):
-                Whether to trust remote code.
-            subfolder (`str`, *optional*, defaults to `""`):
-                The subfolder location of a model file within a larger model repository on the Hub or locally.
-
-        Returns:
-            A model object instantiated from the config with random weights.
-
-        Example:
-
-        ```py
-        from diffusers import AutoModel
-
-        model = AutoModel.from_config("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
-        ```
-        """
-        subfolder = kwargs.pop("subfolder", None)
-        trust_remote_code = kwargs.pop("trust_remote_code", False)
-
-        hub_kwargs_names = [
-            "cache_dir",
-            "force_download",
-            "local_files_only",
-            "proxies",
-            "revision",
-            "token",
-        ]
-        hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names}
-
-        if pretrained_model_name_or_path_or_dict is None:
-            raise ValueError(
-                "Please provide a `pretrained_model_name_or_path_or_dict` as the first positional argument."
-            )
-
-        if isinstance(pretrained_model_name_or_path_or_dict, (str, os.PathLike)):
-            pretrained_model_name_or_path = pretrained_model_name_or_path_or_dict
-            config = cls.load_config(pretrained_model_name_or_path, subfolder=subfolder, **hub_kwargs)
-        else:
-            config = pretrained_model_name_or_path_or_dict
-            pretrained_model_name_or_path = config.get("_name_or_path", None)
-
-        has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
-        trust_remote_code = resolve_trust_remote_code(
-            trust_remote_code, pretrained_model_name_or_path, has_remote_code
-        )
-
-        if has_remote_code and trust_remote_code:
-            class_ref = config["auto_map"][cls.__name__]
-            module_file, class_name = class_ref.split(".")
-            module_file = module_file + ".py"
-            model_cls = get_class_from_dynamic_module(
-                pretrained_model_name_or_path,
-                subfolder=subfolder,
-                module_file=module_file,
-                class_name=class_name,
-                **hub_kwargs,
-            )
-        else:
-            if "_class_name" in config:
-                class_name = config["_class_name"]
-                library = "diffusers"
-            elif "model_type" in config:
-                class_name = "AutoModel"
-                library = "transformers"
-            else:
-                raise ValueError(
-                    f"Couldn't find a model class associated with the config: {config}. Make sure the config "
-                    "contains a `_class_name` or `model_type` key."
-                )
-
-            from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates
-
-            model_cls, _ = get_class_obj_and_candidates(
-                library_name=library,
-                class_name=class_name,
-                importable_classes=ALL_IMPORTABLE_CLASSES,
-                pipelines=None,
-                is_pipeline_module=False,
-            )
-
-        if model_cls is None:
-            raise ValueError(f"AutoModel can't find a model linked to {class_name}.")
-
-        return model_cls.from_config(config, **kwargs)
-
    @classmethod
    @validate_hf_hub_args
    def from_pretrained(cls, pretrained_model_or_path: Optional[Union[str, os.PathLike]] = None, **kwargs):
--- a/src/diffusers/models/transformers/transformer_bria_fibo.py
+++ b/src/diffusers/models/transformers/transformer_bria_fibo.py
@@ -125,9 +125,9 @@ class BriaFiboAttnProcessor:
            encoder_hidden_states, hidden_states = hidden_states.split_with_sizes(
                [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1
            )
-            hidden_states = attn.to_out[0](hidden_states.contiguous())
+            hidden_states = attn.to_out[0](hidden_states)
            hidden_states = attn.to_out[1](hidden_states)
-            encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous())
+            encoder_hidden_states = attn.to_add_out(encoder_hidden_states)

            return hidden_states, encoder_hidden_states
        else:
--- a/src/diffusers/models/transformers/transformer_flux.py
+++ b/src/diffusers/models/transformers/transformer_flux.py
@@ -130,9 +130,9 @@ class FluxAttnProcessor:
            encoder_hidden_states, hidden_states = hidden_states.split_with_sizes(
                [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1
            )
-            hidden_states = attn.to_out[0](hidden_states.contiguous())
+            hidden_states = attn.to_out[0](hidden_states)
            hidden_states = attn.to_out[1](hidden_states)
-            encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous())
+            encoder_hidden_states = attn.to_add_out(encoder_hidden_states)

            return hidden_states, encoder_hidden_states
        else:
--- a/src/diffusers/models/transformers/transformer_qwenimage.py
+++ b/src/diffusers/models/transformers/transformer_qwenimage.py
@@ -561,11 +561,11 @@ class QwenDoubleStreamAttnProcessor2_0:
        img_attn_output = joint_hidden_states[:, seq_txt:, :]  # Image part

        # Apply output projections
-        img_attn_output = attn.to_out[0](img_attn_output.contiguous())
+        img_attn_output = attn.to_out[0](img_attn_output)
        if len(attn.to_out) > 1:
            img_attn_output = attn.to_out[1](img_attn_output)  # dropout

-        txt_attn_output = attn.to_add_out(txt_attn_output.contiguous())
+        txt_attn_output = attn.to_add_out(txt_attn_output)

        return img_attn_output, txt_attn_output

--- a/src/diffusers/modular_pipelines/flux/encoders.py
+++ b/src/diffusers/modular_pipelines/flux/encoders.py
@@ -302,7 +302,7 @@ class FluxTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("prompt_2"),
            InputParam("max_sequence_length", type_hint=int, default=512, required=False),
            InputParam("joint_attention_kwargs"),
--- a/src/diffusers/modular_pipelines/flux2/encoders.py
+++ b/src/diffusers/modular_pipelines/flux2/encoders.py
@@ -80,7 +80,7 @@ class Flux2TextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("max_sequence_length", type_hint=int, default=512, required=False),
            InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(10, 20, 30), required=False),
        ]
@@ -99,7 +99,7 @@ class Flux2TextEncoderStep(ModularPipelineBlocks):
    @staticmethod
    def check_inputs(block_state):
        prompt = block_state.prompt
-        if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+        if not isinstance(prompt, str) and not isinstance(prompt, list):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

    @staticmethod
@@ -193,7 +193,7 @@ class Flux2RemoteTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
        ]

    @property
@@ -210,7 +210,7 @@ class Flux2RemoteTextEncoderStep(ModularPipelineBlocks):
    @staticmethod
    def check_inputs(block_state):
        prompt = block_state.prompt
-        if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+        if not isinstance(prompt, str) and not isinstance(prompt, list):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(block_state.prompt)}")

    @torch.no_grad()
@@ -270,7 +270,7 @@ class Flux2KleinTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("max_sequence_length", type_hint=int, default=512, required=False),
            InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(9, 18, 27), required=False),
        ]
@@ -290,7 +290,7 @@ class Flux2KleinTextEncoderStep(ModularPipelineBlocks):
    def check_inputs(block_state):
        prompt = block_state.prompt

-        if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+        if not isinstance(prompt, str) and not isinstance(prompt, list):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

    @staticmethod
@@ -405,7 +405,7 @@ class Flux2KleinBaseTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("max_sequence_length", type_hint=int, default=512, required=False),
            InputParam("text_encoder_out_layers", type_hint=Tuple[int], default=(9, 18, 27), required=False),
        ]
@@ -431,7 +431,7 @@ class Flux2KleinBaseTextEncoderStep(ModularPipelineBlocks):
    def check_inputs(block_state):
        prompt = block_state.prompt

-        if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+        if not isinstance(prompt, str) and not isinstance(prompt, list):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

    @staticmethod
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
@@ -56,52 +56,7 @@ logger = logging.get_logger(__name__)


 # ====================
-# 1. TEXT ENCODER
-# ====================
-
-
-# auto_docstring
-class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
-    """
-    Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.
-
-      Components:
-          text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
-          The tokenizer to use guider (`ClassifierFreeGuidance`)
-
-      Inputs:
-          prompt (`str`, *optional*):
-              The prompt or prompts to guide image generation.
-          negative_prompt (`str`, *optional*):
-              The prompt or prompts not to guide the image generation.
-          max_sequence_length (`int`, *optional*, defaults to 1024):
-              Maximum sequence length for prompt encoding.
-
-      Outputs:
-          prompt_embeds (`Tensor`):
-              The prompt embeddings.
-          prompt_embeds_mask (`Tensor`):
-              The encoder attention mask.
-          negative_prompt_embeds (`Tensor`):
-              The negative prompt embeddings.
-          negative_prompt_embeds_mask (`Tensor`):
-              The negative prompt embeddings mask.
-    """
-
-    model_name = "qwenimage"
-    block_classes = [QwenImageTextEncoderStep()]
-    block_names = ["text_encoder"]
-    block_trigger_inputs = ["prompt"]
-
-    @property
-    def description(self) -> str:
-        return "Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block."
-        " - `QwenImageTextEncoderStep` (text_encoder) is used when `prompt` is provided."
-        " - if `prompt` is not provided, step will be skipped."
-
-
-# ====================
-# 2. VAE ENCODER
+# 1. VAE ENCODER
 # ====================


@@ -249,7 +204,7 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):


 # ====================
-# 3. DENOISE (input -> prepare_latents -> set_timesteps -> prepare_rope_inputs -> denoise -> after_denoise)
+# 2. DENOISE (input -> prepare_latents -> set_timesteps -> prepare_rope_inputs -> denoise -> after_denoise)
 # ====================


@@ -1011,7 +966,7 @@ class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):


 # ====================
-# 4. DECODE
+# 3. DECODE
 # ====================


@@ -1096,11 +1051,11 @@ class QwenImageAutoDecodeStep(AutoPipelineBlocks):


 # ====================
-# 5. AUTO BLOCKS & PRESETS
+# 4. AUTO BLOCKS & PRESETS
 # ====================
 AUTO_BLOCKS = InsertableDict(
    [
-        ("text_encoder", QwenImageAutoTextEncoderStep()),
+        ("text_encoder", QwenImageTextEncoderStep()),
        ("vae_encoder", QwenImageAutoVaeEncoderStep()),
        ("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
        ("denoise", QwenImageAutoCoreDenoiseStep()),
--- a/src/diffusers/modular_pipelines/stable_diffusion_xl/encoders.py
+++ b/src/diffusers/modular_pipelines/stable_diffusion_xl/encoders.py
@@ -244,7 +244,7 @@ class StableDiffusionXLTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("prompt_2"),
            InputParam("negative_prompt"),
            InputParam("negative_prompt_2"),
--- a/src/diffusers/modular_pipelines/wan/encoders.py
+++ b/src/diffusers/modular_pipelines/wan/encoders.py
@@ -179,7 +179,7 @@ class WanTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("negative_prompt"),
            InputParam("max_sequence_length", default=512),
        ]
--- a/src/diffusers/modular_pipelines/z_image/encoders.py
+++ b/src/diffusers/modular_pipelines/z_image/encoders.py
@@ -149,7 +149,7 @@ class ZImageTextEncoderStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("prompt"),
+            InputParam("prompt", required=True),
            InputParam("negative_prompt"),
            InputParam("max_sequence_length", default=512),
        ]
--- a/src/diffusers/pipelines/init.py
+++ b/src/diffusers/pipelines/init.py
@@ -410,12 +410,11 @@ else:
        "Kandinsky5I2IPipeline",
    ]
    _import_structure["z_image"] = [
-        "ZImageControlNetInpaintPipeline",
-        "ZImageControlNetPipeline",
        "ZImageImg2ImgPipeline",
-        "ZImageInpaintPipeline",
-        "ZImageOmniPipeline",
        "ZImagePipeline",
+        "ZImageControlNetPipeline",
+        "ZImageControlNetInpaintPipeline",
+        "ZImageOmniPipeline",
    ]
    _import_structure["skyreels_v2"] = [
        "SkyReelsV2DiffusionForcingPipeline",
@@ -871,7 +870,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            ZImageControlNetInpaintPipeline,
            ZImageControlNetPipeline,
            ZImageImg2ImgPipeline,
-            ZImageInpaintPipeline,
            ZImageOmniPipeline,
            ZImagePipeline,
        )
--- a/src/diffusers/pipelines/auto_pipeline.py
+++ b/src/diffusers/pipelines/auto_pipeline.py
@@ -127,7 +127,6 @@ from .z_image import (
    ZImageControlNetInpaintPipeline,
    ZImageControlNetPipeline,
    ZImageImg2ImgPipeline,
-    ZImageInpaintPipeline,
    ZImageOmniPipeline,
    ZImagePipeline,
 )
@@ -236,7 +235,6 @@ AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict(
        ("stable-diffusion-pag", StableDiffusionPAGInpaintPipeline),
        ("qwenimage", QwenImageInpaintPipeline),
        ("qwenimage-edit", QwenImageEditInpaintPipeline),
-        ("z-image", ZImageInpaintPipeline),
    ]
 )

--- a/src/diffusers/pipelines/z_image/init.py
+++ b/src/diffusers/pipelines/z_image/init.py
@@ -26,7 +26,6 @@ else:
    _import_structure["pipeline_z_image_controlnet"] = ["ZImageControlNetPipeline"]
    _import_structure["pipeline_z_image_controlnet_inpaint"] = ["ZImageControlNetInpaintPipeline"]
    _import_structure["pipeline_z_image_img2img"] = ["ZImageImg2ImgPipeline"]
-    _import_structure["pipeline_z_image_inpaint"] = ["ZImageInpaintPipeline"]
    _import_structure["pipeline_z_image_omni"] = ["ZImageOmniPipeline"]


@@ -43,7 +42,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
        from .pipeline_z_image_controlnet import ZImageControlNetPipeline
        from .pipeline_z_image_controlnet_inpaint import ZImageControlNetInpaintPipeline
        from .pipeline_z_image_img2img import ZImageImg2ImgPipeline
-        from .pipeline_z_image_inpaint import ZImageInpaintPipeline
        from .pipeline_z_image_omni import ZImageOmniPipeline
 else:
    import sys
--- a/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py
+++ b/src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py
@@ -1,932 +0,0 @@
-# Copyright 2025 Alibaba Z-Image Team and The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import inspect
-from typing import Any, Callable, Dict, List, Optional, Union
-
-import torch
-from transformers import AutoTokenizer, PreTrainedModel
-
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, ZImageLoraLoaderMixin
-from ...models.autoencoders import AutoencoderKL
-from ...models.transformers import ZImageTransformer2DModel
-from ...pipelines.pipeline_utils import DiffusionPipeline
-from ...schedulers import FlowMatchEulerDiscreteScheduler
-from ...utils import is_torch_xla_available, logging, replace_example_docstring
-from ...utils.torch_utils import randn_tensor
-from .pipeline_output import ZImagePipelineOutput
-
-
-if is_torch_xla_available():
-    import torch_xla.core.xla_model as xm
-
-    XLA_AVAILABLE = True
-else:
-    XLA_AVAILABLE = False
-
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
-
-EXAMPLE_DOC_STRING = """
-    Examples:
-        ```py
-        >>> import torch
-        >>> from diffusers import ZImageInpaintPipeline
-        >>> from diffusers.utils import load_image
-
-        >>> pipe = ZImageInpaintPipeline.from_pretrained("Tongyi-MAI/Z-Image-Turbo", torch_dtype=torch.bfloat16)
-        >>> pipe.to("cuda")
-
-        >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
-        >>> init_image = load_image(url).resize((1024, 1024))
-
-        >>> # Create a mask (white = inpaint, black = preserve)
-        >>> import numpy as np
-        >>> from PIL import Image
-
-        >>> mask = np.zeros((1024, 1024), dtype=np.uint8)
-        >>> mask[256:768, 256:768] = 255  # Inpaint center region
-        >>> mask_image = Image.fromarray(mask)
-
-        >>> prompt = "A beautiful lake with mountains in the background"
-        >>> image = pipe(
-        ...     prompt,
-        ...     image=init_image,
-        ...     mask_image=mask_image,
-        ...     strength=1.0,
-        ...     num_inference_steps=9,
-        ...     guidance_scale=0.0,
-        ...     generator=torch.Generator("cuda").manual_seed(42),
-        ... ).images[0]
-        >>> image.save("zimage_inpaint.png")
-        ```
-"""
-
-
-# Copied from diffusers.pipelines.flux.pipeline_flux.calculate_shift
-def calculate_shift(
-    image_seq_len,
-    base_seq_len: int = 256,
-    max_seq_len: int = 4096,
-    base_shift: float = 0.5,
-    max_shift: float = 1.15,
-):
-    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
-    b = base_shift - m * base_seq_len
-    mu = image_seq_len * m + b
-    return mu
-
-
-# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
-def retrieve_latents(
-    encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
-):
-    if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
-        return encoder_output.latent_dist.sample(generator)
-    elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
-        return encoder_output.latent_dist.mode()
-    elif hasattr(encoder_output, "latents"):
-        return encoder_output.latents
-    else:
-        raise AttributeError("Could not access latents of provided encoder_output")
-
-
-# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
-def retrieve_timesteps(
-    scheduler,
-    num_inference_steps: Optional[int] = None,
-    device: Optional[Union[str, torch.device]] = None,
-    timesteps: Optional[List[int]] = None,
-    sigmas: Optional[List[float]] = None,
-    **kwargs,
-):
-    r"""
-    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
-    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
-
-    Args:
-        scheduler (`SchedulerMixin`):
-            The scheduler to get timesteps from.
-        num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
-            must be `None`.
-        device (`str` or `torch.device`, *optional*):
-            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
-        timesteps (`List[int]`, *optional*):
-            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
-            `num_inference_steps` and `sigmas` must be `None`.
-        sigmas (`List[float]`, *optional*):
-            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
-            `num_inference_steps` and `timesteps` must be `None`.
-
-    Returns:
-        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
-        second element is the number of inference steps.
-    """
-    if timesteps is not None and sigmas is not None:
-        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
-    if timesteps is not None:
-        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
-        if not accepts_timesteps:
-            raise ValueError(
-                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
-                f" timestep schedules. Please check whether you are using the correct scheduler."
-            )
-        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    elif sigmas is not None:
-        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
-        if not accept_sigmas:
-            raise ValueError(
-                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
-                f" sigmas schedules. Please check whether you are using the correct scheduler."
-            )
-        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    else:
-        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-    return timesteps, num_inference_steps
-
-
-class ZImageInpaintPipeline(DiffusionPipeline, ZImageLoraLoaderMixin, FromSingleFileMixin):
-    r"""
-    The ZImage pipeline for inpainting.
-
-    Args:
-        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
-            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
-        vae ([`AutoencoderKL`]):
-            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
-        text_encoder ([`PreTrainedModel`]):
-            A text encoder model to encode text prompts.
-        tokenizer ([`AutoTokenizer`]):
-            A tokenizer to tokenize text prompts.
-        transformer ([`ZImageTransformer2DModel`]):
-            A ZImage transformer model to denoise the encoded image latents.
-    """
-
-    model_cpu_offload_seq = "text_encoder->transformer->vae"
-    _optional_components = []
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "mask", "masked_image_latents"]
-
-    def __init__(
-        self,
-        scheduler: FlowMatchEulerDiscreteScheduler,
-        vae: AutoencoderKL,
-        text_encoder: PreTrainedModel,
-        tokenizer: AutoTokenizer,
-        transformer: ZImageTransformer2DModel,
-    ):
-        super().__init__()
-
-        self.register_modules(
-            vae=vae,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            scheduler=scheduler,
-            transformer=transformer,
-        )
-        self.vae_scale_factor = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
-        )
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
-        self.mask_processor = VaeImageProcessor(
-            vae_scale_factor=self.vae_scale_factor * 2,
-            do_normalize=False,
-            do_binarize=True,
-            do_convert_grayscale=True,
-        )
-
-    # Copied from diffusers.pipelines.z_image.pipeline_z_image.ZImagePipeline.encode_prompt
-    def encode_prompt(
-        self,
-        prompt: Union[str, List[str]],
-        device: Optional[torch.device] = None,
-        do_classifier_free_guidance: bool = True,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
-        prompt_embeds: Optional[List[torch.FloatTensor]] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        max_sequence_length: int = 512,
-    ):
-        prompt = [prompt] if isinstance(prompt, str) else prompt
-        prompt_embeds = self._encode_prompt(
-            prompt=prompt,
-            device=device,
-            prompt_embeds=prompt_embeds,
-            max_sequence_length=max_sequence_length,
-        )
-
-        if do_classifier_free_guidance:
-            if negative_prompt is None:
-                negative_prompt = ["" for _ in prompt]
-            else:
-                negative_prompt = [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
-            assert len(prompt) == len(negative_prompt)
-            negative_prompt_embeds = self._encode_prompt(
-                prompt=negative_prompt,
-                device=device,
-                prompt_embeds=negative_prompt_embeds,
-                max_sequence_length=max_sequence_length,
-            )
-        else:
-            negative_prompt_embeds = []
-        return prompt_embeds, negative_prompt_embeds
-
-    # Copied from diffusers.pipelines.z_image.pipeline_z_image.ZImagePipeline._encode_prompt
-    def _encode_prompt(
-        self,
-        prompt: Union[str, List[str]],
-        device: Optional[torch.device] = None,
-        prompt_embeds: Optional[List[torch.FloatTensor]] = None,
-        max_sequence_length: int = 512,
-    ) -> List[torch.FloatTensor]:
-        device = device or self._execution_device
-
-        if prompt_embeds is not None:
-            return prompt_embeds
-
-        if isinstance(prompt, str):
-            prompt = [prompt]
-
-        for i, prompt_item in enumerate(prompt):
-            messages = [
-                {"role": "user", "content": prompt_item},
-            ]
-            prompt_item = self.tokenizer.apply_chat_template(
-                messages,
-                tokenize=False,
-                add_generation_prompt=True,
-                enable_thinking=True,
-            )
-            prompt[i] = prompt_item
-
-        text_inputs = self.tokenizer(
-            prompt,
-            padding="max_length",
-            max_length=max_sequence_length,
-            truncation=True,
-            return_tensors="pt",
-        )
-
-        text_input_ids = text_inputs.input_ids.to(device)
-        prompt_masks = text_inputs.attention_mask.to(device).bool()
-
-        prompt_embeds = self.text_encoder(
-            input_ids=text_input_ids,
-            attention_mask=prompt_masks,
-            output_hidden_states=True,
-        ).hidden_states[-2]
-
-        embeddings_list = []
-
-        for i in range(len(prompt_embeds)):
-            embeddings_list.append(prompt_embeds[i][prompt_masks[i]])
-
-        return embeddings_list
-
-    # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_img2img.StableDiffusion3Img2ImgPipeline.get_timesteps
-    def get_timesteps(self, num_inference_steps, strength, device):
-        # get the original timestep using init_timestep
-        init_timestep = min(num_inference_steps * strength, num_inference_steps)
-
-        t_start = int(max(num_inference_steps - init_timestep, 0))
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
-        if hasattr(self.scheduler, "set_begin_index"):
-            self.scheduler.set_begin_index(t_start * self.scheduler.order)
-
-        return timesteps, num_inference_steps - t_start
-
-    def prepare_mask_latents(
-        self,
-        mask,
-        masked_image,
-        batch_size,
-        height,
-        width,
-        dtype,
-        device,
-        generator,
-    ):
-        """Prepare mask and masked image latents for inpainting.
-
-        Args:
-            mask: Binary mask tensor where 1 = inpaint region, 0 = preserve region.
-            masked_image: Original image with masked regions zeroed out.
-            batch_size: Number of images to generate.
-            height: Output image height.
-            width: Output image width.
-            dtype: Data type for the tensors.
-            device: Device to place tensors on.
-            generator: Random generator for reproducibility.
-
-        Returns:
-            Tuple of (mask, masked_image_latents) prepared for the denoising loop.
-        """
-        # Calculate latent dimensions
-        latent_height = 2 * (int(height) // (self.vae_scale_factor * 2))
-        latent_width = 2 * (int(width) // (self.vae_scale_factor * 2))
-
-        # Resize mask to latent dimensions
-        mask = torch.nn.functional.interpolate(mask, size=(latent_height, latent_width), mode="nearest")
-        mask = mask.to(device=device, dtype=dtype)
-
-        # Encode masked image to latents
-        masked_image = masked_image.to(device=device, dtype=dtype)
-        if isinstance(generator, list):
-            masked_image_latents = [
-                retrieve_latents(self.vae.encode(masked_image[i : i + 1]), generator=generator[i])
-                for i in range(masked_image.shape[0])
-            ]
-            masked_image_latents = torch.cat(masked_image_latents, dim=0)
-        else:
-            masked_image_latents = retrieve_latents(self.vae.encode(masked_image), generator=generator)
-
-        # Apply VAE scaling
-        masked_image_latents = (masked_image_latents - self.vae.config.shift_factor) * self.vae.config.scaling_factor
-
-        # Expand for batch size
-        if mask.shape[0] < batch_size:
-            if not batch_size % mask.shape[0] == 0:
-                raise ValueError(
-                    "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
-                    f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
-                    " of masks that you pass is divisible by the total requested batch size."
-                )
-            mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
-        if masked_image_latents.shape[0] < batch_size:
-            if not batch_size % masked_image_latents.shape[0] == 0:
-                raise ValueError(
-                    "The passed images and the required batch size don't match. Images are supposed to be duplicated"
-                    f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
-                    " Make sure the number of images that you pass is divisible by the total requested batch size."
-                )
-            masked_image_latents = masked_image_latents.repeat(batch_size // masked_image_latents.shape[0], 1, 1, 1)
-
-        return mask, masked_image_latents
-
-    def prepare_latents(
-        self,
-        image,
-        timestep,
-        batch_size,
-        num_channels_latents,
-        height,
-        width,
-        dtype,
-        device,
-        generator,
-        latents=None,
-    ):
-        """Prepare latents for inpainting, returning noise and image_latents for blending.
-
-        Returns:
-            Tuple of (latents, noise, image_latents) where:
-            - latents: Noised image latents for denoising
-            - noise: The noise tensor used for blending
-            - image_latents: Clean image latents for blending
-        """
-        height = 2 * (int(height) // (self.vae_scale_factor * 2))
-        width = 2 * (int(width) // (self.vae_scale_factor * 2))
-
-        shape = (batch_size, num_channels_latents, height, width)
-
-        if latents is not None:
-            # Generate noise for blending even if latents are provided
-            noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
-            # Encode image for blending
-            image = image.to(device=device, dtype=dtype)
-            if isinstance(generator, list):
-                image_latents = [
-                    retrieve_latents(self.vae.encode(image[i : i + 1]), generator=generator[i])
-                    for i in range(image.shape[0])
-                ]
-                image_latents = torch.cat(image_latents, dim=0)
-            else:
-                image_latents = retrieve_latents(self.vae.encode(image), generator=generator)
-            image_latents = (image_latents - self.vae.config.shift_factor) * self.vae.config.scaling_factor
-            if batch_size > image_latents.shape[0] and batch_size % image_latents.shape[0] == 0:
-                image_latents = torch.cat([image_latents] * (batch_size // image_latents.shape[0]), dim=0)
-            return latents.to(device=device, dtype=dtype), noise, image_latents
-
-        # Encode the input image
-        image = image.to(device=device, dtype=dtype)
-        if image.shape[1] != num_channels_latents:
-            if isinstance(generator, list):
-                image_latents = [
-                    retrieve_latents(self.vae.encode(image[i : i + 1]), generator=generator[i])
-                    for i in range(image.shape[0])
-                ]
-                image_latents = torch.cat(image_latents, dim=0)
-            else:
-                image_latents = retrieve_latents(self.vae.encode(image), generator=generator)
-
-            # Apply scaling (inverse of decoding: decode does latents/scaling_factor + shift_factor)
-            image_latents = (image_latents - self.vae.config.shift_factor) * self.vae.config.scaling_factor
-        else:
-            image_latents = image
-
-        # Handle batch size expansion
-        if batch_size > image_latents.shape[0] and batch_size % image_latents.shape[0] == 0:
-            additional_image_per_prompt = batch_size // image_latents.shape[0]
-            image_latents = torch.cat([image_latents] * additional_image_per_prompt, dim=0)
-        elif batch_size > image_latents.shape[0] and batch_size % image_latents.shape[0] != 0:
-            raise ValueError(
-                f"Cannot duplicate `image` of batch size {image_latents.shape[0]} to {batch_size} text prompts."
-            )
-
-        # Generate noise for both initial noising and later blending
-        noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
-
-        # Add noise using flow matching scale_noise
-        latents = self.scheduler.scale_noise(image_latents, timestep, noise)
-
-        return latents, noise, image_latents
-
-    @property
-    def guidance_scale(self):
-        return self._guidance_scale
-
-    @property
-    def do_classifier_free_guidance(self):
-        return self._guidance_scale > 1
-
-    @property
-    def joint_attention_kwargs(self):
-        return self._joint_attention_kwargs
-
-    @property
-    def num_timesteps(self):
-        return self._num_timesteps
-
-    @property
-    def interrupt(self):
-        return self._interrupt
-
-    def check_inputs(
-        self,
-        prompt,
-        image,
-        mask_image,
-        strength,
-        height,
-        width,
-        output_type,
-        negative_prompt=None,
-        prompt_embeds=None,
-        negative_prompt_embeds=None,
-        callback_on_step_end_tensor_inputs=None,
-    ):
-        if strength < 0 or strength > 1:
-            raise ValueError(f"The value of strength should be in [0.0, 1.0] but is {strength}")
-
-        if callback_on_step_end_tensor_inputs is not None and not all(
-            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
-        ):
-            raise ValueError(
-                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
-            )
-
-        if prompt is not None and prompt_embeds is not None:
-            raise ValueError(
-                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
-                " only forward one of the two."
-            )
-        elif prompt is None and prompt_embeds is None:
-            raise ValueError(
-                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
-            )
-        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
-
-        if negative_prompt is not None and negative_prompt_embeds is not None:
-            raise ValueError(
-                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
-                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
-            )
-
-        if image is None:
-            raise ValueError("`image` input cannot be undefined for inpainting.")
-
-        if mask_image is None:
-            raise ValueError("`mask_image` input cannot be undefined for inpainting.")
-
-        if output_type not in ["latent", "pil", "np", "pt"]:
-            raise ValueError(f"`output_type` must be one of 'latent', 'pil', 'np', or 'pt', but got {output_type}")
-
-    @torch.no_grad()
-    @replace_example_docstring(EXAMPLE_DOC_STRING)
-    def __call__(
-        self,
-        prompt: Union[str, List[str]] = None,
-        image: PipelineImageInput = None,
-        mask_image: PipelineImageInput = None,
-        masked_image_latents: Optional[torch.FloatTensor] = None,
-        strength: float = 1.0,
-        height: Optional[int] = None,
-        width: Optional[int] = None,
-        num_inference_steps: int = 50,
-        sigmas: Optional[List[float]] = None,
-        guidance_scale: float = 5.0,
-        cfg_normalization: bool = False,
-        cfg_truncation: float = 1.0,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
-        num_images_per_prompt: Optional[int] = 1,
-        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[List[torch.FloatTensor]] = None,
-        negative_prompt_embeds: Optional[List[torch.FloatTensor]] = None,
-        output_type: Optional[str] = "pil",
-        return_dict: bool = True,
-        joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
-        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-        max_sequence_length: int = 512,
-    ):
-        r"""
-        Function invoked when calling the pipeline for inpainting.
-
-        Args:
-            prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
-                instead.
-            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
-                `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
-                numpy array and pytorch tensor, the expected value range is between `[0, 1]`. If it's a tensor or a
-                list of tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or
-                a list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)`.
-            mask_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
-                `Image`, numpy array or tensor representing a mask image for inpainting. White pixels (value 1) in the
-                mask will be inpainted, black pixels (value 0) will be preserved from the original image.
-            masked_image_latents (`torch.FloatTensor`, *optional*):
-                Pre-encoded masked image latents. If provided, the masked image encoding step will be skipped.
-            strength (`float`, *optional*, defaults to 1.0):
-                Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
-                starting point and more noise is added the higher the `strength`. The number of denoising steps depends
-                on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
-                process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
-                essentially ignores `image` in the masked region.
-            height (`int`, *optional*, defaults to 1024):
-                The height in pixels of the generated image. If not provided, uses the input image height.
-            width (`int`, *optional*, defaults to 1024):
-                The width in pixels of the generated image. If not provided, uses the input image width.
-            num_inference_steps (`int`, *optional*, defaults to 50):
-                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
-                expense of slower inference.
-            sigmas (`List[float]`, *optional*):
-                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
-                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
-                will be used.
-            guidance_scale (`float`, *optional*, defaults to 5.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
-            cfg_normalization (`bool`, *optional*, defaults to False):
-                Whether to apply configuration normalization.
-            cfg_truncation (`float`, *optional*, defaults to 1.0):
-                The truncation value for configuration.
-            negative_prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined, one has to pass
-                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
-                less than `1`).
-            num_images_per_prompt (`int`, *optional*, defaults to 1):
-                The number of images to generate per prompt.
-            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
-                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
-                to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
-                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
-                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will be generated by sampling using the supplied random `generator`.
-            prompt_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
-                provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`List[torch.FloatTensor]`, *optional*):
-                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
-                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
-                argument.
-            output_type (`str`, *optional*, defaults to `"pil"`):
-                The output format of the generate image. Choose between
-                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
-            return_dict (`bool`, *optional*, defaults to `True`):
-                Whether or not to return a [`~pipelines.stable_diffusion.ZImagePipelineOutput`] instead of a plain
-                tuple.
-            joint_attention_kwargs (`dict`, *optional*):
-                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
-                `self.processor` in
-                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
-            callback_on_step_end (`Callable`, *optional*):
-                A function that calls at the end of each denoising steps during the inference. The function is called
-                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
-                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
-                `callback_on_step_end_tensor_inputs`.
-            callback_on_step_end_tensor_inputs (`List`, *optional*):
-                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
-                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeline class.
-            max_sequence_length (`int`, *optional*, defaults to 512):
-                Maximum sequence length to use with the `prompt`.
-
-        Examples:
-
-        Returns:
-            [`~pipelines.z_image.ZImagePipelineOutput`] or `tuple`: [`~pipelines.z_image.ZImagePipelineOutput`] if
-            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
-            generated images.
-        """
-        # 1. Check inputs
-        self.check_inputs(
-            prompt=prompt,
-            image=image,
-            mask_image=mask_image,
-            strength=strength,
-            height=height,
-            width=width,
-            output_type=output_type,
-            negative_prompt=negative_prompt,
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
-        )
-
-        # 2. Preprocess image and mask
-        init_image = self.image_processor.preprocess(image)
-        init_image = init_image.to(dtype=torch.float32)
-
-        # Get dimensions from the preprocessed image if not specified
-        if height is None:
-            height = init_image.shape[-2]
-        if width is None:
-            width = init_image.shape[-1]
-
-        vae_scale = self.vae_scale_factor * 2
-        if height % vae_scale != 0:
-            raise ValueError(
-                f"Height must be divisible by {vae_scale} (got {height}). "
-                f"Please adjust the height to a multiple of {vae_scale}."
-            )
-        if width % vae_scale != 0:
-            raise ValueError(
-                f"Width must be divisible by {vae_scale} (got {width}). "
-                f"Please adjust the width to a multiple of {vae_scale}."
-            )
-
-        # Preprocess mask
-        mask = self.mask_processor.preprocess(mask_image, height=height, width=width)
-
-        device = self._execution_device
-
-        self._guidance_scale = guidance_scale
-        self._joint_attention_kwargs = joint_attention_kwargs
-        self._interrupt = False
-        self._cfg_normalization = cfg_normalization
-        self._cfg_truncation = cfg_truncation
-
-        # 3. Define call parameters
-        if prompt is not None and isinstance(prompt, str):
-            batch_size = 1
-        elif prompt is not None and isinstance(prompt, list):
-            batch_size = len(prompt)
-        else:
-            batch_size = len(prompt_embeds)
-
-        # If prompt_embeds is provided and prompt is None, skip encoding
-        if prompt_embeds is not None and prompt is None:
-            if self.do_classifier_free_guidance and negative_prompt_embeds is None:
-                raise ValueError(
-                    "When `prompt_embeds` is provided without `prompt`, "
-                    "`negative_prompt_embeds` must also be provided for classifier-free guidance."
-                )
-        else:
-            (
-                prompt_embeds,
-                negative_prompt_embeds,
-            ) = self.encode_prompt(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                do_classifier_free_guidance=self.do_classifier_free_guidance,
-                prompt_embeds=prompt_embeds,
-                negative_prompt_embeds=negative_prompt_embeds,
-                device=device,
-                max_sequence_length=max_sequence_length,
-            )
-
-        # 4. Prepare latent variables
-        num_channels_latents = self.transformer.in_channels
-
-        # Repeat prompt_embeds for num_images_per_prompt
-        if num_images_per_prompt > 1:
-            prompt_embeds = [pe for pe in prompt_embeds for _ in range(num_images_per_prompt)]
-            if self.do_classifier_free_guidance and negative_prompt_embeds:
-                negative_prompt_embeds = [npe for npe in negative_prompt_embeds for _ in range(num_images_per_prompt)]
-
-        actual_batch_size = batch_size * num_images_per_prompt
-
-        # Calculate latent dimensions for image_seq_len
-        latent_height = 2 * (int(height) // (self.vae_scale_factor * 2))
-        latent_width = 2 * (int(width) // (self.vae_scale_factor * 2))
-        image_seq_len = (latent_height // 2) * (latent_width // 2)
-
-        # 5. Prepare timesteps
-        mu = calculate_shift(
-            image_seq_len,
-            self.scheduler.config.get("base_image_seq_len", 256),
-            self.scheduler.config.get("max_image_seq_len", 4096),
-            self.scheduler.config.get("base_shift", 0.5),
-            self.scheduler.config.get("max_shift", 1.15),
-        )
-        self.scheduler.sigma_min = 0.0
-        scheduler_kwargs = {"mu": mu}
-        timesteps, num_inference_steps = retrieve_timesteps(
-            self.scheduler,
-            num_inference_steps,
-            device,
-            sigmas=sigmas,
-            **scheduler_kwargs,
-        )
-
-        # 6. Adjust timesteps based on strength
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        if num_inference_steps < 1:
-            raise ValueError(
-                f"After adjusting the num_inference_steps by strength parameter: {strength}, the number of pipeline "
-                f"steps is {num_inference_steps} which is < 1 and not appropriate for this pipeline."
-            )
-        latent_timestep = timesteps[:1].repeat(actual_batch_size)
-
-        # 7. Prepare latents from image (returns noise and image_latents for blending)
-        latents, noise, image_latents = self.prepare_latents(
-            init_image,
-            latent_timestep,
-            actual_batch_size,
-            num_channels_latents,
-            height,
-            width,
-            prompt_embeds[0].dtype,
-            device,
-            generator,
-            latents,
-        )
-
-        # 8. Prepare mask and masked image latents
-        # Create masked image: preserve only unmasked regions (mask=0)
-        if masked_image_latents is None:
-            masked_image = init_image * (mask < 0.5)
-        else:
-            masked_image = None  # Will use provided masked_image_latents
-
-        mask, masked_image_latents = self.prepare_mask_latents(
-            mask,
-            masked_image if masked_image is not None else init_image,
-            actual_batch_size,
-            height,
-            width,
-            prompt_embeds[0].dtype,
-            device,
-            generator,
-        )
-
-        num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
-        self._num_timesteps = len(timesteps)
-
-        # 9. Denoising loop
-        with self.progress_bar(total=num_inference_steps) as progress_bar:
-            for i, t in enumerate(timesteps):
-                if self.interrupt:
-                    continue
-
-                # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-                timestep = t.expand(latents.shape[0])
-                timestep = (1000 - timestep) / 1000
-                # Normalized time for time-aware config (0 at start, 1 at end)
-                t_norm = timestep[0].item()
-
-                # Handle cfg truncation
-                current_guidance_scale = self.guidance_scale
-                if (
-                    self.do_classifier_free_guidance
-                    and self._cfg_truncation is not None
-                    and float(self._cfg_truncation) <= 1
-                ):
-                    if t_norm > self._cfg_truncation:
-                        current_guidance_scale = 0.0
-
-                # Run CFG only if configured AND scale is non-zero
-                apply_cfg = self.do_classifier_free_guidance and current_guidance_scale > 0
-
-                if apply_cfg:
-                    latents_typed = latents.to(self.transformer.dtype)
-                    latent_model_input = latents_typed.repeat(2, 1, 1, 1)
-                    prompt_embeds_model_input = prompt_embeds + negative_prompt_embeds
-                    timestep_model_input = timestep.repeat(2)
-                else:
-                    latent_model_input = latents.to(self.transformer.dtype)
-                    prompt_embeds_model_input = prompt_embeds
-                    timestep_model_input = timestep
-
-                latent_model_input = latent_model_input.unsqueeze(2)
-                latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                model_out_list = self.transformer(
-                    latent_model_input_list,
-                    timestep_model_input,
-                    prompt_embeds_model_input,
-                )[0]
-
-                if apply_cfg:
-                    # Perform CFG
-                    pos_out = model_out_list[:actual_batch_size]
-                    neg_out = model_out_list[actual_batch_size:]
-
-                    noise_pred = []
-                    for j in range(actual_batch_size):
-                        pos = pos_out[j].float()
-                        neg = neg_out[j].float()
-
-                        pred = pos + current_guidance_scale * (pos - neg)
-
-                        # Renormalization
-                        if self._cfg_normalization and float(self._cfg_normalization) > 0.0:
-                            ori_pos_norm = torch.linalg.vector_norm(pos)
-                            new_pos_norm = torch.linalg.vector_norm(pred)
-                            max_new_norm = ori_pos_norm * float(self._cfg_normalization)
-                            if new_pos_norm > max_new_norm:
-                                pred = pred * (max_new_norm / new_pos_norm)
-
-                        noise_pred.append(pred)
-
-                    noise_pred = torch.stack(noise_pred, dim=0)
-                else:
-                    noise_pred = torch.stack([t.float() for t in model_out_list], dim=0)
-
-                noise_pred = noise_pred.squeeze(2)
-                noise_pred = -noise_pred
-
-                # compute the previous noisy sample x_t -> x_t-1
-                latents = self.scheduler.step(noise_pred.to(torch.float32), t, latents, return_dict=False)[0]
-                assert latents.dtype == torch.float32
-
-                # Inpainting blend: combine denoised latents with original image latents
-                init_latents_proper = image_latents
-
-                # Re-scale original latents to current noise level for proper blending
-                if i < len(timesteps) - 1:
-                    noise_timestep = timesteps[i + 1]
-                    init_latents_proper = self.scheduler.scale_noise(
-                        init_latents_proper, torch.tensor([noise_timestep]), noise
-                    )
-
-                # Blend: mask=1 for inpaint region (use denoised), mask=0 for preserve region (use original)
-                latents = (1 - mask) * init_latents_proper + mask * latents
-
-                if callback_on_step_end is not None:
-                    callback_kwargs = {}
-                    for k in callback_on_step_end_tensor_inputs:
-                        callback_kwargs[k] = locals()[k]
-                    callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
-
-                    latents = callback_outputs.pop("latents", latents)
-                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
-                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
-                    mask = callback_outputs.pop("mask", mask)
-                    masked_image_latents = callback_outputs.pop("masked_image_latents", masked_image_latents)
-
-                # call the callback, if provided
-                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
-                    progress_bar.update()
-
-                if XLA_AVAILABLE:
-                    xm.mark_step()
-
-        if output_type == "latent":
-            image = latents
-
-        else:
-            latents = latents.to(self.vae.dtype)
-            latents = (latents / self.vae.config.scaling_factor) + self.vae.config.shift_factor
-
-            image = self.vae.decode(latents, return_dict=False)[0]
-            image = self.image_processor.postprocess(image, output_type=output_type)
-
-        # Offload all models
-        self.maybe_free_model_hooks()
-
-        if not return_dict:
-            return (image,)
-
-        return ZImagePipelineOutput(images=image)
--- a/src/diffusers/quantizers/gguf/utils.py
+++ b/src/diffusers/quantizers/gguf/utils.py
@@ -79,8 +79,7 @@ MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES
 def _fused_mul_mat_gguf(x: torch.Tensor, qweight: torch.Tensor, qweight_type: int) -> torch.Tensor:
    # there is no need to call any kernel for fp16/bf16
    if qweight_type in UNQUANTIZED_TYPES:
-        weight = dequantize_gguf_tensor(qweight)
-        return x @ weight.T
+        return x @ qweight.T

    # TODO(Isotr0py): GGUF's MMQ and MMVQ implementation are designed for
    # contiguous batching and inefficient with diffusers' batching,
--- a/src/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py
+++ b/src/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py
@@ -545,9 +545,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/schedulers/scheduling_deis_multistep.py
+++ b/src/diffusers/schedulers/scheduling_deis_multistep.py
@@ -867,9 +867,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
+++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
@@ -245,26 +245,13 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
    ):
        if self.config.use_beta_sigmas and not is_scipy_available():
            raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
-        if (
-            sum(
-                [
-                    self.config.use_beta_sigmas,
-                    self.config.use_exponential_sigmas,
-                    self.config.use_karras_sigmas,
-                ]
-            )
-            > 1
-        ):
+        if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
            raise ValueError(
                "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
            )
        if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
            deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
-            deprecate(
-                "algorithm_types dpmsolver and sde-dpmsolver",
-                "1.0.0",
-                deprecation_message,
-            )
+            deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)

        if trained_betas is not None:
            self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -272,15 +259,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
            self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        elif beta_schedule == "scaled_linear":
            # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(
-                    beta_start**0.5,
-                    beta_end**0.5,
-                    num_train_timesteps,
-                    dtype=torch.float32,
-                )
-                ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
        elif beta_schedule == "squaredcos_cap_v2":
            # Glide cosine schedule
            self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -308,12 +287,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        self.init_noise_sigma = 1.0

        # settings for DPM-Solver
-        if algorithm_type not in [
-            "dpmsolver",
-            "dpmsolver++",
-            "sde-dpmsolver",
-            "sde-dpmsolver++",
-        ]:
+        if algorithm_type not in ["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"]:
            if algorithm_type == "deis":
                self.register_to_config(algorithm_type="dpmsolver++")
            else:
@@ -750,7 +724,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output: torch.Tensor,
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        **kwargs,
    ) -> torch.Tensor:
        """
@@ -764,7 +738,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output (`torch.Tensor`):
                The direct output from the learned diffusion model.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.

        Returns:
@@ -848,7 +822,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output: torch.Tensor,
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -858,10 +832,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output (`torch.Tensor`):
                The direct output from the learned diffusion model.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.
-            noise (`torch.Tensor`, *optional*):
-                The noise tensor.

        Returns:
            `torch.Tensor`:
@@ -888,10 +860,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
            )

-        sigma_t, sigma_s = (
-            self.sigmas[self.step_index + 1],
-            self.sigmas[self.step_index],
-        )
+        sigma_t, sigma_s = self.sigmas[self.step_index + 1], self.sigmas[self.step_index]
        alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
        alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s)
        lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
@@ -922,7 +891,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output_list: List[torch.Tensor],
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -932,7 +901,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output_list (`List[torch.Tensor]`):
                The direct outputs from learned diffusion model at current and latter timesteps.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.

        Returns:
@@ -1045,7 +1014,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output_list: List[torch.Tensor],
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -1055,10 +1024,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output_list (`List[torch.Tensor]`):
                The direct outputs from learned diffusion model at current and latter timesteps.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by diffusion process.
-            noise (`torch.Tensor`, *optional*):
-                The noise tensor.

        Returns:
            `torch.Tensor`:
@@ -1139,9 +1106,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        return x_t

    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
@@ -1251,10 +1216,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
        sample = sample.to(torch.float32)
        if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None:
            noise = randn_tensor(
-                model_output.shape,
-                generator=generator,
-                device=model_output.device,
-                dtype=torch.float32,
+                model_output.shape, generator=generator, device=model_output.device, dtype=torch.float32
            )
        elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]:
            noise = variance_noise.to(device=model_output.device, dtype=torch.float32)
--- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py
+++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py
@@ -141,10 +141,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        use_beta_sigmas (`bool`, *optional*, defaults to `False`):
            Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
            Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
-        use_flow_sigmas (`bool`, *optional*, defaults to `False`):
-            Whether to use flow sigmas for step sizes in the noise schedule during the sampling process.
-        flow_shift (`float`, *optional*, defaults to 1.0):
-            The flow shift factor. Valid only when `use_flow_sigmas=True`.
        lambda_min_clipped (`float`, defaults to `-inf`):
            Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
            cosine (`squaredcos_cap_v2`) noise schedule.
@@ -167,15 +163,15 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        num_train_timesteps: int = 1000,
        beta_start: float = 0.0001,
        beta_end: float = 0.02,
-        beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear",
+        beta_schedule: str = "linear",
        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
        solver_order: int = 2,
-        prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon",
+        prediction_type: str = "epsilon",
        thresholding: bool = False,
        dynamic_thresholding_ratio: float = 0.995,
        sample_max_value: float = 1.0,
-        algorithm_type: Literal["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"] = "dpmsolver++",
-        solver_type: Literal["midpoint", "heun"] = "midpoint",
+        algorithm_type: str = "dpmsolver++",
+        solver_type: str = "midpoint",
        lower_order_final: bool = True,
        euler_at_final: bool = False,
        use_karras_sigmas: Optional[bool] = False,
@@ -184,32 +180,19 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        use_flow_sigmas: Optional[bool] = False,
        flow_shift: Optional[float] = 1.0,
        lambda_min_clipped: float = -float("inf"),
-        variance_type: Optional[Literal["learned", "learned_range"]] = None,
-        timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace",
+        variance_type: Optional[str] = None,
+        timestep_spacing: str = "linspace",
        steps_offset: int = 0,
    ):
        if self.config.use_beta_sigmas and not is_scipy_available():
            raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
-        if (
-            sum(
-                [
-                    self.config.use_beta_sigmas,
-                    self.config.use_exponential_sigmas,
-                    self.config.use_karras_sigmas,
-                ]
-            )
-            > 1
-        ):
+        if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
            raise ValueError(
                "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
            )
        if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
            deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
-            deprecate(
-                "algorithm_types dpmsolver and sde-dpmsolver",
-                "1.0.0",
-                deprecation_message,
-            )
+            deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)

        if trained_betas is not None:
            self.betas = torch.tensor(trained_betas, dtype=torch.float32)
@@ -217,15 +200,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
            self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        elif beta_schedule == "scaled_linear":
            # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(
-                    beta_start**0.5,
-                    beta_end**0.5,
-                    num_train_timesteps,
-                    dtype=torch.float32,
-                )
-                ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
        elif beta_schedule == "squaredcos_cap_v2":
            # Glide cosine schedule
            self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -244,12 +219,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        self.init_noise_sigma = 1.0

        # settings for DPM-Solver
-        if algorithm_type not in [
-            "dpmsolver",
-            "dpmsolver++",
-            "sde-dpmsolver",
-            "sde-dpmsolver++",
-        ]:
+        if algorithm_type not in ["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"]:
            if algorithm_type == "deis":
                self.register_to_config(algorithm_type="dpmsolver++")
            else:
@@ -280,11 +250,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        """
        return self._step_index

-    def set_timesteps(
-        self,
-        num_inference_steps: Optional[int] = None,
-        device: Optional[Union[str, torch.device]] = None,
-    ):
+    def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
        """
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

@@ -416,7 +382,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        return sample

    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
-    def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray:
+    def _sigma_to_t(self, sigma, log_sigmas):
        """
        Convert sigma values to corresponding timestep values through interpolation.

@@ -453,7 +419,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        return t

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t
-    def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def _sigma_to_alpha_sigma_t(self, sigma):
        """
        Convert sigma values to alpha_t and sigma_t values.

@@ -475,7 +441,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        return alpha_t, sigma_t

    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
-    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
        """
        Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
        Models](https://huggingface.co/papers/2206.00364).
@@ -601,7 +567,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output: torch.Tensor,
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        **kwargs,
    ) -> torch.Tensor:
        """
@@ -615,7 +581,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output (`torch.Tensor`):
                The direct output from the learned diffusion model.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.

        Returns:
@@ -700,7 +666,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output: torch.Tensor,
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -710,10 +676,8 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output (`torch.Tensor`):
                The direct output from the learned diffusion model.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.
-            noise (`torch.Tensor`, *optional*):
-                The noise tensor.

        Returns:
            `torch.Tensor`:
@@ -740,10 +704,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
                "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
            )

-        sigma_t, sigma_s = (
-            self.sigmas[self.step_index + 1],
-            self.sigmas[self.step_index],
-        )
+        sigma_t, sigma_s = self.sigmas[self.step_index + 1], self.sigmas[self.step_index]
        alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
        alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s)
        lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
@@ -775,7 +736,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output_list: List[torch.Tensor],
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -785,7 +746,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output_list (`List[torch.Tensor]`):
                The direct outputs from learned diffusion model at current and latter timesteps.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by the diffusion process.

        Returns:
@@ -899,7 +860,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        self,
        model_output_list: List[torch.Tensor],
        *args,
-        sample: Optional[torch.Tensor] = None,
+        sample: torch.Tensor = None,
        noise: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
@@ -909,10 +870,8 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output_list (`List[torch.Tensor]`):
                The direct outputs from learned diffusion model at current and latter timesteps.
-            sample (`torch.Tensor`, *optional*):
+            sample (`torch.Tensor`):
                A current instance of a sample created by diffusion process.
-            noise (`torch.Tensor`, *optional*):
-                The noise tensor.

        Returns:
            `torch.Tensor`:
@@ -992,7 +951,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
            )
        return x_t

-    def _init_step_index(self, timestep: Union[int, torch.Tensor]):
+    def _init_step_index(self, timestep):
        if isinstance(timestep, torch.Tensor):
            timestep = timestep.to(self.timesteps.device)

@@ -1016,7 +975,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        model_output: torch.Tensor,
        timestep: Union[int, torch.Tensor],
        sample: torch.Tensor,
-        generator: Optional[torch.Generator] = None,
+        generator=None,
        variance_noise: Optional[torch.Tensor] = None,
        return_dict: bool = True,
    ) -> Union[SchedulerOutput, Tuple]:
@@ -1068,10 +1027,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):

        if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None:
            noise = randn_tensor(
-                model_output.shape,
-                generator=generator,
-                device=model_output.device,
-                dtype=model_output.dtype,
+                model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype
            )
        elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]:
            noise = variance_noise
@@ -1118,21 +1074,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        noise: torch.Tensor,
        timesteps: torch.IntTensor,
    ) -> torch.Tensor:
-        """
-        Add noise to the clean `original_samples` using the scheduler's equivalent function.
-
-        Args:
-            original_samples (`torch.Tensor`):
-                The original samples to add noise to.
-            noise (`torch.Tensor`):
-                The noise tensor.
-            timesteps (`torch.IntTensor`):
-                The timesteps at which to add noise.
-
-        Returns:
-            `torch.Tensor`:
-                The noisy samples.
-        """
        # Make sure sigmas and timesteps have the same device and dtype as original_samples
        sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
        if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
@@ -1162,5 +1103,5 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
        noisy_samples = alpha_t * original_samples + sigma_t * noise
        return noisy_samples

-    def __len__(self) -> int:
+    def __len__(self):
        return self.config.num_train_timesteps
--- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py
+++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py
@@ -1120,9 +1120,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py
+++ b/src/diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py
@@ -662,9 +662,7 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/schedulers/scheduling_sasolver.py
+++ b/src/diffusers/schedulers/scheduling_sasolver.py
@@ -1122,9 +1122,7 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/schedulers/scheduling_unipc_multistep.py
+++ b/src/diffusers/schedulers/scheduling_unipc_multistep.py
@@ -1083,9 +1083,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep
    def index_for_timestep(
-        self,
-        timestep: Union[int, torch.Tensor],
-        schedule_timesteps: Optional[torch.Tensor] = None,
+        self, timestep: Union[int, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
    ) -> int:
        """
        Find the index for a given timestep in the schedule.
--- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py
+++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py
@@ -4112,21 +4112,6 @@ class ZImageImg2ImgPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


-class ZImageInpaintPipeline(metaclass=DummyObject):
-    _backends = ["torch", "transformers"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch", "transformers"])
-
-    @classmethod
-    def from_config(cls, *args, **kwargs):
-        requires_backends(cls, ["torch", "transformers"])
-
-    @classmethod
-    def from_pretrained(cls, *args, **kwargs):
-        requires_backends(cls, ["torch", "transformers"])
-
-
 class ZImageOmniPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

--- a/tests/models/test_models_auto.py
+++ b/tests/models/test_models_auto.py
@@ -1,5 +1,5 @@
 import unittest
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch

 from transformers import CLIPTextModel, LongformerModel

@@ -30,69 +30,3 @@ class TestAutoModel(unittest.TestCase):
    def test_load_from_model_index(self):
        model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
        assert isinstance(model, CLIPTextModel)
-
-
-class TestAutoModelFromConfig(unittest.TestCase):
-    @patch(
-        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
-        return_value=(MagicMock(), None),
-    )
-    def test_from_config_with_dict_diffusers_class(self, mock_get_class):
-        config = {"_class_name": "UNet2DConditionModel", "sample_size": 64}
-        mock_model = MagicMock()
-        mock_get_class.return_value[0].from_config.return_value = mock_model
-
-        result = AutoModel.from_config(config)
-
-        mock_get_class.assert_called_once_with(
-            library_name="diffusers",
-            class_name="UNet2DConditionModel",
-            importable_classes=unittest.mock.ANY,
-            pipelines=None,
-            is_pipeline_module=False,
-        )
-        mock_get_class.return_value[0].from_config.assert_called_once_with(config)
-        assert result is mock_model
-
-    @patch(
-        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
-        return_value=(MagicMock(), None),
-    )
-    @patch("diffusers.models.AutoModel.load_config", return_value={"_class_name": "UNet2DConditionModel"})
-    def test_from_config_with_string_path(self, mock_load_config, mock_get_class):
-        mock_model = MagicMock()
-        mock_get_class.return_value[0].from_config.return_value = mock_model
-
-        result = AutoModel.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet")
-
-        mock_load_config.assert_called_once()
-        assert result is mock_model
-
-    def test_from_config_raises_on_missing_class_info(self):
-        config = {"some_key": "some_value"}
-        with self.assertRaises(ValueError, msg="Couldn't find a model class"):
-            AutoModel.from_config(config)
-
-    @patch(
-        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
-        return_value=(MagicMock(), None),
-    )
-    def test_from_config_with_model_type_routes_to_transformers(self, mock_get_class):
-        config = {"model_type": "clip_text_model"}
-        mock_model = MagicMock()
-        mock_get_class.return_value[0].from_config.return_value = mock_model
-
-        result = AutoModel.from_config(config)
-
-        mock_get_class.assert_called_once_with(
-            library_name="transformers",
-            class_name="AutoModel",
-            importable_classes=unittest.mock.ANY,
-            pipelines=None,
-            is_pipeline_module=False,
-        )
-        assert result is mock_model
-
-    def test_from_config_raises_on_none(self):
-        with self.assertRaises(ValueError, msg="Please provide a `pretrained_model_name_or_path_or_dict`"):
-            AutoModel.from_config(None)
--- a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
+++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
@@ -37,6 +37,7 @@ class TestFluxModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = FluxModularPipeline
    pipeline_blocks_class = FluxAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
+    default_repo_id = "hf-internal-testing/tiny-flux-pipe"

    params = frozenset(["prompt", "height", "width", "guidance_scale"])
    batch_params = frozenset(["prompt"])
@@ -63,6 +64,7 @@ class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = FluxModularPipeline
    pipeline_blocks_class = FluxAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
+    default_repo_id = "hf-internal-testing/tiny-flux-pipe"

    params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
    batch_params = frozenset(["prompt", "image"])
@@ -129,6 +131,7 @@ class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = FluxKontextModularPipeline
    pipeline_blocks_class = FluxKontextAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-kontext-pipe"
+    default_repo_id = "hf-internal-testing/tiny-flux-kontext-pipe"

    params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
    batch_params = frozenset(["prompt", "image"])
--- a/tests/modular_pipelines/flux2/test_modular_pipeline_flux2.py
+++ b/tests/modular_pipelines/flux2/test_modular_pipeline_flux2.py
@@ -32,6 +32,8 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2ModularPipeline
    pipeline_blocks_class = Flux2AutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-modular"
+    default_repo_id = "black-forest-labs/FLUX.2-dev"
+    default_repo_id = "hf-internal-testing/tiny-flux2"

    params = frozenset(["prompt", "height", "width", "guidance_scale"])
    batch_params = frozenset(["prompt"])
@@ -60,6 +62,7 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2ModularPipeline
    pipeline_blocks_class = Flux2AutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-modular"
+    default_repo_id = "hf-internal-testing/tiny-flux2"

    params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
    batch_params = frozenset(["prompt", "image"])
--- a/tests/modular_pipelines/flux2/test_modular_pipeline_flux2_klein.py
+++ b/tests/modular_pipelines/flux2/test_modular_pipeline_flux2_klein.py
@@ -32,6 +32,7 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2KleinModularPipeline
    pipeline_blocks_class = Flux2KleinAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-modular"
+    default_repo_id = None  # TODO

    params = frozenset(["prompt", "height", "width"])
    batch_params = frozenset(["prompt"])
@@ -59,6 +60,7 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2KleinModularPipeline
    pipeline_blocks_class = Flux2KleinAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-modular"
+    default_repo_id = None  # TODO

    params = frozenset(["prompt", "height", "width", "image"])
    batch_params = frozenset(["prompt", "image"])
--- a/tests/modular_pipelines/flux2/test_modular_pipeline_flux2_klein_base.py
+++ b/tests/modular_pipelines/flux2/test_modular_pipeline_flux2_klein_base.py
@@ -32,7 +32,7 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2KleinModularPipeline
    pipeline_blocks_class = Flux2KleinBaseAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-base-modular"
-
+    default_repo_id = "hf-internal-testing/tiny-flux2-klein"
    params = frozenset(["prompt", "height", "width"])
    batch_params = frozenset(["prompt"])

@@ -59,6 +59,7 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = Flux2KleinModularPipeline
    pipeline_blocks_class = Flux2KleinBaseAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-klein-base-modular"
+    default_repo_id = "hf-internal-testing/tiny-flux2-klein"

    params = frozenset(["prompt", "height", "width", "image"])
    batch_params = frozenset(["prompt", "image"])
--- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
+++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
@@ -34,6 +34,7 @@ class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuider
    pipeline_class = QwenImageModularPipeline
    pipeline_blocks_class = QwenImageAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-modular"
+    default_repo_id = "Qwen/Qwen-Image"

    params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
    batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
@@ -60,6 +61,7 @@ class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGu
    pipeline_class = QwenImageEditModularPipeline
    pipeline_blocks_class = QwenImageEditAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-modular"
+    default_repo_id = "Qwen/Qwen-Image-Edit"

    params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
    batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
@@ -86,6 +88,7 @@ class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, Modul
    pipeline_class = QwenImageEditPlusModularPipeline
    pipeline_blocks_class = QwenImageEditPlusAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-plus-modular"
+    default_repo_id = "Qwen/Qwen-Image-Edit-2509"

    # No `mask_image` yet.
    params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"])
--- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
+++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
@@ -279,6 +279,8 @@ class TestSDXLModularPipelineFast(
    pipeline_class = StableDiffusionXLModularPipeline
    pipeline_blocks_class = StableDiffusionXLAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
+    default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
+
    params = frozenset(
        [
            "prompt",
@@ -326,6 +328,7 @@ class TestSDXLImg2ImgModularPipelineFast(
    pipeline_class = StableDiffusionXLModularPipeline
    pipeline_blocks_class = StableDiffusionXLAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
+    default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
    params = frozenset(
        [
            "prompt",
@@ -379,6 +382,7 @@ class SDXLInpaintingModularPipelineFastTests(
    pipeline_class = StableDiffusionXLModularPipeline
    pipeline_blocks_class = StableDiffusionXLAutoBlocks
    pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
+    default_repo_id = "hf-internal-testing/tiny-sdxl-pipe"
    params = frozenset(
        [
            "prompt",
--- a/tests/modular_pipelines/test_modular_pipelines_common.py
+++ b/tests/modular_pipelines/test_modular_pipelines_common.py
@@ -37,6 +37,8 @@ class ModularPipelineTesterMixin:
    optional_params = frozenset(["num_inference_steps", "num_images_per_prompt", "latents", "output_type"])
    # this is modular specific: generator needs to be a intermediate input because it's mutable
    intermediate_params = frozenset(["generator"])
+    # prompt is required for most pipeline, with exceptions like qwen-image layer
+    required_params = frozenset(["prompt"])

    def get_generator(self, seed=0):
        generator = torch.Generator("cpu").manual_seed(seed)
@@ -55,6 +57,12 @@ class ModularPipelineTesterMixin:
            "You need to set the attribute `pretrained_model_name_or_path` in the child test class. See existing pipeline tests for reference."
        )

+    @property
+    def default_repo_id(self) -> str:
+        raise NotImplementedError(
+            "You need to set the attribute `default_repo_id` in the child test class. See existing pipeline tests for reference."
+        )
+
    @property
    def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]:
        raise NotImplementedError(
@@ -121,6 +129,7 @@ class ModularPipelineTesterMixin:
        pipe = self.get_pipeline()
        input_parameters = pipe.blocks.input_names
        optional_parameters = pipe.default_call_parameters
+        required_parameters = pipe.blocks.required_inputs

        def _check_for_parameters(parameters, expected_parameters, param_type):
            remaining_parameters = {param for param in parameters if param not in expected_parameters}
@@ -130,6 +139,98 @@ class ModularPipelineTesterMixin:

        _check_for_parameters(self.params, input_parameters, "input")
        _check_for_parameters(self.optional_params, optional_parameters, "optional")
+        _check_for_parameters(self.required_params, required_parameters, "required")
+
+    def test_loading_from_default_repo(self):
+        if self.default_repo_id is None:
+            return
+
+        try:
+            pipe = ModularPipeline.from_pretrained(self.default_repo_id)
+            assert pipe.blocks.__class__ == self.pipeline_blocks_class
+        except Exception as e:
+            assert False, f"Failed to load pipeline from default repo: {e}"
+
+    def test_modular_inference(self):
+        # run the pipeline to get the base output for comparison
+        pipe = self.get_pipeline()
+        pipe.to(torch_device, torch.float32)
+
+        inputs = self.get_dummy_inputs()
+        standard_output = pipe(**inputs, output="images")
+
+        # create text, denoise, decoder (and optional vae encoder) nodes
+        blocks = self.pipeline_blocks_class()
+
+        assert "text_encoder" in blocks.sub_blocks, "`text_encoder` block is not present in the pipeline"
+        assert "denoise" in blocks.sub_blocks, "`denoise` block is not present in the pipeline"
+        assert "decode" in blocks.sub_blocks, "`decode` block is not present in the pipeline"
+
+        # manually set the components in the sub_pipe
+        # a hack to workaround the fact the default pipeline properties are often incorrect for testing cases,
+        # #e.g. vae_scale_factor is ususally not 8 because vae is configured to be smaller for testing
+        def manually_set_all_components(pipe: ModularPipeline, sub_pipe: ModularPipeline):
+            for n, comp in pipe.components.items():
+                setattr(sub_pipe, n, comp)
+
+        # Initialize all nodes
+        text_node = blocks.sub_blocks["text_encoder"].init_pipeline(self.pretrained_model_name_or_path)
+        text_node.load_components(torch_dtype=torch.float32)
+        text_node.to(torch_device)
+        manually_set_all_components(pipe, text_node)
+
+        denoise_node = blocks.sub_blocks["denoise"].init_pipeline(self.pretrained_model_name_or_path)
+        denoise_node.load_components(torch_dtype=torch.float32)
+        denoise_node.to(torch_device)
+        manually_set_all_components(pipe, denoise_node)
+
+        decoder_node = blocks.sub_blocks["decode"].init_pipeline(self.pretrained_model_name_or_path)
+        decoder_node.load_components(torch_dtype=torch.float32)
+        decoder_node.to(torch_device)
+        manually_set_all_components(pipe, decoder_node)
+
+        if "vae_encoder" in blocks.sub_blocks:
+            vae_encoder_node = blocks.sub_blocks["vae_encoder"].init_pipeline(self.pretrained_model_name_or_path)
+            vae_encoder_node.load_components(torch_dtype=torch.float32)
+            vae_encoder_node.to(torch_device)
+            manually_set_all_components(pipe, vae_encoder_node)
+        else:
+            vae_encoder_node = None
+
+        def filter_inputs(available: dict, expected_keys) -> dict:
+            return {k: v for k, v in available.items() if k in expected_keys}
+
+        # prepare inputs for each node
+        inputs = self.get_dummy_inputs()
+
+        # 1. Text encoder: takes from inputs
+        text_inputs = filter_inputs(inputs, text_node.blocks.input_names)
+        text_output = text_node(**text_inputs)
+        text_output_dict = text_output.get_by_kwargs("denoiser_input_fields")
+
+        # 2. VAE encoder (optional): takes from inputs + text_output
+        if vae_encoder_node is not None:
+            vae_available = {**inputs, **text_output_dict}
+            vae_encoder_inputs = filter_inputs(vae_available, vae_encoder_node.blocks.input_names)
+            vae_encoder_output = vae_encoder_node(**vae_encoder_inputs)
+            vae_output_dict = vae_encoder_output.values
+        else:
+            vae_output_dict = {}
+
+        # 3. Denoise: takes from inputs + text_output + vae_output
+        denoise_available = {**inputs, **text_output_dict, **vae_output_dict}
+        denoise_inputs = filter_inputs(denoise_available, denoise_node.blocks.input_names)
+        denoise_output = denoise_node(**denoise_inputs)
+        latents = denoise_output.latents
+
+        # 4. Decoder: takes from inputs + denoise_output
+        decode_available = {**inputs, "latents": latents}
+        decode_inputs = filter_inputs(decode_available, decoder_node.blocks.input_names)
+        modular_output = decoder_node(**decode_inputs).images
+
+        assert modular_output.shape == standard_output.shape, (
+            f"Modular output should have same shape as standard output {standard_output.shape}, but got {modular_output.shape}"
+        )

    def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True):
        pipe = self.get_pipeline().to(torch_device)
--- a/tests/pipelines/z_image/test_z_image_inpaint.py
+++ b/tests/pipelines/z_image/test_z_image_inpaint.py
@@ -1,396 +0,0 @@
-# Copyright 2025 Alibaba Z-Image Team and The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import gc
-import os
-import unittest
-
-import numpy as np
-import torch
-from transformers import Qwen2Tokenizer, Qwen3Config, Qwen3Model
-
-from diffusers import (
-    AutoencoderKL,
-    FlowMatchEulerDiscreteScheduler,
-    ZImageInpaintPipeline,
-    ZImageTransformer2DModel,
-)
-from diffusers.utils.testing_utils import floats_tensor
-
-from ...testing_utils import torch_device
-from ..pipeline_params import (
-    IMAGE_TO_IMAGE_IMAGE_PARAMS,
-    TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS,
-    TEXT_GUIDED_IMAGE_INPAINTING_PARAMS,
-)
-from ..test_pipelines_common import PipelineTesterMixin, to_np
-
-
-# Z-Image requires torch.use_deterministic_algorithms(False) due to complex64 RoPE operations
-# Cannot use enable_full_determinism() which sets it to True
-# Note: Z-Image does not support FP16 inference due to complex64 RoPE embeddings
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
-torch.use_deterministic_algorithms(False)
-torch.backends.cudnn.deterministic = True
-torch.backends.cudnn.benchmark = False
-if hasattr(torch.backends, "cuda"):
-    torch.backends.cuda.matmul.allow_tf32 = False
-
-
-class ZImageInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = ZImageInpaintPipeline
-    params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS - {"cross_attention_kwargs"}
-    batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS
-    image_params = frozenset(["image", "mask_image"])
-    image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS
-    required_optional_params = frozenset(
-        [
-            "num_inference_steps",
-            "strength",
-            "generator",
-            "latents",
-            "return_dict",
-            "callback_on_step_end",
-            "callback_on_step_end_tensor_inputs",
-        ]
-    )
-    supports_dduf = False
-    test_xformers_attention = False
-    test_layerwise_casting = True
-    test_group_offloading = True
-
-    def setUp(self):
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-        torch.manual_seed(0)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(0)
-
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-        torch.manual_seed(0)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(0)
-
-    def get_dummy_components(self):
-        torch.manual_seed(0)
-        transformer = ZImageTransformer2DModel(
-            all_patch_size=(2,),
-            all_f_patch_size=(1,),
-            in_channels=16,
-            dim=32,
-            n_layers=2,
-            n_refiner_layers=1,
-            n_heads=2,
-            n_kv_heads=2,
-            norm_eps=1e-5,
-            qk_norm=True,
-            cap_feat_dim=16,
-            rope_theta=256.0,
-            t_scale=1000.0,
-            axes_dims=[8, 4, 4],
-            axes_lens=[256, 32, 32],
-        )
-        # `x_pad_token` and `cap_pad_token` are initialized with `torch.empty` which contains
-        # uninitialized memory. Set them to known values for deterministic test behavior.
-        with torch.no_grad():
-            transformer.x_pad_token.copy_(torch.ones_like(transformer.x_pad_token.data))
-            transformer.cap_pad_token.copy_(torch.ones_like(transformer.cap_pad_token.data))
-
-        torch.manual_seed(0)
-        vae = AutoencoderKL(
-            in_channels=3,
-            out_channels=3,
-            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
-            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
-            block_out_channels=[32, 64],
-            layers_per_block=1,
-            latent_channels=16,
-            norm_num_groups=32,
-            sample_size=32,
-            scaling_factor=0.3611,
-            shift_factor=0.1159,
-        )
-
-        torch.manual_seed(0)
-        scheduler = FlowMatchEulerDiscreteScheduler()
-
-        torch.manual_seed(0)
-        config = Qwen3Config(
-            hidden_size=16,
-            intermediate_size=16,
-            num_hidden_layers=2,
-            num_attention_heads=2,
-            num_key_value_heads=2,
-            vocab_size=151936,
-            max_position_embeddings=512,
-        )
-        text_encoder = Qwen3Model(config)
-        tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
-
-        components = {
-            "transformer": transformer,
-            "vae": vae,
-            "scheduler": scheduler,
-            "text_encoder": text_encoder,
-            "tokenizer": tokenizer,
-        }
-        return components
-
-    def get_dummy_inputs(self, device, seed=0):
-        import random
-
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device=device).manual_seed(seed)
-
-        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
-        # Create mask: 1 = inpaint region, 0 = preserve region
-        mask_image = torch.zeros((1, 1, 32, 32), device=device)
-        mask_image[:, :, 8:24, 8:24] = 1.0  # Inpaint center region
-
-        inputs = {
-            "prompt": "dance monkey",
-            "negative_prompt": "bad quality",
-            "image": image,
-            "mask_image": mask_image,
-            "strength": 1.0,
-            "generator": generator,
-            "num_inference_steps": 2,
-            "guidance_scale": 3.0,
-            "cfg_normalization": False,
-            "cfg_truncation": 1.0,
-            "height": 32,
-            "width": 32,
-            "max_sequence_length": 16,
-            "output_type": "np",
-        }
-
-        return inputs
-
-    def test_inference(self):
-        device = "cpu"
-
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        pipe.to(device)
-        pipe.set_progress_bar_config(disable=None)
-
-        inputs = self.get_dummy_inputs(device)
-        image = pipe(**inputs).images
-        generated_image = image[0]
-        self.assertEqual(generated_image.shape, (32, 32, 3))
-
-    def test_inference_batch_single_identical(self):
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-        torch.manual_seed(0)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(0)
-        self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)
-
-    def test_num_images_per_prompt(self):
-        import inspect
-
-        sig = inspect.signature(self.pipeline_class.__call__)
-
-        if "num_images_per_prompt" not in sig.parameters:
-            return
-
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        batch_sizes = [1, 2]
-        num_images_per_prompts = [1, 2]
-
-        for batch_size in batch_sizes:
-            for num_images_per_prompt in num_images_per_prompts:
-                inputs = self.get_dummy_inputs(torch_device)
-
-                for key in inputs.keys():
-                    if key in self.batch_params:
-                        inputs[key] = batch_size * [inputs[key]]
-
-                images = pipe(**inputs, num_images_per_prompt=num_images_per_prompt)[0]
-
-                assert images.shape[0] == batch_size * num_images_per_prompt
-
-        del pipe
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-
-    def test_attention_slicing_forward_pass(
-        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
-    ):
-        if not self.test_attention_slicing:
-            return
-
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        for component in pipe.components.values():
-            if hasattr(component, "set_default_attn_processor"):
-                component.set_default_attn_processor()
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        generator_device = "cpu"
-        inputs = self.get_dummy_inputs(generator_device)
-        output_without_slicing = pipe(**inputs)[0]
-
-        pipe.enable_attention_slicing(slice_size=1)
-        inputs = self.get_dummy_inputs(generator_device)
-        output_with_slicing1 = pipe(**inputs)[0]
-
-        pipe.enable_attention_slicing(slice_size=2)
-        inputs = self.get_dummy_inputs(generator_device)
-        output_with_slicing2 = pipe(**inputs)[0]
-
-        if test_max_difference:
-            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
-            max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max()
-            self.assertLess(
-                max(max_diff1, max_diff2),
-                expected_max_diff,
-                "Attention slicing should not affect the inference results",
-            )
-
-    def test_vae_tiling(self, expected_diff_max: float = 0.7):
-        import random
-
-        generator_device = "cpu"
-        components = self.get_dummy_components()
-
-        pipe = self.pipeline_class(**components)
-        pipe.to("cpu")
-        pipe.set_progress_bar_config(disable=None)
-
-        # Without tiling
-        inputs = self.get_dummy_inputs(generator_device)
-        inputs["height"] = inputs["width"] = 128
-        # Generate a larger image for the input
-        inputs["image"] = floats_tensor((1, 3, 128, 128), rng=random.Random(0)).to("cpu")
-        # Generate a larger mask for the input
-        mask = torch.zeros((1, 1, 128, 128), device="cpu")
-        mask[:, :, 32:96, 32:96] = 1.0
-        inputs["mask_image"] = mask
-        output_without_tiling = pipe(**inputs)[0]
-
-        # With tiling (standard AutoencoderKL doesn't accept parameters)
-        pipe.vae.enable_tiling()
-        inputs = self.get_dummy_inputs(generator_device)
-        inputs["height"] = inputs["width"] = 128
-        inputs["image"] = floats_tensor((1, 3, 128, 128), rng=random.Random(0)).to("cpu")
-        inputs["mask_image"] = mask
-        output_with_tiling = pipe(**inputs)[0]
-
-        self.assertLess(
-            (to_np(output_without_tiling) - to_np(output_with_tiling)).max(),
-            expected_diff_max,
-            "VAE tiling should not affect the inference results",
-        )
-
-    def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-3):
-        # Z-Image RoPE embeddings (complex64) have slightly higher numerical tolerance
-        # Inpainting mask blending adds additional numerical variance
-        super().test_pipeline_with_accelerator_device_map(expected_max_difference=expected_max_difference)
-
-    def test_group_offloading_inference(self):
-        # Block-level offloading conflicts with RoPE cache. Pipeline-level offloading (tested separately) works fine.
-        self.skipTest("Using test_pipeline_level_group_offloading_inference instead")
-
-    def test_save_load_float16(self, expected_max_diff=1e-2):
-        # Z-Image does not support FP16 due to complex64 RoPE embeddings
-        self.skipTest("Z-Image does not support FP16 inference")
-
-    def test_float16_inference(self, expected_max_diff=5e-2):
-        # Z-Image does not support FP16 due to complex64 RoPE embeddings
-        self.skipTest("Z-Image does not support FP16 inference")
-
-    def test_strength_parameter(self):
-        """Test that strength parameter affects the output correctly."""
-        device = "cpu"
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        pipe.to(device)
-        pipe.set_progress_bar_config(disable=None)
-
-        # Test with different strength values
-        inputs_low_strength = self.get_dummy_inputs(device)
-        inputs_low_strength["strength"] = 0.2
-
-        inputs_high_strength = self.get_dummy_inputs(device)
-        inputs_high_strength["strength"] = 0.8
-
-        # Both should complete without errors
-        output_low = pipe(**inputs_low_strength).images[0]
-        output_high = pipe(**inputs_high_strength).images[0]
-
-        # Outputs should be different (different amount of transformation)
-        self.assertFalse(np.allclose(output_low, output_high, atol=1e-3))
-
-    def test_invalid_strength(self):
-        """Test that invalid strength values raise appropriate errors."""
-        device = "cpu"
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        pipe.to(device)
-
-        inputs = self.get_dummy_inputs(device)
-
-        # Test strength < 0
-        inputs["strength"] = -0.1
-        with self.assertRaises(ValueError):
-            pipe(**inputs)
-
-        # Test strength > 1
-        inputs["strength"] = 1.5
-        with self.assertRaises(ValueError):
-            pipe(**inputs)
-
-    def test_mask_inpainting(self):
-        """Test that the mask properly controls which regions are inpainted."""
-        device = "cpu"
-        components = self.get_dummy_components()
-        pipe = self.pipeline_class(**components)
-        pipe.to(device)
-        pipe.set_progress_bar_config(disable=None)
-
-        # Generate with full mask (inpaint everything)
-        inputs_full = self.get_dummy_inputs(device)
-        inputs_full["mask_image"] = torch.ones((1, 1, 32, 32), device=device)
-
-        # Generate with no mask (preserve everything)
-        inputs_none = self.get_dummy_inputs(device)
-        inputs_none["mask_image"] = torch.zeros((1, 1, 32, 32), device=device)
-
-        # Both should complete without errors
-        output_full = pipe(**inputs_full).images[0]
-        output_none = pipe(**inputs_none).images[0]
-
-        # Outputs should be different (full inpaint vs preserve)
-        self.assertFalse(np.allclose(output_full, output_none, atol=1e-3))
Author	SHA1	Message	Date
yiyi@huggingface.co	c91835c943	u pup	2026-02-05 09:06:00 +00:00
yiyi@huggingface.co	98b3a31259	does not need to specify the params:	2026-02-05 09:05:25 +00:00
yiyi@huggingface.co	4c1a5bcfeb	fix more	2026-02-05 08:40:52 +00:00
yiyi@huggingface.co	027394d392	up up	2026-02-04 19:48:56 +00:00
yiyi@huggingface.co	5c378a9415	text_encoder should not be auto for qwen-image	2026-02-04 19:48:11 +00:00
yiyi@huggingface.co	f34cc7b344	style	2026-02-04 11:31:16 +00:00
yiyi@huggingface.co	24c4b1c47d	add required param tests	2026-02-04 11:30:38 +00:00
yiyi@huggingface.co	13c922972e	more fix	2026-02-04 11:13:58 +00:00
yiyi@huggingface.co	f4d27b9a8a	style	2026-02-04 11:00:12 +00:00
yiyi@huggingface.co	1a2e736166	try to fix modular tests	2026-02-04 10:59:03 +00:00
yiyi@huggingface.co	c293ad7899	fix default_repo_id	2026-02-04 10:07:58 +00:00
YiYi Xu	2c7f5d7421	Merge branch 'main' into modular-test	2026-02-03 22:43:09 -10:00
yiyixuxu	fb6ec06a39	style etc	2026-01-22 03:14:15 +01:00
yiyixuxu	ea63cccb8c	add modular test and loading from standard repo	2026-01-22 03:13:32 +01:00