Compare commits


35 Commits

Author SHA1 Message Date
YiYi Xu
b73cc50e48 Merge branch 'main' into modular-workflow 2026-01-31 09:51:11 -10:00
YiYi Xu
769a1f3a12 [Modular]add a real quick start guide (#13029)
* add a real quick start guide

* Update docs/source/en/modular_diffusers/quickstart.md

* update a bit more

* fix

* Apply suggestions from code review

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/modular_diffusers/quickstart.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/modular_diffusers/quickstart.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* update more

* Apply suggestions from code review

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

* address more feedbacks: move components amnager earlier, explain blocks vs sub-blocks etc

* more

* remove the link to mellon guide, not exist in this PR yet

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2026-01-31 09:43:20 -10:00
Mikko Lauri
ec6b2bcccb Fix aiter availability check (#13059)
Update import_utils.py
2026-01-30 19:24:05 +05:30
Jared Wen
6a1904eb06 [bug fix] GLM-Image fit new get_image_features API (#13052)
change get_image_features API

Signed-off-by: JaredforReal <w13431838023@gmail.com>
Co-authored-by: YiYi Xu <yixu310@gmail.com>
2026-01-29 16:16:42 -10:00
Sayak Paul
f5b6b6625a [wan] fix wan 2.2 when either of the transformers isn't present. (#13055)
fix wan 2.2 when either of the transformers isn't present.
2026-01-29 08:45:24 -10:00
Olexandr88
1be2f7e8c5 docs: fix grammar in fp16_safetensors CLI warning (#13040)
* docs: fix grammar in fp16_safetensors CLI warning

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2026-01-29 21:33:09 +05:30
yiyixuxu
20c35da75c up up 2026-01-25 12:11:37 +01:00
yiyixuxu
6a549f5f55 initial support: workflow 2026-01-25 11:40:52 +01:00
Sayak Paul
412e51c856 include auto-docstring check in the modular ci. (#13004) 2026-01-23 22:34:24 -10:00
github-actions[bot]
23d06423ab Apply style fixes 2026-01-19 09:23:31 +00:00
YiYi Xu
aba551c868 Merge branch 'main' into modular-doc-improv 2026-01-18 23:20:36 -10:00
yiyixuxu
1f9576a2ca fix 2026-01-19 09:56:14 +01:00
yiyixuxu
d75fbc43c7 Merge branch 'modular-doc-improv' of github.com:huggingface/diffusers into modular-doc-improv 2026-01-19 09:54:46 +01:00
yiyixuxu
b7127ce7a7 revert change in z 2026-01-19 09:54:40 +01:00
YiYi Xu
7e9d2b954e Apply suggestions from code review 2026-01-18 22:44:44 -10:00
yiyixuxu
94525200fd rmove space in make docstring 2026-01-19 09:35:39 +01:00
yiyixuxu
f056af1fbb make style 2026-01-19 09:27:40 +01:00
yiyixuxu
8d45ff5bf6 apply auto docstring 2026-01-19 09:22:04 +01:00
yiyixuxu
fb15752d55 up up up 2026-01-19 08:10:31 +01:00
yiyixuxu
1f2dbc9dd2 up 2026-01-19 04:10:17 +01:00
yiyixuxu
002c3e8239 add template method 2026-01-19 03:24:34 +01:00
yiyixuxu
de03d7f100 refactor based on dhruv's feedback: remove the class method 2026-01-18 00:35:01 +01:00
yiyixuxu
25c968a38f add TODO in the description for empty docstring 2026-01-17 09:57:56 +01:00
yiyixuxu
aea0d046f6 address feedbacks 2026-01-17 09:36:58 +01:00
yiyixuxu
1c90ce33f2 up 2026-01-10 12:21:26 +01:00
yiyixuxu
507953f415 more more 2026-01-10 12:19:14 +01:00
yiyixuxu
f0555af1c6 up up up 2026-01-10 12:15:53 +01:00
yiyixuxu
2a81f2ec54 style 2026-01-10 12:15:36 +01:00
yiyixuxu
d20f413f78 more auto docstring 2026-01-10 12:11:28 +01:00
yiyixuxu
ff09bf1a63 add modular_auto_docstring! 2026-01-10 11:55:03 +01:00
yiyixuxu
34a743e2dc style 2026-01-10 10:57:27 +01:00
yiyixuxu
43ab14845d update outputs 2026-01-10 10:56:54 +01:00
YiYi Xu
fbfe5c8d6b Merge branch 'main' into modular-doc-improv 2026-01-09 23:54:23 -10:00
yiyixuxu
b29873dee7 up up 2026-01-10 10:52:53 +01:00
yiyixuxu
7b499de6d0 up 2026-01-10 03:35:15 +01:00
91 changed files with 747 additions and 864 deletions

View File

@@ -92,9 +92,8 @@ jobs:
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu
transformers_version: ["main"]
name: ${{ matrix.config.name }} (transformers ${{ matrix.transformers_version }})
name: ${{ matrix.config.name }}
runs-on:
group: ${{ matrix.config.runner }}
@@ -116,11 +115,8 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
@@ -159,7 +155,7 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_transformers_${{ matrix.transformers_version }}_test_reports
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
path: reports
run_staging_tests:
@@ -224,10 +220,8 @@ jobs:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
transformers_version: ["main"]
name: LoRA tests with PEFT main (transformers ${{ matrix.transformers_version }})
name: LoRA tests with PEFT main
runs-on:
group: aws-general-8-plus
@@ -253,12 +247,9 @@ jobs:
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
uv pip install -U tokenizers
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
python utils/print_env.py
@@ -284,6 +275,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_lora_transformers_${{ matrix.transformers_version }}_test_reports
name: pr_main_test_reports
path: reports

View File

@@ -14,7 +14,6 @@ on:
- "tests/pipelines/test_pipelines_common.py"
- "tests/models/test_modeling_common.py"
- "examples/**/*.py"
- ".github/**.yml"
workflow_dispatch:
concurrency:
@@ -107,14 +106,13 @@ jobs:
path: reports
torch_pipelines_cuda_tests:
name: Torch Pipelines CUDA Tests (transformers ${{ matrix.transformers_version }})
name: Torch Pipelines CUDA Tests
needs: setup_torch_cuda_pipeline_matrix
strategy:
fail-fast: false
max-parallel: 8
matrix:
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
transformers_version: ["main"]
runs-on:
group: aws-g4dn-2xlarge
container:
@@ -133,12 +131,8 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
@@ -178,11 +172,11 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pipeline_${{ matrix.module }}_transformers_${{ matrix.transformers_version }}_test_reports
name: pipeline_${{ matrix.module }}_test_reports
path: reports
torch_cuda_tests:
name: Torch CUDA Tests (transformers ${{ matrix.transformers_version }})
name: Torch CUDA Tests
needs: [check_code_quality, check_repository_consistency]
runs-on:
group: aws-g4dn-2xlarge
@@ -197,7 +191,6 @@ jobs:
max-parallel: 4
matrix:
module: [models, schedulers, lora, others]
transformers_version: ["main"]
steps:
- name: Checkout diffusers
uses: actions/checkout@v6
@@ -209,12 +202,8 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
@@ -252,16 +241,12 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: torch_cuda_test_reports_${{ matrix.module }}_transformers_${{ matrix.transformers_version }}
name: torch_cuda_test_reports_${{ matrix.module }}
path: reports
run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu (transformers ${{ matrix.transformers_version }})
name: Examples PyTorch CUDA tests on Ubuntu
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
transformers_version: ["main"]
runs-on:
group: aws-g4dn-2xlarge
@@ -279,11 +264,8 @@ jobs:
nvidia-smi
- name: Install dependencies
run: |
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip install -e ".[quality,training]"
- name: Environment
@@ -307,6 +289,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: examples_transformers_${{ matrix.transformers_version }}_test_reports
name: examples_test_reports
path: reports

View File

@@ -24,7 +24,7 @@ The Modular Diffusers docs are organized as shown below.
## Quickstart
- A [quickstart](./quickstart) demonstrating how to implement an example workflow with Modular Diffusers.
- The [quickstart](./quickstart) shows you how to run a modular pipeline, understand its structure, and customize it by modifying the blocks that compose it.
## ModularPipelineBlocks

View File

@@ -12,333 +12,248 @@ specific language governing permissions and limitations under the License.
# Quickstart
Modular Diffusers is a framework for quickly building flexible and customizable pipelines. At the core of Modular Diffusers are [`ModularPipelineBlocks`] that can be combined with other blocks to adapt to new workflows. The blocks are converted into a [`ModularPipeline`], a friendly user-facing interface developers can use.
Modular Diffusers is a framework for quickly building flexible and customizable pipelines. These pipelines can go beyond what standard `DiffusionPipeline`s can do. At the core of Modular Diffusers are [`ModularPipelineBlocks`] that can be combined with other blocks to adapt to new workflows. The blocks are converted into a [`ModularPipeline`], a friendly user-facing interface for running generation tasks.
This doc will show you how to implement a [Differential Diffusion](https://differential-diffusion.github.io/) pipeline with the modular framework.
This guide shows you how to run a modular pipeline, understand its structure, and customize it by modifying the blocks that compose it.
## ModularPipelineBlocks
[`ModularPipelineBlocks`] are *definitions* that specify the components, inputs, outputs, and computation logic for a single step in a pipeline. There are four types of blocks.
- [`ModularPipelineBlocks`] is the most basic block for a single step.
- [`SequentialPipelineBlocks`] is a multi-block that composes other blocks linearly. The outputs of one block are the inputs to the next block.
- [`LoopSequentialPipelineBlocks`] is a multi-block that runs its sub-blocks iteratively, designed for loop-based workflows such as denoising.
- [`AutoPipelineBlocks`] is a collection of blocks for different workflows that selects which block to run based on the inputs. It is designed to conveniently package multiple workflows into a single pipeline.
[Differential Diffusion](https://differential-diffusion.github.io/) is an image-to-image workflow. Start with the `IMAGE2IMAGE_BLOCKS` preset, a collection of `ModularPipelineBlocks` for image-to-image generation.
```py
from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS
IMAGE2IMAGE_BLOCKS = InsertableDict([
("text_encoder", StableDiffusionXLTextEncoderStep),
("image_encoder", StableDiffusionXLVaeEncoderStep),
("input", StableDiffusionXLInputStep),
("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
("prepare_add_cond", StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep),
("denoise", StableDiffusionXLDenoiseStep),
("decode", StableDiffusionXLDecodeStep)
])
```
## Pipeline and block states
Modular Diffusers uses *state* to communicate data between blocks. There are two types of states.
- [`PipelineState`] is a global state that can be used to track all inputs and outputs across all blocks.
- [`BlockState`] is a local view of relevant variables from [`PipelineState`] for an individual block.
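To make the two states concrete, below is a minimal, hypothetical block (a sketch for illustration, not a block that ships with Diffusers). It assumes `InputParam`, `OutputParam`, and `PipelineState` are importable from `diffusers.modular_pipelines` and that blocks expose the `get_block_state`/`set_block_state` helpers used by the blocks shown later in this guide.
```py
import torch
from diffusers.modular_pipelines import InputParam, ModularPipelineBlocks, OutputParam, PipelineState


class ScaleLatentsBlock(ModularPipelineBlocks):
    """Hypothetical single-step block that multiplies `latents` by `scale`."""

    @property
    def inputs(self):
        return [
            InputParam("latents", required=True, type_hint=torch.Tensor),
            InputParam("scale", default=1.0, type_hint=float),
        ]

    @property
    def intermediate_outputs(self):
        return [OutputParam("scaled_latents", type_hint=torch.Tensor)]

    def __call__(self, components, state: PipelineState):
        # BlockState: a local view of only this block's variables from the global PipelineState
        block_state = self.get_block_state(state)
        block_state.scaled_latents = block_state.latents * block_state.scale
        # write the local changes back into the global PipelineState
        self.set_block_state(state, block_state)
        return components, state
```
Blocks never call each other directly; they only read from and write to the shared [`PipelineState`].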
## Customizing blocks
[Differential Diffusion](https://differential-diffusion.github.io/) differs from standard image-to-image in its `prepare_latents` and `denoise` blocks. All the other blocks can be reused, but you'll need to modify these two.
Create placeholder `ModularPipelineBlocks` for `prepare_latents` and `denoise` by copying and modifying the existing ones.
Print the `denoise` block to see that it is a [`LoopSequentialPipelineBlocks`] composed of three sub-blocks, `before_denoiser`, `denoiser`, and `after_denoiser`. Only the `before_denoiser` sub-block needs to be modified to prepare the latent input for the denoiser based on the change map.
```py
denoise_blocks = IMAGE2IMAGE_BLOCKS["denoise"]()
print(denoise_blocks)
```
Replace the `StableDiffusionXLLoopBeforeDenoiser` sub-block with the new `SDXLDiffDiffLoopBeforeDenoiser` block.
```py
# Copy existing blocks as placeholders
class SDXLDiffDiffPrepareLatentsStep(ModularPipelineBlocks):
"""Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later"""
# ... same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep
class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLLoopDenoiser, StableDiffusionXLLoopAfterDenoiser]
block_names = ["before_denoiser", "denoiser", "after_denoiser"]
```
### prepare_latents
The `prepare_latents` block requires the following changes.
- a processor to process the change map
- new `inputs` to accept the user-provided change map, the `timesteps` for precomputing all the latents, and `num_inference_steps` for creating the masks that control which image regions are updated
- updated computation in the `__call__` method to process the change map, create the masks, and store them in the [`BlockState`]
```diff
class SDXLDiffDiffPrepareLatentsStep(ModularPipelineBlocks):
@property
def expected_components(self) -> List[ComponentSpec]:
return [
ComponentSpec("vae", AutoencoderKL),
ComponentSpec("scheduler", EulerDiscreteScheduler),
+ ComponentSpec("mask_processor", VaeImageProcessor, config=FrozenDict({"do_normalize": False, "do_convert_grayscale": True}))
]
@property
def inputs(self) -> List[Tuple[str, Any]]:
return [
InputParam("generator"),
+ InputParam("diffdiff_map", required=True),
- InputParam("latent_timestep", required=True, type_hint=torch.Tensor),
+ InputParam("timesteps", type_hint=torch.Tensor),
+ InputParam("num_inference_steps", type_hint=int),
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
+ OutputParam("original_latents", type_hint=torch.Tensor),
+ OutputParam("diffdiff_masks", type_hint=torch.Tensor),
]
def __call__(self, components, state: PipelineState):
# ... existing logic ...
+ # Process change map and create masks
+ diffdiff_map = components.mask_processor.preprocess(block_state.diffdiff_map, height=latent_height, width=latent_width)
+ thresholds = torch.arange(block_state.num_inference_steps, dtype=diffdiff_map.dtype) / block_state.num_inference_steps
+ block_state.diffdiff_masks = diffdiff_map > (thresholds + (block_state.denoising_start or 0))
+ block_state.original_latents = block_state.latents
```
### denoise
The `before_denoiser` sub-block requires the following changes.
- new `inputs` to accept a `denoising_start` parameter, and the `original_latents` and `diffdiff_masks` from the `prepare_latents` block
- updated computation in the `__call__` method to apply Differential Diffusion
```diff
class SDXLDiffDiffLoopBeforeDenoiser(ModularPipelineBlocks):
@property
def description(self) -> str:
return (
"Step within the denoising loop for differential diffusion that prepare the latent input for the denoiser"
)
@property
def inputs(self) -> List[str]:
return [
InputParam("latents", required=True, type_hint=torch.Tensor),
+ InputParam("denoising_start"),
+ InputParam("original_latents", type_hint=torch.Tensor),
+ InputParam("diffdiff_masks", type_hint=torch.Tensor),
]
def __call__(self, components, block_state, i, t):
+ # Apply differential diffusion logic
+ if i == 0 and block_state.denoising_start is None:
+ block_state.latents = block_state.original_latents[:1]
+ else:
+ block_state.mask = block_state.diffdiff_masks[i].unsqueeze(0).unsqueeze(1)
+ block_state.latents = block_state.original_latents[i] * block_state.mask + block_state.latents * (1 - block_state.mask)
# ... rest of existing logic ...
```
## Assembling the blocks
You should have all the blocks you need at this point to create a [`ModularPipeline`].
Copy the existing `IMAGE2IMAGE_BLOCKS` preset and, for the `set_timesteps` block, use the `set_timesteps` block from `TEXT2IMAGE_BLOCKS` because Differential Diffusion doesn't require a `strength` parameter.
Set the `prepare_latents` and `denoise` blocks to the `SDXLDiffDiffPrepareLatentsStep` and `SDXLDiffDiffDenoiseStep` blocks you just modified.
Call [`SequentialPipelineBlocks.from_blocks_dict`] on the blocks to create a `SequentialPipelineBlocks`.
```py
DIFFDIFF_BLOCKS = IMAGE2IMAGE_BLOCKS.copy()
DIFFDIFF_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"]
DIFFDIFF_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep
DIFFDIFF_BLOCKS["denoise"] = SDXLDiffDiffDenoiseStep
dd_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_BLOCKS)
print(dd_blocks)
```
## ModularPipeline
Convert the [`SequentialPipelineBlocks`] into a [`ModularPipeline`] with the [`ModularPipeline.init_pipeline`] method. This initializes the expected components to load from a `modular_model_index.json` file. Explicitly load the components by calling [`ModularPipeline.load_components`].
It is a good idea to initialize the [`ComponentsManager`] with the pipeline to help manage the different components. Once you call [`~ModularPipeline.load_components`], the components are registered to the [`ComponentsManager`] and can be shared between workflows. The example below uses the `collection` argument to assign the components a `"diffdiff"` label for better organization.
```py
from diffusers.modular_pipelines import ComponentsManager
components = ComponentsManager()
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", components_manager=components, collection="diffdiff")
dd_pipeline.load_components(torch_dtype=torch.float16)
dd_pipeline.to("cuda")
```
## Adding workflows
Other workflows can be added to the [`ModularPipeline`] to support additional features without rewriting the entire pipeline from scratch.
This section demonstrates how to add an IP-Adapter or ControlNet.
### IP-Adapter
Stable Diffusion XL already has a preset IP-Adapter block that you can use, and it doesn't require any changes to the existing Differential Diffusion pipeline.
```py
from diffusers.modular_pipelines.stable_diffusion_xl.encoders import StableDiffusionXLAutoIPAdapterStep
ip_adapter_block = StableDiffusionXLAutoIPAdapterStep()
```
Use the [`sub_blocks.insert`] method to insert it into the pipeline blocks. The example below inserts the `ip_adapter_block` at position `0`. Print the blocks to see that the `ip_adapter_block` is added and that it requires an `ip_adapter_image`. This also adds two components to the pipeline, the `image_encoder` and `feature_extractor`.
```py
dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0)
```
Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_components`] to load the model components. Load and set the IP-Adapter to run the pipeline.
```py
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
dd_pipeline.load_components(torch_dtype=torch.float16)
dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
dd_pipeline.loader.set_ip_adapter_scale(0.6)
dd_pipeline = dd_pipeline.to(device)
ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg")
image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true")
mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true")
prompt = "a green pear"
negative_prompt = "blurry"
generator = torch.Generator(device=device).manual_seed(42)
image = dd_pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
num_inference_steps=25,
generator=generator,
ip_adapter_image=ip_adapter_image,
diffdiff_map=mask,
image=image,
output="images"
)[0]
```
### ControlNet
Stable Diffusion XL already has a preset ControlNet block that can readily be used.
```py
from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep
control_input_block = StableDiffusionXLAutoControlNetInputStep()
```
However, it requires modifying the `denoise` block because that's where the ControlNet injects the control information into the UNet.
Modify the `denoise` block by replacing the `StableDiffusionXLLoopDenoiser` sub-block with the `StableDiffusionXLControlNetLoopDenoiser`.
```py
class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLControlNetLoopDenoiser, StableDiffusionXLLoopAfterDenoiser]
block_names = ["before_denoiser", "denoiser", "after_denoiser"]
controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep()
```
Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and load the model components with [`~ModularPipeline.load_components`].
```py
dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7)
dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
dd_pipeline.load_components(torch_dtype=torch.float16)
dd_pipeline = dd_pipeline.to(device)
control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg")
image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true")
mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true")
prompt = "a green pear"
negative_prompt = "blurry"
generator = torch.Generator(device=device).manual_seed(42)
image = dd_pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
num_inference_steps=25,
generator=generator,
control_image=control_image,
controlnet_conditioning_scale=0.5,
diffdiff_map=mask,
image=image,
output="images"
)[0]
```
### AutoPipelineBlocks
The Differential Diffusion, IP-Adapter, and ControlNet workflows can be bundled into a single [`ModularPipeline`] by using [`AutoPipelineBlocks`]. This allows automatically selecting which sub-blocks to run based on inputs like `control_image` or `ip_adapter_image`. If none of these inputs are passed, it defaults to the Differential Diffusion workflow.
Use `block_trigger_inputs` to only run the `SDXLDiffDiffControlNetDenoiseStep` block if a `control_image` input is provided. Otherwise, the `SDXLDiffDiffDenoiseStep` is used.
```py
class SDXLDiffDiffAutoDenoiseStep(AutoPipelineBlocks):
block_classes = [SDXLDiffDiffControlNetDenoiseStep, SDXLDiffDiffDenoiseStep]
block_names = ["controlnet_denoise", "denoise"]
block_trigger_inputs = ["controlnet_cond", None]
```
Add the `ip_adapter` and `controlnet_input` blocks.
```py
DIFFDIFF_AUTO_BLOCKS = IMAGE2IMAGE_BLOCKS.copy()
DIFFDIFF_AUTO_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep
DIFFDIFF_AUTO_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"]
DIFFDIFF_AUTO_BLOCKS["denoise"] = SDXLDiffDiffAutoDenoiseStep
DIFFDIFF_AUTO_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0)
DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input",StableDiffusionXLControlNetAutoInput, 7)
```
Call [`SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`], then create a [`ModularPipeline`] and load the model components to run it.
```py
dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS)
dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
dd_pipeline.load_components(torch_dtype=torch.float16)
```
## Share
Add your [`ModularPipeline`] to the Hub with [`~ModularPipeline.save_pretrained`] and set the `push_to_hub` argument to `True`.
```py
dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True)
```
Other users can load the [`ModularPipeline`] with [`~ModularPipeline.from_pretrained`].
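For example, a minimal loading sketch using the repo saved above (`trust_remote_code=True` is likely needed since the pipeline includes custom block code):
```py
import torch
from diffusers.modular_pipelines import ModularPipeline

# load the pipeline definition from the Hub; weights are only fetched by load_components
dd_pipeline = ModularPipeline.from_pretrained("YiYiXu/test_modular_doc", trust_remote_code=True)
dd_pipeline.load_components(torch_dtype=torch.float16)
dd_pipeline.to("cuda")
```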
## Run a pipeline
[`ModularPipeline`] is the main interface for loading, running, and managing modular pipelines.
```py
import torch
from diffusers.modular_pipelines import ModularPipeline, ComponentsManager
from diffusers import ModularPipeline, ComponentsManager
components = ComponentsManager()
# Use ComponentsManager to enable auto CPU offloading for memory efficiency
manager = ComponentsManager()
manager.enable_auto_cpu_offload(device="cuda:0")
diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff")
diffdiff_pipeline.load_components(torch_dtype=torch.float16)
pipe = ModularPipeline.from_pretrained("Qwen/Qwen-Image", components_manager=manager)
pipe.load_components(torch_dtype=torch.bfloat16)
image = pipe(
prompt="cat wizard with red hat, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney",
).images[0]
image
```
[`~ModularPipeline.from_pretrained`] uses lazy loading - it reads the configuration to learn where to load each component from, but doesn't actually load the model weights until you call [`~ModularPipeline.load_components`]. This gives you control over when and how components are loaded.
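As a hedged sketch of that control, you can load everything in one call and then swap a single component by loading it yourself from its spec, using the same `get_component_spec`/`update_components` pattern shown in the ControlNet example later in this guide. The fine-tuned repo id below is purely hypothetical.
```py
# from_pretrained only reads the config; the weights are fetched when load_components is called
pipe.load_components(torch_dtype=torch.bfloat16)

# Swap a single component: fetch its loading spec, point it at another checkpoint, load, and register it
transformer_spec = pipe.get_component_spec("transformer")
transformer_spec.pretrained_model_name_or_path = "your-username/your-finetuned-transformer"  # hypothetical repo id
transformer = transformer_spec.load(torch_dtype=torch.bfloat16)
pipe.update_components(transformer=transformer)
```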
> [!TIP]
> [`ComponentsManager`] with `enable_auto_cpu_offload` automatically moves models between CPU and GPU as needed, reducing memory usage for large models like Qwen-Image. Learn more in the [ComponentsManager](./components_manager) guide.
Learn more about creating and loading pipelines in the [Creating a pipeline](https://huggingface.co/docs/diffusers/modular_diffusers/modular_pipeline#creating-a-pipeline) and [Loading components](https://huggingface.co/docs/diffusers/modular_diffusers/modular_pipeline#loading-components) guides.
## Understand the structure
A [`ModularPipeline`] has two parts:
- **State**: the loaded components (models, schedulers, processors) and configuration
- **Definition**: the [`ModularPipelineBlocks`] that specify inputs, outputs, expected components and computation logic
The blocks define *what* the pipeline does. Access them through `pipe.blocks`.
```py
print(pipe.blocks)
```
```
QwenImageAutoBlocks(
Class: SequentialPipelineBlocks
Description: Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.
Supported workflows:
- `text2image`: requires `prompt`
- `image2image`: requires `prompt`, `image`
- `inpainting`: requires `prompt`, `mask_image`, `image`
- `controlnet_text2image`: requires `prompt`, `control_image`
...
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
vae (`AutoencoderKLQwenImage`)
transformer (`QwenImageTransformer2DModel`)
...
Sub-Blocks:
[0] text_encoder (QwenImageAutoTextEncoderStep)
[1] vae_encoder (QwenImageAutoVaeEncoderStep)
[2] controlnet_vae_encoder (QwenImageOptionalControlNetVaeEncoderStep)
[3] denoise (QwenImageAutoCoreDenoiseStep)
[4] decode (QwenImageAutoDecodeStep)
)
```
The output shows:
- The supported workflows (text2image, image2image, inpainting, etc.)
- The Sub-Blocks it's composed of (text_encoder, vae_encoder, denoise, decode)
### Workflows
`QwenImageAutoBlocks` is a [`ConditionalPipelineBlocks`], so this pipeline supports multiple workflows and adapts its behavior based on the inputs you provide. For example, if you pass `image` to the pipeline, it runs an image-to-image workflow instead of text-to-image. Let's see this in action with an example.
```py
from diffusers.utils import load_image
input_image = load_image("https://github.com/Trgtuan10/Image_storage/blob/main/cute_cat.png?raw=true")
image = pipe(
prompt="cat wizard with red hat, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney",
image=input_image,
).images[0]
```
Use `get_workflow()` to extract the blocks for a specific workflow. Pass the workflow name (e.g., `"image2image"`, `"inpainting"`, `"controlnet_text2image"`) to get only the blocks relevant to that workflow.
```py
img2img_blocks = pipe.blocks.get_workflow("image2image")
```
Conditional blocks are convenient for users, but their conditional logic adds complexity when customizing or debugging. Extracting a workflow gives you the specific blocks relevant to your workflow, making it easier to work with. Learn more in the [AutoPipelineBlocks](https://huggingface.co/docs/diffusers/modular_diffusers/auto_pipeline_blocks) guide.
### Sub-blocks
Blocks can contain other blocks. `pipe.blocks` gives you the top-level block definition (here, `QwenImageAutoBlocks`), while `sub_blocks` lets you access the smaller blocks inside it.
`QwenImageAutoBlocks` is composed of: `text_encoder`, `vae_encoder`, `controlnet_vae_encoder`, `denoise`, and `decode`. Access them through the `sub_blocks` property.
The `doc` property is useful for seeing the full documentation of any block, including its inputs, outputs, and components.
```py
vae_encoder_block = pipe.blocks.sub_blocks["vae_encoder"]
print(vae_encoder_block.doc)
```
This block can be converted to a pipeline so that it can run on its own with [`~ModularPipelineBlocks.init_pipeline`].
```py
vae_encoder_pipe = vae_encoder_block.init_pipeline()
# Reuse the VAE we already loaded via the update_components() method
vae_encoder_pipe.update_components(vae=pipe.vae)
# Run just this block
image_latents = vae_encoder_pipe(image=input_image).image_latents
print(image_latents.shape)
```
It reuses the VAE from our original pipeline instead of reloading it, keeping memory usage efficient. Learn more in the [Loading components](https://huggingface.co/docs/diffusers/modular_diffusers/modular_pipeline#loading-components) guide.
Since blocks are composable, you can modify the pipeline's definition by adding, removing, or swapping blocks to create new workflows. In the next section, we'll add a canny edge detection block to a ControlNet pipeline, so you can pass a regular image instead of a pre-processed canny edge map.
## Compose new workflows
Let's add a canny edge detection block to a ControlNet pipeline. First, load a pre-built canny block from the Hub (see [Building Custom Blocks](https://huggingface.co/docs/diffusers/modular_diffusers/custom_blocks) to create your own).
```py
from diffusers.modular_pipelines import ModularPipelineBlocks
# Load a canny block from the Hub
canny_block = ModularPipelineBlocks.from_pretrained(
"diffusers-internal-dev/canny-filtering",
trust_remote_code=True,
)
print(canny_block.doc)
```
```
class CannyBlock
Inputs:
image (`Union[Image, ndarray]`):
Image to compute canny filter on
low_threshold (`int`, *optional*, defaults to 50):
Low threshold for the canny filter.
high_threshold (`int`, *optional*, defaults to 200):
High threshold for the canny filter.
...
Outputs:
control_image (`PIL.Image`):
Canny map for input image
```
Use `get_workflow` to extract the ControlNet workflow from [`QwenImageAutoBlocks`].
```py
# Get the controlnet workflow that we want to work with
blocks = pipe.blocks.get_workflow("controlnet_text2image")
print(blocks.doc)
```
```
class SequentialPipelineBlocks
Inputs:
prompt (`str`):
The prompt or prompts to guide image generation.
control_image (`Image`):
Control image for ControlNet conditioning.
...
```
The extracted workflow is a [`SequentialPipelineBlocks`](./sequential_pipeline_blocks) - a multi-block type where blocks run one after another and data flows linearly from one block to the next. Each block's `intermediate_outputs` become available as `inputs` to subsequent blocks.
Currently this workflow requires `control_image` as input. Let's insert the canny block at the beginning so the pipeline accepts a regular image instead.
```py
# Insert canny at the beginning
blocks.sub_blocks.insert("canny", canny_block, 0)
# Check the updated structure: CannyBlock is now listed as first sub-block
print(blocks)
# Check the updated doc
print(blocks.doc)
```
```
class SequentialPipelineBlocks
Inputs:
image (`Union[Image, ndarray]`):
Image to compute canny filter on
low_threshold (`int`, *optional*, defaults to 50):
Low threshold for the canny filter.
high_threshold (`int`, *optional*, defaults to 200):
High threshold for the canny filter.
prompt (`str`):
The prompt or prompts to guide image generation.
...
```
Now the pipeline takes `image` as input instead of `control_image`. Because blocks in a sequence share data automatically, the canny block's output (`control_image`) flows to the denoise block that needs it, and the canny block's input (`image`) becomes a pipeline input since no earlier block provides it.
Create a pipeline from the modified blocks and load a ControlNet model.
```py
pipeline = blocks.init_pipeline("Qwen/Qwen-Image", components_manager=manager)
pipeline.load_components(torch_dtype=torch.bfloat16)
# Load the ControlNet model
controlnet_spec = pipeline.get_component_spec("controlnet")
controlnet_spec.pretrained_model_name_or_path = "InstantX/Qwen-Image-ControlNet-Union"
controlnet = controlnet_spec.load(torch_dtype=torch.bfloat16)
pipeline.update_components(controlnet=controlnet)
```
Now run the pipeline - the canny block preprocesses the image for ControlNet.
```py
from diffusers.utils import load_image
prompt = "cat wizard with red hat, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney"
image = load_image("https://github.com/Trgtuan10/Image_storage/blob/main/cute_cat.png?raw=true")
output = pipeline(
prompt=prompt,
image=image,
).images[0]
output
```
## Next steps
<hfoptions id="next">
<hfoption id="Build custom blocks">
Learn how to create your own blocks with custom logic in the [Building Custom Blocks](./custom_blocks) guide.
</hfoption>
<hfoption id="Share components">
Use [`ComponentsManager`](./components_manager) to share models across multiple pipelines and manage memory efficiently.
</hfoption>
<hfoption id="Visual interface">
Connect modular pipelines to [Mellon](https://github.com/cubiq/Mellon), a visual node-based interface for building workflows. Custom blocks built with Modular Diffusers work out of the box with Mellon - no UI code required. Read more in the Mellon guide.
</hfoption>
</hfoptions>

View File

@@ -17,9 +17,6 @@ import logging
import os
import sys
import tempfile
import unittest
from diffusers.utils import is_transformers_version
sys.path.append("..")
@@ -33,7 +30,6 @@ stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
class CustomDiffusion(ExamplesTestsAccelerate):
def test_custom_diffusion(self):
with tempfile.TemporaryDirectory() as tmpdir:

View File

@@ -35,8 +35,8 @@ from . import BaseDiffusersCLICommand
def conversion_command_factory(args: Namespace):
if args.use_auth_token:
warnings.warn(
"The `--use_auth_token` flag is deprecated and will be removed in a future version. Authentication is now"
" handled automatically if user is logged in."
"The `--use_auth_token` flag is deprecated and will be removed in a future version."
"Authentication is now handled automatically if the user is logged in."
)
return FP16SafetensorsCommand(args.ckpt_id, args.fp16, args.use_safetensors)
@@ -92,8 +92,8 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
pipeline_class = getattr(import_module("diffusers"), pipeline_class_name)
self.logger.info(f"Pipeline class imported: {pipeline_class_name}.")
# Load the appropriate pipeline. We could have use `DiffusionPipeline`
# here, but just to avoid any rough edge cases.
# Load the appropriate pipeline. We could have used `DiffusionPipeline`
# here, but just to avoid potential edge cases.
pipeline = pipeline_class.from_pretrained(
self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32
)

View File

@@ -44,7 +44,6 @@ _GO_LC_SUPPORTED_PYTORCH_LAYERS = (
torch.nn.ConvTranspose2d,
torch.nn.ConvTranspose3d,
torch.nn.Linear,
torch.nn.Embedding,
# TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX
# because of double invocation of the same norm layer in CogVideoXLayerNorm
)

View File

@@ -21,12 +21,7 @@ from tokenizers import Tokenizer as TokenizerFast
from torch import nn
from ..models.modeling_utils import load_state_dict
from ..utils import (
_get_model_file,
is_accelerate_available,
is_transformers_available,
logging,
)
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
if is_transformers_available():

View File

@@ -39,8 +39,11 @@ from .modular_pipeline_utils import (
InputParam,
InsertableDict,
OutputParam,
combine_inputs,
combine_outputs,
format_components,
format_configs,
format_workflow,
make_doc_string,
)
@@ -243,6 +246,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
config_name = "modular_config.json"
model_name = None
_workflow_map = None
@classmethod
def _get_signature_keys(cls, obj):
@@ -298,6 +302,35 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
def outputs(self) -> List[OutputParam]:
return self._get_outputs()
# currently only ConditionalPipelineBlocks and SequentialPipelineBlocks support `get_execution_blocks`
def get_execution_blocks(self, **kwargs):
"""
Get the block(s) that would execute given the inputs. Must be implemented by subclasses that support
conditional block selection.
Args:
**kwargs: Input names and values. Only trigger inputs affect block selection.
"""
raise NotImplementedError(f"`get_execution_blocks` is not implemented for {self.__class__.__name__}")
# currently only SequentialPipelineBlocks support workflows
@property
def workflow_names(self):
"""
Returns a list of available workflow names. Must be implemented by subclasses that define `_workflow_map`.
"""
raise NotImplementedError(f"`workflow_names` is not implemented for {self.__class__.__name__}")
def get_workflow(self, workflow_name: str):
"""
Get the execution blocks for a specific workflow. Must be implemented by subclasses that define
`_workflow_map`.
Args:
workflow_name: Name of the workflow to retrieve.
"""
raise NotImplementedError(f"`get_workflow` is not implemented for {self.__class__.__name__}")
@classmethod
def from_pretrained(
cls,
@@ -435,72 +468,6 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
if current_value is not param: # Using identity comparison to check if object was modified
state.set(param_name, param, input_param.kwargs_type)
@staticmethod
def combine_inputs(*named_input_lists: List[Tuple[str, List[InputParam]]]) -> List[InputParam]:
"""
Combines multiple lists of InputParam objects from different blocks. For duplicate inputs, updates only if
current default value is None and new default value is not None. Warns if multiple non-None default values
exist for the same input.
Args:
named_input_lists: List of tuples containing (block_name, input_param_list) pairs
Returns:
List[InputParam]: Combined list of unique InputParam objects
"""
combined_dict = {} # name -> InputParam
value_sources = {} # name -> block_name
for block_name, inputs in named_input_lists:
for input_param in inputs:
if input_param.name is None and input_param.kwargs_type is not None:
input_name = "*_" + input_param.kwargs_type
else:
input_name = input_param.name
if input_name in combined_dict:
current_param = combined_dict[input_name]
if (
current_param.default is not None
and input_param.default is not None
and current_param.default != input_param.default
):
warnings.warn(
f"Multiple different default values found for input '{input_name}': "
f"{current_param.default} (from block '{value_sources[input_name]}') and "
f"{input_param.default} (from block '{block_name}'). Using {current_param.default}."
)
if current_param.default is None and input_param.default is not None:
combined_dict[input_name] = input_param
value_sources[input_name] = block_name
else:
combined_dict[input_name] = input_param
value_sources[input_name] = block_name
return list(combined_dict.values())
@staticmethod
def combine_outputs(*named_output_lists: List[Tuple[str, List[OutputParam]]]) -> List[OutputParam]:
"""
Combines multiple lists of OutputParam objects from different blocks. For duplicate outputs, keeps the first
occurrence of each output name.
Args:
named_output_lists: List of tuples containing (block_name, output_param_list) pairs
Returns:
List[OutputParam]: Combined list of unique OutputParam objects
"""
combined_dict = {} # name -> OutputParam
for block_name, outputs in named_output_lists:
for output_param in outputs:
if (output_param.name not in combined_dict) or (
combined_dict[output_param.name].kwargs_type is None and output_param.kwargs_type is not None
):
combined_dict[output_param.name] = output_param
return list(combined_dict.values())
@property
def input_names(self) -> List[str]:
return [input_param.name for input_param in self.inputs if input_param.name is not None]
@@ -532,7 +499,8 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
class ConditionalPipelineBlocks(ModularPipelineBlocks):
"""
A Pipeline Blocks that conditionally selects a block to run based on the inputs. Subclasses must implement the
`select_block` method to define the logic for selecting the block.
`select_block` method to define the logic for selecting the block. Currently, we only support selection logic based
on the presence or absence of inputs (i.e., whether they are `None` or not).
This class inherits from [`ModularPipelineBlocks`]. Check the superclass documentation for the generic methods the
library implements for all the pipeline blocks (such as loading or saving etc.)
@@ -540,15 +508,20 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
> [!WARNING] > This is an experimental feature and is likely to change in the future.
Attributes:
block_classes: List of block classes to be used
block_names: List of prefixes for each block
block_trigger_inputs: List of input names that select_block() uses to determine which block to run
block_classes: List of block classes to be used. Must have the same length as `block_names`.
block_names: List of names for each block. Must have the same length as `block_classes`.
block_trigger_inputs: List of input names that `select_block()` uses to determine which block to run.
For `ConditionalPipelineBlocks`, this does not need to correspond to `block_names` and `block_classes`. For
`AutoPipelineBlocks`, this must have the same length as `block_names` and `block_classes`, where each
element specifies the trigger input for the corresponding block.
default_block_name: Name of the default block to run when no trigger inputs match.
If None, this block can be skipped entirely when no trigger inputs are provided.
"""
block_classes = []
block_names = []
block_trigger_inputs = []
default_block_name = None # name of the default block if no trigger inputs are provided, if None, this block can be skipped if no trigger inputs are provided
default_block_name = None
def __init__(self):
sub_blocks = InsertableDict()
@@ -612,7 +585,7 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
@property
def inputs(self) -> List[Tuple[str, Any]]:
named_inputs = [(name, block.inputs) for name, block in self.sub_blocks.items()]
combined_inputs = self.combine_inputs(*named_inputs)
combined_inputs = combine_inputs(*named_inputs)
# mark Required inputs only if that input is required by all the blocks
for input_param in combined_inputs:
if input_param.name in self.required_inputs:
@@ -624,15 +597,16 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[str]:
named_outputs = [(name, block.intermediate_outputs) for name, block in self.sub_blocks.items()]
combined_outputs = self.combine_outputs(*named_outputs)
combined_outputs = combine_outputs(*named_outputs)
return combined_outputs
@property
def outputs(self) -> List[str]:
named_outputs = [(name, block.outputs) for name, block in self.sub_blocks.items()]
combined_outputs = self.combine_outputs(*named_outputs)
combined_outputs = combine_outputs(*named_outputs)
return combined_outputs
# used for `__repr__`
def _get_trigger_inputs(self) -> set:
"""
Returns a set of all unique trigger input values found in this block and nested blocks.
@@ -661,11 +635,6 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
return all_triggers
@property
def trigger_inputs(self):
"""All trigger inputs including from nested blocks."""
return self._get_trigger_inputs()
def select_block(self, **kwargs) -> Optional[str]:
"""
Select the block to run based on the trigger inputs. Subclasses must implement this method to define the logic
@@ -705,6 +674,39 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
logger.error(error_msg)
raise
def get_execution_blocks(self, **kwargs) -> Optional["ModularPipelineBlocks"]:
"""
Get the block(s) that would execute given the inputs.
Recursively resolves nested ConditionalPipelineBlocks until reaching either:
- A leaf block (no sub_blocks) → returns single `ModularPipelineBlocks`
- A `SequentialPipelineBlocks` → delegates to its `get_execution_blocks()` which returns
a `SequentialPipelineBlocks` containing the resolved execution blocks
Args:
**kwargs: Input names and values. Only trigger inputs affect block selection.
Returns:
- `ModularPipelineBlocks`: A leaf block or resolved `SequentialPipelineBlocks`
- `None`: If this block would be skipped (no trigger matched and no default)
"""
trigger_kwargs = {name: kwargs.get(name) for name in self.block_trigger_inputs if name is not None}
block_name = self.select_block(**trigger_kwargs)
if block_name is None:
block_name = self.default_block_name
if block_name is None:
return None
block = self.sub_blocks[block_name]
# Recursively resolve until we hit a leaf block or a SequentialPipelineBlocks
if block.sub_blocks:
return block.get_execution_blocks(**kwargs)
return block
def __repr__(self):
class_name = self.__class__.__name__
base_class = self.__class__.__bases__[0].__name__
@@ -712,11 +714,11 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
f"{class_name}(\n Class: {base_class}\n" if base_class and base_class != "object" else f"{class_name}(\n"
)
if self.trigger_inputs:
if self._get_trigger_inputs():
header += "\n"
header += " " + "=" * 100 + "\n"
header += " This pipeline contains blocks that are selected at runtime based on inputs.\n"
header += f" Trigger Inputs: {sorted(self.trigger_inputs)}\n"
header += f" Trigger Inputs: {sorted(self._get_trigger_inputs())}\n"
header += " " + "=" * 100 + "\n\n"
# Format description with proper indentation
@@ -783,24 +785,56 @@ class ConditionalPipelineBlocks(ModularPipelineBlocks):
class AutoPipelineBlocks(ConditionalPipelineBlocks):
"""
A Pipeline Blocks that automatically selects a block to run based on the presence of trigger inputs.
A Pipeline Blocks that automatically selects a block to run based on the presence of trigger inputs.
This is a specialized version of `ConditionalPipelineBlocks` where:
- Each block has one corresponding trigger input (1:1 mapping)
- Block selection is automatic: the first block whose trigger input is present gets selected
- `block_trigger_inputs` must have the same length as `block_names` and `block_classes`
- Use `None` in `block_trigger_inputs` to specify the default block, i.e. the block that will run if no trigger
inputs are present
Attributes:
block_classes:
List of block classes to be used. Must have the same length as `block_names` and
`block_trigger_inputs`.
block_names:
List of names for each block. Must have the same length as `block_classes` and `block_trigger_inputs`.
block_trigger_inputs:
List of input names where each element specifies the trigger input for the corresponding block. Use
`None` to mark the default block.
Example:
```python
class MyAutoBlock(AutoPipelineBlocks):
block_classes = [InpaintEncoderBlock, ImageEncoderBlock, TextEncoderBlock]
block_names = ["inpaint", "img2img", "text2img"]
block_trigger_inputs = ["mask_image", "image", None] # text2img is the default
```
With this definition:
- As long as `mask_image` is provided, the "inpaint" block runs (regardless of whether `image` is provided)
- If `mask_image` is not provided but `image` is provided, the "img2img" block runs
- Otherwise, "text2img" block runs (default, trigger is `None`)
"""
def __init__(self):
super().__init__()
if self.default_block_name is not None:
raise ValueError(
f"In {self.__class__.__name__}, do not set `default_block_name` for AutoPipelineBlocks. "
f"Use `None` in `block_trigger_inputs` to specify the default block."
)
if not (len(self.block_classes) == len(self.block_names) == len(self.block_trigger_inputs)):
raise ValueError(
f"In {self.__class__.__name__}, the number of block_classes, block_names, and block_trigger_inputs must be the same."
)
@property
def default_block_name(self) -> Optional[str]:
"""Derive default_block_name from block_trigger_inputs (None entry)."""
if None in self.block_trigger_inputs:
idx = self.block_trigger_inputs.index(None)
return self.block_names[idx]
return None
self.default_block_name = self.block_names[idx]
def select_block(self, **kwargs) -> Optional[str]:
"""Select block based on which trigger input is present (not None)."""
@@ -854,6 +888,29 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
expected_configs.append(config)
return expected_configs
@property
def workflow_names(self):
if self._workflow_map is None:
raise NotImplementedError(
f"workflows is not supported because _workflow_map is not set for {self.__class__.__name__}"
)
return list(self._workflow_map.keys())
def get_workflow(self, workflow_name: str):
if self._workflow_map is None:
raise NotImplementedError(
f"workflows is not supported because _workflow_map is not set for {self.__class__.__name__}"
)
if workflow_name not in self._workflow_map:
raise ValueError(f"Workflow {workflow_name} not found in {self.__class__.__name__}")
trigger_inputs = self._workflow_map[workflow_name]
workflow_blocks = self.get_execution_blocks(**trigger_inputs)
return workflow_blocks
@classmethod
def from_blocks_dict(
cls, blocks_dict: Dict[str, Any], description: Optional[str] = None
@@ -949,7 +1006,7 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
# filter out them here so they do not end up as intermediate_outputs
if name not in inp_names:
named_outputs.append((name, block.intermediate_outputs))
combined_outputs = self.combine_outputs(*named_outputs)
combined_outputs = combine_outputs(*named_outputs)
return combined_outputs
# YiYi TODO: I think we can remove the outputs property
@@ -973,6 +1030,7 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
raise
return pipeline, state
# used for `trigger_inputs` property
def _get_trigger_inputs(self):
"""
Returns a set of all unique trigger input values found in the blocks.
@@ -996,89 +1054,50 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
return fn_recursive_get_trigger(self.sub_blocks)
@property
def trigger_inputs(self):
return self._get_trigger_inputs()
def _traverse_trigger_blocks(self, active_inputs):
def get_execution_blocks(self, **kwargs) -> "SequentialPipelineBlocks":
"""
Traverse blocks and select which ones would run given the active inputs.
Get the blocks that would execute given the specified inputs.
Args:
active_inputs: Dict of input names to values that are "present"
**kwargs: Input names and values. Only trigger inputs affect block selection.
Returns:
OrderedDict of block_name -> block that would execute
SequentialPipelineBlocks containing only the blocks that would execute
"""
# Copy kwargs so we can add outputs as we traverse
active_inputs = dict(kwargs)
def fn_recursive_traverse(block, block_name, active_inputs):
result_blocks = OrderedDict()
# ConditionalPipelineBlocks (includes AutoPipelineBlocks)
if isinstance(block, ConditionalPipelineBlocks):
trigger_kwargs = {name: active_inputs.get(name) for name in block.block_trigger_inputs}
selected_block_name = block.select_block(**trigger_kwargs)
if selected_block_name is None:
selected_block_name = block.default_block_name
if selected_block_name is None:
block = block.get_execution_blocks(**active_inputs)
if block is None:
return result_blocks
selected_block = block.sub_blocks[selected_block_name]
if selected_block.sub_blocks:
result_blocks.update(fn_recursive_traverse(selected_block, block_name, active_inputs))
else:
result_blocks[block_name] = selected_block
if hasattr(selected_block, "outputs"):
for out in selected_block.outputs:
active_inputs[out.name] = True
return result_blocks
# SequentialPipelineBlocks or LoopSequentialPipelineBlocks
if block.sub_blocks:
# Has sub_blocks (SequentialPipelineBlocks/ConditionalPipelineBlocks)
if block.sub_blocks and not isinstance(block, LoopSequentialPipelineBlocks):
for sub_block_name, sub_block in block.sub_blocks.items():
blocks_to_update = fn_recursive_traverse(sub_block, sub_block_name, active_inputs)
blocks_to_update = {f"{block_name}.{k}": v for k, v in blocks_to_update.items()}
result_blocks.update(blocks_to_update)
nested_blocks = fn_recursive_traverse(sub_block, sub_block_name, active_inputs)
nested_blocks = {f"{block_name}.{k}": v for k, v in nested_blocks.items()}
result_blocks.update(nested_blocks)
else:
# Leaf block: single ModularPipelineBlocks or LoopSequentialPipelineBlocks
result_blocks[block_name] = block
if hasattr(block, "outputs"):
for out in block.outputs:
# Add outputs to active_inputs so subsequent blocks can use them as triggers
if hasattr(block, "intermediate_outputs"):
for out in block.intermediate_outputs:
active_inputs[out.name] = True
return result_blocks
all_blocks = OrderedDict()
for block_name, block in self.sub_blocks.items():
blocks_to_update = fn_recursive_traverse(block, block_name, active_inputs)
all_blocks.update(blocks_to_update)
return all_blocks
nested_blocks = fn_recursive_traverse(block, block_name, active_inputs)
all_blocks.update(nested_blocks)
def get_execution_blocks(self, **kwargs):
"""
Get the blocks that would execute given the specified inputs.
Args:
**kwargs: Input names and values. Only trigger inputs affect block selection.
Pass any inputs that would be non-None at runtime.
Returns:
SequentialPipelineBlocks containing only the blocks that would execute
Example:
# Get blocks for inpainting workflow
blocks = pipeline.get_execution_blocks(prompt="a cat", mask=mask, image=image)
# Get blocks for text2image workflow
blocks = pipeline.get_execution_blocks(prompt="a cat")
"""
# Filter out None values
active_inputs = {k: v for k, v in kwargs.items() if v is not None}
blocks_triggered = self._traverse_trigger_blocks(active_inputs)
return SequentialPipelineBlocks.from_blocks_dict(blocks_triggered)
return SequentialPipelineBlocks.from_blocks_dict(all_blocks)
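
A minimal usage sketch of the new `get_execution_blocks` (editor's illustration, not part of the diff): `auto_blocks` stands for any assembled blocks instance with trigger inputs, e.g. `QwenImageAutoBlocks()`, and `init_image` is a placeholder PIL image.

    # hypothetical objects, for illustration only
    auto_blocks = QwenImageAutoBlocks()
    t2i_blocks = auto_blocks.get_execution_blocks(prompt="a cat")                    # text2image path
    i2i_blocks = auto_blocks.get_execution_blocks(prompt="a cat", image=init_image)  # image2image path
    print(i2i_blocks)  # a SequentialPipelineBlocks containing only the blocks that would run
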
def __repr__(self):
class_name = self.__class__.__name__
@@ -1087,18 +1106,23 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
f"{class_name}(\n Class: {base_class}\n" if base_class and base_class != "object" else f"{class_name}(\n"
)
if self.trigger_inputs:
if self._workflow_map is None and self._get_trigger_inputs():
header += "\n"
header += " " + "=" * 100 + "\n"
header += " This pipeline contains blocks that are selected at runtime based on inputs.\n"
header += f" Trigger Inputs: {[inp for inp in self.trigger_inputs if inp is not None]}\n"
header += f" Trigger Inputs: {[inp for inp in self._get_trigger_inputs() if inp is not None]}\n"
# Get first trigger input as example
example_input = next(t for t in self.trigger_inputs if t is not None)
example_input = next(t for t in self._get_trigger_inputs() if t is not None)
header += f" Use `get_execution_blocks()` to see selected blocks (e.g. `get_execution_blocks({example_input}=...)`).\n"
header += " " + "=" * 100 + "\n\n"
description = self.description
if self._workflow_map is not None:
workflow_str = format_workflow(self._workflow_map)
description = f"{self.description}\n\n{workflow_str}"
# Format description with proper indentation
desc_lines = self.description.split("\n")
desc_lines = description.split("\n")
desc = []
# First line with "Description:" label
desc.append(f" Description: {desc_lines[0]}")
@@ -1146,10 +1170,15 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
@property
def doc(self):
description = self.description
if self._workflow_map is not None:
workflow_str = format_workflow(self._workflow_map)
description = f"{self.description}\n\n{workflow_str}"
return make_doc_string(
self.inputs,
self.outputs,
self.description,
description=description,
class_name=self.__class__.__name__,
expected_components=self.expected_components,
expected_configs=self.expected_configs,
@@ -1282,7 +1311,7 @@ class LoopSequentialPipelineBlocks(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[str]:
named_outputs = [(name, block.intermediate_outputs) for name, block in self.sub_blocks.items()]
combined_outputs = self.combine_outputs(*named_outputs)
combined_outputs = combine_outputs(*named_outputs)
for output in self.loop_intermediate_outputs:
if output.name not in {output.name for output in combined_outputs}:
combined_outputs.append(output)

View File

@@ -14,9 +14,10 @@
import inspect
import re
import warnings
from collections import OrderedDict
from dataclasses import dataclass, field, fields
from typing import Any, Dict, List, Literal, Optional, Type, Union
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
import PIL.Image
import torch
@@ -860,6 +861,30 @@ def format_configs(configs, indent_level=4, max_line_length=115, add_empty_lines
return "\n".join(formatted_configs)
def format_workflow(workflow_map):
"""Format a workflow map into a readable string representation.
Args:
workflow_map: Dictionary mapping workflow names to trigger inputs
Returns:
A formatted string representing all workflows
"""
if workflow_map is None:
return ""
lines = ["Supported workflows:"]
for workflow_name, trigger_inputs in workflow_map.items():
required_inputs = [k for k, v in trigger_inputs.items() if v]
if required_inputs:
inputs_str = ", ".join(f"`{t}`" for t in required_inputs)
lines.append(f" - `{workflow_name}`: requires {inputs_str}")
else:
lines.append(f" - `{workflow_name}`: default (no additional inputs required)")
return "\n".join(lines)
def make_doc_string(
inputs,
outputs,
@@ -914,3 +939,69 @@ def make_doc_string(
output += format_output_params(outputs, indent_level=2)
return output
def combine_inputs(*named_input_lists: List[Tuple[str, List[InputParam]]]) -> List[InputParam]:
"""
Combines multiple lists of InputParam objects from different blocks. For duplicate inputs, updates only if current
default value is None and new default value is not None. Warns if multiple non-None default values exist for the
same input.
Args:
named_input_lists: List of tuples containing (block_name, input_param_list) pairs
Returns:
List[InputParam]: Combined list of unique InputParam objects
"""
combined_dict = {} # name -> InputParam
value_sources = {} # name -> block_name
for block_name, inputs in named_input_lists:
for input_param in inputs:
if input_param.name is None and input_param.kwargs_type is not None:
input_name = "*_" + input_param.kwargs_type
else:
input_name = input_param.name
if input_name in combined_dict:
current_param = combined_dict[input_name]
if (
current_param.default is not None
and input_param.default is not None
and current_param.default != input_param.default
):
warnings.warn(
f"Multiple different default values found for input '{input_name}': "
f"{current_param.default} (from block '{value_sources[input_name]}') and "
f"{input_param.default} (from block '{block_name}'). Using {current_param.default}."
)
if current_param.default is None and input_param.default is not None:
combined_dict[input_name] = input_param
value_sources[input_name] = block_name
else:
combined_dict[input_name] = input_param
value_sources[input_name] = block_name
return list(combined_dict.values())
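
A small sketch of the duplicate handling in `combine_inputs`, assuming the function above is in scope; `InputParam` is stubbed here with only the fields the function reads, so the stub is illustrative rather than the real dataclass:

    from dataclasses import dataclass

    @dataclass
    class InputParam:  # stand-in for the real dataclass, illustration only
        name: str
        default: object = None
        kwargs_type: str = None

    encode = ("text_encoder", [InputParam("prompt"), InputParam("num_images_per_prompt")])
    denoise = ("denoise", [InputParam("prompt"), InputParam("num_images_per_prompt", default=1)])
    combined = combine_inputs(encode, denoise)
    # "prompt" is kept once; "num_images_per_prompt" adopts the non-None default (1) from "denoise".
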
def combine_outputs(*named_output_lists: List[Tuple[str, List[OutputParam]]]) -> List[OutputParam]:
"""
Combines multiple lists of OutputParam objects from different blocks. For duplicate outputs, keeps the first
occurrence of each output name.
Args:
named_output_lists: List of tuples containing (block_name, output_param_list) pairs
Returns:
List[OutputParam]: Combined list of unique OutputParam objects
"""
combined_dict = {} # name -> OutputParam
for block_name, outputs in named_output_lists:
for output_param in outputs:
if (output_param.name not in combined_dict) or (
combined_dict[output_param.name].kwargs_type is None and output_param.kwargs_type is not None
):
combined_dict[output_param.name] = output_param
return list(combined_dict.values())
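
And a matching sketch for `combine_outputs`, with the same caveats (`OutputParam` stubbed, function assumed in scope):

    from dataclasses import dataclass

    @dataclass
    class OutputParam:  # stand-in for the real dataclass, illustration only
        name: str
        kwargs_type: str = None

    encode = ("text_encoder", [OutputParam("prompt_embeds")])
    denoise = ("denoise", [OutputParam("prompt_embeds"), OutputParam("latents")])
    combined = combine_outputs(encode, denoise)
    # "prompt_embeds" keeps its first occurrence (from "text_encoder"); "latents" is appended.
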

View File

@@ -1113,10 +1113,14 @@ AUTO_BLOCKS = InsertableDict(
class QwenImageAutoBlocks(SequentialPipelineBlocks):
"""
Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.
- for image-to-image generation, you need to provide `image`
- for inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`.
- to run the controlnet workflow, you need to provide `control_image`
- for text-to-image generation, all you need to provide is `prompt`
Supported workflows:
- `text2image`: requires `prompt`
- `image2image`: requires `prompt`, `image`
- `inpainting`: requires `prompt`, `mask_image`, `image`
- `controlnet_text2image`: requires `prompt`, `control_image`
- `controlnet_image2image`: requires `prompt`, `image`, `control_image`
- `controlnet_inpainting`: requires `prompt`, `mask_image`, `image`, `control_image`
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
@@ -1197,15 +1201,24 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
block_classes = AUTO_BLOCKS.values()
block_names = AUTO_BLOCKS.keys()
# The workflow map defines the trigger conditions for each workflow.
# How to define:
# - Only include required inputs and trigger inputs (inputs that determine which blocks run)
# - `True` means the workflow triggers when the input is not None (most common case)
# - Use specific values (e.g., `{"strength": 0.5}`) if your `select_block` logic depends on the value
_workflow_map = {
"text2image": {"prompt": True},
"image2image": {"prompt": True, "image": True},
"inpainting": {"prompt": True, "mask_image": True, "image": True},
"controlnet_text2image": {"prompt": True, "control_image": True},
"controlnet_image2image": {"prompt": True, "image": True, "control_image": True},
"controlnet_inpainting": {"prompt": True, "mask_image": True, "image": True, "control_image": True},
}
@property
def description(self):
return (
"Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.\n"
+ "- for image-to-image generation, you need to provide `image`\n"
+ "- for inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`.\n"
+ "- to run the controlnet workflow, you need to provide `control_image`\n"
+ "- for text-to-image generation, all you need to provide is `prompt`"
)
return "Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage."
@property
def outputs(self):

View File

@@ -287,9 +287,6 @@ class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)

View File

@@ -407,8 +407,8 @@ class GlmImagePipeline(DiffusionPipeline):
if len(source_grids) > 0:
prior_token_image_embed = self.vision_language_encoder.get_image_features(
inputs["pixel_values"], source_grids, return_dict=False
)
inputs["pixel_values"], source_grids
).pooler_output
prior_token_image_embed = torch.cat(prior_token_image_embed, dim=0)
prior_token_image_ids_d32 = self.vision_language_encoder.get_image_tokens(
prior_token_image_embed, source_grids

View File

@@ -20,8 +20,6 @@ class MultilingualCLIP(PreTrainedModel):
self.LinearTransformation = torch.nn.Linear(
in_features=config.transformerDimensions, out_features=config.numDims
)
if hasattr(self, "post_init"):
self.post_init()
def forward(self, input_ids, attention_mask):
embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0]

View File

@@ -782,9 +782,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
self.prefix_encoder = PrefixEncoder(config)
self.dropout = torch.nn.Dropout(0.1)
if hasattr(self, "post_init"):
self.post_init()
def get_input_embeddings(self):
return self.embedding.word_embeddings
@@ -814,7 +811,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
batch_size, seq_length = input_ids.shape

View File

@@ -340,7 +340,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
save_method_accept_variant = "variant" in save_method_signature.parameters
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters
save_kwargs = {}
if save_method_accept_safe:
@@ -350,11 +349,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
if save_method_accept_max_shard_size and max_shard_size is not None:
# max_shard_size is expected to not be None in ModelMixin
save_kwargs["max_shard_size"] = max_shard_size
if save_method_accept_peft_format:
# Set save_peft_format=False for transformers>=5.0.0 compatibility
# In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix
# to adapter keys, but from_pretrained expects keys without this prefix
save_kwargs["save_peft_format"] = False
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)

View File

@@ -496,8 +496,13 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
num_frames = max(num_frames, 1)
h_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[1]
w_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[2]
patch_size = (
self.transformer.config.patch_size
if self.transformer is not None
else self.transformer_2.config.patch_size
)
h_multiple_of = self.vae_scale_factor_spatial * patch_size[1]
w_multiple_of = self.vae_scale_factor_spatial * patch_size[2]
calc_height = height // h_multiple_of * h_multiple_of
calc_width = width // w_multiple_of * w_multiple_of
if height != calc_height or width != calc_width:

View File

@@ -637,8 +637,13 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
num_frames = max(num_frames, 1)
h_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[1]
w_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[2]
patch_size = (
self.transformer.config.patch_size
if self.transformer is not None
else self.transformer_2.config.patch_size
)
h_multiple_of = self.vae_scale_factor_spatial * patch_size[1]
w_multiple_of = self.vae_scale_factor_spatial * patch_size[2]
calc_height = height // h_multiple_of * h_multiple_of
calc_width = width // w_multiple_of * w_multiple_of
if height != calc_height or width != calc_width:

View File

@@ -227,7 +227,7 @@ _cosmos_guardrail_available, _cosmos_guardrail_version = _is_package_available("
_sageattention_available, _sageattention_version = _is_package_available("sageattention")
_flash_attn_available, _flash_attn_version = _is_package_available("flash_attn")
_flash_attn_3_available, _flash_attn_3_version = _is_package_available("flash_attn_3")
_aiter_available, _aiter_version = _is_package_available("aiter")
_aiter_available, _aiter_version = _is_package_available("aiter", get_dist_name=True)
_kornia_available, _kornia_version = _is_package_available("kornia")
_nvidia_modelopt_available, _nvidia_modelopt_version = _is_package_available("modelopt", get_dist_name=True)
_av_available, _av_version = _is_package_available("av")

View File

@@ -20,9 +20,7 @@ class TestAutoModel(unittest.TestCase):
side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}],
)
def test_load_from_config_transformers_with_subfolder(self, mock_load_config):
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
assert isinstance(model, CLIPTextModel)
def test_load_from_config_without_subfolder(self):
@@ -30,7 +28,5 @@ class TestAutoModel(unittest.TestCase):
assert isinstance(model, LongformerModel)
def test_load_from_model_index(self):
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
assert isinstance(model, CLIPTextModel)

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast
from transformers import T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -89,8 +89,7 @@ class BriaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = FlowMatchEulerDiscreteScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -41,8 +41,7 @@ class ChromaPipelineFastTests(
)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -42,8 +42,7 @@ class ChromaImg2ImgPipelineFastTests(
)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,6 @@ import unittest
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -72,8 +71,7 @@ class ChronoEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -117,8 +117,7 @@ class CogVideoXPipelineFastTests(
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -104,8 +104,7 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -113,8 +113,7 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler
@@ -99,8 +99,7 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel
@@ -89,8 +89,7 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = CogVideoXDDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -108,7 +108,7 @@ class CogView4PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generator = torch.Generator(device=device).manual_seed(seed)
inputs = {
"prompt": "dance monkey",
"negative_prompt": "bad",
"negative_prompt": "",
"generator": generator,
"num_inference_steps": 2,
"guidance_scale": 6.0,

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -122,8 +122,7 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -97,8 +97,7 @@ class FluxControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMixin, Fl
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -13,7 +13,9 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import torch_device
from ...testing_utils import (
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist
@@ -68,8 +70,7 @@ class FluxControlNetImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,15 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoTokenizer,
CLIPTextConfig,
CLIPTextModel,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -14,7 +22,11 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
from ...testing_utils import (
enable_full_determinism,
floats_tensor,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -73,8 +85,7 @@ class FluxControlNetInpaintPipelineTests(unittest.TestCase, PipelineTesterMixin)
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -96,10 +96,7 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,14 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -35,7 +28,10 @@ from diffusers import (
from diffusers.models import SD3ControlNetModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -107,8 +103,7 @@ class StableDiffusion3ControlInpaintNetPipelineFastTests(unittest.TestCase, Pipe
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,14 +19,7 @@ from typing import Optional
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -125,8 +118,7 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler
@@ -107,8 +107,7 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
rho=7.0,
final_sigmas_type="sigma_min",
)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -95,8 +95,7 @@ class Cosmos2TextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -96,8 +96,7 @@ class Cosmos2VideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler
@@ -108,8 +108,7 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
rho=7.0,
final_sigmas_type="sigma_min",
)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -2,7 +2,7 @@ import tempfile
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import DDPMScheduler, UNet2DConditionModel
from diffusers.models.attention_processor import AttnAddedKVProcessor
@@ -18,8 +18,7 @@ from ..test_pipelines_common import to_np
class IFPipelineTesterMixin:
def _get_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -76,8 +75,7 @@ class IFPipelineTesterMixin:
def _get_superresolution_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,9 @@ import unittest
import torch
from diffusers import IFPipeline
from diffusers import (
IFPipeline,
)
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available

View File

@@ -4,7 +4,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -91,8 +91,7 @@ class FluxPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel
@@ -53,8 +53,7 @@ class FluxControlPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -57,8 +57,7 @@ class FluxControlImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -58,8 +58,7 @@ class FluxControlInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel
@@ -58,8 +58,7 @@ class FluxFillPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel
@@ -55,8 +55,7 @@ class FluxImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel
@@ -55,8 +55,7 @@ class FluxInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,8 +79,7 @@ class FluxKontextPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,8 +79,7 @@ class FluxKontextInpaintPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel
from diffusers.utils import is_transformers_version
@@ -57,8 +57,7 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
glm_config = GlmImageConfig(

View File

@@ -18,7 +18,6 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
@@ -95,8 +94,7 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
@@ -151,7 +149,7 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (128, 128, 3))
# fmt: off
expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969])
expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983])
# fmt: on
generated_slice = generated_image.flatten()

View File

@@ -233,7 +233,7 @@ class HunyuanVideoImageToVideoPipelineFastTests(
self.assertEqual(generated_video.shape, (5, 3, 16, 16))
# fmt: off
expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127])
expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128])
# fmt: on
generated_slice = generated_video.flatten()

View File

@@ -15,14 +15,7 @@
import unittest
import torch
from transformers import (
AutoConfig,
ByT5Tokenizer,
Qwen2_5_VLTextConfig,
Qwen2_5_VLTextModel,
Qwen2Tokenizer,
T5EncoderModel,
)
from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLHunyuanVideo15,
@@ -121,8 +114,7 @@ class HunyuanVideo15PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = ByT5Tokenizer()
guider = ClassifierFreeGuidance(guidance_scale=1.0)

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline
@@ -74,9 +74,7 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -108,8 +108,7 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -119,8 +119,7 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -109,8 +109,7 @@ class LattePipelineFastTests(
vae = AutoencoderKL()
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
@@ -88,8 +88,7 @@ class LTXPipelineFastTests(PipelineTesterMixin, FirstBlockCacheTesterMixin, unit
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -92,8 +92,7 @@ class LTXConditionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -91,8 +91,7 @@ class LTXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
@@ -89,8 +89,7 @@ class MochiPipelineFastTests(
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -67,9 +67,7 @@ class HunyuanDiTPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -80,8 +80,7 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, PipelineTesterMixi
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -5,14 +5,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -91,9 +84,7 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -77,10 +77,7 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -83,10 +83,7 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -160,7 +160,7 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
-expected_slice = torch.tensor([0.5646, 0.6369, 0.6019, 0.5640, 0.5830, 0.5520, 0.5717, 0.6315, 0.4167, 0.3563, 0.5640, 0.4849, 0.4961, 0.5237, 0.4084, 0.5014])
+expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208])
# fmt: on
generated_slice = generated_image.flatten()


@@ -163,7 +163,7 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
-expected_slice = torch.tensor([0.5640, 0.6350, 0.6003, 0.5606, 0.5801, 0.5502, 0.5757, 0.6388, 0.4174, 0.3590, 0.5647, 0.4891, 0.4975, 0.5256, 0.4088, 0.4991])
+expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
# fmt: on
generated_slice = generated_image.flatten()


@@ -164,7 +164,7 @@ class QwenImageEditPlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
-expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987])
+expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
# fmt: on
generated_slice = generated_image.flatten()
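The three QwenImage hunks above only refresh numeric expectations: each test flattens the generated image and checks a short fixed slice of it against a hard-coded tensor. A rough sketch of that comparison pattern; the slicing rule (assumed here to be the first and last eight values) and the tolerance are illustrative, not taken from the diff:

```python
import torch

torch.manual_seed(0)
# Hypothetical stand-in for a pipeline output of shape (3, 32, 32).
generated_image = torch.rand(3, 32, 32)

# Mirror of the test structure: flatten, keep a small fixed slice, and
# compare it against a recorded constant within a tolerance.
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])

# In the real tests expected_slice is a hard-coded tensor (the values these
# hunks update); here it is recomputed so the sketch runs standalone.
expected_slice = generated_slice.clone()
assert torch.allclose(generated_slice, expected_slice, atol=1e-4)
```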


@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -68,8 +68,7 @@ class SkyReelsV2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -68,8 +68,7 @@ class SkyReelsV2DiffusionForcingPipelineFastTests(PipelineTesterMixin, unittest.
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -18,7 +18,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
-AutoConfig,
AutoTokenizer,
T5EncoderModel,
)
@@ -69,8 +68,7 @@ class SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests(PipelineTesterMixi
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -161,8 +159,7 @@ class SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests(SkyReelsV2Diffusio
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -70,8 +70,7 @@ class SkyReelsV2DiffusionForcingVideoToVideoPipelineFastTests(PipelineTesterMixi
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -18,7 +18,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
-AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -72,8 +71,7 @@ class SkyReelsV2ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.Test
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -19,7 +19,10 @@ import unittest
import numpy as np
import torch
-from transformers import AutoConfig, T5EncoderModel, T5Tokenizer
+from transformers import (
+T5EncoderModel,
+T5Tokenizer,
+)
from diffusers import (
AutoencoderOobleck,
@@ -108,8 +111,7 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
torch.manual_seed(0)
t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
-config = AutoConfig.from_pretrained(t5_repo_id)
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained(t5_repo_id)
tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25)
torch.manual_seed(0)


@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
-from transformers import (
-AutoConfig,
-AutoTokenizer,
-CLIPTextConfig,
-CLIPTextModelWithProjection,
-CLIPTokenizer,
-T5EncoderModel,
-)
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
@@ -79,9 +72,7 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder_3 = T5EncoderModel(config)
+text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")


@@ -4,14 +4,7 @@ import unittest
import numpy as np
import torch
-from transformers import (
-AutoConfig,
-AutoTokenizer,
-CLIPTextConfig,
-CLIPTextModelWithProjection,
-CLIPTokenizer,
-T5EncoderModel,
-)
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder_3 = T5EncoderModel(config)
+text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")


@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
-from transformers import (
-AutoConfig,
-AutoTokenizer,
-CLIPTextConfig,
-CLIPTextModelWithProjection,
-CLIPTokenizer,
-T5EncoderModel,
-)
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder_3 = T5EncoderModel(config)
+text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")


@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
@@ -77,8 +77,7 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder_2 = T5EncoderModel(config)
+text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")


@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -79,8 +79,7 @@ class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder_2 = T5EncoderModel(config)
+text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")


@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel
@@ -68,8 +68,7 @@ class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -17,11 +17,14 @@ import unittest
import numpy as np
import torch
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel
-from ...testing_utils import enable_full_determinism, torch_device
+from ...testing_utils import (
+enable_full_determinism,
+torch_device,
+)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -60,8 +63,7 @@ class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -233,8 +235,7 @@ class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel
@@ -64,8 +64,7 @@ class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -249,8 +248,7 @@ class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -19,7 +19,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
-AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -79,8 +78,7 @@ class WanAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -19,7 +19,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
-AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -69,8 +68,7 @@ class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -241,8 +239,7 @@ class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -67,8 +67,7 @@ class WanVACEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)


@@ -16,7 +16,7 @@ import unittest
import torch
from PIL import Image
-from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline
@@ -62,8 +62,7 @@ class WanVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=3.0)
-config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
-text_encoder = T5EncoderModel(config)
+text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)