Compare commits

39 Commits

Author SHA1 Message Date
Sayak Paul  10ef2269a9  Merge branch 'main' into transformers-v5-pr  2026-02-03 10:59:44 +05:30
Sayak Paul  c3249d7b2e  Merge branch 'main' into transformers-v5-pr  2026-01-29 19:33:29 +05:30
sayakpaul  85682000a9  up  2026-01-28 17:46:59 +05:30
sayakpaul  5fefef9bc9  empty  2026-01-28 17:35:30 +05:30
sayakpaul  ea815e5bb0  remove torchvision.  2026-01-28 17:26:38 +05:30
sayakpaul  7eb51e932f  resolve conflicts.  2026-01-28 17:25:50 +05:30
sayakpaul  079e0e31b7  just keep main for easier debugging.  2026-01-27 11:38:41 +08:00
Sayak Paul  f9bdc09534  Merge branch 'main' into transformers-v5-pr  2026-01-27 09:47:36 +08:00
sayakpaul  2bee621229  fix with peft_format.  2026-01-26 18:48:52 +08:00
Sayak Paul  7a0739ccd3  Merge branch 'main' into transformers-v5-pr  2026-01-26 18:02:52 +08:00
sayakpaul  b4b707e585  up  2026-01-25 23:57:52 +08:00
Sayak Paul  fefd0f4e45  Merge branch 'main' into transformers-v5-pr  2026-01-25 23:31:53 +08:00
sayakpaul  6e8e7bad9e  up  2026-01-25 23:30:04 +08:00
sayakpaul  0eaa35fdca  Revert "up" (This reverts commit 5274ffdd7f.)  2026-01-23 17:31:48 +05:30
sayakpaul  4dff31871c  Revert "up" (This reverts commit 515dd06db5.)  2026-01-23 17:31:21 +05:30
sayakpaul  515dd06db5  up  2026-01-23 17:23:19 +05:30
sayakpaul  5274ffdd7f  up  2026-01-23 17:15:25 +05:30
Sayak Paul  a21a6ac565  Merge branch 'main' into transformers-v5-pr  2026-01-23 16:55:19 +05:30
Sayak Paul  c2d8273891  Merge branch 'main' into transformers-v5-pr  2026-01-23 12:51:36 +05:30
sayakpaul  e1249d2640  same stuff.  2026-01-20 18:05:20 +05:30
sayakpaul  2fe9f9868d  change qwen expected slice because of how init is handled in v5.  2026-01-20 16:56:54 +05:30
Sayak Paul  387befd6de  Merge branch 'main' into transformers-v5-pr  2026-01-20 15:46:00 +05:30
sayakpaul  351316328f  matrix configuration to see differences between 4.57.3 and main failures.  2026-01-20 10:11:08 +05:30
Sayak Paul  62bf2b0ab9  Merge branch 'main' into transformers-v5-pr  2026-01-20 09:49:11 +05:30
Sayak Paul  7f2cd5b6fc  Merge branch 'main' into transformers-v5-pr  2026-01-19 16:02:28 +05:30
Sayak Paul  4ea43ee6ab  Merge branch 'main' into transformers-v5-pr  2026-01-19 15:52:35 +05:30
sayakpaul  084c959bdf  fix t5 stuff for more.  2026-01-19 15:08:55 +05:30
sayakpaul  3dcb97c9ea  tie embedding issue.  2026-01-19 13:43:47 +05:30
Sayak Paul  7b55da8846  Merge branch 'main' into transformers-v5-pr  2026-01-19 11:05:40 +05:30
sayakpaul  cec020988b  up  2026-01-16 10:22:59 +05:30
sayakpaul  926db24add  up  2026-01-16 10:01:44 +05:30
sayakpaul  37cfceef0d  attributes  2026-01-16 09:39:36 +05:30
Sayak Paul  ea90a74ed4  Merge branch 'main' into transformers-v5-pr  2026-01-15 20:01:03 +05:30
sayakpaul  96f08043a3  fix group offloading.  2026-01-15 20:00:45 +05:30
sayakpaul  d0f279ce76  up  2026-01-15 16:59:41 +05:30
sayakpaul  c5e023fbe6  up  2026-01-15 13:03:11 +05:30
Sayak Paul  f8e50fab75  Merge branch 'main' into transformers-v5-pr  2026-01-15 12:47:21 +05:30
sayakpaul  c152b1831c  more  2026-01-14 14:54:39 +05:30
sayakpaul  039324ae16  switch to transformers main again./  2026-01-14 14:52:15 +05:30
90 changed files with 494 additions and 788 deletions

View File

@@ -92,8 +92,9 @@ jobs:
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu
transformers_version: ["main"]
name: ${{ matrix.config.name }}
name: ${{ matrix.config.name }} (transformers ${{ matrix.transformers_version }})
runs-on:
group: ${{ matrix.config.runner }}
@@ -115,8 +116,11 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
@@ -155,7 +159,7 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_transformers_${{ matrix.transformers_version }}_test_reports
path: reports
run_staging_tests:
@@ -220,8 +224,10 @@ jobs:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
transformers_version: ["main"]
name: LoRA tests with PEFT main
name: LoRA tests with PEFT main (transformers ${{ matrix.transformers_version }})
runs-on:
group: aws-general-8-plus
@@ -247,9 +253,12 @@ jobs:
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
uv pip install -U tokenizers
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
- name: Environment
run: |
python utils/print_env.py
@@ -275,6 +284,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_main_test_reports
name: pr_lora_transformers_${{ matrix.transformers_version }}_test_reports
path: reports
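
The workflow diff above replaces a hard pin on transformers 4.57.1 with a matrix-driven branch: a "main" entry installs transformers from git, any other entry is treated as a pinned release. A minimal Python sketch (not part of the PR; the helper name is hypothetical) of that selection logic:

def transformers_install_command(transformers_version: str) -> str:
    # "main" -> install from the git repository, allowing prereleases
    if transformers_version == "main":
        return (
            "uv pip uninstall transformers huggingface_hub && "
            "uv pip install --prerelease allow -U "
            "transformers@git+https://github.com/huggingface/transformers.git"
        )
    # anything else -> pinned PyPI release
    return (
        "uv pip uninstall transformers huggingface_hub && "
        f"uv pip install transformers=={transformers_version}"
    )

print(transformers_install_command("main"))
print(transformers_install_command("4.57.3"))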

View File

@@ -14,6 +14,7 @@ on:
- "tests/pipelines/test_pipelines_common.py"
- "tests/models/test_modeling_common.py"
- "examples/**/*.py"
- ".github/**.yml"
workflow_dispatch:
concurrency:
@@ -106,13 +107,14 @@ jobs:
path: reports
torch_pipelines_cuda_tests:
name: Torch Pipelines CUDA Tests
name: Torch Pipelines CUDA Tests (transformers ${{ matrix.transformers_version }})
needs: setup_torch_cuda_pipeline_matrix
strategy:
fail-fast: false
max-parallel: 8
matrix:
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
transformers_version: ["main"]
runs-on:
group: aws-g4dn-2xlarge
container:
@@ -131,8 +133,12 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
- name: Environment
run: |
@@ -172,11 +178,11 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pipeline_${{ matrix.module }}_test_reports
name: pipeline_${{ matrix.module }}_transformers_${{ matrix.transformers_version }}_test_reports
path: reports
torch_cuda_tests:
name: Torch CUDA Tests
name: Torch CUDA Tests (transformers ${{ matrix.transformers_version }})
needs: [check_code_quality, check_repository_consistency]
runs-on:
group: aws-g4dn-2xlarge
@@ -191,6 +197,7 @@ jobs:
max-parallel: 4
matrix:
module: [models, schedulers, lora, others]
transformers_version: ["main"]
steps:
- name: Checkout diffusers
uses: actions/checkout@v6
@@ -202,8 +209,12 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
- name: Environment
run: |
@@ -241,12 +252,16 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: torch_cuda_test_reports_${{ matrix.module }}
name: torch_cuda_test_reports_${{ matrix.module }}_transformers_${{ matrix.transformers_version }}
path: reports
run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu
name: Examples PyTorch CUDA tests on Ubuntu (transformers ${{ matrix.transformers_version }})
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
transformers_version: ["main"]
runs-on:
group: aws-g4dn-2xlarge
@@ -264,8 +279,11 @@ jobs:
nvidia-smi
- name: Install dependencies
run: |
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
if [ "${{ matrix.transformers_version }}" = "main" ]; then
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
else
uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }}
fi
uv pip install -e ".[quality,training]"
- name: Environment
@@ -289,6 +307,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: examples_test_reports
name: examples_transformers_${{ matrix.transformers_version }}_test_reports
path: reports

View File

@@ -17,6 +17,9 @@ import logging
import os
import sys
import tempfile
import unittest
from diffusers.utils import is_transformers_version
sys.path.append("..")
@@ -30,6 +33,7 @@ stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
class CustomDiffusion(ExamplesTestsAccelerate):
def test_custom_diffusion(self):
with tempfile.TemporaryDirectory() as tmpdir:
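
For context, the decorator added above gates the whole test class on the installed transformers version. A small self-contained sketch of the same pattern, assuming diffusers' is_transformers_version(operator, version) helper, which compares the installed version against a reference using the given operator string (the test class here is hypothetical):

import unittest

from diffusers.utils import is_transformers_version


@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
class SomeVersionGatedTests(unittest.TestCase):  # hypothetical test class
    def test_placeholder(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()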

View File

@@ -44,6 +44,7 @@ _GO_LC_SUPPORTED_PYTORCH_LAYERS = (
torch.nn.ConvTranspose2d,
torch.nn.ConvTranspose3d,
torch.nn.Linear,
torch.nn.Embedding,
# TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX
# because of double invocation of the same norm layer in CogVideoXLayerNorm
)
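
With torch.nn.Embedding added to the tuple, group offloading's leaf-level check now treats embedding layers as offloadable. A quick illustrative check (not part of the PR; the tuple contents mirror the diff above, and isinstance with a tuple of types is the usual way such allowlists are applied):

import torch

_GO_LC_SUPPORTED_PYTORCH_LAYERS = (
    torch.nn.ConvTranspose2d,
    torch.nn.ConvTranspose3d,
    torch.nn.Linear,
    torch.nn.Embedding,
)

print(isinstance(torch.nn.Embedding(10, 4), _GO_LC_SUPPORTED_PYTORCH_LAYERS))  # True
print(isinstance(torch.nn.LayerNorm(4), _GO_LC_SUPPORTED_PYTORCH_LAYERS))      # False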

View File

@@ -21,7 +21,12 @@ from tokenizers import Tokenizer as TokenizerFast
from torch import nn
from ..models.modeling_utils import load_state_dict
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
from ..utils import (
_get_model_file,
is_accelerate_available,
is_transformers_available,
logging,
)
if is_transformers_available():

View File

@@ -502,16 +502,13 @@ class WanAnimateFaceBlockCrossAttention(nn.Module, AttentionModuleMixin):
dim_head: int = 64,
eps: float = 1e-6,
cross_attention_dim_head: Optional[int] = None,
bias: bool = True,
processor=None,
):
super().__init__()
self.inner_dim = dim_head * heads
self.heads = heads
self.cross_attention_dim_head = cross_attention_dim_head
self.cross_attention_head_dim = cross_attention_dim_head
self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads
self.use_bias = bias
self.is_cross_attention = cross_attention_dim_head is not None
# 1. Pre-Attention Norms for the hidden_states (video latents) and encoder_hidden_states (motion vector).
# NOTE: this is not used in "vanilla" WanAttention
@@ -519,10 +516,10 @@ class WanAnimateFaceBlockCrossAttention(nn.Module, AttentionModuleMixin):
self.pre_norm_kv = nn.LayerNorm(dim, eps, elementwise_affine=False)
# 2. QKV and Output Projections
self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=bias)
self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=bias)
self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=bias)
self.to_out = torch.nn.Linear(self.inner_dim, dim, bias=bias)
self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True)
self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True)
self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True)
self.to_out = torch.nn.Linear(self.inner_dim, dim, bias=True)
# 3. QK Norm
# NOTE: this is applied after the reshape, so only over dim_head rather than dim_head * heads

View File

@@ -76,7 +76,6 @@ class WanVACETransformerBlock(nn.Module):
eps=eps,
added_kv_proj_dim=added_kv_proj_dim,
processor=WanAttnProcessor(),
is_cross_attention=True,
)
self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity()
@@ -179,7 +178,6 @@ class WanVACETransformer3DModel(
_no_split_modules = ["WanTransformerBlock", "WanVACETransformerBlock"]
_keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"]
_keys_to_ignore_on_load_unexpected = ["norm_added_q"]
_repeated_blocks = ["WanTransformerBlock", "WanVACETransformerBlock"]
@register_to_config
def __init__(

View File

@@ -287,6 +287,9 @@ class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)
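
The guard added above normalizes the tokenizer output, which depending on the transformers version may be a dict-like BatchEncoding (with an "input_ids" key) or already a plain list of token ids. A standalone sketch of the same normalization, with a hypothetical helper name:

import torch


def normalize_input_ids(input_ids):
    # BatchEncoding supports "in" and item access like a dict
    if not isinstance(input_ids, list) and "input_ids" in input_ids:
        input_ids = input_ids["input_ids"]
    return torch.LongTensor(input_ids)


print(normalize_input_ids({"input_ids": [[1, 2, 3]]}).shape)  # torch.Size([1, 3])
print(normalize_input_ids([[1, 2, 3]]).shape)                 # torch.Size([1, 3])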

View File

@@ -20,6 +20,8 @@ class MultilingualCLIP(PreTrainedModel):
self.LinearTransformation = torch.nn.Linear(
in_features=config.transformerDimensions, out_features=config.numDims
)
if hasattr(self, "post_init"):
self.post_init()
def forward(self, input_ids, attention_mask):
embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0]

View File

@@ -782,6 +782,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
self.prefix_encoder = PrefixEncoder(config)
self.dropout = torch.nn.Dropout(0.1)
if hasattr(self, "post_init"):
self.post_init()
def get_input_embeddings(self):
return self.embedding.word_embeddings
@@ -811,7 +814,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
batch_size, seq_length = input_ids.shape
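
Both ChatGLM changes above are defensive-access patterns for configs and base classes that differ across transformers versions: getattr with a default instead of direct attribute access, and calling post_init() only when the base class actually defines it. A toy sketch with hypothetical stand-in classes:

class DummyConfig:  # stand-in; real configs come from transformers
    pass


print(getattr(DummyConfig(), "use_cache", None))  # None instead of AttributeError


class Base:
    def post_init(self):
        print("post_init ran")


class Model(Base):
    def __init__(self):
        if hasattr(self, "post_init"):  # silently skipped if Base lacks post_init
            self.post_init()


Model()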

View File

@@ -340,6 +340,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
save_method_accept_variant = "variant" in save_method_signature.parameters
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters
save_kwargs = {}
if save_method_accept_safe:
@@ -349,6 +350,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
if save_method_accept_max_shard_size and max_shard_size is not None:
# max_shard_size is expected to not be None in ModelMixin
save_kwargs["max_shard_size"] = max_shard_size
if save_method_accept_peft_format:
# Set save_peft_format=False for transformers>=5.0.0 compatibility
# In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix
# to adapter keys, but from_pretrained expects keys without this prefix
save_kwargs["save_peft_format"] = False
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)
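
The pipeline save path above inspects each component's save_pretrained signature and only forwards save_peft_format when the method accepts it. A self-contained sketch of that capability check, with a hypothetical helper and a fake save method standing in for a transformers model:

import inspect


def save_component(save_method, directory, **candidate_kwargs):  # hypothetical helper
    accepted = inspect.signature(save_method).parameters
    kwargs = {k: v for k, v in candidate_kwargs.items() if k in accepted}
    save_method(directory, **kwargs)


def fake_save(directory, safe_serialization=True):  # no save_peft_format parameter
    print(directory, safe_serialization)


# save_peft_format is silently dropped because fake_save does not accept it
save_component(fake_save, "out", safe_serialization=False, save_peft_format=False)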

View File

@@ -41,7 +41,7 @@ class GGUFQuantizer(DiffusersQuantizer):
self.compute_dtype = quantization_config.compute_dtype
self.pre_quantized = quantization_config.pre_quantized
self.modules_to_not_convert = quantization_config.modules_to_not_convert or []
self.modules_to_not_convert = quantization_config.modules_to_not_convert
if not isinstance(self.modules_to_not_convert, list):
self.modules_to_not_convert = [self.modules_to_not_convert]

View File

@@ -20,7 +20,9 @@ class TestAutoModel(unittest.TestCase):
side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}],
)
def test_load_from_config_transformers_with_subfolder(self, mock_load_config):
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
assert isinstance(model, CLIPTextModel)
def test_load_from_config_without_subfolder(self):
@@ -28,5 +30,7 @@ class TestAutoModel(unittest.TestCase):
assert isinstance(model, LongformerModel)
def test_load_from_model_index(self):
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
assert isinstance(model, CLIPTextModel)

View File

@@ -446,17 +446,16 @@ class ModelTesterMixin:
torch_device not in ["cuda", "xpu"],
reason="float16 and bfloat16 can only be used with an accelerator",
)
def test_keep_in_fp32_modules(self, tmp_path):
def test_keep_in_fp32_modules(self):
model = self.model_class(**self.get_init_dict())
fp32_modules = model._keep_in_fp32_modules
if fp32_modules is None or len(fp32_modules) == 0:
pytest.skip("Model does not have _keep_in_fp32_modules defined.")
# Save the model and reload with float16 dtype
# _keep_in_fp32_modules is only enforced during from_pretrained loading
model.save_pretrained(tmp_path)
model = self.model_class.from_pretrained(tmp_path, torch_dtype=torch.float16).to(torch_device)
# Test with float16
model.to(torch_device)
model.to(torch.float16)
for name, param in model.named_parameters():
if any(module_to_keep_in_fp32 in name.split(".") for module_to_keep_in_fp32 in fp32_modules):
@@ -471,7 +470,7 @@ class ModelTesterMixin:
)
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
@torch.no_grad()
def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype, atol=1e-4, rtol=0):
def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype):
model = self.model_class(**self.get_init_dict())
model.to(torch_device)
fp32_modules = model._keep_in_fp32_modules or []
@@ -491,6 +490,10 @@ class ModelTesterMixin:
output = model(**inputs, return_dict=False)[0]
output_loaded = model_loaded(**inputs, return_dict=False)[0]
self._check_dtype_inference_output(output, output_loaded, dtype)
def _check_dtype_inference_output(self, output, output_loaded, dtype, atol=1e-4, rtol=0):
"""Check dtype inference output with configurable tolerance."""
assert_tensors_close(
output, output_loaded, atol=atol, rtol=rtol, msg=f"Loaded model output differs for {dtype}"
)
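
The new _check_dtype_inference_output hook centralizes the tolerance so subclasses can loosen it per model. assert_tensors_close is the repo's own helper; a sketch of an equivalent check, using torch.testing.assert_close as a stand-in:

import torch


def check_outputs_close(output, output_loaded, dtype, atol=1e-4, rtol=0):
    torch.testing.assert_close(
        output,
        output_loaded,
        atol=atol,
        rtol=rtol,
        msg=f"Loaded model output differs for {dtype}",
    )


a = torch.ones(3)
check_outputs_close(a, a + 1e-5, torch.float16)  # within atol, passes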

View File

@@ -176,7 +176,15 @@ class QuantizationTesterMixin:
model_quantized = self._create_quantized_model(config_kwargs)
model_quantized.to(torch_device)
# Get model dtype from first parameter
model_dtype = next(model_quantized.parameters()).dtype
inputs = self.get_dummy_inputs()
# Cast inputs to model dtype
inputs = {
k: v.to(model_dtype) if isinstance(v, torch.Tensor) and v.is_floating_point() else v
for k, v in inputs.items()
}
output = model_quantized(**inputs, return_dict=False)[0]
assert output is not None, "Model output is None"
@@ -1013,6 +1021,9 @@ class GGUFTesterMixin(GGUFConfigMixin, QuantizationTesterMixin):
"""Test that dequantize() works correctly."""
self._test_dequantize({"compute_dtype": torch.bfloat16})
def test_gguf_quantized_layers(self):
self._test_quantized_layers({"compute_dtype": torch.bfloat16})
@is_quantization
@is_modelopt

View File

@@ -12,57 +12,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import unittest
import torch
from diffusers import WanTransformer3DModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ..testing_utils import (
AttentionTesterMixin,
BaseModelTesterConfig,
BitsAndBytesTesterMixin,
GGUFCompileTesterMixin,
GGUFTesterMixin,
MemoryTesterMixin,
ModelTesterMixin,
TorchAoTesterMixin,
TorchCompileTesterMixin,
TrainingTesterMixin,
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin
enable_full_determinism()
class WanTransformer3DTesterConfig(BaseModelTesterConfig):
@property
def model_class(self):
return WanTransformer3DModel
class WanTransformer3DTests(ModelTesterMixin, unittest.TestCase):
model_class = WanTransformer3DModel
main_input_name = "hidden_states"
uses_custom_attn_processor = True
@property
def pretrained_model_name_or_path(self):
return "hf-internal-testing/tiny-wan22-transformer"
def dummy_input(self):
batch_size = 1
num_channels = 4
num_frames = 2
height = 16
width = 16
text_encoder_embedding_dim = 16
sequence_length = 12
@property
def output_shape(self) -> tuple[int, ...]:
return (4, 2, 16, 16)
hidden_states = torch.randn((batch_size, num_channels, num_frames, height, width)).to(torch_device)
timestep = torch.randint(0, 1000, size=(batch_size,)).to(torch_device)
encoder_hidden_states = torch.randn((batch_size, sequence_length, text_encoder_embedding_dim)).to(torch_device)
@property
def input_shape(self) -> tuple[int, ...]:
return (4, 2, 16, 16)
@property
def main_input_name(self) -> str:
return "hidden_states"
@property
def generator(self):
return torch.Generator("cpu").manual_seed(0)
def get_init_dict(self) -> dict[str, int | list[int] | tuple | str | bool]:
return {
"hidden_states": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"timestep": timestep,
}
@property
def input_shape(self):
return (4, 1, 16, 16)
@property
def output_shape(self):
return (4, 1, 16, 16)
def prepare_init_args_and_inputs_for_common(self):
init_dict = {
"patch_size": (1, 2, 2),
"num_attention_heads": 2,
"attention_head_dim": 12,
@@ -76,128 +76,16 @@ class WanTransformer3DTesterConfig(BaseModelTesterConfig):
"qk_norm": "rms_norm_across_heads",
"rope_max_seq_len": 32,
}
def get_dummy_inputs(self) -> dict[str, torch.Tensor]:
batch_size = 1
num_channels = 4
num_frames = 2
height = 16
width = 16
text_encoder_embedding_dim = 16
sequence_length = 12
return {
"hidden_states": randn_tensor(
(batch_size, num_channels, num_frames, height, width),
generator=self.generator,
device=torch_device,
),
"encoder_hidden_states": randn_tensor(
(batch_size, sequence_length, text_encoder_embedding_dim),
generator=self.generator,
device=torch_device,
),
"timestep": torch.randint(0, 1000, size=(batch_size,), generator=self.generator).to(torch_device),
}
class TestWanTransformer3D(WanTransformer3DTesterConfig, ModelTesterMixin):
"""Core model tests for Wan Transformer 3D."""
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype):
# Skip: fp16/bf16 require very high atol to pass, providing little signal.
# Dtype preservation is already tested by test_from_save_pretrained_dtype and test_keep_in_fp32_modules.
pytest.skip("Tolerance requirements too high for meaningful test")
class TestWanTransformer3DMemory(WanTransformer3DTesterConfig, MemoryTesterMixin):
"""Memory optimization tests for Wan Transformer 3D."""
class TestWanTransformer3DTraining(WanTransformer3DTesterConfig, TrainingTesterMixin):
"""Training tests for Wan Transformer 3D."""
inputs_dict = self.dummy_input
return init_dict, inputs_dict
def test_gradient_checkpointing_is_applied(self):
expected_set = {"WanTransformer3DModel"}
super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
class TestWanTransformer3DAttention(WanTransformer3DTesterConfig, AttentionTesterMixin):
"""Attention processor tests for Wan Transformer 3D."""
class WanTransformerCompileTests(TorchCompileTesterMixin, unittest.TestCase):
model_class = WanTransformer3DModel
class TestWanTransformer3DCompile(WanTransformer3DTesterConfig, TorchCompileTesterMixin):
"""Torch compile tests for Wan Transformer 3D."""
class TestWanTransformer3DBitsAndBytes(WanTransformer3DTesterConfig, BitsAndBytesTesterMixin):
"""BitsAndBytes quantization tests for Wan Transformer 3D."""
class TestWanTransformer3DTorchAo(WanTransformer3DTesterConfig, TorchAoTesterMixin):
"""TorchAO quantization tests for Wan Transformer 3D."""
class TestWanTransformer3DGGUF(WanTransformer3DTesterConfig, GGUFTesterMixin):
"""GGUF quantization tests for Wan Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/blob/main/LowNoise/Wan2.2-I2V-A14B-LowNoise-Q2_K.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def _create_quantized_model(self, config_kwargs=None, **extra_kwargs):
return super()._create_quantized_model(
config_kwargs, config="Wan-AI/Wan2.2-I2V-A14B-Diffusers", subfolder="transformer", **extra_kwargs
)
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan I2V model dimensions.
Wan 2.2 I2V: in_channels=36, text_dim=4096
"""
return {
"hidden_states": randn_tensor(
(1, 36, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}
class TestWanTransformer3DGGUFCompile(WanTransformer3DTesterConfig, GGUFCompileTesterMixin):
"""GGUF + compile tests for Wan Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/blob/main/LowNoise/Wan2.2-I2V-A14B-LowNoise-Q2_K.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def _create_quantized_model(self, config_kwargs=None, **extra_kwargs):
return super()._create_quantized_model(
config_kwargs, config="Wan-AI/Wan2.2-I2V-A14B-Diffusers", subfolder="transformer", **extra_kwargs
)
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan I2V model dimensions.
Wan 2.2 I2V: in_channels=36, text_dim=4096
"""
return {
"hidden_states": randn_tensor(
(1, 36, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}
def prepare_init_args_and_inputs_for_common(self):
return WanTransformer3DTests().prepare_init_args_and_inputs_for_common()

View File

@@ -12,62 +12,76 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import unittest
import torch
from diffusers import WanAnimateTransformer3DModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ..testing_utils import (
AttentionTesterMixin,
BaseModelTesterConfig,
BitsAndBytesTesterMixin,
GGUFCompileTesterMixin,
GGUFTesterMixin,
MemoryTesterMixin,
ModelTesterMixin,
TorchAoTesterMixin,
TorchCompileTesterMixin,
TrainingTesterMixin,
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin
enable_full_determinism()
class WanAnimateTransformer3DTesterConfig(BaseModelTesterConfig):
@property
def model_class(self):
return WanAnimateTransformer3DModel
class WanAnimateTransformer3DTests(ModelTesterMixin, unittest.TestCase):
model_class = WanAnimateTransformer3DModel
main_input_name = "hidden_states"
uses_custom_attn_processor = True
@property
def pretrained_model_name_or_path(self):
return "hf-internal-testing/tiny-wan-animate-transformer"
def dummy_input(self):
batch_size = 1
num_channels = 4
num_frames = 20 # To make the shapes work out; for complicated reasons we want 21 to divide num_frames + 1
height = 16
width = 16
text_encoder_embedding_dim = 16
sequence_length = 12
clip_seq_len = 12
clip_dim = 16
inference_segment_length = 77 # The inference segment length in the full Wan2.2-Animate-14B model
face_height = 16 # Should be square and match `motion_encoder_size` below
face_width = 16
hidden_states = torch.randn((batch_size, 2 * num_channels + 4, num_frames + 1, height, width)).to(torch_device)
timestep = torch.randint(0, 1000, size=(batch_size,)).to(torch_device)
encoder_hidden_states = torch.randn((batch_size, sequence_length, text_encoder_embedding_dim)).to(torch_device)
clip_ref_features = torch.randn((batch_size, clip_seq_len, clip_dim)).to(torch_device)
pose_latents = torch.randn((batch_size, num_channels, num_frames, height, width)).to(torch_device)
face_pixel_values = torch.randn((batch_size, 3, inference_segment_length, face_height, face_width)).to(
torch_device
)
return {
"hidden_states": hidden_states,
"timestep": timestep,
"encoder_hidden_states": encoder_hidden_states,
"encoder_hidden_states_image": clip_ref_features,
"pose_hidden_states": pose_latents,
"face_pixel_values": face_pixel_values,
}
@property
def output_shape(self) -> tuple[int, ...]:
# Output has fewer channels than input (4 vs 12)
return (4, 21, 16, 16)
def input_shape(self):
return (12, 1, 16, 16)
@property
def input_shape(self) -> tuple[int, ...]:
return (12, 21, 16, 16)
def output_shape(self):
return (4, 1, 16, 16)
@property
def main_input_name(self) -> str:
return "hidden_states"
@property
def generator(self):
return torch.Generator("cpu").manual_seed(0)
def get_init_dict(self) -> dict[str, int | list[int] | tuple | str | bool | float | dict]:
def prepare_init_args_and_inputs_for_common(self):
# Use custom channel sizes since the default Wan Animate channel sizes will cause the motion encoder to
# contain the vast majority of the parameters in the test model
channel_sizes = {"4": 16, "8": 16, "16": 16}
return {
init_dict = {
"patch_size": (1, 2, 2),
"num_attention_heads": 2,
"attention_head_dim": 12,
@@ -91,169 +105,22 @@ class WanAnimateTransformer3DTesterConfig(BaseModelTesterConfig):
"face_encoder_num_heads": 2,
"inject_face_latents_blocks": 2,
}
def get_dummy_inputs(self) -> dict[str, torch.Tensor]:
batch_size = 1
num_channels = 4
num_frames = 20 # To make the shapes work out; for complicated reasons we want 21 to divide num_frames + 1
height = 16
width = 16
text_encoder_embedding_dim = 16
sequence_length = 12
clip_seq_len = 12
clip_dim = 16
inference_segment_length = 77 # The inference segment length in the full Wan2.2-Animate-14B model
face_height = 16 # Should be square and match `motion_encoder_size`
face_width = 16
return {
"hidden_states": randn_tensor(
(batch_size, 2 * num_channels + 4, num_frames + 1, height, width),
generator=self.generator,
device=torch_device,
),
"timestep": torch.randint(0, 1000, size=(batch_size,), generator=self.generator).to(torch_device),
"encoder_hidden_states": randn_tensor(
(batch_size, sequence_length, text_encoder_embedding_dim),
generator=self.generator,
device=torch_device,
),
"encoder_hidden_states_image": randn_tensor(
(batch_size, clip_seq_len, clip_dim),
generator=self.generator,
device=torch_device,
),
"pose_hidden_states": randn_tensor(
(batch_size, num_channels, num_frames, height, width),
generator=self.generator,
device=torch_device,
),
"face_pixel_values": randn_tensor(
(batch_size, 3, inference_segment_length, face_height, face_width),
generator=self.generator,
device=torch_device,
),
}
class TestWanAnimateTransformer3D(WanAnimateTransformer3DTesterConfig, ModelTesterMixin):
"""Core model tests for Wan Animate Transformer 3D."""
def test_output(self):
# Override test_output because the transformer output is expected to have fewer channels
# than the main transformer input.
expected_output_shape = (1, 4, 21, 16, 16)
super().test_output(expected_output_shape=expected_output_shape)
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype):
# Skip: fp16/bf16 require very high atol (~1e-2) to pass, providing little signal.
# Dtype preservation is already tested by test_from_save_pretrained_dtype and test_keep_in_fp32_modules.
pytest.skip("Tolerance requirements too high for meaningful test")
class TestWanAnimateTransformer3DMemory(WanAnimateTransformer3DTesterConfig, MemoryTesterMixin):
"""Memory optimization tests for Wan Animate Transformer 3D."""
class TestWanAnimateTransformer3DTraining(WanAnimateTransformer3DTesterConfig, TrainingTesterMixin):
"""Training tests for Wan Animate Transformer 3D."""
inputs_dict = self.dummy_input
return init_dict, inputs_dict
def test_gradient_checkpointing_is_applied(self):
expected_set = {"WanAnimateTransformer3DModel"}
super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
class TestWanAnimateTransformer3DAttention(WanAnimateTransformer3DTesterConfig, AttentionTesterMixin):
"""Attention processor tests for Wan Animate Transformer 3D."""
# Override test_output because the transformer output is expected to have fewer channels than the main
# transformer input.
def test_output(self):
expected_output_shape = (1, 4, 21, 16, 16)
super().test_output(expected_output_shape=expected_output_shape)
class TestWanAnimateTransformer3DCompile(WanAnimateTransformer3DTesterConfig, TorchCompileTesterMixin):
"""Torch compile tests for Wan Animate Transformer 3D."""
class WanAnimateTransformerCompileTests(TorchCompileTesterMixin, unittest.TestCase):
model_class = WanAnimateTransformer3DModel
def test_torch_compile_recompilation_and_graph_break(self):
# Skip: F.pad with mode="replicate" in WanAnimateFaceEncoder triggers importlib.import_module
# internally, which dynamo doesn't support tracing through.
pytest.skip("F.pad with replicate mode triggers unsupported import in torch.compile")
class TestWanAnimateTransformer3DBitsAndBytes(WanAnimateTransformer3DTesterConfig, BitsAndBytesTesterMixin):
"""BitsAndBytes quantization tests for Wan Animate Transformer 3D."""
class TestWanAnimateTransformer3DTorchAo(WanAnimateTransformer3DTesterConfig, TorchAoTesterMixin):
"""TorchAO quantization tests for Wan Animate Transformer 3D."""
class TestWanAnimateTransformer3DGGUF(WanAnimateTransformer3DTesterConfig, GGUFTesterMixin):
"""GGUF quantization tests for Wan Animate Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.2-Animate-14B-GGUF/blob/main/Wan2.2-Animate-14B-Q2_K.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan Animate model dimensions.
Wan 2.2 Animate: in_channels=36 (2*16+4), text_dim=4096, image_dim=1280
"""
return {
"hidden_states": randn_tensor(
(1, 36, 21, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states_image": randn_tensor(
(1, 257, 1280), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"pose_hidden_states": randn_tensor(
(1, 16, 20, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"face_pixel_values": randn_tensor(
(1, 3, 77, 512, 512), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}
class TestWanAnimateTransformer3DGGUFCompile(WanAnimateTransformer3DTesterConfig, GGUFCompileTesterMixin):
"""GGUF + compile tests for Wan Animate Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.2-Animate-14B-GGUF/blob/main/Wan2.2-Animate-14B-Q2_K.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan Animate model dimensions.
Wan 2.2 Animate: in_channels=36 (2*16+4), text_dim=4096, image_dim=1280
"""
return {
"hidden_states": randn_tensor(
(1, 36, 21, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states_image": randn_tensor(
(1, 257, 1280), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"pose_hidden_states": randn_tensor(
(1, 16, 20, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"face_pixel_values": randn_tensor(
(1, 3, 77, 512, 512), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}
def prepare_init_args_and_inputs_for_common(self):
return WanAnimateTransformer3DTests().prepare_init_args_and_inputs_for_common()

View File

@@ -1,233 +0,0 @@
# Copyright 2025 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import torch
from diffusers import WanVACETransformer3DModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ..testing_utils import (
AttentionTesterMixin,
BaseModelTesterConfig,
BitsAndBytesTesterMixin,
GGUFCompileTesterMixin,
GGUFTesterMixin,
MemoryTesterMixin,
ModelTesterMixin,
TorchAoTesterMixin,
TorchCompileTesterMixin,
TrainingTesterMixin,
)
enable_full_determinism()
class WanVACETransformer3DTesterConfig(BaseModelTesterConfig):
@property
def model_class(self):
return WanVACETransformer3DModel
@property
def pretrained_model_name_or_path(self):
return "hf-internal-testing/tiny-wan-vace-transformer"
@property
def output_shape(self) -> tuple[int, ...]:
return (16, 2, 16, 16)
@property
def input_shape(self) -> tuple[int, ...]:
return (16, 2, 16, 16)
@property
def main_input_name(self) -> str:
return "hidden_states"
@property
def generator(self):
return torch.Generator("cpu").manual_seed(0)
def get_init_dict(self) -> dict[str, int | list[int] | tuple | str | bool | None]:
return {
"patch_size": (1, 2, 2),
"num_attention_heads": 2,
"attention_head_dim": 12,
"in_channels": 16,
"out_channels": 16,
"text_dim": 32,
"freq_dim": 256,
"ffn_dim": 32,
"num_layers": 4,
"cross_attn_norm": True,
"qk_norm": "rms_norm_across_heads",
"rope_max_seq_len": 32,
"vace_layers": [0, 2],
"vace_in_channels": 48, # 3 * in_channels = 3 * 16 = 48
}
def get_dummy_inputs(self) -> dict[str, torch.Tensor]:
batch_size = 1
num_channels = 16
num_frames = 2
height = 16
width = 16
text_encoder_embedding_dim = 32
sequence_length = 12
# VACE requires control_hidden_states with vace_in_channels (3 * in_channels)
vace_in_channels = 48
return {
"hidden_states": randn_tensor(
(batch_size, num_channels, num_frames, height, width),
generator=self.generator,
device=torch_device,
),
"encoder_hidden_states": randn_tensor(
(batch_size, sequence_length, text_encoder_embedding_dim),
generator=self.generator,
device=torch_device,
),
"control_hidden_states": randn_tensor(
(batch_size, vace_in_channels, num_frames, height, width),
generator=self.generator,
device=torch_device,
),
"timestep": torch.randint(0, 1000, size=(batch_size,), generator=self.generator).to(torch_device),
}
class TestWanVACETransformer3D(WanVACETransformer3DTesterConfig, ModelTesterMixin):
"""Core model tests for Wan VACE Transformer 3D."""
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype):
# Skip: fp16/bf16 require very high atol to pass, providing little signal.
# Dtype preservation is already tested by test_from_save_pretrained_dtype and test_keep_in_fp32_modules.
pytest.skip("Tolerance requirements too high for meaningful test")
def test_model_parallelism(self, tmp_path):
# Skip: Device mismatch between cuda:0 and cuda:1 in VACE control flow
pytest.skip("Model parallelism not yet supported for WanVACE")
class TestWanVACETransformer3DMemory(WanVACETransformer3DTesterConfig, MemoryTesterMixin):
"""Memory optimization tests for Wan VACE Transformer 3D."""
class TestWanVACETransformer3DTraining(WanVACETransformer3DTesterConfig, TrainingTesterMixin):
"""Training tests for Wan VACE Transformer 3D."""
def test_gradient_checkpointing_is_applied(self):
expected_set = {"WanVACETransformer3DModel"}
super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
class TestWanVACETransformer3DAttention(WanVACETransformer3DTesterConfig, AttentionTesterMixin):
"""Attention processor tests for Wan VACE Transformer 3D."""
class TestWanVACETransformer3DCompile(WanVACETransformer3DTesterConfig, TorchCompileTesterMixin):
"""Torch compile tests for Wan VACE Transformer 3D."""
def test_torch_compile_repeated_blocks(self):
# WanVACE has two block types (WanTransformerBlock and WanVACETransformerBlock),
# so we need recompile_limit=2 instead of the default 1.
import torch._dynamo
import torch._inductor.utils
init_dict = self.get_init_dict()
inputs_dict = self.get_dummy_inputs()
model = self.model_class(**init_dict).to(torch_device)
model.eval()
model.compile_repeated_blocks(fullgraph=True)
with (
torch._inductor.utils.fresh_inductor_cache(),
torch._dynamo.config.patch(recompile_limit=2),
):
_ = model(**inputs_dict)
_ = model(**inputs_dict)
class TestWanVACETransformer3DBitsAndBytes(WanVACETransformer3DTesterConfig, BitsAndBytesTesterMixin):
"""BitsAndBytes quantization tests for Wan VACE Transformer 3D."""
class TestWanVACETransformer3DTorchAo(WanVACETransformer3DTesterConfig, TorchAoTesterMixin):
"""TorchAO quantization tests for Wan VACE Transformer 3D."""
class TestWanVACETransformer3DGGUF(WanVACETransformer3DTesterConfig, GGUFTesterMixin):
"""GGUF quantization tests for Wan VACE Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.1_14B_VACE-GGUF/blob/main/Wan2.1_14B_VACE-Q3_K_S.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan VACE model dimensions.
Wan 2.1 VACE: in_channels=16, text_dim=4096, vace_in_channels=96
"""
return {
"hidden_states": randn_tensor(
(1, 16, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"control_hidden_states": randn_tensor(
(1, 96, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}
class TestWanVACETransformer3DGGUFCompile(WanVACETransformer3DTesterConfig, GGUFCompileTesterMixin):
"""GGUF + compile tests for Wan VACE Transformer 3D."""
@property
def gguf_filename(self):
return "https://huggingface.co/QuantStack/Wan2.1_14B_VACE-GGUF/blob/main/Wan2.1_14B_VACE-Q3_K_S.gguf"
@property
def torch_dtype(self):
return torch.bfloat16
def get_dummy_inputs(self):
"""Override to provide inputs matching the real Wan VACE model dimensions.
Wan 2.1 VACE: in_channels=16, text_dim=4096, vace_in_channels=96
"""
return {
"hidden_states": randn_tensor(
(1, 16, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"encoder_hidden_states": randn_tensor(
(1, 512, 4096), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"control_hidden_states": randn_tensor(
(1, 96, 2, 64, 64), generator=self.generator, device=torch_device, dtype=self.torch_dtype
),
"timestep": torch.tensor([1.0]).to(torch_device, self.torch_dtype),
}

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import T5EncoderModel, T5TokenizerFast
from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -89,7 +89,8 @@ class BriaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = FlowMatchEulerDiscreteScheduler()
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
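
This hunk shows a pattern repeated across the pipeline fast tests below: instead of T5EncoderModel.from_pretrained (which downloads and loads checkpoint weights), only the config is fetched and the model is instantiated with random weights; the seed set just before keeps those weights reproducible. A standalone sketch of the pattern (the motivation is an inference from the PR, presumably sidestepping weight-loading behavior that changed in transformers v5):

import torch
from transformers import AutoConfig, T5EncoderModel

torch.manual_seed(0)  # makes the random initialization reproducible
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
print(sum(p.numel() for p in text_encoder.parameters()))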

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -41,7 +41,8 @@ class ChromaPipelineFastTests(
)
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -42,7 +42,8 @@ class ChromaImg2ImgPipelineFastTests(
)
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,6 +17,7 @@ import unittest
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -71,7 +72,8 @@ class ChronoEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -117,7 +117,8 @@ class CogVideoXPipelineFastTests(
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -104,7 +104,8 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -113,7 +113,8 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler
@@ -99,7 +99,8 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel
@@ -89,7 +89,8 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = CogVideoXDDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -108,7 +108,7 @@ class CogView4PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generator = torch.Generator(device=device).manual_seed(seed)
inputs = {
"prompt": "dance monkey",
"negative_prompt": "",
"negative_prompt": "bad",
"generator": generator,
"num_inference_steps": 2,
"guidance_scale": 6.0,

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -122,7 +122,8 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -97,7 +97,8 @@ class FluxControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMixin, Fl
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -13,9 +13,7 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
torch_device,
)
from ...testing_utils import torch_device
from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist
@@ -70,7 +68,8 @@ class FluxControlNetImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,15 +3,7 @@ import unittest
import numpy as np
import torch
# torch_device, # {{ edit_1 }} Removed unused import
from transformers import (
AutoTokenizer,
CLIPTextConfig,
CLIPTextModel,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -22,11 +14,7 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
enable_full_determinism,
floats_tensor,
torch_device,
)
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
from ..test_pipelines_common import PipelineTesterMixin
@@ -85,7 +73,8 @@ class FluxControlNetInpaintPipelineTests(unittest.TestCase, PipelineTesterMixin)
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -96,7 +96,10 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -28,10 +35,7 @@ from diffusers import (
from diffusers.models import SD3ControlNetModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ...testing_utils import enable_full_determinism, torch_device
from ..test_pipelines_common import PipelineTesterMixin
@@ -103,7 +107,8 @@ class StableDiffusion3ControlInpaintNetPipelineFastTests(unittest.TestCase, Pipe
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,7 +19,14 @@ from typing import Optional
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -118,7 +125,8 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler
@@ -107,7 +107,8 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
rho=7.0,
final_sigmas_type="sigma_min",
)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -95,7 +95,8 @@ class Cosmos2TextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -96,7 +96,8 @@ class Cosmos2VideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler
@@ -108,7 +108,8 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
rho=7.0,
final_sigmas_type="sigma_min",
)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -2,7 +2,7 @@ import tempfile
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import DDPMScheduler, UNet2DConditionModel
from diffusers.models.attention_processor import AttnAddedKVProcessor
@@ -18,7 +18,8 @@ from ..test_pipelines_common import to_np
class IFPipelineTesterMixin:
def _get_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -75,7 +76,8 @@ class IFPipelineTesterMixin:
def _get_superresolution_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,9 +18,7 @@ import unittest
import torch
from diffusers import (
IFPipeline,
)
from diffusers import IFPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available

View File

@@ -4,7 +4,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -91,7 +91,8 @@ class FluxPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel
@@ -53,7 +53,8 @@ class FluxControlPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -57,7 +57,8 @@ class FluxControlImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -58,7 +58,8 @@ class FluxControlInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel
@@ -58,7 +58,8 @@ class FluxFillPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel
@@ -55,7 +55,8 @@ class FluxImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel
@@ -55,7 +55,8 @@ class FluxInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,7 +79,8 @@ class FluxKontextPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,7 +79,8 @@ class FluxKontextInpaintPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel
from diffusers.utils import is_transformers_version
@@ -57,7 +57,8 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
glm_config = GlmImageConfig(

View File

@@ -18,6 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
@@ -94,7 +95,8 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
torch.manual_seed(0)
text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
@@ -149,7 +151,7 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (128, 128, 3))
# fmt: off
expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983])
expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969])
# fmt: on
generated_slice = generated_image.flatten()
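
Expected-slice updates such as the one above follow from the construction change: with weights initialized locally instead of loaded from the checkpoint, the reference values shift by a small amount. A hedged sketch of the comparison these tests perform; the first-8/last-8 flatten layout and the tolerance are assumptions, not values taken from the file:

    import numpy as np

    def slices_match(generated_image: np.ndarray, expected_slice: np.ndarray, atol: float = 1e-3) -> bool:
        # keep the first and last 8 values of the flattened image,
        # assumed to mirror the 16-value reference arrays above
        flat = generated_image.flatten()
        generated_slice = np.concatenate([flat[:8], flat[-8:]])
        return bool(np.allclose(generated_slice, expected_slice, atol=atol))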

View File

@@ -233,7 +233,7 @@ class HunyuanVideoImageToVideoPipelineFastTests(
self.assertEqual(generated_video.shape, (5, 3, 16, 16))
# fmt: off
expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128])
expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127])
# fmt: on
generated_slice = generated_video.flatten()

View File

@@ -15,7 +15,14 @@
import unittest
import torch
from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel
from transformers import (
AutoConfig,
ByT5Tokenizer,
Qwen2_5_VLTextConfig,
Qwen2_5_VLTextModel,
Qwen2Tokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKLHunyuanVideo15,
@@ -114,7 +121,8 @@ class HunyuanVideo15PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = ByT5Tokenizer()
guider = ClassifierFreeGuidance(guidance_scale=1.0)

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline
@@ -74,7 +74,9 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
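
Hunks like this one also add a fresh torch.manual_seed(0) immediately before the config-based construction. Since the T5 weights are now drawn from the global RNG at construction time, re-seeding per component keeps each module's init independent of whatever was built before it. A sketch of the convention, assembled from the lines above:

    import torch
    from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel

    text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")

    torch.manual_seed(0)  # re-seed so the T5 init does not depend on the calls above
    config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
    text_encoder_2 = T5EncoderModel(config)
    tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")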

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -108,7 +108,8 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -119,7 +119,8 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -109,7 +109,8 @@ class LattePipelineFastTests(
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
@@ -88,7 +88,8 @@ class LTXPipelineFastTests(PipelineTesterMixin, FirstBlockCacheTesterMixin, unit
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -92,7 +92,8 @@ class LTXConditionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -91,7 +91,8 @@ class LTXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
@@ -89,7 +89,8 @@ class MochiPipelineFastTests(
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -67,7 +67,9 @@ class HunyuanDiTPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -80,7 +80,8 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, PipelineTesterMixi
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -5,7 +5,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -84,7 +91,9 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -77,7 +77,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -83,7 +83,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -160,7 +160,7 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208])
expected_slice = torch.tensor([0.5646, 0.6369, 0.6019, 0.5640, 0.5830, 0.5520, 0.5717, 0.6315, 0.4167, 0.3563, 0.5640, 0.4849, 0.4961, 0.5237, 0.4084, 0.5014])
# fmt: on
generated_slice = generated_image.flatten()

View File

@@ -163,7 +163,7 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
expected_slice = torch.tensor([0.5640, 0.6350, 0.6003, 0.5606, 0.5801, 0.5502, 0.5757, 0.6388, 0.4174, 0.3590, 0.5647, 0.4891, 0.4975, 0.5256, 0.4088, 0.4991])
# fmt: on
generated_slice = generated_image.flatten()

View File

@@ -164,7 +164,7 @@ class QwenImageEditPlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987])
# fmt: on
generated_slice = generated_image.flatten()
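
When reference values drift, as in the three Qwen hunks above, the replacement constants are presumably regenerated by printing the freshly computed slice and pasting it back into the test. A hedged sketch of such a helper; the 16-value layout and four-decimal rounding are inferred from the arrays above, not taken from the repo:

    import torch

    def format_expected_slice(generated_image: torch.Tensor) -> str:
        flat = generated_image.flatten()
        slice_ = torch.cat([flat[:8], flat[-8:]])  # assumed first-8/last-8 layout
        values = ", ".join(f"{v:.4f}" for v in slice_.tolist())
        return f"expected_slice = torch.tensor([{values}])"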

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -68,7 +68,8 @@ class SkyReelsV2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -68,7 +68,8 @@ class SkyReelsV2DiffusionForcingPipelineFastTests(PipelineTesterMixin, unittest.
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,6 +18,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
T5EncoderModel,
)
@@ -68,7 +69,8 @@ class SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests(PipelineTesterMixi
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -159,7 +161,8 @@ class SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests(SkyReelsV2Diffusio
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -70,7 +70,8 @@ class SkyReelsV2DiffusionForcingVideoToVideoPipelineFastTests(PipelineTesterMixi
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,6 +18,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -71,7 +72,8 @@ class SkyReelsV2ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.Test
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,10 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import (
T5EncoderModel,
T5Tokenizer,
)
from transformers import AutoConfig, T5EncoderModel, T5Tokenizer
from diffusers import (
AutoencoderOobleck,
@@ -111,7 +108,8 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
torch.manual_seed(0)
t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
text_encoder = T5EncoderModel.from_pretrained(t5_repo_id)
config = AutoConfig.from_pretrained(t5_repo_id)
text_encoder = T5EncoderModel(config)
tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25)
torch.manual_seed(0)

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
@@ -72,7 +79,9 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -4,7 +4,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
@@ -77,7 +77,8 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -79,7 +79,8 @@ class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel
@@ -68,7 +68,8 @@ class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -17,14 +17,11 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ...testing_utils import enable_full_determinism, torch_device
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -63,7 +60,8 @@ class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -235,7 +233,8 @@ class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel
@@ -64,7 +64,8 @@ class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -248,7 +249,8 @@ class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,6 +19,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -78,7 +79,8 @@ class WanAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,6 +19,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -68,7 +69,8 @@ class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -239,7 +241,8 @@ class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -67,7 +67,8 @@ class WanVACEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -16,7 +16,7 @@ import unittest
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline
@@ -62,7 +62,8 @@ class WanVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -168,7 +168,7 @@ def assert_tensors_close(
max_diff = abs_diff.max().item()
flat_idx = abs_diff.argmax().item()
max_idx = tuple(idx.item() for idx in torch.unravel_index(torch.tensor(flat_idx), actual.shape))
max_idx = tuple(torch.unravel_index(torch.tensor(flat_idx), actual.shape).tolist())
threshold = atol + rtol * expected.abs()
mismatched = (abs_diff > threshold).sum().item()
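
The final hunk reworks how assert_tensors_close locates the worst-offending element. One caveat with the replacement line: torch.unravel_index returns a tuple of tensors (one per dimension), and a tuple has no .tolist() method, so a per-element conversion along these lines may still be required. A hedged sketch, not a claim about the merged code:

    import torch

    def max_mismatch_index(actual: torch.Tensor, expected: torch.Tensor) -> tuple:
        # position of the largest absolute difference between the two tensors
        abs_diff = (actual - expected).abs()
        flat_idx = abs_diff.argmax()
        # torch.unravel_index yields one 0-dim index tensor per dimension
        return tuple(int(t) for t in torch.unravel_index(flat_idx, actual.shape))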