up

2026-02-11 05:15:31 +08:00 · 2026-02-10 04:57:17 +01:00
9 changed files with 30 additions and 160 deletions
--- a/docs/source/en/using-diffusers/automodel.md
+++ b/docs/source/en/using-diffusers/automodel.md
@@ -29,31 +29,8 @@ text_encoder = AutoModel.from_pretrained(
 )
 ```

-## Custom models
-
 [`AutoModel`] also loads models from the [Hub](https://huggingface.co/models) that aren't included in Diffusers. Set `trust_remote_code=True` in [`AutoModel.from_pretrained`] to load custom models.

-A custom model repository needs a Python module with the model class, and a `config.json` with an `auto_map` entry that maps `"AutoModel"` to `"module_file.ClassName"`.
-
-```
-custom/custom-transformer-model/
-├── config.json
-├── my_model.py
-└── diffusion_pytorch_model.safetensors
-```
-
-The `config.json` includes the `auto_map` field pointing to the custom class.
-
-```json
-{
-  "auto_map": {
-    "AutoModel": "my_model.MyCustomModel"
-  }
-}
-```
-
-Then load it with `trust_remote_code=True`.
-
 ```py
 import torch
 from diffusers import AutoModel
@@ -63,39 +40,7 @@ transformer = AutoModel.from_pretrained(
 )
 ```

-For a real-world example, [Overworld/Waypoint-1-Small](https://huggingface.co/Overworld/Waypoint-1-Small/tree/main/transformer) hosts a custom `WorldModel` class across several modules in its `transformer` subfolder.
-
-```
-transformer/
-├── config.json          # auto_map: "model.WorldModel"
-├── model.py
-├── attn.py
-├── nn.py
-├── cache.py
-├── quantize.py
-├── __init__.py
-└── diffusion_pytorch_model.safetensors
-```
-
-```py
-import torch
-from diffusers import AutoModel
-
-transformer = AutoModel.from_pretrained(
-    "Overworld/Waypoint-1-Small", subfolder="transformer", trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="cuda"
-)
-```
-
 If the custom model inherits from the [`ModelMixin`] class, it gets access to the same features as Diffusers model classes, like [regional compilation](../optimization/fp16#regional-compilation) and [group offloading](../optimization/memory#group-offloading).

-> [!WARNING]
-> As a precaution with `trust_remote_code=True`, pass a commit hash to the `revision` argument in [`AutoModel.from_pretrained`] to make sure the code hasn't been updated with new malicious code (unless you fully trust the model owners).
->
-> ```py
-> transformer = AutoModel.from_pretrained(
->     "Overworld/Waypoint-1-Small", subfolder="transformer", trust_remote_code=True, revision="a3d8cb2"
-> )
-> ```
-
 > [!NOTE]
 > Learn more about implementing custom models in the [Community components](../using-diffusers/custom_pipeline_overview#community-components) guide.
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -297,6 +297,8 @@ else:
            "ComponentSpec",
            "ModularPipeline",
            "ModularPipelineBlocks",
+            "InputParam",
+            "OutputParam",
        ]
    )
    _import_structure["optimization"] = [
@@ -1060,7 +1062,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            ZImageTransformer2DModel,
            attention_backend,
        )
-        from .modular_pipelines import ComponentsManager, ComponentSpec, ModularPipeline, ModularPipelineBlocks
+        from .modular_pipelines import ComponentsManager, ComponentSpec, ModularPipeline, ModularPipelineBlocks, InputParam, OutputParam
        from .optimization import (
            get_constant_schedule,
            get_constant_schedule_with_warmup,
--- a/src/diffusers/modular_pipelines/flux/__init__.py
+++ b/src/diffusers/modular_pipelines/flux/__init__.py
@@ -31,9 +31,7 @@ else:
        "FluxAutoBeforeDenoiseStep",
        "FluxAutoBlocks",
        "FluxAutoDecodeStep",
-        "FluxAutoDenoiseStep",
        "FluxKontextAutoBlocks",
-        "FluxKontextAutoDenoiseStep",
        "FluxKontextBeforeDenoiseStep",
    ]
    _import_structure["modular_pipeline"] = ["FluxKontextModularPipeline", "FluxModularPipeline"]
@@ -55,9 +53,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            FluxAutoBeforeDenoiseStep,
            FluxAutoBlocks,
            FluxAutoDecodeStep,
-            FluxAutoDenoiseStep,
            FluxKontextAutoBlocks,
-            FluxKontextAutoDenoiseStep,
            FluxKontextBeforeDenoiseStep,
        )
        from .modular_pipeline import FluxKontextModularPipeline, FluxModularPipeline
--- a/src/diffusers/modular_pipelines/flux/modular_blocks.py
+++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py
@@ -201,37 +201,6 @@ class FluxKontextAutoBeforeDenoiseStep(AutoPipelineBlocks):
        )


-# denoise: text2image
-class FluxAutoDenoiseStep(AutoPipelineBlocks):
-    block_classes = [FluxDenoiseStep]
-    block_names = ["denoise"]
-    block_trigger_inputs = [None]
-
-    @property
-    def description(self) -> str:
-        return (
-            "Denoise step that iteratively denoise the latents. "
-            "This is a auto pipeline block that works for text2image and img2img tasks."
-            " - `FluxDenoiseStep` (denoise) for text2image and img2img tasks."
-        )
-
-
-# denoise: Flux Kontext
-
-
-class FluxKontextAutoDenoiseStep(AutoPipelineBlocks):
-    block_classes = [FluxKontextDenoiseStep]
-    block_names = ["denoise"]
-    block_trigger_inputs = [None]
-
-    @property
-    def description(self) -> str:
-        return (
-            "Denoise step that iteratively denoise the latents for Flux Kontext. "
-            "This is a auto pipeline block that works for text2image and img2img tasks."
-            " - `FluxDenoiseStep` (denoise) for text2image and img2img tasks."
-        )
-

 # decode: all task (text2img, img2img)
 class FluxAutoDecodeStep(AutoPipelineBlocks):
@@ -322,7 +291,7 @@ class FluxKontextAutoInputStep(AutoPipelineBlocks):

 class FluxCoreDenoiseStep(SequentialPipelineBlocks):
    model_name = "flux"
-    block_classes = [FluxAutoInputStep, FluxAutoBeforeDenoiseStep, FluxAutoDenoiseStep]
+    block_classes = [FluxAutoInputStep, FluxAutoBeforeDenoiseStep, FluxDenoiseStep]
    block_names = ["input", "before_denoise", "denoise"]

    @property
@@ -331,7 +300,7 @@ class FluxCoreDenoiseStep(SequentialPipelineBlocks):
            "Core step that performs the denoising process. \n"
            + " - `FluxAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
            + " - `FluxAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
-            + " - `FluxAutoDenoiseStep` (denoise) iteratively denoises the latents.\n"
+            + " - `FluxDenoiseStep` (denoise) iteratively denoises the latents.\n"
            + "This step supports text-to-image and image-to-image tasks for Flux:\n"
            + " - for image-to-image generation, you need to provide `image_latents`\n"
            + " - for text-to-image generation, all you need to provide is prompt embeddings."
@@ -340,7 +309,7 @@ class FluxCoreDenoiseStep(SequentialPipelineBlocks):

 class FluxKontextCoreDenoiseStep(SequentialPipelineBlocks):
    model_name = "flux-kontext"
-    block_classes = [FluxKontextAutoInputStep, FluxKontextAutoBeforeDenoiseStep, FluxKontextAutoDenoiseStep]
+    block_classes = [FluxKontextAutoInputStep, FluxKontextAutoBeforeDenoiseStep, FluxKontextDenoiseStep]
    block_names = ["input", "before_denoise", "denoise"]

    @property
@@ -349,7 +318,7 @@ class FluxKontextCoreDenoiseStep(SequentialPipelineBlocks):
            "Core step that performs the denoising process. \n"
            + " - `FluxKontextAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
            + " - `FluxKontextAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
-            + " - `FluxKontextAutoDenoiseStep` (denoise) iteratively denoises the latents.\n"
+            + " - `FluxKontextDenoiseStep` (denoise) iteratively denoises the latents.\n"
            + "This step supports text-to-image and image-to-image tasks for Flux:\n"
            + " - for image-to-image generation, you need to provide `image_latents`\n"
            + " - for text-to-image generation, all you need to provide is prompt embeddings."
--- a/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py
+++ b/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py
@@ -18,6 +18,7 @@ import re
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Union

+import ftfy
 import torch
 from transformers import AutoTokenizer, UMT5EncoderModel

--- a/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py
+++ b/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py
@@ -18,6 +18,7 @@ import re
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

+import ftfy
 import PIL
 import torch
 from transformers import AutoTokenizer, UMT5EncoderModel
--- a/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py
+++ b/src/diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py
@@ -19,6 +19,7 @@ import re
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Union

+import ftfy
 import torch
 from PIL import Image
 from transformers import AutoTokenizer, UMT5EncoderModel
--- a/tests/models/testing_utils/quantization.py
+++ b/tests/models/testing_utils/quantization.py
@@ -21,8 +21,11 @@ import torch
 from diffusers import BitsAndBytesConfig, GGUFQuantizationConfig, NVIDIAModelOptConfig, QuantoConfig, TorchAoConfig
 from diffusers.utils.import_utils import (
    is_bitsandbytes_available,
+    is_gguf_available,
    is_nvidia_modelopt_available,
    is_optimum_quanto_available,
+    is_torchao_available,
+    is_torchao_version,
 )

 from ...testing_utils import (
@@ -56,6 +59,13 @@ if is_bitsandbytes_available():
 if is_optimum_quanto_available():
    from optimum.quanto import QLinear

+if is_gguf_available():
+    pass
+
+if is_torchao_available():
+    if is_torchao_version(">=", "0.9.0"):
+        pass
+

 class LoRALayer(torch.nn.Module):
    """Wraps a linear layer with LoRA-like adapter - Used for testing purposes only.
@@ -122,14 +132,14 @@ class QuantizationTesterMixin:
    def _verify_if_layer_quantized(self, name, module, config_kwargs):
        raise NotImplementedError("Subclass must implement _verify_if_layer_quantized")

-    def _is_module_quantized(self, module, quant_config_kwargs=None):
+    def _is_module_quantized(self, module):
        """
        Check if a module is quantized. Returns True if quantized, False otherwise.
        Default implementation tries _verify_if_layer_quantized and catches exceptions.
        Subclasses can override for more efficient checking.
        """
        try:
-            self._verify_if_layer_quantized("", module, quant_config_kwargs or {})
+            self._verify_if_layer_quantized("", module, {})
            return True
        except (AssertionError, AttributeError):
            return False
@@ -269,9 +279,7 @@ class QuantizationTesterMixin:
            f"Quantized layer count mismatch: expected {expected_quantized_layers}, got {num_quantized_layers} (total linear layers: {num_linear_layers}, FP32 modules: {num_fp32_modules})"
        )

-    def _test_quantization_modules_to_not_convert(
-        self, config_kwargs, modules_to_not_convert, to_not_convert_key="modules_to_not_convert"
-    ):
+    def _test_quantization_modules_to_not_convert(self, config_kwargs, modules_to_not_convert):
        """
        Test that modules specified in modules_to_not_convert are not quantized.

@@ -281,7 +289,7 @@ class QuantizationTesterMixin:
        """
        # Create config with modules_to_not_convert
        config_kwargs_with_exclusion = config_kwargs.copy()
-        config_kwargs_with_exclusion[to_not_convert_key] = modules_to_not_convert
+        config_kwargs_with_exclusion["modules_to_not_convert"] = modules_to_not_convert

        model_with_exclusion = self._create_quantized_model(config_kwargs_with_exclusion)

@@ -293,7 +301,7 @@ class QuantizationTesterMixin:
                if any(excluded in name for excluded in modules_to_not_convert):
                    found_excluded = True
                    # This module should NOT be quantized
-                    assert not self._is_module_quantized(module, config_kwargs_with_exclusion), (
+                    assert not self._is_module_quantized(module), (
                        f"Module {name} should not be quantized but was found to be quantized"
                    )

@@ -305,7 +313,7 @@ class QuantizationTesterMixin:
            if isinstance(module, torch.nn.Linear):
                # Check if this module is NOT in the exclusion list
                if not any(excluded in name for excluded in modules_to_not_convert):
-                    if self._is_module_quantized(module, config_kwargs_with_exclusion):
+                    if self._is_module_quantized(module):
                        found_quantized = True
                        break

@@ -610,7 +618,7 @@ class BitsAndBytesTesterMixin(BitsAndBytesConfigMixin, QuantizationTesterMixin):
            pytest.skip("modules_to_not_convert_for_test not defined for this model")

        self._test_quantization_modules_to_not_convert(
-            BitsAndBytesConfigMixin.BNB_CONFIGS["4bit_nf4"], modules_to_exclude, "llm_int8_skip_modules"
+            BitsAndBytesConfigMixin.BNB_CONFIGS["4bit_nf4"], modules_to_exclude
        )

    @pytest.mark.parametrize("config_name", ["4bit_nf4", "8bit"], ids=["4bit_nf4", "8bit"])
@@ -809,14 +817,7 @@ class TorchAoConfigMixin:
        return self.model_class.from_pretrained(self.pretrained_model_name_or_path, **kwargs)

    def _verify_if_layer_quantized(self, name, module, config_kwargs):
-        from torchao.dtypes import AffineQuantizedTensor
-        from torchao.quantization.linear_activation_quantized_tensor import LinearActivationQuantizedTensor
-
        assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}"
-        # Check if the weight is actually quantized
-        weight = module.weight
-        is_quantized = isinstance(weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor))
-        assert is_quantized, f"Layer {name} weight is not quantized, got {type(weight)}"


 # int4wo requires CUDA-specific ops (_convert_weight_to_int4pack)
@@ -912,39 +913,9 @@ class TorchAoTesterMixin(TorchAoConfigMixin, QuantizationTesterMixin):
        if modules_to_exclude is None:
            pytest.skip("modules_to_not_convert_for_test not defined for this model")

-        # Custom implementation for torchao that skips memory footprint check
-        # because get_memory_footprint() doesn't accurately reflect torchao quantization
-        config_kwargs = TorchAoConfigMixin.TORCHAO_QUANT_TYPES["int8wo"]
-        config_kwargs_with_exclusion = config_kwargs.copy()
-        config_kwargs_with_exclusion["modules_to_not_convert"] = modules_to_exclude
-
-        model_with_exclusion = self._create_quantized_model(config_kwargs_with_exclusion)
-
-        # Find a module that should NOT be quantized
-        found_excluded = False
-        for name, module in model_with_exclusion.named_modules():
-            if isinstance(module, torch.nn.Linear):
-                # Check if this module is in the exclusion list
-                if any(excluded in name for excluded in modules_to_exclude):
-                    found_excluded = True
-                    # This module should NOT be quantized
-                    assert not self._is_module_quantized(module, config_kwargs_with_exclusion), (
-                        f"Module {name} should not be quantized but was found to be quantized"
-                    )
-
-        assert found_excluded, f"No linear layers found in excluded modules: {modules_to_exclude}"
-
-        # Find a module that SHOULD be quantized (not in exclusion list)
-        found_quantized = False
-        for name, module in model_with_exclusion.named_modules():
-            if isinstance(module, torch.nn.Linear):
-                # Check if this module is NOT in the exclusion list
-                if not any(excluded in name for excluded in modules_to_exclude):
-                    if self._is_module_quantized(module, config_kwargs_with_exclusion):
-                        found_quantized = True
-                        break
-
-        assert found_quantized, "No quantized layers found outside of excluded modules"
+        self._test_quantization_modules_to_not_convert(
+            TorchAoConfigMixin.TORCHAO_QUANT_TYPES["int8wo"], modules_to_exclude
+        )

    def test_torchao_device_map(self):
        """Test that device_map='auto' works correctly with quantization."""
--- a/tests/models/transformers/test_models_transformer_flux.py
+++ b/tests/models/transformers/test_models_transformer_flux.py
@@ -318,10 +318,6 @@ class TestFluxSingleFile(FluxTransformerTesterConfig, SingleFileTesterMixin):
 class TestFluxTransformerBitsAndBytes(FluxTransformerTesterConfig, BitsAndBytesTesterMixin):
    """BitsAndBytes quantization tests for Flux Transformer."""

-    @property
-    def modules_to_not_convert_for_test(self):
-        return ["norm_out.linear"]
-

 class TestFluxTransformerQuanto(FluxTransformerTesterConfig, QuantoTesterMixin):
    """Quanto quantization tests for Flux Transformer."""
@@ -334,18 +330,10 @@ class TestFluxTransformerQuanto(FluxTransformerTesterConfig, QuantoTesterMixin):
    def pretrained_model_kwargs(self):
        return {}

-    @property
-    def modules_to_not_convert_for_test(self):
-        return ["norm_out.linear"]
-

 class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin):
    """TorchAO quantization tests for Flux Transformer."""

-    @property
-    def modules_to_not_convert_for_test(self):
-        return ["norm_out.linear"]
-

 class TestFluxTransformerGGUF(FluxTransformerTesterConfig, GGUFTesterMixin):
    @property
@@ -414,10 +402,6 @@ class TestFluxTransformerGGUFCompile(FluxTransformerTesterConfig, GGUFCompileTes
 class TestFluxTransformerModelOpt(FluxTransformerTesterConfig, ModelOptTesterMixin):
    """ModelOpt quantization tests for Flux Transformer."""

-    @property
-    def modules_to_not_convert_for_test(self):
-        return ["norm_out.linear"]
-

 class TestFluxTransformerModelOptCompile(FluxTransformerTesterConfig, ModelOptCompileTesterMixin):
    """ModelOpt + compile tests for Flux Transformer."""