up

2025-12-24 05:14:55 +08:00 · 2025-12-22 10:26:10 +05:30
3 changed files with 6 additions and 69 deletions
--- a/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanimage.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanimage.py
@@ -27,7 +27,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -410,7 +410,7 @@ class HunyuanImageDecoder2D(nn.Module):
        return h
-class AutoencoderKLHunyuanImage(ModelMixin, ConfigMixin, FromOriginalModelMixin):
+class AutoencoderKLHunyuanImage(ModelMixin, AutoencoderMixin, ConfigMixin, FromOriginalModelMixin):
    r"""
    A VAE model for 2D images with spatial tiling support.
@@ -486,27 +486,6 @@ class AutoencoderKLHunyuanImage(ModelMixin, ConfigMixin, FromOriginalModelMixin)
        self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor
        self.tile_latent_min_size = self.tile_sample_min_size // self.config.spatial_compression_ratio
    def disable_tiling(self) -> None:
        r"""
        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_tiling = False
    def enable_slicing(self) -> None:
        r"""
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
        self.use_slicing = True
    def disable_slicing(self) -> None:
        r"""
        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_slicing = False
    def _encode(self, x: torch.Tensor):
        batch_size, num_channels, height, width = x.shape
--- a/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanimage_refiner.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanimage_refiner.py
@@ -26,7 +26,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -584,7 +584,7 @@ class HunyuanImageRefinerDecoder3D(nn.Module):
        return hidden_states
-class AutoencoderKLHunyuanImageRefiner(ModelMixin, ConfigMixin):
+class AutoencoderKLHunyuanImageRefiner(ModelMixin, AutoencoderMixin, ConfigMixin):
    r"""
    A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. Used for
    HunyuanImage-2.1 Refiner.
@@ -685,27 +685,6 @@ class AutoencoderKLHunyuanImageRefiner(ModelMixin, ConfigMixin):
        self.tile_sample_stride_width = tile_sample_stride_width or self.tile_sample_stride_width
        self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor
    def disable_tiling(self) -> None:
        r"""
        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_tiling = False
    def enable_slicing(self) -> None:
        r"""
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
        self.use_slicing = True
    def disable_slicing(self) -> None:
        r"""
        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_slicing = False
    def _encode(self, x: torch.Tensor) -> torch.Tensor:
        _, _, _, height, width = x.shape
--- a/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanvideo15.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_hunyuanvideo15.py
@@ -26,7 +26,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -625,7 +625,7 @@ class HunyuanVideo15Decoder3D(nn.Module):
        return hidden_states
-class AutoencoderKLHunyuanVideo15(ModelMixin, ConfigMixin):
+class AutoencoderKLHunyuanVideo15(ModelMixin, AutoencoderMixin, ConfigMixin):
    r"""
    A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. Used for
    HunyuanVideo-1.5.
@@ -723,27 +723,6 @@ class AutoencoderKLHunyuanVideo15(ModelMixin, ConfigMixin):
        self.tile_latent_min_width = tile_latent_min_width or self.tile_latent_min_width
        self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor
    def disable_tiling(self) -> None:
        r"""
        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_tiling = False
    def enable_slicing(self) -> None:
        r"""
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
        self.use_slicing = True
    def disable_slicing(self) -> None:
        r"""
        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
        decoding in one step.
        """
        self.use_slicing = False
    def _encode(self, x: torch.Tensor) -> torch.Tensor:
        _, _, _, height, width = x.shape