Mirror of https://github.com/huggingface/diffusers.git, synced 2025-12-24 05:14:55 +08:00.

Compare commits: 1 commit (10dfa9b722) on branch attention-...remove-unn
@@ -27,7 +27,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -410,7 +410,7 @@ class HunyuanImageDecoder2D(nn.Module):
         return h


-class AutoencoderKLHunyuanImage(ModelMixin, ConfigMixin, FromOriginalModelMixin):
+class AutoencoderKLHunyuanImage(ModelMixin, AutoencoderMixin, ConfigMixin, FromOriginalModelMixin):
     r"""
     A VAE model for 2D images with spatial tiling support.

@@ -486,27 +486,6 @@ class AutoencoderKLHunyuanImage(ModelMixin, ConfigMixin, FromOriginalModelMixin)
         self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor
         self.tile_latent_min_size = self.tile_sample_min_size // self.config.spatial_compression_ratio

-    def disable_tiling(self) -> None:
-        r"""
-        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_tiling = False
-
-    def enable_slicing(self) -> None:
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.use_slicing = True
-
-    def disable_slicing(self) -> None:
-        r"""
-        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_slicing = False
-
     def _encode(self, x: torch.Tensor):
         batch_size, num_channels, height, width = x.shape
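The three methods deleted here (`disable_tiling`, `enable_slicing`, `disable_slicing`) are removed identically from each of the three autoencoders in this commit; every class instead gains `AutoencoderMixin` in its bases and inherits the toggles from there. The mixin itself is not part of this diff. A minimal sketch of what it would need to provide, assuming only the attribute names and docstring wording visible in the removed methods, might look like:

```python
# Hypothetical sketch of the shared toggles consolidated into AutoencoderMixin.
# The real mixin lives in diffusers' vae module and is not shown in this diff;
# only the attribute names (use_tiling, use_slicing) and the described behavior
# are taken from the methods removed above.


class AutoencoderMixin:
    """Shared enable/disable switches for tiled and sliced VAE decoding."""

    use_tiling: bool = False
    use_slicing: bool = False

    def disable_tiling(self) -> None:
        # Go back to decoding the full tensor in one step.
        self.use_tiling = False

    def enable_slicing(self) -> None:
        # Split the input batch into slices and decode them one at a time,
        # trading a little speed for lower peak memory.
        self.use_slicing = True

    def disable_slicing(self) -> None:
        # Decode the whole batch in a single step again.
        self.use_slicing = False
```

Note that `enable_tiling(...)`, whose tail is visible in the context lines above and which takes model-specific tile sizes, remains defined on each class.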
@@ -26,7 +26,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -584,7 +584,7 @@ class HunyuanImageRefinerDecoder3D(nn.Module):
         return hidden_states


-class AutoencoderKLHunyuanImageRefiner(ModelMixin, ConfigMixin):
+class AutoencoderKLHunyuanImageRefiner(ModelMixin, AutoencoderMixin, ConfigMixin):
     r"""
     A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. Used for
     HunyuanImage-2.1 Refiner.
@@ -685,27 +685,6 @@ class AutoencoderKLHunyuanImageRefiner(ModelMixin, ConfigMixin):
         self.tile_sample_stride_width = tile_sample_stride_width or self.tile_sample_stride_width
         self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor

-    def disable_tiling(self) -> None:
-        r"""
-        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_tiling = False
-
-    def enable_slicing(self) -> None:
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.use_slicing = True
-
-    def disable_slicing(self) -> None:
-        r"""
-        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_slicing = False
-
     def _encode(self, x: torch.Tensor) -> torch.Tensor:
         _, _, _, height, width = x.shape
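The flags toggled by these methods are consumed inside `encode`/`decode`, whose bodies are not part of this diff. The usual dispatch pattern in diffusers autoencoders is to fall back to per-sample processing when slicing is enabled; a self-contained sketch of that pattern (an illustration of the technique, not code from this commit, and the helper name is made up) is:

```python
# Illustration of how a use_slicing flag is typically consumed; the helper name
# below is invented for this sketch and does not appear in the diff.
from typing import Callable

import torch


def encode_with_optional_slicing(
    x: torch.Tensor,
    encode_fn: Callable[[torch.Tensor], torch.Tensor],
    use_slicing: bool,
) -> torch.Tensor:
    """Run encode_fn over x, one batch element at a time when slicing is enabled."""
    if use_slicing and x.shape[0] > 1:
        # Sequential per-sample passes cap peak activation memory.
        encoded_slices = [encode_fn(x_slice) for x_slice in x.split(1)]
        return torch.cat(encoded_slices)
    return encode_fn(x)
```

In the real models the encoded tensor is then typically wrapped in a `DiagonalGaussianDistribution` inside an `AutoencoderKLOutput`, which is why both are imported in the hunks above.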
@@ -26,7 +26,7 @@ from ...utils.accelerate_utils import apply_forward_hook
 from ..activations import get_activation
 from ..modeling_outputs import AutoencoderKLOutput
 from ..modeling_utils import ModelMixin
-from .vae import DecoderOutput, DiagonalGaussianDistribution
+from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -625,7 +625,7 @@ class HunyuanVideo15Decoder3D(nn.Module):
         return hidden_states


-class AutoencoderKLHunyuanVideo15(ModelMixin, ConfigMixin):
+class AutoencoderKLHunyuanVideo15(ModelMixin, AutoencoderMixin, ConfigMixin):
     r"""
     A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. Used for
     HunyuanVideo-1.5.
@@ -723,27 +723,6 @@ class AutoencoderKLHunyuanVideo15(ModelMixin, ConfigMixin):
         self.tile_latent_min_width = tile_latent_min_width or self.tile_latent_min_width
         self.tile_overlap_factor = tile_overlap_factor or self.tile_overlap_factor

-    def disable_tiling(self) -> None:
-        r"""
-        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_tiling = False
-
-    def enable_slicing(self) -> None:
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.use_slicing = True
-
-    def disable_slicing(self) -> None:
-        r"""
-        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
-        decoding in one step.
-        """
-        self.use_slicing = False
-
     def _encode(self, x: torch.Tensor) -> torch.Tensor:
         _, _, _, height, width = x.shape
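From a user's perspective nothing changes at call time: the toggles are simply inherited now instead of being defined on each class. A usage sketch follows; the checkpoint id is a placeholder and the top-level export of the class is assumed, neither comes from this diff.

```python
# Usage sketch; the model id below is a placeholder and the top-level import is
# an assumption. Only the enable/disable calls relate to this diff.
import torch

from diffusers import AutoencoderKLHunyuanImage

vae = AutoencoderKLHunyuanImage.from_pretrained(
    "some-org/some-checkpoint", subfolder="vae", torch_dtype=torch.float16
)

vae.enable_tiling()   # still defined on the class itself (accepts optional tile sizes)
vae.enable_slicing()  # now inherited from AutoencoderMixin
# ... encode/decode as usual ...
vae.disable_slicing()  # inherited from AutoencoderMixin
vae.disable_tiling()   # inherited from AutoencoderMixin
```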