mirror of
https://github.com/huggingface/diffusers.git
synced 2025-12-10 14:34:55 +08:00
Compare commits
8 Commits
kernelize
...
deprecate-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffa39f212f | ||
|
|
3fe9a012c3 | ||
|
|
9114c77e0b | ||
|
|
b43dc75cef | ||
|
|
da09f915ac | ||
|
|
9ac0760c81 | ||
|
|
6575c92b5d | ||
|
|
9ccb82dc77 |
@@ -21,7 +21,7 @@ from ...image_processor import VaeImageProcessor
|
||||
from ...models import UVit2DModel, VQModel
|
||||
from ...schedulers import AmusedScheduler
|
||||
from ...utils import is_torch_xla_available, replace_example_docstring
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -47,7 +47,8 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class AmusedPipeline(DiffusionPipeline):
|
||||
class AmusedPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
_last_supported_version = "0.33.1"
|
||||
image_processor: VaeImageProcessor
|
||||
vqvae: VQModel
|
||||
tokenizer: CLIPTokenizer
|
||||
|
||||
@@ -21,7 +21,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...models import UVit2DModel, VQModel
|
||||
from ...schedulers import AmusedScheduler
|
||||
from ...utils import is_torch_xla_available, replace_example_docstring
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -57,7 +57,8 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class AmusedImg2ImgPipeline(DiffusionPipeline):
|
||||
class AmusedImg2ImgPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
_last_supported_version = "0.33.1"
|
||||
image_processor: VaeImageProcessor
|
||||
vqvae: VQModel
|
||||
tokenizer: CLIPTokenizer
|
||||
|
||||
@@ -22,7 +22,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...models import UVit2DModel, VQModel
|
||||
from ...schedulers import AmusedScheduler
|
||||
from ...utils import is_torch_xla_available, replace_example_docstring
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -65,7 +65,8 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class AmusedInpaintPipeline(DiffusionPipeline):
|
||||
class AmusedInpaintPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
_last_supported_version = "0.33.1"
|
||||
image_processor: VaeImageProcessor
|
||||
vqvae: VQModel
|
||||
tokenizer: CLIPTokenizer
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
|
||||
from ...schedulers import KarrasDiffusionSchedulers
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -57,7 +57,7 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class AudioLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
r"""
|
||||
Pipeline for text-to-audio generation using AudioLDM.
|
||||
|
||||
@@ -81,6 +81,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
Vocoder of class `SpeechT5HifiGan`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -25,7 +25,7 @@ from ...utils import (
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
from .blip_image_processing import BlipImageProcessor
|
||||
from .modeling_blip2 import Blip2QFormerModel
|
||||
from .modeling_ctx_clip import ContextCLIPTextModel
|
||||
@@ -81,7 +81,7 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
class BlipDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
"""
|
||||
Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion.
|
||||
|
||||
@@ -107,6 +107,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
Position of the context token in the text encoder.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -37,7 +37,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -98,6 +98,7 @@ EXAMPLE_DOC_STRING = """
|
||||
|
||||
|
||||
class StableDiffusionControlNetXSPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
@@ -138,6 +139,7 @@ class StableDiffusionControlNetXSPipeline(
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -46,7 +46,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
|
||||
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
|
||||
|
||||
@@ -114,6 +114,7 @@ EXAMPLE_DOC_STRING = """
|
||||
|
||||
|
||||
class StableDiffusionXLControlNetXSPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionXLLoraLoaderMixin,
|
||||
@@ -158,6 +159,7 @@ class StableDiffusionXLControlNetXSPipeline(
|
||||
watermarker is used.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = [
|
||||
"tokenizer",
|
||||
|
||||
@@ -21,7 +21,7 @@ from ...models import UNet1DModel
|
||||
from ...schedulers import SchedulerMixin
|
||||
from ...utils import is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
|
||||
from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -34,7 +34,7 @@ else:
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class DanceDiffusionPipeline(DiffusionPipeline):
|
||||
class DanceDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
r"""
|
||||
Pipeline for audio generation.
|
||||
|
||||
@@ -49,6 +49,7 @@ class DanceDiffusionPipeline(DiffusionPipeline):
|
||||
[`IPNDMScheduler`].
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
model_cpu_offload_seq = "unet"
|
||||
|
||||
def __init__(self, unet: UNet1DModel, scheduler: SchedulerMixin):
|
||||
|
||||
@@ -33,7 +33,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -97,9 +97,11 @@ class I2VGenXLPipelineOutput(BaseOutput):
|
||||
|
||||
|
||||
class I2VGenXLPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/).
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ from ...utils import (
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
|
||||
|
||||
if is_librosa_available():
|
||||
@@ -76,7 +76,8 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class MusicLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for text-to-audio generation using MusicLDM.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
|
||||
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
|
||||
from ...utils import deprecate, is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from .image_encoder import PaintByExampleImageEncoder
|
||||
@@ -155,7 +155,8 @@ def prepare_mask_and_masked_image(image, mask):
|
||||
return mask, masked_image
|
||||
|
||||
|
||||
class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class PaintByExamplePipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
<Tip warning={true}>
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ from ...utils import (
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..free_init_utils import FreeInitMixin
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
|
||||
|
||||
if is_torch_xla_available():
|
||||
@@ -132,6 +132,7 @@ class PIAPipelineOutput(BaseOutput):
|
||||
|
||||
|
||||
class PIAPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
@@ -140,6 +141,7 @@ class PIAPipeline(
|
||||
FromSingleFileMixin,
|
||||
FreeInitMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for text-to-video generation.
|
||||
|
||||
|
||||
@@ -139,6 +139,43 @@ class AudioPipelineOutput(BaseOutput):
|
||||
audios: np.ndarray
|
||||
|
||||
|
||||
class DeprecatedPipelineMixin:
    """
    A mixin that can be used to mark a pipeline as deprecated.

    Pipelines inheriting from this mixin log a warning when instantiated, indicating that they are deprecated and
    won't receive updates past the specified version. Tests will be skipped for pipelines that inherit from this
    mixin.

    The mixin's `__init__` forwards all arguments to the next class in the MRO, so list it *before* the pipeline
    base class, as in the example below.

    Example usage:
    ```python
    class MyDeprecatedPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
        _last_supported_version = "0.20.0"

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
    ```
    """

    # Override this in the inheriting class to specify the last version that will support this pipeline.
    # `None` means "not specified"; the warning then reports the currently installed version instead.
    _last_supported_version = None

    def __init__(self, *args, **kwargs):
        """
        Log the deprecation warning, then continue normal initialization.

        Args:
            *args: Positional arguments forwarded unchanged to the next `__init__` in the MRO.
            **kwargs: Keyword arguments forwarded unchanged to the next `__init__` in the MRO.
        """
        pipeline_cls = type(self)

        # The attribute always exists (this mixin defines it as `None`), so a `getattr` default would never be
        # used. Check for `None` explicitly to fall back to the current library version.
        version_info = getattr(pipeline_cls, "_last_supported_version", None)
        if version_info is None:
            version_info = __version__

        # Warn that this pipeline is deprecated
        logger.warning(
            f"The {pipeline_cls.__name__} has been deprecated and will not receive bug fixes or feature updates after Diffusers version {version_info}. "
        )

        # Hand over to the next class in the MRO (the actual pipeline base class)
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
r"""
|
||||
Base class for all pipelines.
|
||||
|
||||
@@ -11,7 +11,7 @@ from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyCh
|
||||
from ...schedulers import KarrasDiffusionSchedulers
|
||||
from ...utils import deprecate, is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from .pipeline_output import SemanticStableDiffusionPipelineOutput
|
||||
|
||||
|
||||
@@ -25,7 +25,8 @@ else:
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class SemanticStableDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion with latent editing.
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -179,7 +179,9 @@ class AttendExciteAttnProcessor:
|
||||
return hidden_states
|
||||
|
||||
|
||||
class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin):
|
||||
class StableDiffusionAttendAndExcitePipeline(
|
||||
DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin
|
||||
):
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite.
|
||||
|
||||
@@ -209,6 +211,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -40,7 +40,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -242,7 +242,11 @@ def preprocess_mask(mask, batch_size: int = 1):
|
||||
|
||||
|
||||
class StableDiffusionDiffEditPipeline(
|
||||
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
):
|
||||
r"""
|
||||
<Tip warning={true}>
|
||||
@@ -282,6 +286,8 @@ class StableDiffusionDiffEditPipeline(
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -36,7 +36,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -108,7 +108,7 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class StableDiffusionGLIGENPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
|
||||
|
||||
@@ -135,6 +135,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -41,7 +41,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
@@ -160,7 +160,7 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
class StableDiffusionGLIGENTextImagePipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
|
||||
|
||||
@@ -193,6 +193,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -42,7 +42,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
|
||||
|
||||
|
||||
@@ -64,7 +64,11 @@ class ModelWrapper:
|
||||
|
||||
|
||||
class StableDiffusionKDiffusionPipeline(
|
||||
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
):
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion.
|
||||
@@ -105,6 +109,8 @@ class StableDiffusionKDiffusionPipeline(
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@@ -48,7 +48,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
|
||||
|
||||
@@ -88,6 +88,7 @@ class ModelWrapper:
|
||||
|
||||
|
||||
class StableDiffusionXLKDiffusionPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
FromSingleFileMixin,
|
||||
@@ -95,6 +96,8 @@ class StableDiffusionXLKDiffusionPipeline(
|
||||
TextualInversionLoaderMixin,
|
||||
IPAdapterMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion XL and k-diffusion.
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
|
||||
@@ -178,6 +178,7 @@ class LDM3DPipelineOutput(BaseOutput):
|
||||
|
||||
|
||||
class StableDiffusionLDM3DPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
@@ -185,6 +186,8 @@ class StableDiffusionLDM3DPipeline(
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
FromSingleFileMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
r"""
|
||||
Pipeline for text-to-image and 3D generation using LDM3D.
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -156,12 +156,15 @@ def retrieve_timesteps(
|
||||
|
||||
|
||||
class StableDiffusionPanoramaPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
IPAdapterMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
r"""
|
||||
Pipeline for text-to-image generation using MultiDiffusion.
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
|
||||
from ...schedulers import KarrasDiffusionSchedulers
|
||||
from ...utils import deprecate, is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from . import StableDiffusionSafePipelineOutput
|
||||
from .safety_checker import SafeStableDiffusionSafetyChecker
|
||||
|
||||
@@ -29,7 +29,9 @@ else:
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin):
|
||||
class StableDiffusionPipelineSafe(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin):
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
r"""
|
||||
Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion.
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
|
||||
@@ -107,7 +107,11 @@ class CrossAttnStoreProcessor:
|
||||
|
||||
|
||||
# Modified to get self-attention guidance scale in this paper (https://huggingface.co/papers/2210.00939) as an input
|
||||
class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
|
||||
class StableDiffusionSAGPipeline(
|
||||
DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
|
||||
r"""
|
||||
Pipeline for text-to-image generation using Stable Diffusion.
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from . import TextToVideoSDPipelineOutput
|
||||
|
||||
|
||||
@@ -68,8 +68,13 @@ EXAMPLE_DOC_STRING = """
|
||||
|
||||
|
||||
class TextToVideoSDPipeline(
|
||||
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for text-to-video generation.
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from . import TextToVideoSDPipelineOutput
|
||||
|
||||
|
||||
@@ -103,8 +103,13 @@ def retrieve_latents(
|
||||
|
||||
|
||||
class VideoToVideoSDPipeline(
|
||||
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for text-guided video-to-video generation.
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
from ..stable_diffusion import StableDiffusionSafetyChecker
|
||||
|
||||
|
||||
@@ -296,12 +296,14 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
|
||||
|
||||
|
||||
class TextToVideoZeroPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
StableDiffusionLoraLoaderMixin,
|
||||
FromSingleFileMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for zero-shot text-to-video generation using Stable Diffusion.
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ from ...utils import (
|
||||
unscale_lora_layers,
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
||||
|
||||
|
||||
if is_invisible_watermark_available():
|
||||
@@ -346,11 +346,13 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
||||
|
||||
|
||||
class TextToVideoZeroSDXLPipeline(
|
||||
DeprecatedPipelineMixin,
|
||||
DiffusionPipeline,
|
||||
StableDiffusionMixin,
|
||||
StableDiffusionXLLoraLoaderMixin,
|
||||
TextualInversionLoaderMixin,
|
||||
):
|
||||
_last_supported_version = "0.33.1"
|
||||
r"""
|
||||
Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
|
||||
from ...schedulers import UnCLIPScheduler
|
||||
from ...utils import is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
from .text_proj import UnCLIPTextProjModel
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ else:
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class UnCLIPPipeline(DiffusionPipeline):
|
||||
class UnCLIPPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
"""
|
||||
Pipeline for text-to-image generation using unCLIP.
|
||||
|
||||
@@ -69,6 +69,7 @@ class UnCLIPPipeline(DiffusionPipeline):
|
||||
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
_exclude_from_cpu_offload = ["prior"]
|
||||
|
||||
prior: PriorTransformer
|
||||
|
||||
@@ -29,7 +29,7 @@ from ...models import UNet2DConditionModel, UNet2DModel
|
||||
from ...schedulers import UnCLIPScheduler
|
||||
from ...utils import is_torch_xla_available, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
from .text_proj import UnCLIPTextProjModel
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ else:
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class UnCLIPImageVariationPipeline(DiffusionPipeline):
|
||||
class UnCLIPImageVariationPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
"""
|
||||
Pipeline to generate image variations from an input image using UnCLIP.
|
||||
|
||||
@@ -73,6 +73,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
|
||||
Scheduler used in the super resolution denoising process (a modified [`DDPMScheduler`]).
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
decoder: UNet2DConditionModel
|
||||
text_proj: UnCLIPTextProjModel
|
||||
text_encoder: CLIPTextModelWithProjection
|
||||
|
||||
@@ -28,7 +28,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.outputs import BaseOutput
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
|
||||
from .modeling_text_decoder import UniDiffuserTextDecoder
|
||||
from .modeling_uvit import UniDiffuserModel
|
||||
|
||||
@@ -62,7 +62,7 @@ class ImageTextPipelineOutput(BaseOutput):
|
||||
text: Optional[Union[List[str], List[List[str]]]]
|
||||
|
||||
|
||||
class UniDiffuserPipeline(DiffusionPipeline):
|
||||
class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
r"""
|
||||
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
|
||||
image generation, image-conditioned text generation, and joint image-text generation.
|
||||
@@ -96,6 +96,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
||||
original UniDiffuser paper uses the [`DPMSolverMultistepScheduler`] scheduler.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
# TODO: support for moving submodules for components with enable_model_cpu_offload
|
||||
model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae->text_decoder"
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from transformers import CLIPTextModel, CLIPTokenizer
|
||||
from ...schedulers import DDPMWuerstchenScheduler
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
from .modeling_paella_vq_model import PaellaVQModel
|
||||
from .modeling_wuerstchen_diffnext import WuerstchenDiffNeXt
|
||||
|
||||
@@ -56,7 +56,7 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class WuerstchenDecoderPipeline(DiffusionPipeline):
|
||||
class WuerstchenDecoderPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
"""
|
||||
Pipeline for generating images from the Wuerstchen model.
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ from transformers import CLIPTextModel, CLIPTokenizer
|
||||
|
||||
from ...schedulers import DDPMWuerstchenScheduler
|
||||
from ...utils import deprecate, replace_example_docstring
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
|
||||
from .modeling_paella_vq_model import PaellaVQModel
|
||||
from .modeling_wuerstchen_diffnext import WuerstchenDiffNeXt
|
||||
from .modeling_wuerstchen_prior import WuerstchenPrior
|
||||
@@ -40,7 +40,7 @@ TEXT2IMAGE_EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
class WuerstchenCombinedPipeline(DiffusionPipeline):
|
||||
class WuerstchenCombinedPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
"""
|
||||
Combined Pipeline for text-to-image generation using Wuerstchen
|
||||
|
||||
@@ -68,6 +68,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
|
||||
The scheduler to be used for prior pipeline.
|
||||
"""
|
||||
|
||||
_last_supported_version = "0.33.1"
|
||||
_load_connected_pipes = True
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -1115,6 +1115,14 @@ class PipelineTesterMixin:
|
||||
gc.collect()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
# Skip tests for pipelines that inherit from DeprecatedPipelineMixin
|
||||
from diffusers.pipelines.pipeline_utils import DeprecatedPipelineMixin
|
||||
|
||||
if hasattr(self, "pipeline_class") and issubclass(self.pipeline_class, DeprecatedPipelineMixin):
|
||||
import pytest
|
||||
|
||||
pytest.skip(reason=f"Deprecated Pipeline: {self.pipeline_class.__name__}")
|
||||
|
||||
def tearDown(self):
|
||||
# clean up the VRAM after each test in case of CUDA runtime errors
|
||||
super().tearDown()
|
||||
|
||||
Reference in New Issue
Block a user