Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-07 21:14:44 +08:00)

Compare commits: custom-cod...pipeline-d (33 commits)

Commit SHA1s:
8d6d012b18, 1a25d54917, 53efae0af2, 3014195c55, 574a1b0476, 8734fc6c63,
5929e8b03f, 13fb4216b0, 09de36c4d8, 737216c23d, ee00adec72, 4f1ab190bb,
c6947e0849, 3a93eb5b4f, 0dd33b2db9, 1b53d16477, 605cff1538, f3553fed36,
3c3556e5fe, 05b0162b73, 4d40ea3897, ca1c5ccddc, c41bae5540, 5aa47f6152,
e47e0e13e6, 6bedba63e8, c1aac15394, 01ad711645, c9b034b94e, 99a8f0fca9,
19afac19fa, b797186e84, c68db8014f
@@ -46,7 +46,7 @@ An attention processor is a class for applying different types of attention mech
 
 ## CrossFrameAttnProcessor
 
-[[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
+[[autodoc]] pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
 
 ## Custom Diffusion
 
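The same relocation applies to user-facing imports: the class itself is unchanged, only its module path gains a `deprecated` segment. A minimal sketch of the before and after, mirroring the updated `[[autodoc]]` path above:

```python
# Before this change:
# from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero import CrossFrameAttnProcessor

# After this change, the module lives under the deprecated subpackage:
from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero import (
    CrossFrameAttnProcessor,
)
```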
@@ -163,4 +163,4 @@ An attention processor is a class for applying different types of attention mech
 
 ## XLAFluxFlashAttnProcessor2_0
 
 [[autodoc]] models.attention_processor.XLAFluxFlashAttnProcessor2_0

@@ -42,4 +42,4 @@ pipe = FluxControlNetPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", co
 
 ## FluxControlNetOutput
 
-[[autodoc]] models.controlnet_flux.FluxControlNetOutput
+[[autodoc]] models.controlnets.FluxControlNetOutput

@@ -43,4 +43,4 @@ controlnet = SparseControlNetModel.from_pretrained("guoyww/animatediff-sparsectr
 
 ## SparseControlNetOutput
 
-[[autodoc]] models.controlnet_sparsectrl.SparseControlNetOutput
+[[autodoc]] models.controlnets.SparseControlNetOutput

@@ -55,4 +55,4 @@ Sample output with I2VGenXL:
 - __call__
 
 ## I2VGenXLPipelineOutput
-[[autodoc]] pipelines.i2vgen_xl.pipeline_i2vgen_xl.I2VGenXLPipelineOutput
+[[autodoc]] pipelines.deprecated.i2vgen_xl.pipeline_i2vgen_xl.I2VGenXLPipelineOutput

@@ -168,4 +168,4 @@ FreeInit is not really free - the improved quality comes at the cost of extra co
 
 ## PIAPipelineOutput
 
-[[autodoc]] pipelines.pia.PIAPipelineOutput
+[[autodoc]] pipelines.deprecated.pia.PIAPipelineOutput

@@ -31,5 +31,5 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
 - __call__
 
 ## SemanticStableDiffusionPipelineOutput
-[[autodoc]] pipelines.semantic_stable_diffusion.pipeline_output.SemanticStableDiffusionPipelineOutput
+[[autodoc]] pipelines.deprecated.semantic_stable_diffusion.pipeline_output.SemanticStableDiffusionPipelineOutput
 - all

@@ -34,4 +34,4 @@ See the [reuse components across pipelines](../../using-diffusers/loading#reuse-
 - __call__
 
 ## ShapEPipelineOutput
-[[autodoc]] pipelines.shap_e.pipeline_shap_e.ShapEPipelineOutput
+[[autodoc]] pipelines.deprecated.shap_e.pipeline_shap_e.ShapEPipelineOutput

@@ -35,14 +35,14 @@ Make sure to check out the Stable Diffusion [Tips](overview#tips) section to lea
 
 ## StableDiffusionLDM3DPipeline
 
-[[autodoc]] pipelines.stable_diffusion_ldm3d.pipeline_stable_diffusion_ldm3d.StableDiffusionLDM3DPipeline
+[[autodoc]] pipelines.deprecated.stable_diffusion_ldm3d.pipeline_stable_diffusion_ldm3d.StableDiffusionLDM3DPipeline
 - all
 - __call__
 
 
 ## LDM3DPipelineOutput
 
-[[autodoc]] pipelines.stable_diffusion_ldm3d.pipeline_stable_diffusion_ldm3d.LDM3DPipelineOutput
+[[autodoc]] pipelines.deprecated.stable_diffusion_ldm3d.pipeline_stable_diffusion_ldm3d.LDM3DPipelineOutput
 - all
 - __call__
 

@@ -56,6 +56,6 @@ Make sure to check out the Stable Diffusion [Tips](overview#tips) section to lea
 
 ## StableDiffusionSafePipelineOutput
 
-[[autodoc]] pipelines.stable_diffusion_safe.StableDiffusionSafePipelineOutput
+[[autodoc]] pipelines.deprecated.stable_diffusion_safe.StableDiffusionSafePipelineOutput
 - all
 - __call__

@@ -194,4 +194,4 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
 - __call__
 
 ## TextToVideoSDPipelineOutput
-[[autodoc]] pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput
+[[autodoc]] pipelines.deprecated.text_to_video_synthesis.TextToVideoSDPipelineOutput

@@ -303,4 +303,4 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
 - __call__
 
 ## TextToVideoPipelineOutput
-[[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput
+[[autodoc]] pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput

@@ -145,7 +145,7 @@ The original codebase, as well as experimental ideas, can be found at [dome272/W
 
 ## WuerstchenPriorPipelineOutput
 
-[[autodoc]] pipelines.wuerstchen.pipeline_wuerstchen_prior.WuerstchenPriorPipelineOutput
+[[autodoc]] pipelines.deprecated.wuerstchen.pipeline_wuerstchen_prior.WuerstchenPriorPipelineOutput
 
 ## WuerstchenDecoderPipeline
 
@@ -436,7 +436,7 @@ class AnimateDiffControlNetPipeline(
             image_embeds = ip_adapter_image_embeds
         return image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 
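Only the first line of `decode_latents` appears as context in this hunk. A rough sketch of how the copied-from method continues; the un-scaling line is from the diff, the rest is an assumption based on the usual VAE decode flow for video pipelines:

```python
def decode_latents(self, latents):
    # From the diff: undo the scaling the VAE applied at encode time.
    latents = 1 / self.vae.config.scaling_factor * latents

    # Assumed continuation: fold frames into the batch axis, decode,
    # then restore the (batch, channels, frames, height, width) layout.
    batch_size, channels, num_frames, height, width = latents.shape
    latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)
    image = self.vae.decode(latents).sample
    video = image[None, :].reshape((batch_size, num_frames, -1) + image.shape[2:]).permute(0, 2, 1, 3, 4)
    # float32 keeps the downstream post-processing numerically stable.
    return video.float()
```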
@@ -663,7 +663,7 @@ class AnimateDiffControlNetPipeline(
                     f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
                 )
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):

@@ -553,7 +553,7 @@ class AnimateDiffImgToVideoPipeline(
             image_embeds = ip_adapter_image_embeds
         return image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 

@@ -425,7 +425,7 @@ class AnimateDiffPipelineIpex(
 
         return image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 
@@ -520,7 +520,7 @@ class AnimateDiffPipelineIpex(
                     f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):
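The f-string in this hunk is the error half of an ip-adapter input validation; a sketch of the check it plausibly sits under, with the surrounding condition assumed from the message itself:

```python
def _validate_ip_adapter_image_embeds(ip_adapter_image_embeds):
    # Assumed guard reconstructed around the error message shown in the hunk.
    if not isinstance(ip_adapter_image_embeds, list):
        raise ValueError(
            f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
        )
    if ip_adapter_image_embeds[0].ndim not in [3, 4]:
        raise ValueError(
            f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
        )
```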
@@ -427,7 +427,7 @@ class CogVideoXSTGPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
     def check_inputs(
         self,
         prompt,

@@ -18,7 +18,7 @@ from diffusers import (
     UNet2DConditionModel,
     UNet2DModel,
 )
-from diffusers.pipelines.unclip import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip import UnCLIPTextProjModel
 from diffusers.utils import logging
 from diffusers.utils.torch_utils import randn_tensor
 

@@ -84,7 +84,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
     decoder_scheduler: UnCLIPScheduler
     super_res_scheduler: UnCLIPScheduler
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.__init__
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.__init__
     def __init__(
         self,
         decoder: UNet2DConditionModel,

@@ -113,7 +113,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
             super_res_scheduler=super_res_scheduler,
         )
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -125,7 +125,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
         latents = latents * scheduler.init_noise_sigma
         return latents
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_prompt
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_prompt
     def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance):
         batch_size = len(prompt) if isinstance(prompt, list) else 1
 
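The two hunks at lines 113 and 125 together show most of `prepare_latents`. Reassembled for reference, with the elided middle branch filled in as an assumption:

```python
from diffusers.utils.torch_utils import randn_tensor

def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
    if latents is None:
        # From the diff: draw fresh noise when no latents are supplied.
        latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
    else:
        # Assumed middle branch: validate and move user-supplied latents.
        if latents.shape != shape:
            raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
        latents = latents.to(device)

    # From the diff: scale by the scheduler's initial noise sigma.
    latents = latents * scheduler.init_noise_sigma
    return latents
```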
@@ -189,7 +189,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
 
         return prompt_embeds, text_encoder_hidden_states, text_mask
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_image
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_image
     def _encode_image(self, image, device, num_images_per_prompt, image_embeddings: Optional[torch.Tensor] = None):
         dtype = next(self.image_encoder.parameters()).dtype
 

@@ -14,7 +14,7 @@ from diffusers import (
     UNet2DConditionModel,
     UNet2DModel,
 )
-from diffusers.pipelines.unclip import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip import UnCLIPTextProjModel
 from diffusers.utils import logging
 from diffusers.utils.torch_utils import randn_tensor
 

@@ -78,7 +78,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
     decoder_scheduler: UnCLIPScheduler
     super_res_scheduler: UnCLIPScheduler
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.__init__
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.__init__
     def __init__(
         self,
         prior: PriorTransformer,

@@ -107,7 +107,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
             super_res_scheduler=super_res_scheduler,
         )
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -119,7 +119,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
         latents = latents * scheduler.init_noise_sigma
         return latents
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt
     def _encode_prompt(
         self,
         prompt,

@@ -40,7 +40,7 @@ from transformers.utils import ContextManagers
 
 from diffusers import AutoPipelineForText2Image, DDPMWuerstchenScheduler, WuerstchenPriorPipeline
 from diffusers.optimization import get_scheduler
-from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS, WuerstchenPrior
+from diffusers.pipelines.deprecated.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS, WuerstchenPrior
 from diffusers.utils import check_min_version, is_wandb_available, make_image_grid
 from diffusers.utils.logging import set_verbosity_error, set_verbosity_info
 
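Note that the `from diffusers import ...` line stays as context: the top-level re-exports are untouched, only the subpackage path moves. For scripts that reach into the subpackage, the migration is a one-line change:

```python
# Unchanged by this diff (top-level re-export):
from diffusers import AutoPipelineForText2Image, DDPMWuerstchenScheduler, WuerstchenPriorPipeline

# Updated by this diff (direct subpackage import):
from diffusers.pipelines.deprecated.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS, WuerstchenPrior
```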
@@ -40,7 +40,7 @@ from transformers.utils import ContextManagers
 
 from diffusers import AutoPipelineForText2Image, DDPMWuerstchenScheduler
 from diffusers.optimization import get_scheduler
-from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS, WuerstchenPrior
+from diffusers.pipelines.deprecated.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS, WuerstchenPrior
 from diffusers.training_utils import EMAModel
 from diffusers.utils import check_min_version, is_wandb_available, make_image_grid
 from diffusers.utils.logging import set_verbosity_error, set_verbosity_info

@@ -7,7 +7,7 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
 from diffusers import UnCLIPPipeline, UNet2DConditionModel, UNet2DModel
 from diffusers.models.transformers.prior_transformer import PriorTransformer
-from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip.text_proj import UnCLIPTextProjModel
 from diffusers.schedulers.scheduling_unclip import UnCLIPScheduler
 
 

@@ -5,7 +5,7 @@ import torch
 from accelerate import load_checkpoint_and_dispatch
 
 from diffusers.models.transformers.prior_transformer import PriorTransformer
-from diffusers.pipelines.shap_e import ShapERenderer
+from diffusers.pipelines.deprecated.shap_e import ShapERenderer
 
 
 """

@@ -21,7 +21,7 @@ from diffusers import (
 from diffusers.loaders.single_file_utils import convert_stable_cascade_unet_single_file_to_diffusers
 from diffusers.models import StableCascadeUNet
 from diffusers.models.modeling_utils import load_model_dict_into_meta
-from diffusers.pipelines.wuerstchen import PaellaVQModel
+from diffusers.pipelines.deprecated.wuerstchen import PaellaVQModel
 from diffusers.utils import is_accelerate_available
 
 

@@ -21,7 +21,7 @@ from diffusers import (
 from diffusers.loaders.single_file_utils import convert_stable_cascade_unet_single_file_to_diffusers
 from diffusers.models import StableCascadeUNet
 from diffusers.models.modeling_utils import load_model_dict_into_meta
-from diffusers.pipelines.wuerstchen import PaellaVQModel
+from diffusers.pipelines.deprecated.wuerstchen import PaellaVQModel
 from diffusers.utils import is_accelerate_available
 
 

@@ -11,7 +11,7 @@ from diffusers import (
     WuerstchenDecoderPipeline,
     WuerstchenPriorPipeline,
 )
-from diffusers.pipelines.wuerstchen import PaellaVQModel, WuerstchenDiffNeXt, WuerstchenPrior
+from diffusers.pipelines.deprecated.wuerstchen import PaellaVQModel, WuerstchenDiffNeXt, WuerstchenPrior
 
 
 model_path = "models/"
@@ -27,7 +27,7 @@ from ..attention_processor import Attention
 from ..modeling_utils import ModelMixin
 
 
-# Copied from diffusers.pipelines.wuerstchen.modeling_wuerstchen_common.WuerstchenLayerNorm with WuerstchenLayerNorm -> SDCascadeLayerNorm
+# Copied from diffusers.pipelines.deprecated.wuerstchen.modeling_wuerstchen_common.WuerstchenLayerNorm with WuerstchenLayerNorm -> SDCascadeLayerNorm
 class SDCascadeLayerNorm(nn.LayerNorm):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
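The `# Copied from ... with A -> B` annotation is machine-checked: the repository's copy tooling (`make fix-copies`) compares the annotated class against its source after applying the rename, so the source path has to track the move under `deprecated`. A sketch of the full pattern; the `forward` body is an assumed continuation, since the hunk only shows `__init__`:

```python
import torch.nn as nn

# Copied from diffusers.pipelines.deprecated.wuerstchen.modeling_wuerstchen_common.WuerstchenLayerNorm with WuerstchenLayerNorm -> SDCascadeLayerNorm
class SDCascadeLayerNorm(nn.LayerNorm):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, x):
        # Assumed continuation: apply LayerNorm channels-last on an NCHW tensor.
        x = x.permute(0, 2, 3, 1)
        x = super().forward(x)
        return x.permute(0, 3, 1, 2)
```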
@@ -48,7 +48,6 @@ else:
         "AutoPipelineForText2Image",
     ]
     _import_structure["consistency_models"] = ["ConsistencyModelPipeline"]
-    _import_structure["dance_diffusion"] = ["DanceDiffusionPipeline"]
     _import_structure["ddim"] = ["DDIMPipeline"]
     _import_structure["ddpm"] = ["DDPMPipeline"]
     _import_structure["dit"] = ["DiTPipeline"]

@@ -62,6 +61,7 @@ else:
     _import_structure["deprecated"].extend(
         [
             "PNDMPipeline",
+            "DanceDiffusionPipeline",
             "LDMPipeline",
             "RePaintPipeline",
             "ScoreSdeVePipeline",

@@ -107,6 +107,7 @@ else:
             "AltDiffusionPipeline",
             "AltDiffusionImg2ImgPipeline",
             "CycleDiffusionPipeline",
+            "StableDiffusionLDM3DPipeline",
             "StableDiffusionInpaintPipelineLegacy",
             "StableDiffusionPix2PixZeroPipeline",
             "StableDiffusionParadigmsPipeline",

@@ -115,10 +116,45 @@ else:
             "VersatileDiffusionImageVariationPipeline",
             "VersatileDiffusionPipeline",
             "VersatileDiffusionTextToImagePipeline",
+            "AmusedImg2ImgPipeline",
+            "AmusedInpaintPipeline",
+            "AmusedPipeline",
+            "StableDiffusionControlNetXSPipeline",
+            "StableDiffusionXLControlNetXSPipeline",
+            "I2VGenXLPipeline",
+            "LattePipeline",
+            "MusicLDMPipeline",
+            "PaintByExamplePipeline",
+            "PIAPipeline",
+            "SemanticStableDiffusionPipeline",
+            "ShapEImg2ImgPipeline",
+            "ShapEPipeline",
+            "StableDiffusionAttendAndExcitePipeline",
+            "StableDiffusionPipelineSafe",
+            "StableDiffusionSAGPipeline",
+            "StableDiffusionGLIGENPipeline",
+            "StableDiffusionGLIGENTextImagePipeline",
+            "StableDiffusionDiffEditPipeline",
+            "StableDiffusionLDM3DPipeline",
+            "StableDiffusionPanoramaPipeline",
+            "TextToVideoSDPipeline",
+            "TextToVideoZeroPipeline",
+            "TextToVideoZeroSDXLPipeline",
+            "VideoToVideoSDPipeline",
+            "UnCLIPImageVariationPipeline",
+            "UnCLIPPipeline",
+            "ImageTextPipelineOutput",
+            "UniDiffuserModel",
+            "UniDiffuserPipeline",
+            "UniDiffuserTextDecoder",
+            "WuerstchenCombinedPipeline",
+            "WuerstchenDecoderPipeline",
+            "WuerstchenPriorPipeline",
+            "BlipDiffusionPipeline",
+            "BlipDiffusionControlNetPipeline",
         ]
     )
     _import_structure["allegro"] = ["AllegroPipeline"]
-    _import_structure["amused"] = ["AmusedImg2ImgPipeline", "AmusedInpaintPipeline", "AmusedPipeline"]
     _import_structure["animatediff"] = [
         "AnimateDiffPipeline",
         "AnimateDiffControlNetPipeline",

@@ -147,7 +183,6 @@ else:
         "AudioLDM2ProjectionModel",
         "AudioLDM2UNet2DConditionModel",
     ]
-    _import_structure["blip_diffusion"] = ["BlipDiffusionPipeline"]
     _import_structure["cogvideo"] = [
         "CogVideoXPipeline",
         "CogVideoXImageToVideoPipeline",

@@ -158,7 +193,6 @@ else:
     _import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
     _import_structure["controlnet"].extend(
         [
-            "BlipDiffusionControlNetPipeline",
             "StableDiffusionControlNetImg2ImgPipeline",
             "StableDiffusionControlNetInpaintPipeline",
             "StableDiffusionControlNetPipeline",

@@ -191,12 +225,6 @@ else:
             "SanaPAGPipeline",
         ]
     )
-    _import_structure["controlnet_xs"].extend(
-        [
-            "StableDiffusionControlNetXSPipeline",
-            "StableDiffusionXLControlNetXSPipeline",
-        ]
-    )
     _import_structure["controlnet_hunyuandit"].extend(
         [
             "HunyuanDiTControlNetPipeline",

@@ -264,7 +292,6 @@ else:
             "LEditsPPPipelineStableDiffusionXL",
         ]
     )
-    _import_structure["latte"] = ["LattePipeline"]
     _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline", "LTXConditionPipeline"]
     _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
     _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]

@@ -276,14 +303,9 @@ else:
         ]
     )
     _import_structure["mochi"] = ["MochiPipeline"]
-    _import_structure["musicldm"] = ["MusicLDMPipeline"]
     _import_structure["omnigen"] = ["OmniGenPipeline"]
-    _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
-    _import_structure["pia"] = ["PIAPipeline"]
     _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
     _import_structure["sana"] = ["SanaPipeline", "SanaSprintPipeline", "SanaControlNetPipeline"]
-    _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
-    _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_audio"] = [
         "StableAudioProjectionModel",
         "StableAudioPipeline",

@@ -306,7 +328,6 @@ else:
             "StableDiffusionUpscalePipeline",
             "StableUnCLIPImg2ImgPipeline",
             "StableUnCLIPPipeline",
-            "StableDiffusionLDM3DPipeline",
         ]
     )
     _import_structure["aura_flow"] = ["AuraFlowPipeline"]

@@ -315,13 +336,6 @@ else:
         "StableDiffusion3Img2ImgPipeline",
         "StableDiffusion3InpaintPipeline",
     ]
-    _import_structure["stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
-    _import_structure["stable_diffusion_safe"] = ["StableDiffusionPipelineSafe"]
-    _import_structure["stable_diffusion_sag"] = ["StableDiffusionSAGPipeline"]
-    _import_structure["stable_diffusion_gligen"] = [
-        "StableDiffusionGLIGENPipeline",
-        "StableDiffusionGLIGENTextImagePipeline",
-    ]
     _import_structure["stable_video_diffusion"] = ["StableVideoDiffusionPipeline"]
     _import_structure["stable_diffusion_xl"].extend(
         [

@@ -331,32 +345,10 @@ else:
             "StableDiffusionXLPipeline",
         ]
     )
-    _import_structure["stable_diffusion_diffedit"] = ["StableDiffusionDiffEditPipeline"]
-    _import_structure["stable_diffusion_ldm3d"] = ["StableDiffusionLDM3DPipeline"]
-    _import_structure["stable_diffusion_panorama"] = ["StableDiffusionPanoramaPipeline"]
     _import_structure["t2i_adapter"] = [
         "StableDiffusionAdapterPipeline",
         "StableDiffusionXLAdapterPipeline",
     ]
-    _import_structure["text_to_video_synthesis"] = [
-        "TextToVideoSDPipeline",
-        "TextToVideoZeroPipeline",
-        "TextToVideoZeroSDXLPipeline",
-        "VideoToVideoSDPipeline",
-    ]
-    _import_structure["i2vgen_xl"] = ["I2VGenXLPipeline"]
-    _import_structure["unclip"] = ["UnCLIPImageVariationPipeline", "UnCLIPPipeline"]
-    _import_structure["unidiffuser"] = [
-        "ImageTextPipelineOutput",
-        "UniDiffuserModel",
-        "UniDiffuserPipeline",
-        "UniDiffuserTextDecoder",
-    ]
-    _import_structure["wuerstchen"] = [
-        "WuerstchenCombinedPipeline",
-        "WuerstchenDecoderPipeline",
-        "WuerstchenPriorPipeline",
-    ]
     _import_structure["wan"] = ["WanPipeline", "WanImageToVideoPipeline", "WanVideoToVideoPipeline"]
 try:
     if not is_onnx_available():
@@ -395,10 +387,12 @@ except OptionalDependencyNotAvailable:
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
-    _import_structure["stable_diffusion_k_diffusion"] = [
-        "StableDiffusionKDiffusionPipeline",
-        "StableDiffusionXLKDiffusionPipeline",
-    ]
+    _import_structure["deprecated"].extend(
+        [
+            "StableDiffusionKDiffusionPipeline",
+            "StableDiffusionXLKDiffusionPipeline",
+        ]
+    )
 
 try:
     if not (is_torch_available() and is_transformers_available() and is_sentencepiece_available()):
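The k-diffusion pipelines keep their optional-dependency guard; only the registration bucket changes from `stable_diffusion_k_diffusion` to `deprecated`. A self-contained sketch of the guard pattern this file uses, with names taken from the hunk and the fallback wiring assumed from the surrounding code:

```python
from diffusers.utils import (
    OptionalDependencyNotAvailable,
    get_objects_from_module,
    is_k_diffusion_available,
    is_torch_available,
    is_transformers_available,
)

_dummy_objects = {}
_import_structure = {"deprecated": []}

try:
    # Raise if any optional dependency of this group is missing.
    if not (is_torch_available() and is_transformers_available() and is_k_diffusion_available()):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # Fall back to dummy objects that raise a helpful error when used.
    from diffusers.utils import dummy_torch_and_transformers_and_k_diffusion_objects

    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
else:
    # Register the real classes for lazy import, now under the "deprecated" bucket.
    _import_structure["deprecated"].extend(
        [
            "StableDiffusionKDiffusionPipeline",
            "StableDiffusionXLKDiffusionPipeline",
        ]
    )
```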
@@ -472,10 +466,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         AutoPipelineForText2Image,
     )
     from .consistency_models import ConsistencyModelPipeline
-    from .dance_diffusion import DanceDiffusionPipeline
     from .ddim import DDIMPipeline
     from .ddpm import DDPMPipeline
-    from .deprecated import KarrasVePipeline, LDMPipeline, PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline
+    from .deprecated import (
+        DanceDiffusionPipeline,
+        KarrasVePipeline,
+        LDMPipeline,
+        PNDMPipeline,
+        RePaintPipeline,
+        ScoreSdeVePipeline,
+    )
     from .dit import DiTPipeline
     from .latent_diffusion import LDMSuperResolutionPipeline
     from .pipeline_utils import (

@@ -500,7 +500,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from ..utils.dummy_torch_and_transformers_objects import *
     else:
         from .allegro import AllegroPipeline
-        from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline
         from .animatediff import (
             AnimateDiffControlNetPipeline,
             AnimateDiffPipeline,

@@ -509,14 +508,12 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AnimateDiffVideoToVideoControlNetPipeline,
             AnimateDiffVideoToVideoPipeline,
         )
-        from .audioldm import AudioLDMPipeline
         from .audioldm2 import (
             AudioLDM2Pipeline,
             AudioLDM2ProjectionModel,
             AudioLDM2UNet2DConditionModel,
         )
         from .aura_flow import AuraFlowPipeline
-        from .blip_diffusion import BlipDiffusionPipeline
         from .cogvideo import (
             CogVideoXFunControlPipeline,
             CogVideoXImageToVideoPipeline,

@@ -526,7 +523,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .cogview3 import CogView3PlusPipeline
         from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
         from .controlnet import (
-            BlipDiffusionControlNetPipeline,
             StableDiffusionControlNetImg2ImgPipeline,
             StableDiffusionControlNetInpaintPipeline,
             StableDiffusionControlNetPipeline,

@@ -541,10 +537,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiTControlNetPipeline,
         )
         from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline
-        from .controlnet_xs import (
-            StableDiffusionControlNetXSPipeline,
-            StableDiffusionXLControlNetXSPipeline,
-        )
         from .deepfloyd_if import (
             IFImg2ImgPipeline,
             IFImg2ImgSuperResolutionPipeline,

@@ -556,16 +548,53 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .deprecated import (
             AltDiffusionImg2ImgPipeline,
             AltDiffusionPipeline,
+            AmusedImg2ImgPipeline,
+            AmusedInpaintPipeline,
+            AmusedPipeline,
+            AudioLDMPipeline,
+            BlipDiffusionControlNetPipeline,
+            BlipDiffusionPipeline,
             CycleDiffusionPipeline,
+            I2VGenXLPipeline,
+            ImageTextPipelineOutput,
+            LattePipeline,
+            MusicLDMPipeline,
+            PaintByExamplePipeline,
+            PIAPipeline,
+            SemanticStableDiffusionPipeline,
+            ShapEImg2ImgPipeline,
+            ShapEPipeline,
+            StableDiffusionAttendAndExcitePipeline,
+            StableDiffusionControlNetXSPipeline,
+            StableDiffusionDiffEditPipeline,
+            StableDiffusionGLIGENPipeline,
+            StableDiffusionGLIGENTextImagePipeline,
             StableDiffusionInpaintPipelineLegacy,
+            StableDiffusionLDM3DPipeline,
             StableDiffusionModelEditingPipeline,
+            StableDiffusionPanoramaPipeline,
             StableDiffusionParadigmsPipeline,
+            StableDiffusionPipelineSafe,
             StableDiffusionPix2PixZeroPipeline,
+            StableDiffusionSAGPipeline,
+            StableDiffusionXLControlNetXSPipeline,
+            TextToVideoSDPipeline,
+            TextToVideoZeroPipeline,
+            TextToVideoZeroSDXLPipeline,
+            UnCLIPImageVariationPipeline,
+            UnCLIPPipeline,
+            UniDiffuserModel,
+            UniDiffuserPipeline,
+            UniDiffuserTextDecoder,
             VersatileDiffusionDualGuidedPipeline,
             VersatileDiffusionImageVariationPipeline,
             VersatileDiffusionPipeline,
             VersatileDiffusionTextToImagePipeline,
+            VideoToVideoSDPipeline,
             VQDiffusionPipeline,
+            WuerstchenCombinedPipeline,
+            WuerstchenDecoderPipeline,
+            WuerstchenPriorPipeline,
         )
         from .easyanimate import (
             EasyAnimateControlPipeline,

@@ -593,7 +622,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanVideoPipeline,
         )
         from .hunyuandit import HunyuanDiTPipeline
-        from .i2vgen_xl import I2VGenXLPipeline
         from .kandinsky import (
             KandinskyCombinedPipeline,
             KandinskyImg2ImgCombinedPipeline,

@@ -624,7 +652,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LatentConsistencyModelPipeline,
         )
         from .latent_diffusion import LDMTextToImagePipeline
-        from .latte import LattePipeline
         from .ledits_pp import (
             LEditsPPDiffusionPipelineOutput,
             LEditsPPInversionPipelineOutput,

@@ -640,7 +667,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             MarigoldNormalsPipeline,
         )
         from .mochi import MochiPipeline
-        from .musicldm import MusicLDMPipeline
         from .omnigen import OmniGenPipeline
         from .pag import (
             AnimateDiffPAGPipeline,

@@ -661,12 +687,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLPAGInpaintPipeline,
             StableDiffusionXLPAGPipeline,
         )
-        from .paint_by_example import PaintByExamplePipeline
-        from .pia import PIAPipeline
         from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
         from .sana import SanaControlNetPipeline, SanaPipeline, SanaSprintPipeline
-        from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
-        from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
         from .stable_audio import StableAudioPipeline, StableAudioProjectionModel
         from .stable_cascade import (
             StableCascadeCombinedPipeline,

@@ -691,13 +713,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusion3InpaintPipeline,
             StableDiffusion3Pipeline,
         )
-        from .stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline
-        from .stable_diffusion_diffedit import StableDiffusionDiffEditPipeline
-        from .stable_diffusion_gligen import StableDiffusionGLIGENPipeline, StableDiffusionGLIGENTextImagePipeline
-        from .stable_diffusion_ldm3d import StableDiffusionLDM3DPipeline
-        from .stable_diffusion_panorama import StableDiffusionPanoramaPipeline
-        from .stable_diffusion_safe import StableDiffusionPipelineSafe
-        from .stable_diffusion_sag import StableDiffusionSAGPipeline
         from .stable_diffusion_xl import (
             StableDiffusionXLImg2ImgPipeline,
             StableDiffusionXLInpaintPipeline,

@@ -709,25 +724,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionAdapterPipeline,
             StableDiffusionXLAdapterPipeline,
         )
-        from .text_to_video_synthesis import (
-            TextToVideoSDPipeline,
-            TextToVideoZeroPipeline,
-            TextToVideoZeroSDXLPipeline,
-            VideoToVideoSDPipeline,
-        )
-        from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline
-        from .unidiffuser import (
-            ImageTextPipelineOutput,
-            UniDiffuserModel,
-            UniDiffuserPipeline,
-            UniDiffuserTextDecoder,
-        )
         from .wan import WanImageToVideoPipeline, WanPipeline, WanVideoToVideoPipeline
-        from .wuerstchen import (
-            WuerstchenCombinedPipeline,
-            WuerstchenDecoderPipeline,
-            WuerstchenPriorPipeline,
-        )
 
     try:
         if not is_onnx_available():

@@ -635,7 +635,7 @@ class AnimateDiffSDXLPipeline(
 
         return ip_adapter_image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 
@@ -738,7 +738,7 @@ class AnimateDiffSDXLPipeline(
                 "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
             )
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):
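The quoted message is the error half of the pooled-embeds consistency check; a sketch of the guard it plausibly belongs to, reconstructed from the message:

```python
def _validate_pooled_embeds(negative_prompt_embeds, negative_pooled_prompt_embeds):
    # Assumed guard around the error message shown in the hunk.
    if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
        raise ValueError(
            "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. "
            "Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to "
            "generate `negative_prompt_embeds`."
        )
```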
@@ -458,7 +458,7 @@ class AnimateDiffSparseControlNetPipeline(
 
         return ip_adapter_image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 

@@ -621,7 +621,7 @@ class AnimateDiffSparseControlNetPipeline(
                     f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
                 )
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):

@@ -39,6 +39,7 @@ from .controlnet_sd3 import (
     StableDiffusion3ControlNetPipeline,
 )
 from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline
+from .deprecated.wuerstchen import WuerstchenCombinedPipeline, WuerstchenDecoderPipeline
 from .flux import (
     FluxControlImg2ImgPipeline,
     FluxControlInpaintPipeline,
@@ -106,7 +107,6 @@ from .stable_diffusion_xl import (
     StableDiffusionXLInpaintPipeline,
     StableDiffusionXLPipeline,
 )
-from .wuerstchen import WuerstchenCombinedPipeline, WuerstchenDecoderPipeline
 
 
 AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
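The import moves above feed the AUTO mappings built right after this hunk. A sketch of how such a mapping pairs a model-family key with its pipeline class so `AutoPipelineForText2Image` can resolve one from a checkpoint's config; the two entries are illustrative assumptions, not the full table:

```python
from collections import OrderedDict

from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline

AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
    [
        ("stable-diffusion", StableDiffusionPipeline),
        ("stable-diffusion-xl", StableDiffusionXLPipeline),
    ]
)
```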
@@ -373,7 +373,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
     def check_inputs(
         self,
         prompt,

@@ -333,7 +333,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
     def check_inputs(
         self,
         prompt,

@@ -24,7 +24,6 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["multicontrolnet"] = ["MultiControlNetModel"]
     _import_structure["pipeline_controlnet"] = ["StableDiffusionControlNetPipeline"]
-    _import_structure["pipeline_controlnet_blip_diffusion"] = ["BlipDiffusionControlNetPipeline"]
     _import_structure["pipeline_controlnet_img2img"] = ["StableDiffusionControlNetImg2ImgPipeline"]
     _import_structure["pipeline_controlnet_inpaint"] = ["StableDiffusionControlNetInpaintPipeline"]
     _import_structure["pipeline_controlnet_inpaint_sd_xl"] = ["StableDiffusionXLControlNetInpaintPipeline"]

@@ -54,7 +53,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     else:
         from .multicontrolnet import MultiControlNetModel
         from .pipeline_controlnet import StableDiffusionControlNetPipeline
-        from .pipeline_controlnet_blip_diffusion import BlipDiffusionControlNetPipeline
         from .pipeline_controlnet_img2img import StableDiffusionControlNetImg2ImgPipeline
         from .pipeline_controlnet_inpaint import StableDiffusionControlNetInpaintPipeline
         from .pipeline_controlnet_inpaint_sd_xl import StableDiffusionXLControlNetInpaintPipeline

@@ -1,3 +1,7 @@
+import functools
+import inspect
+import sys
+import warnings
 from typing import TYPE_CHECKING
 
 from ...utils import (
@@ -9,9 +13,32 @@ from ...utils import (
     is_note_seq_available,
     is_torch_available,
     is_transformers_available,
+    logging,
 )
 
 
+logger = logging.get_logger(__name__)
+
+
+# Custom Lazy Module for deprecated pipelines that shows a warning
+class _DeprecatedLazyModule(_LazyModule):
+    """
+    Module class that surfaces all objects but only performs associated imports when the objects are requested, and
+    shows deprecation warnings when any of its attributes are accessed.
+    """
+
+    def __getattr__(self, name):
+        # Regular attribute access - first check if it's supposed to be loaded
+        if name in self._modules or name in self._class_to_module:
+            # Only warn for actual pipeline components, not utility functions
+            logger.warning(
+                f"{name} is deprecated and will no longer be maintained or receive future updates.",
+            )
+
+        # Use the standard lazy module behavior to load the attribute
+        return super().__getattr__(name)
+
+
 _dummy_objects = {}
 _import_structure = {}
 
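Once defined, `_DeprecatedLazyModule` takes the place of `_LazyModule` when the module object is installed. The tail of this file is not part of the hunk; a sketch assuming the standard diffusers `__init__` wiring, using `_import_structure` and `_dummy_objects` as defined above:

```python
import sys

# Assumed wiring, following the usual diffusers lazy-module tail:
sys.modules[__name__] = _DeprecatedLazyModule(
    __name__,
    globals()["__file__"],
    _import_structure,
    module_spec=__spec__,
)

# Expose dummy placeholders for objects whose optional dependencies are missing.
for name, value in _dummy_objects.items():
    setattr(sys.modules[__name__], name, value)
```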
@@ -23,6 +50,7 @@ except OptionalDependencyNotAvailable:
 
     _dummy_objects.update(get_objects_from_module(dummy_pt_objects))
 else:
+    _import_structure["dance_diffusion"] = ["DanceDiffusionPipeline"]
     _import_structure["latent_diffusion_uncond"] = ["LDMPipeline"]
     _import_structure["pndm"] = ["PNDMPipeline"]
     _import_structure["repaint"] = ["RePaintPipeline"]

@@ -42,6 +70,62 @@ else:
             "AltDiffusionPipeline",
             "AltDiffusionPipelineOutput",
         ]
+    _import_structure["amused"] = ["AmusedPipeline", "AmusedImg2ImgPipeline", "AmusedInpaintPipeline"]
+    _import_structure["audioldm"] = ["AudioLDMPipeline"]
+    _import_structure["controlnet_xs"] = [
+        "StableDiffusionControlNetXSPipeline",
+        "StableDiffusionXLControlNetXSPipeline",
+    ]
+    _import_structure["blip_diffusion"] = ["BlipDiffusionPipeline"]
+    _import_structure["controlnet"] = ["BlipDiffusionControlNetPipeline"]
+    _import_structure["i2vgen_xl"] = ["I2VGenXLPipeline"]
+    _import_structure["latte"] = ["LattePipeline"]
+    _import_structure["musicldm"] = ["MusicLDMPipeline"]
+    _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
+    _import_structure["pia"] = ["PIAPipeline"]
+    _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
+    _import_structure["shap_e"] = [
+        "ShapEPipeline",
+        "ShapEImg2ImgPipeline",
+    ]
+    _import_structure["stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
+    _import_structure["stable_diffusion_diffedit"] = ["StableDiffusionDiffEditPipeline"]
+    _import_structure["stable_diffusion_gligen"] = [
+        "StableDiffusionGLIGENPipeline",
+        "StableDiffusionGLIGENTextImagePipeline",
+    ]
+    _import_structure["stable_diffusion_k_diffusion"] = [
+        "StableDiffusionKDiffusionPipeline",
+        "StableDiffusionXLKDiffusionPipeline",
+    ]
+    _import_structure["stable_diffusion_ldm3d"] = ["StableDiffusionLDM3DPipeline"]
+    _import_structure["stable_diffusion_panorama"] = ["StableDiffusionPanoramaPipeline"]
+    _import_structure["stable_diffusion_safe"] = ["StableDiffusionPipelineSafe"]
+    _import_structure["stable_diffusion_sag"] = ["StableDiffusionSAGPipeline"]
+    _import_structure["stable_diffusion_variants"] = [
+        "CycleDiffusionPipeline",
+        "StableDiffusionInpaintPipelineLegacy",
+        "StableDiffusionPix2PixZeroPipeline",
+        "StableDiffusionParadigmsPipeline",
+        "StableDiffusionModelEditingPipeline",
+    ]
+    _import_structure["text_to_video_synthesis"] = [
+        "TextToVideoSDPipeline",
+        "TextToVideoZeroPipeline",
+        "TextToVideoZeroSDXLPipeline",
+        "VideoToVideoSDPipeline",
+    ]
+    _import_structure["unclip"] = [
+        "UnCLIPPipeline",
+        "UnCLIPImageVariationPipeline",
+    ]
+    _import_structure["unidiffuser"] = [
+        "UniDiffuserTextDecoder",
+        "UniDiffuserModel",
+        "UTransformer2DModel",
+        "UniDiffuserPipeline",
+        "ImageTextPipelineOutput",
+    ]
     _import_structure["versatile_diffusion"] = [
         "VersatileDiffusionDualGuidedPipeline",
         "VersatileDiffusionImageVariationPipeline",

@@ -49,12 +133,10 @@ else:
         "VersatileDiffusionTextToImagePipeline",
     ]
     _import_structure["vq_diffusion"] = ["VQDiffusionPipeline"]
-    _import_structure["stable_diffusion_variants"] = [
-        "CycleDiffusionPipeline",
-        "StableDiffusionInpaintPipelineLegacy",
-        "StableDiffusionPix2PixZeroPipeline",
+    _import_structure["wuerstchen"] = [
+        "WuerstchenCombinedPipeline",
+        "WuerstchenDecoderPipeline",
|
"WuerstchenPriorPipeline",
|
||||||
"StableDiffusionParadigmsPipeline",
|
|
||||||
"StableDiffusionModelEditingPipeline",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -88,6 +170,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|||||||
from ...utils.dummy_pt_objects import *
|
from ...utils.dummy_pt_objects import *
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
from .dance_diffusion import DanceDiffusionPipeline
|
||||||
from .latent_diffusion_uncond import LDMPipeline
|
from .latent_diffusion_uncond import LDMPipeline
|
||||||
from .pndm import PNDMPipeline
|
from .pndm import PNDMPipeline
|
||||||
from .repaint import RePaintPipeline
|
from .repaint import RePaintPipeline
|
||||||
@@ -102,8 +185,29 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
from .alt_diffusion import AltDiffusionImg2ImgPipeline, AltDiffusionPipeline, AltDiffusionPipelineOutput
|
from .alt_diffusion import AltDiffusionImg2ImgPipeline, AltDiffusionPipeline, AltDiffusionPipelineOutput
|
||||||
from .audio_diffusion import AudioDiffusionPipeline, Mel
|
from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline
|
||||||
from .spectrogram_diffusion import SpectrogramDiffusionPipeline
|
from .audioldm import AudioLDMPipeline
|
||||||
|
from .blip_diffusion import BlipDiffusionPipeline
|
||||||
|
from .controlnet import BlipDiffusionControlNetPipeline
|
||||||
|
from .controlnet_xs import StableDiffusionControlNetXSPipeline, StableDiffusionXLControlNetXSPipeline
|
||||||
|
from .i2vgen_xl import I2VGenXLPipeline
|
||||||
|
from .latte import LattePipeline
|
||||||
|
from .musicldm import MusicLDMPipeline
|
||||||
|
from .paint_by_example import PaintByExamplePipeline
|
||||||
|
from .pia import PIAPipeline
|
||||||
|
from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
|
||||||
|
from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
|
||||||
|
from .stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline
|
||||||
|
from .stable_diffusion_diffedit import StableDiffusionDiffEditPipeline
|
||||||
|
from .stable_diffusion_gligen import StableDiffusionGLIGENPipeline, StableDiffusionGLIGENTextImagePipeline
|
||||||
|
from .stable_diffusion_k_diffusion import (
|
||||||
|
StableDiffusionKDiffusionPipeline,
|
||||||
|
StableDiffusionXLKDiffusionPipeline,
|
||||||
|
)
|
||||||
|
from .stable_diffusion_ldm3d import StableDiffusionLDM3DPipeline
|
||||||
|
from .stable_diffusion_panorama import StableDiffusionPanoramaPipeline
|
||||||
|
from .stable_diffusion_safe import StableDiffusionPipelineSafe
|
||||||
|
from .stable_diffusion_sag import StableDiffusionSAGPipeline
|
||||||
from .stable_diffusion_variants import (
|
from .stable_diffusion_variants import (
|
||||||
CycleDiffusionPipeline,
|
CycleDiffusionPipeline,
|
||||||
StableDiffusionInpaintPipelineLegacy,
|
StableDiffusionInpaintPipelineLegacy,
|
||||||
@@ -111,7 +215,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|||||||
StableDiffusionParadigmsPipeline,
|
StableDiffusionParadigmsPipeline,
|
||||||
StableDiffusionPix2PixZeroPipeline,
|
StableDiffusionPix2PixZeroPipeline,
|
||||||
)
|
)
|
||||||
from .stochastic_karras_ve import KarrasVePipeline
|
from .text_to_video_synthesis import (
|
||||||
|
TextToVideoSDPipeline,
|
||||||
|
TextToVideoZeroPipeline,
|
||||||
|
TextToVideoZeroSDXLPipeline,
|
||||||
|
VideoToVideoSDPipeline,
|
||||||
|
)
|
||||||
|
from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline
|
||||||
|
from .unidiffuser import ImageTextPipelineOutput, UniDiffuserModel, UniDiffuserPipeline, UniDiffuserTextDecoder
|
||||||
from .versatile_diffusion import (
|
from .versatile_diffusion import (
|
||||||
VersatileDiffusionDualGuidedPipeline,
|
VersatileDiffusionDualGuidedPipeline,
|
||||||
VersatileDiffusionImageVariationPipeline,
|
VersatileDiffusionImageVariationPipeline,
|
||||||
@@ -119,6 +230,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|||||||
VersatileDiffusionTextToImagePipeline,
|
VersatileDiffusionTextToImagePipeline,
|
||||||
)
|
)
|
||||||
from .vq_diffusion import VQDiffusionPipeline
|
from .vq_diffusion import VQDiffusionPipeline
|
||||||
|
from .wuerstchen import WuerstchenCombinedPipeline, WuerstchenDecoderPipeline, WuerstchenPriorPipeline
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not (is_torch_available() and is_librosa_available()):
|
if not (is_torch_available() and is_librosa_available()):
|
||||||
@@ -143,7 +255,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|||||||
else:
|
else:
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.modules[__name__] = _LazyModule(
|
# Use the custom deprecated lazy module instead of the standard one
|
||||||
|
sys.modules[__name__] = _DeprecatedLazyModule(
|
||||||
__name__,
|
__name__,
|
||||||
globals()["__file__"],
|
globals()["__file__"],
|
||||||
_import_structure,
|
_import_structure,
|
||||||
|
|||||||
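The whole deprecation mechanism in this file is the `__getattr__` hook: the warning fires on first attribute access, and resolution then falls through to the normal lazy import. A standalone sketch of the same pattern follows; the class name and registry below are illustrative stand-ins, not diffusers API, and only the hook itself mirrors the diff:

```python
import importlib
import logging
import types

logger = logging.getLogger(__name__)


class DemoDeprecatedLazyModule(types.ModuleType):
    """Illustrative warn-on-access module; only the __getattr__ hook mirrors the diff."""

    def __init__(self, name, class_to_module):
        super().__init__(name)
        # Maps public names to the absolute module that defines them, e.g.
        # {"LDMPipeline": "diffusers.pipelines.deprecated.latent_diffusion_uncond"}.
        self._class_to_module = class_to_module

    def __getattr__(self, name):
        if name in self._class_to_module:
            # Same hook as _DeprecatedLazyModule: warn first, then import lazily.
            logger.warning(f"{name} is deprecated and will no longer be maintained or receive future updates.")
            submodule = importlib.import_module(self._class_to_module[name])
            return getattr(submodule, name)
        raise AttributeError(f"module {self.__name__!r} has no attribute {name!r}")
```

Because `__getattr__` only runs for names not found by normal lookup, the warning costs nothing after the first access of a given pipeline in a process.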
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -16,7 +16,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils.dummy_torch_and_transformers_objects import (
+    from ....utils.dummy_torch_and_transformers_objects import (
         AmusedImg2ImgPipeline,
         AmusedInpaintPipeline,
         AmusedPipeline,
@@ -40,7 +40,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import (
+        from ....utils.dummy_torch_and_transformers_objects import (
             AmusedPipeline,
         )
     else:
@@ -17,11 +17,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...models import UVit2DModel, VQModel
-from ...schedulers import AmusedScheduler
-from ...utils import is_torch_xla_available, replace_example_docstring
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import UVit2DModel, VQModel
+from ....schedulers import AmusedScheduler
+from ....utils import is_torch_xla_available, replace_example_docstring
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 if is_torch_xla_available():
@@ -17,11 +17,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...models import UVit2DModel, VQModel
-from ...schedulers import AmusedScheduler
-from ...utils import is_torch_xla_available, replace_example_docstring
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....models import UVit2DModel, VQModel
+from ....schedulers import AmusedScheduler
+from ....utils import is_torch_xla_available, replace_example_docstring
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 if is_torch_xla_available():
@@ -18,11 +18,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...models import UVit2DModel, VQModel
-from ...schedulers import AmusedScheduler
-from ...utils import is_torch_xla_available, replace_example_docstring
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....models import UVit2DModel, VQModel
+from ....schedulers import AmusedScheduler
+from ....utils import is_torch_xla_available, replace_example_docstring
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 if is_torch_xla_available():
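All of the `...` → `....` rewrites above are the same mechanical change: each module moved one package deeper (`pipelines/<name>/` → `pipelines/deprecated/<name>/`), and each leading dot in a relative import climbs one package. A quick sanity check of the dot arithmetic, using a toy resolver (a deliberate simplification of Python's real import machinery):

```python
def resolve_relative(package: str, level: int, target: str) -> str:
    """Toy model of how Python resolves `from <'.' * level><target> import ...`
    inside a module whose __package__ is `package`."""
    parts = package.split(".")
    # One leading dot stays in the current package; each extra dot climbs one level.
    base = parts[: len(parts) - (level - 1)]
    return ".".join(base + [target])


# Old location: src/diffusers/pipelines/amused/pipeline_amused.py
assert resolve_relative("diffusers.pipelines.amused", 3, "utils") == "diffusers.utils"
# New location: src/diffusers/pipelines/deprecated/amused/pipeline_amused.py
assert resolve_relative("diffusers.pipelines.deprecated.amused", 4, "utils") == "diffusers.utils"
# Likewise ..pipeline_utils becomes ...pipeline_utils but still names the same module:
assert resolve_relative("diffusers.pipelines.deprecated.amused", 3, "pipeline_utils") == "diffusers.pipelines.pipeline_utils"
```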
src/diffusers/pipelines/deprecated/audioldm/__init__.py (new file, 51 lines)

from typing import TYPE_CHECKING

from ....utils import (
    DIFFUSERS_SLOW_IMPORT,
    OptionalDependencyNotAvailable,
    _LazyModule,
    is_torch_available,
    is_transformers_available,
    is_transformers_version,
)


_dummy_objects = {}
_import_structure = {}

try:
    if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    from ....utils.dummy_torch_and_transformers_objects import (
        AudioLDMPipeline,
    )

    _dummy_objects.update({"AudioLDMPipeline": AudioLDMPipeline})
else:
    _import_structure["pipeline_audioldm"] = ["AudioLDMPipeline"]


if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
    try:
        if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        from ....utils.dummy_torch_and_transformers_objects import (
            AudioLDMPipeline,
        )

    else:
        from .pipeline_audioldm import AudioLDMPipeline
else:
    import sys

    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()["__file__"],
        _import_structure,
        module_spec=__spec__,
    )

    for name, value in _dummy_objects.items():
        setattr(sys.modules[__name__], name, value)
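The new `__init__.py` is the standard diffusers optional-dependency guard: if the backend check fails, a dummy object is registered under the real name, so the import itself still succeeds and the failure is deferred to first use. A minimal standalone sketch of that pattern; the dummy class and availability check below are illustrative, not the actual `dummy_torch_and_transformers_objects` implementation:

```python
import importlib.util

_dummy_objects = {}
_import_structure = {}


class _DummyAudioLDMPipeline:  # illustrative stand-in for the generated dummy object
    def __init__(self, *args, **kwargs):
        raise ImportError("AudioLDMPipeline requires `torch` and `transformers>=4.27.0`.")


# Simplified stand-in for is_torch_available() / is_transformers_available():
if importlib.util.find_spec("torch") and importlib.util.find_spec("transformers"):
    _import_structure["pipeline_audioldm"] = ["AudioLDMPipeline"]
else:
    # The import still succeeds; instantiating the dummy raises a helpful error instead.
    _dummy_objects["AudioLDMPipeline"] = _DummyAudioLDMPipeline
```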
src/diffusers/pipelines/deprecated/audioldm/pipeline_audioldm.py (new file, 561 lines)

# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
from typing import Any, Callable, Dict, List, Optional, Union

import numpy as np
import torch
import torch.nn.functional as F
from transformers import ClapTextModelWithProjection, RobertaTokenizer, RobertaTokenizerFast, SpeechT5HifiGan

from ....models import AutoencoderKL, UNet2DConditionModel
from ....schedulers import KarrasDiffusionSchedulers
from ....utils import is_torch_xla_available, logging, replace_example_docstring
from ....utils.torch_utils import randn_tensor
from ...pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> from diffusers import AudioLDMPipeline
        >>> import torch
        >>> import scipy

        >>> repo_id = "cvssp/audioldm-s-full-v2"
        >>> pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
        >>> pipe = pipe.to("cuda")

        >>> prompt = "Techno music with a strong, upbeat tempo and high melodic riffs"
        >>> audio = pipe(prompt, num_inference_steps=10, audio_length_in_s=5.0).audios[0]

        >>> # save the audio sample as a .wav file
        >>> scipy.io.wavfile.write("techno.wav", rate=16000, data=audio)
        ```
"""


class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
    r"""
    Pipeline for text-to-audio generation using AudioLDM.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
        text_encoder ([`~transformers.ClapTextModelWithProjection`]):
            Frozen text-encoder (`ClapTextModelWithProjection`, specifically the
            [laion/clap-htsat-unfused](https://huggingface.co/laion/clap-htsat-unfused) variant).
        tokenizer ([`PreTrainedTokenizer`]):
            A [`~transformers.RobertaTokenizer`] to tokenize text.
        unet ([`UNet2DConditionModel`]):
            A `UNet2DConditionModel` to denoise the encoded audio latents.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `unet` to denoise the encoded audio latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
        vocoder ([`~transformers.SpeechT5HifiGan`]):
            Vocoder of class `SpeechT5HifiGan`.
    """

    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: ClapTextModelWithProjection,
        tokenizer: Union[RobertaTokenizer, RobertaTokenizerFast],
        unet: UNet2DConditionModel,
        scheduler: KarrasDiffusionSchedulers,
        vocoder: SpeechT5HifiGan,
    ):
        super().__init__()

        logger.warning(f"{self.__class__.__name__} is deprecated and will no longer be actively maintained")

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
            vocoder=vocoder,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8

    def _encode_prompt(
        self,
        prompt,
        device,
        num_waveforms_per_prompt,
        do_classifier_free_guidance,
        negative_prompt=None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            device (`torch.device`):
                torch device
            num_waveforms_per_prompt (`int`):
                number of waveforms that should be generated per prompt
            do_classifier_free_guidance (`bool`):
                whether to use classifier free guidance or not
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the audio generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
        """
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        if prompt_embeds is None:
            text_inputs = self.tokenizer(
                prompt,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            attention_mask = text_inputs.attention_mask
            untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                text_input_ids, untruncated_ids
            ):
                removed_text = self.tokenizer.batch_decode(
                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
                )
                logger.warning(
                    "The following part of your input was truncated because CLAP can only handle sequences up to"
                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                )

            prompt_embeds = self.text_encoder(
                text_input_ids.to(device),
                attention_mask=attention_mask.to(device),
            )
            prompt_embeds = prompt_embeds.text_embeds
            # additional L_2 normalization over each hidden-state
            prompt_embeds = F.normalize(prompt_embeds, dim=-1)

        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)

        (
            bs_embed,
            seq_len,
        ) = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        prompt_embeds = prompt_embeds.repeat(1, num_waveforms_per_prompt)
        prompt_embeds = prompt_embeds.view(bs_embed * num_waveforms_per_prompt, seq_len)

        # get unconditional embeddings for classifier free guidance
        if do_classifier_free_guidance and negative_prompt_embeds is None:
            uncond_tokens: List[str]
            if negative_prompt is None:
                uncond_tokens = [""] * batch_size
            elif type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = negative_prompt

            max_length = prompt_embeds.shape[1]
            uncond_input = self.tokenizer(
                uncond_tokens,
                padding="max_length",
                max_length=max_length,
                truncation=True,
                return_tensors="pt",
            )

            uncond_input_ids = uncond_input.input_ids.to(device)
            attention_mask = uncond_input.attention_mask.to(device)

            negative_prompt_embeds = self.text_encoder(
                uncond_input_ids,
                attention_mask=attention_mask,
            )
            negative_prompt_embeds = negative_prompt_embeds.text_embeds
            # additional L_2 normalization over each hidden-state
            negative_prompt_embeds = F.normalize(negative_prompt_embeds, dim=-1)

        if do_classifier_free_guidance:
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            seq_len = negative_prompt_embeds.shape[1]

            negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)

            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_waveforms_per_prompt)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_waveforms_per_prompt, seq_len)

            # For classifier free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into a single batch
            # to avoid doing two forward passes
            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

        return prompt_embeds

    def decode_latents(self, latents):
        latents = 1 / self.vae.config.scaling_factor * latents
        mel_spectrogram = self.vae.decode(latents).sample
        return mel_spectrogram

    def mel_spectrogram_to_waveform(self, mel_spectrogram):
        if mel_spectrogram.dim() == 4:
            mel_spectrogram = mel_spectrogram.squeeze(1)

        waveform = self.vocoder(mel_spectrogram)
        # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
        waveform = waveform.cpu().float()
        return waveform

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
    def prepare_extra_step_kwargs(self, generator, eta):
        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
        # and should be between [0, 1]

        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
        extra_step_kwargs = {}
        if accepts_eta:
            extra_step_kwargs["eta"] = eta

        # check if the scheduler accepts generator
        accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
        if accepts_generator:
            extra_step_kwargs["generator"] = generator
        return extra_step_kwargs

    def check_inputs(
        self,
        prompt,
        audio_length_in_s,
        vocoder_upsample_factor,
        callback_steps,
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
    ):
        min_audio_length_in_s = vocoder_upsample_factor * self.vae_scale_factor
        if audio_length_in_s < min_audio_length_in_s:
            raise ValueError(
                f"`audio_length_in_s` has to be a positive value greater than or equal to {min_audio_length_in_s}, but "
                f"is {audio_length_in_s}."
            )

        if self.vocoder.config.model_in_dim % self.vae_scale_factor != 0:
            raise ValueError(
                f"The number of frequency bins in the vocoder's log-mel spectrogram has to be divisible by the "
                f"VAE scale factor, but got {self.vocoder.config.model_in_dim} bins and a scale factor of "
                f"{self.vae_scale_factor}."
            )

        if (callback_steps is None) or (
            callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
        ):
            raise ValueError(
                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
                f" {type(callback_steps)}."
            )

        if prompt is not None and prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
                " only forward one of the two."
            )
        elif prompt is None and prompt_embeds is None:
            raise ValueError(
                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
            )
        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

        if negative_prompt is not None and negative_prompt_embeds is not None:
            raise ValueError(
                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
            )

        if prompt_embeds is not None and negative_prompt_embeds is not None:
            if prompt_embeds.shape != negative_prompt_embeds.shape:
                raise ValueError(
                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                    f" {negative_prompt_embeds.shape}."
                )

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents with width->self.vocoder.config.model_in_dim
    def prepare_latents(self, batch_size, num_channels_latents, height, dtype, device, generator, latents=None):
        shape = (
            batch_size,
            num_channels_latents,
            int(height) // self.vae_scale_factor,
            int(self.vocoder.config.model_in_dim) // self.vae_scale_factor,
        )
        if isinstance(generator, list) and len(generator) != batch_size:
            raise ValueError(
                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
            )

        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
        else:
            latents = latents.to(device)

        # scale the initial noise by the standard deviation required by the scheduler
        latents = latents * self.scheduler.init_noise_sigma
        return latents

    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        audio_length_in_s: Optional[float] = None,
        num_inference_steps: int = 10,
        guidance_scale: float = 2.5,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        num_waveforms_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.Tensor] = None,
        prompt_embeds: Optional[torch.Tensor] = None,
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
        callback_steps: Optional[int] = 1,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        output_type: Optional[str] = "np",
    ):
        r"""
        The call function to the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide audio generation. If not defined, you need to pass `prompt_embeds`.
            audio_length_in_s (`int`, *optional*, defaults to 5.12):
                The length of the generated audio sample in seconds.
            num_inference_steps (`int`, *optional*, defaults to 10):
                The number of denoising steps. More denoising steps usually lead to a higher quality audio at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 2.5):
                A higher guidance scale value encourages the model to generate audio that is closely linked to the text
                `prompt` at the expense of lower sound quality. Guidance scale is enabled when `guidance_scale > 1`.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide what to not include in audio generation. If not defined, you need to
                pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
            num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
                The number of waveforms to generate per prompt.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor is generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                provided, text embeddings are generated from the `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
            callback (`Callable`, *optional*):
                A function that calls every `callback_steps` steps during inference. The function is called with the
                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function is called. If not specified, the callback is called at
                every step.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            output_type (`str`, *optional*, defaults to `"np"`):
                The output format of the generated image. Choose between `"np"` to return a NumPy `np.ndarray` or
                `"pt"` to return a PyTorch `torch.Tensor` object.

        Examples:

        Returns:
            [`~pipelines.AudioPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.AudioPipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated audio.
        """
        # 0. Convert audio input length from seconds to spectrogram height
        vocoder_upsample_factor = np.prod(self.vocoder.config.upsample_rates) / self.vocoder.config.sampling_rate

        if audio_length_in_s is None:
            audio_length_in_s = self.unet.config.sample_size * self.vae_scale_factor * vocoder_upsample_factor

        height = int(audio_length_in_s / vocoder_upsample_factor)

        original_waveform_length = int(audio_length_in_s * self.vocoder.config.sampling_rate)
        if height % self.vae_scale_factor != 0:
            height = int(np.ceil(height / self.vae_scale_factor)) * self.vae_scale_factor
            logger.info(
                f"Audio length in seconds {audio_length_in_s} is increased to {height * vocoder_upsample_factor} "
                f"so that it can be handled by the model. It will be cut to {audio_length_in_s} after the "
                f"denoising process."
            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
            prompt,
            audio_length_in_s,
            vocoder_upsample_factor,
            callback_steps,
            negative_prompt,
            prompt_embeds,
            negative_prompt_embeds,
        )

        # 2. Define call parameters
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        device = self._execution_device
        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
        # corresponds to doing no classifier free guidance.
        do_classifier_free_guidance = guidance_scale > 1.0

        # 3. Encode input prompt
        prompt_embeds = self._encode_prompt(
            prompt,
            device,
            num_waveforms_per_prompt,
            do_classifier_free_guidance,
            negative_prompt,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
        )

        # 4. Prepare timesteps
        self.scheduler.set_timesteps(num_inference_steps, device=device)
        timesteps = self.scheduler.timesteps

        # 5. Prepare latent variables
        num_channels_latents = self.unet.config.in_channels
        latents = self.prepare_latents(
            batch_size * num_waveforms_per_prompt,
            num_channels_latents,
            height,
            prompt_embeds.dtype,
            device,
            generator,
            latents,
        )

        # 6. Prepare extra step kwargs
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

        # 7. Denoising loop
        num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
        with self.progress_bar(total=num_inference_steps) as progress_bar:
            for i, t in enumerate(timesteps):
                # expand the latents if we are doing classifier free guidance
                latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

                # predict the noise residual
                noise_pred = self.unet(
                    latent_model_input,
                    t,
                    encoder_hidden_states=None,
                    class_labels=prompt_embeds,
                    cross_attention_kwargs=cross_attention_kwargs,
                ).sample

                # perform guidance
                if do_classifier_free_guidance:
                    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

                # compute the previous noisy sample x_t -> x_t-1
                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                    progress_bar.update()
                    if callback is not None and i % callback_steps == 0:
                        step_idx = i // getattr(self.scheduler, "order", 1)
                        callback(step_idx, t, latents)

                if XLA_AVAILABLE:
                    xm.mark_step()

        # 8. Post-processing
        mel_spectrogram = self.decode_latents(latents)

        audio = self.mel_spectrogram_to_waveform(mel_spectrogram)

        audio = audio[:, :original_waveform_length]

        if output_type == "np":
            audio = audio.numpy()

        if not return_dict:
            return (audio,)

        return AudioPipelineOutput(audios=audio)
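Step 0 of `__call__` is plain arithmetic: one spectrogram row corresponds to `prod(upsample_rates) / sampling_rate` seconds of audio, and the requested duration is rounded up to a multiple of the VAE scale factor before being divided down to the latent grid. A worked example with assumed config values; the real numbers come from the loaded checkpoint, so treat every constant below as a placeholder:

```python
import numpy as np

# Assumed checkpoint configuration -- placeholders, not read from any repo:
upsample_rates = [5, 4, 2, 2, 2]  # vocoder upsampling stages, product 160
sampling_rate = 16_000            # vocoder output rate in Hz
vae_scale_factor = 4              # 2 ** (len(block_out_channels) - 1) for 3 blocks
model_in_dim = 64                 # mel bins expected by the vocoder
num_channels_latents = 8          # assumed unet.config.in_channels

# One spectrogram row covers prod(upsample_rates) / sampling_rate seconds:
vocoder_upsample_factor = np.prod(upsample_rates) / sampling_rate  # 0.01 s/row

audio_length_in_s = 5.0
height = int(audio_length_in_s / vocoder_upsample_factor)  # 500 rows
# Round up to a multiple of the VAE scale factor (already divisible here):
height = int(np.ceil(height / vae_scale_factor)) * vae_scale_factor  # 500

latent_shape = (1, num_channels_latents, height // vae_scale_factor, model_in_dim // vae_scale_factor)
print(latent_shape)  # (1, 8, 125, 16): the time x frequency latent grid

# After vocoding, the waveform is trimmed back to the exact request:
original_waveform_length = int(audio_length_in_s * sampling_rate)  # 80000 samples
```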
@@ -5,7 +5,7 @@ import numpy as np
 import PIL
 from PIL import Image
 
-from ...utils import OptionalDependencyNotAvailable, is_torch_available, is_transformers_available
+from ....utils import OptionalDependencyNotAvailable, is_torch_available, is_transformers_available
 
 
 try:
@@ -17,15 +17,15 @@ import PIL.Image
 import torch
 from transformers import CLIPTokenizer
 
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import PNDMScheduler
-from ...utils import (
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import PNDMScheduler
+from ....utils import (
     is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .blip_image_processing import BlipImageProcessor
 from .modeling_blip2 import Blip2QFormerModel
 from .modeling_ctx_clip import ContextCLIPTextModel
src/diffusers/pipelines/deprecated/controlnet/__init__.py (new file, 46 lines)

from typing import TYPE_CHECKING

from ....utils import (
    DIFFUSERS_SLOW_IMPORT,
    OptionalDependencyNotAvailable,
    _LazyModule,
    get_objects_from_module,
    is_torch_available,
    is_transformers_available,
)


_dummy_objects = {}
_import_structure = {}

try:
    if not (is_transformers_available() and is_torch_available()):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    from ....utils import dummy_torch_and_transformers_objects  # noqa F403

    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
    _import_structure["pipeline_controlnet_blip_diffusion"] = ["BlipDiffusionControlNetPipeline"]


if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
    try:
        if not (is_transformers_available() and is_torch_available()):
            raise OptionalDependencyNotAvailable()

    except OptionalDependencyNotAvailable:
        from ....utils.dummy_torch_and_transformers_objects import *
    else:
        from .pipeline_controlnet_blip_diffusion import BlipDiffusionControlNetPipeline

else:
    import sys

    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()["__file__"],
        _import_structure,
        module_spec=__spec__,
    )
    for name, value in _dummy_objects.items():
        setattr(sys.modules[__name__], name, value)
@@ -18,18 +18,18 @@ import PIL.Image
 import torch
 from transformers import CLIPTokenizer
 
-from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
-from ...schedulers import PNDMScheduler
-from ...utils import (
+from ....models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
+from ....schedulers import PNDMScheduler
+from ....utils import (
     is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from ..blip_diffusion.blip_image_processing import BlipImageProcessor
 from ..blip_diffusion.modeling_blip2 import Blip2QFormerModel
 from ..blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -28,7 +28,7 @@ try:
     if not (is_transformers_available() and is_flax_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_flax_and_transformers_objects  # noqa F403
+    from ....utils import dummy_flax_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_flax_and_transformers_objects))
 else:
@@ -41,7 +41,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_controlnet_xs import StableDiffusionControlNetXSPipeline
         from .pipeline_controlnet_xs_sd_xl import StableDiffusionXLControlNetXSPipeline
@@ -50,7 +50,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_flax_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_flax_and_transformers_objects import *  # noqa F403
+        from ....utils.dummy_flax_and_transformers_objects import *  # noqa F403
     else:
         pass  # from .pipeline_flax_controlnet import FlaxStableDiffusionControlNetPipeline
@@ -21,13 +21,13 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...callbacks import MultiPipelineCallbacks, PipelineCallback
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....callbacks import MultiPipelineCallbacks, PipelineCallback
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -36,10 +36,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -28,33 +28,33 @@ from transformers import (
 
 from diffusers.utils.import_utils import is_invisible_watermark_available
 
-from ...callbacks import MultiPipelineCallbacks, PipelineCallback
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
-from ...models.attention_processor import (
+from ....callbacks import MultiPipelineCallbacks, PipelineCallback
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
+from ....models.attention_processor import (
     AttnProcessor2_0,
     XFormersAttnProcessor,
 )
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     logging,
     replace_example_docstring,
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
-from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from ....utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
+from ...pipeline_utils import DiffusionPipeline
+from ...stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 
 
 if is_invisible_watermark_available():
-    from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
+    from ...stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
 
 
-from ...utils import is_torch_xla_available
+from ....utils import is_torch_xla_available
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
+from ....utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
 
 
 _import_structure = {"pipeline_dance_diffusion": ["DanceDiffusionPipeline"]}
@@ -17,11 +17,11 @@ from typing import List, Optional, Tuple, Union
 
 import torch
 
-from ...models import UNet1DModel
-from ...schedulers import SchedulerMixin
-from ...utils import is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
+from ....models import UNet1DModel
+from ....schedulers import SchedulerMixin
+from ....utils import is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import AudioPipelineOutput, DiffusionPipeline
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -29,7 +29,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+        from ....utils.dummy_torch_and_transformers_objects import *  # noqa F403
     else:
         from .pipeline_i2vgen_xl import I2VGenXLPipeline
 
@@ -21,19 +21,19 @@ import PIL
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...models import AutoencoderKL
-from ...models.unets.unet_i2vgen_xl import I2VGenXLUNet
-from ...schedulers import DDIMScheduler
-from ...utils import (
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....models import AutoencoderKL
+from ....models.unets.unet_i2vgen_xl import I2VGenXLUNet
+from ....schedulers import DDIMScheduler
+from ....utils import (
     BaseOutput,
     is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
-from ...video_processor import VideoProcessor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ....video_processor import VideoProcessor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 
 
 if is_torch_xla_available():
@@ -479,7 +479,7 @@ class I2VGenXLPipeline(
 
         return image_latents
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_latte import LattePipeline
 
@@ -23,11 +23,10 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import T5EncoderModel, T5Tokenizer
 
-from ...callbacks import MultiPipelineCallbacks, PipelineCallback
-from ...models import AutoencoderKL, LatteTransformer3DModel
-from ...pipelines.pipeline_utils import DiffusionPipeline
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....callbacks import MultiPipelineCallbacks, PipelineCallback
+from ....models import AutoencoderKL, LatteTransformer3DModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     BACKENDS_MAPPING,
     BaseOutput,
     deprecate,
@@ -37,8 +36,9 @@ from ...utils import (
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ...video_processor import VideoProcessor
+from ....utils.torch_utils import is_compiled_module, randn_tensor
+from ....video_processor import VideoProcessor
+from ...pipeline_utils import DiffusionPipeline
 
 
 if is_torch_xla_available():
@@ -561,7 +561,7 @@ class LattePipeline(DiffusionPipeline):
 
         return caption.strip()
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -31,7 +31,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_musicldm import MusicLDMPipeline
 
@@ -26,24 +26,24 @@ from transformers import (
     SpeechT5HifiGan,
 )
 
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     is_accelerate_available,
     is_accelerate_version,
     is_librosa_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
 
 
 if is_librosa_available():
     import librosa
 
 
-from ...utils import is_torch_xla_available
+from ....utils import is_torch_xla_available
 
 
 if is_torch_xla_available():
@@ -5,7 +5,7 @@ import numpy as np
 import PIL
 from PIL import Image
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -22,7 +22,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -36,7 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .image_encoder import PaintByExampleImageEncoder
         from .pipeline_paint_by_example import PaintByExamplePipeline
@@ -15,8 +15,8 @@ import torch
 from torch import nn
 from transformers import CLIPPreTrainedModel, CLIPVisionModel
 
-from ...models.attention import BasicTransformerBlock
-from ...utils import logging
+from ....models.attention import BasicTransformerBlock
+from ....utils import logging
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -20,14 +20,14 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from ...utils import deprecate, is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
+from ....utils import deprecate, is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from .image_encoder import PaintByExampleImageEncoder
 
 
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects
+    from ....utils import dummy_torch_and_transformers_objects
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -28,7 +28,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
 
     else:
         from .pipeline_pia import PIAPipeline, PIAPipelineOutput
@@ -21,12 +21,17 @@ import PIL
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
-from ...image_processor import PipelineImageInput
-from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...models.unets.unet_motion_model import MotionAdapter
-from ...schedulers import (
+from ....image_processor import PipelineImageInput
+from ....loaders import (
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    StableDiffusionLoraLoaderMixin,
+    TextualInversionLoaderMixin,
+)
+from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....models.unets.unet_motion_model import MotionAdapter
+from ....schedulers import (
     DDIMScheduler,
     DPMSolverMultistepScheduler,
     EulerAncestralDiscreteScheduler,
@@ -34,7 +39,7 @@ from ...schedulers import (
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import (
+from ....utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     is_torch_xla_available,
@@ -43,10 +48,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ...video_processor import VideoProcessor
-from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ....video_processor import VideoProcessor
+from ...free_init_utils import FreeInitMixin
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 
 
 if is_torch_xla_available():
@@ -191,6 +196,8 @@ class PIAPipeline(
         image_encoder: CLIPVisionModelWithProjection = None,
     ):
         super().__init__()
+
+        logger.warning(f"{self.__class__.__name__} is deprecated and will no longer be actively maintained")
         if isinstance(unet, UNet2DConditionModel):
             unet = UNetMotionModel.from_unet2d(unet, motion_adapter)
 
@@ -415,7 +422,7 @@ class PIAPipeline(
 
         return image_embeds, uncond_image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents
 
@@ -555,7 +562,7 @@ class PIAPipeline(
 
         return ip_adapter_image_embeds
 
-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
     def prepare_latents(
         self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
     ):
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -31,7 +31,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline
 
@@ -4,7 +4,7 @@ from typing import List, Optional, Union
 import numpy as np
 import PIL.Image
 
-from ...utils import BaseOutput
+from ....utils import BaseOutput
 
 
 @dataclass
@@ -5,13 +5,13 @@ from typing import Callable, List, Optional, Union
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from .pipeline_output import SemanticStableDiffusionPipelineOutput
 
 
@@ -143,7 +143,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -41,7 +41,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .camera import create_pan_cameras
         from .pipeline_shap_e import ShapEPipeline
@@ -21,16 +21,16 @@ import PIL.Image
 import torch
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...models import PriorTransformer
-from ...schedulers import HeunDiscreteScheduler
-from ...utils import (
+from ....models import PriorTransformer
+from ....schedulers import HeunDiscreteScheduler
+from ....utils import (
     BaseOutput,
     is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
 from .renderer import ShapERenderer
 
 
@@ -127,7 +127,7 @@ class ShapEPipeline(DiffusionPipeline):
             shap_e_renderer=shap_e_renderer,
         )
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -20,16 +20,16 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPVisionModel
 
-from ...models import PriorTransformer
-from ...schedulers import HeunDiscreteScheduler
-from ...utils import (
+from ....models import PriorTransformer
+from ....schedulers import HeunDiscreteScheduler
+from ....utils import (
     BaseOutput,
     is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline
 from .renderer import ShapERenderer
 
 
@@ -128,7 +128,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
             shap_e_renderer=shap_e_renderer,
         )
 
-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
    def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
        if latents is None:
            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -21,9 +21,9 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 
-from ...configuration_utils import ConfigMixin, register_to_config
-from ...models import ModelMixin
-from ...utils import BaseOutput
+from ....configuration_utils import ConfigMixin, register_to_config
+from ....models import ModelMixin
+from ....utils import BaseOutput
 from .camera import create_pan_cameras
 
 
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline
 
@@ -21,13 +21,13 @@ import torch
 from torch.nn import functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import Attention
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention_processor import Attention
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -36,10 +36,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_diffedit import StableDiffusionDiffEditPipeline
 
@@ -22,13 +22,13 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...configuration_utils import FrozenDict
-from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import DDIMInverseScheduler, KarrasDiffusionSchedulers
-from ...utils import (
+from ....configuration_utils import FrozenDict
+from ....image_processor import VaeImageProcessor
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import DDIMInverseScheduler, KarrasDiffusionSchedulers
+from ....utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
     BaseOutput,
@@ -39,10 +39,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -31,7 +31,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_gligen import StableDiffusionGLIGENPipeline
         from .pipeline_stable_diffusion_gligen_text_image import StableDiffusionGLIGENTextImagePipeline
@@ -20,13 +20,13 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention import GatedSelfAttentionDense
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention import GatedSelfAttentionDense
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -35,10 +35,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -26,13 +26,13 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention import GatedSelfAttentionDense
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention import GatedSelfAttentionDense
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     is_torch_xla_available,
     logging,
@@ -40,11 +40,11 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.clip_image_project_model import CLIPImageProjection
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -25,7 +25,7 @@ try:
     ):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_and_k_diffusion_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_and_k_diffusion_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
@@ -43,7 +43,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
+        from ....utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
     else:
         from .pipeline_stable_diffusion_k_diffusion import StableDiffusionKDiffusionPipeline
         from .pipeline_stable_diffusion_xl_k_diffusion import StableDiffusionXLKDiffusionPipeline
@@ -26,24 +26,24 @@ from transformers import (
     CLIPTokenizerFast,
 )
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import (
     StableDiffusionLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers, LMSDiscreteScheduler
-from ...utils import (
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers, LMSDiscreteScheduler
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     logging,
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -122,6 +122,8 @@ class StableDiffusionKDiffusionPipeline(
     ):
         super().__init__()
 
+        logger.warning(f"{self.__class__.__name__} is deprecated and will no longer be actively maintained")
+
         logger.info(
             f"{self.__class__} is an experimental pipeline and is likely to change in the future. We recommend using"
             " this pipeline for fast experimentation / iteration if needed, but advise relying on existing pipelines"
@@ -25,31 +25,31 @@ from transformers import (
     CLIPTokenizer,
 )
 
-from ...image_processor import VaeImageProcessor
-from ...loaders import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import (
     FromSingleFileMixin,
     IPAdapterMixin,
     StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import (
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention_processor import (
     AttnProcessor2_0,
     FusedAttnProcessor2_0,
     XFormersAttnProcessor,
 )
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers, LMSDiscreteScheduler
-from ...utils import (
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers, LMSDiscreteScheduler
+from ....utils import (
     USE_PEFT_BACKEND,
     logging,
     replace_example_docstring,
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -72,7 +72,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.ModelWrapper
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.ModelWrapper
 class ModelWrapper:
     def __init__(self, model, alphas_cumprod):
         self.model = model
@@ -158,6 +158,8 @@ class StableDiffusionXLKDiffusionPipeline(
     ):
         super().__init__()
 
+        logger.warning(f"{self.__class__.__name__} is deprecated and will no longer be actively maintained")
+
         # get correct sigmas from LMS
         scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
         self.register_modules(
@@ -185,7 +187,7 @@ class StableDiffusionXLKDiffusionPipeline(
         else:
             self.k_diffusion_model = CompVisDenoiser(model)
 
-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.set_scheduler
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.set_scheduler
     def set_scheduler(self, scheduler_type: str):
         library = importlib.import_module("k_diffusion")
         sampling = getattr(library, "sampling")
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         raise OptionalDependencyNotAvailable()
 
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_ldm3d import StableDiffusionLDM3DPipeline
 
@@ -21,12 +21,17 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
-from ...image_processor import PipelineImageInput, VaeImageProcessorLDM3D
-from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import PipelineImageInput, VaeImageProcessorLDM3D
+from ....loaders import (
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    StableDiffusionLoraLoaderMixin,
+    TextualInversionLoaderMixin,
+)
+from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
@@ -36,9 +41,9 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
 if is_torch_xla_available():
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING

-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403

     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             raise OptionalDependencyNotAvailable()

     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_panorama import StableDiffusionPanoramaPipeline

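These __init__.py hunks all touch the same optional-dependency boilerplate that diffusers uses for lazy imports: when torch or transformers is missing, placeholder "dummy" classes that raise a helpful error on instantiation are registered in place of the real pipelines, and the real imports only run under TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT. A condensed sketch of the shape these files share, assembled from the hunks above (the real files also replace sys.modules[__name__] with a _LazyModule, omitted here):

from typing import TYPE_CHECKING

from ....utils import (
    DIFFUSERS_SLOW_IMPORT,
    OptionalDependencyNotAvailable,
    _LazyModule,
    get_objects_from_module,
    is_torch_available,
    is_transformers_available,
)

_dummy_objects = {}
_import_structure = {}

try:
    if not (is_transformers_available() and is_torch_available()):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # torch/transformers missing: register placeholders that raise a clear
    # error message only when the pipeline is actually instantiated
    from ....utils import dummy_torch_and_transformers_objects  # noqa F403

    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
    _import_structure["pipeline_stable_diffusion_panorama"] = ["StableDiffusionPanoramaPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
    # eager imports for type checkers; at runtime the lazy module serves them
    try:
        if not (is_transformers_available() and is_torch_available()):
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        from ....utils.dummy_torch_and_transformers_objects import *
    else:
        from .pipeline_stable_diffusion_panorama import StableDiffusionPanoramaPipeline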
@@ -18,12 +18,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import DDIMScheduler
-from ...utils import (
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import DDIMScheduler
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -32,10 +32,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker


 if is_torch_xla_available():
@@ -6,7 +6,7 @@ import numpy as np
 import PIL
 from PIL import Image

-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     BaseOutput,
     OptionalDependencyNotAvailable,
@@ -59,7 +59,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects
+    from ....utils import dummy_torch_and_transformers_objects

     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -77,7 +77,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_output import StableDiffusionSafePipelineOutput
         from .pipeline_stable_diffusion_safe import StableDiffusionPipelineSafe
@@ -4,7 +4,7 @@ from typing import List, Optional, Union
 import numpy as np
 import PIL.Image

-from ...utils import (
+from ....utils import (
     BaseOutput,
 )

@@ -7,14 +7,14 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...configuration_utils import FrozenDict
-from ...image_processor import PipelineImageInput
-from ...loaders import IPAdapterMixin
-from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....configuration_utils import FrozenDict
+from ....image_processor import PipelineImageInput
+from ....loaders import IPAdapterMixin
+from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionSafePipelineOutput
 from .safety_checker import SafeStableDiffusionSafetyChecker

@@ -372,7 +372,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs

-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -16,7 +16,7 @@ import torch
 import torch.nn as nn
 from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel

-from ...utils import logging
+from ....utils import logging


 logger = logging.get_logger(__name__)
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING

-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -18,7 +18,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403

     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -30,7 +30,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             raise OptionalDependencyNotAvailable()

     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *
+        from ....utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_sag import StableDiffusionSAGPipeline

@@ -19,12 +19,12 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

-from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import PipelineImageInput, VaeImageProcessor
+from ....loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -33,10 +33,10 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionPipelineOutput
-from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionPipelineOutput
+from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker


 if is_torch_xla_available():
@@ -490,7 +490,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs

-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING

-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+    from ....utils import dummy_torch_and_transformers_objects  # noqa F403

     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
@@ -33,7 +33,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+        from ....utils.dummy_torch_and_transformers_objects import *  # noqa F403
     else:
         from .pipeline_output import TextToVideoSDPipelineOutput
         from .pipeline_text_to_video_synth import TextToVideoSDPipeline
@@ -5,7 +5,7 @@ import numpy as np
 import PIL
 import torch

-from ...utils import (
+from ....utils import (
     BaseOutput,
 )

@@ -18,11 +18,11 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer

-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet3DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet3DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -31,9 +31,9 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ...video_processor import VideoProcessor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ....video_processor import VideoProcessor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput


@@ -363,7 +363,7 @@ class TextToVideoSDPipeline(
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs

-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -19,11 +19,11 @@ import numpy as np
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer

-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet3DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet3DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     deprecate,
     is_torch_xla_available,
@@ -32,9 +32,9 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ...video_processor import VideoProcessor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ....video_processor import VideoProcessor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput


@@ -368,7 +368,7 @@ class VideoToVideoSDPipeline(

         return prompt_embeds, negative_prompt_embeds

-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
     def decode_latents(self, latents):
         latents = 1 / self.vae.config.scaling_factor * latents

@@ -10,12 +10,12 @@ import torch.nn.functional as F
 from torch.nn.functional import grid_sample
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

-from ...image_processor import VaeImageProcessor
-from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     is_torch_xla_available,
@@ -23,9 +23,9 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from ..stable_diffusion import StableDiffusionSafetyChecker
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ...stable_diffusion import StableDiffusionSafetyChecker


 if is_torch_xla_available():
@@ -464,7 +464,7 @@ class TextToVideoZeroPipeline(

         return latents.clone().detach()

-    # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+    # Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -16,17 +16,17 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )

-from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import (
+from ....image_processor import VaeImageProcessor
+from ....loaders import StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....models.attention_processor import (
     AttnProcessor2_0,
     FusedAttnProcessor2_0,
     XFormersAttnProcessor,
 )
-from ...models.lora import adjust_lora_scale_text_encoder
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import (
+from ....models.lora import adjust_lora_scale_text_encoder
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import (
     USE_PEFT_BACKEND,
     BaseOutput,
     is_invisible_watermark_available,
@@ -34,15 +34,15 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin


 if is_invisible_watermark_available():
-    from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
+    from ...stable_diffusion_xl.watermark import StableDiffusionXLWatermarker


-from ...utils import is_torch_xla_available
+from ....utils import is_torch_xla_available


 if is_torch_xla_available():
@@ -55,32 +55,32 @@ else:
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_0
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_0
 def rearrange_0(tensor, f):
     F, C, H, W = tensor.size()
     tensor = torch.permute(torch.reshape(tensor, (F // f, f, C, H, W)), (0, 2, 1, 3, 4))
     return tensor


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_1
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_1
 def rearrange_1(tensor):
     B, C, F, H, W = tensor.size()
     return torch.reshape(torch.permute(tensor, (0, 2, 1, 3, 4)), (B * F, C, H, W))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_3
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_3
 def rearrange_3(tensor, f):
     F, D, C = tensor.size()
     return torch.reshape(tensor, (F // f, f, D, C))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_4
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_4
 def rearrange_4(tensor):
     B, F, D, C = tensor.size()
     return torch.reshape(tensor, (B * F, D, C))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
 class CrossFrameAttnProcessor:
     """
     Cross frame attention processor. Each frame attends the first frame.
@@ -140,7 +140,7 @@ class CrossFrameAttnProcessor:
         return hidden_states


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0
 class CrossFrameAttnProcessor2_0:
     """
     Cross frame attention processor with scaled_dot_product attention of Pytorch 2.0.
@@ -230,7 +230,7 @@ class TextToVideoSDXLPipelineOutput(BaseOutput):
     images: Union[List[PIL.Image.Image], np.ndarray]


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.coords_grid
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.coords_grid
 def coords_grid(batch, ht, wd, device):
     # Adapted from https://github.com/princeton-vl/RAFT/blob/master/core/utils/utils.py
     coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device))
@@ -238,7 +238,7 @@ def coords_grid(batch, ht, wd, device):
     return coords[None].repeat(batch, 1, 1, 1)


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.warp_single_latent
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.warp_single_latent
 def warp_single_latent(latent, reference_flow):
     """
     Warp latent of a single frame with given flow
@@ -266,7 +266,7 @@ def warp_single_latent(latent, reference_flow):
     return warped


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field
 def create_motion_field(motion_field_strength_x, motion_field_strength_y, frame_ids, device, dtype):
     """
     Create translation motion field
@@ -290,7 +290,7 @@ def create_motion_field(motion_field_strength_x, motion_field_strength_y, frame_
     return reference_flow


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field_and_warp_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field_and_warp_latents
 def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_strength_y, frame_ids, latents):
     """
     Creates translation motion and warps the latents accordingly
@@ -832,7 +832,7 @@ class TextToVideoZeroSDXLPipeline(

         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

-    # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoZeroPipeline.forward_loop
+    # Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoZeroPipeline.forward_loop
     def forward_loop(self, x_t0, t0, t1, generator):
         """
         Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.
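The `# Copied from diffusers.pipelines. ...` rewrites above are not cosmetic: diffusers' copy-consistency tooling (run via `make fix-copies`) parses these markers and verifies that each annotated body still matches its source definition, so the paths have to track the source's new home under pipelines.deprecated; a stale path would make the check fail. The marker sits directly above the copied definition, as in this example taken from the hunks above:

# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_0
def rearrange_0(tensor, f):
    F, C, H, W = tensor.size()
    tensor = torch.permute(torch.reshape(tensor, (F // f, f, C, H, W)), (0, 2, 1, 3, 4))
    return tensor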
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING

-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -17,7 +17,7 @@ try:
     if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline
+    from ....utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline

     _dummy_objects.update(
         {"UnCLIPImageVariationPipeline": UnCLIPImageVariationPipeline, "UnCLIPPipeline": UnCLIPPipeline}
@@ -33,7 +33,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+        from ....utils.dummy_torch_and_transformers_objects import *  # noqa F403
     else:
         from .pipeline_unclip import UnCLIPPipeline
         from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline
@@ -20,11 +20,11 @@ from torch.nn import functional as F
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput

-from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
-from ...schedulers import UnCLIPScheduler
-from ...utils import is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....models import PriorTransformer, UNet2DConditionModel, UNet2DModel
+from ....schedulers import UnCLIPScheduler
+from ....utils import is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel


@@ -25,11 +25,11 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )

-from ...models import UNet2DConditionModel, UNet2DModel
-from ...schedulers import UnCLIPScheduler
-from ...utils import is_torch_xla_available, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....models import UNet2DConditionModel, UNet2DModel
+from ....schedulers import UnCLIPScheduler
+from ....utils import is_torch_xla_available, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel


@@ -114,7 +114,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
             super_res_scheduler=super_res_scheduler,
         )

-    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+    # Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
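Assuming the package-level re-exports in diffusers/__init__.py and diffusers/pipelines/__init__.py are updated to pull from the new deprecated subpackage (those files are not shown in this diff), the move stays invisible to downstream code:

# public API: unchanged as long as the top-level __init__ re-exports the class
from diffusers import StableDiffusionPanoramaPipeline

# only imports that reached into the internal module path need updating:
# old: diffusers.pipelines.stable_diffusion_panorama
# new: diffusers.pipelines.deprecated.stable_diffusion_panorama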
Some files were not shown because too many files have changed in this diff.