Compare commits

..

7 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Patrick von Platen | 8e44aa4b9f | fix transformers naming | 2023-02-20 09:39:01 +02:00 |
| Patrick von Platen | 126f32775b | make style | 2023-02-20 09:35:00 +02:00 |
| Haofan Wang | 92679c4851 | Update pipeline_utils.py (#2415) | 2023-02-20 09:34:54 +02:00 |
| Patrick von Platen | a41b043570 | Release: v0.13.1 | 2023-02-20 09:15:19 +02:00 |
| YiYi Xu | ba441fe534 | fix the get_indices function (#2418)<br>Co-authored-by: yiyixuxu \<yixu310@gmail.com\> | 2023-02-20 09:13:49 +02:00 |
| Patrick von Platen | ef86993568 | Fix deprecation warning (#2426)<br>Deprecation warning should only hit at version 0.15 | 2023-02-20 09:13:42 +02:00 |
| Sayak Paul | dca9191fc6 | remove author names. (#2428)<br>* remove author names.<br>* add: demo link to panorama. | 2023-02-20 09:13:14 +02:00 |
34 changed files with 94 additions and 136 deletions

View File

@@ -32,7 +32,7 @@ Resources
| Pipeline | Tasks | Colab | Demo
|---|---|:---:|:---:|
| [pipeline_semantic_stable_diffusion_attend_and_excite.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_semantic_stable_diffusion_attend_and_excite) | *Text-to-Image Generation* | - | https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite
| [pipeline_semantic_stable_diffusion_attend_and_excite.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_semantic_stable_diffusion_attend_and_excite) | *Text-to-Image Generation* | - | -
### Usage example

View File

@@ -20,9 +20,6 @@ The original codebase can be found here: [CompVis/stable-diffusion](https://gith
[`StableDiffusionImg2ImgPipeline`] is compatible with all Stable Diffusion checkpoints for [Text-to-Image](./text2img).
The pipeline uses the diffusion-denoising mechanism proposed in SDEdit ([SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations](https://arxiv.org/abs/2108.01073))
by Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon.
[[autodoc]] StableDiffusionImg2ImgPipeline
- all
- __call__
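
For orientation, here is a minimal runnable sketch of the SDEdit-style img2img call this doc page covers; the checkpoint id and image URL are illustrative rather than taken from the diff:

```python
import torch
import requests
from io import BytesIO
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

url = "https://example.com/sketch.png"  # illustrative
init_image = Image.open(BytesIO(requests.get(url).content)).convert("RGB").resize((768, 512))

image = pipe(
    prompt="a fantasy landscape, trending on artstation",
    image=init_image,      # note: `image`, not the deprecated `init_image` kwarg
    strength=0.75,         # how much noise SDEdit adds before denoising
    guidance_scale=7.5,
).images[0]
```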

View File

@@ -60,7 +60,7 @@ def download_image(url):
image = download_image(url)
prompt = "make the mountains snowy"
images = pipe(prompt, image=image, num_inference_steps=20, image_guidance_scale=1.5, guidance_scale=7).images
edit = pipe(prompt, image=image, num_inference_steps=20, image_guidance_scale=1.5, guidance_scale=7).images[0]
images[0].save("snowy_mountains.png")
```
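
The one-line change above swaps which variable the pipeline output is bound to; for context, a self-contained sketch of the full InstructPix2Pix call (the checkpoint is the public `timbrooks/instruct-pix2pix`; the URL is illustrative):

```python
import torch
import requests
from io import BytesIO
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
).to("cuda")

def download_image(url):
    return Image.open(BytesIO(requests.get(url).content)).convert("RGB")

image = download_image("https://example.com/mountains.png")  # illustrative
images = pipe(
    "make the mountains snowy",
    image=image,
    num_inference_steps=20,
    image_guidance_scale=1.5,  # fidelity to the input image
    guidance_scale=7,          # adherence to the edit instruction
).images
images[0].save("snowy_mountains.png")
```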

View File

@@ -34,7 +34,6 @@ Unless otherwise mentioned, these are techniques that work with existing models
6. [Depth2image](#depth2image)
7. [DreamBooth](#dreambooth)
8. [Textual Inversion](#textual-inversion)
10. [MultiDiffusion Panorama](#panorama)
## Instruct pix2pix
@@ -123,12 +122,3 @@ See [here](../training/dreambooth) for more information on how to use it.
[Textual Inversion](../training/text_inversion) fine-tunes a model to teach it about a new concept. For example, a few pictures of a style of artwork can be used to generate images in that style.
See [here](../training/text_inversion) for more information on how to use it.
## MultiDiffusion Panorama
[Paper](https://multidiffusion.github.io/)
[Demo](https://huggingface.co/spaces/weizmannscience/MultiDiffusion)
MultiDiffusion defines a new generation process over a pre-trained diffusion model. This process binds together multiple diffusion generation processes that can be readily applied to generate high-quality and diverse images adhering to user-provided controls, such as a desired aspect ratio (e.g., panorama) and spatial guiding signals ranging from tight segmentation masks to bounding boxes.
[MultiDiffusion Panorama](../api/pipelines/stable_diffusion/panorama) makes it possible to generate high-quality images at arbitrary aspect ratios (e.g., panoramas).
See [here](../api/pipelines/stable_diffusion/panorama) for more information on how to use it to generate panoramic images.
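
A minimal sketch of the panorama pipeline referenced here, assuming the Stable Diffusion 2 base checkpoint used in the panorama docs:

```python
import torch
from diffusers import StableDiffusionPanoramaPipeline, DDIMScheduler

model_ckpt = "stabilityai/stable-diffusion-2-base"
scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
pipe = StableDiffusionPanoramaPipeline.from_pretrained(
    model_ckpt, scheduler=scheduler, torch_dtype=torch.float16
).to("cuda")

# The pipeline defaults to a wide 512x2048 canvas; MultiDiffusion fuses
# overlapping denoising windows into one globally consistent panorama.
image = pipe("a photo of the dolomites").images[0]
image.save("dolomites_panorama.png")
```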

View File

@@ -22,7 +22,7 @@ from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
from diffusers.utils import logging
from diffusers.utils import deprecate, logging
if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
@@ -184,6 +184,10 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
accelerator = Accelerator(
gradient_accumulation_steps=1,
mixed_precision="fp16",
@@ -342,6 +346,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline):
return_dict: bool = True,
guidance_scale: float = 7.5,
eta: float = 0.0,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
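
The `init_image` shim added in this hunk follows the standard `deprecate` pattern; a simplified, standalone sketch of its effect (runs as shown against diffusers 0.13.x, since the helper starts raising once the installed library reaches the removal version 0.14.0):

```python
from diffusers.utils import deprecate

def generate(image=None, **kwargs):
    # Mirrors the shim above: pop the old kwarg, warn, and fall back to it.
    message = "Please use `image` instead of `init_image`."
    init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
    image = init_image or image
    return image

print(generate(init_image="sketch.png"))  # emits a FutureWarning, returns "sketch.png"
print(generate(image="sketch.png"))       # no warning
```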

View File

@@ -12,7 +12,7 @@ import diffusers
from diffusers import SchedulerMixin, StableDiffusionPipeline
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.utils import logging
from diffusers.utils import deprecate, logging
try:
@@ -252,6 +252,7 @@ def get_weighted_text_embeddings(
no_boseos_middle: Optional[bool] = False,
skip_parsing: Optional[bool] = False,
skip_weighting: Optional[bool] = False,
**kwargs,
):
r"""
Prompts can be assigned with local weights using brackets. For example,
@@ -681,6 +682,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
is_cancelled_callback: Optional[Callable[[], bool]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -756,6 +758,10 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 0. Default height and width to unet
height = height or self.unet.config.sample_size * self.vae_scale_factor
width = width or self.unet.config.sample_size * self.vae_scale_factor
@@ -878,6 +884,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
is_cancelled_callback: Optional[Callable[[], bool]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function for text-to-image generation.
@@ -953,6 +960,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback=callback,
is_cancelled_callback=is_cancelled_callback,
callback_steps=callback_steps,
**kwargs,
)
def img2img(
@@ -972,6 +980,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
is_cancelled_callback: Optional[Callable[[], bool]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function for image-to-image generation.
@@ -1047,6 +1056,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback=callback,
is_cancelled_callback=is_cancelled_callback,
callback_steps=callback_steps,
**kwargs,
)
def inpaint(
@@ -1067,6 +1077,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
is_cancelled_callback: Optional[Callable[[], bool]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function for inpaint.
@@ -1147,4 +1158,5 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
callback=callback,
is_cancelled_callback=is_cancelled_callback,
callback_steps=callback_steps,
**kwargs,
)
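
For context, a hedged sketch of driving this community pipeline end to end; `"lpw_stable_diffusion"` is its community-pipeline id and the model id is illustrative:

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="lpw_stable_diffusion",
    torch_dtype=torch.float16,
).to("cuda")

# Per the docstring above, brackets assign local weights to prompt spans.
image = pipe.text2img(
    "a (red:1.2) vintage car on a [cloudy] day, best quality",
    max_embeddings_multiples=3,  # allow prompts beyond the 77-token CLIP limit
    num_inference_steps=30,
).images[0]
```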

View File

@@ -11,7 +11,7 @@ from transformers import CLIPFeatureExtractor, CLIPTokenizer
import diffusers
from diffusers import OnnxRuntimeModel, OnnxStableDiffusionPipeline, SchedulerMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils import logging
from diffusers.utils import deprecate, logging
try:
@@ -744,6 +744,10 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 0. Default height and width to unet
height = height or self.unet.config.sample_size * self.vae_scale_factor
width = width or self.unet.config.sample_size * self.vae_scale_factor

View File

@@ -47,7 +47,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__)
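
This pin from `0.14.0.dev0` down to `0.13.0` recurs across all the example scripts below. A simplified sketch of what `check_min_version` enforces (the real helper lives in `diffusers.utils` and also handles `.dev0` pre-releases):

```python
from packaging import version

import diffusers

def check_min_version(min_version: str):
    # Raise early if the installed diffusers is older than the examples require.
    if version.parse(diffusers.__version__) < version.parse(min_version):
        raise ImportError(
            f"This example requires diffusers>={min_version}, found {diffusers.__version__}."
        )

check_min_version("0.13.0")  # the value pinned throughout this release branch
```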

View File

@@ -36,7 +36,7 @@ from diffusers.utils import check_min_version
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
# Cache compiled models across invocations of this script.
cc.initialize_cache(os.path.expanduser("~/.cache/jax/compilation_cache"))

View File

@@ -54,7 +54,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__)

View File

@@ -47,7 +47,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__, log_level="INFO")

View File

@@ -34,7 +34,7 @@ from diffusers.utils import check_min_version
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = logging.getLogger(__name__)

View File

@@ -48,7 +48,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__, log_level="INFO")
@@ -448,6 +448,19 @@ def main():
vae.to(accelerator.device, dtype=weight_dtype)
text_encoder.to(accelerator.device, dtype=weight_dtype)
if args.enable_xformers_memory_efficient_attention:
if is_xformers_available():
import xformers
xformers_version = version.parse(xformers.__version__)
if xformers_version == version.parse("0.0.16"):
logger.warn(
"xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
)
unet.enable_xformers_memory_efficient_attention()
else:
raise ValueError("xformers is not available. Make sure it is installed correctly")
# now we will add new LoRA weights to the attention layers
# It's important to realize here how many attention weights will be added and of which sizes
# The sizes of the attention layers consist only of two different variables:
@@ -479,20 +492,6 @@ def main():
)
unet.set_attn_processor(lora_attn_procs)
if args.enable_xformers_memory_efficient_attention:
if is_xformers_available():
import xformers
xformers_version = version.parse(xformers.__version__)
if xformers_version == version.parse("0.0.16"):
logger.warn(
"xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
)
unet.enable_xformers_memory_efficient_attention()
else:
raise ValueError("xformers is not available. Make sure it is installed correctly")
lora_layers = AttnProcsLayers(unet.attn_processors)
# Enable TF32 for faster training on Ampere GPUs,
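
This hunk moves the xFormers toggle so it runs before the LoRA attention processors are installed: enabling xFormers swaps in its own attention processors, so calling it after `set_attn_processor(lora_attn_procs)` would otherwise replace the LoRA ones. A condensed sketch of the corrected ordering (`args`, `unet`, and `lora_attn_procs` come from the surrounding training script):

```python
from diffusers.loaders import AttnProcsLayers
from diffusers.utils.import_utils import is_xformers_available

if args.enable_xformers_memory_efficient_attention:
    if not is_xformers_available():
        raise ValueError("xformers is not available. Make sure it is installed correctly")
    unet.enable_xformers_memory_efficient_attention()  # swaps attention processors first ...

unet.set_attn_processor(lora_attn_procs)  # ... so the LoRA processors land last
lora_layers = AttnProcsLayers(unet.attn_processors)
```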

View File

@@ -74,7 +74,7 @@ else:
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__)

View File

@@ -57,7 +57,7 @@ else:
# ------------------------------------------------------------------------------
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = logging.getLogger(__name__)

View File

@@ -23,11 +23,11 @@ import diffusers
from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
from diffusers.optimization import get_scheduler
from diffusers.training_utils import EMAModel
from diffusers.utils import check_min_version, is_accelerate_version, is_tensorboard_available, is_wandb_available
from diffusers.utils import check_min_version, is_tensorboard_available, is_wandb_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.14.0.dev0")
check_min_version("0.13.0")
logger = get_logger(__name__, log_level="INFO")
@@ -628,13 +628,10 @@ def main(args):
images_processed = (images * 255).round().astype("uint8")
if args.logger == "tensorboard":
if is_accelerate_version(">=", "0.17.0.dev0"):
tracker = accelerator.get_tracker("tensorboard", unwrap=True)
else:
tracker = accelerator.get_tracker()
tracker.add_images("test_samples", images_processed.transpose(0, 3, 1, 2), epoch)
accelerator.get_tracker("tensorboard").add_images(
"test_samples", images_processed.transpose(0, 3, 1, 2), epoch
)
elif args.logger == "wandb":
# Upcoming `log_images` helper coming in https://github.com/huggingface/accelerate/pull/962/files
accelerator.get_tracker("wandb").log(
{"test_samples": [wandb.Image(img) for img in images_processed], "epoch": epoch},
step=global_step,

View File

@@ -219,7 +219,7 @@ install_requires = [
setup(
name="diffusers",
version="0.14.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="0.13.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
description="Diffusers",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",

View File

@@ -1,4 +1,4 @@
__version__ = "0.14.0.dev0"
__version__ = "0.13.1"
from .configuration_utils import ConfigMixin
from .utils import (

View File

@@ -18,7 +18,6 @@ import torch
import torch.nn.functional as F
from torch import nn
from ..utils import logging
from ..utils.import_utils import is_xformers_available
from .cross_attention import CrossAttention
from .embeddings import CombinedTimestepLabelEmbeddings
@@ -30,8 +29,6 @@ if is_xformers_available():
else:
xformers = None
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
class AttentionBlock(nn.Module):
"""
@@ -211,7 +208,6 @@ class BasicTransformerBlock(nn.Module):
final_dropout: bool = False,
):
super().__init__()
print(f"Using {activation_fn} as activation_fn in BasicTransformerBlock.")
self.only_cross_attention = only_cross_attention
self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero"
@@ -357,22 +353,15 @@ class FeedForward(nn.Module):
super().__init__()
inner_dim = int(dim * mult)
dim_out = dim_out if dim_out is not None else dim
use_bias = True
if activation_fn == "gelu":
act_fn = GELU(dim, inner_dim)
if activation_fn == "gelu-approximate":
act_fn = GELU(dim, inner_dim, approximate="tanh")
elif activation_fn == "geglu":
print("Using GEGLU as the activation function in the FFN.")
act_fn = GEGLU(dim, inner_dim)
elif activation_fn == "geglu-approximate":
act_fn = ApproximateGELU(dim, inner_dim)
elif activation_fn == "swiglu":
print("Using SwiGLU as the activation function in the FFN.")
inner_dim = int(2 * dim_out / 3)
act_fn = SwiGLU(dim, inner_dim)
use_bias = False
self.net = nn.ModuleList([])
# project in
@@ -380,7 +369,7 @@ class FeedForward(nn.Module):
# project dropout
self.net.append(nn.Dropout(dropout))
# project out
self.net.append(nn.Linear(inner_dim, dim_out, bias=use_bias))
self.net.append(nn.Linear(inner_dim, dim_out))
# FF as used in Vision Transformer, MLP-Mixer, etc. have a final dropout
if final_dropout:
self.net.append(nn.Dropout(dropout))
@@ -453,22 +442,6 @@ class ApproximateGELU(nn.Module):
return x * torch.sigmoid(1.702 * x)
class SwiGLU(nn.Module):
"""
GEGLU-like that uses SiLU instead of GELU on the gates. SwiGLU is used in works like PaLM.
Reference: https://arxiv.org/abs/2002.05202
"""
def __init__(self, dim_in: int, dim_out: int):
super().__init__()
self.w1 = nn.Linear(dim_in, dim_out, bias=False)
self.w3 = nn.Linear(dim_in, dim_out, bias=False)
def forward(self, x):
return F.silu(self.w1(x)) * self.w3(x)
class AdaLayerNorm(nn.Module):
"""
Norm layer modified to incorporate timestep embeddings.
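
The reverted `SwiGLU` path is self-contained and easy to exercise in isolation. A minimal sketch mirroring the removed class (tensor sizes are illustrative; note the removed `FeedForward` branch also shrank `inner_dim` to `2 * dim_out / 3` to keep the parameter count comparable to GEGLU):

```python
import torch
import torch.nn.functional as F
from torch import nn

class SwiGLU(nn.Module):
    """SiLU-gated linear unit (PaLM-style), mirroring the class removed above."""

    def __init__(self, dim_in: int, dim_out: int):
        super().__init__()
        self.w1 = nn.Linear(dim_in, dim_out, bias=False)
        self.w3 = nn.Linear(dim_in, dim_out, bias=False)

    def forward(self, x):
        # The SiLU-activated branch gates the plain linear branch elementwise.
        return F.silu(self.w1(x)) * self.w3(x)

x = torch.randn(2, 77, 320)        # (batch, tokens, dim)
print(SwiGLU(320, 1280)(x).shape)  # torch.Size([2, 77, 1280])
```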

View File

@@ -104,7 +104,6 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
self.num_attention_heads = num_attention_heads
self.attention_head_dim = attention_head_dim
inner_dim = num_attention_heads * attention_head_dim
print(f"Using {activation_fn} as activation_fn in Transformer2DModel.")
# 1. Transformer2DModel can process both standard continuous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
# Define whether input is continuous or discrete depending on configuration

View File

@@ -42,7 +42,6 @@ def get_down_block(
only_cross_attention=False,
upcast_attention=False,
resnet_time_scale_shift="default",
ff_activation_fn="geglu",
):
down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type
if down_block_type == "DownBlock2D":
@@ -104,7 +103,6 @@ def get_down_block(
only_cross_attention=only_cross_attention,
upcast_attention=upcast_attention,
resnet_time_scale_shift=resnet_time_scale_shift,
ff_activation_fn=ff_activation_fn,
)
elif down_block_type == "SimpleCrossAttnDownBlock2D":
if cross_attention_dim is None:
@@ -216,7 +214,6 @@ def get_up_block(
only_cross_attention=False,
upcast_attention=False,
resnet_time_scale_shift="default",
ff_activation_fn="geglu",
):
up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type
if up_block_type == "UpBlock2D":
@@ -265,7 +262,6 @@ def get_up_block(
only_cross_attention=only_cross_attention,
upcast_attention=upcast_attention,
resnet_time_scale_shift=resnet_time_scale_shift,
ff_activation_fn=ff_activation_fn,
)
elif up_block_type == "SimpleCrossAttnUpBlock2D":
if cross_attention_dim is None:
@@ -469,10 +465,8 @@ class UNetMidBlock2DCrossAttn(nn.Module):
dual_cross_attention=False,
use_linear_projection=False,
upcast_attention=False,
ff_activation_fn="geglu",
):
super().__init__()
print(f"Using {ff_activation_fn} as ff_activation_fn in UNetMidBlock2DCrossAttn")
self.has_cross_attention = True
self.attn_num_head_channels = attn_num_head_channels
@@ -507,7 +501,6 @@ class UNetMidBlock2DCrossAttn(nn.Module):
norm_num_groups=resnet_groups,
use_linear_projection=use_linear_projection,
upcast_attention=upcast_attention,
activation_fn=ff_activation_fn,
)
)
else:
@@ -519,7 +512,6 @@ class UNetMidBlock2DCrossAttn(nn.Module):
num_layers=1,
cross_attention_dim=cross_attention_dim,
norm_num_groups=resnet_groups,
activation_fn=ff_activation_fn,
)
)
resnets.append(
@@ -750,7 +742,6 @@ class CrossAttnDownBlock2D(nn.Module):
use_linear_projection=False,
only_cross_attention=False,
upcast_attention=False,
ff_activation_fn="geglu",
):
super().__init__()
resnets = []
@@ -787,7 +778,6 @@ class CrossAttnDownBlock2D(nn.Module):
use_linear_projection=use_linear_projection,
only_cross_attention=only_cross_attention,
upcast_attention=upcast_attention,
activation_fn=ff_activation_fn,
)
)
else:
@@ -799,7 +789,6 @@ class CrossAttnDownBlock2D(nn.Module):
num_layers=1,
cross_attention_dim=cross_attention_dim,
norm_num_groups=resnet_groups,
activation_fn=ff_activation_fn,
)
)
self.attentions = nn.ModuleList(attentions)
@@ -1723,7 +1712,6 @@ class CrossAttnUpBlock2D(nn.Module):
use_linear_projection=False,
only_cross_attention=False,
upcast_attention=False,
ff_activation_fn="geglu",
):
super().__init__()
resnets = []
@@ -1762,7 +1750,6 @@ class CrossAttnUpBlock2D(nn.Module):
use_linear_projection=use_linear_projection,
only_cross_attention=only_cross_attention,
upcast_attention=upcast_attention,
activation_fn=ff_activation_fn,
)
)
else:
@@ -1774,7 +1761,6 @@ class CrossAttnUpBlock2D(nn.Module):
num_layers=1,
cross_attention_dim=cross_attention_dim,
norm_num_groups=resnet_groups,
activation_fn=ff_activation_fn,
)
)
self.attentions = nn.ModuleList(attentions)

View File

@@ -148,10 +148,8 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
conv_in_kernel: int = 3,
conv_out_kernel: int = 3,
projection_class_embeddings_input_dim: Optional[int] = None,
ff_activation_fn="geglu",
):
super().__init__()
print(f"Using {ff_activation_fn} as ff_activation_fn in UNet2DConditionModel")
self.sample_size = sample_size
@@ -266,7 +264,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
only_cross_attention=only_cross_attention[i],
upcast_attention=upcast_attention,
resnet_time_scale_shift=resnet_time_scale_shift,
ff_activation_fn=ff_activation_fn,
)
self.down_blocks.append(down_block)
@@ -285,7 +282,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
dual_cross_attention=dual_cross_attention,
use_linear_projection=use_linear_projection,
upcast_attention=upcast_attention,
ff_activation_fn=ff_activation_fn,
)
elif mid_block_type == "UNetMidBlock2DSimpleCrossAttn":
self.mid_block = UNetMidBlock2DSimpleCrossAttn(
@@ -345,7 +341,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
only_cross_attention=only_cross_attention[i],
upcast_attention=upcast_attention,
resnet_time_scale_shift=resnet_time_scale_shift,
ff_activation_fn=ff_activation_fn,
)
self.up_blocks.append(up_block)
prev_output_channel = output_channel

View File

@@ -561,6 +561,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -627,6 +628,10 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 1. Check inputs. Raise error if not correct
self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds)

View File

@@ -15,7 +15,7 @@ from ...schedulers import (
LMSDiscreteScheduler,
PNDMScheduler,
)
from ...utils import PIL_INTERPOLATION, randn_tensor
from ...utils import PIL_INTERPOLATION, deprecate, randn_tensor
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
@@ -72,6 +72,7 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
output_type: Optional[str] = "pil",
return_dict: bool = True,
**kwargs,
) -> Union[Tuple, ImagePipelineOutput]:
r"""
Args:
@@ -99,6 +100,10 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
[`~pipelines.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if `return_dict` is
True, otherwise a `tuple`. When returning a tuple, the first element is a list with the generated images.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
if isinstance(image, PIL.Image.Image):
batch_size = 1
elif isinstance(image, torch.Tensor):

View File

@@ -582,6 +582,7 @@ class CycleDiffusionPipeline(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -645,6 +646,10 @@ class CycleDiffusionPipeline(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 1. Check inputs
self.check_inputs(prompt, strength, callback_steps)

View File

@@ -253,6 +253,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, np.ndarray], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -308,6 +309,10 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
if isinstance(prompt, str):
batch_size = 1
elif isinstance(prompt, list):

View File

@@ -240,6 +240,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, np.ndarray], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -300,6 +301,10 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
if isinstance(prompt, str):
batch_size = 1
elif isinstance(prompt, list):

View File

@@ -587,26 +587,13 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 1. Check inputs
self.check_inputs(
prompt,
strength,
callback_steps,
negative_prompt=negative_prompt,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
)
self.check_inputs(prompt, strength, callback_steps)
if image is None:
raise ValueError("`image` input cannot be undefined.")
# 2. Define call parameters
if prompt is not None and isinstance(prompt, str):
batch_size = 1
elif prompt is not None and isinstance(prompt, list):
batch_size = len(prompt)
else:
batch_size = prompt_embeds.shape[0]
batch_size = 1 if isinstance(prompt, str) else len(prompt)
device = self._execution_device
# here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`

View File

@@ -572,6 +572,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -638,6 +639,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 1. Check inputs. Raise error if not correct
self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds)

View File

@@ -530,6 +530,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
**kwargs,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -602,6 +603,10 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
(nsfw) content, according to the `safety_checker`.
"""
message = "Please use `image` instead of `init_image`."
init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
image = init_image or image
# 1. Check inputs
self.check_inputs(prompt, strength, callback_steps)

View File

@@ -66,10 +66,6 @@ class UNet1DModelTests(ModelTesterMixin, unittest.TestCase):
def test_from_save_pretrained(self):
super().test_from_save_pretrained()
@unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
def test_from_save_pretrained_variant(self):
super().test_from_save_pretrained_variant()
@unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
def test_model_from_pretrained(self):
super().test_model_from_pretrained()
@@ -190,10 +186,6 @@ class UNetRLModelTests(ModelTesterMixin, unittest.TestCase):
def test_from_save_pretrained(self):
super().test_from_save_pretrained()
@unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
def test_from_save_pretrained_variant(self):
super().test_from_save_pretrained_variant()
@unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
def test_model_from_pretrained(self):
super().test_model_from_pretrained()

View File

@@ -30,7 +30,7 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils import slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps
from diffusers.utils.testing_utils import require_torch_gpu
from ...test_pipelines_common import PipelineTesterMixin
@@ -38,7 +38,6 @@ from ...test_pipelines_common import PipelineTesterMixin
torch.backends.cuda.matmul.allow_tf32 = False
@skip_mps
class StableDiffusionPanoramaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
pipeline_class = StableDiffusionPanoramaPipeline

View File

@@ -189,10 +189,6 @@ class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
# stable unclip will oom when integration tests are run on a V100,
# so turn on memory savings
pipe.enable_attention_slicing()
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe("anime turle", generator=generator, output_type="np")

View File

@@ -185,10 +185,6 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
# stable unclip will oom when integration tests are run on a V100,
# so turn on memory savings
pipe.enable_attention_slicing()
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe("anime turle", image=input_image, generator=generator, output_type="np")
@@ -213,10 +209,6 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
# stable unclip will oom when integration tests are run on a V100,
# so turn on memory savings
pipe.enable_attention_slicing()
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe("anime turle", image=input_image, generator=generator, output_type="np")