mirror of https://github.com/huggingface/diffusers.git
synced 2025-12-10 22:44:38 +08:00
Compare commits
7 Commits
temp/swigl...v0.13.1
- 8e44aa4b9f
- 126f32775b
- 92679c4851
- a41b043570
- ba441fe534
- ef86993568
- dca9191fc6
```diff
@@ -32,7 +32,7 @@ Resources
 | Pipeline | Tasks | Colab | Demo |
 |---|---|:---:|:---:|
-| [pipeline_semantic_stable_diffusion_attend_and_excite.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_semantic_stable_diffusion_attend_and_excite) | *Text-to-Image Generation* | - | https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite |
+| [pipeline_semantic_stable_diffusion_attend_and_excite.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_semantic_stable_diffusion_attend_and_excite) | *Text-to-Image Generation* | - | - |
 
 ### Usage example
```
```diff
@@ -20,9 +20,6 @@ The original codebase can be found here: [CampVis/stable-diffusion](https://gith
 
 [`StableDiffusionImg2ImgPipeline`] is compatible with all Stable Diffusion checkpoints for [Text-to-Image](./text2img)
 
 The pipeline uses the diffusion-denoising mechanism proposed by SDEdit ([SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations](https://arxiv.org/abs/2108.01073)
 proposed by Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, Stefano Ermon).
 
 [[autodoc]] StableDiffusionImg2ImgPipeline
 - all
 - __call__
```
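As a quick orientation for the pipeline this docs hunk touches, here is a minimal img2img sketch; the checkpoint id, image URL, and prompt are illustrative placeholders, not taken from the diff:

```python
from io import BytesIO

import requests
import torch
from PIL import Image

from diffusers import StableDiffusionImg2ImgPipeline

# Any Stable Diffusion text-to-image checkpoint works here, per the doc line above.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

url = "https://example.com/sketch.png"  # placeholder URL
init_image = Image.open(BytesIO(requests.get(url).content)).convert("RGB").resize((768, 512))

# SDEdit-style editing: partially noise the init image (strength), then denoise
# toward the prompt; strength=1.0 effectively ignores the init image.
image = pipe(
    prompt="A fantasy landscape, trending on artstation",
    image=init_image,
    strength=0.75,
    guidance_scale=7.5,
).images[0]
image.save("fantasy_landscape.png")
```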
````diff
@@ -60,7 +60,7 @@ def download_image(url):
 image = download_image(url)
 
 prompt = "make the mountains snowy"
-images = pipe(prompt, image=image, num_inference_steps=20, image_guidance_scale=1.5, guidance_scale=7).images
+edit = pipe(prompt, image=image, num_inference_steps=20, image_guidance_scale=1.5, guidance_scale=7).images[0]
 images[0].save("snowy_mountains.png")
 ```
````
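Note the resulting snippet binds `edit` but still saves `images[0]`. A self-consistent version of the example (our correction, not part of either side of the diff; assumes `pipe` is a `StableDiffusionInstructPix2PixPipeline` and `image` a PIL image):

```python
prompt = "make the mountains snowy"
# .images[0] takes the first generated PIL image from the pipeline output
edit = pipe(prompt, image=image, num_inference_steps=20, image_guidance_scale=1.5, guidance_scale=7).images[0]
edit.save("snowy_mountains.png")
```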
```diff
@@ -34,7 +34,6 @@ Unless otherwise mentioned, these are techniques that work with existing models
 6. [Depth2image](#depth2image)
 7. [DreamBooth](#dreambooth)
 8. [Textual Inversion](#textual-inversion)
-10. [MultiDiffusion Panorama](#panorama)
 
 ## Instruct pix2pix
```
```diff
@@ -123,12 +122,3 @@ See [here](../training/dreambooth) for more information on how to use it.
 [Textual Inversion](../training/text_inversion) fine-tunes a model to teach it about a new concept. I.e. a few pictures of a style of artwork can be used to generate images in that style.
 
 See [here](../training/text_inversion) for more information on how to use it.
-
-## MultiDiffusion Panorama
-
-[Paper](https://multidiffusion.github.io/)
-[Demo](https://huggingface.co/spaces/weizmannscience/MultiDiffusion)
-MultiDiffusion defines a new generation process over a pre-trained diffusion model. This process binds together multiple diffusion generation processes that can be readily applied to generate high-quality and diverse images that adhere to user-provided controls, such as the desired aspect ratio (e.g., panorama) and spatial guiding signals, ranging from tight segmentation masks to bounding boxes.
-[MultiDiffusion Panorama](../api/pipelines/stable_diffusion/panorama) allows generating high-quality images at arbitrary aspect ratios (e.g., panoramas).
-
-See [here](../api/pipelines/stable_diffusion/panorama) for more information on how to use it to generate panoramic images.
```
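For context on what the removed section documented: a minimal usage sketch of the panorama pipeline it points at (available on main/0.14, not in v0.13.1, which is presumably why the section is dropped here; the checkpoint and prompt are illustrative):

```python
import torch

from diffusers import DDIMScheduler, StableDiffusionPanoramaPipeline

# Illustrative checkpoint; the docs pair this pipeline with a DDIM scheduler.
model_id = "stabilityai/stable-diffusion-2-base"
scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPanoramaPipeline.from_pretrained(
    model_id, scheduler=scheduler, torch_dtype=torch.float16
).to("cuda")

# A wide canvas exercises the MultiDiffusion logic: overlapping latent windows
# are denoised each step and averaged where they overlap.
image = pipe("a photo of the dolomites", height=512, width=2048).images[0]
image.save("panorama.png")
```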
```diff
@@ -22,7 +22,7 @@ from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from diffusers.utils import logging
+from diffusers.utils import deprecate, logging
 
 
 if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
```
```diff
@@ -184,6 +184,10 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         accelerator = Accelerator(
             gradient_accumulation_steps=1,
             mixed_precision="fp16",
```
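The same back-compat shim recurs in most pipeline hunks below. A sketch of how the helper behaves, as we understand `diffusers.utils.deprecate` on a 0.13.x install (the exact return convention is an assumption inferred from the `init_image or image` line above):

```python
from diffusers.utils import deprecate


def call(image=None, **kwargs):
    # Pops `init_image` out of kwargs if a caller still passes it, emits a
    # deprecation warning pointing at the 0.14.0 removal, and returns the
    # popped value (None when the old kwarg is absent). Once the installed
    # library version reaches 0.14.0 the helper instead raises, forcing the
    # shim itself to be deleted.
    message = "Please use `image` instead of `init_image`."
    init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
    return init_image or image


call(image="new")       # no warning -> "new"
call(init_image="old")  # warning    -> "old"
```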
```diff
@@ -342,6 +346,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline):
         return_dict: bool = True,
         guidance_scale: float = 7.5,
         eta: float = 0.0,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```
```diff
@@ -12,7 +12,7 @@ import diffusers
 from diffusers import SchedulerMixin, StableDiffusionPipeline
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
-from diffusers.utils import logging
+from diffusers.utils import deprecate, logging
 
 
 try:
```
```diff
@@ -252,6 +252,7 @@ def get_weighted_text_embeddings(
     no_boseos_middle: Optional[bool] = False,
     skip_parsing: Optional[bool] = False,
     skip_weighting: Optional[bool] = False,
+    **kwargs,
 ):
     r"""
     Prompts can be assigned with local weights using brackets. For example,
```

```diff
@@ -681,6 +682,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         is_cancelled_callback: Optional[Callable[[], bool]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```
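The bracket-weighting syntax that `get_weighted_text_embeddings` parses is easiest to see from the community pipeline's entry point. A hedged usage sketch (the checkpoint name is illustrative):

```python
import torch

from diffusers import DiffusionPipeline

# Loads the long-prompt-weighting community pipeline. Bracketed tokens get their
# attention weight scaled: (word) ~ 1.1x, (word:1.5) explicit, [word] ~ 0.9x.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="lpw_stable_diffusion",
    torch_dtype=torch.float16,
).to("cuda")

image = pipe.text2img(
    "a (photorealistic:1.4) portrait of an astronaut, [blurry]",
    max_embeddings_multiples=3,  # allow prompts longer than the 77-token CLIP limit
).images[0]
image.save("astronaut.png")
```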
```diff
@@ -756,6 +758,10 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
         width = width or self.unet.config.sample_size * self.vae_scale_factor
```
```diff
@@ -878,6 +884,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         is_cancelled_callback: Optional[Callable[[], bool]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function for text-to-image generation.
```

```diff
@@ -953,6 +960,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
             callback=callback,
             is_cancelled_callback=is_cancelled_callback,
             callback_steps=callback_steps,
+            **kwargs,
         )
 
     def img2img(
```

```diff
@@ -972,6 +980,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         is_cancelled_callback: Optional[Callable[[], bool]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function for image-to-image generation.
```

```diff
@@ -1047,6 +1056,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
             callback=callback,
             is_cancelled_callback=is_cancelled_callback,
             callback_steps=callback_steps,
+            **kwargs,
         )
 
     def inpaint(
```

```diff
@@ -1067,6 +1077,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         is_cancelled_callback: Optional[Callable[[], bool]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function for inpaint.
```

```diff
@@ -1147,4 +1158,5 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
             callback=callback,
             is_cancelled_callback=is_cancelled_callback,
             callback_steps=callback_steps,
+            **kwargs,
         )
```
```diff
@@ -11,7 +11,7 @@ from transformers import CLIPFeatureExtractor, CLIPTokenizer
 import diffusers
 from diffusers import OnnxRuntimeModel, OnnxStableDiffusionPipeline, SchedulerMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
-from diffusers.utils import logging
+from diffusers.utils import deprecate, logging
 
 
 try:
```
```diff
@@ -744,6 +744,10 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 0. Default height and width to unet
         height = height or self.unet.config.sample_size * self.vae_scale_factor
         width = width or self.unet.config.sample_size * self.vae_scale_factor
```
```diff
@@ -47,7 +47,7 @@ from diffusers.utils.import_utils import is_xformers_available
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__)
 
```

```diff
@@ -36,7 +36,7 @@ from diffusers.utils import check_min_version
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 # Cache compiled models across invocations of this script.
 cc.initialize_cache(os.path.expanduser("~/.cache/jax/compilation_cache"))
```

```diff
@@ -54,7 +54,7 @@ from diffusers.utils.import_utils import is_xformers_available
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__)
 
```

```diff
@@ -47,7 +47,7 @@ from diffusers.utils.import_utils import is_xformers_available
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__, log_level="INFO")
 
```

```diff
@@ -34,7 +34,7 @@ from diffusers.utils import check_min_version
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = logging.getLogger(__name__)
 
```

```diff
@@ -48,7 +48,7 @@ from diffusers.utils.import_utils import is_xformers_available
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__, log_level="INFO")
```
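All six hunks above pin the example training scripts to the release they ship with. Roughly, the guard behaves like this (a sketch, not the exact upstream implementation):

```python
import importlib.metadata


def check_min_version(min_version: str) -> None:
    # Raise early if the installed diffusers is older than the examples expect.
    from packaging import version

    installed = importlib.metadata.version("diffusers")
    if version.parse(installed) < version.parse(min_version):
        raise ImportError(
            f"These examples require diffusers>={min_version}, found {installed}."
        )
```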
Both hunks relocate the xFormers enablement so it runs before the LoRA attention processors are installed, presumably because `enable_xformers_memory_efficient_attention()` swaps the UNet's attention processors and would otherwise clobber the freshly set LoRA ones:

```diff
@@ -448,6 +448,19 @@ def main():
     vae.to(accelerator.device, dtype=weight_dtype)
     text_encoder.to(accelerator.device, dtype=weight_dtype)
 
+    if args.enable_xformers_memory_efficient_attention:
+        if is_xformers_available():
+            import xformers
+
+            xformers_version = version.parse(xformers.__version__)
+            if xformers_version == version.parse("0.0.16"):
+                logger.warn(
+                    "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
+                )
+            unet.enable_xformers_memory_efficient_attention()
+        else:
+            raise ValueError("xformers is not available. Make sure it is installed correctly")
+
     # now we will add new LoRA weights to the attention layers
     # It's important to realize here how many attention weights will be added and of which sizes
     # The sizes of the attention layers consist only of two different variables:
```

```diff
@@ -479,20 +492,6 @@ def main():
     )
 
     unet.set_attn_processor(lora_attn_procs)
 
-    if args.enable_xformers_memory_efficient_attention:
-        if is_xformers_available():
-            import xformers
-
-            xformers_version = version.parse(xformers.__version__)
-            if xformers_version == version.parse("0.0.16"):
-                logger.warn(
-                    "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
-                )
-            unet.enable_xformers_memory_efficient_attention()
-        else:
-            raise ValueError("xformers is not available. Make sure it is installed correctly")
-
     lora_layers = AttnProcsLayers(unet.attn_processors)
 
     # Enable TF32 for faster training on Ampere GPUs,
```
```diff
@@ -74,7 +74,7 @@ else:
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__)
 
```

```diff
@@ -57,7 +57,7 @@ else:
 # ------------------------------------------------------------------------------
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = logging.getLogger(__name__)
```
```diff
@@ -23,11 +23,11 @@ import diffusers
 from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
 from diffusers.optimization import get_scheduler
 from diffusers.training_utils import EMAModel
-from diffusers.utils import check_min_version, is_accelerate_version, is_tensorboard_available, is_wandb_available
+from diffusers.utils import check_min_version, is_tensorboard_available, is_wandb_available
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.14.0.dev0")
+check_min_version("0.13.0")
 
 logger = get_logger(__name__, log_level="INFO")
```

```diff
@@ -628,13 +628,10 @@ def main(args):
             images_processed = (images * 255).round().astype("uint8")
 
             if args.logger == "tensorboard":
-                if is_accelerate_version(">=", "0.17.0.dev0"):
-                    tracker = accelerator.get_tracker("tensorboard", unwrap=True)
-                else:
-                    tracker = accelerator.get_tracker()
-                tracker.add_images("test_samples", images_processed.transpose(0, 3, 1, 2), epoch)
+                accelerator.get_tracker("tensorboard").add_images(
+                    "test_samples", images_processed.transpose(0, 3, 1, 2), epoch
+                )
             elif args.logger == "wandb":
                 # Upcoming `log_images` helper coming in https://github.com/huggingface/accelerate/pull/962/files
                 accelerator.get_tracker("wandb").log(
                     {"test_samples": [wandb.Image(img) for img in images_processed], "epoch": epoch},
                     step=global_step,
```
setup.py (2 changed lines)

```diff
@@ -219,7 +219,7 @@ install_requires = [
 
 setup(
     name="diffusers",
-    version="0.14.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="0.13.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     description="Diffusers",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
```
```diff
@@ -1,4 +1,4 @@
-__version__ = "0.14.0.dev0"
+__version__ = "0.13.1"
 
 from .configuration_utils import ConfigMixin
 from .utils import (
```
```diff
@@ -18,7 +18,6 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 
-from ..utils import logging
 from ..utils.import_utils import is_xformers_available
 from .cross_attention import CrossAttention
 from .embeddings import CombinedTimestepLabelEmbeddings
```

```diff
@@ -30,8 +29,6 @@ if is_xformers_available():
 else:
     xformers = None
 
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
-
 
 class AttentionBlock(nn.Module):
     """
```
```diff
@@ -211,7 +208,6 @@ class BasicTransformerBlock(nn.Module):
         final_dropout: bool = False,
     ):
         super().__init__()
-        print(f"Using {activation_fn} as activation_fn in BasicTransformerBlock.")
         self.only_cross_attention = only_cross_attention
 
         self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero"
```
```diff
@@ -357,22 +353,15 @@ class FeedForward(nn.Module):
         super().__init__()
         inner_dim = int(dim * mult)
         dim_out = dim_out if dim_out is not None else dim
-        use_bias = True
 
         if activation_fn == "gelu":
             act_fn = GELU(dim, inner_dim)
         if activation_fn == "gelu-approximate":
             act_fn = GELU(dim, inner_dim, approximate="tanh")
         elif activation_fn == "geglu":
-            print("Using GEGLU as the activation function in the FFN.")
             act_fn = GEGLU(dim, inner_dim)
         elif activation_fn == "geglu-approximate":
             act_fn = ApproximateGELU(dim, inner_dim)
-        elif activation_fn == "swiglu":
-            print("Using SwiGLU as the activation function in the FFN.")
-            inner_dim = int(2 * dim_out / 3)
-            act_fn = SwiGLU(dim, inner_dim)
-            use_bias = False
 
         self.net = nn.ModuleList([])
         # project in
```
```diff
@@ -380,7 +369,7 @@ class FeedForward(nn.Module):
         # project dropout
         self.net.append(nn.Dropout(dropout))
         # project out
-        self.net.append(nn.Linear(inner_dim, dim_out, bias=use_bias))
+        self.net.append(nn.Linear(inner_dim, dim_out))
         # FF as used in Vision Transformer, MLP-Mixer, etc. have a final dropout
         if final_dropout:
             self.net.append(nn.Dropout(dropout))
```
```diff
@@ -453,22 +442,6 @@ class ApproximateGELU(nn.Module):
         return x * torch.sigmoid(1.702 * x)
 
 
-class SwiGLU(nn.Module):
-    """
-    GEGLU-like that uses SiLU instead of GELU on the gates. SwiGLU is used in works like PaLM.
-
-    Reference: https://arxiv.org/abs/2002.05202
-    """
-
-    def __init__(self, dim_in: int, dim_out: int):
-        super().__init__()
-        self.w1 = nn.Linear(dim_in, dim_out, bias=False)
-        self.w3 = nn.Linear(dim_in, dim_out, bias=False)
-
-    def forward(self, x):
-        return F.silu(self.w1(x)) * self.w3(x)
-
-
 class AdaLayerNorm(nn.Module):
     """
     Norm layer modified to incorporate timestep embeddings.
```
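For readers tracking what the `temp/swigl` branch adds, here is the removed module as a standalone, runnable sketch. The shape check below uses the conventional PaLM-style 2/3-of-expanded-width sizing; note the branch's own `inner_dim = int(2 * dim_out / 3)` line above scales from `dim_out` instead, so the convention shown here is our assumption about intent:

```python
import torch
import torch.nn.functional as F
from torch import nn


class SwiGLU(nn.Module):
    """GEGLU-like gate that uses SiLU: silu(x @ W1) * (x @ W3), per arXiv:2002.05202."""

    def __init__(self, dim_in: int, dim_out: int):
        super().__init__()
        self.w1 = nn.Linear(dim_in, dim_out, bias=False)  # gate branch
        self.w3 = nn.Linear(dim_in, dim_out, bias=False)  # value branch

    def forward(self, x):
        return F.silu(self.w1(x)) * self.w3(x)


# Shape check: with inner_dim = 2/3 * (4 * dim), the three bias-free matrices
# stay near the parameter count of a plain 4x MLP with two matrices.
dim = 320
inner_dim = int(2 * (4 * dim) / 3)
gate = SwiGLU(dim, inner_dim)
proj_out = nn.Linear(inner_dim, dim, bias=False)

x = torch.randn(2, 77, dim)
y = proj_out(gate(x))
assert y.shape == x.shape
```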
```diff
@@ -104,7 +104,6 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
         self.num_attention_heads = num_attention_heads
         self.attention_head_dim = attention_head_dim
         inner_dim = num_attention_heads * attention_head_dim
-        print(f"Using {activation_fn} as activation_fn in Transformer2DModel.")
 
         # 1. Transformer2DModel can process both standard continous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
         # Define whether input is continuous or discrete depending on configuration
```
```diff
@@ -42,7 +42,6 @@ def get_down_block(
     only_cross_attention=False,
     upcast_attention=False,
     resnet_time_scale_shift="default",
-    ff_activation_fn="geglu",
 ):
     down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type
     if down_block_type == "DownBlock2D":
```

```diff
@@ -104,7 +103,6 @@ def get_down_block(
             only_cross_attention=only_cross_attention,
             upcast_attention=upcast_attention,
             resnet_time_scale_shift=resnet_time_scale_shift,
-            ff_activation_fn=ff_activation_fn,
         )
     elif down_block_type == "SimpleCrossAttnDownBlock2D":
         if cross_attention_dim is None:
```

```diff
@@ -216,7 +214,6 @@ def get_up_block(
     only_cross_attention=False,
     upcast_attention=False,
     resnet_time_scale_shift="default",
-    ff_activation_fn="geglu",
 ):
     up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type
     if up_block_type == "UpBlock2D":
```

```diff
@@ -265,7 +262,6 @@ def get_up_block(
             only_cross_attention=only_cross_attention,
             upcast_attention=upcast_attention,
             resnet_time_scale_shift=resnet_time_scale_shift,
-            ff_activation_fn=ff_activation_fn,
         )
     elif up_block_type == "SimpleCrossAttnUpBlock2D":
         if cross_attention_dim is None:
```
```diff
@@ -469,10 +465,8 @@ class UNetMidBlock2DCrossAttn(nn.Module):
         dual_cross_attention=False,
         use_linear_projection=False,
         upcast_attention=False,
-        ff_activation_fn="geglu",
     ):
         super().__init__()
-        print(f"Using {ff_activation_fn} as ff_activation_fn in UNetMidBlock2DCrossAttn")
 
         self.has_cross_attention = True
         self.attn_num_head_channels = attn_num_head_channels
```

```diff
@@ -507,7 +501,6 @@
                     norm_num_groups=resnet_groups,
                     use_linear_projection=use_linear_projection,
                     upcast_attention=upcast_attention,
-                    activation_fn=ff_activation_fn,
                 )
             )
         else:
```

```diff
@@ -519,7 +512,6 @@
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
                     norm_num_groups=resnet_groups,
-                    activation_fn=ff_activation_fn,
                 )
             )
         resnets.append(
```
```diff
@@ -750,7 +742,6 @@ class CrossAttnDownBlock2D(nn.Module):
         use_linear_projection=False,
         only_cross_attention=False,
         upcast_attention=False,
-        ff_activation_fn="geglu",
     ):
         super().__init__()
         resnets = []
```

```diff
@@ -787,7 +778,6 @@ class CrossAttnDownBlock2D(nn.Module):
                     use_linear_projection=use_linear_projection,
                     only_cross_attention=only_cross_attention,
                     upcast_attention=upcast_attention,
-                    activation_fn=ff_activation_fn,
                 )
             )
         else:
```

```diff
@@ -799,7 +789,6 @@ class CrossAttnDownBlock2D(nn.Module):
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
                     norm_num_groups=resnet_groups,
-                    activation_fn=ff_activation_fn,
                 )
             )
         self.attentions = nn.ModuleList(attentions)
```
```diff
@@ -1723,7 +1712,6 @@ class CrossAttnUpBlock2D(nn.Module):
         use_linear_projection=False,
         only_cross_attention=False,
         upcast_attention=False,
-        ff_activation_fn="geglu",
     ):
         super().__init__()
         resnets = []
```

```diff
@@ -1762,7 +1750,6 @@ class CrossAttnUpBlock2D(nn.Module):
                     use_linear_projection=use_linear_projection,
                     only_cross_attention=only_cross_attention,
                     upcast_attention=upcast_attention,
-                    activation_fn=ff_activation_fn,
                 )
             )
         else:
```

```diff
@@ -1774,7 +1761,6 @@ class CrossAttnUpBlock2D(nn.Module):
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
                     norm_num_groups=resnet_groups,
-                    activation_fn=ff_activation_fn,
                 )
             )
         self.attentions = nn.ModuleList(attentions)
```
```diff
@@ -148,10 +148,8 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         conv_in_kernel: int = 3,
         conv_out_kernel: int = 3,
         projection_class_embeddings_input_dim: Optional[int] = None,
-        ff_activation_fn="geglu",
     ):
         super().__init__()
-        print(f"Using {ff_activation_fn} as ff_activation_fn in UNet2DConditionModel")
 
         self.sample_size = sample_size
```

```diff
@@ -266,7 +264,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 only_cross_attention=only_cross_attention[i],
                 upcast_attention=upcast_attention,
                 resnet_time_scale_shift=resnet_time_scale_shift,
-                ff_activation_fn=ff_activation_fn,
             )
             self.down_blocks.append(down_block)
```

```diff
@@ -285,7 +282,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 dual_cross_attention=dual_cross_attention,
                 use_linear_projection=use_linear_projection,
                 upcast_attention=upcast_attention,
-                ff_activation_fn=ff_activation_fn,
             )
         elif mid_block_type == "UNetMidBlock2DSimpleCrossAttn":
             self.mid_block = UNetMidBlock2DSimpleCrossAttn(
```

```diff
@@ -345,7 +341,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 only_cross_attention=only_cross_attention[i],
                 upcast_attention=upcast_attention,
                 resnet_time_scale_shift=resnet_time_scale_shift,
-                ff_activation_fn=ff_activation_fn,
             )
             self.up_blocks.append(up_block)
             prev_output_channel = output_channel
```
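Taken together, these hunks undo the branch's plumbing of a single `ff_activation_fn` flag from the top-level UNet config down through the block factories into every `FeedForward`. On the `temp/swigl` branch it would be exercised like this (a sketch of the branch-only API, which does not exist in v0.13.1):

```python
from diffusers import UNet2DConditionModel

# Branch-only keyword (this compare removes it): every BasicTransformerBlock's
# feed-forward is built with SwiGLU instead of the GEGLU default.
# Passing it on v0.13.1 raises a TypeError.
unet = UNet2DConditionModel(ff_activation_fn="swiglu")
```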
```diff
@@ -561,6 +561,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -627,6 +628,10 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds)
```
```diff
@@ -15,7 +15,7 @@ from ...schedulers import (
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import PIL_INTERPOLATION, randn_tensor
+from ...utils import PIL_INTERPOLATION, deprecate, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
```

```diff
@@ -72,6 +72,7 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
+        **kwargs,
     ) -> Union[Tuple, ImagePipelineOutput]:
         r"""
         Args:
```

```diff
@@ -99,6 +100,10 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
             [`~pipelines.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if `return_dict` is
             True, otherwise a `tuple. When returning a tuple, the first element is a list with the generated images.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         if isinstance(image, PIL.Image.Image):
             batch_size = 1
         elif isinstance(image, torch.Tensor):
```
```diff
@@ -582,6 +582,7 @@ class CycleDiffusionPipeline(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -645,6 +646,10 @@ class CycleDiffusionPipeline(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 1. Check inputs
         self.check_inputs(prompt, strength, callback_steps)
```
```diff
@@ -253,6 +253,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, np.ndarray], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -308,6 +309,10 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         if isinstance(prompt, str):
             batch_size = 1
         elif isinstance(prompt, list):
```
```diff
@@ -240,6 +240,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, np.ndarray], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -300,6 +301,10 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         if isinstance(prompt, str):
             batch_size = 1
         elif isinstance(prompt, list):
```
```diff
@@ -587,26 +587,13 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline):
             (nsfw) content, according to the `safety_checker`.
         """
         # 1. Check inputs
-        self.check_inputs(
-            prompt,
-            strength,
-            callback_steps,
-            negative_prompt=negative_prompt,
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-        )
+        self.check_inputs(prompt, strength, callback_steps)
 
         if image is None:
             raise ValueError("`image` input cannot be undefined.")
 
         # 2. Define call parameters
-        if prompt is not None and isinstance(prompt, str):
-            batch_size = 1
-        elif prompt is not None and isinstance(prompt, list):
-            batch_size = len(prompt)
-        else:
-            batch_size = prompt_embeds.shape[0]
-
+        batch_size = 1 if isinstance(prompt, str) else len(prompt)
         device = self._execution_device
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
```
```diff
@@ -572,6 +572,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -638,6 +639,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds)
```
```diff
@@ -530,6 +530,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```

```diff
@@ -602,6 +603,10 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        message = "Please use `image` instead of `init_image`."
+        init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
+        image = init_image or image
+
         # 1. Check inputs
         self.check_inputs(prompt, strength, callback_steps)
```
```diff
@@ -66,10 +66,6 @@ class UNet1DModelTests(ModelTesterMixin, unittest.TestCase):
     def test_from_save_pretrained(self):
         super().test_from_save_pretrained()
 
-    @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
-    def test_from_save_pretrained_variant(self):
-        super().test_from_save_pretrained_variant()
-
     @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
     def test_model_from_pretrained(self):
         super().test_model_from_pretrained()
```

```diff
@@ -190,10 +186,6 @@ class UNetRLModelTests(ModelTesterMixin, unittest.TestCase):
     def test_from_save_pretrained(self):
         super().test_from_save_pretrained()
 
-    @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
-    def test_from_save_pretrained_variant(self):
-        super().test_from_save_pretrained_variant()
-
     @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS")
     def test_model_from_pretrained(self):
         super().test_model_from_pretrained()
```
```diff
@@ -30,7 +30,7 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils import slow, torch_device
-from diffusers.utils.testing_utils import require_torch_gpu, skip_mps
+from diffusers.utils.testing_utils import require_torch_gpu
 
 from ...test_pipelines_common import PipelineTesterMixin
```

```diff
@@ -38,7 +38,6 @@ from ...test_pipelines_common import PipelineTesterMixin
 torch.backends.cuda.matmul.allow_tf32 = False
 
 
-@skip_mps
 class StableDiffusionPanoramaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = StableDiffusionPanoramaPipeline
```
```diff
@@ -189,10 +189,6 @@ class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
         pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16)
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-        # stable unclip will oom when integration tests are run on a V100,
-        # so turn on memory savings
-        pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipe("anime turle", generator=generator, output_type="np")
```

```diff
@@ -185,10 +185,6 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
         )
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-        # stable unclip will oom when integration tests are run on a V100,
-        # so turn on memory savings
-        pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipe("anime turle", image=input_image, generator=generator, output_type="np")
```

```diff
@@ -213,10 +209,6 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
         )
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-        # stable unclip will oom when integration tests are run on a V100,
-        # so turn on memory savings
-        pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipe("anime turle", image=input_image, generator=generator, output_type="np")
```