Compare commits

...

4 Commits

Author SHA1 Message Date
Patrick von Platen
9b14ce397e Release: v0.16.1 2023-04-28 14:03:50 +02:00
Patrick von Platen
23159f4adb Allow disabling torch 2_0 attention (#3273)
* Allow disabling torch 2_0 attention

* make style

* Update src/diffusers/models/attention.py
2023-04-28 14:01:41 +02:00
Patrick von Platen
4c476e99b5 Fix community pipelines (#3266) 2023-04-28 13:16:57 +02:00
apolinário
9c876a5915 merge conflict 2023-04-28 13:16:33 +02:00
12 changed files with 30 additions and 30 deletions

View File

@@ -28,8 +28,8 @@ Our work underscores the potential of larger UNet architectures in the first sta
## Usage ## Usage
Before you can use IF, you need to accept its usage conditions. To do so: Before you can use IF, you need to accept its usage conditions. To do so:
1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be loggin in 1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in
2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0) and [DeepFloyd/IF-II-L-v1.0](https://huggingface.co/DeepFloyd/IF-II-L-v1.0) 2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card automatically accepts it for the other IF models.
3. Make sure to log in locally. Install `huggingface_hub` 3. Make sure to log in locally. Install `huggingface_hub`
```sh ```sh
pip install huggingface_hub --upgrade pip install huggingface_hub --upgrade
@@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical
**Available checkpoints** **Available checkpoints**
- *Stage-1* - *Stage-1*
- [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0) - [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
- [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0) - [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0)
- [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0) - [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0)
@@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil
import torch import torch
# stage 1 # stage 1
stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload() stage_1.enable_model_cpu_offload()
# stage 2 # stage 2
@@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
original_image = original_image.resize((768, 512)) original_image = original_image.resize((768, 512))
# stage 1 # stage 1
stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload() stage_1.enable_model_cpu_offload()
# stage 2 # stage 2
@@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content))
mask_image = mask_image mask_image = mask_image
# stage 1 # stage 1
stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload() stage_1.enable_model_cpu_offload()
# stage 2 # stage 2
@@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded
```python ```python
from diffusers import IFPipeline, IFSuperResolutionPipeline from diffusers import IFPipeline, IFSuperResolutionPipeline
pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0") pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")
pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0") pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0")
@@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
The simplest optimization to run IF faster is to move all model components to the GPU. The simplest optimization to run IF faster is to move all model components to the GPU.
```py ```py
pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda") pipe.to("cuda")
``` ```
@@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro
A smaller number will vary the image less but run faster. A smaller number will vary the image less but run faster.
```py ```py
pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda") pipe.to("cuda")
image = pipe(image=image, prompt="<prompt>", strength=0.3).images image = pipe(image=image, prompt="<prompt>", strength=0.3).images
@@ -364,7 +364,7 @@ with IF and it might not give expected results.
```py ```py
import torch import torch
pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda") pipe.to("cuda")
pipe.text_encoder = torch.compile(pipe.text_encoder) pipe.text_encoder = torch.compile(pipe.text_encoder)
@@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading
Either the model based CPU offloading, Either the model based CPU offloading,
```py ```py
pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload() pipe.enable_model_cpu_offload()
``` ```
or the more aggressive layer based CPU offloading. or the more aggressive layer based CPU offloading.
```py ```py
pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.enable_sequential_cpu_offload() pipe.enable_sequential_cpu_offload()
``` ```
@@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision
from transformers import T5EncoderModel from transformers import T5EncoderModel
text_encoder = T5EncoderModel.from_pretrained( text_encoder = T5EncoderModel.from_pretrained(
"DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
) )
from diffusers import DiffusionPipeline from diffusers import DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained( pipe = DiffusionPipeline.from_pretrained(
"DeepFloyd/IF-I-IF-v1.0", "DeepFloyd/IF-I-XL-v1.0",
text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder
unet=None, unet=None,
device_map="auto", device_map="auto",
@@ -422,13 +422,13 @@ from transformers import T5EncoderModel
from diffusers.utils import pt_to_pil from diffusers.utils import pt_to_pil
text_encoder = T5EncoderModel.from_pretrained( text_encoder = T5EncoderModel.from_pretrained(
"DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
) )
# text to image # text to image
pipe = DiffusionPipeline.from_pretrained( pipe = DiffusionPipeline.from_pretrained(
"DeepFloyd/IF-I-IF-v1.0", "DeepFloyd/IF-I-XL-v1.0",
text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder
unet=None, unet=None,
device_map="auto", device_map="auto",
@@ -444,7 +444,7 @@ gc.collect()
torch.cuda.empty_cache() torch.cuda.empty_cache()
pipe = IFPipeline.from_pretrained( pipe = IFPipeline.from_pretrained(
"DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto" "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
) )
generator = torch.Generator().manual_seed(0) generator = torch.Generator().manual_seed(0)

View File

@@ -226,7 +226,7 @@ install_requires = [
setup( setup(
name="diffusers", name="diffusers",
version="0.16.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) version="0.16.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
description="Diffusers", description="Diffusers",
long_description=open("README.md", "r", encoding="utf-8").read(), long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown", long_description_content_type="text/markdown",

View File

@@ -1,4 +1,4 @@
__version__ = "0.16.0" __version__ = "0.16.1"
from .configuration_utils import ConfigMixin from .configuration_utils import ConfigMixin
from .utils import ( from .utils import (

View File

@@ -71,6 +71,7 @@ class AttentionBlock(nn.Module):
self.proj_attn = nn.Linear(channels, channels, bias=True) self.proj_attn = nn.Linear(channels, channels, bias=True)
self._use_memory_efficient_attention_xformers = False self._use_memory_efficient_attention_xformers = False
self._use_2_0_attn = True
self._attention_op = None self._attention_op = None
def reshape_heads_to_batch_dim(self, tensor, merge_head_and_batch=True): def reshape_heads_to_batch_dim(self, tensor, merge_head_and_batch=True):
@@ -142,9 +143,8 @@ class AttentionBlock(nn.Module):
scale = 1 / math.sqrt(self.channels / self.num_heads) scale = 1 / math.sqrt(self.channels / self.num_heads)
use_torch_2_0_attn = ( _use_2_0_attn = self._use_2_0_attn and not self._use_memory_efficient_attention_xformers
hasattr(F, "scaled_dot_product_attention") and not self._use_memory_efficient_attention_xformers use_torch_2_0_attn = hasattr(F, "scaled_dot_product_attention") and _use_2_0_attn
)
query_proj = self.reshape_heads_to_batch_dim(query_proj, merge_head_and_batch=not use_torch_2_0_attn) query_proj = self.reshape_heads_to_batch_dim(query_proj, merge_head_and_batch=not use_torch_2_0_attn)
key_proj = self.reshape_heads_to_batch_dim(key_proj, merge_head_and_batch=not use_torch_2_0_attn) key_proj = self.reshape_heads_to_batch_dim(key_proj, merge_head_and_batch=not use_torch_2_0_attn)

View File

@@ -41,7 +41,7 @@ EXAMPLE_DOC_STRING = """
>>> from diffusers.utils import pt_to_pil >>> from diffusers.utils import pt_to_pil
>>> import torch >>> import torch
>>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
>>> pipe.enable_model_cpu_offload() >>> pipe.enable_model_cpu_offload()
>>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"' >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'

View File

@@ -70,7 +70,7 @@ EXAMPLE_DOC_STRING = """
>>> original_image = original_image.resize((768, 512)) >>> original_image = original_image.resize((768, 512))
>>> pipe = IFImg2ImgPipeline.from_pretrained( >>> pipe = IFImg2ImgPipeline.from_pretrained(
... "DeepFloyd/IF-I-IF-v1.0", ... "DeepFloyd/IF-I-XL-v1.0",
... variant="fp16", ... variant="fp16",
... torch_dtype=torch.float16, ... torch_dtype=torch.float16,
... ) ... )

View File

@@ -73,7 +73,7 @@ EXAMPLE_DOC_STRING = """
>>> original_image = original_image.resize((768, 512)) >>> original_image = original_image.resize((768, 512))
>>> pipe = IFImg2ImgPipeline.from_pretrained( >>> pipe = IFImg2ImgPipeline.from_pretrained(
... "DeepFloyd/IF-I-IF-v1.0", ... "DeepFloyd/IF-I-XL-v1.0",
... variant="fp16", ... variant="fp16",
... torch_dtype=torch.float16, ... torch_dtype=torch.float16,
... ) ... )

View File

@@ -76,7 +76,7 @@ EXAMPLE_DOC_STRING = """
>>> mask_image = mask_image >>> mask_image = mask_image
>>> pipe = IFInpaintingPipeline.from_pretrained( >>> pipe = IFInpaintingPipeline.from_pretrained(
... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16 ... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
... ) ... )
>>> pipe.enable_model_cpu_offload() >>> pipe.enable_model_cpu_offload()

View File

@@ -78,7 +78,7 @@ EXAMPLE_DOC_STRING = """
>>> mask_image = mask_image >>> mask_image = mask_image
>>> pipe = IFInpaintingPipeline.from_pretrained( >>> pipe = IFInpaintingPipeline.from_pretrained(
... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16 ... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
... ) ... )
>>> pipe.enable_model_cpu_offload() >>> pipe.enable_model_cpu_offload()

View File

@@ -45,7 +45,7 @@ EXAMPLE_DOC_STRING = """
>>> from diffusers.utils import pt_to_pil >>> from diffusers.utils import pt_to_pil
>>> import torch >>> import torch
>>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
>>> pipe.enable_model_cpu_offload() >>> pipe.enable_model_cpu_offload()
>>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"' >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'

View File

@@ -267,7 +267,7 @@ def get_cached_module_file(
# retrieve github version that matches # retrieve github version that matches
if revision is None: if revision is None:
revision = latest_version if latest_version in available_versions else "main" revision = latest_version if latest_version[1:] in available_versions else "main"
logger.info(f"Defaulting to latest_version: {revision}.") logger.info(f"Defaulting to latest_version: {revision}.")
elif revision in available_versions: elif revision in available_versions:
revision = f"v{revision}" revision = f"v{revision}"

View File

@@ -94,7 +94,7 @@ class IFPipelineSlowTests(unittest.TestCase):
def test_all(self): def test_all(self):
# if # if
pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe_2 = IFSuperResolutionPipeline.from_pretrained( pipe_2 = IFSuperResolutionPipeline.from_pretrained(
"DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None