Mirror of https://github.com/huggingface/diffusers.git, synced 2025-12-06 20:44:33 +08:00
Compare commits: custom-cod...v0.16.1 (4 commits: 9b14ce397e, 23159f4adb, 4c476e99b5, 9c876a5915)
@@ -28,8 +28,8 @@ Our work underscores the potential of larger UNet architectures in the first sta
 ## Usage
 
 Before you can use IF, you need to accept its usage conditions. To do so:
-1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be loggin in
-2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0) and [DeepFloyd/IF-II-L-v1.0](https://huggingface.co/DeepFloyd/IF-II-L-v1.0)
+1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in
+2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
 3. Make sure to login locally. Install `huggingface_hub`
 ```sh
 pip install huggingface_hub --upgrade
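For anyone following these steps, the login itself can also be scripted. A minimal sketch using `huggingface_hub`'s public `login()` helper (the CLI equivalent is `huggingface-cli login`):

```py
# Minimal local-login sketch; login() is part of huggingface_hub's public API.
from huggingface_hub import login

# Prompts for an access token from https://huggingface.co/settings/tokens;
# pass token="hf_..." explicitly for non-interactive environments.
login()
```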
@@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical
 
 **Available checkpoints**
 - *Stage-1*
-  - [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0)
+  - [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
   - [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0)
   - [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0)
@@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil
 import torch
 
 # stage 1
-stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
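For context, the surrounding document chains this stage 1 output through the later stages. A condensed sketch of the cascade, assuming the stage II checkpoint named in this diff and the `encode_prompt`/`output_type="pt"` pattern used in the IF docs:

```py
# Sketch: reuse one set of prompt embeddings across the cascade.
prompt_embeds, negative_embeds = stage_1.encode_prompt("a photo of a kangaroo")

# stage 1 produces a 64x64 image tensor
image = stage_1(
    prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, output_type="pt"
).images

# stage 2 upscales it to 256x256
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-L-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16
)
stage_2.enable_model_cpu_offload()
image = stage_2(
    image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, output_type="pt"
).images
```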
@@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
 original_image = original_image.resize((768, 512))
 
 # stage 1
-stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
@@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content))
 mask_image = mask_image
 
 # stage 1
-stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
@@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded
 ```python
 from diffusers import IFPipeline, IFSuperResolutionPipeline
 
-pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0")
+pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")
 pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0")
 
 
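The payoff of this loading style is shown in the next hunk's context line: the same weights can back sibling pipelines without re-downloading. Reproduced here for readability as a sketch:

```py
# Reuse already-loaded components for the inpainting variants (sketch;
# mirrors the `**pipe_2.components` pattern visible in the next hunk).
from diffusers import IFInpaintingPipeline, IFInpaintingSuperResolutionPipeline

pipe_1 = IFInpaintingPipeline(**pipe_1.components)
pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
```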
@@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
 The simplest optimization to run IF faster is to move all model components to the GPU.
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 ```
 
@@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro
 A smaller number will vary the image less but run faster.
 
 ```py
-pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 
 image = pipe(image=image, prompt="<prompt>", strength=0.3).images
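Because `strength` both scales the noise added to the input and sets how many denoising steps actually run, sweeping it is a quick way to feel out the fidelity/speed trade-off. A sketch, assuming the `pipe` and `image` from the hunk above:

```py
# Lower strength stays closer to the input image and runs fewer steps.
for strength in (0.2, 0.5, 0.8):
    result = pipe(image=image, prompt="<prompt>", strength=strength).images[0]
    result.save(f"img2img_strength_{strength}.png")
```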
@@ -364,7 +364,7 @@ with IF and it might not give expected results.
 ```py
 import torch
 
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 
 pipe.text_encoder = torch.compile(pipe.text_encoder)
@@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading
 Either the model based CPU offloading,
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_model_cpu_offload()
 ```
 
 or the more aggressive layer based CPU offloading.
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_sequential_cpu_offload()
 ```
 
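The two modes trade throughput for memory: model-based offloading swaps whole sub-models between CPU and GPU, while sequential offloading swaps individual layers and is slower but leaner. A rough way to compare their peak usage, as a sketch:

```py
# Measure peak GPU memory under whichever offloading mode is enabled.
import torch

torch.cuda.reset_peak_memory_stats()
image = pipe("<prompt>").images[0]
print(f"peak GPU memory: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GiB")
```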
@@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision
 from transformers import T5EncoderModel
 
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 
 from diffusers import DiffusionPipeline
 
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
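With `unet=None`, the pipeline built above can only encode text; the doc's pattern (visible in the `gc.collect()` hunk below) is to compute embeddings once and then free the 8bit encoder. A sketch of that step:

```py
# Encode once with the 8bit T5, then release it before loading the UNet.
import gc

import torch

prompt_embeds, negative_embeds = pipe.encode_prompt("<prompt>")

del pipe
gc.collect()
torch.cuda.empty_cache()
```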
@@ -422,13 +422,13 @@ from transformers import T5EncoderModel
 from diffusers.utils import pt_to_pil
 
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 
 # text to image
 
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
@@ -444,7 +444,7 @@ gc.collect()
 torch.cuda.empty_cache()
 
 pipe = IFPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
+    "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
 )
 
 generator = torch.Generator().manual_seed(0)
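Since this pipeline was created with `text_encoder=None`, it must be fed precomputed embeddings rather than a raw prompt. A sketch of the generation call, assuming the `prompt_embeds`/`negative_embeds` computed earlier:

```py
# Generate from precomputed embeddings; no text encoder is loaded here.
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
```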
setup.py
@@ -226,7 +226,7 @@ install_requires = [
 
 setup(
     name="diffusers",
-    version="0.16.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="0.16.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     description="Diffusers",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
@@ -1,4 +1,4 @@
-__version__ = "0.16.0"
+__version__ = "0.16.1"
 
 from .configuration_utils import ConfigMixin
 from .utils import (
@@ -71,6 +71,7 @@ class AttentionBlock(nn.Module):
         self.proj_attn = nn.Linear(channels, channels, bias=True)
 
         self._use_memory_efficient_attention_xformers = False
+        self._use_2_0_attn = True
         self._attention_op = None
 
     def reshape_heads_to_batch_dim(self, tensor, merge_head_and_batch=True):
@@ -142,9 +143,8 @@ class AttentionBlock(nn.Module):
 
         scale = 1 / math.sqrt(self.channels / self.num_heads)
 
-        use_torch_2_0_attn = (
-            hasattr(F, "scaled_dot_product_attention") and not self._use_memory_efficient_attention_xformers
-        )
+        _use_2_0_attn = self._use_2_0_attn and not self._use_memory_efficient_attention_xformers
+        use_torch_2_0_attn = hasattr(F, "scaled_dot_product_attention") and _use_2_0_attn
 
         query_proj = self.reshape_heads_to_batch_dim(query_proj, merge_head_and_batch=not use_torch_2_0_attn)
         key_proj = self.reshape_heads_to_batch_dim(key_proj, merge_head_and_batch=not use_torch_2_0_attn)
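The refactor folds the new opt-out flag into the dispatch: PyTorch 2.0's fused kernel is used only when `scaled_dot_product_attention` exists, the flag is set, and xformers is off. A standalone sketch of the same selection logic under those assumptions:

```py
# Standalone sketch of the attention-backend dispatch after this change.
import torch
import torch.nn.functional as F

use_xformers = False  # mirrors self._use_memory_efficient_attention_xformers
use_2_0_attn = True   # mirrors the new self._use_2_0_attn default

_use_2_0_attn = use_2_0_attn and not use_xformers
use_torch_2_0_attn = hasattr(F, "scaled_dot_product_attention") and _use_2_0_attn

if use_torch_2_0_attn:
    q = k = v = torch.randn(2, 4, 8, 16)  # (batch, heads, seq_len, head_dim)
    out = F.scaled_dot_product_attention(q, k, v)
```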
@@ -41,7 +41,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers.utils import pt_to_pil
         >>> import torch
 
-        >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+        >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
         >>> pipe.enable_model_cpu_offload()
 
         >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
@@ -70,7 +70,7 @@ EXAMPLE_DOC_STRING = """
         >>> original_image = original_image.resize((768, 512))
 
         >>> pipe = IFImg2ImgPipeline.from_pretrained(
-        ...     "DeepFloyd/IF-I-IF-v1.0",
+        ...     "DeepFloyd/IF-I-XL-v1.0",
         ...     variant="fp16",
         ...     torch_dtype=torch.float16,
         ... )
@@ -73,7 +73,7 @@ EXAMPLE_DOC_STRING = """
         >>> original_image = original_image.resize((768, 512))
 
         >>> pipe = IFImg2ImgPipeline.from_pretrained(
-        ...     "DeepFloyd/IF-I-IF-v1.0",
+        ...     "DeepFloyd/IF-I-XL-v1.0",
         ...     variant="fp16",
         ...     torch_dtype=torch.float16,
         ... )
@@ -76,7 +76,7 @@ EXAMPLE_DOC_STRING = """
         >>> mask_image = mask_image
 
         >>> pipe = IFInpaintingPipeline.from_pretrained(
-        ...     "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16
+        ...     "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
         ... )
         >>> pipe.enable_model_cpu_offload()
 
@@ -78,7 +78,7 @@ EXAMPLE_DOC_STRING = """
         >>> mask_image = mask_image
 
         >>> pipe = IFInpaintingPipeline.from_pretrained(
-        ...     "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16
+        ...     "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
         ... )
         >>> pipe.enable_model_cpu_offload()
 
@@ -45,7 +45,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers.utils import pt_to_pil
         >>> import torch
 
-        >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+        >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
         >>> pipe.enable_model_cpu_offload()
 
         >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
@@ -267,7 +267,7 @@ def get_cached_module_file(
 
     # retrieve github version that matches
     if revision is None:
-        revision = latest_version if latest_version in available_versions else "main"
+        revision = latest_version if latest_version[1:] in available_versions else "main"
         logger.info(f"Defaulting to latest_version: {revision}.")
     elif revision in available_versions:
         revision = f"v{revision}"
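The one-character fix matters because `latest_version` carries a leading "v" (a git tag such as "v0.16.1") while `available_versions` holds bare version strings, so the old membership test could never succeed and always fell back to "main". A small sketch with hypothetical values:

```py
# Hypothetical values showing why the old comparison always missed.
latest_version = "v0.16.1"
available_versions = ["0.16.0", "0.16.1"]

old = latest_version if latest_version in available_versions else "main"      # "main"
new = latest_version if latest_version[1:] in available_versions else "main"  # "v0.16.1"
```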
@@ -94,7 +94,7 @@ class IFPipelineSlowTests(unittest.TestCase):
     def test_all(self):
        # if
 
-        pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+        pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 
         pipe_2 = IFSuperResolutionPipeline.from_pretrained(
             "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None