Compare commits

...

5 Commits

Author SHA1 Message Date
Patrick von Platen
249c06c12f Release: v0.22.2
2023-11-07 18:38:28 +01:00
Sayak Paul
0ac7d39830 [PixArt-Alpha] Support non-square images (#5672)
* debug

* support non-square images

* add: test

* fix: test

---------

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2023-11-07 18:37:59 +01:00
Sayak Paul
d190959deb Make sure DDPM and diffusers can be used without Transformers (#5668)
* fix: import bug

* fix

* fix

* fix import utils for lcm

* fix: pixart alpha init

* Fix

---------

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2023-11-07 18:37:51 +01:00
Sayak Paul
d5ff8f81b5 [PixArt-Alpha] fix mask_feature so that precomputed embeddings work with a batch size > 1 (#5677)
* fix embeds

* remove todo

* add: test

* better name
2023-11-07 18:37:43 +01:00
Dhruv Nair
b4ca05fc26 Fix Basic Transformer Block (#5683)
* fix

* Update src/diffusers/models/attention.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

---------

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2023-11-07 18:37:29 +01:00
9 changed files with 172 additions and 18 deletions

setup.py

@@ -244,7 +244,7 @@ install_requires = [
 setup(
     name="diffusers",
-    version="0.22.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="0.22.2",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     description="State-of-the-art diffusion in PyTorch and JAX.",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
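
The release commit only bumps the patch version. A quick post-upgrade sanity check (a minimal sketch, assuming diffusers 0.22.2 is already installed):

import diffusers

# The installed build should report the patched version.
assert diffusers.__version__ == "0.22.2", diffusers.__version__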

src/diffusers/__init__.py

@@ -1,4 +1,4 @@
-__version__ = "0.22.1"
+__version__ = "0.22.2"

 from typing import TYPE_CHECKING

src/diffusers/loaders.py

@@ -2390,7 +2390,7 @@ class LoraLoaderMixin:
     def set_adapters_for_text_encoder(
         self,
         adapter_names: Union[List[str], str],
-        text_encoder: Optional[PreTrainedModel] = None,
+        text_encoder: Optional["PreTrainedModel"] = None,  # noqa: F821
         text_encoder_weights: List[float] = None,
     ):
         """
@@ -2429,7 +2429,7 @@ class LoraLoaderMixin:
         )
         set_weights_and_activate_adapters(text_encoder, adapter_names, text_encoder_weights)

-    def disable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = None):
+    def disable_lora_for_text_encoder(self, text_encoder: Optional["PreTrainedModel"] = None):
         """
         Disables the LoRA layers for the text encoder.
@@ -2446,7 +2446,7 @@ class LoraLoaderMixin:
             raise ValueError("Text Encoder not found.")
         set_adapter_layers(text_encoder, enabled=False)

-    def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = None):
+    def enable_lora_for_text_encoder(self, text_encoder: Optional["PreTrainedModel"] = None):
         """
         Enables the LoRA layers for the text encoder.
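
Quoting the annotation turns PreTrainedModel into a forward reference that is only resolved if something inspects the type hints, so importing diffusers no longer requires transformers to be installed. A minimal sketch of the same pattern, using a hypothetical heavy_lib as the optional dependency:

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen by type checkers only; never executed at runtime.
    from heavy_lib import HeavyModel  # hypothetical optional dependency


def describe(model: Optional["HeavyModel"] = None) -> str:  # noqa: F821
    # The quoted annotation means heavy_lib need not be importable
    # for this module (or this function) to load and run.
    return "no model" if model is None else type(model).__name__

This module imports, and describe(None) runs, even when heavy_lib is absent.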

src/diffusers/models/attention.py

@@ -287,7 +287,7 @@ class BasicTransformerBlock(nn.Module):
             else:
                 raise ValueError("Incorrect norm")

-            if self.pos_embed is not None and self.use_ada_layer_norm_single is None:
+            if self.pos_embed is not None and self.use_ada_layer_norm_single is False:
                 norm_hidden_states = self.pos_embed(norm_hidden_states)

             attn_output = self.attn2(
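
The bug was an identity check against the wrong sentinel: use_ada_layer_norm_single is a plain boolean, so `is None` never held and the positional embedding was silently skipped on this attention path. A toy illustration of the difference:

use_ada_layer_norm_single = False  # boolean flag, as on BasicTransformerBlock

if use_ada_layer_norm_single is None:
    print("unreachable: False is not None")

if use_ada_layer_norm_single is False:
    print("reached: pos_embed gets applied")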

src/diffusers/models/transformer_2d.py

@@ -339,6 +339,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
         elif self.is_input_vectorized:
             hidden_states = self.latent_image_embedding(hidden_states)
         elif self.is_input_patches:
+            height, width = hidden_states.shape[-2] // self.patch_size, hidden_states.shape[-1] // self.patch_size
             hidden_states = self.pos_embed(hidden_states)

             if self.adaln_single is not None:
@@ -425,7 +426,8 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
                 hidden_states = hidden_states.squeeze(1)

             # unpatchify
-            height = width = int(hidden_states.shape[1] ** 0.5)
+            if self.adaln_single is None:
+                height = width = int(hidden_states.shape[1] ** 0.5)
             hidden_states = hidden_states.reshape(
                 shape=(-1, height, width, self.patch_size, self.patch_size, self.out_channels)
             )
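
The old unpatchify step assumed a square token grid; the fix records height and width from the input shape up front and only falls back to the square assumption when adaln_single is unset. A self-contained sketch of why the square assumption fails, with dimensions loosely mirroring the 32x48 test added below:

import torch

patch_size, channels = 2, 4
h, w = 16 // patch_size, 24 // patch_size  # 8 x 12 token grid, 96 tokens
tokens = torch.randn(1, h * w, patch_size * patch_size * channels)

# Square assumption: int(sqrt(96)) == 9, and 9 * 9 != 96, so the
# subsequent reshape would fail for this non-square input.
side = int(tokens.shape[1] ** 0.5)
assert side * side != tokens.shape[1]

# Carrying height and width through from the input shape works:
out = tokens.reshape(-1, h, w, patch_size, patch_size, channels)
assert out.shape == (1, 8, 12, 2, 2, 4)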

src/diffusers/pipelines/latent_consistency_models/__init__.py

@@ -1,19 +1,40 @@
 from typing import TYPE_CHECKING

 from ...utils import (
     DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
     _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
 )

-_import_structure = {
-    "pipeline_latent_consistency_img2img": ["LatentConsistencyModelImg2ImgPipeline"],
-    "pipeline_latent_consistency_text2img": ["LatentConsistencyModelPipeline"],
-}
+_dummy_objects = {}
+_import_structure = {}

-if TYPE_CHECKING:
-    from .pipeline_latent_consistency_img2img import LatentConsistencyModelImg2ImgPipeline
-    from .pipeline_latent_consistency_text2img import LatentConsistencyModelPipeline
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_latent_consistency_img2img"] = ["LatentConsistencyModelImg2ImgPipeline"]
+    _import_structure["pipeline_latent_consistency_text2img"] = ["LatentConsistencyModelPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_latent_consistency_img2img import LatentConsistencyModelImg2ImgPipeline
+        from .pipeline_latent_consistency_text2img import LatentConsistencyModelPipeline

 else:
     import sys
@@ -24,3 +45,6 @@ else:
         _import_structure,
         module_spec=__spec__,
     )
+
+for name, value in _dummy_objects.items():
+    setattr(sys.modules[__name__], name, value)
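
This brings the module in line with the other diffusers pipeline packages: the real classes are registered for lazy import only when both torch and transformers can be imported; otherwise dummy placeholders are installed so that `import diffusers` still succeeds and the failure is deferred to the point of use. A self-contained sketch of the dummy-object idea (illustrative names, not the actual diffusers internals):

class DummyPipeline:
    # Stand-in installed when an optional dependency is missing.
    def __init__(self, name, requirement):
        self._name = name
        self._requirement = requirement

    def __call__(self, *args, **kwargs):
        # Fail loudly at use time rather than at import time.
        raise ImportError(f"{self._name} requires {self._requirement}.")


try:
    import torch  # noqa: F401
    import transformers  # noqa: F401
    deps_available = True
except ImportError:
    deps_available = False

if not deps_available:
    # The import still succeeds; calling the placeholder raises a clear error.
    LatentConsistencyModelPipeline = DummyPipeline(
        "LatentConsistencyModelPipeline", "torch and transformers"
    )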

src/diffusers/pipelines/pixart_alpha/__init__.py

@@ -1 +1,48 @@
-from .pipeline_pixart_alpha import PixArtAlphaPipeline
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+_dummy_objects = {}
+_import_structure = {}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_pixart_alpha"] = ["PixArtAlphaPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_pixart_alpha import PixArtAlphaPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+for name, value in _dummy_objects.items():
+    setattr(sys.modules[__name__], name, value)

src/diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py

@@ -253,7 +253,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             negative_prompt_embeds = None

         # Perform additional masking.
-        if mask_feature:
+        if mask_feature and prompt_embeds is None and negative_prompt_embeds is None:
             prompt_embeds = prompt_embeds.unsqueeze(1)
             masked_prompt_embeds, keep_indices = self.mask_text_embeddings(prompt_embeds, prompt_embeds_attention_mask)
             masked_prompt_embeds = masked_prompt_embeds.squeeze(1)
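
With the extra guard, masking only runs when the pipeline computed the embeddings itself; embeddings the caller precomputed (and already masked) pass through untouched, which is what makes batch sizes above 1 work. A usage sketch mirroring the new test below (a real PixArt-Alpha checkpoint, so this downloads weights and wants a GPU; the prompt is illustrative):

import torch
from diffusers import PixArtAlphaPipeline

pipe = PixArtAlphaPipeline.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", torch_dtype=torch.float16
).to("cuda")

# Encode (and mask) once...
prompt_embeds, negative_prompt_embeds = pipe.encode_prompt("an astronaut riding a horse")

# ...then reuse the cached embeddings for several images per prompt.
images = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt=None,
    negative_prompt_embeds=negative_prompt_embeds,
    num_images_per_prompt=2,
).images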

tests/pipelines/pixart/test_pixart.py

@@ -174,18 +174,99 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         inputs = self.get_dummy_inputs(device)
         image = pipe(**inputs).images
         image_slice = image[0, -3:, -3:, -1]

+        print(torch.from_numpy(image_slice.flatten()))
+
         self.assertEqual(image.shape, (1, 8, 8, 3))
         expected_slice = np.array([0.5303, 0.2658, 0.7979, 0.1182, 0.3304, 0.4608, 0.5195, 0.4261, 0.4675])
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)

+    def test_inference_non_square_images(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe(**inputs, height=32, width=48).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        self.assertEqual(image.shape, (1, 32, 48, 3))
+
+        expected_slice = np.array([0.3859, 0.2987, 0.2333, 0.5243, 0.6721, 0.4436, 0.5292, 0.5373, 0.4416])
+        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
+        self.assertLessEqual(max_diff, 1e-3)
+
+    def test_inference_with_embeddings_and_multiple_images(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        prompt = inputs["prompt"]
+        generator = inputs["generator"]
+        num_inference_steps = inputs["num_inference_steps"]
+        output_type = inputs["output_type"]
+
+        prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(prompt)
+
+        # inputs with prompt converted to embeddings
+        inputs = {
+            "prompt_embeds": prompt_embeds,
+            "negative_prompt": None,
+            "negative_prompt_embeds": negative_prompt_embeds,
+            "generator": generator,
+            "num_inference_steps": num_inference_steps,
+            "output_type": output_type,
+            "num_images_per_prompt": 2,
+        }
+
+        # set all optional components to None
+        for optional_component in pipe._optional_components:
+            setattr(pipe, optional_component, None)
+
+        output = pipe(**inputs)[0]
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir)
+            pipe_loaded = self.pipeline_class.from_pretrained(tmpdir)
+            pipe_loaded.to(torch_device)
+            pipe_loaded.set_progress_bar_config(disable=None)
+
+        for optional_component in pipe._optional_components:
+            self.assertTrue(
+                getattr(pipe_loaded, optional_component) is None,
+                f"`{optional_component}` did not stay set to None after loading.",
+            )
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        generator = inputs["generator"]
+        num_inference_steps = inputs["num_inference_steps"]
+        output_type = inputs["output_type"]
+
+        # inputs with prompt converted to embeddings
+        inputs = {
+            "prompt_embeds": prompt_embeds,
+            "negative_prompt": None,
+            "negative_prompt_embeds": negative_prompt_embeds,
+            "generator": generator,
+            "num_inference_steps": num_inference_steps,
+            "output_type": output_type,
+            "num_images_per_prompt": 2,
+        }
+
+        output_loaded = pipe_loaded(**inputs)[0]
+
+        max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
+        self.assertLess(max_diff, 1e-4)
+
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=1e-3)


 # TODO: needs to be updated.
 @slow
 @require_torch_gpu
 class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):