Compare commits

...

1 Commits

Author SHA1 Message Date
Patrick von Platen
f3abacbfde [WIP] Force unmasked area to not change 2023-06-28 15:47:05 +00:00

View File

@@ -650,12 +650,13 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
"However, either the image or the noise timestep has not been provided."
)
noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
if return_image_latents or (latents is None and not is_strength_max):
image = image.to(device=device, dtype=dtype)
image_latents = self._encode_vae_image(image=image, generator=generator)
if latents is None:
noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
# if strength is 1. then initialise the latents to noise, else initial to image + noise
latents = noise if is_strength_max else self.scheduler.add_noise(image_latents, noise, timestep)
# if pure noise then scale the initial latents by the Scheduler's init sigma
@@ -762,6 +763,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
force_unmasked_unchanged: Optional[bool] = None,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -944,7 +946,16 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
# 6. Prepare latent variables
num_channels_latents = self.vae.config.latent_channels
num_channels_unet = self.unet.config.in_channels
return_image_latents = num_channels_unet == 4
return_image_latents = num_channels_unet == 4 or force_unmasked_unchanged
if num_channels_unet == 4 and force_unmasked_unchanged is False:
raise ValueError("Cannot set `force_unmasked_unchanged=False` for inpainting if the UNet has only 4 input channels. Either set `force_unmasked_unchanged` to `True` or use a UNet checkpoint that has been trained specifically for inpainting.")
elif num_channels_unet == 4 and force_unmasked_unchanged is None:
# For checkpoints not trained for inpainting the unmasked area should be default not be changed
force_unmasked_unchanged = True
elif num_channels_unet > 4 and force_unmasked_unchanged is None:
# For checkpoints trained for inpainting the unmasked area should be default be changed to ensure more
force_unmasked_unchanged = False
latents_outputs = self.prepare_latents(
batch_size * num_images_per_prompt,
@@ -959,10 +970,10 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
timestep=latent_timestep,
is_strength_max=is_strength_max,
return_noise=True,
return_image_latents=return_image_latents,
return_image_latents=force_unmasked_unchanged,
)
if return_image_latents:
if force_unmasked_unchanged:
latents, noise, image_latents = latents_outputs
else:
latents, noise = latents_outputs
@@ -1033,17 +1044,18 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
# compute the previous noisy sample x_t -> x_t-1
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
if num_channels_unet == 4:
init_latents_proper = image_latents[:1]
init_mask = mask[:1]
if i < len(timesteps) - 1:
noise_timestep = timesteps[i + 1]
init_latents_proper = self.scheduler.add_noise(
init_latents_proper, noise, torch.tensor([noise_timestep])
)
if num_channels_unet == 4 and i < len(timesteps) - 1:
# add noise for next timestep
noise_timestep = timesteps[i + 1]
latents = (1 - init_mask) * init_latents_proper + init_mask * latents
init_latents_proper = self.scheduler.add_noise(
image_latents[:1], noise, torch.tensor([noise_timestep])
)
latents = (1 - mask[:1]) * init_latents_proper + mask[:1] * latents
if force_unmasked_unchanged and i == (len(timesteps) - 1):
latents = (1 - mask[:1]) * image_latents[:1] + mask[:1] * latents
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):