Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-06 20:44:33 +08:00)
Compare commits
1 commit: pr-tests-f ... force_unma

| Author | SHA1 | Date |
|---|---|---|
|  | f3abacbfde |  |
```diff
@@ -650,12 +650,13 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
                 "However, either the image or the noise timestep has not been provided."
             )
 
+        noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+
         if return_image_latents or (latents is None and not is_strength_max):
             image = image.to(device=device, dtype=dtype)
             image_latents = self._encode_vae_image(image=image, generator=generator)
 
         if latents is None:
-            noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
             # if strength is 1. then initialise the latents to noise, else initial to image + noise
             latents = noise if is_strength_max else self.scheduler.add_noise(image_latents, noise, timestep)
             # if pure noise then scale the initial latents by the Scheduler's init sigma
```
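Context for the hunk above: `noise` now gets created before the `if latents is None:` branch because the denoising loop later reuses the same tensor to re-noise `image_latents` over the unmasked region. A minimal sketch of that reuse, with an assumed scheduler, seed, and latent shape (none of which come from the diff):

```python
# Illustrative only: one `noise` tensor is prepared up front and reused both to
# initialise the latents and to re-noise the image latents at other timesteps.
import torch
from diffusers import DDPMScheduler

scheduler = DDPMScheduler(num_train_timesteps=1000)  # assumed scheduler
generator = torch.Generator().manual_seed(0)

shape = (1, 4, 64, 64)                        # assumed latent shape
noise = torch.randn(shape, generator=generator)
image_latents = torch.randn(shape)            # stand-in for VAE-encoded image latents

# strength < 1: start from the image latents noised to the chosen timestep
latents = scheduler.add_noise(image_latents, noise, torch.tensor([500]))

# later, the same `noise` can re-noise the image latents for an earlier timestep
reinit = scheduler.add_noise(image_latents, noise, torch.tensor([250]))
print(latents.shape, reinit.shape)  # torch.Size([1, 4, 64, 64]) twice
```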
```diff
@@ -762,6 +763,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+        force_unmasked_unchanged: Optional[bool] = None,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
```
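The new `force_unmasked_unchanged` keyword is surfaced on `__call__`. A hedged usage sketch, assuming this branch is installed; the checkpoint id, image URLs, and prompt are placeholders, not part of the diff:

```python
import torch
from diffusers import StableDiffusionInpaintPipeline
from diffusers.utils import load_image

# Placeholder checkpoint and inputs; only `force_unmasked_unchanged` is specific
# to this branch.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")

init_image = load_image("https://example.com/input.png")  # hypothetical URL
mask_image = load_image("https://example.com/mask.png")   # hypothetical URL

# With force_unmasked_unchanged=True, the unmasked region of the latents is
# overwritten with the re-encoded original image latents at the final step.
result = pipe(
    prompt="a red fox sitting on a park bench",
    image=init_image,
    mask_image=mask_image,
    force_unmasked_unchanged=True,
).images[0]
result.save("inpainted.png")
```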
```diff
@@ -944,7 +946,16 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
         # 6. Prepare latent variables
         num_channels_latents = self.vae.config.latent_channels
         num_channels_unet = self.unet.config.in_channels
-        return_image_latents = num_channels_unet == 4
+
+        return_image_latents = num_channels_unet == 4 or force_unmasked_unchanged
+        if num_channels_unet == 4 and force_unmasked_unchanged is False:
+            raise ValueError("Cannot set `force_unmasked_unchanged=False` for inpainting if the UNet has only 4 input channels. Either set `force_unmasked_unchanged` to `True` or use a UNet checkpoint that has been trained specifically for inpainting.")
+        elif num_channels_unet == 4 and force_unmasked_unchanged is None:
+            # For checkpoints not trained for inpainting the unmasked area should by default not be changed
+            force_unmasked_unchanged = True
+        elif num_channels_unet > 4 and force_unmasked_unchanged is None:
+            # For checkpoints trained for inpainting the unmasked area should by default be changed to ensure more
+            force_unmasked_unchanged = False
 
         latents_outputs = self.prepare_latents(
             batch_size * num_images_per_prompt,
```
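Restating the defaulting rules from the hunk above as a standalone helper, purely for illustration (the helper name and the shortened error message are not from the PR):

```python
from typing import Optional


def resolve_force_unmasked_unchanged(num_channels_unet: int, flag: Optional[bool]) -> bool:
    """Illustrative restatement of the branching in the hunk above."""
    if num_channels_unet == 4 and flag is False:
        # 4-channel UNets are not trained for inpainting, so the unmasked area
        # must be pasted back from the original image latents.
        raise ValueError("force_unmasked_unchanged=False requires an inpainting UNet (>4 input channels).")
    if flag is None:
        # default: preserve the unmasked area only for non-inpainting (4-channel) UNets
        return num_channels_unet == 4
    return flag


assert resolve_force_unmasked_unchanged(4, None) is True
assert resolve_force_unmasked_unchanged(9, None) is False
assert resolve_force_unmasked_unchanged(9, True) is True
```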
```diff
@@ -959,10 +970,10 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
             timestep=latent_timestep,
             is_strength_max=is_strength_max,
             return_noise=True,
-            return_image_latents=return_image_latents,
+            return_image_latents=force_unmasked_unchanged,
         )
 
-        if return_image_latents:
+        if force_unmasked_unchanged:
             latents, noise, image_latents = latents_outputs
         else:
             latents, noise = latents_outputs
```
```diff
@@ -1033,17 +1044,18 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
 
-                if num_channels_unet == 4:
-                    init_latents_proper = image_latents[:1]
-                    init_mask = mask[:1]
-
-                    if i < len(timesteps) - 1:
-                        noise_timestep = timesteps[i + 1]
-                        init_latents_proper = self.scheduler.add_noise(
-                            init_latents_proper, noise, torch.tensor([noise_timestep])
-                        )
-
-                    latents = (1 - init_mask) * init_latents_proper + init_mask * latents
+                if num_channels_unet == 4 and i < len(timesteps) - 1:
+                    # add noise for next timestep
+                    noise_timestep = timesteps[i + 1]
+
+                    init_latents_proper = self.scheduler.add_noise(
+                        image_latents[:1], noise, torch.tensor([noise_timestep])
+                    )
+                    latents = (1 - mask[:1]) * init_latents_proper + mask[:1] * latents
+
+                if force_unmasked_unchanged and i == (len(timesteps) - 1):
+                    latents = (1 - mask[:1]) * image_latents[:1] + mask[:1] * latents
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
```
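The last-step paste-back added above is a plain masked blend in latent space. A tiny self-contained sketch of that operation with made-up shapes (not the pipeline's actual tensors):

```python
import torch

latents = torch.randn(1, 4, 64, 64)         # denoised latents at the final step
image_latents = torch.randn(1, 4, 64, 64)   # stand-in for the VAE-encoded input image
mask = torch.zeros(1, 1, 64, 64)
mask[..., 32:, :] = 1.0                      # only the bottom half is inpainted

# where mask == 0 keep the original image latents, where mask == 1 keep the result
blended = (1 - mask) * image_latents + mask * latents

# the unmasked (top) half is exactly the original image latents
assert torch.equal(blended[..., :32, :], image_latents[..., :32, :])
```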