Mirror of https://github.com/huggingface/diffusers.git, synced 2025-12-07 04:54:47 +08:00

Compare commits: diffusers-... ... correct_im... (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 2b76136885 | |
| | 1c0b63c0f6 | |
| | bae06d893f | |
| | c6598d23d9 | |
| | 1abcd21d90 | |
| | 626f86f48b | |
| | 53fc6f1b31 | |
```diff
@@ -376,14 +376,20 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
 
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
-        offset = self.scheduler.config.get("steps_offset", 0)
-        init_timestep = int(num_inference_steps * strength) + offset
-        init_timestep = min(init_timestep, num_inference_steps)
+        if not strength < 1.0:
+            raise ValueError(
+                f"strength={strength} is too high for the original image to be taken into account. Make sure that"
+                " strength < 1.0."
+            )
+
+        init_timestep = int(num_inference_steps * strength)
 
-        t_start = max(num_inference_steps - init_timestep + offset, 0)
+        t_start = num_inference_steps - init_timestep
         timesteps = self.scheduler.timesteps[t_start:]
+        latent_timestep = self.scheduler.timesteps[t_start - 1]
 
-        return timesteps, num_inference_steps - t_start
+        return timesteps, latent_timestep, num_inference_steps - t_start
 
     def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None):
         image = image.to(device=device, dtype=dtype)
```
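To make the behavior change concrete, here is a small standalone sketch (hypothetical numbers, assuming `steps_offset=1` as in the Stable Diffusion scheduler configs; `timesteps_full` is a stand-in for `scheduler.timesteps` after `set_timesteps(50)` on a 1000-step training schedule, not the pipelines' actual code):

```python
import torch

num_inference_steps, strength, offset = 50, 0.6, 1
timesteps_full = torch.arange(0, 1000, 20).flip(0)  # [980, 960, ..., 0], 50 entries

# old: offset is added to init_timestep and again inside t_start, so it cancels
init_old = min(int(num_inference_steps * strength) + offset, num_inference_steps)
t_start_old = max(num_inference_steps - init_old + offset, 0)

# new: no offset bookkeeping, plus an explicit noising timestep
init_new = int(num_inference_steps * strength)
t_start_new = num_inference_steps - init_new
latent_timestep = timesteps_full[t_start_new - 1]

print(t_start_old, t_start_new)                                    # 20 20
print(timesteps_full[t_start_new].item(), latent_timestep.item())  # 580 600
```

With these numbers the denoising window itself is unchanged (the two `offset` terms cancel), but the bookkeeping is gone, an invalid `strength >= 1.0` now fails loudly instead of being silently clamped, and the new `latent_timestep` return value is one scheduler step above the first denoising timestep: the image latents get noised to t=600 and the loop then starts denoising at t=580.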
```diff
@@ -410,11 +416,11 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
         init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0)
 
         # add noise to latents using the timesteps
+        torch.manual_seed(0)
         noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=dtype)
 
         # get latents
-        init_latents = self.scheduler.add_noise(init_latents, noise, timestep)
-        latents = init_latents
+        latents = self.scheduler.add_noise(init_latents, noise, timestep)
 
         return latents
```
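Two things stand out in this hunk. First, `latents` is now assigned directly from `add_noise`, dropping the redundant intermediate rebinding of `init_latents`. Second, the added `torch.manual_seed(0)` pins the global RNG and reads like a leftover debugging aid from this work-in-progress branch: the `generator` passed to `torch.randn` already controls reproducibility. For reference, a minimal sketch of what a DDPM-style `add_noise` computes (assuming precomputed `alphas_cumprod`; this is the standard forward-diffusion formula, not the exact diffusers implementation):

```python
import torch

def add_noise_sketch(original_samples, noise, timestep, alphas_cumprod):
    # forward diffusion: x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * eps
    alpha_bar = alphas_cumprod[timestep]
    return alpha_bar.sqrt() * original_samples + (1.0 - alpha_bar).sqrt() * noise
```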
```diff
@@ -517,8 +523,8 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
 
         # 5. set timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        timesteps, latent_timestep, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
+        latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
 
         # 6. Prepare latent variables
         latents = self.prepare_latents(
```
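At the call site, `latent_timestep` used to be derived from the first timestep of the denoising loop (`timesteps[:1]`); it is now the dedicated value returned by `get_timesteps`, one scheduler step earlier, and is still repeated to one entry per generated image (`batch_size * num_images_per_prompt`). The same two-line call-site change recurs in each pipeline below.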
```diff
@@ -414,14 +414,20 @@ class CycleDiffusionPipeline(DiffusionPipeline):
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
-        offset = self.scheduler.config.get("steps_offset", 0)
-        init_timestep = int(num_inference_steps * strength) + offset
-        init_timestep = min(init_timestep, num_inference_steps)
+        if not strength < 1.0:
+            raise ValueError(
+                f"strength={strength} is too high for the original image to be taken into account. Make sure that"
+                " strength < 1.0."
+            )
+
+        init_timestep = int(num_inference_steps * strength)
 
-        t_start = max(num_inference_steps - init_timestep + offset, 0)
+        t_start = num_inference_steps - init_timestep
         timesteps = self.scheduler.timesteps[t_start:]
+        latent_timestep = self.scheduler.timesteps[t_start - 1]
 
-        return timesteps, num_inference_steps - t_start
+        return timesteps, latent_timestep, num_inference_steps - t_start
 
     def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None):
         image = image.to(device=device, dtype=dtype)
```
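The `# Copied from ...` marker above `get_timesteps` explains why the identical hunk appears in every pipeline in this compare: in diffusers, methods carrying this marker are kept mechanically in sync with the referenced source method (via the repo's copy-checking tooling, e.g. `make fix-copies`), so editing `StableDiffusionImg2ImgPipeline.get_timesteps` propagates the same change to every copy.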
```diff
@@ -558,8 +564,8 @@ class CycleDiffusionPipeline(DiffusionPipeline):
 
         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        timesteps, latent_timestep, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
+        latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
 
         # 6. Prepare latent variables
         latents, clean_latents = self.prepare_latents(
```
```diff
@@ -323,14 +323,20 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline):
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
-        offset = self.scheduler.config.get("steps_offset", 0)
-        init_timestep = int(num_inference_steps * strength) + offset
-        init_timestep = min(init_timestep, num_inference_steps)
+        if not strength < 1.0:
+            raise ValueError(
+                f"strength={strength} is too high for the original image to be taken into account. Make sure that"
+                " strength < 1.0."
+            )
+
+        init_timestep = int(num_inference_steps * strength)
 
-        t_start = max(num_inference_steps - init_timestep + offset, 0)
+        t_start = num_inference_steps - init_timestep
         timesteps = self.scheduler.timesteps[t_start:]
+        latent_timestep = self.scheduler.timesteps[t_start - 1]
 
-        return timesteps, num_inference_steps - t_start
+        return timesteps, latent_timestep, num_inference_steps - t_start
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.prepare_latents
     def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None):
```
```diff
@@ -358,11 +364,11 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline):
         init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0)
 
         # add noise to latents using the timesteps
+        torch.manual_seed(0)
         noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=dtype)
 
         # get latents
-        init_latents = self.scheduler.add_noise(init_latents, noise, timestep)
-        latents = init_latents
+        latents = self.scheduler.add_noise(init_latents, noise, timestep)
 
         return latents
```
```diff
@@ -514,8 +520,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline):
 
         # 6. set timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        timesteps, latent_timestep, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
+        latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
 
         # 7. Prepare latent variables
         latents = self.prepare_latents(
```
```diff
@@ -381,14 +381,20 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
 
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
-        offset = self.scheduler.config.get("steps_offset", 0)
-        init_timestep = int(num_inference_steps * strength) + offset
-        init_timestep = min(init_timestep, num_inference_steps)
+        if not strength < 1.0:
+            raise ValueError(
+                f"strength={strength} is too high for the original image to be taken into account. Make sure that"
+                " strength < 1.0."
+            )
+
+        init_timestep = int(num_inference_steps * strength)
 
-        t_start = max(num_inference_steps - init_timestep + offset, 0)
+        t_start = num_inference_steps - init_timestep
         timesteps = self.scheduler.timesteps[t_start:]
+        latent_timestep = self.scheduler.timesteps[t_start - 1]
 
-        return timesteps, num_inference_steps - t_start
+        return timesteps, latent_timestep, num_inference_steps - t_start
 
     def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None):
         image = image.to(device=device, dtype=dtype)
```
```diff
@@ -415,11 +421,11 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
         init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0)
 
         # add noise to latents using the timesteps
+        torch.manual_seed(0)
         noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=dtype)
 
         # get latents
-        init_latents = self.scheduler.add_noise(init_latents, noise, timestep)
-        latents = init_latents
+        latents = self.scheduler.add_noise(init_latents, noise, timestep)
 
         return latents
```
```diff
@@ -522,8 +528,8 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
 
         # 5. set timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        timesteps, latent_timestep, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
+        latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
 
         # 6. Prepare latent variables
         latents = self.prepare_latents(
```
```diff
@@ -396,14 +396,20 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
    def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
-        offset = self.scheduler.config.get("steps_offset", 0)
-        init_timestep = int(num_inference_steps * strength) + offset
-        init_timestep = min(init_timestep, num_inference_steps)
+        if not strength < 1.0:
+            raise ValueError(
+                f"strength={strength} is too high for the original image to be taken into account. Make sure that"
+                " strength < 1.0."
+            )
+
+        init_timestep = int(num_inference_steps * strength)
 
-        t_start = max(num_inference_steps - init_timestep + offset, 0)
+        t_start = num_inference_steps - init_timestep
         timesteps = self.scheduler.timesteps[t_start:]
+        latent_timestep = self.scheduler.timesteps[t_start - 1]
 
-        return timesteps, num_inference_steps - t_start
+        return timesteps, latent_timestep, num_inference_steps - t_start
 
     def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator):
         image = image.to(device=self.device, dtype=dtype)
```
```diff
@@ -528,8 +534,8 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
 
         # 5. set timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
-        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        timesteps, latent_timestep, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
+        latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
 
         # 6. Prepare latent variables
         # encode the init image into latents and scale the latents
```
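Putting the pieces together, the setup phase shared by all five pipelines after this change can be summarized with the following self-contained sketch (hypothetical values; `timesteps_full` again stands in for `scheduler.timesteps`, and the real pipelines pass `latent_timestep` on to `prepare_latents`):

```python
import torch

num_inference_steps, strength = 50, 0.6
batch_size, num_images_per_prompt = 1, 2
timesteps_full = torch.arange(0, 1000, 20).flip(0)

# get_timesteps: denoising window plus the explicit noising timestep
init_timestep = int(num_inference_steps * strength)
t_start = num_inference_steps - init_timestep
timesteps = timesteps_full[t_start:]           # the denoising loop iterates over these
latent_timestep = timesteps_full[t_start - 1]  # noise level applied to the image latents

# call site: one noising timestep per generated image
latent_timestep = latent_timestep.repeat(batch_size * num_images_per_prompt)
print(timesteps[0].item(), latent_timestep.tolist())  # 580 [600, 600]
```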