Compare commits

...

1 Commit

Author       SHA1         Message                                              Date
Dhruv Nair   59d2bd3799   use max of tensor diff to compare pipeline outputs   2023-08-24 09:48:15 +00:00
13 changed files with 85 additions and 177 deletions
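The pattern across these files: output-comparison asserts switch from the summed to the maximal absolute tensor difference. A minimal sketch, not part of the diff, of why a fixed threshold only makes sense with the max — the sum scales with element count, so the same 1e-5 budget means very different things for different output shapes:

    import torch

    a = torch.zeros(1, 3, 512, 512)
    b = torch.full_like(a, 1e-9)  # negligible per-element noise

    # sum grows with the number of elements: ~7.9e-4 here, failing `< 1e-5`
    print((a - b).abs().sum().item())
    # max bounds the worst single element: 1e-9, passing as it should
    print((a - b).abs().max().item())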

View File

@@ -664,7 +664,6 @@ class SDXLLoraLoaderMixinTests(unittest.TestCase):
                 unet_lora_layers=lora_components["unet_lora_layers"],
                 text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"],
                 text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"],
-                safe_serialization=False,
             )
             self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
             sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))

View File

@@ -221,7 +221,7 @@ class ModelTesterMixin:
         if isinstance(new_image, dict):
             new_image = new_image.to_tuple()[0]

-        max_diff = (image - new_image).abs().sum().item()
+        max_diff = (image - new_image).abs().max().item()
         self.assertLessEqual(max_diff, 5e-5, "Models give different forward passes")

     def test_getattr_is_correct(self):
@@ -351,7 +351,7 @@ class ModelTesterMixin:
         if isinstance(new_image, dict):
             new_image = new_image.to_tuple()[0]

-        max_diff = (image - new_image).abs().sum().item()
+        max_diff = (image - new_image).abs().max().item()
         self.assertLessEqual(max_diff, 5e-5, "Models give different forward passes")

     @require_torch_2

View File

@@ -137,7 +137,7 @@ class UNetLDMModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
             model_accelerate.config.in_channels,
             model_accelerate.config.sample_size,
             model_accelerate.config.sample_size,
-            generator=torch.manual_seed(0),
+            generator=torch.Generator("cpu").manual_seed(0),
         )
         noise = noise.to(torch_device)
         time_step = torch.tensor([10] * noise.shape[0]).to(torch_device)
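A short note on what the generator change buys, with illustrative shapes that are not from the test: `torch.manual_seed(0)` seeds and returns the process-wide default generator, whose stream any intervening RNG use advances, whereas a dedicated `torch.Generator("cpu")` gives the test its own stream that replays exactly and is independent of the default device.

    import torch

    gen = torch.Generator("cpu").manual_seed(0)
    noise = torch.randn(1, 4, 32, 32, generator=gen)  # shape is hypothetical

    # a freshly seeded generator reproduces the draw exactly, regardless of
    # what else in the process has consumed the global RNG in the meantime
    gen2 = torch.Generator("cpu").manual_seed(0)
    assert torch.equal(noise, torch.randn(1, 4, 32, 32, generator=gen2))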
@@ -263,7 +263,7 @@ class NCSNppModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         output_slice = output[0, -3:, -3:, -1].flatten().cpu()
         # fmt: off
-        expected_output_slice = torch.tensor([-4842.8691, -6499.6631, -3800.1953, -7978.2686, -10980.7129, -20028.8535, 8148.2822, 2342.2905, 567.7608])
+        expected_output_slice = torch.tensor([-4836.2178, -6487.1470, -3816.8196, -7964.9302, -10966.3037, -20043.5957, 8137.0513, 2340.3328, 544.6056])
         # fmt: on

         self.assertTrue(torch_all_close(output_slice, expected_output_slice, rtol=1e-2))
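The re-recorded expected slice is plausibly a consequence of the generator rework in this commit: draws from the global default stream depend on everything else that touched the RNG, so pinning tests to dedicated generators changes (and stabilizes) their inputs. A small demonstration of the divergence, assumptions mine:

    import torch

    torch.manual_seed(0)
    _ = torch.rand(1)          # unrelated code consuming the global stream
    x_global = torch.randn(4)  # no longer the first draw after seeding

    gen = torch.Generator("cpu").manual_seed(0)
    x_local = torch.randn(4, generator=gen)  # always the first draw from gen

    print(torch.equal(x_global, x_local))  # False: the streams diverged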

View File

@@ -726,8 +726,8 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
             model.disable_xformers_memory_efficient_attention()
             off_sample = model(**inputs_dict).sample

-        assert (sample - on_sample).abs().max() < 1e-4
-        assert (sample - off_sample).abs().max() < 1e-4
+        assert (sample - on_sample).abs().max() <= 5e-4
+        assert (sample - off_sample).abs().max() <= 5e-4

     def test_custom_diffusion_processors(self):
         # enable deterministic behavior for gradient checkpointing

View File

@@ -285,23 +285,6 @@ class AutoencoderTinyIntegrationTests(unittest.TestCase):
         model.to(torch_device).eval()
         return model

-    @parameterized.expand(
-        [
-            [(1, 4, 73, 97), (1, 3, 584, 776)],
-            [(1, 4, 97, 73), (1, 3, 776, 584)],
-            [(1, 4, 49, 65), (1, 3, 392, 520)],
-            [(1, 4, 65, 49), (1, 3, 520, 392)],
-            [(1, 4, 49, 49), (1, 3, 392, 392)],
-        ]
-    )
-    def test_tae_tiling(self, in_shape, out_shape):
-        model = self.get_sd_vae_model()
-        model.enable_tiling()
-        with torch.no_grad():
-            zeros = torch.zeros(in_shape).to(torch_device)
-            dec = model.decode(zeros).sample
-        assert dec.shape == out_shape
-
     def test_stable_diffusion(self):
         model = self.get_sd_vae_model()
         image = self.get_sd_image(seed=33)
@@ -312,32 +295,10 @@ class AutoencoderTinyIntegrationTests(unittest.TestCase):
         assert sample.shape == image.shape

         output_slice = sample[-1, -2:, -2:, :2].flatten().float().cpu()
-        expected_output_slice = torch.tensor([0.0093, 0.6385, -0.1274, 0.1631, -0.1762, 0.5232, -0.3108, -0.0382])
+        expected_output_slice = torch.tensor([0.9858, 0.9262, 0.8629, 1.0974, -0.091, -0.2485, 0.0936, 0.0604])

         assert torch_all_close(output_slice, expected_output_slice, atol=3e-3)

-    @parameterized.expand([(True,), (False,)])
-    def test_tae_roundtrip(self, enable_tiling):
-        # load the autoencoder
-        model = self.get_sd_vae_model()
-        if enable_tiling:
-            model.enable_tiling()
-
-        # make a black image with a white square in the middle,
-        # which is large enough to split across multiple tiles
-        image = -torch.ones(1, 3, 1024, 1024, device=torch_device)
-        image[..., 256:768, 256:768] = 1.0
-
-        # round-trip the image through the autoencoder
-        with torch.no_grad():
-            sample = model(image).sample
-
-        # the autoencoder reconstruction should match original image, sorta
-        def downscale(x):
-            return torch.nn.functional.avg_pool2d(x, model.spatial_scale_factor)
-
-        assert torch_all_close(downscale(sample), downscale(image), atol=0.125)
-

 @slow
 class AutoencoderKLIntegrationTests(unittest.TestCase):

View File

@@ -31,7 +31,7 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
-from diffusers.utils import load_image, load_numpy, randn_tensor, slow, torch_device
+from diffusers.utils import load_image, load_numpy, nightly, randn_tensor, slow, torch_device
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
@@ -925,42 +925,6 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
         expected_slice = np.array([0.1338, 0.1597, 0.1202, 0.1687, 0.1377, 0.1017, 0.2070, 0.1574, 0.1348])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    def test_load_local(self):
-        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
-        pipe_1 = StableDiffusionControlNetPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
-        )
-
-        controlnet = ControlNetModel.from_single_file(
-            "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
-        )
-        pipe_2 = StableDiffusionControlNetPipeline.from_single_file(
-            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
-            safety_checker=None,
-            controlnet=controlnet,
-        )
-        pipes = [pipe_1, pipe_2]
-        images = []
-
-        for pipe in pipes:
-            pipe.enable_model_cpu_offload()
-            pipe.set_progress_bar_config(disable=None)
-
-            generator = torch.Generator(device="cpu").manual_seed(0)
-            prompt = "bird"
-            image = load_image(
-                "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-            )
-
-            output = pipe(prompt, image, generator=generator, output_type="np", num_inference_steps=3)
-            images.append(output.images[0])
-
-            del pipe
-            gc.collect()
-            torch.cuda.empty_cache()
-
-        assert np.abs(images[0] - images[1]).sum() < 1e-3
-

 @slow
 @require_torch_gpu
@@ -1000,3 +964,48 @@ class StableDiffusionMultiControlNetPipelineSlowTests(unittest.TestCase):
         )

         assert np.abs(expected_image - image).max() < 5e-2
+
+
+@nightly
+@require_torch_gpu
+class StableDiffusionMultiControlNetPipelineNightlyTests(unittest.TestCase):
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def test_load_local(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
+        pipe_1 = StableDiffusionControlNetPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
+        )
+
+        controlnet = ControlNetModel.from_single_file(
+            "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
+        )
+        pipe_2 = StableDiffusionControlNetPipeline.from_single_file(
+            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
+            safety_checker=None,
+            controlnet=controlnet,
+        )
+        pipes = [pipe_1, pipe_2]
+        images = []
+
+        for pipe in pipes:
+            pipe.enable_model_cpu_offload()
+            pipe.set_progress_bar_config(disable=None)
+
+            generator = torch.Generator(device="cpu").manual_seed(0)
+            prompt = "bird"
+            image = load_image(
+                "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
+            )
+
+            output = pipe(prompt, image, generator=generator, output_type="np", num_inference_steps=3)
+            images.append(output.images[0])
+
+            del pipe
+            gc.collect()
+            torch.cuda.empty_cache()
+
+        assert np.abs(images[0] - images[1]).max() < 1e-3
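The relocated test also picks up the commit's theme: the final comparison of the two pipelines' outputs is now by max rather than sum of the absolute difference. For (H, W, 3) float images in [0, 1], a 1e-3 max budget reads as a per-value bound; illustrative numbers below, not from the test:

    import numpy as np

    img_a = np.zeros((512, 512, 3), dtype=np.float32)
    img_b = img_a + 5e-4  # uniform, visually negligible deviation

    assert np.abs(img_a - img_b).max() < 1e-3  # worst value is within budget
    # the old sum-based check accumulates the same deviation to ~393
    assert np.abs(img_a - img_b).sum() > 1e-3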

View File

@@ -33,7 +33,7 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
-from diffusers.utils import floats_tensor, load_image, load_numpy, randn_tensor, slow, torch_device
+from diffusers.utils import floats_tensor, load_image, load_numpy, nightly, randn_tensor, slow, torch_device
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
@@ -402,6 +402,15 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
         assert np.abs(expected_image - image).max() < 9e-2

+
+@nightly
+@require_torch_gpu
+class ControlNetImg2ImgPipelineNightlyTests(unittest.TestCase):
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def test_load_local(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
         pipe_1 = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
@@ -446,4 +455,4 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
             gc.collect()
             torch.cuda.empty_cache()

-        assert np.abs(images[0] - images[1]).sum() < 1e-3
+        assert np.abs(images[0] - images[1]).max() < 1e-3

View File

@@ -33,7 +33,7 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
-from diffusers.utils import floats_tensor, load_image, load_numpy, randn_tensor, slow, torch_device
+from diffusers.utils import floats_tensor, load_image, load_numpy, nightly, randn_tensor, slow, torch_device
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
@@ -544,6 +544,15 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
         assert np.abs(expected_image - image).max() < 9e-2

+
+@nightly
+@require_torch_gpu
+class ControlNetInpaintPipelineNightlyTests(unittest.TestCase):
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def test_load_local(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
         pipe_1 = StableDiffusionControlNetInpaintPipeline.from_pretrained(
@@ -593,4 +602,4 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
             gc.collect()
             torch.cuda.empty_cache()

-        assert np.abs(images[0] - images[1]).sum() < 1e-3
+        assert np.abs(images[0] - images[1]).max() < 1e-3

View File

@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import gc
 import unittest

 import numpy as np
@@ -28,9 +27,9 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
-from diffusers.utils import load_image, randn_tensor, torch_device
+from diffusers.utils import randn_tensor, torch_device
 from diffusers.utils.import_utils import is_xformers_available
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu

 from ..pipeline_params import (
     IMAGE_TO_IMAGE_IMAGE_PARAMS,
@@ -679,81 +678,3 @@ class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=2e-3)
-
-
-@slow
-@require_torch_gpu
-class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
-
-    def test_canny(self):
-        controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
-        )
-        pipe.enable_sequential_cpu_offload()
-        pipe.set_progress_bar_config(disable=None)
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "bird"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        )
-
-        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
-        assert images[0].shape == (768, 512, 3)
-
-        original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4185, 0.4127, 0.4089, 0.4046, 0.4115, 0.4096, 0.4081, 0.4112, 0.3913])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
-
-    def test_depth(self):
-        controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0")
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
-        )
-        pipe.enable_sequential_cpu_offload()
-        pipe.set_progress_bar_config(disable=None)
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "Stormtrooper's lecture"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/stormtrooper_depth.png"
-        )
-
-        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
-        assert images[0].shape == (512, 512, 3)
-
-        original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4399, 0.5112, 0.5478, 0.4314, 0.472, 0.4823, 0.4647, 0.4957, 0.4853])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
-
-    def test_canny_lora(self):
-        controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
-        )
-        pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
-        pipe.enable_sequential_cpu_offload()
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "corgi"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        )
-
-        images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
-        assert images[0].shape == (768, 512, 3)
-
-        original_image = images[0, -3:, -3:, -1].flatten()
-        expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
-        assert np.allclose(original_image, expected_image, atol=1e-04)

View File

@@ -122,7 +122,7 @@ def _test_from_save_pretrained_dynamo(in_queue, out_queue, timeout):
         generator = torch.Generator(device=torch_device).manual_seed(0)
         new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images

-        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+        assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass"
     except Exception:
         error = f"{traceback.format_exc()}"
@@ -1543,7 +1543,7 @@ class PipelineSlowTests(unittest.TestCase):
         generator = torch.Generator(device=torch_device).manual_seed(0)
         new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images

-        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+        assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass"

     @require_torch_2
     def test_from_save_pretrained_dynamo(self):
def test_from_save_pretrained_dynamo(self):
@@ -1568,7 +1568,7 @@ class PipelineSlowTests(unittest.TestCase):
         generator = torch.Generator(device=torch_device).manual_seed(0)
         new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images

-        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+        assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass"

     def test_from_pretrained_hub_pass_model(self):
         model_path = "google/ddpm-cifar10-32"
model_path = "google/ddpm-cifar10-32"
@@ -1591,7 +1591,7 @@ class PipelineSlowTests(unittest.TestCase):
         generator = torch.Generator(device=torch_device).manual_seed(0)
         new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images

-        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+        assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass"

     def test_output_format(self):
         model_path = "google/ddpm-cifar10-32"
model_path = "google/ddpm-cifar10-32"
@@ -1625,7 +1625,7 @@ class PipelineSlowTests(unittest.TestCase):
         from diffusers import FlaxStableDiffusionPipeline

         with tempfile.TemporaryDirectory() as tmpdirname:
-            pipe_pt.save_pretrained(tmpdirname)
+            pipe_pt.save_pretrained(tmpdirname, safe_serialization=False)

             pipe_flax, params = FlaxStableDiffusionPipeline.from_pretrained(
                 tmpdirname, safety_checker=None, from_pt=True
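A reading of the `safe_serialization=False` addition, inferred from the diff rather than stated in it: the Flax `from_pt=True` path converts a pickled PyTorch checkpoint, so the PyTorch pipeline has to opt out of safetensors-by-default saving. The flag selects between the two serialization backends roughly like this standalone sketch:

    import torch
    from safetensors.torch import save_file

    state = {"weight": torch.zeros(2, 2)}

    # safe_serialization=True -> safetensors: pickle-free, memory-mappable
    save_file(state, "model.safetensors")

    # safe_serialization=False -> classic torch.save pickle, i.e. *.bin files
    torch.save(state, "pytorch_model.bin")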

View File

@@ -76,7 +76,7 @@ class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
output_type="numpy",
).images
assert np.abs(image - new_image).sum() < 1e-5, "Models don't have the same forward pass"
assert np.abs(image - new_image).max() < 1e-5, "Models don't have the same forward pass"
def test_inference_dual_guided(self):
pipe = VersatileDiffusionDualGuidedPipeline.from_pretrained("shi-labs/versatile-diffusion")

View File

@@ -77,7 +77,7 @@ class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase):
output_type="numpy",
).images
assert np.abs(image - new_image).sum() < 1e-5, "Models don't have the same forward pass"
assert np.abs(image - new_image).max() < 1e-5, "Models don't have the same forward pass"
def test_inference_dual_guided_then_text_to_image(self):
pipe = VersatileDiffusionPipeline.from_pretrained("shi-labs/versatile-diffusion", torch_dtype=torch.float16)

View File

@@ -64,7 +64,7 @@ class VersatileDiffusionTextToImagePipelineIntegrationTests(unittest.TestCase):
             prompt=prompt, generator=generator, guidance_scale=7.5, num_inference_steps=2, output_type="numpy"
         ).images

-        assert np.abs(image - new_image).sum() < 1e-5, "Models don't have the same forward pass"
+        assert np.abs(image - new_image).max() < 1e-5, "Models don't have the same forward pass"

     def test_inference_text2img(self):
         pipe = VersatileDiffusionTextToImagePipeline.from_pretrained(