Compare commits

...

35 Commits

Author SHA1 Message Date
Sayak Paul
d1c05927bf Merge branch 'main' into lora-device-map 2024-12-16 08:35:20 +05:30
sayakpaul
f7c4706f79 resolve conflicts. 2024-12-08 13:51:31 +05:30
Sayak Paul
d8336d6e4d Merge branch 'main' into lora-device-map 2024-11-01 10:26:16 +05:30
sayakpaul
334173919a fixes 2024-11-01 08:08:41 +05:30
sayakpaul
569f99e3d3 fix 2024-11-01 08:02:12 +05:30
sayakpaul
4d7986a126 Merge branch 'main' into lora-device-map 2024-11-01 07:26:29 +05:30
sayakpaul
ccd8d2ad80 resolve conflicts. 2024-10-31 20:47:24 +05:30
sayakpaul
a61b754fe5 fixes 2024-10-31 20:40:59 +05:30
sayakpaul
0bd40cbff3 skip properly. 2024-10-31 19:10:18 +05:30
sayakpaul
03377b7afc fixes 2024-10-31 19:02:45 +05:30
Sayak Paul
61903c8080 Merge branch 'main' into lora-device-map 2024-10-31 18:34:46 +05:30
Sayak Paul
2db5d48743 Merge branch 'main' into lora-device-map 2024-10-23 12:51:33 +05:30
Sayak Paul
fe2cca8766 Update docs/source/en/training/distributed_inference.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2024-10-23 12:51:05 +05:30
Sayak Paul
4b6124a07a Merge branch 'main' into lora-device-map 2024-10-22 16:00:09 +05:30
sayakpaul
c0dee879d4 quality 2024-10-19 18:20:29 +05:30
Sayak Paul
f64751e37d Merge branch 'main' into lora-device-map 2024-10-19 18:19:49 +05:30
sayakpaul
5ea1173aeb add hardware note. 2024-10-19 18:10:24 +05:30
sayakpaul
2334f78c3b add: tests, docs. 2024-10-19 18:06:41 +05:30
sayakpaul
f62afac640 fix-copies 2024-10-19 16:18:15 +05:30
sayakpaul
71989e3edf better error messages. 2024-10-19 16:17:12 +05:30
sayakpaul
ea727a3b32 minors 2024-10-19 16:05:33 +05:30
Sayak Paul
eefda549cd Merge branch 'main' into lora-device-map 2024-10-19 16:02:10 +05:30
Sayak Paul
f63b04c5e0 Merge branch 'main' into lora-device-map 2024-10-15 15:21:03 +05:30
Sayak Paul
e42ec19fbf Merge branch 'main' into lora-device-map 2024-10-10 21:00:21 +05:30
Sayak Paul
8f670e24e9 Merge branch 'main' into lora-device-map 2024-10-08 21:47:25 +05:30
Sayak Paul
5f3cae2bf5 Merge branch 'main' into lora-device-map 2024-10-06 10:00:48 +04:00
Sayak Paul
d2d59c38d7 Merge branch 'main' into lora-device-map 2024-10-02 15:48:04 +02:00
Sayak Paul
1ed0eb0af1 Merge branch 'main' into lora-device-map 2024-09-28 10:54:22 +05:30
Sayak Paul
2846549eaa Merge branch 'main' into lora-device-map 2024-09-27 09:35:57 +05:30
Sayak Paul
5479198085 Apply suggestions from code review
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2024-09-24 19:53:34 +05:30
Sayak Paul
d4bd94b026 Merge branch 'main' into lora-device-map 2024-09-24 09:52:58 +05:30
Sayak Paul
6d03c12dc3 Merge branch 'main' into lora-device-map 2024-09-22 16:22:33 +05:30
sayakpaul
64b3ad14da empty
Co-authored-by: Benjamin Bossan <BenjaminBossan@users.noreply.github.com>
2024-09-17 19:36:14 +05:30
sayakpaul
949a9298e3 better attibutung 2024-09-17 19:34:32 +05:30
sayakpaul
dc1aee2718 fix: lora loading when using with a device_mapped model. 2024-09-17 07:22:22 +05:30
26 changed files with 738 additions and 11 deletions

View File

@@ -237,3 +237,5 @@ with torch.no_grad():
 ```
 By selectively loading and unloading the models you need at a given stage and sharding the largest models across multiple GPUs, it is possible to run inference with large models on consumer GPUs.
+
+This workflow is also compatible with LoRAs via [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. However, only LoRAs without text encoder components are currently supported in this workflow.
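For readers of this compare view, a minimal sketch of the documented workflow, modeled on the Flux sharding test added later in this diff; the checkpoint and LoRA repository names are taken from that test, and memory limits are omitted for brevity:

```python
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel

ckpt_id = "black-forest-labs/FLUX.1-dev"

# Shard the largest component (the transformer) across the available GPUs.
transformer = FluxTransformer2DModel.from_pretrained(
    ckpt_id, subfolder="transformer", device_map="auto", torch_dtype=torch.bfloat16
)
pipeline = FluxPipeline.from_pretrained(
    ckpt_id, transformer=transformer, torch_dtype=torch.bfloat16
)

# The LoRA targets only the transformer (no text encoder components), so it can
# be loaded on top of the device-mapped model.
pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors")
```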

View File

@@ -327,12 +327,18 @@ class LoraBaseMixin:
             tuple:
                 A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
         """
+        from ..pipelines.pipeline_loading_utils import model_has_device_map
+
         is_model_cpu_offload = False
         is_sequential_cpu_offload = False
         if _pipeline is not None and _pipeline.hf_device_map is None:
             for _, component in _pipeline.components.items():
-                if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
+                if (
+                    isinstance(component, nn.Module)
+                    and hasattr(component, "_hf_hook")
+                    and not model_has_device_map(component)
+                ):
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:

View File

@@ -400,12 +400,18 @@ class UNet2DConditionLoadersMixin:
             tuple:
                 A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
         """
+        from ..pipelines.pipeline_loading_utils import model_has_device_map
+
         is_model_cpu_offload = False
         is_sequential_cpu_offload = False
         if _pipeline is not None and _pipeline.hf_device_map is None:
             for _, component in _pipeline.components.items():
-                if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
+                if (
+                    isinstance(component, nn.Module)
+                    and hasattr(component, "_hf_hook")
+                    and not model_has_device_map(component)
+                ):
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:

View File

@@ -36,6 +36,7 @@ from ..utils import (
     deprecate,
     get_class_from_dynamic_module,
     is_accelerate_available,
+    is_accelerate_version,
     is_peft_available,
     is_transformers_available,
     logging,
@@ -968,3 +969,18 @@
         )

     return ignore_patterns
+
+
+def model_has_device_map(model):
+    if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
+        return False
+
+    # Check if the model has a device map that is not exclusively CPU
+    # `device_map` can only contain CPU when a model has sharded checkpoints.
+    # See here: https://github.com/huggingface/diffusers/blob/41e4779d988ead99e7acd78dc8e752de88777d0f/src/diffusers/models/modeling_utils.py#L883
+    device_map = getattr(model, "hf_device_map", None)
+    if device_map is not None:
+        unique_devices = set(device_map.values())
+        return len(unique_devices) > 1 or unique_devices != {"cpu"}
+
+    return False
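As an aside, a hedged sketch of how the new helper is expected to behave; the device-map values below are hypothetical and assume `accelerate` >= 0.14.0 is installed:

```python
import torch.nn as nn

model = nn.Linear(4, 4)
print(model_has_device_map(model))  # False: no `hf_device_map` attribute at all

# accelerate attaches `hf_device_map` when it dispatches a model; any map that is
# not exclusively CPU counts as device-mapped.
model.hf_device_map = {"weight": 0, "bias": 1}  # hypothetical map spanning two GPUs
print(model_has_device_map(model))  # True

model.hf_device_map = {"": "cpu"}  # CPU-only map, e.g. from sharded checkpoints
print(model_has_device_map(model))  # False
```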

View File

@@ -84,6 +84,7 @@ from .pipeline_loading_utils import (
     _update_init_kwargs_with_connected_pipeline,
     load_sub_model,
     maybe_raise_or_warn,
+    model_has_device_map,
     variant_compatible_siblings,
     warn_deprecated_model_variant,
 )
@@ -406,6 +407,16 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
             return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)

+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with explicitly setting the device using `to()`."
+            )
+
         # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
         pipeline_is_sequentially_offloaded = any(
             module_is_sequentially_offloaded(module) for _, module in self.components.items()
@@ -1008,6 +1019,16 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_model_cpu_offload()`."
+            )
+
         is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
         if is_pipeline_device_mapped:
             raise ValueError(
@@ -1110,6 +1131,16 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_sequential_cpu_offload()`."
+            )
+
         if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
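To make the intent of the three guards concrete, a hedged sketch of what a caller now sees when any pipeline component carries a device map; the repository id below is a placeholder:

```python
import torch
from diffusers import DiffusionPipeline

# Loading with a device map shards components across the available GPUs.
pipe = DiffusionPipeline.from_pretrained(
    "some/checkpoint-id",  # placeholder repo id
    device_map="balanced",
    torch_dtype=torch.float16,
)

# Explicit device placement and the CPU-offloading helpers are rejected up front.
try:
    pipe.to("cuda")
except ValueError as err:
    print(err)  # "... incompatible with explicitly setting the device using `to()`."

# pipe.enable_model_cpu_offload() and pipe.enable_sequential_cpu_offload()
# raise analogous ValueErrors.
```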

View File

@@ -506,9 +506,14 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         model_dtypes = {key: component.dtype for key, component in components.items() if hasattr(component, "dtype")}
         self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes.values()))

+    @unittest.skip("Test currently not supported.")
     def test_sequential_cpu_offload_forward_pass(self):
         pass

+    @unittest.skip("Test currently not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+

 @nightly
 class AudioLDM2PipelineSlowTests(unittest.TestCase):

View File

@@ -514,6 +514,18 @@ class StableDiffusionMultiControlNetPipelineFastTests(
         assert image.shape == (4, 64, 64, 3)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class StableDiffusionMultiControlNetOneModelPipelineFastTests(
     IPAdapterTesterMixin, PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase
@@ -697,6 +709,18 @@ class StableDiffusionMultiControlNetOneModelPipelineFastTests(
         except NotImplementedError:
             pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @slow
 @require_torch_gpu

View File

@@ -389,6 +389,18 @@ class StableDiffusionMultiControlNetPipelineFastTests(
         except NotImplementedError:
             pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @slow
 @require_torch_gpu

View File

@@ -441,6 +441,18 @@ class MultiControlNetInpaintPipelineFastTests(
         except NotImplementedError:
             pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @slow
 @require_torch_gpu

View File

@@ -683,6 +683,18 @@ class StableDiffusionXLMultiControlNetPipelineFastTests(
     def test_save_load_optional_components(self):
         return self._test_save_load_optional_components()

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
     PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase
@@ -887,6 +899,18 @@ class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
         self.assertTrue(np.abs(image_slice_without_neg_cond - image_slice_with_neg_cond).max() > 1e-2)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @slow
 @require_torch_gpu

View File

@@ -8,9 +8,11 @@ from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel

 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
+from diffusers.image_processor import VaeImageProcessor
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
+    require_torch_multi_gpu,
     slow,
     torch_device,
 )
@@ -296,3 +298,172 @@ class FluxPipelineSlowTests(unittest.TestCase):
         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
         assert max_diff < 1e-4

+    @require_torch_multi_gpu
+    @torch.no_grad()
+    def test_flux_component_sharding(self):
+        """
+        internal note: test was run on `audace`.
+        """
+
+        ckpt_id = "black-forest-labs/FLUX.1-dev"
+        dtype = torch.bfloat16
+        prompt = "a photo of a cat with tiger-like look"
+
+        pipeline = FluxPipeline.from_pretrained(
+            ckpt_id,
+            transformer=None,
+            vae=None,
+            device_map="balanced",
+            max_memory={0: "16GB", 1: "16GB"},
+            torch_dtype=dtype,
+        )
+        prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt(
+            prompt=prompt, prompt_2=None, max_sequence_length=512
+        )
+
+        del pipeline.text_encoder
+        del pipeline.text_encoder_2
+        del pipeline.tokenizer
+        del pipeline.tokenizer_2
+        del pipeline
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        transformer = FluxTransformer2DModel.from_pretrained(
+            ckpt_id, subfolder="transformer", device_map="auto", max_memory={0: "16GB", 1: "16GB"}, torch_dtype=dtype
+        )
+        pipeline = FluxPipeline.from_pretrained(
+            ckpt_id,
+            text_encoder=None,
+            text_encoder_2=None,
+            tokenizer=None,
+            tokenizer_2=None,
+            vae=None,
+            transformer=transformer,
+            torch_dtype=dtype,
+        )
+
+        height, width = 768, 1360
+        # No need to wrap it up under `torch.no_grad()` as pipeline call method
+        # is already wrapped under that.
+        latents = pipeline(
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            num_inference_steps=10,
+            guidance_scale=3.5,
+            height=height,
+            width=width,
+            output_type="latent",
+            generator=torch.manual_seed(0),
+        ).images
+        latent_slice = latents[0, :3, :3].flatten().float().cpu().numpy()
+        expected_slice = np.array([-0.377, -0.3008, -0.5117, -0.252, 0.0615, -0.3477, -0.1309, -0.1914, 0.1533])
+
+        assert numpy_cosine_similarity_distance(latent_slice, expected_slice) < 1e-4
+
+        del pipeline.transformer
+        del pipeline
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype).to(torch_device)
+        vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
+        image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
+
+        latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
+        latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
+
+        image = vae.decode(latents, return_dict=False)[0]
+        image = image_processor.postprocess(image, output_type="np")
+        image_slice = image[0, :3, :3, -1].flatten()
+        expected_slice = np.array([0.127, 0.1113, 0.1055, 0.1172, 0.1172, 0.1074, 0.1191, 0.1191, 0.1152])
+
+        assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
+
+    @require_torch_multi_gpu
+    @torch.no_grad()
+    def test_flux_component_sharding_with_lora(self):
+        """
+        internal note: test was run on `audace`.
+        """
+
+        ckpt_id = "black-forest-labs/FLUX.1-dev"
+        dtype = torch.bfloat16
+        prompt = "jon snow eating pizza."
+
+        pipeline = FluxPipeline.from_pretrained(
+            ckpt_id,
+            transformer=None,
+            vae=None,
+            device_map="balanced",
+            max_memory={0: "16GB", 1: "16GB"},
+            torch_dtype=dtype,
+        )
+        prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt(
+            prompt=prompt, prompt_2=None, max_sequence_length=512
+        )
+
+        del pipeline.text_encoder
+        del pipeline.text_encoder_2
+        del pipeline.tokenizer
+        del pipeline.tokenizer_2
+        del pipeline
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        transformer = FluxTransformer2DModel.from_pretrained(
+            ckpt_id, subfolder="transformer", device_map="auto", max_memory={0: "16GB", 1: "16GB"}, torch_dtype=dtype
+        )
+        pipeline = FluxPipeline.from_pretrained(
+            ckpt_id,
+            text_encoder=None,
+            text_encoder_2=None,
+            tokenizer=None,
+            tokenizer_2=None,
+            vae=None,
+            transformer=transformer,
+            torch_dtype=dtype,
+        )
+        pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors")
+
+        height, width = 768, 1360
+        # No need to wrap it up under `torch.no_grad()` as pipeline call method
+        # is already wrapped under that.
+        latents = pipeline(
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            num_inference_steps=10,
+            guidance_scale=3.5,
+            height=height,
+            width=width,
+            output_type="latent",
+            generator=torch.manual_seed(0),
+        ).images
+        latent_slice = latents[0, :3, :3].flatten().float().cpu().numpy()
+        expected_slice = np.array([-0.6523, -0.4961, -0.9141, -0.5, -0.2129, -0.6914, -0.375, -0.5664, -0.1699])
+
+        assert numpy_cosine_similarity_distance(latent_slice, expected_slice) < 1e-4
+
+        del pipeline.transformer
+        del pipeline
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype).to(torch_device)
+        vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
+        image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
+
+        latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
+        latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
+
+        image = vae.decode(latents, return_dict=False)[0]
+        image = image_processor.postprocess(image, output_type="np")
+        image_slice = image[0, :3, :3, -1].flatten()
+        expected_slice = np.array([0.1211, 0.1094, 0.1035, 0.1094, 0.1113, 0.1074, 0.1133, 0.1133, 0.1094])
+
+        assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4

View File

@@ -139,6 +139,18 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
     def test_dict_tuple_outputs_equivalent(self):
         super().test_dict_tuple_outputs_equivalent(expected_max_difference=5e-4)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = KandinskyImg2ImgCombinedPipeline
@@ -248,6 +260,18 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_save_load_optional_components(self):
         super().test_save_load_optional_components(expected_max_difference=5e-4)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = KandinskyInpaintCombinedPipeline
@@ -363,3 +387,15 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_save_load_local(self):
         super().test_save_load_local(expected_max_difference=5e-3)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass

View File

@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
+import tempfile
 import unittest

 import numpy as np
@@ -28,11 +30,16 @@ from transformers import (
 )

 from diffusers import KandinskyPriorPipeline, PriorTransformer, UnCLIPScheduler
-from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
+from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

 from ..test_pipelines_common import PipelineTesterMixin


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
 enable_full_determinism()
@@ -236,3 +243,31 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             test_max_difference=test_max_difference,
             test_mean_pixel_difference=test_mean_pixel_difference,
         )
+
+    # It needs a different sharding ratio than the standard 0.75. So, we override it.
+    def test_sharded_components_can_be_device_placed(self):
+        components = self.get_dummy_components()
+
+        component_selected = None
+        for component_name in components:
+            if isinstance(components[component_name], ModelMixin) and hasattr(
+                components[component_name], "load_config"
+            ):
+                component_to_be_sharded = components[component_name]
+                component_cls = component_to_be_sharded.__class__
+                component_selected = component_name
+                break
+
+        assert component_selected, "No component selected that can be sharded."
+
+        model_size = compute_module_sizes(component_to_be_sharded)[""]
+        max_shard_size = int((model_size * 0.45) / (2**10))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
+
+            loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
+            _ = components.pop(component_selected)
+            components.update({component_selected: loaded_sharded_component})
+            _ = self.pipeline_class(**components).to(torch_device)

View File

@@ -159,6 +159,18 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
     def test_callback_cfg(self):
         pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = KandinskyV22Img2ImgCombinedPipeline
@@ -281,6 +293,18 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_callback_cfg(self):
         pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = KandinskyV22InpaintCombinedPipeline
@@ -404,3 +428,15 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_callback_cfg(self):
         pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass

View File

@@ -14,6 +14,8 @@
 # limitations under the License.

 import inspect
+import os
+import tempfile
 import unittest

 import numpy as np
@@ -29,11 +31,17 @@ from transformers import (
 )

 from diffusers import KandinskyV22PriorPipeline, PriorTransformer, UnCLIPScheduler
-from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
+from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device

 from ..test_pipelines_common import PipelineTesterMixin


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
+
 enable_full_determinism()
@@ -277,3 +285,31 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         output = pipe(**inputs)[0]

         assert output.abs().sum() == 0
+
+    # It needs a different sharding ratio than the standard 0.75. So, we override it.
+    def test_sharded_components_can_be_device_placed(self):
+        components = self.get_dummy_components()
+
+        component_selected = None
+        for component_name in components:
+            if isinstance(components[component_name], ModelMixin) and hasattr(
+                components[component_name], "load_config"
+            ):
+                component_to_be_sharded = components[component_name]
+                component_cls = component_to_be_sharded.__class__
+                component_selected = component_name
+                break
+
+        assert component_selected, "No component selected that can be sharded."
+
+        model_size = compute_module_sizes(component_to_be_sharded)[""]
+        max_shard_size = int((model_size * 0.45) / (2**10))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
+
+            loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
+            _ = components.pop(component_selected)
+            components.update({component_selected: loaded_sharded_component})
+            _ = self.pipeline_class(**components).to(torch_device)

View File

@@ -13,7 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 import random
+import tempfile
 import unittest

 import numpy as np
@@ -30,9 +32,12 @@ from transformers import (
 )

 from diffusers import KandinskyV22PriorEmb2EmbPipeline, PriorTransformer, UnCLIPScheduler
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
+    is_accelerate_available,
     skip_mps,
     torch_device,
 )
@@ -40,6 +45,10 @@ from diffusers.utils.testing_utils import (
 from ..test_pipelines_common import PipelineTesterMixin


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
+
 enable_full_determinism()
@@ -240,3 +249,31 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             test_max_difference=test_max_difference,
             test_mean_pixel_difference=test_mean_pixel_difference,
         )
+
+    # It needs a different sharding ratio than the standard 0.75. So, we override it.
+    def test_sharded_components_can_be_device_placed(self):
+        components = self.get_dummy_components()
+
+        component_selected = None
+        for component_name in components:
+            if isinstance(components[component_name], ModelMixin) and hasattr(
+                components[component_name], "load_config"
+            ):
+                component_to_be_sharded = components[component_name]
+                component_cls = component_to_be_sharded.__class__
+                component_selected = component_name
+                break
+
+        assert component_selected, "No component selected that can be sharded."
+
+        model_size = compute_module_sizes(component_to_be_sharded)[""]
+        max_shard_size = int((model_size * 0.45) / (2**10))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
+
+            loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
+            _ = components.pop(component_selected)
+            components.update({component_selected: loaded_sharded_component})
+            _ = self.pipeline_class(**components).to(torch_device)

View File

@@ -404,6 +404,10 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         model_dtypes = {key: component.dtype for key, component in components.items() if hasattr(component, "dtype")}
         self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes.values()))

+    @unittest.skip("Test currently not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+

 @nightly
 @require_torch_gpu

View File

@@ -279,3 +279,15 @@ class StableCascadeCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestC
         )
         assert np.abs(output_prompt.images - output_prompt_embeds.images).max() < 1e-5

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass

View File

@@ -593,6 +593,18 @@ class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterM
         if test_mean_pixel_difference:
             assert_mean_pixel_difference(output_batch[0][0], output[0][0])

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @slow
 @require_torch_gpu

View File

@@ -642,9 +642,6 @@ class StableDiffusionXLMultiAdapterPipelineFastTests(
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448])

-        debug = [str(round(i, 4)) for i in image_slice.flatten().tolist()]
-        print(",".join(debug))
-
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

     def test_adapter_sdxl_lcm_custom_timesteps(self):
@@ -667,7 +664,16 @@
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448])

-        debug = [str(round(i, 4)) for i in image_slice.flatten().tolist()]
-        print(",".join(debug))
-
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass

View File

@@ -1,4 +1,6 @@
 import gc
+import os
+import tempfile
 import unittest

 import torch
@@ -12,8 +14,17 @@ from diffusers import (
     StableUnCLIPPipeline,
     UNet2DConditionModel,
 )
+from diffusers.models.modeling_utils import ModelMixin
 from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
-from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
+from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
+from diffusers.utils.testing_utils import (
+    enable_full_determinism,
+    is_accelerate_available,
+    load_numpy,
+    nightly,
+    require_torch_gpu,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -24,6 +35,10 @@ from ..test_pipelines_common import (
 )

+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
+
 enable_full_determinism()
@@ -184,6 +199,46 @@ class StableUnCLIPPipelineFastTests(
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=1e-3)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+
+    # It needs a different sharding ratio than the standard 0.75. So, we override it.
+    def test_sharded_components_can_be_device_placed(self):
+        components = self.get_dummy_components()
+
+        component_selected = None
+        for component_name in components:
+            if isinstance(components[component_name], ModelMixin) and hasattr(
+                components[component_name], "load_config"
+            ):
+                component_to_be_sharded = components[component_name]
+                component_cls = component_to_be_sharded.__class__
+                component_selected = component_name
+                break
+
+        assert component_selected, "No component selected that can be sharded."
+
+        model_size = compute_module_sizes(component_to_be_sharded)[""]
+        max_shard_size = int((model_size * 0.45) / (2**10))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
+
+            loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
+            _ = components.pop(component_selected)
+            components.update({component_selected: loaded_sharded_component})
+            _ = self.pipeline_class(**components).to(torch_device)
+

 @nightly
 @require_torch_gpu

View File

@@ -205,6 +205,18 @@ class StableUnCLIPImg2ImgPipelineFastTests(
     def test_xformers_attention_forwardGenerator_pass(self):
         self._test_xformers_attention_forwardGenerator_pass(test_max_difference=False)

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass
+

 @nightly
 @require_torch_gpu

View File

@@ -41,10 +41,14 @@ from diffusers.utils import logging
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    is_accelerate_available,
+    nightly,
     require_accelerate_version_greater,
     require_accelerator,
     require_torch,
+    require_torch_multi_gpu,
     skip_mps,
+    slow,
     torch_device,
 )
@@ -61,6 +65,10 @@ from ..models.unets.test_models_unet_2d_condition import (
 from ..others.test_utils import TOKEN, USER, is_staging_test


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
+
 def to_np(tensor):
     if isinstance(tensor, torch.Tensor):
         tensor = tensor.detach().cpu().numpy()
@@ -1902,6 +1910,78 @@ class PipelineTesterMixin:
             )
         )

+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_to_raises_error_device_mapped_components(self, safe_serialization=True):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+            with self.assertRaises(ValueError) as err_context:
+                loaded_pipe.to(torch_device)
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with explicitly setting the device using `to()`" in str(err_context.exception)
+        )
+
+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_mco_raises_error_device_mapped_components(self, safe_serialization=True):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+            with self.assertRaises(ValueError) as err_context:
+                loaded_pipe.enable_model_cpu_offload()
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with `enable_model_cpu_offload()`" in str(err_context.exception)
+        )
+
+    @require_torch_multi_gpu
+    @slow
+    @nightly
+    def test_calling_sco_raises_error_device_mapped_components(self, safe_serialization=True):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        max_model_size = max(
+            compute_module_sizes(module)[""]
+            for _, module in pipe.components.items()
+            if isinstance(module, torch.nn.Module)
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
+            max_memory = {0: max_model_size, 1: max_model_size}
+            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
+
+            with self.assertRaises(ValueError) as err_context:
+                loaded_pipe.enable_sequential_cpu_offload()
+
+        self.assertTrue(
+            "The following pipeline components have been found" in str(err_context.exception)
+            and "This is incompatible with `enable_sequential_cpu_offload()`" in str(err_context.exception)
+        )
+

 @is_staging_test
 class PipelinePushToHubTester(unittest.TestCase):

View File

@@ -14,6 +14,8 @@
 # limitations under the License.

 import gc
+import os
+import tempfile
 import unittest

 import numpy as np
@@ -21,9 +23,12 @@ import torch
 from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer

 from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
+from diffusers.models.modeling_utils import ModelMixin
 from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
+from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
+    is_accelerate_available,
     load_numpy,
     nightly,
     require_torch_gpu,
@@ -35,6 +40,9 @@ from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference


+if is_accelerate_available():
+    from accelerate.utils import compute_module_sizes
+
 enable_full_determinism()
@@ -418,6 +426,34 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_float16_inference(self):
         super().test_float16_inference(expected_max_diff=1.0)

+    # It needs a different sharding ratio than the standard 0.75. So, we override it.
+    def test_sharded_components_can_be_device_placed(self):
+        components = self.get_dummy_components()
+
+        component_selected = None
+        for component_name in components:
+            if isinstance(components[component_name], ModelMixin) and hasattr(
+                components[component_name], "load_config"
+            ):
+                component_to_be_sharded = components[component_name]
+                component_cls = component_to_be_sharded.__class__
+                component_selected = component_name
+                break
+
+        assert component_selected, "No component selected that can be sharded."
+
+        model_size = compute_module_sizes(component_to_be_sharded)[""]
+        max_shard_size = int((model_size * 0.45) / (2**10))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
+            self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
+
+            loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
+            _ = components.pop(component_selected)
+            components.update({component_selected: loaded_sharded_component})
+            _ = self.pipeline_class(**components).to(torch_device)
+

 @nightly
 class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):

View File

@@ -576,6 +576,15 @@ class UniDiffuserPipelineFastTests(
         expected_text_prefix = '" This This'
         assert text[0][: len(expected_text_prefix)] == expected_text_prefix

+    def test_calling_mco_raises_error_device_mapped_components(self):
+        super().test_calling_mco_raises_error_device_mapped_components(safe_serialization=False)
+
+    def test_calling_to_raises_error_device_mapped_components(self):
+        super().test_calling_to_raises_error_device_mapped_components(safe_serialization=False)
+
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        super().test_calling_sco_raises_error_device_mapped_components(safe_serialization=False)
+

 @nightly
 @require_torch_gpu

View File

@@ -237,3 +237,15 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase
     def test_callback_cfg(self):
         pass

+    @unittest.skip("Test not supported.")
+    def test_calling_mco_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_to_raises_error_device_mapped_components(self):
+        pass
+
+    @unittest.skip("Test not supported.")
+    def test_calling_sco_raises_error_device_mapped_components(self):
+        pass