Update access of configuration attributes (#7343)

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
M. Tolga Cangöz
2024-03-18 21:53:29 +03:00 (committed by GitHub)
parent 01ac37b331
commit e97a633b63
20 changed files with 36 additions and 36 deletions
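
The pattern this commit applies across all twenty files is the same one-line change: read model, pipeline, and scheduler hyperparameters from the frozen `config` object instead of as direct attributes, which recent diffusers releases deprecate with a warning. A minimal before/after sketch (the checkpoint id is illustrative):

```py
from diffusers import UNet2DConditionModel

# Illustrative checkpoint; any diffusers model exposing a config works the same way.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet"
)

# Deprecated: direct attribute access goes through a shim that warns.
channels = unet.in_channels

# Preferred: read the value from the frozen config object.
channels = unet.config.in_channels
```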


@@ -355,7 +355,7 @@ unet_traced = torch.jit.load("unet_traced.pt")
 class TracedUNet(torch.nn.Module):
     def __init__(self):
         super().__init__()
-        self.in_channels = pipe.unet.in_channels
+        self.in_channels = pipe.unet.config.in_channels
         self.device = pipe.unet.device

     def forward(self, latent_model_input, t, encoder_hidden_states):
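
For context, the guide this hunk comes from swaps the wrapper in for the pipeline's original UNet once it is defined. A usage sketch (assuming the `pipe` and `unet_traced` objects from the surrounding guide):

```py
# Replace the original UNet with the traced wrapper; subsequent pipeline
# calls then run through the traced module.
pipe.unet = TracedUNet()
```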


@@ -210,7 +210,7 @@ Stable Diffusion is a text-to-image *latent diffusion* model. latent di
 ```py
 >>> latents = torch.randn(
-...     (batch_size, unet.in_channels, height // 8, width // 8),
+...     (batch_size, unet.config.in_channels, height // 8, width // 8),
 ...     generator=generator,
 ...     device=torch_device,
 ... )


@@ -224,7 +224,7 @@ class StableDiffusionIPEXPipeline(
         # 5. Prepare latent variables
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
-            self.unet.in_channels,
+            self.unet.config.in_channels,
             height,
             width,
             prompt_embeds.dtype,
@@ -679,7 +679,7 @@ class StableDiffusionIPEXPipeline(
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,


@@ -917,7 +917,7 @@ class TensorRTStableDiffusionPipeline(StableDiffusionPipeline):
             text_embeddings = self.__encode_prompt(prompt, negative_prompt)

             # Pre-initialize latents
-            num_channels_latents = self.unet.in_channels
+            num_channels_latents = self.unet.config.in_channels
             latents = self.prepare_latents(
                 batch_size,
                 num_channels_latents,


@@ -1195,9 +1195,9 @@ def superres_check_against_original(dump_path, unet_checkpoint_path):
     if_II_model = IFStageIII(device="cuda", dir_or_name=orig_path, model_kwargs={"precision": "fp32"}).model

     batch_size = 1
-    channels = model.in_channels // 2
-    height = model.sample_size
-    width = model.sample_size
+    channels = model.config.in_channels // 2
+    height = model.config.sample_size
+    width = model.config.sample_size

     height = 1024
     width = 1024


@@ -613,7 +613,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
             for image_ in image:
                 image_ = image_.convert("RGB")
-                image_ = resize(image_, self.unet.sample_size)
+                image_ = resize(image_, self.unet.config.sample_size)
                 image_ = np.array(image_)
                 image_ = image_.astype(np.float32)
                 image_ = image_ / 127.5 - 1


@@ -662,7 +662,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             for image_ in image:
                 image_ = image_.convert("RGB")
-                image_ = resize(image_, self.unet.sample_size)
+                image_ = resize(image_, self.unet.config.sample_size)
                 image_ = np.array(image_)
                 image_ = image_.astype(np.float32)
                 image_ = image_ / 127.5 - 1


@@ -654,7 +654,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
             for image_ in image:
                 image_ = image_.convert("RGB")
-                image_ = resize(image_, self.unet.sample_size)
+                image_ = resize(image_, self.unet.config.sample_size)
                 image_ = np.array(image_)
                 image_ = image_.astype(np.float32)
                 image_ = image_ / 127.5 - 1
@@ -701,7 +701,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
             for mask_image_ in mask_image:
                 mask_image_ = mask_image_.convert("L")
-                mask_image_ = resize(mask_image_, self.unet.sample_size)
+                mask_image_ = resize(mask_image_, self.unet.config.sample_size)
                 mask_image_ = np.array(mask_image_)
                 mask_image_ = mask_image_[None, None, :]
                 new_mask_image.append(mask_image_)


@@ -698,7 +698,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             for image_ in image:
                 image_ = image_.convert("RGB")
-                image_ = resize(image_, self.unet.sample_size)
+                image_ = resize(image_, self.unet.config.sample_size)
                 image_ = np.array(image_)
                 image_ = image_.astype(np.float32)
                 image_ = image_ / 127.5 - 1
@@ -778,7 +778,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             for mask_image_ in mask_image:
                 mask_image_ = mask_image_.convert("L")
-                mask_image_ = resize(mask_image_, self.unet.sample_size)
+                mask_image_ = resize(mask_image_, self.unet.config.sample_size)
                 mask_image_ = np.array(mask_image_)
                 mask_image_ = mask_image_[None, None, :]
                 new_mask_image.append(mask_image_)


@@ -469,7 +469,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
-            self.num_latent_channels,
+            self.config.num_latent_channels,
             height,
             width,
             latents_dtype,
@@ -498,12 +498,12 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
         # 7. Check that sizes of image and latents match
         num_channels_image = image.shape[1]
-        if self.num_latent_channels + num_channels_image != self.num_unet_input_channels:
+        if self.config.num_latent_channels + num_channels_image != self.config.num_unet_input_channels:
             raise ValueError(
                 "Incorrect configuration settings! The config of `pipeline.unet` expects"
-                f" {self.num_unet_input_channels} but received `num_channels_latents`: {self.num_latent_channels} +"
+                f" {self.config.num_unet_input_channels} but received `num_channels_latents`: {self.config.num_latent_channels} +"
                 f" `num_channels_image`: {num_channels_image} "
-                f" = {self.num_latent_channels + num_channels_image}. Please verify the config of"
+                f" = {self.config.num_latent_channels + num_channels_image}. Please verify the config of"
                 " `pipeline.unet` or your `image` input."
             )
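
The check rewritten in the second hunk is plain channel arithmetic: the upscaler UNet consumes the noisy latents concatenated with the low-resolution conditioning image along the channel axis, so the latent and image channel counts must sum to the UNet's input channels. A sketch with the values typical of the Stable Diffusion x4 upscaler (assumed here for illustration):

```py
num_latent_channels = 4      # latent channels from the VAE (typical, assumed)
num_channels_image = 3       # RGB low-resolution conditioning image
num_unet_input_channels = 7  # what the upscaler UNet's config expects

# The pipeline raises the ValueError above whenever this sum does not match.
assert num_latent_channels + num_channels_image == num_unet_input_channels
```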


@@ -680,7 +680,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,
@@ -713,7 +713,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
         boxes = torch.zeros(max_objs, 4, device=device, dtype=self.text_encoder.dtype)
         boxes[:n_objs] = torch.tensor(gligen_boxes)
         text_embeddings = torch.zeros(
-            max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
+            max_objs, self.unet.config.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
         )
         text_embeddings[:n_objs] = _text_embeddings
         # Generate a mask for each object that is entity described by phrases


@@ -847,7 +847,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,


@@ -233,7 +233,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
         sigmas = self._convert_to_karras(ramp)
         timesteps = self.sigma_to_t(sigmas)

-        sigmas = np.concatenate([sigmas, [self.sigma_min]]).astype(np.float32)
+        sigmas = np.concatenate([sigmas, [self.config.sigma_min]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas).to(device=device)

         if str(device).startswith("mps"):


@@ -233,7 +233,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         """
         # Clipping the minimum of all lambda(t) for numerical stability.
         # This is critical for cosine (squaredcos_cap_v2) noise schedule.
-        clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.lambda_min_clipped).item()
+        clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped).item()
         self.noisiest_timestep = self.config.num_train_timesteps - 1 - clipped_idx

         # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891


@@ -325,7 +325,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])


@@ -343,7 +343,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
                 " 'linear' or 'log_linear'"
             )

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])


@@ -288,7 +288,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
         log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
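
The four scheduler hunks above all follow the same rule: constructor arguments captured by `@register_to_config` (`sigma_min`, `lambda_min_clipped`, `use_karras_sigmas`, ...) are read back through `self.config` rather than as instance attributes. A hedged sketch of how the flag reaches the config from user code (the pretrained id is illustrative):

```py
from diffusers import EulerDiscreteScheduler

# Load a scheduler config and override one registered argument.
scheduler = EulerDiscreteScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler", use_karras_sigmas=True
)

# The override lands in the frozen config, which is what set_timesteps now reads.
assert scheduler.config.use_karras_sigmas is True
```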


@@ -782,7 +782,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         # update inputs_dict for ip-adapter
         batch_size = inputs_dict["encoder_hidden_states"].shape[0]
         # for ip-adapter image_embeds has shape [batch_size, num_image, embed_dim]
-        image_embeds = floats_tensor((batch_size, 1, model.cross_attention_dim)).to(torch_device)
+        image_embeds = floats_tensor((batch_size, 1, model.config.cross_attention_dim)).to(torch_device)
         inputs_dict["added_cond_kwargs"] = {"image_embeds": [image_embeds]}

         # make ip_adapter_1 and ip_adapter_2
@@ -854,7 +854,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         # update inputs_dict for ip-adapter
         batch_size = inputs_dict["encoder_hidden_states"].shape[0]
         # for ip-adapter-plus image_embeds has shape [batch_size, num_image, sequence_length, embed_dim]
-        image_embeds = floats_tensor((batch_size, 1, 1, model.cross_attention_dim)).to(torch_device)
+        image_embeds = floats_tensor((batch_size, 1, 1, model.config.cross_attention_dim)).to(torch_device)
         inputs_dict["added_cond_kwargs"] = {"image_embeds": [image_embeds]}

         # make ip_adapter_1 and ip_adapter_2


@@ -272,17 +272,17 @@ class ConfigTester(unittest.TestCase):
             # now loading it with SampleObject2 should put f into `_use_default_values`
             config = SampleObject2.from_config(tmpdirname)
-            assert "f" in config._use_default_values
-            assert config.f == [1, 3]
+            assert "f" in config.config._use_default_values
+            assert config.config.f == [1, 3]

             # now loading the config, should **NOT** use [1, 3] for `f`, but the default [1, 4] value
-            # **BECAUSE** it is part of `config._use_default_values`
+            # **BECAUSE** it is part of `config.config._use_default_values`
             new_config = SampleObject4.from_config(config.config)
-            assert new_config.f == [5, 4]
+            assert new_config.config.f == [5, 4]

             config.config._use_default_values.pop()
             new_config_2 = SampleObject4.from_config(config.config)
-            assert new_config_2.f == [1, 3]
+            assert new_config_2.config.f == [1, 3]

             # Nevertheless "e" should still be correctly loaded to [1, 3] from SampleObject2 instead of defaulting to [1, 5]
-            assert new_config_2.e == [1, 3]
+            assert new_config_2.config.e == [1, 3]
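
What this test exercises: when an object is created or a config is loaded, every field that fell back to its class default is recorded in the config's `_use_default_values`, so a later class with a different default re-applies its own default instead of trusting the serialized value, while explicitly set fields survive. A minimal sketch of the mechanism (the fixture classes and default values below are assumptions modeled on the test's `SampleObject` fixtures):

```py
from diffusers.configuration_utils import ConfigMixin, register_to_config

class SampleObject2(ConfigMixin):
    config_name = "config.json"

    @register_to_config
    def __init__(self, e=[1, 5], f=[1, 3]):  # assumed defaults
        pass

class SampleObject4(ConfigMixin):
    config_name = "config.json"

    @register_to_config
    def __init__(self, e=[1, 5], f=[5, 4]):  # `f` default differs
        pass

obj = SampleObject2(e=[1, 3])                # `f` not passed -> tracked as defaulted
assert "f" in obj.config._use_default_values

new_obj = SampleObject4.from_config(obj.config)
assert new_obj.config.f == [5, 4]            # defaulted field: new class's default wins
assert new_obj.config.e == [1, 3]            # explicitly set field is kept
```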


@@ -137,7 +137,7 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio_slice = audio[0, -3:, -3:]

-        assert audio.shape == (1, 2, pipe.unet.sample_size)
+        assert audio.shape == (1, 2, pipe.unet.config.sample_size)
         expected_slice = np.array([-0.0192, -0.0231, -0.0318, -0.0059, 0.0002, -0.0020])
         assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2
@@ -155,7 +155,7 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio_slice = audio[0, -3:, -3:]

-        assert audio.shape == (1, 2, pipe.unet.sample_size)
+        assert audio.shape == (1, 2, pipe.unet.config.sample_size)
         expected_slice = np.array([-0.0367, -0.0488, -0.0771, -0.0525, -0.0444, -0.0341])
         assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2