Compare commits

...

3 Commits

Author SHA1 Message Date
Patrick von Platen
0ecc19fdd1 add comments to explain the code better 2023-11-21 11:18:15 +00:00
Patrick von Platen
b9b53198b8 Merge branch 'main' of https://github.com/huggingface/diffusers into improve_vae 2023-11-21 11:17:58 +00:00
Patrick von Platen
c25967ce2e Make vae robust 2023-11-21 11:17:29 +00:00
56 changed files with 168 additions and 56 deletions

View File

@@ -198,7 +198,9 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -236,7 +236,9 @@ class AltDiffusionImg2ImgPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -129,7 +129,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLo
motion_adapter=motion_adapter,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt

View File

@@ -94,7 +94,9 @@ class AudioLDMPipeline(DiffusionPipeline):
scheduler=scheduler,
vocoder=vocoder,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
def enable_vae_slicing(self):

View File

@@ -171,7 +171,9 @@ class AudioLDM2Pipeline(DiffusionPipeline):
scheduler=scheduler,
vocoder=vocoder,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
def enable_vae_slicing(self):

View File

@@ -174,7 +174,9 @@ class StableDiffusionControlNetPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False

View File

@@ -208,7 +208,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False

View File

@@ -330,7 +330,9 @@ class StableDiffusionControlNetInpaintPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True

View File

@@ -199,7 +199,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True

View File

@@ -172,7 +172,9 @@ class StableDiffusionXLControlNetPipeline(
controlnet=controlnet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+out_channels = getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)
+self.vae_scale_factor = 2 ** (len(out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False

View File

@@ -225,7 +225,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
controlnet=controlnet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False

View File

@@ -175,7 +175,9 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
def prepare_text_inputs(self, prompt: Union[str, List[str]]):
if not isinstance(prompt, (str, list)):

View File

@@ -154,7 +154,9 @@ class LatentConsistencyModelImg2ImgPipeline(
" information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing

View File

@@ -141,7 +141,9 @@ class LatentConsistencyModelPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -111,7 +111,9 @@ class MusicLDMPipeline(DiffusionPipeline):
scheduler=scheduler,
vocoder=vocoder,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
def enable_vae_slicing(self):

View File

@@ -205,7 +205,9 @@ class PaintByExamplePipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -154,7 +154,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/utils.py

View File

@@ -87,7 +87,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -224,7 +224,9 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -194,7 +194,9 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -239,7 +239,9 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -141,7 +141,9 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
depth_estimator=depth_estimator,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt

View File

@@ -367,7 +367,9 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
feature_extractor=feature_extractor,
inverse_scheduler=inverse_scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -168,7 +168,9 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -226,7 +226,9 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -126,7 +126,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -231,7 +231,9 @@ class StableDiffusionImg2ImgPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -299,7 +299,9 @@ class StableDiffusionInpaintPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True

View File

@@ -213,7 +213,9 @@ class StableDiffusionInpaintPipelineLegacy(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -133,7 +133,9 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -117,7 +117,9 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
feature_extractor=feature_extractor,
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
model = ModelWrapper(unet, scheduler.alphas_cumprod)

View File

@@ -99,7 +99,9 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixi
unet=unet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
def _encode_prompt(self, prompt, device, do_classifier_free_guidance, negative_prompt):

View File

@@ -158,7 +158,9 @@ class StableDiffusionLDM3DPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessorLDM3D(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -114,7 +114,9 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -128,7 +128,9 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -139,7 +139,9 @@ class StableDiffusionParadigmsPipeline(
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -365,7 +365,9 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
caption_generator=caption_generator,
inverse_scheduler=inverse_scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -151,7 +151,9 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -141,7 +141,9 @@ class StableDiffusionUpscalePipeline(
watermarker=watermarker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
self.register_to_config(max_noise_level=max_noise_level)

View File

@@ -147,7 +147,9 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
vae=vae,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing

View File

@@ -148,7 +148,9 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
vae=vae,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing

View File

@@ -142,7 +142,9 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
feature_extractor=feature_extractor,
)
self._safety_text_concept = safety_concept
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.register_to_config(requires_safety_checker=requires_safety_checker)
@property

View File

@@ -65,7 +65,9 @@ class FlaxStableDiffusionXLPipeline(FlaxDiffusionPipeline):
unet=unet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
def prepare_inputs(self, prompt: Union[str, List[str]]):
if not isinstance(prompt, (str, list)):

View File

@@ -177,7 +177,9 @@ class StableDiffusionXLPipeline(
scheduler=scheduler,
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size

View File

@@ -192,7 +192,9 @@ class StableDiffusionXLImg2ImgPipeline(
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

View File

@@ -340,7 +340,9 @@ class StableDiffusionXLInpaintPipeline(
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True

View File

@@ -179,7 +179,9 @@ class StableDiffusionXLInstructPix2PixPipeline(
scheduler=scheduler,
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size

View File

@@ -199,7 +199,9 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
safety_checker=safety_checker,
feature_extractor=feature_extractor,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)

View File

@@ -188,7 +188,9 @@ class StableDiffusionXLAdapterPipeline(
scheduler=scheduler,
)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size

View File

@@ -116,7 +116,9 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
unet=unet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
def enable_vae_slicing(self):

View File

@@ -178,7 +178,9 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
unet=unet,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
def enable_vae_slicing(self):

View File

@@ -117,7 +117,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.num_channels_latents = vae.config.latent_channels

View File

@@ -76,7 +76,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
vae=vae,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
@torch.no_grad()
def image_variation(

View File

@@ -94,7 +94,9 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
vae=vae,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
if self.text_unet is not None and (

View File

@@ -77,7 +77,9 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
vae=vae,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):

View File

@@ -82,7 +82,9 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
vae=vae,
scheduler=scheduler,
)
-self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+self.vae_scale_factor = 2 ** (
+len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
+)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
if self.text_unet is not None: