mirror of
https://github.com/huggingface/diffusers.git
synced 2025-12-06 20:44:33 +08:00
Compare commits
3 Commits
v0.28.0-re
...
improve_va
| Author | SHA1 | Date | |
|---|---|---|---|
| | 0ecc19fdd1 | | |
| | b9b53198b8 | | |
| | c25967ce2e | | |
@@ -198,7 +198,9 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -236,7 +236,9 @@ class AltDiffusionImg2ImgPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -129,7 +129,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLo
|
||||
motion_adapter=motion_adapter,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
|
||||
|
||||
@@ -94,7 +94,9 @@ class AudioLDMPipeline(DiffusionPipeline):
|
||||
scheduler=scheduler,
|
||||
vocoder=vocoder,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
def enable_vae_slicing(self):
|
||||
|
||||
@@ -171,7 +171,9 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
||||
scheduler=scheduler,
|
||||
vocoder=vocoder,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
def enable_vae_slicing(self):
|
||||
|
||||
@@ -174,7 +174,9 @@ class StableDiffusionControlNetPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.control_image_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
||||
|
||||
@@ -208,7 +208,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.control_image_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
||||
|
||||
@@ -330,7 +330,9 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.mask_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
|
||||
|
||||
@@ -199,7 +199,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.mask_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
|
||||
|
||||
@@ -172,7 +172,9 @@ class StableDiffusionXLControlNetPipeline(
|
||||
controlnet=controlnet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
out_channels = getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)
|
||||
self.vae_scale_factor = 2 ** (len(out_channels) - 1)
|
||||
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.control_image_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
||||
|
||||
@@ -225,7 +225,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
controlnet=controlnet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.control_image_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
||||
|
||||
@@ -175,7 +175,9 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
def prepare_text_inputs(self, prompt: Union[str, List[str]]):
|
||||
if not isinstance(prompt, (str, list)):
|
||||
|
||||
@@ -154,7 +154,9 @@ class LatentConsistencyModelImg2ImgPipeline(
|
||||
" information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
|
||||
)
|
||||
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
|
||||
@@ -141,7 +141,9 @@ class LatentConsistencyModelPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -111,7 +111,9 @@ class MusicLDMPipeline(DiffusionPipeline):
|
||||
scheduler=scheduler,
|
||||
vocoder=vocoder,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
def enable_vae_slicing(self):
|
||||
|
||||
@@ -205,7 +205,9 @@ class PaintByExamplePipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -154,7 +154,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
|
||||
)
|
||||
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/utils.py
|
||||
|
||||
@@ -87,7 +87,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -224,7 +224,9 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -194,7 +194,9 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -239,7 +239,9 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -141,7 +141,9 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
||||
depth_estimator=depth_estimator,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
|
||||
|
||||
@@ -367,7 +367,9 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
feature_extractor=feature_extractor,
|
||||
inverse_scheduler=inverse_scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -168,7 +168,9 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -226,7 +226,9 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -126,7 +126,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -231,7 +231,9 @@ class StableDiffusionImg2ImgPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -299,7 +299,9 @@ class StableDiffusionInpaintPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.mask_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
|
||||
|
||||
@@ -213,7 +213,9 @@ class StableDiffusionInpaintPipelineLegacy(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -133,7 +133,9 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -117,7 +117,9 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
model = ModelWrapper(unet, scheduler.alphas_cumprod)
|
||||
|
||||
@@ -99,7 +99,9 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixi
|
||||
unet=unet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
|
||||
|
||||
def _encode_prompt(self, prompt, device, do_classifier_free_guidance, negative_prompt):
|
||||
|
||||
@@ -158,7 +158,9 @@ class StableDiffusionLDM3DPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessorLDM3D(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -114,7 +114,9 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -128,7 +128,9 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -139,7 +139,9 @@ class StableDiffusionParadigmsPipeline(
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -365,7 +365,9 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
|
||||
caption_generator=caption_generator,
|
||||
inverse_scheduler=inverse_scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -151,7 +151,9 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -141,7 +141,9 @@ class StableDiffusionUpscalePipeline(
|
||||
watermarker=watermarker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
|
||||
self.register_to_config(max_noise_level=max_noise_level)
|
||||
|
||||
|
||||
@@ -147,7 +147,9 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
vae=vae,
|
||||
)
|
||||
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
|
||||
@@ -148,7 +148,9 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
|
||||
vae=vae,
|
||||
)
|
||||
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
|
||||
@@ -142,7 +142,9 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self._safety_text_concept = safety_concept
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
@property
|
||||
|
||||
@@ -65,7 +65,9 @@ class FlaxStableDiffusionXLPipeline(FlaxDiffusionPipeline):
|
||||
unet=unet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
def prepare_inputs(self, prompt: Union[str, List[str]]):
|
||||
if not isinstance(prompt, (str, list)):
|
||||
|
||||
@@ -177,7 +177,9 @@ class StableDiffusionXLPipeline(
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
self.default_sample_size = self.unet.config.sample_size
|
||||
|
||||
@@ -192,7 +192,9 @@ class StableDiffusionXLImg2ImgPipeline(
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()
|
||||
|
||||
@@ -340,7 +340,9 @@ class StableDiffusionXLInpaintPipeline(
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.mask_processor = VaeImageProcessor(
|
||||
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
|
||||
|
||||
@@ -179,7 +179,9 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.default_sample_size = self.unet.config.sample_size
|
||||
|
||||
|
||||
@@ -199,7 +199,9 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
|
||||
safety_checker=safety_checker,
|
||||
feature_extractor=feature_extractor,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
||||
|
||||
|
||||
@@ -188,7 +188,9 @@ class StableDiffusionXLAdapterPipeline(
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
self.default_sample_size = self.unet.config.sample_size
|
||||
|
||||
|
||||
@@ -116,7 +116,9 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
|
||||
unet=unet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
def enable_vae_slicing(self):
|
||||
|
||||
@@ -178,7 +178,9 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
unet=unet,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
|
||||
def enable_vae_slicing(self):
|
||||
|
||||
@@ -117,7 +117,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
|
||||
scheduler=scheduler,
|
||||
)
|
||||
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
self.num_channels_latents = vae.config.latent_channels
|
||||
|
||||
@@ -76,7 +76,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
|
||||
vae=vae,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
|
||||
@torch.no_grad()
|
||||
def image_variation(
|
||||
|
||||
@@ -94,7 +94,9 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
|
||||
vae=vae,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
if self.text_unet is not None and (
|
||||
|
||||
@@ -77,7 +77,9 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
||||
vae=vae,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
|
||||
|
||||
@@ -82,7 +82,9 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
|
||||
vae=vae,
|
||||
scheduler=scheduler,
|
||||
)
|
||||
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
||||
self.vae_scale_factor = 2 ** (
|
||||
len(getattr(self.vae.config, "block_out_channels", self.vae.config.decoder_block_out_channels)) - 1
|
||||
)
|
||||
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
||||
|
||||
if self.text_unet is not None:
|
||||
|
||||
Reference in New Issue
Block a user