Compare commits

...

4 Commits

Author SHA1 Message Date
Dhruv Nair
57afaff270 update 2024-02-13 03:47:00 +00:00
Dhruv Nair
e7b2032082 update 2024-02-12 18:22:36 +00:00
Dhruv Nair
2d5e9c2e39 update 2024-02-12 16:10:28 +00:00
Dhruv Nair
d68635f950 update 2024-02-12 14:24:24 +00:00
2 changed files with 28 additions and 3 deletions

View File

@@ -38,6 +38,9 @@ class FromOriginalVAEMixin:
- A link to the `.ckpt` file (for example
`"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
- A path to a *file* containing all pipeline weights.
config_file (`str`, *optional*):
Filepath to the configuration YAML file associated with the model. If not provided it will default to:
https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml
torch_dtype (`str` or `torch.dtype`, *optional*):
Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
dtype is automatically derived from the model's weights.
@@ -65,6 +68,13 @@ class FromOriginalVAEMixin:
image_size (`int`, *optional*, defaults to 512):
The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
Diffusion v2 base model. Use 768 for Stable Diffusion v2.
scaling_factor (`float`, *optional*, defaults to 0.18215):
The component-wise standard deviation of the trained latent space computed using the first batch of the
training set. This is used to scale the latent space to have unit variance when training the diffusion
model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z
= 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution
Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
use_safetensors (`bool`, *optional*, defaults to `None`):
If set to `None`, the safetensors weights are downloaded if they're available **and** if the
safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
@@ -92,6 +102,7 @@ class FromOriginalVAEMixin:
"""
original_config_file = kwargs.pop("original_config_file", None)
config_file = kwargs.pop("config_file", None)
resume_download = kwargs.pop("resume_download", False)
force_download = kwargs.pop("force_download", False)
proxies = kwargs.pop("proxies", None)
@@ -103,6 +114,13 @@ class FromOriginalVAEMixin:
use_safetensors = kwargs.pop("use_safetensors", True)
class_name = cls.__name__
if (config_file is not None) and (original_config_file is not None):
raise ValueError(
"You cannot pass both `config_file` and `original_config_file` to `from_single_file`. Please use only one of these arguments."
)
original_config_file = original_config_file or config_file
original_config, checkpoint = fetch_ldm_config_and_checkpoint(
pretrained_model_link_or_path=pretrained_model_link_or_path,
class_name=class_name,
@@ -118,7 +136,10 @@ class FromOriginalVAEMixin:
)
image_size = kwargs.pop("image_size", None)
component = create_diffusers_vae_model_from_ldm(class_name, original_config, checkpoint, image_size=image_size)
scaling_factor = kwargs.pop("scaling_factor", None)
component = create_diffusers_vae_model_from_ldm(
class_name, original_config, checkpoint, image_size=image_size, scaling_factor=scaling_factor
)
vae = component["vae"]
if torch_dtype is not None:
vae = vae.to(torch_dtype)

View File

@@ -175,6 +175,7 @@ DIFFUSERS_TO_LDM_MAPPING = {
}
LDM_VAE_KEY = "first_stage_model."
LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215
LDM_UNET_KEY = "model.diffusion_model."
LDM_CONTROLNET_KEY = "control_model."
LDM_CLIP_PREFIX_TO_REMOVE = ["cond_stage_model.transformer.", "conditioner.embedders.0.transformer."]
@@ -518,7 +519,10 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None
Creates a config for the diffusers based on the config of the LDM model.
"""
vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
if scaling_factor is None and "scale_factor" in original_config["model"]["params"]:
scaling_factor = original_config["model"]["params"]["scale_factor"]
elif scaling_factor is None:
scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR
block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
@@ -1173,7 +1177,7 @@ def create_diffusers_unet_model_from_ldm(
def create_diffusers_vae_model_from_ldm(
pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=0.18125
pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=None
):
# import here to avoid circular imports
from ..models import AutoencoderKL