mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-24 12:35:59 +08:00
Compare commits
10 Commits
requiremen
...
devanshi00
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff26d9ffd5 | ||
|
|
668f265054 | ||
|
|
55eaa6efb2 | ||
|
|
b603429ff5 | ||
|
|
7a02fadad3 | ||
|
|
3bc3fdb035 | ||
|
|
8cc38a75d3 | ||
|
|
e5bb10cfe1 | ||
|
|
ec37629371 | ||
|
|
ec541906c5 |
@@ -675,6 +675,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
variant: Optional[str] = None,
|
||||
max_shard_size: Union[int, str] = "10GB",
|
||||
push_to_hub: bool = False,
|
||||
use_flashpack: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
@@ -707,6 +708,9 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
use_flashpack (`bool`, *optional*, defaults to `False`):
|
||||
Whether to save the model in [FlashPack](https://github.com/fal-ai/flashpack) format. FlashPack is a
|
||||
binary format that allows for faster loading. Requires the `flashpack` library to be installed.
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
@@ -727,12 +731,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
" the logger on the traceback to understand the reason why the quantized model is not serializable."
|
||||
)
|
||||
|
||||
weights_name = SAFETENSORS_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME
|
||||
weights_name = _add_variant(weights_name, variant)
|
||||
weights_name_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(
|
||||
".safetensors", "{suffix}.safetensors"
|
||||
)
|
||||
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
if push_to_hub:
|
||||
@@ -746,67 +744,80 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
# Only save the model itself if we are using distributed training
|
||||
model_to_save = self
|
||||
|
||||
# Attach architecture to the config
|
||||
# Save the config
|
||||
if is_main_process:
|
||||
model_to_save.save_config(save_directory)
|
||||
|
||||
# Save the model
|
||||
state_dict = model_to_save.state_dict()
|
||||
if use_flashpack:
|
||||
if not is_main_process:
|
||||
return
|
||||
|
||||
# Save the model
|
||||
state_dict_split = split_torch_state_dict_into_shards(
|
||||
state_dict, max_shard_size=max_shard_size, filename_pattern=weights_name_pattern
|
||||
)
|
||||
from ..utils.flashpack_utils import save_flashpack
|
||||
|
||||
# Clean the folder from a previous save
|
||||
if is_main_process:
|
||||
for filename in os.listdir(save_directory):
|
||||
if filename in state_dict_split.filename_to_tensors.keys():
|
||||
continue
|
||||
full_filename = os.path.join(save_directory, filename)
|
||||
if not os.path.isfile(full_filename):
|
||||
continue
|
||||
weights_without_ext = weights_name_pattern.replace(".bin", "").replace(".safetensors", "")
|
||||
weights_without_ext = weights_without_ext.replace("{suffix}", "")
|
||||
filename_without_ext = filename.replace(".bin", "").replace(".safetensors", "")
|
||||
# make sure that file to be deleted matches format of sharded file, e.g. pytorch_model-00001-of-00005
|
||||
if (
|
||||
filename.startswith(weights_without_ext)
|
||||
and _REGEX_SHARD.fullmatch(filename_without_ext) is not None
|
||||
):
|
||||
os.remove(full_filename)
|
||||
|
||||
for filename, tensors in state_dict_split.filename_to_tensors.items():
|
||||
shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
|
||||
filepath = os.path.join(save_directory, filename)
|
||||
if safe_serialization:
|
||||
# At some point we will need to deal better with save_function (used for TPU and other distributed
|
||||
# joyfulness), but for now this enough.
|
||||
safetensors.torch.save_file(shard, filepath, metadata={"format": "pt"})
|
||||
else:
|
||||
torch.save(shard, filepath)
|
||||
|
||||
if state_dict_split.is_sharded:
|
||||
index = {
|
||||
"metadata": state_dict_split.metadata,
|
||||
"weight_map": state_dict_split.tensor_to_filename,
|
||||
}
|
||||
save_index_file = SAFE_WEIGHTS_INDEX_NAME if safe_serialization else WEIGHTS_INDEX_NAME
|
||||
save_index_file = os.path.join(save_directory, _add_variant(save_index_file, variant))
|
||||
# Save the index as well
|
||||
with open(save_index_file, "w", encoding="utf-8") as f:
|
||||
content = json.dumps(index, indent=2, sort_keys=True) + "\n"
|
||||
f.write(content)
|
||||
logger.info(
|
||||
f"The model is bigger than the maximum size per checkpoint ({max_shard_size}) and is going to be "
|
||||
f"split in {len(state_dict_split.filename_to_tensors)} checkpoint shards. You can find where each parameters has been saved in the "
|
||||
f"index located at {save_index_file}."
|
||||
)
|
||||
save_flashpack(model_to_save, save_directory, variant=variant)
|
||||
else:
|
||||
path_to_weights = os.path.join(save_directory, weights_name)
|
||||
logger.info(f"Model weights saved in {path_to_weights}")
|
||||
weights_name = SAFETENSORS_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME
|
||||
weights_name = _add_variant(weights_name, variant)
|
||||
weights_name_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(
|
||||
".safetensors", "{suffix}.safetensors"
|
||||
)
|
||||
|
||||
state_dict = model_to_save.state_dict()
|
||||
state_dict_split = split_torch_state_dict_into_shards(
|
||||
state_dict, max_shard_size=max_shard_size, filename_pattern=weights_name_pattern
|
||||
)
|
||||
|
||||
# Clean the folder from a previous save
|
||||
if is_main_process:
|
||||
for filename in os.listdir(save_directory):
|
||||
if filename in state_dict_split.filename_to_tensors.keys():
|
||||
continue
|
||||
full_filename = os.path.join(save_directory, filename)
|
||||
if not os.path.isfile(full_filename):
|
||||
continue
|
||||
weights_without_ext = weights_name_pattern.replace(".bin", "").replace(".safetensors", "")
|
||||
weights_without_ext = weights_without_ext.replace("{suffix}", "")
|
||||
filename_without_ext = filename.replace(".bin", "").replace(".safetensors", "")
|
||||
# make sure that file to be deleted matches format of sharded file, e.g. pytorch_model-00001-of-00005
|
||||
if (
|
||||
filename.startswith(weights_without_ext)
|
||||
and _REGEX_SHARD.fullmatch(filename_without_ext) is not None
|
||||
):
|
||||
os.remove(full_filename)
|
||||
|
||||
# Save each shard
|
||||
for filename, tensors in state_dict_split.filename_to_tensors.items():
|
||||
shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
|
||||
filepath = os.path.join(save_directory, filename)
|
||||
if safe_serialization:
|
||||
# At some point we will need to deal better with save_function (used for TPU and other distributed
|
||||
# joyfulness), but for now this enough.
|
||||
safetensors.torch.save_file(shard, filepath, metadata={"format": "pt"})
|
||||
else:
|
||||
torch.save(shard, filepath)
|
||||
|
||||
# Save index file if sharded
|
||||
if state_dict_split.is_sharded:
|
||||
index = {
|
||||
"metadata": state_dict_split.metadata,
|
||||
"weight_map": state_dict_split.tensor_to_filename,
|
||||
}
|
||||
save_index_file = SAFE_WEIGHTS_INDEX_NAME if safe_serialization else WEIGHTS_INDEX_NAME
|
||||
save_index_file = os.path.join(save_directory, _add_variant(save_index_file, variant))
|
||||
# Save the index as well
|
||||
with open(save_index_file, "w", encoding="utf-8") as f:
|
||||
content = json.dumps(index, indent=2, sort_keys=True) + "\n"
|
||||
f.write(content)
|
||||
logger.info(
|
||||
f"The model is bigger than the maximum size per checkpoint ({max_shard_size}) and is going to be "
|
||||
f"split in {len(state_dict_split.filename_to_tensors)} checkpoint shards. You can find where each parameters has been saved in the "
|
||||
f"index located at {save_index_file}."
|
||||
)
|
||||
else:
|
||||
path_to_weights = os.path.join(save_directory, weights_name)
|
||||
logger.info(f"Model weights saved in {path_to_weights}")
|
||||
|
||||
# Push to hub if requested (common to both paths)
|
||||
if push_to_hub:
|
||||
# Create a new empty model card and eventually tag it
|
||||
model_card = load_or_create_model_card(repo_id, token=token)
|
||||
@@ -939,6 +950,10 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
If set to `None`, the `safetensors` weights are downloaded if they're available **and** if the
|
||||
`safetensors` library is installed. If set to `True`, the model is forcibly loaded from `safetensors`
|
||||
weights. If set to `False`, `safetensors` weights are not loaded.
|
||||
use_flashpack (`bool`, *optional*, defaults to `False`):
|
||||
If set to `True`, the model is first loaded from `flashpack` (https://github.com/fal-ai/flashpack)
|
||||
weights if a compatible `.flashpack` file is found. If flashpack is unavailable or the `.flashpack`
|
||||
file cannot be used, automatic fallback to the standard loading path (for example, `safetensors`).
|
||||
disable_mmap ('bool', *optional*, defaults to 'False'):
|
||||
Whether to disable mmap when loading a Safetensors model. This option can perform better when the model
|
||||
is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well.
|
||||
@@ -982,6 +997,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
|
||||
variant = kwargs.pop("variant", None)
|
||||
use_safetensors = kwargs.pop("use_safetensors", None)
|
||||
use_flashpack = kwargs.pop("use_flashpack", False)
|
||||
quantization_config = kwargs.pop("quantization_config", None)
|
||||
dduf_entries: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_entries", None)
|
||||
disable_mmap = kwargs.pop("disable_mmap", False)
|
||||
@@ -1199,7 +1215,31 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
from .modeling_pytorch_flax_utils import load_flax_checkpoint_in_pytorch_model
|
||||
|
||||
model = load_flax_checkpoint_in_pytorch_model(model, resolved_model_file)
|
||||
else:
|
||||
|
||||
flashpack_file = None
|
||||
if use_flashpack:
|
||||
try:
|
||||
flashpack_file = _get_model_file(
|
||||
pretrained_model_name_or_path,
|
||||
weights_name=_add_variant("model.flashpack", variant),
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
commit_hash=commit_hash,
|
||||
dduf_entries=dduf_entries,
|
||||
)
|
||||
except EnvironmentError:
|
||||
flashpack_file = None
|
||||
logger.warning(
|
||||
"`use_flashpack` was specified to be True but not flashpack file was found. Resorting to non-flashpack alternatives."
|
||||
)
|
||||
|
||||
if flashpack_file is None:
|
||||
# in the case it is sharded, we have already the index
|
||||
if is_sharded:
|
||||
resolved_model_file, sharded_metadata = _get_checkpoint_shard_files(
|
||||
@@ -1215,6 +1255,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
dduf_entries=dduf_entries,
|
||||
)
|
||||
elif use_safetensors:
|
||||
logger.warning("Trying to load model weights with safetensors format.")
|
||||
try:
|
||||
resolved_model_file = _get_model_file(
|
||||
pretrained_model_name_or_path,
|
||||
@@ -1280,6 +1321,29 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
if dtype_orig is not None:
|
||||
torch.set_default_dtype(dtype_orig)
|
||||
|
||||
if flashpack_file is not None:
|
||||
from ..utils.flashpack_utils import load_flashpack
|
||||
|
||||
# Even when using FlashPack, we preserve `low_cpu_mem_usage` behavior by initializing
|
||||
# the model with meta tensors. Since FlashPack cannot write into meta tensors, we
|
||||
# explicitly materialize parameters before loading to ensure correctness and parity
|
||||
# with the standard loading path.
|
||||
if any(p.device.type == "meta" for p in model.parameters()):
|
||||
model.to_empty(device="cpu")
|
||||
load_flashpack(model, flashpack_file)
|
||||
model.register_to_config(_name_or_path=pretrained_model_name_or_path)
|
||||
model.eval()
|
||||
|
||||
if output_loading_info:
|
||||
return model, {
|
||||
"missing_keys": [],
|
||||
"unexpected_keys": [],
|
||||
"mismatched_keys": [],
|
||||
"error_msgs": [],
|
||||
}
|
||||
|
||||
return model
|
||||
|
||||
state_dict = None
|
||||
if not is_sharded:
|
||||
# Time to load the checkpoint
|
||||
@@ -1327,7 +1391,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
keep_in_fp32_modules=keep_in_fp32_modules,
|
||||
dduf_entries=dduf_entries,
|
||||
is_parallel_loading_enabled=is_parallel_loading_enabled,
|
||||
disable_mmap=disable_mmap,
|
||||
)
|
||||
loading_info = {
|
||||
"missing_keys": missing_keys,
|
||||
@@ -1373,6 +1436,8 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
if output_loading_info:
|
||||
return model, loading_info
|
||||
|
||||
logger.warning(f"Model till end {pretrained_model_name_or_path} loaded successfully")
|
||||
|
||||
return model
|
||||
|
||||
# Adapted from `transformers`.
|
||||
|
||||
@@ -756,6 +756,7 @@ def load_sub_model(
|
||||
low_cpu_mem_usage: bool,
|
||||
cached_folder: Union[str, os.PathLike],
|
||||
use_safetensors: bool,
|
||||
use_flashpack: bool,
|
||||
dduf_entries: Optional[Dict[str, DDUFEntry]],
|
||||
provider_options: Any,
|
||||
disable_mmap: bool,
|
||||
@@ -838,6 +839,9 @@ def load_sub_model(
|
||||
loading_kwargs["variant"] = model_variants.pop(name, None)
|
||||
loading_kwargs["use_safetensors"] = use_safetensors
|
||||
|
||||
if is_diffusers_model:
|
||||
loading_kwargs["use_flashpack"] = use_flashpack
|
||||
|
||||
if from_flax:
|
||||
loading_kwargs["from_flax"] = True
|
||||
|
||||
|
||||
@@ -243,6 +243,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
variant: Optional[str] = None,
|
||||
max_shard_size: Optional[Union[int, str]] = None,
|
||||
push_to_hub: bool = False,
|
||||
use_flashpack: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
@@ -268,7 +269,9 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
|
||||
use_flashpack (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to use `flashpack` to save the model weights. Requires the `flashpack` library: `pip
|
||||
install flashpack`.
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
@@ -340,6 +343,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
|
||||
save_method_accept_variant = "variant" in save_method_signature.parameters
|
||||
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
|
||||
save_method_accept_flashpack = "use_flashpack" in save_method_signature.parameters
|
||||
|
||||
save_kwargs = {}
|
||||
if save_method_accept_safe:
|
||||
@@ -349,6 +353,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
if save_method_accept_max_shard_size and max_shard_size is not None:
|
||||
# max_shard_size is expected to not be None in ModelMixin
|
||||
save_kwargs["max_shard_size"] = max_shard_size
|
||||
if save_method_accept_flashpack:
|
||||
save_kwargs["use_flashpack"] = use_flashpack
|
||||
|
||||
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)
|
||||
|
||||
@@ -707,6 +713,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
If set to `None`, the safetensors weights are downloaded if they're available **and** if the
|
||||
safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
|
||||
weights. If set to `False`, safetensors weights are not loaded.
|
||||
use_flashpack (`bool`, *optional*, defaults to `False`):
|
||||
If set to `True`, the model is first loaded from `flashpack` weights if a compatible `.flashpack` file
|
||||
is found. If flashpack is unavailable or the `.flashpack` file cannot be used, automatic fallback to
|
||||
the standard loading path (for example, `safetensors`). Requires the `flashpack` library: `pip install
|
||||
flashpack`.
|
||||
use_onnx (`bool`, *optional*, defaults to `None`):
|
||||
If set to `True`, ONNX weights will always be downloaded if present. If set to `False`, ONNX weights
|
||||
will never be downloaded. By default `use_onnx` defaults to the `_is_onnx` class attribute which is
|
||||
@@ -772,6 +783,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
variant = kwargs.pop("variant", None)
|
||||
dduf_file = kwargs.pop("dduf_file", None)
|
||||
use_safetensors = kwargs.pop("use_safetensors", None)
|
||||
use_flashpack = kwargs.pop("use_flashpack", False)
|
||||
use_onnx = kwargs.pop("use_onnx", None)
|
||||
load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)
|
||||
quantization_config = kwargs.pop("quantization_config", None)
|
||||
@@ -1061,6 +1073,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
low_cpu_mem_usage=low_cpu_mem_usage,
|
||||
cached_folder=cached_folder,
|
||||
use_safetensors=use_safetensors,
|
||||
use_flashpack=use_flashpack,
|
||||
dduf_entries=dduf_entries,
|
||||
provider_options=provider_options,
|
||||
disable_mmap=disable_mmap,
|
||||
|
||||
@@ -14,7 +14,7 @@ from .scheduling_utils import SchedulerMixin
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -28,8 +28,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -100,14 +100,13 @@ def betas_for_alpha_bar(
|
||||
return torch.tensor(betas, dtype=torch.float32)
|
||||
|
||||
|
||||
def rescale_zero_terminal_snr(alphas_cumprod):
|
||||
def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
||||
|
||||
Rescales betas to have zero terminal SNR Based on (Algorithm 1)[https://huggingface.co/papers/2305.08891]
|
||||
|
||||
Args:
|
||||
betas (`torch.Tensor`):
|
||||
the betas that the scheduler is being initialized with.
|
||||
alphas_cumprod (`torch.Tensor`):
|
||||
The alphas cumulative products that the scheduler is being initialized with.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`: rescaled betas with zero terminal SNR
|
||||
@@ -142,11 +141,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Args:
|
||||
num_train_timesteps (`int`, defaults to 1000):
|
||||
The number of diffusion steps to train the model.
|
||||
beta_start (`float`, defaults to 0.0001):
|
||||
beta_start (`float`, defaults to 0.00085):
|
||||
The starting `beta` value of inference.
|
||||
beta_end (`float`, defaults to 0.02):
|
||||
beta_end (`float`, defaults to 0.0120):
|
||||
The final `beta` value.
|
||||
beta_schedule (`str`, defaults to `"linear"`):
|
||||
beta_schedule (`str`, defaults to `"scaled_linear"`):
|
||||
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
||||
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
||||
trained_betas (`np.ndarray`, *optional*):
|
||||
@@ -179,6 +178,8 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
||||
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
||||
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
||||
snr_shift_scale (`float`, defaults to 3.0):
|
||||
Shift scale for SNR.
|
||||
"""
|
||||
|
||||
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
||||
@@ -190,15 +191,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
num_train_timesteps: int = 1000,
|
||||
beta_start: float = 0.00085,
|
||||
beta_end: float = 0.0120,
|
||||
beta_schedule: str = "scaled_linear",
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
clip_sample: bool = True,
|
||||
set_alpha_to_one: bool = True,
|
||||
steps_offset: int = 0,
|
||||
prediction_type: str = "epsilon",
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
clip_sample_range: float = 1.0,
|
||||
sample_max_value: float = 1.0,
|
||||
timestep_spacing: str = "leading",
|
||||
timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
snr_shift_scale: float = 3.0,
|
||||
):
|
||||
@@ -208,7 +209,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float64) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float64,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -238,7 +247,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.num_inference_steps = None
|
||||
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
|
||||
|
||||
def _get_variance(self, timestep, prev_timestep):
|
||||
def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor:
|
||||
alpha_prod_t = self.alphas_cumprod[timestep]
|
||||
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
|
||||
beta_prod_t = 1 - alpha_prod_t
|
||||
@@ -265,7 +274,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
"""
|
||||
return sample
|
||||
|
||||
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
||||
def set_timesteps(
|
||||
self,
|
||||
num_inference_steps: int,
|
||||
device: Optional[Union[str, torch.device]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
||||
|
||||
@@ -317,7 +330,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
sample: torch.Tensor,
|
||||
eta: float = 0.0,
|
||||
use_clipped_model_output: bool = False,
|
||||
generator=None,
|
||||
generator: Optional[torch.Generator] = None,
|
||||
variance_noise: Optional[torch.Tensor] = None,
|
||||
return_dict: bool = True,
|
||||
) -> Union[DDIMSchedulerOutput, Tuple]:
|
||||
@@ -328,7 +341,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Args:
|
||||
model_output (`torch.Tensor`):
|
||||
The direct output from learned diffusion model.
|
||||
timestep (`float`):
|
||||
timestep (`int`):
|
||||
The current discrete timestep in the diffusion chain.
|
||||
sample (`torch.Tensor`):
|
||||
A current instance of a sample created by the diffusion process.
|
||||
@@ -487,5 +500,5 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
|
||||
return velocity
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> int:
|
||||
return self.config.num_train_timesteps
|
||||
|
||||
@@ -49,7 +49,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMParallelSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -48,7 +48,7 @@ class DDPMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -192,7 +192,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
variance_type: Literal[
|
||||
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
] = "fixed_small",
|
||||
clip_sample: bool = True,
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
@@ -210,7 +215,15 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -337,7 +350,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
t: int,
|
||||
predicted_variance: Optional[torch.Tensor] = None,
|
||||
variance_type: Optional[
|
||||
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
|
||||
Literal[
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
]
|
||||
] = None,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
@@ -472,7 +492,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
|
||||
prev_t = self.previous_timestep(t)
|
||||
|
||||
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
|
||||
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [
|
||||
"learned",
|
||||
"learned_range",
|
||||
]:
|
||||
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
|
||||
else:
|
||||
predicted_variance = None
|
||||
@@ -521,7 +544,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
if t > 0:
|
||||
device = model_output.device
|
||||
variance_noise = randn_tensor(
|
||||
model_output.shape, generator=generator, device=device, dtype=model_output.dtype
|
||||
model_output.shape,
|
||||
generator=generator,
|
||||
device=device,
|
||||
dtype=model_output.dtype,
|
||||
)
|
||||
if self.variance_type == "fixed_small_log":
|
||||
variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise
|
||||
|
||||
@@ -50,7 +50,7 @@ class DDPMParallelSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -64,8 +64,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -202,7 +202,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
variance_type: Literal[
|
||||
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
] = "fixed_small",
|
||||
clip_sample: bool = True,
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
@@ -220,7 +225,15 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -350,7 +363,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
t: int,
|
||||
predicted_variance: Optional[torch.Tensor] = None,
|
||||
variance_type: Optional[
|
||||
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
|
||||
Literal[
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
]
|
||||
] = None,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -117,7 +117,7 @@ class BrownianTreeNoiseSampler:
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -131,8 +131,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -36,7 +36,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -50,8 +50,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -54,7 +54,7 @@ class EulerDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -68,8 +68,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class HeunDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -53,7 +53,7 @@ class LCMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -67,8 +67,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -49,7 +49,7 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -28,7 +28,7 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -42,8 +42,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -47,7 +47,7 @@ class RePaintSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -61,8 +61,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -35,7 +35,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -49,8 +49,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class TCDSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -48,7 +48,7 @@ class UnCLIPSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -226,6 +226,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
time_shift_type: Literal["exponential"] = "exponential",
|
||||
sigma_min: Optional[float] = None,
|
||||
sigma_max: Optional[float] = None,
|
||||
shift_terminal: Optional[float] = None,
|
||||
) -> None:
|
||||
if self.config.use_beta_sigmas and not is_scipy_available():
|
||||
raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
|
||||
@@ -245,6 +246,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
else:
|
||||
raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
|
||||
if shift_terminal is not None and not use_flow_sigmas:
|
||||
raise ValueError("`shift_terminal` is only supported when `use_flow_sigmas=True`.")
|
||||
|
||||
if rescale_betas_zero_snr:
|
||||
self.betas = rescale_zero_terminal_snr(self.betas)
|
||||
@@ -313,8 +316,12 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self._begin_index = begin_index
|
||||
|
||||
def set_timesteps(
|
||||
self, num_inference_steps: int, device: Optional[Union[str, torch.device]] = None, mu: Optional[float] = None
|
||||
) -> None:
|
||||
self,
|
||||
num_inference_steps: Optional[int] = None,
|
||||
device: Union[str, torch.device] = None,
|
||||
sigmas: Optional[List[float]] = None,
|
||||
mu: Optional[float] = None,
|
||||
):
|
||||
"""
|
||||
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
||||
|
||||
@@ -323,13 +330,24 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
The number of diffusion steps used when generating samples with a pre-trained model.
|
||||
device (`str` or `torch.device`, *optional*):
|
||||
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
||||
sigmas (`List[float]`, *optional*):
|
||||
Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed
|
||||
automatically.
|
||||
mu (`float`, *optional*):
|
||||
Optional mu parameter for dynamic shifting when using exponential time shift type.
|
||||
"""
|
||||
if self.config.use_dynamic_shifting and mu is None:
|
||||
raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`")
|
||||
|
||||
if sigmas is not None:
|
||||
if not self.config.use_flow_sigmas:
|
||||
raise ValueError(
|
||||
"Passing `sigmas` is only supported when `use_flow_sigmas=True`. "
|
||||
"Please set `use_flow_sigmas=True` during scheduler initialization."
|
||||
)
|
||||
num_inference_steps = len(sigmas)
|
||||
|
||||
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
||||
if mu is not None:
|
||||
assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
|
||||
self.config.flow_shift = np.exp(mu)
|
||||
if self.config.timestep_spacing == "linspace":
|
||||
timesteps = (
|
||||
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
|
||||
@@ -354,8 +372,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
|
||||
)
|
||||
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
if self.config.use_karras_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -375,6 +394,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -389,6 +410,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_beta_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -403,9 +426,18 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_flow_sigmas:
|
||||
alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)
|
||||
sigmas = 1.0 - alphas
|
||||
sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
|
||||
if sigmas is None:
|
||||
sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1]
|
||||
if self.config.use_dynamic_shifting:
|
||||
sigmas = self.time_shift(mu, 1.0, sigmas)
|
||||
else:
|
||||
sigmas = self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas)
|
||||
if self.config.shift_terminal:
|
||||
sigmas = self.stretch_shift_to_terminal(sigmas)
|
||||
eps = 1e-6
|
||||
if np.fabs(sigmas[0] - 1) < eps:
|
||||
# to avoid inf torch.log(alpha_si) in multistep_uni_p_bh_update during first/second update
|
||||
sigmas[0] -= eps
|
||||
timesteps = (sigmas * self.config.num_train_timesteps).copy()
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
sigma_last = sigmas[-1]
|
||||
@@ -417,6 +449,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
else:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
|
||||
@@ -446,6 +480,43 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self._begin_index = None
|
||||
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift
|
||||
def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
|
||||
if self.config.time_shift_type == "exponential":
|
||||
return self._time_shift_exponential(mu, sigma, t)
|
||||
elif self.config.time_shift_type == "linear":
|
||||
return self._time_shift_linear(mu, sigma, t)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.stretch_shift_to_terminal
|
||||
def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor:
|
||||
r"""
|
||||
Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config
|
||||
value.
|
||||
|
||||
Reference:
|
||||
https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51
|
||||
|
||||
Args:
|
||||
t (`torch.Tensor`):
|
||||
A tensor of timesteps to be stretched and shifted.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
A tensor of adjusted timesteps such that the final value equals `self.config.shift_terminal`.
|
||||
"""
|
||||
one_minus_z = 1 - t
|
||||
scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal)
|
||||
stretched_t = 1 - (one_minus_z / scale_factor)
|
||||
return stretched_t
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential
|
||||
def _time_shift_exponential(self, mu, sigma, t):
|
||||
return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear
|
||||
def _time_shift_linear(self, mu, sigma, t):
|
||||
return mu / (mu + (1 / t - 1) ** sigma)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
||||
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
|
||||
81
src/diffusers/utils/flashpack_utils.py
Normal file
81
src/diffusers/utils/flashpack_utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from ..utils import _add_variant
|
||||
from .import_utils import is_flashpack_available
|
||||
from .logging import get_logger
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def save_flashpack(
|
||||
model,
|
||||
save_directory: str,
|
||||
variant: Optional[str] = None,
|
||||
is_main_process: bool = True,
|
||||
):
|
||||
"""
|
||||
Save model weights in FlashPack format along with a metadata config.
|
||||
|
||||
Args:
|
||||
model: Diffusers model instance
|
||||
save_directory (`str`): Directory to save weights
|
||||
variant (`str`, *optional*): Model variant
|
||||
"""
|
||||
if not is_flashpack_available():
|
||||
raise ImportError(
|
||||
"The `use_flashpack=True` argument requires the `flashpack` package. "
|
||||
"Install it with `pip install flashpack`."
|
||||
)
|
||||
|
||||
from flashpack import pack_to_file
|
||||
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
weights_name = _add_variant("model.flashpack", variant)
|
||||
weights_path = os.path.join(save_directory, weights_name)
|
||||
config_path = os.path.join(save_directory, "flashpack_config.json")
|
||||
|
||||
try:
|
||||
target_dtype = getattr(model, "dtype", None)
|
||||
logger.warning(f"Dtype used for FlashPack save: {target_dtype}")
|
||||
|
||||
# 1. Save binary weights
|
||||
pack_to_file(model, weights_path, target_dtype=target_dtype)
|
||||
|
||||
# 2. Save config metadata (best-effort)
|
||||
if hasattr(model, "config"):
|
||||
try:
|
||||
if hasattr(model.config, "to_dict"):
|
||||
config_data = model.config.to_dict()
|
||||
else:
|
||||
config_data = dict(model.config)
|
||||
|
||||
with open(config_path, "w") as f:
|
||||
json.dump(config_data, f, indent=4)
|
||||
|
||||
except Exception as config_err:
|
||||
logger.warning(f"FlashPack weights saved, but config serialization failed: {config_err}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save weights in FlashPack format: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def load_flashpack(model, flashpack_file: str):
|
||||
"""
|
||||
Assign FlashPack weights from a file into an initialized PyTorch model.
|
||||
"""
|
||||
if not is_flashpack_available():
|
||||
raise ImportError("FlashPack weights require the `flashpack` package. Install with `pip install flashpack`.")
|
||||
|
||||
from flashpack import assign_from_file
|
||||
|
||||
logger.warning(f"Loading FlashPack weights from {flashpack_file}")
|
||||
|
||||
try:
|
||||
assign_from_file(model, flashpack_file)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to load FlashPack weights from {flashpack_file}") from e
|
||||
@@ -231,6 +231,7 @@ _aiter_available, _aiter_version = _is_package_available("aiter")
|
||||
_kornia_available, _kornia_version = _is_package_available("kornia")
|
||||
_nvidia_modelopt_available, _nvidia_modelopt_version = _is_package_available("modelopt", get_dist_name=True)
|
||||
_av_available, _av_version = _is_package_available("av")
|
||||
_flashpack_available, _flashpack_version = _is_package_available("flashpack")
|
||||
|
||||
|
||||
def is_torch_available():
|
||||
@@ -425,6 +426,10 @@ def is_av_available():
|
||||
return _av_available
|
||||
|
||||
|
||||
def is_flashpack_available():
|
||||
return _flashpack_available
|
||||
|
||||
|
||||
# docstyle-ignore
|
||||
FLAX_IMPORT_ERROR = """
|
||||
{0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
|
||||
@@ -942,6 +947,16 @@ def is_aiter_version(operation: str, version: str):
|
||||
return compare_versions(parse(_aiter_version), operation, version)
|
||||
|
||||
|
||||
@cache
|
||||
def is_flashpack_version(operation: str, version: str):
|
||||
"""
|
||||
Compares the current flashpack version to a given reference with an operation.
|
||||
"""
|
||||
if not _flashpack_available:
|
||||
return False
|
||||
return compare_versions(parse(_flashpack_version), operation, version)
|
||||
|
||||
|
||||
def get_objects_from_module(module):
|
||||
"""
|
||||
Returns a dict of object names and values in a module, while skipping private/internal objects
|
||||
|
||||
Reference in New Issue
Block a user