Compare commits


21 Commits

Author      SHA1        Message                                                      Date
Sayak Paul  60e3284003  Merge branch 'main' into requirements-custom-blocks          2026-01-20 19:10:24 +05:30
sayakpaul   7b43d0e409  add tests                                                    2026-01-20 09:29:32 +05:30
Sayak Paul  3879e32254  Merge branch 'main' into requirements-custom-blocks          2026-01-20 08:20:38 +05:30
sayakpaul   a88d11bc90  resolve conflicts.                                           2025-11-06 10:29:24 +05:30
Sayak Paul  a9165eb749  Merge branch 'main' into requirements-custom-blocks          2025-11-03 12:12:08 +05:30
Sayak Paul  eeb3445444  Merge branch 'main' into requirements-custom-blocks          2025-11-01 08:36:16 +05:30
Sayak Paul  5b7d0dfab6  Merge branch 'main' into requirements-custom-blocks          2025-10-29 16:30:46 +05:30
sayakpaul   1de4402c26  up                                                           2025-10-27 13:55:17 +05:30
sayakpaul   024c2b9839  Merge branch 'main' into requirements-custom-blocks          2025-10-27 11:56:00 +05:30
Sayak Paul  35d8d97c02  Merge branch 'main' into requirements-custom-blocks          2025-10-22 21:57:45 +05:30
Sayak Paul  e52cabeff2  Merge branch 'main' into requirements-custom-blocks          2025-10-22 06:23:40 +05:30
Sayak Paul  2c4d73d72d  Merge branch 'main' into requirements-custom-blocks          2025-10-21 01:54:38 +05:30
sayakpaul   046be83946  up                                                           2025-10-02 15:43:44 +05:30
Sayak Paul  b7fba892f5  Merge branch 'main' into requirements-custom-blocks          2025-09-23 13:35:49 +05:30
Sayak Paul  ecbd907e76  Merge branch 'main' into requirements-custom-blocks          2025-09-12 15:47:22 +05:30
Sayak Paul  d159ae025d  Merge branch 'main' into requirements-custom-blocks          2025-09-02 10:04:22 +05:30
Sayak Paul  756a1567f5  Merge branch 'main' into requirements-custom-blocks          2025-08-29 08:03:00 +02:00
Sayak Paul  d2731ababa  Merge branch 'main' into requirements-custom-blocks          2025-08-21 07:59:54 +05:30
sayakpaul   37d3887194  unify.                                                       2025-08-20 12:09:33 +05:30
sayakpaul   127e9a39d8  up                                                           2025-08-20 11:51:15 +05:30
sayakpaul   12ceecf077  feat: implement requirements validation for custom blocks.  2025-08-20 11:04:28 +05:30
34 changed files with 277 additions and 350 deletions

View File

@@ -89,8 +89,6 @@ class CustomBlocksCommand(BaseDiffusersCLICommand):
# automap = self._create_automap(parent_class=parent_class, child_class=child_class)
# with open(CONFIG, "w") as f:
# json.dump(automap, f)
with open("requirements.txt", "w") as f:
f.write("")
def _choose_block(self, candidates, chosen=None):
for cls, base in candidates:
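Note for orientation: the rest of this diff hinges on a `_requirements` class attribute through which a custom block declares its dependencies. A minimal sketch, assuming only the attribute name and the `ModularPipelineBlocks` base from this PR (the block itself is hypothetical):

```python
from diffusers import ModularPipelineBlocks


class MyCustomBlock(ModularPipelineBlocks):
    # Package names map to version specifiers; a bare version such as
    # "1.26.4" is normalized to "==1.26.4" by _normalize_requirements below.
    _requirements = {"transformers": ">=4.44.0", "numpy": "1.26.4"}
```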

View File

@@ -39,6 +39,7 @@ from .modular_pipeline_utils import (
InputParam,
InsertableDict,
OutputParam,
_validate_requirements,
format_components,
format_configs,
make_doc_string,
@@ -242,6 +243,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
config_name = "modular_config.json"
model_name = None
_requirements: Optional[Dict[str, str]] = None
@classmethod
def _get_signature_keys(cls, obj):
@@ -304,6 +306,19 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
trust_remote_code: bool = False,
**kwargs,
):
config = cls.load_config(pretrained_model_name_or_path)
has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
trust_remote_code = resolve_trust_remote_code(
trust_remote_code, pretrained_model_name_or_path, has_remote_code
)
if not (has_remote_code and trust_remote_code):
raise ValueError(
"Selected model repository does not happear to have any custom code or does not have a valid `config.json` file."
)
if "requirements" in config and config["requirements"] is not None:
_ = _validate_requirements(config["requirements"])
hub_kwargs_names = [
"cache_dir",
"force_download",
@@ -316,16 +331,6 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
]
hub_kwargs = {name: kwargs.pop(name) for name in hub_kwargs_names if name in kwargs}
config = cls.load_config(pretrained_model_name_or_path, **hub_kwargs)
has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
trust_remote_code = resolve_trust_remote_code(
trust_remote_code, pretrained_model_name_or_path, has_remote_code
)
if not has_remote_code and trust_remote_code:
raise ValueError(
"Selected model repository does not happear to have any custom code or does not have a valid `config.json` file."
)
class_ref = config["auto_map"][cls.__name__]
module_file, class_name = class_ref.split(".")
module_file = module_file + ".py"
@@ -350,8 +355,13 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
module = full_mod.rsplit(".", 1)[-1].replace("__dynamic__", "")
parent_module = self.save_pretrained.__func__.__qualname__.split(".", 1)[0]
auto_map = {f"{parent_module}": f"{module}.{cls_name}"}
self.register_to_config(auto_map=auto_map)
# resolve requirements
requirements = _validate_requirements(getattr(self, "_requirements", None))
if requirements:
self.register_to_config(requirements=requirements)
self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs)
config = dict(self.config)
self._internal_dict = FrozenDict(config)
@@ -1154,6 +1164,14 @@ class SequentialPipelineBlocks(ModularPipelineBlocks):
expected_configs=self.expected_configs,
)
@property
def _requirements(self) -> Dict[str, str]:
requirements = {}
for block_name, block in self.sub_blocks.items():
if getattr(block, "_requirements", None):
requirements[block_name] = block._requirements
return requirements
class LoopSequentialPipelineBlocks(ModularPipelineBlocks):
"""

View File

@@ -19,10 +19,12 @@ from dataclasses import dataclass, field, fields
from typing import Any, Dict, List, Literal, Optional, Type, Union
import torch
from packaging.specifiers import InvalidSpecifier, SpecifierSet
from ..configuration_utils import ConfigMixin, FrozenDict
from ..loaders.single_file_utils import _is_single_file_path_or_url
from ..utils import is_torch_available, logging
from ..utils.import_utils import _is_package_available
if is_torch_available():
@@ -690,3 +692,86 @@ def make_doc_string(
output += format_output_params(outputs, indent_level=2)
return output
def _validate_requirements(reqs):
    if reqs is None:
        normalized_reqs = {}
    else:
        if not isinstance(reqs, dict):
            raise ValueError(
                "Requirements must be provided as a dictionary mapping package names to version specifiers."
            )
        normalized_reqs = _normalize_requirements(reqs)

    if not normalized_reqs:
        return {}

    final: Dict[str, str] = {}
    for req, specified_ver in normalized_reqs.items():
        req_available, req_actual_ver = _is_package_available(req)
        if not req_available:
            logger.warning(f"{req} was specified in the requirements but wasn't found in the current environment.")
        if specified_ver:
            try:
                specifier = SpecifierSet(specified_ver)
            except InvalidSpecifier as err:
                raise ValueError(f"Requirement specifier '{specified_ver}' for {req} is invalid.") from err
            if req_actual_ver == "N/A":
                logger.warning(
                    f"Version of {req} could not be determined to validate requirement '{specified_ver}'. Things might work unexpectedly."
                )
            elif not specifier.contains(req_actual_ver, prereleases=True):
                logger.warning(
                    f"{req} requirement '{specified_ver}' is not satisfied by the installed version {req_actual_ver}. Things might work unexpectedly."
                )
        final[req] = specified_ver
    return final
def _normalize_requirements(reqs):
    if not reqs:
        return {}

    normalized: "OrderedDict[str, str]" = OrderedDict()

    def _accumulate(mapping: Dict[str, Any]):
        for pkg, spec in mapping.items():
            if isinstance(spec, dict):
                # This is recursive because blocks are composable. This way, we can merge requirements
                # from multiple blocks.
                _accumulate(spec)
                continue

            pkg_name = str(pkg).strip()
            if not pkg_name:
                raise ValueError("Requirement package name cannot be empty.")

            spec_str = "" if spec is None else str(spec).strip()
            if spec_str and not spec_str.startswith(("<", ">", "=", "!", "~")):
                spec_str = f"=={spec_str}"

            existing_spec = normalized.get(pkg_name)
            if existing_spec is not None:
                if not existing_spec and spec_str:
                    normalized[pkg_name] = spec_str
                elif existing_spec and spec_str and existing_spec != spec_str:
                    try:
                        combined_spec = SpecifierSet(",".join(filter(None, [existing_spec, spec_str])))
                    except InvalidSpecifier:
                        logger.warning(
                            f"Conflicting requirements for '{pkg_name}' detected: '{existing_spec}' vs '{spec_str}'. Keeping '{existing_spec}'."
                        )
                    else:
                        normalized[pkg_name] = str(combined_spec)
                continue

            normalized[pkg_name] = spec_str

    _accumulate(reqs)
    return normalized
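Assuming these private helpers behave as written above (the import path follows the logger name used in the tests), a small illustration of the normalization rules: nested per-block dicts are flattened, bare versions gain an `==` prefix, and distinct specifiers for the same package are merged when compatible:

```python
from diffusers.modular_pipelines.modular_pipeline_utils import _validate_requirements

nested = {
    "block_one": {"transformers": ">=4.44.0"},
    "block_two": {"numpy": "1.26.4", "transformers": "<5.0.0"},
}
# Unavailable or mismatched packages only emit warnings; the call still returns
# the normalized mapping, roughly:
# {'transformers': '<5.0.0,>=4.44.0', 'numpy': '==1.26.4'}  (specifier order may vary)
print(_validate_requirements(nested))
```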

View File

@@ -14,7 +14,7 @@ from .scheduling_utils import SchedulerMixin
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -28,8 +28,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:
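The same docstring change repeats across the scheduler files below. For reference, a sketch of what the cosine branch of `betas_for_alpha_bar` computes, paraphrasing the diffusers implementation rather than copying it:

```python
import math

import torch


def betas_for_alpha_bar_sketch(num_diffusion_timesteps: int, max_beta: float = 0.999) -> torch.Tensor:
    # Cosine alpha_bar from Nichol & Dhariwal (2021); betas are derived from
    # the ratio of consecutive alpha_bar values, clipped at max_beta.
    def alpha_bar(t):
        return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2

    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return torch.tensor(betas, dtype=torch.float32)
```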

View File

@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:
@@ -100,13 +100,14 @@ def betas_for_alpha_bar(
return torch.tensor(betas, dtype=torch.float32)
def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor:
def rescale_zero_terminal_snr(alphas_cumprod):
"""
Rescales betas to have zero terminal SNR Based on (Algorithm 1)[https://huggingface.co/papers/2305.08891]
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
Args:
alphas_cumprod (`torch.Tensor`):
The alphas cumulative products that the scheduler is being initialized with.
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
@@ -141,11 +142,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
Args:
num_train_timesteps (`int`, defaults to 1000):
The number of diffusion steps to train the model.
beta_start (`float`, defaults to 0.00085):
beta_start (`float`, defaults to 0.0001):
The starting `beta` value of inference.
beta_end (`float`, defaults to 0.0120):
beta_end (`float`, defaults to 0.02):
The final `beta` value.
beta_schedule (`str`, defaults to `"scaled_linear"`):
beta_schedule (`str`, defaults to `"linear"`):
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
trained_betas (`np.ndarray`, *optional*):
@@ -178,8 +179,6 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
dark samples instead of limiting it to samples with medium brightness. Loosely related to
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
snr_shift_scale (`float`, defaults to 3.0):
Shift scale for SNR.
"""
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -191,15 +190,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
num_train_timesteps: int = 1000,
beta_start: float = 0.00085,
beta_end: float = 0.0120,
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear",
beta_schedule: str = "scaled_linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
clip_sample: bool = True,
set_alpha_to_one: bool = True,
steps_offset: int = 0,
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
prediction_type: str = "epsilon",
clip_sample_range: float = 1.0,
sample_max_value: float = 1.0,
timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
timestep_spacing: str = "leading",
rescale_betas_zero_snr: bool = False,
snr_shift_scale: float = 3.0,
):
@@ -209,15 +208,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = (
torch.linspace(
beta_start**0.5,
beta_end**0.5,
num_train_timesteps,
dtype=torch.float64,
)
** 2
)
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float64) ** 2
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -247,7 +238,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
self.num_inference_steps = None
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor:
def _get_variance(self, timestep, prev_timestep):
alpha_prod_t = self.alphas_cumprod[timestep]
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
beta_prod_t = 1 - alpha_prod_t
@@ -274,11 +265,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
"""
return sample
def set_timesteps(
self,
num_inference_steps: int,
device: Optional[Union[str, torch.device]] = None,
) -> None:
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
"""
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
@@ -330,7 +317,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
sample: torch.Tensor,
eta: float = 0.0,
use_clipped_model_output: bool = False,
generator: Optional[torch.Generator] = None,
generator=None,
variance_noise: Optional[torch.Tensor] = None,
return_dict: bool = True,
) -> Union[DDIMSchedulerOutput, Tuple]:
@@ -341,7 +328,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from learned diffusion model.
timestep (`int`):
timestep (`float`):
The current discrete timestep in the diffusion chain.
sample (`torch.Tensor`):
A current instance of a sample created by the diffusion process.
@@ -500,5 +487,5 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
return velocity
def __len__(self) -> int:
def __len__(self):
return self.config.num_train_timesteps
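This file also touches `rescale_zero_terminal_snr`; a sketch of Algorithm 1 from the referenced paper as applied to `alphas_cumprod` (a paraphrase, not a verbatim copy of the scheduler code):

```python
import torch


def rescale_zero_terminal_snr_sketch(alphas_cumprod: torch.Tensor) -> torch.Tensor:
    # Shift and rescale sqrt(alpha_bar) so the final step has exactly zero SNR
    # while the first step keeps its original value.
    alphas_bar_sqrt = alphas_cumprod.sqrt()
    first, last = alphas_bar_sqrt[0].clone(), alphas_bar_sqrt[-1].clone()
    alphas_bar_sqrt = (alphas_bar_sqrt - last) * first / (first - last)
    return alphas_bar_sqrt**2
```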

View File

@@ -49,7 +49,7 @@ class DDIMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -51,7 +51,7 @@ class DDIMParallelSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -48,7 +48,7 @@ class DDPMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:
@@ -192,12 +192,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
variance_type: Literal[
"fixed_small",
"fixed_small_log",
"fixed_large",
"fixed_large_log",
"learned",
"learned_range",
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
] = "fixed_small",
clip_sample: bool = True,
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
@@ -215,15 +210,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = (
torch.linspace(
beta_start**0.5,
beta_end**0.5,
num_train_timesteps,
dtype=torch.float32,
)
** 2
)
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -350,14 +337,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
t: int,
predicted_variance: Optional[torch.Tensor] = None,
variance_type: Optional[
Literal[
"fixed_small",
"fixed_small_log",
"fixed_large",
"fixed_large_log",
"learned",
"learned_range",
]
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
] = None,
) -> torch.Tensor:
"""
@@ -492,10 +472,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
prev_t = self.previous_timestep(t)
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [
"learned",
"learned_range",
]:
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
else:
predicted_variance = None
@@ -544,10 +521,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
if t > 0:
device = model_output.device
variance_noise = randn_tensor(
model_output.shape,
generator=generator,
device=device,
dtype=model_output.dtype,
model_output.shape, generator=generator, device=device, dtype=model_output.dtype
)
if self.variance_type == "fixed_small_log":
variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise
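The condensed `torch.split` branch above serves learned-variance models, whose output stacks a variance prediction on top of the mean prediction. A toy illustration (shapes are arbitrary):

```python
import torch

# A learned-variance model emits twice the sample's channels: the first half
# is the mean prediction, the second half parameterizes the variance.
sample = torch.randn(1, 4, 8, 8)
model_output = torch.randn(1, 8, 8, 8)
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
assert model_output.shape == sample.shape == predicted_variance.shape
```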

View File

@@ -50,7 +50,7 @@ class DDPMParallelSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -64,8 +64,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:
@@ -202,12 +202,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
variance_type: Literal[
"fixed_small",
"fixed_small_log",
"fixed_large",
"fixed_large_log",
"learned",
"learned_range",
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
] = "fixed_small",
clip_sample: bool = True,
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
@@ -225,15 +220,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = (
torch.linspace(
beta_start**0.5,
beta_end**0.5,
num_train_timesteps,
dtype=torch.float32,
)
** 2
)
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -363,14 +350,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
t: int,
predicted_variance: Optional[torch.Tensor] = None,
variance_type: Optional[
Literal[
"fixed_small",
"fixed_small_log",
"fixed_large",
"fixed_large_log",
"learned",
"learned_range",
]
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
] = None,
) -> torch.Tensor:
"""

View File

@@ -34,7 +34,7 @@ if is_scipy_available():
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -52,7 +52,7 @@ class DDIMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -34,7 +34,7 @@ if is_scipy_available():
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -34,7 +34,7 @@ if is_scipy_available():
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -117,7 +117,7 @@ class BrownianTreeNoiseSampler:
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -131,8 +131,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -36,7 +36,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -50,8 +50,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -51,7 +51,7 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -54,7 +54,7 @@ class EulerDiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -68,8 +68,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -51,7 +51,7 @@ class HeunDiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -52,7 +52,7 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -51,7 +51,7 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -53,7 +53,7 @@ class LCMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -67,8 +67,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -49,7 +49,7 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -28,7 +28,7 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -42,8 +42,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -47,7 +47,7 @@ class RePaintSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -61,8 +61,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -35,7 +35,7 @@ if is_scipy_available():
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -49,8 +49,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -52,7 +52,7 @@ class TCDSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -48,7 +48,7 @@ class UnCLIPSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -34,7 +34,7 @@ if is_scipy_available():
def betas_for_alpha_bar(
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`str`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
`torch.Tensor`:

View File

@@ -37,14 +37,9 @@ class TestFluxModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxModularPipeline
pipeline_blocks_class = FluxAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
default_repo_id = "black-forest-labs/FLUX.1-dev"
params = frozenset(["prompt", "height", "width", "guidance_scale"])
batch_params = frozenset(["prompt"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt", "max_sequence_length"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = None # None if vae_encoder is not supported
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)
@@ -68,21 +63,10 @@ class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxModularPipeline
pipeline_blocks_class = FluxAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-modular"
default_repo_id = "black-forest-labs/FLUX.1-dev"
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(
[
"prompt",
"max_sequence_length",
]
)
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = frozenset(["image", "height", "width"])
def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
pipeline = super().get_pipeline(components_manager, torch_dtype)
@@ -145,13 +129,9 @@ class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = FluxKontextModularPipeline
pipeline_blocks_class = FluxKontextAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-kontext-pipe"
default_repo_id = "black-forest-labs/FLUX.1-kontext-dev"
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])
text_encoder_block_params = frozenset(["prompt", "max_sequence_length"])
decode_block_params = frozenset(["latents"])
vae_encoder_block_params = frozenset(["image", "height", "width"])
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)

View File

@@ -32,14 +32,9 @@ class TestFlux2ModularPipelineFast(ModularPipelineTesterMixin):
pipeline_class = Flux2ModularPipeline
pipeline_blocks_class = Flux2AutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-flux2-modular"
default_repo_id = "black-forest-labs/FLUX.2-dev"
params = frozenset(["prompt", "height", "width", "guidance_scale"])
batch_params = frozenset(["prompt"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt", "max_sequence_length", "text_encoder_out_layers"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = None
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)
@@ -68,10 +63,6 @@ class TestFlux2ImageConditionedModularPipelineFast(ModularPipelineTesterMixin):
params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
batch_params = frozenset(["prompt", "image"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt", "max_sequence_length", "text_encoder_out_layers"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = frozenset(["image", "height", "width"])
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)

View File

@@ -34,16 +34,10 @@ class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuider
pipeline_class = QwenImageModularPipeline
pipeline_blocks_class = QwenImageAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-modular"
default_repo_id = "Qwen/Qwen-Image"
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt", "negative_prompt", "max_sequence_length"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = None # None if vae_encoder is not supported
def get_dummy_inputs(self):
generator = self.get_generator()
inputs = {
@@ -66,16 +60,10 @@ class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGu
pipeline_class = QwenImageEditModularPipeline
pipeline_blocks_class = QwenImageEditAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-modular"
default_repo_id = "Qwen/Qwen-Image-Edit"
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"])
batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt", "negative_prompt", "max_sequence_length"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = frozenset(["image", "height", "width"])
def get_dummy_inputs(self):
generator = self.get_generator()
inputs = {
@@ -98,7 +86,6 @@ class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, Modul
pipeline_class = QwenImageEditPlusModularPipeline
pipeline_blocks_class = QwenImageEditPlusAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-qwenimage-edit-plus-modular"
default_repo_id = "Qwen/Qwen-Image-Edit-2509"
# No `mask_image` yet.
params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"])

View File

@@ -279,8 +279,6 @@ class TestSDXLModularPipelineFast(
pipeline_class = StableDiffusionXLModularPipeline
pipeline_blocks_class = StableDiffusionXLAutoBlocks
pretrained_model_name_or_path = "hf-internal-testing/tiny-sdxl-modular"
default_repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
params = frozenset(
[
"prompt",
@@ -293,11 +291,6 @@ class TestSDXLModularPipelineFast(
batch_params = frozenset(["prompt", "negative_prompt"])
expected_image_output_shape = (1, 3, 64, 64)
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = None # None if vae_encoder is not supported
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)
inputs = {
@@ -346,11 +339,6 @@ class TestSDXLImg2ImgModularPipelineFast(
batch_params = frozenset(["prompt", "negative_prompt", "image"])
expected_image_output_shape = (1, 3, 64, 64)
# should choose from the dict returned by `get_dummy_inputs`
text_encoder_block_params = frozenset(["prompt"])
decode_block_params = frozenset(["output_type"])
vae_encoder_block_params = frozenset(["image"])
def get_dummy_inputs(self, seed=0):
generator = self.get_generator(seed)
inputs = {

View File

@@ -1,4 +1,6 @@
import gc
import json
import os
import tempfile
from typing import Callable, Union
@@ -8,9 +10,16 @@ import torch
import diffusers
from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks
from diffusers.guiders import ClassifierFreeGuidance
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.utils import logging
from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, require_accelerator, torch_device
from ..testing_utils import (
CaptureLogger,
backend_empty_cache,
numpy_cosine_similarity_distance,
require_accelerator,
torch_device,
)
class ModularPipelineTesterMixin:
@@ -48,12 +57,6 @@ class ModularPipelineTesterMixin:
"You need to set the attribute `pretrained_model_name_or_path` in the child test class. See existing pipeline tests for reference."
)
@property
def default_repo_id(self) -> str:
raise NotImplementedError(
"You need to set the attribute `default_repo_id` in the child test class. See existing pipeline tests for reference."
)
@property
def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]:
raise NotImplementedError(
@@ -96,30 +99,6 @@ class ModularPipelineTesterMixin:
"See existing pipeline tests for reference."
)
def text_encoder_block_params(self) -> frozenset:
raise NotImplementedError(
"You need to set the attribute `text_encoder_block_params` in the child test class. "
"`text_encoder_block_params` are the parameters required to be passed to the text encoder block. "
" if should be a subset of the parameters returned by `get_dummy_inputs`"
"See existing pipeline tests for reference."
)
def decode_block_params(self) -> frozenset:
raise NotImplementedError(
"You need to set the attribute `decode_block_params` in the child test class. "
"`decode_block_params` are the parameters required to be passed to the decode block. "
" if should be a subset of the parameters returned by `get_dummy_inputs`"
"See existing pipeline tests for reference."
)
def vae_encoder_block_params(self) -> frozenset:
raise NotImplementedError(
"You need to set the attribute `vae_encoder_block_params` in the child test class. "
"`vae_encoder_block_params` are the parameters required to be passed to the vae encoder block. "
" if should be a subset of the parameters returned by `get_dummy_inputs`"
"See existing pipeline tests for reference."
)
def setup_method(self):
# clean up the VRAM before each test
torch.compiler.reset()
@@ -154,96 +133,6 @@ class ModularPipelineTesterMixin:
_check_for_parameters(self.params, input_parameters, "input")
_check_for_parameters(self.optional_params, optional_parameters, "optional")
def test_loading_from_default_repo(self):
if self.default_repo_id is None:
return
try:
pipe = ModularPipeline.from_pretrained(self.default_repo_id)
assert pipe.blocks.__class__ == self.pipeline_blocks_class
except Exception as e:
assert False, f"Failed to load pipeline from default repo: {e}"
def test_modular_inference(self):
# run the pipeline to get the base output for comparison
pipe = self.get_pipeline()
pipe.to(torch_device, torch.float32)
inputs = self.get_dummy_inputs()
standard_output = pipe(**inputs, output="images")
# create text, denoise, decoder (and optional vae encoder) nodes
blocks = self.pipeline_blocks_class()
assert "text_encoder" in blocks.sub_blocks, "`text_encoder` block is not present in the pipeline"
assert "denoise" in blocks.sub_blocks, "`denoise` block is not present in the pipeline"
assert "decode" in blocks.sub_blocks, "`decode` block is not present in the pipeline"
if self.vae_encoder_block_params is not None:
assert "vae_encoder" in blocks.sub_blocks, "`vae_encoder` block is not present in the pipeline"
# manually set the components in the sub_pipe
# a hack to work around the fact that the default pipeline properties are often incorrect for testing cases,
# e.g. vae_scale_factor is usually not 8 because the vae is configured to be smaller for testing
def manually_set_all_components(pipe: ModularPipeline, sub_pipe: ModularPipeline):
for n, comp in pipe.components.items():
if not hasattr(sub_pipe, n):
setattr(sub_pipe, n, comp)
text_node = blocks.sub_blocks["text_encoder"].init_pipeline(self.pretrained_model_name_or_path)
text_node.load_components(torch_dtype=torch.float32)
text_node.to(torch_device)
manually_set_all_components(pipe, text_node)
denoise_node = blocks.sub_blocks["denoise"].init_pipeline(self.pretrained_model_name_or_path)
denoise_node.load_components(torch_dtype=torch.float32)
denoise_node.to(torch_device)
manually_set_all_components(pipe, denoise_node)
decoder_node = blocks.sub_blocks["decode"].init_pipeline(self.pretrained_model_name_or_path)
decoder_node.load_components(torch_dtype=torch.float32)
decoder_node.to(torch_device)
manually_set_all_components(pipe, decoder_node)
if self.vae_encoder_block_params is not None:
vae_encoder_node = blocks.sub_blocks["vae_encoder"].init_pipeline(self.pretrained_model_name_or_path)
vae_encoder_node.load_components(torch_dtype=torch.float32)
vae_encoder_node.to(torch_device)
manually_set_all_components(pipe, vae_encoder_node)
else:
vae_encoder_node = None
# prepare inputs for each node
inputs = self.get_dummy_inputs()
def get_block_inputs(inputs: dict, block_params: frozenset) -> tuple[dict, dict]:
block_inputs = {}
for name in block_params:
if name in inputs:
block_inputs[name] = inputs.pop(name)
return block_inputs, inputs
text_inputs, inputs = get_block_inputs(inputs, self.text_encoder_block_params)
decoder_inputs, inputs = get_block_inputs(inputs, self.decode_block_params)
if vae_encoder_node is not None:
vae_encoder_inputs, inputs = get_block_inputs(inputs, self.vae_encoder_block_params)
# this is also to make sure pipelines mark text outputs as denoiser_input_fields
text_output = text_node(**text_inputs).get_by_kwargs("denoiser_input_fields")
if vae_encoder_node is not None:
vae_encoder_output = vae_encoder_node(**vae_encoder_inputs).values
denoise_inputs = {**text_output, **vae_encoder_output, **inputs}
else:
denoise_inputs = {**text_output, **inputs}
# denoise node output should be "latents"
latents = denoise_node(**denoise_inputs).latents
# decoder node input should be "latents" and output should be "images"
modular_output = decoder_node(**decoder_inputs, latents=latents).images
assert modular_output.shape == standard_output.shape, (
f"Modular output should have same shape as standard output {standard_output.shape}, but got {modular_output.shape}"
)
def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True):
pipe = self.get_pipeline().to(torch_device)
@@ -455,3 +344,53 @@ class ModularGuiderTesterMixin:
assert out_cfg.shape == out_no_cfg.shape
max_diff = torch.abs(out_cfg - out_no_cfg).max()
assert max_diff > expected_max_diff, "Output with CFG must be different from normal inference"
class TestCustomBlockRequirements:
    def get_dummy_block_pipe(self):
        class DummyBlockOne:
            # keep two arbitrary deps so that we can test warnings.
            _requirements = {"xyz": ">=0.8.0", "abc": ">=10.0.0"}

        class DummyBlockTwo:
            # keep two dependencies that will be available during testing.
            _requirements = {"transformers": ">=4.44.0", "diffusers": ">=0.2.0"}

        pipe = SequentialPipelineBlocks.from_blocks_dict(
            {"dummy_block_one": DummyBlockOne, "dummy_block_two": DummyBlockTwo}
        )
        return pipe

    def test_custom_requirements_save_load(self):
        pipe = self.get_dummy_block_pipe()
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            config_path = os.path.join(tmpdir, "modular_config.json")
            with open(config_path, "r") as f:
                config = json.load(f)

        assert "requirements" in config
        requirements = config["requirements"]
        expected_requirements = {
            "xyz": ">=0.8.0",
            "abc": ">=10.0.0",
            "transformers": ">=4.44.0",
            "diffusers": ">=0.2.0",
        }
        assert expected_requirements == requirements

    def test_warnings(self):
        pipe = self.get_dummy_block_pipe()
        with tempfile.TemporaryDirectory() as tmpdir:
            logger = logging.get_logger("diffusers.modular_pipelines.modular_pipeline_utils")
            logger.setLevel(30)
            with CaptureLogger(logger) as cap_logger:
                pipe.save_pretrained(tmpdir)

        template = "{req} was specified in the requirements but wasn't found in the current environment"
        msg_xyz = template.format(req="xyz")
        msg_abc = template.format(req="abc")
        assert msg_xyz in str(cap_logger.out)
        assert msg_abc in str(cap_logger.out)
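To connect the tests back to the earlier hunks: `SequentialPipelineBlocks._requirements` yields a per-block nested dict, which `_validate_requirements` normalizes into the flat mapping stored in `modular_config.json`. A sketch of both shapes for the dummy pipe above (values copied from the test; variable names are illustrative):

```python
# What pipe._requirements returns before normalization:
nested_requirements = {
    "dummy_block_one": {"xyz": ">=0.8.0", "abc": ">=10.0.0"},
    "dummy_block_two": {"transformers": ">=4.44.0", "diffusers": ">=0.2.0"},
}
# What lands under "requirements" in modular_config.json after save_pretrained:
flattened = {
    "xyz": ">=0.8.0",
    "abc": ">=10.0.0",
    "transformers": ">=4.44.0",
    "diffusers": ">=0.2.0",
}
```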