mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-23 20:15:49 +08:00
Compare commits
12 Commits
modular-do
...
cuda-128-g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6b127364c4 | ||
|
|
bff672f47f | ||
|
|
d4f97d1921 | ||
|
|
1d32b19ad4 | ||
|
|
699297f647 | ||
|
|
7a02fadad3 | ||
|
|
ec37629371 | ||
|
|
4b843c8430 | ||
|
|
d7a1c31f4f | ||
|
|
29b15f41c7 | ||
|
|
75edff93a0 | ||
|
|
76f51a5e92 |
@@ -1,8 +1,8 @@
|
||||
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
|
||||
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
LABEL repository="diffusers"
|
||||
|
||||
ARG PYTHON_VERSION=3.12
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get -y update \
|
||||
@@ -32,10 +32,12 @@ RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
|
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
# Install torch, torchvision, and torchaudio together to ensure compatibility
|
||||
RUN uv pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio
|
||||
torchaudio \
|
||||
--index-url https://download.pytorch.org/whl/cu128
|
||||
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
|
||||
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
LABEL repository="diffusers"
|
||||
|
||||
ARG PYTHON_VERSION=3.12
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get -y update \
|
||||
@@ -32,10 +32,12 @@ RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
|
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
# Install torch, torchvision, and torchaudio together to ensure compatibility
|
||||
RUN uv pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio
|
||||
torchaudio \
|
||||
--index-url https://download.pytorch.org/whl/cu128
|
||||
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
|
||||
|
||||
|
||||
@@ -496,6 +496,8 @@
|
||||
title: Bria 3.2
|
||||
- local: api/pipelines/bria_fibo
|
||||
title: Bria Fibo
|
||||
- local: api/pipelines/bria_fibo_edit
|
||||
title: Bria Fibo Edit
|
||||
- local: api/pipelines/chroma
|
||||
title: Chroma
|
||||
- local: api/pipelines/cogview3
|
||||
|
||||
33
docs/source/en/api/pipelines/bria_fibo_edit.md
Normal file
33
docs/source/en/api/pipelines/bria_fibo_edit.md
Normal file
@@ -0,0 +1,33 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
# Bria Fibo Edit
|
||||
|
||||
Fibo Edit is an 8B parameter image-to-image model that introduces a new paradigm of structured control, operating on JSON inputs paired with source images to enable deterministic and repeatable editing workflows.
|
||||
Featuring native masking for granular precision, it moves beyond simple prompt-based diffusion to offer explicit, interpretable control optimized for production environments.
|
||||
Its lightweight architecture is designed for deep customization, empowering researchers to build specialized "Edit" models for domain-specific tasks while delivering top-tier aesthetic quality
|
||||
|
||||
## Usage
|
||||
_As the model is gated, before using it with diffusers you first need to go to the [Bria Fibo Hugging Face page](https://huggingface.co/briaai/Fibo-Edit), fill in the form and accept the gate. Once you are in, you need to login so that your system knows you’ve accepted the gate._
|
||||
|
||||
Use the command below to log in:
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
```
|
||||
|
||||
|
||||
## BriaFiboEditPipeline
|
||||
|
||||
[[autodoc]] BriaFiboEditPipeline
|
||||
- all
|
||||
- __call__
|
||||
@@ -457,6 +457,7 @@ else:
|
||||
"AuraFlowPipeline",
|
||||
"BlipDiffusionControlNetPipeline",
|
||||
"BlipDiffusionPipeline",
|
||||
"BriaFiboEditPipeline",
|
||||
"BriaFiboPipeline",
|
||||
"BriaPipeline",
|
||||
"ChromaImg2ImgPipeline",
|
||||
@@ -1185,6 +1186,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
AudioLDM2UNet2DConditionModel,
|
||||
AudioLDMPipeline,
|
||||
AuraFlowPipeline,
|
||||
BriaFiboEditPipeline,
|
||||
BriaFiboPipeline,
|
||||
BriaPipeline,
|
||||
ChromaImg2ImgPipeline,
|
||||
|
||||
@@ -478,7 +478,7 @@ class PeftAdapterMixin:
|
||||
Args:
|
||||
adapter_names (`List[str]` or `str`):
|
||||
The names of the adapters to use.
|
||||
adapter_weights (`Union[List[float], float]`, *optional*):
|
||||
weights (`Union[List[float], float]`, *optional*):
|
||||
The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the
|
||||
adapters.
|
||||
|
||||
@@ -495,7 +495,7 @@ class PeftAdapterMixin:
|
||||
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
|
||||
)
|
||||
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
|
||||
pipeline.unet.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
|
||||
pipeline.unet.set_adapters(["cinematic", "pixel"], weights=[0.5, 0.5])
|
||||
```
|
||||
"""
|
||||
if not USE_PEFT_BACKEND:
|
||||
|
||||
@@ -406,6 +406,7 @@ class LongCatImageTransformer2DModel(
|
||||
"""
|
||||
|
||||
_supports_gradient_checkpointing = True
|
||||
_repeated_blocks = ["LongCatImageTransformerBlock", "LongCatImageSingleTransformerBlock"]
|
||||
|
||||
@register_to_config
|
||||
def __init__(
|
||||
|
||||
@@ -129,7 +129,7 @@ else:
|
||||
"AnimateDiffVideoToVideoControlNetPipeline",
|
||||
]
|
||||
_import_structure["bria"] = ["BriaPipeline"]
|
||||
_import_structure["bria_fibo"] = ["BriaFiboPipeline"]
|
||||
_import_structure["bria_fibo"] = ["BriaFiboPipeline", "BriaFiboEditPipeline"]
|
||||
_import_structure["flux2"] = ["Flux2Pipeline", "Flux2KleinPipeline"]
|
||||
_import_structure["flux"] = [
|
||||
"FluxControlPipeline",
|
||||
@@ -597,7 +597,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
from .aura_flow import AuraFlowPipeline
|
||||
from .blip_diffusion import BlipDiffusionPipeline
|
||||
from .bria import BriaPipeline
|
||||
from .bria_fibo import BriaFiboPipeline
|
||||
from .bria_fibo import BriaFiboEditPipeline, BriaFiboPipeline
|
||||
from .chroma import ChromaImg2ImgPipeline, ChromaInpaintPipeline, ChromaPipeline
|
||||
from .chronoedit import ChronoEditPipeline
|
||||
from .cogvideo import (
|
||||
|
||||
@@ -23,6 +23,8 @@ except OptionalDependencyNotAvailable:
|
||||
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
||||
else:
|
||||
_import_structure["pipeline_bria_fibo"] = ["BriaFiboPipeline"]
|
||||
_import_structure["pipeline_bria_fibo_edit"] = ["BriaFiboEditPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
try:
|
||||
@@ -33,6 +35,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
from ...utils.dummy_torch_and_transformers_objects import *
|
||||
else:
|
||||
from .pipeline_bria_fibo import BriaFiboPipeline
|
||||
from .pipeline_bria_fibo_edit import BriaFiboEditPipeline
|
||||
|
||||
else:
|
||||
import sys
|
||||
|
||||
1133
src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py
Normal file
1133
src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -84,7 +84,6 @@ EXAMPLE_DOC_STRING = """
|
||||
>>> from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL
|
||||
>>> from diffusers.utils import load_image
|
||||
|
||||
|
||||
>>> depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
|
||||
>>> feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
|
||||
>>> controlnet = ControlNetModel.from_pretrained(
|
||||
|
||||
@@ -53,7 +53,6 @@ EXAMPLE_DOC_STRING = """
|
||||
>>> from transformers import AutoTokenizer, LlamaForCausalLM
|
||||
>>> from diffusers import HiDreamImagePipeline
|
||||
|
||||
|
||||
>>> tokenizer_4 = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
|
||||
>>> text_encoder_4 = LlamaForCausalLM.from_pretrained(
|
||||
... "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
|
||||
@@ -85,7 +85,6 @@ EXAMPLE_DOC_STRING = """
|
||||
>>> from diffusers import ControlNetModel, StableDiffusionXLControlNetPAGImg2ImgPipeline, AutoencoderKL
|
||||
>>> from diffusers.utils import load_image
|
||||
|
||||
|
||||
>>> depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
|
||||
>>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
|
||||
>>> controlnet = ControlNetModel.from_pretrained(
|
||||
|
||||
@@ -459,7 +459,6 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
|
||||
>>> from diffusers import StableDiffusionLatentUpscalePipeline, StableDiffusionPipeline
|
||||
>>> import torch
|
||||
|
||||
|
||||
>>> pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
... "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
|
||||
... )
|
||||
|
||||
@@ -14,7 +14,7 @@ from .scheduling_utils import SchedulerMixin
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -28,8 +28,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -100,14 +100,13 @@ def betas_for_alpha_bar(
|
||||
return torch.tensor(betas, dtype=torch.float32)
|
||||
|
||||
|
||||
def rescale_zero_terminal_snr(alphas_cumprod):
|
||||
def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
||||
|
||||
Rescales betas to have zero terminal SNR Based on (Algorithm 1)[https://huggingface.co/papers/2305.08891]
|
||||
|
||||
Args:
|
||||
betas (`torch.Tensor`):
|
||||
the betas that the scheduler is being initialized with.
|
||||
alphas_cumprod (`torch.Tensor`):
|
||||
The alphas cumulative products that the scheduler is being initialized with.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`: rescaled betas with zero terminal SNR
|
||||
@@ -142,11 +141,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Args:
|
||||
num_train_timesteps (`int`, defaults to 1000):
|
||||
The number of diffusion steps to train the model.
|
||||
beta_start (`float`, defaults to 0.0001):
|
||||
beta_start (`float`, defaults to 0.00085):
|
||||
The starting `beta` value of inference.
|
||||
beta_end (`float`, defaults to 0.02):
|
||||
beta_end (`float`, defaults to 0.0120):
|
||||
The final `beta` value.
|
||||
beta_schedule (`str`, defaults to `"linear"`):
|
||||
beta_schedule (`str`, defaults to `"scaled_linear"`):
|
||||
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
|
||||
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
|
||||
trained_betas (`np.ndarray`, *optional*):
|
||||
@@ -179,6 +178,8 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
|
||||
dark samples instead of limiting it to samples with medium brightness. Loosely related to
|
||||
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
||||
snr_shift_scale (`float`, defaults to 3.0):
|
||||
Shift scale for SNR.
|
||||
"""
|
||||
|
||||
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
|
||||
@@ -190,15 +191,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
num_train_timesteps: int = 1000,
|
||||
beta_start: float = 0.00085,
|
||||
beta_end: float = 0.0120,
|
||||
beta_schedule: str = "scaled_linear",
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
clip_sample: bool = True,
|
||||
set_alpha_to_one: bool = True,
|
||||
steps_offset: int = 0,
|
||||
prediction_type: str = "epsilon",
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
clip_sample_range: float = 1.0,
|
||||
sample_max_value: float = 1.0,
|
||||
timestep_spacing: str = "leading",
|
||||
timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
snr_shift_scale: float = 3.0,
|
||||
):
|
||||
@@ -208,7 +209,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float64) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float64,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -238,7 +247,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.num_inference_steps = None
|
||||
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
|
||||
|
||||
def _get_variance(self, timestep, prev_timestep):
|
||||
def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor:
|
||||
alpha_prod_t = self.alphas_cumprod[timestep]
|
||||
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
|
||||
beta_prod_t = 1 - alpha_prod_t
|
||||
@@ -265,7 +274,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
"""
|
||||
return sample
|
||||
|
||||
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
||||
def set_timesteps(
|
||||
self,
|
||||
num_inference_steps: int,
|
||||
device: Optional[Union[str, torch.device]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
||||
|
||||
@@ -317,7 +330,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
sample: torch.Tensor,
|
||||
eta: float = 0.0,
|
||||
use_clipped_model_output: bool = False,
|
||||
generator=None,
|
||||
generator: Optional[torch.Generator] = None,
|
||||
variance_noise: Optional[torch.Tensor] = None,
|
||||
return_dict: bool = True,
|
||||
) -> Union[DDIMSchedulerOutput, Tuple]:
|
||||
@@ -328,7 +341,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
Args:
|
||||
model_output (`torch.Tensor`):
|
||||
The direct output from learned diffusion model.
|
||||
timestep (`float`):
|
||||
timestep (`int`):
|
||||
The current discrete timestep in the diffusion chain.
|
||||
sample (`torch.Tensor`):
|
||||
A current instance of a sample created by the diffusion process.
|
||||
@@ -487,5 +500,5 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
|
||||
return velocity
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> int:
|
||||
return self.config.num_train_timesteps
|
||||
|
||||
@@ -22,6 +22,7 @@ import flax
|
||||
import jax.numpy as jnp
|
||||
|
||||
from ..configuration_utils import ConfigMixin, register_to_config
|
||||
from ..utils import logging
|
||||
from .scheduling_utils_flax import (
|
||||
CommonSchedulerState,
|
||||
FlaxKarrasDiffusionSchedulers,
|
||||
@@ -32,6 +33,9 @@ from .scheduling_utils_flax import (
|
||||
)
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
@flax.struct.dataclass
|
||||
class DDIMSchedulerState:
|
||||
common: CommonSchedulerState
|
||||
@@ -125,6 +129,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
||||
prediction_type: str = "epsilon",
|
||||
dtype: jnp.dtype = jnp.float32,
|
||||
):
|
||||
logger.warning(
|
||||
"Flax classes are deprecated and will be removed in Diffusers v1.0.0. We "
|
||||
"recommend migrating to PyTorch classes or pinning your version of Diffusers."
|
||||
)
|
||||
self.dtype = dtype
|
||||
|
||||
def create_state(self, common: Optional[CommonSchedulerState] = None) -> DDIMSchedulerState:
|
||||
@@ -152,7 +160,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
||||
)
|
||||
|
||||
def scale_model_input(
|
||||
self, state: DDIMSchedulerState, sample: jnp.ndarray, timestep: Optional[int] = None
|
||||
self,
|
||||
state: DDIMSchedulerState,
|
||||
sample: jnp.ndarray,
|
||||
timestep: Optional[int] = None,
|
||||
) -> jnp.ndarray:
|
||||
"""
|
||||
Args:
|
||||
@@ -190,7 +201,9 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
||||
def _get_variance(self, state: DDIMSchedulerState, timestep, prev_timestep):
|
||||
alpha_prod_t = state.common.alphas_cumprod[timestep]
|
||||
alpha_prod_t_prev = jnp.where(
|
||||
prev_timestep >= 0, state.common.alphas_cumprod[prev_timestep], state.final_alpha_cumprod
|
||||
prev_timestep >= 0,
|
||||
state.common.alphas_cumprod[prev_timestep],
|
||||
state.final_alpha_cumprod,
|
||||
)
|
||||
beta_prod_t = 1 - alpha_prod_t
|
||||
beta_prod_t_prev = 1 - alpha_prod_t_prev
|
||||
|
||||
@@ -49,7 +49,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -99,7 +99,7 @@ def betas_for_alpha_bar(
|
||||
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
||||
def rescale_zero_terminal_snr(betas):
|
||||
def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
||||
|
||||
@@ -187,14 +187,14 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
num_train_timesteps: int = 1000,
|
||||
beta_start: float = 0.0001,
|
||||
beta_end: float = 0.02,
|
||||
beta_schedule: str = "linear",
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
clip_sample: bool = True,
|
||||
set_alpha_to_one: bool = True,
|
||||
steps_offset: int = 0,
|
||||
prediction_type: str = "epsilon",
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
clip_sample_range: float = 1.0,
|
||||
timestep_spacing: str = "leading",
|
||||
timestep_spacing: Literal["leading", "trailing"] = "leading",
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
@@ -210,7 +210,15 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -256,7 +264,11 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
"""
|
||||
return sample
|
||||
|
||||
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
|
||||
def set_timesteps(
|
||||
self,
|
||||
num_inference_steps: int,
|
||||
device: Optional[Union[str, torch.device]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
||||
|
||||
@@ -308,20 +320,10 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
Args:
|
||||
model_output (`torch.Tensor`):
|
||||
The direct output from learned diffusion model.
|
||||
timestep (`float`):
|
||||
timestep (`int`):
|
||||
The current discrete timestep in the diffusion chain.
|
||||
sample (`torch.Tensor`):
|
||||
A current instance of a sample created by the diffusion process.
|
||||
eta (`float`):
|
||||
The weight of noise for added noise in diffusion step.
|
||||
use_clipped_model_output (`bool`, defaults to `False`):
|
||||
If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary
|
||||
because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no
|
||||
clipping has happened, "corrected" `model_output` would coincide with the one provided as input and
|
||||
`use_clipped_model_output` has no effect.
|
||||
variance_noise (`torch.Tensor`):
|
||||
Alternative to generating noise with `generator` by directly providing the noise for the variance
|
||||
itself. Useful for methods such as [`CycleDiffusion`].
|
||||
return_dict (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to return a [`~schedulers.scheduling_ddim_inverse.DDIMInverseSchedulerOutput`] or
|
||||
`tuple`.
|
||||
@@ -335,7 +337,8 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
# 1. get previous step value (=t+1)
|
||||
prev_timestep = timestep
|
||||
timestep = min(
|
||||
timestep - self.config.num_train_timesteps // self.num_inference_steps, self.config.num_train_timesteps - 1
|
||||
timestep - self.config.num_train_timesteps // self.num_inference_steps,
|
||||
self.config.num_train_timesteps - 1,
|
||||
)
|
||||
|
||||
# 2. compute alphas, betas
|
||||
@@ -378,5 +381,5 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
return (prev_sample, pred_original_sample)
|
||||
return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> int:
|
||||
return self.config.num_train_timesteps
|
||||
|
||||
@@ -51,7 +51,7 @@ class DDIMParallelSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -48,7 +48,7 @@ class DDPMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -192,7 +192,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
variance_type: Literal[
|
||||
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
] = "fixed_small",
|
||||
clip_sample: bool = True,
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
@@ -210,7 +215,15 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -337,7 +350,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
t: int,
|
||||
predicted_variance: Optional[torch.Tensor] = None,
|
||||
variance_type: Optional[
|
||||
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
|
||||
Literal[
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
]
|
||||
] = None,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
@@ -472,7 +492,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
|
||||
prev_t = self.previous_timestep(t)
|
||||
|
||||
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
|
||||
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [
|
||||
"learned",
|
||||
"learned_range",
|
||||
]:
|
||||
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
|
||||
else:
|
||||
predicted_variance = None
|
||||
@@ -521,7 +544,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
if t > 0:
|
||||
device = model_output.device
|
||||
variance_noise = randn_tensor(
|
||||
model_output.shape, generator=generator, device=device, dtype=model_output.dtype
|
||||
model_output.shape,
|
||||
generator=generator,
|
||||
device=device,
|
||||
dtype=model_output.dtype,
|
||||
)
|
||||
if self.variance_type == "fixed_small_log":
|
||||
variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise
|
||||
|
||||
@@ -50,7 +50,7 @@ class DDPMParallelSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -64,8 +64,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -202,7 +202,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
variance_type: Literal[
|
||||
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
] = "fixed_small",
|
||||
clip_sample: bool = True,
|
||||
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
|
||||
@@ -220,7 +225,15 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
self.betas = (
|
||||
torch.linspace(
|
||||
beta_start**0.5,
|
||||
beta_end**0.5,
|
||||
num_train_timesteps,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
** 2
|
||||
)
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
@@ -350,7 +363,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
t: int,
|
||||
predicted_variance: Optional[torch.Tensor] = None,
|
||||
variance_type: Optional[
|
||||
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
|
||||
Literal[
|
||||
"fixed_small",
|
||||
"fixed_small_log",
|
||||
"fixed_large",
|
||||
"fixed_large_log",
|
||||
"learned",
|
||||
"learned_range",
|
||||
]
|
||||
] = None,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class DDIMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -117,7 +117,7 @@ class BrownianTreeNoiseSampler:
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -131,8 +131,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -36,7 +36,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -50,8 +50,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -54,7 +54,7 @@ class EulerDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -68,8 +68,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class HeunDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -51,7 +51,7 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -53,7 +53,7 @@ class LCMSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -67,8 +67,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -49,7 +49,7 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -28,7 +28,7 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -42,8 +42,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -47,7 +47,7 @@ class RePaintSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -61,8 +61,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -35,7 +35,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -49,8 +49,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -52,7 +52,7 @@ class TCDSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -66,8 +66,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -48,7 +48,7 @@ class UnCLIPSchedulerOutput(BaseOutput):
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
|
||||
@@ -34,7 +34,7 @@ if is_scipy_available():
|
||||
def betas_for_alpha_bar(
|
||||
num_diffusion_timesteps: int,
|
||||
max_beta: float = 0.999,
|
||||
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
|
||||
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
|
||||
@@ -48,8 +48,8 @@ def betas_for_alpha_bar(
|
||||
The number of betas to produce.
|
||||
max_beta (`float`, defaults to `0.999`):
|
||||
The maximum beta to use; use values lower than 1 to avoid numerical instability.
|
||||
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
|
||||
alpha_transform_type (`str`, defaults to `"cosine"`):
|
||||
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
@@ -226,6 +226,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
time_shift_type: Literal["exponential"] = "exponential",
|
||||
sigma_min: Optional[float] = None,
|
||||
sigma_max: Optional[float] = None,
|
||||
shift_terminal: Optional[float] = None,
|
||||
) -> None:
|
||||
if self.config.use_beta_sigmas and not is_scipy_available():
|
||||
raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
|
||||
@@ -245,6 +246,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
else:
|
||||
raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
|
||||
if shift_terminal is not None and not use_flow_sigmas:
|
||||
raise ValueError("`shift_terminal` is only supported when `use_flow_sigmas=True`.")
|
||||
|
||||
if rescale_betas_zero_snr:
|
||||
self.betas = rescale_zero_terminal_snr(self.betas)
|
||||
@@ -313,8 +316,12 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self._begin_index = begin_index
|
||||
|
||||
def set_timesteps(
|
||||
self, num_inference_steps: int, device: Optional[Union[str, torch.device]] = None, mu: Optional[float] = None
|
||||
) -> None:
|
||||
self,
|
||||
num_inference_steps: Optional[int] = None,
|
||||
device: Union[str, torch.device] = None,
|
||||
sigmas: Optional[List[float]] = None,
|
||||
mu: Optional[float] = None,
|
||||
):
|
||||
"""
|
||||
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
|
||||
|
||||
@@ -323,13 +330,24 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
The number of diffusion steps used when generating samples with a pre-trained model.
|
||||
device (`str` or `torch.device`, *optional*):
|
||||
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
||||
sigmas (`List[float]`, *optional*):
|
||||
Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed
|
||||
automatically.
|
||||
mu (`float`, *optional*):
|
||||
Optional mu parameter for dynamic shifting when using exponential time shift type.
|
||||
"""
|
||||
if self.config.use_dynamic_shifting and mu is None:
|
||||
raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`")
|
||||
|
||||
if sigmas is not None:
|
||||
if not self.config.use_flow_sigmas:
|
||||
raise ValueError(
|
||||
"Passing `sigmas` is only supported when `use_flow_sigmas=True`. "
|
||||
"Please set `use_flow_sigmas=True` during scheduler initialization."
|
||||
)
|
||||
num_inference_steps = len(sigmas)
|
||||
|
||||
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
||||
if mu is not None:
|
||||
assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
|
||||
self.config.flow_shift = np.exp(mu)
|
||||
if self.config.timestep_spacing == "linspace":
|
||||
timesteps = (
|
||||
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
|
||||
@@ -354,8 +372,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'."
|
||||
)
|
||||
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
if self.config.use_karras_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -375,6 +394,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -389,6 +410,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_beta_sigmas:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
log_sigmas = np.log(sigmas)
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
@@ -403,9 +426,18 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_flow_sigmas:
|
||||
alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)
|
||||
sigmas = 1.0 - alphas
|
||||
sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
|
||||
if sigmas is None:
|
||||
sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1]
|
||||
if self.config.use_dynamic_shifting:
|
||||
sigmas = self.time_shift(mu, 1.0, sigmas)
|
||||
else:
|
||||
sigmas = self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas)
|
||||
if self.config.shift_terminal:
|
||||
sigmas = self.stretch_shift_to_terminal(sigmas)
|
||||
eps = 1e-6
|
||||
if np.fabs(sigmas[0] - 1) < eps:
|
||||
# to avoid inf torch.log(alpha_si) in multistep_uni_p_bh_update during first/second update
|
||||
sigmas[0] -= eps
|
||||
timesteps = (sigmas * self.config.num_train_timesteps).copy()
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
sigma_last = sigmas[-1]
|
||||
@@ -417,6 +449,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
else:
|
||||
if sigmas is None:
|
||||
sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
|
||||
@@ -446,6 +480,43 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
self._begin_index = None
|
||||
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift
|
||||
def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
|
||||
if self.config.time_shift_type == "exponential":
|
||||
return self._time_shift_exponential(mu, sigma, t)
|
||||
elif self.config.time_shift_type == "linear":
|
||||
return self._time_shift_linear(mu, sigma, t)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.stretch_shift_to_terminal
|
||||
def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor:
|
||||
r"""
|
||||
Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config
|
||||
value.
|
||||
|
||||
Reference:
|
||||
https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51
|
||||
|
||||
Args:
|
||||
t (`torch.Tensor`):
|
||||
A tensor of timesteps to be stretched and shifted.
|
||||
|
||||
Returns:
|
||||
`torch.Tensor`:
|
||||
A tensor of adjusted timesteps such that the final value equals `self.config.shift_terminal`.
|
||||
"""
|
||||
one_minus_z = 1 - t
|
||||
scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal)
|
||||
stretched_t = 1 - (one_minus_z / scale_factor)
|
||||
return stretched_t
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential
|
||||
def _time_shift_exponential(self, mu, sigma, t):
|
||||
return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear
|
||||
def _time_shift_linear(self, mu, sigma, t):
|
||||
return mu / (mu + (1 / t - 1) ** sigma)
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
|
||||
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
|
||||
@@ -587,6 +587,21 @@ class AuraFlowPipeline(metaclass=DummyObject):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
|
||||
class BriaFiboEditPipeline(metaclass=DummyObject):
|
||||
_backends = ["torch", "transformers"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["torch", "transformers"])
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
|
||||
class BriaFiboPipeline(metaclass=DummyObject):
|
||||
_backends = ["torch", "transformers"]
|
||||
|
||||
|
||||
0
tests/pipelines/bria_fibo_edit/__init__.py
Normal file
0
tests/pipelines/bria_fibo_edit/__init__.py
Normal file
192
tests/pipelines/bria_fibo_edit/test_pipeline_bria_fibo_edit.py
Normal file
192
tests/pipelines/bria_fibo_edit/test_pipeline_bria_fibo_edit.py
Normal file
@@ -0,0 +1,192 @@
|
||||
# Copyright 2024 Bria AI and The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer
|
||||
from transformers.models.smollm3.modeling_smollm3 import SmolLM3Config, SmolLM3ForCausalLM
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
BriaFiboEditPipeline,
|
||||
FlowMatchEulerDiscreteScheduler,
|
||||
)
|
||||
from diffusers.models.transformers.transformer_bria_fibo import BriaFiboTransformer2DModel
|
||||
from tests.pipelines.test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
class BriaFiboPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
pipeline_class = BriaFiboEditPipeline
|
||||
params = frozenset(["prompt", "height", "width", "guidance_scale"])
|
||||
batch_params = frozenset(["prompt"])
|
||||
test_xformers_attention = False
|
||||
test_layerwise_casting = False
|
||||
test_group_offloading = False
|
||||
supports_dduf = False
|
||||
|
||||
def get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
transformer = BriaFiboTransformer2DModel(
|
||||
patch_size=1,
|
||||
in_channels=16,
|
||||
num_layers=1,
|
||||
num_single_layers=1,
|
||||
attention_head_dim=8,
|
||||
num_attention_heads=2,
|
||||
joint_attention_dim=64,
|
||||
text_encoder_dim=32,
|
||||
pooled_projection_dim=None,
|
||||
axes_dims_rope=[0, 4, 4],
|
||||
)
|
||||
|
||||
vae = AutoencoderKLWan(
|
||||
base_dim=80,
|
||||
decoder_base_dim=128,
|
||||
dim_mult=[1, 2, 4, 4],
|
||||
dropout=0.0,
|
||||
in_channels=12,
|
||||
latents_mean=[0.0] * 16,
|
||||
latents_std=[1.0] * 16,
|
||||
is_residual=True,
|
||||
num_res_blocks=2,
|
||||
out_channels=12,
|
||||
patch_size=2,
|
||||
scale_factor_spatial=16,
|
||||
scale_factor_temporal=4,
|
||||
temperal_downsample=[False, True, True],
|
||||
z_dim=16,
|
||||
)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
text_encoder = SmolLM3ForCausalLM(SmolLM3Config(hidden_size=32))
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
"scheduler": scheduler,
|
||||
"text_encoder": text_encoder,
|
||||
"tokenizer": tokenizer,
|
||||
"transformer": transformer,
|
||||
"vae": vae,
|
||||
}
|
||||
return components
|
||||
|
||||
def get_dummy_inputs(self, device, seed=0):
|
||||
if str(device).startswith("mps"):
|
||||
generator = torch.manual_seed(seed)
|
||||
else:
|
||||
generator = torch.Generator(device="cpu").manual_seed(seed)
|
||||
inputs = {
|
||||
"prompt": '{"text": "A painting of a squirrel eating a burger","edit_instruction": "A painting of a squirrel eating a burger"}',
|
||||
"negative_prompt": "bad, ugly",
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 5.0,
|
||||
"height": 192,
|
||||
"width": 336,
|
||||
"output_type": "np",
|
||||
}
|
||||
image = Image.new("RGB", (336, 192), (255, 255, 255))
|
||||
inputs["image"] = image
|
||||
return inputs
|
||||
|
||||
@unittest.skip(reason="will not be supported due to dim-fusion")
|
||||
def test_encode_prompt_works_in_isolation(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Batching is not supported yet")
|
||||
def test_num_images_per_prompt(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Batching is not supported yet")
|
||||
def test_inference_batch_consistent(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Batching is not supported yet")
|
||||
def test_inference_batch_single_identical(self):
|
||||
pass
|
||||
|
||||
def test_bria_fibo_different_prompts(self):
|
||||
pipe = self.pipeline_class(**self.get_dummy_components())
|
||||
pipe = pipe.to(torch_device)
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
output_same_prompt = pipe(**inputs).images[0]
|
||||
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
inputs["prompt"] = {"edit_instruction": "a different prompt"}
|
||||
output_different_prompts = pipe(**inputs).images[0]
|
||||
|
||||
max_diff = np.abs(output_same_prompt - output_different_prompts).max()
|
||||
assert max_diff > 1e-6
|
||||
|
||||
def test_image_output_shape(self):
|
||||
pipe = self.pipeline_class(**self.get_dummy_components())
|
||||
pipe = pipe.to(torch_device)
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
|
||||
height_width_pairs = [(32, 32), (64, 64), (32, 64)]
|
||||
for height, width in height_width_pairs:
|
||||
expected_height = height
|
||||
expected_width = width
|
||||
|
||||
inputs.update({"height": height, "width": width})
|
||||
image = pipe(**inputs).images[0]
|
||||
output_height, output_width, _ = image.shape
|
||||
assert (output_height, output_width) == (expected_height, expected_width)
|
||||
|
||||
def test_bria_fibo_edit_mask(self):
|
||||
pipe = self.pipeline_class(**self.get_dummy_components())
|
||||
pipe = pipe.to(torch_device)
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
|
||||
mask = Image.fromarray((np.ones((192, 336)) * 255).astype(np.uint8), mode="L")
|
||||
|
||||
inputs.update({"mask": mask})
|
||||
output = pipe(**inputs).images[0]
|
||||
|
||||
assert output.shape == (192, 336, 3)
|
||||
|
||||
def test_bria_fibo_edit_mask_image_size_mismatch(self):
|
||||
pipe = self.pipeline_class(**self.get_dummy_components())
|
||||
pipe = pipe.to(torch_device)
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
|
||||
mask = Image.fromarray((np.ones((64, 64)) * 255).astype(np.uint8), mode="L")
|
||||
|
||||
inputs.update({"mask": mask})
|
||||
with self.assertRaisesRegex(ValueError, "Mask and image must have the same size"):
|
||||
pipe(**inputs)
|
||||
|
||||
def test_bria_fibo_edit_mask_no_image(self):
|
||||
pipe = self.pipeline_class(**self.get_dummy_components())
|
||||
pipe = pipe.to(torch_device)
|
||||
inputs = self.get_dummy_inputs(torch_device)
|
||||
|
||||
mask = Image.fromarray((np.ones((32, 32)) * 255).astype(np.uint8), mode="L")
|
||||
|
||||
# Remove image from inputs if it's there (it shouldn't be by default from get_dummy_inputs)
|
||||
inputs.pop("image", None)
|
||||
inputs.update({"mask": mask})
|
||||
|
||||
with self.assertRaisesRegex(ValueError, "If mask is provided, image must also be provided"):
|
||||
pipe(**inputs)
|
||||
Reference in New Issue
Block a user