Compare commits

...

5 Commits

Author SHA1 Message Date
sayakpaul
c458048d09 up 2026-02-13 15:21:29 +05:30
sayakpaul
4527dcfad3 fix ltx2 i2v docstring. 2026-02-13 15:19:59 +05:30
Sayak Paul
76af013a41 fix cosmos transformer typing. (#13134) 2026-02-13 14:51:19 +05:30
dg845
277e305589 [CI] Fix setuptools pkg_resources Bug for PR GPU Tests (#13132)
Try to fix setuptools pkg_resources error for PR GPU test workflow
2026-02-13 10:09:32 +05:30
David El Malih
5f3ea22513 docs: improve docstring scheduling_flow_match_heun_discrete.py (#13130)
Improve docstring scheduling flow match heun discrete
2026-02-12 14:32:04 -08:00
4 changed files with 56 additions and 19 deletions

View File

@@ -199,6 +199,11 @@ jobs:
- name: Install dependencies
run: |
# First, with build isolation disabled, install the packages that depend on setuptools<81 for pkg_resources
uv pip install pip==25.2 setuptools==80.10.2
uv pip install --no-build-isolation k-diffusion==0.0.12
uv pip install --upgrade pip setuptools
# Install the rest as normal
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git

View File

@@ -421,7 +421,7 @@ class CosmosTransformerBlock(nn.Module):
controlnet_residual: Optional[torch.Tensor] = None,
latents: Optional[torch.Tensor] = None,
block_idx: Optional[int] = None,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
if self.before_proj is not None:
hidden_states = self.before_proj(hidden_states) + latents

View File

@@ -48,7 +48,7 @@ EXAMPLE_DOC_STRING = """
Examples:
```py
>>> import torch
>>> from diffusers import LTX2Pipeline
>>> from diffusers import LTX2ImageToVideoPipeline
>>> from diffusers.pipelines.ltx2.export_utils import encode_video
>>> from diffusers.utils import load_image
@@ -62,7 +62,7 @@ EXAMPLE_DOC_STRING = """
>>> negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
>>> frame_rate = 24.0
>>> video = pipe(
>>> video, audio = pipe(
... image=image,
... prompt=prompt,
... negative_prompt=negative_prompt,

View File

@@ -51,9 +51,6 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
Args:
num_train_timesteps (`int`, defaults to 1000):
The number of diffusion steps to train the model.
timestep_spacing (`str`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
shift (`float`, defaults to 1.0):
The shift value for the timestep schedule.
"""
@@ -110,7 +107,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
def scale_noise(
self,
sample: torch.FloatTensor,
timestep: torch.FloatTensor,
timestep: Union[float, torch.FloatTensor],
noise: torch.FloatTensor,
) -> torch.FloatTensor:
"""
@@ -119,7 +116,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
Args:
sample (`torch.FloatTensor`):
The input sample.
timestep (`torch.FloatTensor`):
timestep (`float` or `torch.FloatTensor`):
The current timestep in the diffusion chain.
noise (`torch.FloatTensor`):
The noise tensor.
@@ -137,10 +134,14 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
return sample
def _sigma_to_t(self, sigma):
def _sigma_to_t(self, sigma: float) -> float:
return sigma * self.config.num_train_timesteps
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
def set_timesteps(
self,
num_inference_steps: int,
device: Union[str, torch.device] = None,
) -> None:
"""
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
@@ -153,7 +154,9 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
self.num_inference_steps = num_inference_steps
timesteps = np.linspace(
self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
self._sigma_to_t(self.sigma_max),
self._sigma_to_t(self.sigma_min),
num_inference_steps,
)
sigmas = timesteps / self.config.num_train_timesteps
@@ -174,7 +177,24 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
self._step_index = None
self._begin_index = None
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self,
timestep: Union[float, torch.FloatTensor],
schedule_timesteps: Optional[torch.FloatTensor] = None,
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.FloatTensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.FloatTensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule.
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
@@ -188,7 +208,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.FloatTensor]) -> None:
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
@@ -197,7 +217,10 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
self._step_index = self._begin_index
@property
def state_in_first_order(self):
def state_in_first_order(self) -> bool:
"""
Returns whether the scheduler is in the first-order state.
"""
return self.dt is None
def step(
@@ -219,13 +242,19 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.FloatTensor`):
The direct output from the learned diffusion model.
timestep (`float`):
timestep (`float` or `torch.FloatTensor`):
The current discrete timestep in the diffusion chain.
sample (`torch.FloatTensor`):
A current instance of a sample created by the diffusion process.
s_churn (`float`):
s_tmin (`float`):
s_tmax (`float`):
Stochasticity parameter that controls the amount of noise added during sampling. Higher values increase
randomness.
s_tmin (`float`):
Minimum timestep threshold for applying stochasticity. Only timesteps above this value will have noise
added.
s_tmax (`float`):
Maximum timestep threshold for applying stochasticity. Only timesteps below this value will have noise
added.
s_noise (`float`, defaults to 1.0):
Scaling factor for noise added to the sample.
generator (`torch.Generator`, *optional*):
@@ -274,7 +303,10 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
if gamma > 0:
noise = randn_tensor(
model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
model_output.shape,
dtype=model_output.dtype,
device=model_output.device,
generator=generator,
)
eps = noise * s_noise
sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5
@@ -320,5 +352,5 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
return FlowMatchHeunDiscreteSchedulerOutput(prev_sample=prev_sample)
def __len__(self):
def __len__(self) -> int:
return self.config.num_train_timesteps