up

fix ltx2 i2v docstring.
fix cosmos transformer typing. (#13134)
2026-02-13 22:35:37 +08:00 · 2026-02-13 15:21:29 +05:30 · 2026-02-13 15:19:59 +05:30 · 2026-02-13 14:51:19 +05:30 · 2026-02-13 10:09:32 +05:30 · 2026-02-12 14:32:04 -08:00
4 changed files with 56 additions and 19 deletions
--- a/.github/workflows/pr_tests_gpu.yml
+++ b/.github/workflows/pr_tests_gpu.yml
@@ -199,6 +199,11 @@ jobs:

    - name: Install dependencies
      run: |
+        # First, with no build isolation, install packages whose builds need pkg_resources (i.e. setuptools<81)
+        uv pip install pip==25.2 setuptools==80.10.2
+        uv pip install --no-build-isolation k-diffusion==0.0.12
+        uv pip install --upgrade pip setuptools
+        # Install the rest as normal
        uv pip install -e ".[quality]"
        uv pip install peft@git+https://github.com/huggingface/peft.git
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
--- a/src/diffusers/models/transformers/transformer_cosmos.py
+++ b/src/diffusers/models/transformers/transformer_cosmos.py
@@ -421,7 +421,7 @@ class CosmosTransformerBlock(nn.Module):
        controlnet_residual: Optional[torch.Tensor] = None,
        latents: Optional[torch.Tensor] = None,
        block_idx: Optional[int] = None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        if self.before_proj is not None:
            hidden_states = self.before_proj(hidden_states) + latents

--- a/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py
+++ b/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py
@@ -48,7 +48,7 @@ EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
-        >>> from diffusers import LTX2Pipeline
+        >>> from diffusers import LTX2ImageToVideoPipeline
        >>> from diffusers.pipelines.ltx2.export_utils import encode_video
        >>> from diffusers.utils import load_image

@@ -62,7 +62,7 @@ EXAMPLE_DOC_STRING = """
        >>> negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

        >>> frame_rate = 24.0
-        >>> video = pipe(
+        >>> video, audio = pipe(
        ...     image=image,
        ...     prompt=prompt,
        ...     negative_prompt=negative_prompt,
--- a/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
@@ -51,9 +51,6 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
    Args:
        num_train_timesteps (`int`, defaults to 1000):
            The number of diffusion steps to train the model.
-        timestep_spacing (`str`, defaults to `"linspace"`):
-            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
-            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
        shift (`float`, defaults to 1.0):
            The shift value for the timestep schedule.
    """
@@ -110,7 +107,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
    def scale_noise(
        self,
        sample: torch.FloatTensor,
-        timestep: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
        noise: torch.FloatTensor,
    ) -> torch.FloatTensor:
        """
@@ -119,7 +116,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        Args:
            sample (`torch.FloatTensor`):
                The input sample.
-            timestep (`torch.FloatTensor`):
+            timestep (`float` or `torch.FloatTensor`):
                The current timestep in the diffusion chain.
            noise (`torch.FloatTensor`):
                The noise tensor.
@@ -137,10 +134,14 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):

        return sample

-    def _sigma_to_t(self, sigma):
+    def _sigma_to_t(self, sigma: float) -> float:
        return sigma * self.config.num_train_timesteps

-    def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        device: Union[str, torch.device] = None,
+    ) -> None:
        """
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

@@ -153,7 +154,9 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        self.num_inference_steps = num_inference_steps

        timesteps = np.linspace(
-            self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
+            self._sigma_to_t(self.sigma_max),
+            self._sigma_to_t(self.sigma_min),
+            num_inference_steps,
        )

        sigmas = timesteps / self.config.num_train_timesteps
@@ -174,7 +177,24 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        self._step_index = None
        self._begin_index = None

-    def index_for_timestep(self, timestep, schedule_timesteps=None):
+    def index_for_timestep(
+        self,
+        timestep: Union[float, torch.FloatTensor],
+        schedule_timesteps: Optional[torch.FloatTensor] = None,
+    ) -> int:
+        """
+        Find the index of a given timestep in the timestep schedule.
+
+        Args:
+            timestep (`float` or `torch.FloatTensor`):
+                The timestep value to find in the schedule.
+            schedule_timesteps (`torch.FloatTensor`, *optional*):
+                The timestep schedule to search in. If `None`, uses `self.timesteps`.
+
+        Returns:
+            `int`:
+                The index of the timestep in the schedule.
+        """
        if schedule_timesteps is None:
            schedule_timesteps = self.timesteps

@@ -188,7 +208,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):

        return indices[pos].item()

-    def _init_step_index(self, timestep):
+    def _init_step_index(self, timestep: Union[float, torch.FloatTensor]) -> None:
        if self.begin_index is None:
            if isinstance(timestep, torch.Tensor):
                timestep = timestep.to(self.timesteps.device)
@@ -197,7 +217,10 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
            self._step_index = self._begin_index

    @property
-    def state_in_first_order(self):
+    def state_in_first_order(self) -> bool:
+        """
+        Returns whether the scheduler is in the first-order state.
+        """
        return self.dt is None

    def step(
@@ -219,13 +242,19 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        Args:
            model_output (`torch.FloatTensor`):
                The direct output from learned diffusion model.
-            timestep (`float`):
+            timestep (`float` or `torch.FloatTensor`):
                The current discrete timestep in the diffusion chain.
            sample (`torch.FloatTensor`):
                A current instance of a sample created by the diffusion process.
            s_churn (`float`):
-            s_tmin  (`float`):
-            s_tmax  (`float`):
+                Stochasticity parameter that controls the amount of noise added during sampling. Higher values increase
+                randomness.
+            s_tmin (`float`):
+                Minimum timestep threshold for applying stochasticity. Only timesteps above this value will have noise
+                added.
+            s_tmax (`float`):
+                Maximum timestep threshold for applying stochasticity. Only timesteps below this value will have noise
+                added.
            s_noise (`float`, defaults to 1.0):
                Scaling factor for noise added to the sample.
            generator (`torch.Generator`, *optional*):
@@ -274,7 +303,10 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):

        if gamma > 0:
            noise = randn_tensor(
-                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+                model_output.shape,
+                dtype=model_output.dtype,
+                device=model_output.device,
+                generator=generator,
            )
            eps = noise * s_noise
            sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5
@@ -320,5 +352,5 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):

        return FlowMatchHeunDiscreteSchedulerOutput(prev_sample=prev_sample)

-    def __len__(self):
+    def __len__(self) -> int:
        return self.config.num_train_timesteps
Author	SHA1	Message	Date
sayakpaul	c458048d09	up	2026-02-13 15:21:29 +05:30
sayakpaul	4527dcfad3	fix ltx2 i2v docstring.	2026-02-13 15:19:59 +05:30
Sayak Paul	76af013a41	fix cosmos transformer typing. (#13134)	2026-02-13 14:51:19 +05:30
dg845	277e305589	[CI] Fix `setuptools` `pkg_resources` Bug for PR GPU Tests (#13132) Try to fix setuptools pkg_resources error for PR GPU test workflow	2026-02-13 10:09:32 +05:30
David El Malih	5f3ea22513	docs: improve docstring scheduling_flow_match_heun_discrete.py (#13130) Improve docstring scheduling flow match heun discrete	2026-02-12 14:32:04 -08:00