Revert setuptools pin as k-diffusion pipelines are now deprecated

Merge branch 'main' into ci-pin-setuptools-pkg-resources
Pin setuptools version for dependencies which explicitly depend on pkg_resources
2026-02-18 08:48:31 +08:00 · 2026-02-17 01:31:05 +01:00 · 2026-02-17 01:29:31 +01:00 · 2026-02-16 02:44:09 +01:00
4 changed files with 4 additions and 42 deletions
--- a/.github/workflows/pr_tests_gpu.yml
+++ b/.github/workflows/pr_tests_gpu.yml
@@ -199,11 +199,6 @@ jobs:

    - name: Install dependencies
      run: |
-        # Install pkgs which depend on setuptools<81 for pkg_resources first with no build isolation
-        uv pip install pip==25.2 setuptools==80.10.2
-        uv pip install --no-build-isolation k-diffusion==0.0.12
-        uv pip install --upgrade pip setuptools
-        # Install the rest as normal
        uv pip install -e ".[quality]"
        uv pip install peft@git+https://github.com/huggingface/peft.git
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -126,11 +126,6 @@ jobs:

    - name: Install dependencies
      run: |
-        # Install pkgs which depend on setuptools<81 for pkg_resources first with no build isolation
-        uv pip install pip==25.2 setuptools==80.10.2
-        uv pip install --no-build-isolation k-diffusion==0.0.12
-        uv pip install --upgrade pip setuptools
-        # Install the rest as normal
        uv pip install -e ".[quality]"
        uv pip install peft@git+https://github.com/huggingface/peft.git
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
--- a/src/diffusers/models/attention_dispatch.py
+++ b/src/diffusers/models/attention_dispatch.py
@@ -1117,26 +1117,6 @@ def _sage_attention_backward_op(
    raise NotImplementedError("Backward pass is not implemented for Sage attention.")


-def _maybe_modify_attn_mask_npu(query: torch.Tensor, key: torch.Tensor, attn_mask: torch.Tensor | None = None):
-    # Skip Attention Mask if all values are 1, `None` mask can speedup the computation
-    if attn_mask is not None and torch.all(attn_mask != 0):
-        attn_mask = None
-
-    # Reshape Attention Mask: [batch_size, seq_len_k] -> [batch_size, 1, sqe_len_q, seq_len_k]
-    # https://www.hiascend.com/document/detail/zh/Pytorch/730/apiref/torchnpuCustomsapi/docs/context/torch_npu-npu_fusion_attention.md
-    if (
-        attn_mask is not None
-        and attn_mask.ndim == 2
-        and attn_mask.shape[0] == query.shape[0]
-        and attn_mask.shape[1] == key.shape[1]
-    ):
-        B, Sq, Skv = attn_mask.shape[0], query.shape[1], key.shape[1]
-        attn_mask = ~attn_mask.to(torch.bool)
-        attn_mask = attn_mask.unsqueeze(1).expand(B, Sq, Skv).unsqueeze(1).contiguous()
-
-    return attn_mask
-
-
 def _npu_attention_forward_op(
    ctx: torch.autograd.function.FunctionCtx,
    query: torch.Tensor,
@@ -1154,14 +1134,11 @@ def _npu_attention_forward_op(
    if return_lse:
        raise ValueError("NPU attention backend does not support setting `return_lse=True`.")

-    attn_mask = _maybe_modify_attn_mask_npu(query, key, attn_mask)
-
    out = npu_fusion_attention(
        query,
        key,
        value,
        query.size(2),  # num_heads
-        atten_mask=attn_mask,
        input_layout="BSND",
        pse=None,
        scale=1.0 / math.sqrt(query.shape[-1]) if scale is None else scale,
@@ -2691,17 +2668,16 @@ def _native_npu_attention(
    return_lse: bool = False,
    _parallel_config: "ParallelConfig" | None = None,
 ) -> torch.Tensor:
+    if attn_mask is not None:
+        raise ValueError("`attn_mask` is not supported for NPU attention")
    if return_lse:
        raise ValueError("NPU attention backend does not support setting `return_lse=True`.")
    if _parallel_config is None:
-        attn_mask = _maybe_modify_attn_mask_npu(query, key, attn_mask)
-
        out = npu_fusion_attention(
            query,
            key,
            value,
            query.size(2),  # num_heads
-            atten_mask=attn_mask,
            input_layout="BSND",
            pse=None,
            scale=1.0 / math.sqrt(query.shape[-1]) if scale is None else scale,
@@ -2716,7 +2692,7 @@ def _native_npu_attention(
            query,
            key,
            value,
-            attn_mask,
+            None,
            dropout_p,
            None,
            scale,
--- a/src/diffusers/models/transformers/transformer_qwenimage.py
+++ b/src/diffusers/models/transformers/transformer_qwenimage.py
@@ -164,11 +164,7 @@ def compute_text_seq_len_from_mask(
    position_ids = torch.arange(text_seq_len, device=encoder_hidden_states.device, dtype=torch.long)
    active_positions = torch.where(encoder_hidden_states_mask, position_ids, position_ids.new_zeros(()))
    has_active = encoder_hidden_states_mask.any(dim=1)
-    per_sample_len = torch.where(
-        has_active,
-        active_positions.max(dim=1).values + 1,
-        torch.as_tensor(text_seq_len, device=encoder_hidden_states.device),
-    )
+    per_sample_len = torch.where(has_active, active_positions.max(dim=1).values + 1, torch.as_tensor(text_seq_len))
    return text_seq_len, per_sample_len, encoder_hidden_states_mask
Author	SHA1	Message	Date
Daniel Gu	a99d09af25	Revert setuptools pin as k-diffusion pipelines are now deprecated	2026-02-17 01:31:05 +01:00
Daniel Gu	9a07baf457	Merge branch 'main' into ci-pin-setuptools-pkg-resources	2026-02-17 01:29:31 +01:00
Daniel Gu	dd7f8f5fa0	Pin setuptools version for dependencies which explicitly depend on pkg_resources	2026-02-16 02:44:09 +01:00