Compare commits

...

3 Commits

Author       SHA1         Message                                 Date
Dhruv Nair   d2bb8cf24e   Merge branch 'main' into 3d-attn-fix    2024-04-08 11:05:20 +05:30
Dhruv Nair   23592335d4   update                                  2024-02-07 12:57:30 +00:00
Dhruv Nair   43dceeea40   update                                  2024-02-07 12:50:52 +00:00
2 changed files with 11 additions and 5 deletions

View File

@@ -497,8 +497,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -510,8 +510,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -731,8 +731,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -744,8 +744,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
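
Note: the argument swap above matters because `Transformer2DModel` and `TransformerTemporalModel` take the head count as their first positional argument and the per-head dimension as their second. A minimal sketch of the effect, using hypothetical values (`out_channels = 640`, `num_attention_heads = 10`) that are not taken from this diff:

out_channels = 640          # hypothetical block width
num_attention_heads = 10    # hypothetical head count

# Before the fix the call was effectively
#   Transformer2DModel(out_channels // num_attention_heads, num_attention_heads, ...)
# i.e. the per-head dimension landed in the head-count slot.
old_args = (out_channels // num_attention_heads, num_attention_heads)   # (64, 10)

# After the fix the call is
#   Transformer2DModel(num_attention_heads, out_channels // num_attention_heads, ...)
# i.e. 10 heads of dimension 64, matching the constructor's argument order.
new_args = (num_attention_heads, out_channels // num_attention_heads)   # (10, 64)

assert new_args == old_args[::-1]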

View File

@@ -136,13 +136,19 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 "At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19."
             )
+        if isinstance(attention_head_dim, int):
+            num_attention_heads = [out_channels // attention_head_dim for out_channels in block_out_channels]
+        else:
+            num_attention_heads = [
+                out_channels // attn_dim for out_channels, attn_dim in zip(block_out_channels, attention_head_dim)
+            ]
 
         # If `num_attention_heads` is not defined (which is the case for most models)
         # it will default to `attention_head_dim`. This looks weird upon first reading it and it is.
         # The reason for this behavior is to correct for incorrectly named variables that were introduced
         # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131
         # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking
         # which is why we correct for the naming here.
         num_attention_heads = num_attention_heads or attention_head_dim
 
         # Check inputs
         if len(down_block_types) != len(up_block_types):
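
For reference, a standalone sketch of the branch added above, run with assumed example values (`block_out_channels = (320, 640, 1280, 1280)` and `attention_head_dim = 64`; these numbers are illustrative, not part of the diff):

block_out_channels = (320, 640, 1280, 1280)   # assumed example block widths
attention_head_dim = 64                       # assumed example per-head dimension

# Same logic as the lines added in the hunk above: derive the per-block head
# count from the per-head dimension.
if isinstance(attention_head_dim, int):
    num_attention_heads = [out_channels // attention_head_dim for out_channels in block_out_channels]
else:
    num_attention_heads = [
        out_channels // attn_dim for out_channels, attn_dim in zip(block_out_channels, attention_head_dim)
    ]

print(num_attention_heads)   # [5, 10, 20, 20]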