Compare commits

...

3 Commits

Author       SHA1         Message                                 Date
Dhruv Nair   d2bb8cf24e   Merge branch 'main' into 3d-attn-fix    2024-04-08 11:05:20 +05:30
Dhruv Nair   23592335d4   update                                  2024-02-07 12:57:30 +00:00
Dhruv Nair   43dceeea40   update                                  2024-02-07 12:50:52 +00:00
2 changed files with 11 additions and 5 deletions

View File

@@ -497,8 +497,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -510,8 +510,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -731,8 +731,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
@@ -744,8 +744,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
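
Note: the argument swap above matters because `Transformer2DModel` and `TransformerTemporalModel` take the head count as their first positional argument and the per-head dimension as their second. A minimal sketch of the effect, using hypothetical values (`out_channels = 640`, `num_attention_heads = 10`) that are not taken from this diff:

out_channels = 640          # hypothetical block width
num_attention_heads = 10    # hypothetical head count

# Before the fix the call was effectively
#   Transformer2DModel(out_channels // num_attention_heads, num_attention_heads, ...)
# i.e. the per-head dimension landed in the head-count slot.
old_args = (out_channels // num_attention_heads, num_attention_heads)   # (64, 10)

# After the fix the call is
#   Transformer2DModel(num_attention_heads, out_channels // num_attention_heads, ...)
# i.e. 10 heads of dimension 64, matching the constructor's argument order.
new_args = (num_attention_heads, out_channels // num_attention_heads)   # (10, 64)

assert new_args == old_args[::-1]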

View File

@@ -136,13 +136,19 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 "At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19."
             )
+        if isinstance(attention_head_dim, int):
+            num_attention_heads = [out_channels // attention_head_dim for out_channels in block_out_channels]
+        else:
+            num_attention_heads = [
+                out_channels // attn_dim for out_channels, attn_dim in zip(block_out_channels, attention_head_dim)
+            ]
 
         # If `num_attention_heads` is not defined (which is the case for most models)
         # it will default to `attention_head_dim`. This looks weird upon first reading it and it is.
         # The reason for this behavior is to correct for incorrectly named variables that were introduced
         # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131
         # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking
         # which is why we correct for the naming here.
         num_attention_heads = num_attention_heads or attention_head_dim
 
         # Check inputs
         if len(down_block_types) != len(up_block_types):
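
For reference, a standalone sketch of the branch added above, run with assumed example values (`block_out_channels = (320, 640, 1280, 1280)` and `attention_head_dim = 64`; these numbers are illustrative, not part of the diff):

block_out_channels = (320, 640, 1280, 1280)   # assumed example block widths
attention_head_dim = 64                       # assumed example per-head dimension

# Same logic as the lines added in the hunk above: derive the per-block head
# count from the per-head dimension.
if isinstance(attention_head_dim, int):
    num_attention_heads = [out_channels // attention_head_dim for out_channels in block_out_channels]
else:
    num_attention_heads = [
        out_channels // attn_dim for out_channels, attn_dim in zip(block_out_channels, attention_head_dim)
    ]

print(num_attention_heads)   # [5, 10, 20, 20]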