Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-10 22:44:38 +08:00)
Compare commits: update-rel...3d-attn-fi (3 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d2bb8cf24e |  |
|  | 23592335d4 |  |
|  | 43dceeea40 |  |
```diff
@@ -497,8 +497,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
```
```diff
@@ -510,8 +510,8 @@ class CrossAttnDownBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
```
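For context (not part of the diff itself): the first two positional parameters of `Transformer2DModel` and `TransformerTemporalModel` are the head count followed by the per-head dimension, so the old calls above passed them in reverse. A minimal sketch of the two orderings, using assumed example values (`out_channels=320`, 8 heads) rather than anything taken from the diff:

```python
def intended_split(out_channels: int, num_attention_heads: int) -> tuple[int, int]:
    # New argument order: (num_attention_heads, attention_head_dim).
    return num_attention_heads, out_channels // num_attention_heads


def swapped_split(out_channels: int, num_attention_heads: int) -> tuple[int, int]:
    # Old argument order: (attention_head_dim, num_attention_heads).
    return out_channels // num_attention_heads, num_attention_heads


print(intended_split(320, 8))  # (8, 40): 8 heads of 40 channels each
print(swapped_split(320, 8))   # (40, 8): accidentally 40 heads of 8 channels
```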
```diff
@@ -731,8 +731,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             attentions.append(
                 Transformer2DModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
```
```diff
@@ -744,8 +744,8 @@ class CrossAttnUpBlock3D(nn.Module):
             )
             temp_attentions.append(
                 TransformerTemporalModel(
-                    out_channels // num_attention_heads,
                     num_attention_heads,
+                    out_channels // num_attention_heads,
                     in_channels=out_channels,
                     num_layers=1,
                     cross_attention_dim=cross_attention_dim,
```
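One reason a swap like this can go unnoticed (an observation, not something stated in the diff): the attention projection width depends only on the product `heads * head_dim`, which is identical under either argument order, so every parameter shape matches, checkpoints load without error, and only the per-head split of the computation changes. A quick check with the same assumed values:

```python
# Assumed example values: out_channels=320, 8 intended heads.
heads, head_dim = 8, 320 // 8             # intended split: (8, 40)
swapped_heads, swapped_dim = 320 // 8, 8  # what the old order produced: (40, 8)

# The projection width heads * head_dim is the same either way, so
# parameter shapes (and checkpoint loading) never flagged the bug.
assert heads * head_dim == swapped_heads * swapped_dim == 320
```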
```diff
@@ -136,13 +136,19 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
                 "At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19."
             )
 
+        if isinstance(attention_head_dim, int):
+            num_attention_heads = [out_channels // attention_head_dim for out_channels in block_out_channels]
+        else:
+            num_attention_heads = [
+                out_channels // attn_dim for out_channels, attn_dim in zip(block_out_channels, attention_head_dim)
+            ]
+
         # If `num_attention_heads` is not defined (which is the case for most models)
         # it will default to `attention_head_dim`. This looks weird upon first reading it and it is.
         # The reason for this behavior is to correct for incorrectly named variables that were introduced
         # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131
         # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking
         # which is why we correct for the naming here.
-        num_attention_heads = num_attention_heads or attention_head_dim
 
         # Check inputs
         if len(down_block_types) != len(up_block_types):
```
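To illustrate the new derivation above, here is how a per-block head count falls out of a typical configuration; the concrete numbers below are assumptions for the example, not values taken from the diff:

```python
# Assumed example config (not from the diff).
block_out_channels = (320, 640, 1280, 1280)
attention_head_dim = 64  # a single int here; configs may also use a tuple

# Mirrors the added branch: one head dim shared across all blocks.
if isinstance(attention_head_dim, int):
    num_attention_heads = [c // attention_head_dim for c in block_out_channels]
else:
    # Per-block head dims: pair each block's channels with its own head dim.
    num_attention_heads = [
        c // d for c, d in zip(block_out_channels, attention_head_dim)
    ]

print(num_attention_heads)  # [5, 10, 20, 20]
```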