Compare commits

...

9 Commits

Author SHA1 Message Date
DN6
ad3a3afc3a Release: v0.37.1 2026-03-25 12:45:04 +05:30
Sayak Paul
944c29bb94 fix to device and to dtype tests. (#13323) 2026-03-25 12:44:41 +05:30
Sayak Paul
70ab1808bd fix klein lora loading. (#13313) 2026-03-25 12:44:30 +05:30
Dhruv Nair
5cd2fa76d0 [CI] Update fetching pipelines for latest HF Hub Version (#13322)
update
2026-03-25 12:43:39 +05:30
Dhruv Nair
d64e5e439d Fix unguarded torchvision import in Cosmos (#13321)
update
2026-03-25 12:43:27 +05:30
Sayak Paul
e8ecef1212 [tests] fix audioldm2 tests. (#13293)
fix audioldm2 tests.
2026-03-25 12:43:13 +05:30
Dhruv Nair
35551981bc [CI] Update transformer version in release tests (#13296)
update
2026-03-25 12:42:50 +05:30
Dhruv Nair
227d3d9a52 [Modular] Test for catching dtype and device issues with AutoModel type hints (#13287)
* update

* update

* update
2026-03-25 12:42:37 +05:30
Dhruv Nair
1bf34343e8 [Modular] Fix dtype assignment when type hint is AutoModel (#13271)
* update

* update
2026-03-25 12:42:15 +05:30
11 changed files with 347 additions and 31 deletions

View File

@@ -4,6 +4,7 @@
name: (Release) Fast GPU Tests on main
on:
workflow_dispatch:
push:
branches:
- "v*.*.*-release"
@@ -33,6 +34,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -74,6 +76,7 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -125,6 +128,7 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
@@ -175,6 +179,7 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
@@ -232,6 +237,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -274,6 +280,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -316,6 +323,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |

View File

@@ -276,7 +276,7 @@ version_range_max = max(sys.version_info[1], 10) + 1
setup(
name="diffusers",
version="0.37.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="0.37.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
description="State-of-the-art diffusion in PyTorch and JAX.",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",

View File

@@ -1,4 +1,4 @@
__version__ = "0.37.0"
__version__ = "0.37.1"
from typing import TYPE_CHECKING

View File

@@ -2440,6 +2440,191 @@ def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict):
return converted_state_dict
def _convert_kohya_flux2_lora_to_diffusers(state_dict):
    """
    Convert a Kohya-style Flux2 LoRA state dict to the diffusers key layout.

    Source entries are expected as `<module>.lora_down.weight`, `<module>.lora_up.weight` and an optional
    `<module>.alpha`. Each converted module is written as `transformer.<...>.lora_A.weight` /
    `transformer.<...>.lora_B.weight` with the `alpha / rank` scale already folded into the weights. The fused
    double-block QKV projections are split into separate q/k/v (or add_q/add_k/add_v) adapters.

    Args:
        state_dict (`dict`):
            Kohya-format LoRA weights. Converted entries are popped, so the dict is mutated in place. Keys that are
            left over after conversion are reported with a warning.

    Returns:
        `dict`: The converted state dict keyed by diffusers module names.

    Raises:
        KeyError: If a module has a `lora_down` weight but no matching `lora_up` weight.
    """

    def _pop_alpha(sds_sd, sds_key, rank):
        # A missing alpha means "alpha == rank", i.e. a scale of exactly 1. Use the Python number directly instead
        # of materialising it in the weight dtype, where large ranks are not exactly representable (e.g. bf16).
        alpha = sds_sd.pop(sds_key + ".alpha", None)
        if alpha is None:
            return float(rank)
        return alpha.item() if hasattr(alpha, "item") else float(alpha)

    def _balanced_scales(alpha, rank):
        # Distribute `alpha / rank` over both matrices: move factors of 2 from the up side to the down side while
        # the down scale is still smaller, so that neither matrix carries the whole (possibly tiny) factor.
        scale_down = alpha / rank
        scale_up = 1.0
        while scale_down * 2 < scale_up:
            scale_down *= 2
            scale_up /= 2
        return scale_down, scale_up

    def _convert_to_ai_toolkit(sds_sd, ait_sd, sds_key, ait_key):
        # One-to-one conversion of a single module; silently skipped when the module is not in the checkpoint.
        down_key = sds_key + ".lora_down.weight"
        if down_key not in sds_sd:
            return
        down_weight = sds_sd.pop(down_key)

        # scale weight by alpha and dim
        rank = down_weight.shape[0]
        scale_down, scale_up = _balanced_scales(_pop_alpha(sds_sd, sds_key, rank), rank)

        ait_sd[ait_key + ".lora_A.weight"] = down_weight * scale_down
        ait_sd[ait_key + ".lora_B.weight"] = sds_sd.pop(sds_key + ".lora_up.weight") * scale_up

    def _convert_to_ai_toolkit_cat(sds_sd, ait_sd, sds_key, ait_keys, dims=None):
        # Conversion of one fused module into several target modules (`ait_keys`), splitting the up weight along
        # its output dimension. `dims` gives the output size per target; equal splits are used when omitted.
        down_key = sds_key + ".lora_down.weight"
        if down_key not in sds_sd:
            return
        down_weight = sds_sd.pop(down_key)
        up_weight = sds_sd.pop(sds_key + ".lora_up.weight")

        sd_lora_rank = down_weight.shape[0]
        scale_down, scale_up = _balanced_scales(_pop_alpha(sds_sd, sds_key, sd_lora_rank), sd_lora_rank)
        down_weight = down_weight * scale_down
        up_weight = up_weight * scale_up

        num_splits = len(ait_keys)
        if dims is None:
            dims = [up_weight.shape[0] // num_splits] * num_splits
        elif len(dims) != num_splits or sum(dims) != up_weight.shape[0]:
            raise ValueError(
                f"`dims` must have {num_splits} entries summing to {up_weight.shape[0]} for {sds_key}, got {dims}."
            )

        # check if upweight is sparse: block-diagonal up weights mean every target only uses its own slice of the
        # rank, so each target can get a smaller, independent adapter.
        ait_rank = sd_lora_rank // num_splits
        is_sparse = sd_lora_rank % num_splits == 0
        if is_sparse:
            row = 0
            for j, dim in enumerate(dims):
                for k in range(num_splits):
                    if j == k or not is_sparse:
                        continue
                    off_diagonal = up_weight[row : row + dim, k * ait_rank : (k + 1) * ait_rank]
                    is_sparse = bool(torch.all(off_diagonal == 0))
                row += dim
            if is_sparse:
                logger.info(f"weight is sparse: {sds_key}")

        ait_down_keys = [k + ".lora_A.weight" for k in ait_keys]
        ait_up_keys = [k + ".lora_B.weight" for k in ait_keys]
        if not is_sparse:
            # Dense case: every target shares the full down weight and takes its rows of the up weight.
            ait_sd.update(dict.fromkeys(ait_down_keys, down_weight))
            ait_sd.update(dict(zip(ait_up_keys, torch.split(up_weight, dims, dim=0))))
        else:
            # Sparse case: every target takes its own rank slice of both matrices.
            ait_sd.update(dict(zip(ait_down_keys, torch.chunk(down_weight, num_splits, dim=0))))
            row = 0
            for j, dim in enumerate(dims):
                ait_sd[ait_up_keys[j]] = up_weight[row : row + dim, j * ait_rank : (j + 1) * ait_rank].contiguous()
                row += dim

    # Detect number of blocks from keys (the block index is the fifth `_`-separated field of the key).
    num_double_layers = 0
    num_single_layers = 0
    for key in state_dict:
        if key.startswith("lora_unet_double_blocks_"):
            block_idx = int(key.split("_")[4])
            num_double_layers = max(num_double_layers, block_idx + 1)
        elif key.startswith("lora_unet_single_blocks_"):
            block_idx = int(key.split("_")[4])
            num_single_layers = max(num_single_layers, block_idx + 1)

    ait_sd = {}

    for i in range(num_double_layers):
        src = f"lora_unet_double_blocks_{i}"
        dst = f"transformer.transformer_blocks.{i}"

        # Attention projections
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_img_attn_proj", f"{dst}.attn.to_out.0")
        _convert_to_ai_toolkit_cat(
            state_dict,
            ait_sd,
            f"{src}_img_attn_qkv",
            [f"{dst}.attn.to_q", f"{dst}.attn.to_k", f"{dst}.attn.to_v"],
        )
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_txt_attn_proj", f"{dst}.attn.to_add_out")
        _convert_to_ai_toolkit_cat(
            state_dict,
            ait_sd,
            f"{src}_txt_attn_qkv",
            [f"{dst}.attn.add_q_proj", f"{dst}.attn.add_k_proj", f"{dst}.attn.add_v_proj"],
        )

        # MLP layers (Flux2 uses ff.linear_in/linear_out)
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_img_mlp_0", f"{dst}.ff.linear_in")
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_img_mlp_2", f"{dst}.ff.linear_out")
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_txt_mlp_0", f"{dst}.ff_context.linear_in")
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_txt_mlp_2", f"{dst}.ff_context.linear_out")

    for i in range(num_single_layers):
        src = f"lora_unet_single_blocks_{i}"
        dst = f"transformer.single_transformer_blocks.{i}"

        # Single blocks: linear1 -> attn.to_qkv_mlp_proj (fused, no split needed)
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_linear1", f"{dst}.attn.to_qkv_mlp_proj")
        # Single blocks: linear2 -> attn.to_out
        _convert_to_ai_toolkit(state_dict, ait_sd, f"{src}_linear2", f"{dst}.attn.to_out")

    # Handle optional extra keys
    extra_mappings = {
        "lora_unet_img_in": "transformer.x_embedder",
        "lora_unet_txt_in": "transformer.context_embedder",
        "lora_unet_time_in_in_layer": "transformer.time_guidance_embed.timestep_embedder.linear_1",
        "lora_unet_time_in_out_layer": "transformer.time_guidance_embed.timestep_embedder.linear_2",
        "lora_unet_final_layer_linear": "transformer.proj_out",
    }
    for sds_key, ait_key in extra_mappings.items():
        _convert_to_ai_toolkit(state_dict, ait_sd, sds_key, ait_key)

    # Everything that was converted has been popped; whatever remains is not understood by this converter.
    remaining_keys = list(state_dict)
    if remaining_keys:
        logger.warning(f"Unsupported keys for Kohya Flux2 LoRA conversion: {remaining_keys}")

    return ait_sd
def _convert_non_diffusers_z_image_lora_to_diffusers(state_dict):
"""
Convert non-diffusers ZImage LoRA state dict to diffusers format.

View File

@@ -43,6 +43,7 @@ from .lora_conversion_utils import (
_convert_bfl_flux_control_lora_to_diffusers,
_convert_fal_kontext_lora_to_diffusers,
_convert_hunyuan_video_lora_to_diffusers,
_convert_kohya_flux2_lora_to_diffusers,
_convert_kohya_flux_lora_to_diffusers,
_convert_musubi_wan_lora_to_diffusers,
_convert_non_diffusers_flux2_lora_to_diffusers,
@@ -5673,6 +5674,13 @@ class Flux2LoraLoaderMixin(LoraBaseMixin):
logger.warning(warn_msg)
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
is_kohya = any(".lora_down.weight" in k for k in state_dict)
if is_kohya:
state_dict = _convert_kohya_flux2_lora_to_diffusers(state_dict)
# Kohya already takes care of scaling the LoRA parameters with alpha.
out = (state_dict, metadata) if return_lora_metadata else state_dict
return out
is_peft_format = any(k.startswith("base_model.model.") for k in state_dict)
if is_peft_format:
state_dict = {k.replace("base_model.model.", "diffusion_model."): v for k, v in state_dict.items()}

View File

@@ -309,16 +309,16 @@ class ComponentSpec:
f"`type_hint` is required when loading a single file model but is missing for component: {self.name}"
)
from diffusers import AutoModel
# `torch_dtype` is not an accepted parameter for tokenizers and processors.
# As a result, it gets stored in `init_kwargs`, which are written to the config
# during save. This causes JSON serialization to fail when saving the component.
if self.type_hint is not None and not issubclass(self.type_hint, torch.nn.Module):
if self.type_hint is not None and not issubclass(self.type_hint, (torch.nn.Module, AutoModel)):
kwargs.pop("torch_dtype", None)
if self.type_hint is None:
try:
from diffusers import AutoModel
component = AutoModel.from_pretrained(pretrained_model_name_or_path, **load_kwargs, **kwargs)
except Exception as e:
raise ValueError(f"Unable to load {self.name} without `type_hint`: {e}")
@@ -332,12 +332,6 @@ class ComponentSpec:
else getattr(self.type_hint, "from_pretrained")
)
# `torch_dtype` is not an accepted parameter for tokenizers and processors.
# As a result, it gets stored in `init_kwargs`, which are written to the config
# during save. This causes JSON serialization to fail when saving the component.
if not issubclass(self.type_hint, torch.nn.Module):
kwargs.pop("torch_dtype", None)
try:
component = load_method(pretrained_model_name_or_path, **load_kwargs, **kwargs)
except Exception as e:

View File

@@ -324,17 +324,18 @@ class AudioLDM2Pipeline(DiffusionPipeline):
`inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
The sequence of generated hidden-states.
"""
cache_position_kwargs = {}
if is_transformers_version("<", "4.52.1"):
cache_position_kwargs["input_ids"] = inputs_embeds
else:
cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
cache_position_kwargs["device"] = (
self.language_model.device if getattr(self, "language_model", None) is not None else self.device
)
cache_position_kwargs["model_kwargs"] = model_kwargs
max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
if hasattr(self.language_model, "_get_initial_cache_position"):
cache_position_kwargs = {}
if is_transformers_version("<", "4.52.1"):
cache_position_kwargs["input_ids"] = inputs_embeds
else:
cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
cache_position_kwargs["device"] = (
self.language_model.device if getattr(self, "language_model", None) is not None else self.device
)
cache_position_kwargs["model_kwargs"] = model_kwargs
model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
for _ in range(max_new_tokens):
# prepare model inputs

View File

@@ -16,22 +16,29 @@ from typing import Callable
import numpy as np
import torch
import torchvision
import torchvision.transforms
import torchvision.transforms.functional
from transformers import AutoTokenizer, Qwen2_5_VLForConditionalGeneration
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...image_processor import PipelineImageInput
from ...models import AutoencoderKLWan, CosmosTransformer3DModel
from ...schedulers import UniPCMultistepScheduler
from ...utils import is_cosmos_guardrail_available, is_torch_xla_available, logging, replace_example_docstring
from ...utils import (
is_cosmos_guardrail_available,
is_torch_xla_available,
is_torchvision_available,
logging,
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
from ...video_processor import VideoProcessor
from ..pipeline_utils import DiffusionPipeline
from .pipeline_output import CosmosPipelineOutput
if is_torchvision_available():
import torchvision.transforms.functional
if is_cosmos_guardrail_available():
from cosmos_guardrail import CosmosSafetyChecker
else:

View File

@@ -31,7 +31,41 @@ from diffusers.modular_pipelines import (
WanModularPipeline,
)
from ..testing_utils import nightly, require_torch, slow
from ..testing_utils import nightly, require_torch, require_torch_accelerator, slow, torch_device
def _create_tiny_model_dir(model_dir):
    """
    Populate `model_dir` with a minimal custom-code model checkpoint for tests.

    Three files are written into the (already existing) directory:

    - `modeling.py`: source of a `TinyModel` class wrapping a single `torch.nn.Linear`.
    - `config.json`: config whose `auto_map` points `AutoModel` at `modeling.TinyModel`.
    - `diffusion_pytorch_model.bin`: randomly initialised weights for the linear layer.

    Args:
        model_dir (`str`): Path of an existing directory to write the files into.
    """
    TINY_MODEL_CODE = (
        "import torch\n"
        "from diffusers import ModelMixin, ConfigMixin\n"
        "from diffusers.configuration_utils import register_to_config\n"
        "\n"
        "class TinyModel(ModelMixin, ConfigMixin):\n"
        "    @register_to_config\n"
        "    def __init__(self, hidden_size=4):\n"
        "        super().__init__()\n"
        "        self.linear = torch.nn.Linear(hidden_size, hidden_size)\n"
        "\n"
        "    def forward(self, x):\n"
        "        return self.linear(x)\n"
    )

    # Explicit encoding so the generated files do not depend on the locale of the machine running the tests.
    with open(os.path.join(model_dir, "modeling.py"), "w", encoding="utf-8") as f:
        f.write(TINY_MODEL_CODE)

    config = {
        "_class_name": "TinyModel",
        "_diffusers_version": "0.0.0",
        "auto_map": {"AutoModel": "modeling.TinyModel"},
        "hidden_size": 4,
    }
    with open(os.path.join(model_dir, "config.json"), "w", encoding="utf-8") as f:
        json.dump(config, f)

    # Weight shapes match `hidden_size=4` from the config above.
    torch.save(
        {"linear.weight": torch.randn(4, 4), "linear.bias": torch.randn(4)},
        os.path.join(model_dir, "diffusion_pytorch_model.bin"),
    )
class DummyCustomBlockSimple(ModularPipelineBlocks):
@@ -342,6 +376,81 @@ class TestModularCustomBlocks:
loaded_pipe.update_components(custom_model=custom_model)
assert getattr(loaded_pipe, "custom_model", None) is not None
def test_automodel_type_hint_preserves_torch_dtype(self, tmp_path):
    """Regression test for #13271: `torch_dtype` must not be dropped when the component `type_hint` is AutoModel."""
    from diffusers import AutoModel

    # Write a tiny custom-code checkpoint to disk for the component spec to point at.
    checkpoint_path = tmp_path / "model"
    checkpoint_path.mkdir()
    model_dir = str(checkpoint_path)
    _create_tiny_model_dir(model_dir)

    class DtypeTestBlock(ModularPipelineBlocks):
        # Minimal block whose only purpose is to declare an AutoModel-typed component.
        @property
        def expected_components(self):
            return [ComponentSpec("model", AutoModel, pretrained_model_name_or_path=model_dir)]

        @property
        def inputs(self) -> List[InputParam]:
            return [InputParam("prompt", type_hint=str, required=True)]

        @property
        def intermediate_inputs(self) -> List[InputParam]:
            return []

        @property
        def intermediate_outputs(self) -> List[OutputParam]:
            return [OutputParam("output", type_hint=str)]

        def __call__(self, components, state: PipelineState) -> PipelineState:
            current = self.get_block_state(state)
            current.output = "test"
            self.set_block_state(state, current)
            return components, state

    pipe = DtypeTestBlock().init_pipeline()
    pipe.load_components(torch_dtype=torch.float16, trust_remote_code=True)

    # The requested dtype has to reach the loaded model.
    assert pipe.model.dtype == torch.float16
@require_torch_accelerator
def test_automodel_type_hint_preserves_device(self, tmp_path):
    """Check that `device_map` is forwarded to the model when the component `type_hint` is AutoModel."""
    from diffusers import AutoModel

    # Write a tiny custom-code checkpoint to disk for the component spec to point at.
    checkpoint_path = tmp_path / "model"
    checkpoint_path.mkdir()
    model_dir = str(checkpoint_path)
    _create_tiny_model_dir(model_dir)

    class DeviceTestBlock(ModularPipelineBlocks):
        # Minimal block whose only purpose is to declare an AutoModel-typed component.
        @property
        def expected_components(self):
            return [ComponentSpec("model", AutoModel, pretrained_model_name_or_path=model_dir)]

        @property
        def inputs(self) -> List[InputParam]:
            return [InputParam("prompt", type_hint=str, required=True)]

        @property
        def intermediate_inputs(self) -> List[InputParam]:
            return []

        @property
        def intermediate_outputs(self) -> List[OutputParam]:
            return [OutputParam("output", type_hint=str)]

        def __call__(self, components, state: PipelineState) -> PipelineState:
            current = self.get_block_state(state)
            current.output = "test"
            self.set_block_state(state, current)
            return components, state

    pipe = DeviceTestBlock().init_pipeline()
    pipe.load_components(device_map=torch_device, trust_remote_code=True)

    # The loaded model has to live on the requested accelerator.
    assert pipe.model.device.type == torch_device
def test_custom_block_loads_from_hub(self):
repo_id = "hf-internal-testing/tiny-modular-diffusers-block"
block = ModularPipelineBlocks.from_pretrained(repo_id, trust_remote_code=True)

View File

@@ -1534,14 +1534,18 @@ class PipelineTesterMixin:
pipe.set_progress_bar_config(disable=None)
pipe.to("cpu")
model_devices = [component.device.type for component in components.values() if hasattr(component, "device")]
model_devices = [
component.device.type for component in components.values() if getattr(component, "device", None)
]
self.assertTrue(all(device == "cpu" for device in model_devices))
output_cpu = pipe(**self.get_dummy_inputs("cpu"))[0]
self.assertTrue(np.isnan(output_cpu).sum() == 0)
pipe.to(torch_device)
model_devices = [component.device.type for component in components.values() if hasattr(component, "device")]
model_devices = [
component.device.type for component in components.values() if getattr(component, "device", None)
]
self.assertTrue(all(device == torch_device for device in model_devices))
output_device = pipe(**self.get_dummy_inputs(torch_device))[0]
@@ -1552,11 +1556,11 @@ class PipelineTesterMixin:
pipe = self.pipeline_class(**components)
pipe.set_progress_bar_config(disable=None)
model_dtypes = [component.dtype for component in components.values() if hasattr(component, "dtype")]
model_dtypes = [component.dtype for component in components.values() if getattr(component, "dtype", None)]
self.assertTrue(all(dtype == torch.float32 for dtype in model_dtypes))
pipe.to(dtype=torch.float16)
model_dtypes = [component.dtype for component in components.values() if hasattr(component, "dtype")]
model_dtypes = [component.dtype for component in components.values() if getattr(component, "dtype", None)]
self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes))
def test_attention_slicing_forward_pass(self, expected_max_diff=1e-3):

View File

@@ -43,7 +43,7 @@ def filter_pipelines(usage_dict, usage_cutoff=10000):
def fetch_pipeline_objects():
models = api.list_models(library="diffusers")
models = api.list_models(filter="diffusers")
downloads = defaultdict(int)
for model in models: