Compare commits

...

4 Commits

Author SHA1 Message Date
Luc Georges
35c4ccb166 feat: bump safetensors to 0.8.0-rc.0 2026-04-14 19:26:28 +02:00
Sayak Paul
f65f135f64 [tests] xfail clip related issues. (#13454)
xfail clip related issues.
2026-04-14 22:33:25 +05:30
Sayak Paul
273b445426 [tests] fix deprecated attention processor testing. (#13469)
fix deprecated attention processor testing.
2026-04-14 22:05:39 +05:30
Alexey Zolotenkov
526498d219 Fix Qwen Image DreamBooth prior-preservation batch ordering (#13441)
Fix Qwen Image DreamBooth prior-preservation batching

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2026-04-14 18:00:37 +05:30
4 changed files with 29 additions and 7 deletions

View File

@@ -1533,9 +1533,9 @@ def main(args):
# from the cat above, but collate_fn also doubles the prompts list. Use half the
# prompts count to avoid a 2x over-repeat that produces more embeddings than latents.
num_repeat_elements = len(prompts) // 2 if args.with_prior_preservation else len(prompts)
prompt_embeds = prompt_embeds.repeat(num_repeat_elements, 1, 1)
prompt_embeds = prompt_embeds.repeat_interleave(num_repeat_elements, dim=0)
if prompt_embeds_mask is not None:
prompt_embeds_mask = prompt_embeds_mask.repeat(num_repeat_elements, 1)
prompt_embeds_mask = prompt_embeds_mask.repeat_interleave(num_repeat_elements, dim=0)
# Convert images to latent space
if args.cache_latents:
model_input = latents_cache[step].sample()
@@ -1602,10 +1602,11 @@ def main(args):
# Chunk the noise and model_pred into two parts and compute the loss on each part separately.
model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0)
target, target_prior = torch.chunk(target, 2, dim=0)
weighting, weighting_prior = torch.chunk(weighting, 2, dim=0)
# Compute prior loss
prior_loss = torch.mean(
(weighting.float() * (model_pred_prior.float() - target_prior.float()) ** 2).reshape(
(weighting_prior.float() * (model_pred_prior.float() - target_prior.float()) ** 2).reshape(
target_prior.shape[0], -1
),
1,

View File

@@ -124,7 +124,7 @@ _deps = [
"pytest-xdist",
"python>=3.10.0",
"ruff==0.9.10",
"safetensors>=0.3.1",
"safetensors>=0.8.0-rc.0",
"sentencepiece>=0.1.91,!=0.1.92",
"GitPython<3.1.19",
"scipy",

View File

@@ -1,9 +1,11 @@
import importlib.metadata
import tempfile
import unittest
import numpy as np
import pytest
import torch
from packaging import version
from diffusers import DiffusionPipeline
from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor
@@ -87,9 +89,10 @@ class DeprecatedAttentionBlockTests(unittest.TestCase):
return pytestconfig.getoption("dist") == "loadfile"
@pytest.mark.xfail(
condition=torch.device(torch_device).type == "cuda" and is_dist_enabled,
reason="Test currently fails on our GPU CI because of `loadfile`. Note that it only fails when the tests are distributed from `pytest ... tests/models`. If the tests are run individually, even with `loadfile` it won't fail.",
strict=True,
condition=(torch.device(torch_device).type == "cuda" and is_dist_enabled)
or version.parse(importlib.metadata.version("transformers")).is_devrelease,
reason="Test currently fails on our GPU CI because of `loadfile` or with source installation of transformers due to CLIPTextModel key prefix changes.",
strict=False,
)
def test_conversion_when_using_device_map(self):
pipe = DiffusionPipeline.from_pretrained(

View File

@@ -368,6 +368,12 @@ class DownloadTests(unittest.TestCase):
assert any((f.endswith(".onnx")) for f in files)
assert any((f.endswith(".pb")) for f in files)
@pytest.mark.xfail(
condition=is_transformers_version(">", "4.56.2"),
reason="CLIPTextModel architecture was flattened in transformers>4.56.2 without backward-compat key mapping. "
"See https://github.com/huggingface/transformers/issues/45390",
strict=False,
)
def test_download_no_safety_checker(self):
prompt = "hello"
pipe = StableDiffusionPipeline.from_pretrained(
@@ -423,6 +429,12 @@ class DownloadTests(unittest.TestCase):
assert np.max(np.abs(out - out_2)) < 1e-3
@pytest.mark.xfail(
condition=is_transformers_version(">", "4.56.2"),
reason="CLIPTextModel architecture was flattened in transformers>4.56.2 without backward-compat key mapping. "
"See https://github.com/huggingface/transformers/issues/45390",
strict=False,
)
def test_cached_files_are_used_when_no_internet(self):
# A mock response for an HTTP head request to emulate server down
response_mock = mock.Mock()
@@ -450,6 +462,12 @@ class DownloadTests(unittest.TestCase):
if p1.data.ne(p2.data).sum() > 0:
assert False, "Parameters not the same!"
@pytest.mark.xfail(
condition=is_transformers_version(">", "4.56.2"),
reason="CLIPTextModel architecture was flattened in transformers>4.56.2 without backward-compat key mapping. "
"See https://github.com/huggingface/transformers/issues/45390",
strict=False,
)
def test_local_files_only_are_used_when_no_internet(self):
# A mock response for an HTTP head request to emulate server down
response_mock = mock.Mock()