Compare commits

...

7 Commits

Author SHA1 Message Date
Dhruv Nair
96f97f8214 update 2026-02-24 09:45:26 +01:00
Dhruv Nair
f8b95ff263 Merge branch 'main' into custom-device-map 2026-02-24 09:20:16 +01:00
Sayak Paul
5e94d62eb4 migrate to transformers v5 (#12976)
* switch to transformers main again.

* more

* up

* up

* fix group offloading.

* attributes

* up

* up

* tie embedding issue.

* fix t5 stuff for more.

* matrix configuration to see differences between 4.57.3 and main failures.

* change qwen expected slice because of how init is handled in v5.

* same stuff.

* up

* up

* Revert "up"

This reverts commit 515dd06db5.

* Revert "up"

This reverts commit 5274ffdd7f.

* up

* up

* fix with peft_format.

* just keep main for easier debugging.

* remove torchvision.

* empty

* up

* up with skyreelsv2 fixes.

* fix skyreels type annotation.

* up

* up

* fix variant loading issues.

* more fixes.

* fix dduf

* fix

* fix

* fix

* more fixes

* fixes

* up

* up

* fix dduf test

* up

* more

* update

* hopefully, final?

* one last breath

* always install from main

* up

* audioldm tests

* up

* fix PRX tests.

* up

* kandinsky fixes

* qwen fixes.

* prx

* hidream
2026-02-24 10:53:56 +05:30
dg845
7ab2011759 Fix AutoModel typing Import Error (#13178)
Fix typing import by converting to Python 3.9+ style type hint
2026-02-24 07:58:43 +05:30
Dhruv Nair
4890e9bf70 Allow Automodel to use from_config with custom code. (#13123)
* update

* update
2026-02-23 21:55:59 +05:30
David Bertoin
f1e5914120 Fix T5GemmaEncoder loading for transformers 5.x composite T5GemmaConfig (#13143) 2026-02-23 15:45:45 +05:30
DN6
cfff46069d add custom mesh support 2026-02-02 13:12:09 +05:30
100 changed files with 766 additions and 255 deletions

View File

@@ -92,7 +92,6 @@ jobs:
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu
name: ${{ matrix.config.name }}
runs-on:
@@ -115,8 +114,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
@@ -218,8 +216,6 @@ jobs:
run_lora_tests:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
name: LoRA tests with PEFT main
@@ -247,9 +243,8 @@ jobs:
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
uv pip install -U tokenizers
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -275,6 +270,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_main_test_reports
name: pr_lora_test_reports
path: reports

View File

@@ -131,8 +131,7 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
@@ -202,8 +201,7 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
@@ -264,8 +262,7 @@ jobs:
nvidia-smi
- name: Install dependencies
run: |
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip install -e ".[quality,training]"
- name: Environment

View File

@@ -76,8 +76,7 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py
@@ -129,8 +128,7 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
@@ -182,8 +180,7 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment
run: |
python utils/print_env.py

View File

@@ -17,6 +17,9 @@ import logging
import os
import sys
import tempfile
import unittest
from diffusers.utils import is_transformers_version
sys.path.append("..")
@@ -30,6 +33,7 @@ stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
class CustomDiffusion(ExamplesTestsAccelerate):
def test_custom_diffusion(self):
with tempfile.TemporaryDirectory() as tmpdir:

View File

@@ -48,6 +48,7 @@ _GO_LC_SUPPORTED_PYTORCH_LAYERS = (
torch.nn.ConvTranspose2d,
torch.nn.ConvTranspose3d,
torch.nn.Linear,
torch.nn.Embedding,
# TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX
# because of double invocation of the same norm layer in CogVideoXLayerNorm
)

View File

@@ -22,7 +22,12 @@ from tokenizers import Tokenizer as TokenizerFast
from torch import nn
from ..models.modeling_utils import load_state_dict
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
from ..utils import (
_get_model_file,
is_accelerate_available,
is_transformers_available,
logging,
)
if is_transformers_available():

View File

@@ -60,6 +60,16 @@ class ContextParallelConfig:
rotate_method (`str`, *optional*, defaults to `"allgather"`):
Method to use for rotating key/value states across devices in ring attention. Currently, only `"allgather"`
is supported.
ulysses_anything (`bool`, *optional*, defaults to `False`):
Whether to enable "Ulysses Anything" mode, which supports arbitrary sequence lengths and head counts that
are not evenly divisible by `ulysses_degree`. When enabled, `ulysses_degree` must be greater than 1 and
`ring_degree` must be 1.
mesh (`torch.distributed.device_mesh.DeviceMesh`, *optional*):
A custom device mesh to use for context parallelism. If provided, this mesh will be used instead of
creating a new one. This is useful when combining context parallelism with other parallelism strategies
(e.g., FSDP, tensor parallelism) that share the same device mesh. The mesh must have both "ring" and
"ulysses" dimensions. Use size 1 for dimensions not being used (e.g., `mesh_shape=(2, 1, 4)` with
`mesh_dim_names=("ring", "ulysses", "fsdp")` for ring attention only with FSDP).
"""
@@ -68,6 +78,7 @@ class ContextParallelConfig:
convert_to_fp32: bool = True
# TODO: support alltoall
rotate_method: Literal["allgather", "alltoall"] = "allgather"
mesh: torch.distributed.device_mesh.DeviceMesh | None = None
# Whether to enable ulysses anything attention to support
# any sequence lengths and any head numbers.
ulysses_anything: bool = False
@@ -124,7 +135,7 @@ class ContextParallelConfig:
f"The product of `ring_degree` ({self.ring_degree}) and `ulysses_degree` ({self.ulysses_degree}) must not exceed the world size ({world_size})."
)
self._flattened_mesh = self._mesh._flatten()
self._flattened_mesh = self._mesh["ring", "ulysses"]._flatten()
self._ring_mesh = self._mesh["ring"]
self._ulysses_mesh = self._mesh["ulysses"]
self._ring_local_rank = self._ring_mesh.get_local_rank()

View File

@@ -30,10 +30,126 @@ class AutoModel(ConfigMixin):
def __init__(self, *args, **kwargs):
raise EnvironmentError(
f"{self.__class__.__name__} is designed to be instantiated "
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)`, "
f"`{self.__class__.__name__}.from_config(config)`, or "
f"`{self.__class__.__name__}.from_pipe(pipeline)` methods."
)
@classmethod
def from_config(cls, pretrained_model_name_or_path_or_dict: str | os.PathLike | dict | None = None, **kwargs):
    r"""
    Create a model with randomly initialised weights from a configuration. No pretrained weights are loaded.

    The concrete model class is resolved from the configuration itself: custom code referenced through an
    `auto_map` entry (when remote code is trusted), a diffusers class named by `_class_name`, or the transformers
    `AutoModel` when only `model_type` is present.

    Parameters:
        pretrained_model_name_or_path_or_dict (`str`, `os.PathLike`, or `dict`):
            One of:

                - The *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model configuration
                  hosted on the Hub.
                - The path to a *directory* (for example `./my_model_directory`) that contains a model
                  configuration file.
                - A configuration dictionary.
        cache_dir (`Union[str, os.PathLike]`, *optional*):
            Directory in which a downloaded configuration is cached when the standard cache is not used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether to (re-)download the configuration even if a cached copy exists.
        proxies (`Dict[str, str]`, *optional*):
            Proxy servers to use, keyed by protocol or endpoint.
        local_files_only (`bool`, *optional*, defaults to `False`):
            Whether to restrict loading to local configuration files.
        token (`str` or *bool*, *optional*):
            Token used as HTTP bearer authorization for remote files.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether to trust remote code.
        subfolder (`str`, *optional*):
            Location of the model files inside a larger model repository on the Hub or locally.

    Returns:
        A model object instantiated from the config with random weights.

    Raises:
        ValueError: If no config source is given, if the config carries neither `_class_name` nor `model_type`,
            or if no model class can be resolved for the config.

    Example:

    ```py
    from diffusers import AutoModel

    model = AutoModel.from_config("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
    ```
    """
    subfolder = kwargs.pop("subfolder", None)
    trust_remote_code = kwargs.pop("trust_remote_code", False)

    # Hub-related options are removed from `kwargs` so that only model arguments reach `model_cls.from_config`.
    hub_kwargs = {}
    for option in ("cache_dir", "force_download", "local_files_only", "proxies", "revision", "token"):
        hub_kwargs[option] = kwargs.pop(option, None)

    source = pretrained_model_name_or_path_or_dict
    if source is None:
        raise ValueError(
            "Please provide a `pretrained_model_name_or_path_or_dict` as the first positional argument."
        )

    if isinstance(source, (str, os.PathLike)):
        # A repo id or local path: read the configuration from there.
        pretrained_model_name_or_path = source
        config = cls.load_config(pretrained_model_name_or_path, subfolder=subfolder, **hub_kwargs)
    else:
        # Already a configuration mapping; its origin (if recorded) is needed to locate custom code.
        config = source
        pretrained_model_name_or_path = config.get("_name_or_path", None)

    has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
    trust_remote_code = resolve_trust_remote_code(
        trust_remote_code, pretrained_model_name_or_path, has_remote_code
    )

    use_remote_code = has_remote_code and trust_remote_code
    if use_remote_code:
        # `auto_map` entries look like "<module>.<ClassName>"; the module lives next to the config.
        module_name, class_name = config["auto_map"][cls.__name__].split(".")
        model_cls = get_class_from_dynamic_module(
            pretrained_model_name_or_path,
            subfolder=subfolder,
            module_file=module_name + ".py",
            class_name=class_name,
            **hub_kwargs,
        )
    else:
        if "_class_name" in config:
            library, class_name = "diffusers", config["_class_name"]
        elif "model_type" in config:
            library, class_name = "transformers", "AutoModel"
        else:
            raise ValueError(
                f"Couldn't find a model class associated with the config: {config}. Make sure the config "
                "contains a `_class_name` or `model_type` key."
            )

        from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates

        model_cls, _ = get_class_obj_and_candidates(
            library_name=library,
            class_name=class_name,
            importable_classes=ALL_IMPORTABLE_CLASSES,
            pipelines=None,
            is_pipeline_module=False,
        )

    if model_cls is None:
        raise ValueError(f"AutoModel can't find a model linked to {class_name}.")

    return model_cls.from_config(config, **kwargs)
@classmethod
@validate_hf_hub_args
def from_pretrained(cls, pretrained_model_or_path: str | os.PathLike | None = None, **kwargs):

View File

@@ -1567,7 +1567,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
mesh = None
if config.context_parallel_config is not None:
cp_config = config.context_parallel_config
mesh = torch.distributed.device_mesh.init_device_mesh(
mesh = cp_config.mesh or torch.distributed.device_mesh.init_device_mesh(
device_type=device_type,
mesh_shape=cp_config.mesh_shape,
mesh_dim_names=cp_config.mesh_dim_names,

View File

@@ -502,6 +502,10 @@ class AudioLDM2Pipeline(DiffusionPipeline):
text_input_ids,
attention_mask=attention_mask,
)
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
# otherwise use it directly (Transformers v4)
if hasattr(prompt_embeds, "pooler_output"):
prompt_embeds = prompt_embeds.pooler_output
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
prompt_embeds = prompt_embeds[:, None, :]
# make sure that we attend to this single hidden-state
@@ -610,6 +614,10 @@ class AudioLDM2Pipeline(DiffusionPipeline):
uncond_input_ids,
attention_mask=negative_attention_mask,
)
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
# otherwise use it directly (Transformers v4)
if hasattr(negative_prompt_embeds, "pooler_output"):
negative_prompt_embeds = negative_prompt_embeds.pooler_output
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
negative_prompt_embeds = negative_prompt_embeds[:, None, :]
# make sure that we attend to this single hidden-state

View File

@@ -287,6 +287,9 @@ class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)

View File

@@ -262,6 +262,9 @@ class Cosmos2_5_TransferPipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)

View File

@@ -20,6 +20,8 @@ class MultilingualCLIP(PreTrainedModel):
self.LinearTransformation = torch.nn.Linear(
in_features=config.transformerDimensions, out_features=config.numDims
)
if hasattr(self, "post_init"):
self.post_init()
def forward(self, input_ids, attention_mask):
embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0]

View File

@@ -781,6 +781,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
self.prefix_encoder = PrefixEncoder(config)
self.dropout = torch.nn.Dropout(0.1)
if hasattr(self, "post_init"):
self.post_init()
def get_input_embeddings(self):
return self.embedding.word_embeddings
@@ -810,7 +813,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
batch_size, seq_length = input_ids.shape

View File

@@ -341,6 +341,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
save_method_accept_variant = "variant" in save_method_signature.parameters
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters
save_kwargs = {}
if save_method_accept_safe:
@@ -350,6 +351,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
if save_method_accept_max_shard_size and max_shard_size is not None:
# max_shard_size is expected to not be None in ModelMixin
save_kwargs["max_shard_size"] = max_shard_size
if save_method_accept_peft_format:
# Set save_peft_format=False for transformers>=5.0.0 compatibility
# In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix
# to adapter keys, but from_pretrained expects keys without this prefix
save_kwargs["save_peft_format"] = False
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)

View File

@@ -24,14 +24,25 @@ except OptionalDependencyNotAvailable:
else:
_import_structure["pipeline_prx"] = ["PRXPipeline"]
# Import T5GemmaEncoder for pipeline loading compatibility
# Wrap T5GemmaEncoder to pass config.encoder (T5GemmaModuleConfig) instead of the
# composite T5GemmaConfig, which lacks flat attributes expected by T5GemmaEncoder.__init__.
try:
if is_transformers_available():
import transformers
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder as _T5GemmaEncoder
class T5GemmaEncoder(_T5GemmaEncoder):
    """
    `T5GemmaEncoder` variant whose `from_pretrained` supplies the encoder sub-config.

    When the caller does not pass `config`, the composite `T5GemmaConfig` is loaded and its `encoder` attribute
    (if present) is handed to the parent `from_pretrained` as `config`.
    """

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        """
        Load the encoder, defaulting `config` to the `encoder` entry of the checkpoint's `T5GemmaConfig`.

        Args:
            pretrained_model_name_or_path: Repo id or local path, passed through unchanged.
            *args: Forwarded to the parent `from_pretrained`.
            **kwargs: Forwarded to the parent `from_pretrained`. An explicit `config` disables the automatic
                config lookup.

        Returns:
            Whatever the parent `from_pretrained` returns.
        """
        if "config" not in kwargs:
            from transformers.models.t5gemma.configuration_t5gemma import T5GemmaConfig

            # Resolve the config from the same location as the weights: without forwarding these options a
            # checkpoint stored in a subfolder, at a specific revision, or behind a token would be looked up
            # with defaults. The options are copied, not popped, so the parent call still receives them.
            location_options = ("cache_dir", "force_download", "local_files_only", "revision", "subfolder", "token")
            config_kwargs = {name: kwargs[name] for name in location_options if name in kwargs}

            config = T5GemmaConfig.from_pretrained(pretrained_model_name_or_path, **config_kwargs)
            if hasattr(config, "encoder"):
                kwargs["config"] = config.encoder
        return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
_additional_imports["T5GemmaEncoder"] = T5GemmaEncoder
# Patch transformers module directly for serialization
if not hasattr(transformers, "T5GemmaEncoder"):
transformers.T5GemmaEncoder = T5GemmaEncoder
except ImportError:

View File

@@ -17,7 +17,7 @@ from typing import Any, Callable
import regex as re
import torch
from transformers import AutoTokenizer, UMT5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -132,7 +132,7 @@ class SkyReelsV2Pipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixin):
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: UMT5EncoderModel,
text_encoder: T5EncoderModel | UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -19,7 +19,7 @@ from copy import deepcopy
from typing import Any, Callable
import torch
from transformers import AutoTokenizer, UMT5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -153,7 +153,7 @@ class SkyReelsV2DiffusionForcingPipeline(DiffusionPipeline, SkyReelsV2LoraLoader
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: UMT5EncoderModel,
text_encoder: T5EncoderModel | UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -20,7 +20,7 @@ from typing import Any, Callable
import PIL
import torch
from transformers import AutoTokenizer, UMT5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from diffusers.image_processor import PipelineImageInput
from diffusers.utils.torch_utils import randn_tensor
@@ -158,7 +158,7 @@ class SkyReelsV2DiffusionForcingImageToVideoPipeline(DiffusionPipeline, SkyReels
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: UMT5EncoderModel,
text_encoder: T5EncoderModel | UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -21,7 +21,7 @@ from typing import Any, Callable
import torch
from PIL import Image
from transformers import AutoTokenizer, UMT5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -214,7 +214,7 @@ class SkyReelsV2DiffusionForcingVideoToVideoPipeline(DiffusionPipeline, SkyReels
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: UMT5EncoderModel,
text_encoder: T5EncoderModel | UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -18,7 +18,7 @@ from typing import Any, Callable
import PIL
import regex as re
import torch
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, UMT5EncoderModel
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, T5EncoderModel, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...image_processor import PipelineImageInput
@@ -157,7 +157,7 @@ class SkyReelsV2ImageToVideoPipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixi
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: UMT5EncoderModel,
text_encoder: T5EncoderModel | UMT5EncoderModel,
image_encoder: CLIPVisionModelWithProjection,
image_processor: CLIPProcessor,
transformer: SkyReelsV2Transformer3DModel,

View File

@@ -112,6 +112,8 @@ def _load_transformers_model_from_dduf(
tensors = safetensors.torch.load(mmap)
# Update the state dictionary with tensors
state_dict.update(tensors)
# `from_pretrained` sets the model to eval mode by default, which is the
# correct behavior for inference. Do not call `model.train()` here.
return cls.from_pretrained(
pretrained_model_name_or_path=None,
config=config,

View File

@@ -1,5 +1,5 @@
import unittest
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from transformers import CLIPTextModel, LongformerModel
@@ -20,7 +20,9 @@ class TestAutoModel(unittest.TestCase):
side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}],
)
def test_load_from_config_transformers_with_subfolder(self, mock_load_config):
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
assert isinstance(model, CLIPTextModel)
def test_load_from_config_without_subfolder(self):
@@ -28,5 +30,73 @@ class TestAutoModel(unittest.TestCase):
assert isinstance(model, LongformerModel)
def test_load_from_model_index(self):
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
assert isinstance(model, CLIPTextModel)
class TestAutoModelFromConfig(unittest.TestCase):
    """Unit tests for `AutoModel.from_config`; class resolution is mocked so no model is actually built."""

    @patch(
        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
        return_value=(MagicMock(), None),
    )
    def test_from_config_with_dict_diffusers_class(self, mock_get_class):
        """A dict carrying `_class_name` is resolved through the diffusers library."""
        config = {"_class_name": "UNet2DConditionModel", "sample_size": 64}
        mock_model = MagicMock()
        mock_get_class.return_value[0].from_config.return_value = mock_model

        result = AutoModel.from_config(config)

        mock_get_class.assert_called_once_with(
            library_name="diffusers",
            class_name="UNet2DConditionModel",
            importable_classes=unittest.mock.ANY,
            pipelines=None,
            is_pipeline_module=False,
        )
        mock_get_class.return_value[0].from_config.assert_called_once_with(config)
        assert result is mock_model

    @patch(
        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
        return_value=(MagicMock(), None),
    )
    @patch("diffusers.models.AutoModel.load_config", return_value={"_class_name": "UNet2DConditionModel"})
    def test_from_config_with_string_path(self, mock_load_config, mock_get_class):
        """A string source triggers `load_config` before the class is resolved."""
        mock_model = MagicMock()
        mock_get_class.return_value[0].from_config.return_value = mock_model

        result = AutoModel.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet")

        mock_load_config.assert_called_once()
        assert result is mock_model

    def test_from_config_raises_on_missing_class_info(self):
        """A config with neither `_class_name` nor `model_type` is rejected with an explanatory error."""
        config = {"some_key": "some_value"}
        # `assertRaises(..., msg=...)` only customises the failure message; `assertRaisesRegex` is needed to
        # actually check the text of the raised exception.
        with self.assertRaisesRegex(ValueError, "Couldn't find a model class"):
            AutoModel.from_config(config)

    @patch(
        "diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
        return_value=(MagicMock(), None),
    )
    def test_from_config_with_model_type_routes_to_transformers(self, mock_get_class):
        """A dict carrying only `model_type` is routed to the transformers `AutoModel`."""
        config = {"model_type": "clip_text_model"}
        mock_model = MagicMock()
        mock_get_class.return_value[0].from_config.return_value = mock_model

        result = AutoModel.from_config(config)

        mock_get_class.assert_called_once_with(
            library_name="transformers",
            class_name="AutoModel",
            importable_classes=unittest.mock.ANY,
            pipelines=None,
            is_pipeline_module=False,
        )
        assert result is mock_model

    def test_from_config_raises_on_none(self):
        """Omitting the config source is rejected with an explanatory error."""
        # See the note in `test_from_config_raises_on_missing_class_info`: match on the exception text.
        with self.assertRaisesRegex(ValueError, "Please provide a `pretrained_model_name_or_path_or_dict`"):
            AutoModel.from_config(None)

View File

@@ -60,12 +60,7 @@ def _context_parallel_worker(rank, world_size, master_port, model_class, init_di
model.eval()
# Move inputs to device
inputs_on_device = {}
for key, value in inputs_dict.items():
if isinstance(value, torch.Tensor):
inputs_on_device[key] = value.to(device)
else:
inputs_on_device[key] = value
inputs_on_device = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs_dict.items()}
# Enable context parallelism
cp_config = ContextParallelConfig(**cp_dict)
@@ -89,6 +84,59 @@ def _context_parallel_worker(rank, world_size, master_port, model_class, init_di
dist.destroy_process_group()
def _custom_mesh_worker(
    rank,
    world_size,
    master_port,
    model_class,
    init_dict,
    cp_dict,
    mesh_shape,
    mesh_dim_names,
    inputs_dict,
    return_dict,
):
    """
    Per-process entry point for context-parallel tests that pass a caller-built DeviceMesh.

    Initialises an NCCL process group, builds the model on `cuda:{rank}`, creates a device mesh from
    `mesh_shape` / `mesh_dim_names`, enables parallelism with it and runs one forward pass. Rank 0 reports the
    outcome through `return_dict` (`status`, plus `output_shape` or `error`).
    """
    try:
        os.environ.update(
            {
                "MASTER_ADDR": "localhost",
                "MASTER_PORT": str(master_port),
                "RANK": str(rank),
                "WORLD_SIZE": str(world_size),
            }
        )

        dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)

        torch.cuda.set_device(rank)
        device = torch.device(f"cuda:{rank}")

        model = model_class(**init_dict)
        model.to(device)
        model.eval()

        # Only tensors are moved; every other input value is passed through untouched.
        device_inputs = {
            name: (value.to(device) if isinstance(value, torch.Tensor) else value)
            for name, value in inputs_dict.items()
        }

        # DeviceMesh must be created after init_process_group, inside each worker process.
        custom_mesh = torch.distributed.device_mesh.init_device_mesh(
            "cuda", mesh_shape=mesh_shape, mesh_dim_names=mesh_dim_names
        )
        model.enable_parallelism(config=ContextParallelConfig(**cp_dict, mesh=custom_mesh))

        with torch.no_grad():
            output = model(**device_inputs, return_dict=False)[0]

        if rank == 0:
            return_dict["status"] = "success"
            return_dict["output_shape"] = list(output.shape)
    except Exception as exc:
        # Only rank 0 writes to the shared result dict.
        if rank == 0:
            return_dict["status"] = "error"
            return_dict["error"] = str(exc)
    finally:
        if dist.is_initialized():
            dist.destroy_process_group()
@is_context_parallel
@require_torch_multi_accelerator
class ContextParallelTesterMixin:
@@ -126,3 +174,48 @@ class ContextParallelTesterMixin:
assert return_dict.get("status") == "success", (
f"Context parallel inference failed: {return_dict.get('error', 'Unknown error')}"
)
@pytest.mark.parametrize(
    "cp_type,mesh_shape,mesh_dim_names",
    [
        ("ring_degree", (2, 1, 1), ("ring", "ulysses", "fsdp")),
        ("ulysses_degree", (1, 2, 1), ("ring", "ulysses", "fsdp")),
    ],
    ids=["ring-3d-fsdp", "ulysses-3d-fsdp"],
)
def test_context_parallel_custom_mesh(self, cp_type, mesh_shape, mesh_dim_names):
    """Run context-parallel inference across two processes using a caller-supplied 3D device mesh."""
    if not torch.distributed.is_available():
        pytest.skip("torch.distributed is not available.")

    if getattr(self.model_class, "_cp_plan", None) is None:
        pytest.skip("Model does not have a _cp_plan defined for context parallel inference.")

    world_size = 2
    init_dict = self.get_init_dict()

    # Inputs are handed to the spawned workers on CPU; each worker moves them to its own device.
    inputs_dict = {}
    for name, value in self.get_dummy_inputs().items():
        inputs_dict[name] = value.cpu() if isinstance(value, torch.Tensor) else value

    cp_dict = {cp_type: world_size}
    master_port = _find_free_port()

    # Manager-backed dict lets the workers report their result back to this process.
    return_dict = mp.Manager().dict()

    worker_args = (
        world_size,
        master_port,
        self.model_class,
        init_dict,
        cp_dict,
        mesh_shape,
        mesh_dim_names,
        inputs_dict,
        return_dict,
    )
    mp.spawn(_custom_mesh_worker, args=worker_args, nprocs=world_size, join=True)

    assert return_dict.get("status") == "success", (
        f"Custom mesh context parallel inference failed: {return_dict.get('error', 'Unknown error')}"
    )

View File

@@ -282,6 +282,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_inputs = text_inputs["input_ids"].to(torch_device)
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
if hasattr(clap_prompt_embeds, "pooler_output"):
clap_prompt_embeds = clap_prompt_embeds.pooler_output
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
text_inputs = audioldm_pipe.tokenizer_2(
@@ -341,6 +343,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_inputs = text_inputs["input_ids"].to(torch_device)
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
if hasattr(clap_prompt_embeds, "pooler_output"):
clap_prompt_embeds = clap_prompt_embeds.pooler_output
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
text_inputs = audioldm_pipe.tokenizer_2(

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import T5EncoderModel, T5TokenizerFast
from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -89,7 +89,8 @@ class BriaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = FlowMatchEulerDiscreteScheduler()
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -41,7 +41,8 @@ class ChromaPipelineFastTests(
)
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -42,7 +42,8 @@ class ChromaImg2ImgPipelineFastTests(
)
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,6 +17,7 @@ import unittest
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -71,7 +72,8 @@ class ChronoEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -117,7 +117,8 @@ class CogVideoXPipelineFastTests(
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -235,6 +236,9 @@ class CogVideoXPipelineFastTests(
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -104,7 +104,8 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -228,6 +229,9 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -113,7 +113,8 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -237,6 +238,9 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler
@@ -99,7 +99,8 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel
@@ -89,7 +89,8 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = CogVideoXDDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -108,7 +108,7 @@ class CogView4PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generator = torch.Generator(device=device).manual_seed(seed)
inputs = {
"prompt": "dance monkey",
"negative_prompt": "",
"negative_prompt": "bad",
"generator": generator,
"num_inference_steps": 2,
"guidance_scale": 6.0,

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -122,7 +122,8 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -248,6 +249,9 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -97,7 +97,8 @@ class FluxControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMixin, Fl
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -13,9 +13,7 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
torch_device,
)
from ...testing_utils import torch_device
from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist
@@ -70,7 +68,8 @@ class FluxControlNetImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,15 +3,7 @@ import unittest
import numpy as np
import torch
# torch_device, # {{ edit_1 }} Removed unused import
from transformers import (
AutoTokenizer,
CLIPTextConfig,
CLIPTextModel,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -22,11 +14,7 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
enable_full_determinism,
floats_tensor,
torch_device,
)
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
from ..test_pipelines_common import PipelineTesterMixin
@@ -85,7 +73,8 @@ class FluxControlNetInpaintPipelineTests(unittest.TestCase, PipelineTesterMixin)
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -96,7 +96,10 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -28,10 +35,7 @@ from diffusers import (
from diffusers.models import SD3ControlNetModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ...testing_utils import enable_full_determinism, torch_device
from ..test_pipelines_common import PipelineTesterMixin
@@ -103,7 +107,8 @@ class StableDiffusion3ControlInpaintNetPipelineFastTests(unittest.TestCase, Pipe
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -18,7 +18,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -117,7 +124,8 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler
@@ -107,7 +107,8 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
rho=7.0,
final_sigmas_type="sigma_min",
)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -232,6 +233,9 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -95,7 +95,8 @@ class Cosmos2TextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -96,7 +96,8 @@ class Cosmos2VideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler
@@ -108,7 +108,8 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
rho=7.0,
final_sigmas_type="sigma_min",
)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -245,6 +246,9 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -2,7 +2,7 @@ import tempfile
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import DDPMScheduler, UNet2DConditionModel
from diffusers.models.attention_processor import AttnAddedKVProcessor
@@ -18,7 +18,8 @@ from ..test_pipelines_common import to_np
class IFPipelineTesterMixin:
def _get_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -75,7 +76,8 @@ class IFPipelineTesterMixin:
def _get_superresolution_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -250,6 +252,9 @@ class IFPipelineTesterMixin:
# This should be handled in the base test and then this method can be removed.
def _test_save_load_local(self):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)

View File

@@ -18,9 +18,7 @@ import unittest
import torch
from diffusers import (
IFPipeline,
)
from diffusers import IFPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available

View File

@@ -4,7 +4,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -93,7 +93,8 @@ class FluxPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel
@@ -53,7 +53,8 @@ class FluxControlPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -57,7 +57,8 @@ class FluxControlImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -58,7 +58,8 @@ class FluxControlInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel
@@ -58,7 +58,8 @@ class FluxFillPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel
@@ -55,7 +55,8 @@ class FluxImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel
@@ -55,7 +55,8 @@ class FluxInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,7 +79,8 @@ class FluxKontextPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,7 +79,8 @@ class FluxKontextInpaintPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel
from diffusers.utils import is_transformers_version
@@ -57,7 +57,8 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
glm_config = GlmImageConfig(

View File

@@ -18,6 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
@@ -94,7 +95,8 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
torch.manual_seed(0)
text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
@@ -149,12 +151,12 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (128, 128, 3))
# fmt: off
expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983])
expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=1e-3))
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=5e-3))
def test_inference_batch_single_identical(self):
super().test_inference_batch_single_identical(expected_max_diff=3e-4)

View File

@@ -223,7 +223,7 @@ class HunyuanImagePipelineFastTests(
self.assertEqual(generated_image.shape, (3, 16, 16))
expected_slice_np = np.array(
[0.61494756, 0.49616697, 0.60327923, 0.6115793, 0.49047345, 0.56977504, 0.53066164, 0.58880305, 0.5570612]
[0.6068114, 0.48716035, 0.5984431, 0.60241306, 0.48849544, 0.5624479, 0.53696984, 0.58964247, 0.54248774]
)
output_slice = generated_image[0, -3:, -3:].flatten().cpu().numpy()

View File

@@ -233,7 +233,7 @@ class HunyuanVideoImageToVideoPipelineFastTests(
self.assertEqual(generated_video.shape, (5, 3, 16, 16))
# fmt: off
expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128])
expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127])
# fmt: on
generated_slice = generated_video.flatten()

View File

@@ -15,7 +15,14 @@
import unittest
import torch
from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel
from transformers import (
AutoConfig,
ByT5Tokenizer,
Qwen2_5_VLTextConfig,
Qwen2_5_VLTextModel,
Qwen2Tokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKLHunyuanVideo15,
@@ -114,7 +121,8 @@ class HunyuanVideo15PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = ByT5Tokenizer()
guider = ClassifierFreeGuidance(guidance_scale=1.0)

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline
@@ -74,7 +74,9 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -108,7 +108,8 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config).eval()
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -155,9 +156,9 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 16, 16, 3)
expected_slice = np.array([0.3768, 0.4373, 0.4865, 0.4890, 0.4299, 0.5122, 0.4921, 0.4924, 0.5599])
expected_slice = np.array([0.3944, 0.3680, 0.4842, 0.5333, 0.4412, 0.4812, 0.5089, 0.5381, 0.5578])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
)

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -119,7 +119,8 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config).eval()
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -155,10 +156,7 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
return inputs
def test_dict_tuple_outputs_equivalent(self):
expected_slice = None
if torch_device == "cpu":
expected_slice = np.array([0.5762, 0.6112, 0.4150, 0.6018, 0.6167, 0.4626, 0.5426, 0.5641, 0.6536])
super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
super().test_dict_tuple_outputs_equivalent()
def test_kandinsky3_img2img(self):
device = "cpu"
@@ -177,11 +175,9 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array(
[0.576259, 0.6132097, 0.41703486, 0.603196, 0.62062526, 0.4655338, 0.5434324, 0.5660727, 0.65433365]
)
expected_slice = np.array([0.5725, 0.6248, 0.4355, 0.5732, 0.6105, 0.5267, 0.5470, 0.5512, 0.6618])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
)

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -109,7 +109,8 @@ class LattePipelineFastTests(
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
@@ -88,7 +88,8 @@ class LTXPipelineFastTests(PipelineTesterMixin, FirstBlockCacheTesterMixin, unit
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -92,7 +92,8 @@ class LTXConditionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -91,7 +91,8 @@ class LTXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
@@ -89,7 +89,8 @@ class MochiPipelineFastTests(
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -207,6 +208,9 @@ class MochiPipelineFastTests(
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -67,7 +67,9 @@ class HunyuanDiTPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -80,7 +80,8 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, PipelineTesterMixi
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -5,7 +5,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -84,7 +91,9 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -77,7 +77,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -83,7 +83,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -92,7 +92,7 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
}
encoder_config = T5GemmaModuleConfig(**encoder_params)
text_encoder_config = T5GemmaConfig(encoder=encoder_config, is_encoder_decoder=False, **encoder_params)
text_encoder = T5GemmaEncoder(text_encoder_config)
text_encoder = T5GemmaEncoder(text_encoder_config.encoder)
return {
"transformer": transformer,
@@ -256,3 +256,27 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
expected_image = torch.zeros(3, 32, 32)
max_diff = np.abs(generated_image - expected_image).max()
self.assertLessEqual(max_diff, 1e10)
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_dduf(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_loading_with_variants(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_pipeline_with_accelerator_device_map(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_local(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_optional_components(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_torch_dtype_dict(self):
pass

View File

@@ -113,7 +113,7 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vision_start_token_id=151652,
vision_token_id=151654,
)
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
components = {
@@ -160,12 +160,12 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208])
expected_slice = torch.tensor([0.5633, 0.6368, 0.6015, 0.5637, 0.5817, 0.5528, 0.5718, 0.6326, 0.4147, 0.3556, 0.5623, 0.4833, 0.4971, 0.5262, 0.4087, 0.5021])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)

View File

@@ -211,7 +211,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
def test_qwen_controlnet_multicondition(self):
device = "cpu"
@@ -255,7 +255,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
def test_attention_slicing_forward_pass(
self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3

View File

@@ -115,7 +115,7 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vision_start_token_id=151652,
vision_token_id=151654,
)
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
tokenizer = Qwen2Tokenizer.from_pretrained(tiny_ckpt_id)
components = {
@@ -163,12 +163,12 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
expected_slice = torch.tensor([0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)

View File

@@ -164,7 +164,7 @@ class QwenImageEditPlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987])
# fmt: on
generated_slice = generated_image.flatten()

View File

@@ -18,20 +18,11 @@ import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
SkyReelsV2Pipeline,
SkyReelsV2Transformer3DModel,
UniPCMultistepScheduler,
)
from diffusers import AutoencoderKLWan, SkyReelsV2Pipeline, SkyReelsV2Transformer3DModel, UniPCMultistepScheduler
from ...testing_utils import (
enable_full_determinism,
)
from ...testing_utils import enable_full_determinism
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
PipelineTesterMixin,
)
from ..test_pipelines_common import PipelineTesterMixin
enable_full_determinism()

View File

@@ -25,13 +25,9 @@ from diffusers import (
UniPCMultistepScheduler,
)
from ...testing_utils import (
enable_full_determinism,
)
from ...testing_utils import enable_full_determinism
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
PipelineTesterMixin,
)
from ..test_pipelines_common import PipelineTesterMixin
enable_full_determinism()

View File

@@ -17,10 +17,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import (
AutoTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,

View File

@@ -27,14 +27,9 @@ from diffusers import (
UniPCMultistepScheduler,
)
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ...testing_utils import enable_full_determinism, torch_device
from ..pipeline_params import TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
PipelineTesterMixin,
)
from ..test_pipelines_common import PipelineTesterMixin
enable_full_determinism()

View File

@@ -19,10 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import (
T5EncoderModel,
T5Tokenizer,
)
from transformers import AutoConfig, T5EncoderModel, T5Tokenizer
from diffusers import (
AutoencoderOobleck,
@@ -111,7 +108,8 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
torch.manual_seed(0)
t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
text_encoder = T5EncoderModel.from_pretrained(t5_repo_id)
config = AutoConfig.from_pretrained(t5_repo_id)
text_encoder = T5EncoderModel(config)
tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25)
torch.manual_seed(0)

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
@@ -72,7 +79,9 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -4,7 +4,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -3,7 +3,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -73,7 +80,9 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -1157,6 +1157,9 @@ class PipelineTesterMixin:
def test_save_load_local(self, expected_max_difference=5e-4):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1295,6 +1298,9 @@ class PipelineTesterMixin:
additional_params_copy_to_batched_inputs=["num_inference_steps"],
):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for components in pipe.components.values():
if hasattr(components, "set_default_attn_processor"):
@@ -1345,6 +1351,9 @@ class PipelineTesterMixin:
def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1477,6 +1486,9 @@ class PipelineTesterMixin:
if not self.pipeline_class._optional_components:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1557,6 +1569,9 @@ class PipelineTesterMixin:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -2065,7 +2080,16 @@ class PipelineTesterMixin:
for component_name in model_components_pipe:
pipe_component = model_components_pipe[component_name]
pipe_loaded_component = model_components_pipe_loaded[component_name]
for p1, p2 in zip(pipe_component.parameters(), pipe_loaded_component.parameters()):
model_loaded_params = dict(pipe_loaded_component.named_parameters())
model_original_params = dict(pipe_component.named_parameters())
for name, p1 in model_original_params.items():
# Skip tied weights that aren't saved with variants (transformers v5 behavior)
if name not in model_loaded_params:
continue
p2 = model_loaded_params[name]
# nan check for luminanext (mps).
if not (is_nan(p1) and is_nan(p2)):
self.assertTrue(torch.equal(p1, p2))
@@ -2089,6 +2113,9 @@ class PipelineTesterMixin:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
# We initialize the pipeline with only text encoders and tokenizers,
# mimicking a real-world scenario.
@@ -2220,6 +2247,9 @@ class PipelineTesterMixin:
from huggingface_hub import export_folder_as_dduf
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -2357,6 +2387,11 @@ class PipelineTesterMixin:
def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-4):
components = self.get_dummy_components()
# Set text encoders to eval mode to match from_pretrained behavior
# This ensures deterministic outputs when models are loaded with device_map
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -2680,6 +2715,9 @@ class PyramidAttentionBroadcastTesterMixin:
device = "cpu" # ensure determinism for the device-dependent torch.Generator
num_layers = 2
components = self.get_dummy_components(num_layers=num_layers)
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(device)
pipe.set_progress_bar_config(disable=None)

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
@@ -77,7 +77,8 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -215,7 +216,7 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
def test_callback_cfg(self):
pass
def test_save_load_local(self, expected_max_difference=5e-4):
def test_save_load_local(self, expected_max_difference=1e-3):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
@@ -260,6 +261,9 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
if not hasattr(self.pipeline_class, "_optional_components"):
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -79,7 +79,8 @@ class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel
@@ -68,7 +68,8 @@ class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -17,14 +17,11 @@ import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ...testing_utils import enable_full_determinism, torch_device
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -63,7 +60,8 @@ class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -235,7 +233,8 @@ class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel
@@ -64,7 +64,8 @@ class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -248,7 +249,8 @@ class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,6 +19,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -78,7 +79,8 @@ class WanAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,6 +19,7 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -68,7 +69,8 @@ class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -239,7 +241,8 @@ class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -67,7 +67,8 @@ class WanVACEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -16,7 +16,7 @@ import unittest
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline
@@ -62,7 +62,8 @@ class WanVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(flow_shift=3.0)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -1357,7 +1357,12 @@ def enable_full_determinism():
# variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set,
# depending on the CUDA version, so we set them both here
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
# Use a larger cuBLAS workspace size on PyTorch 2.10+ to avoid CUBLAS_STATUS_NOT_INITIALIZED errors.
# The `>=` comparison also matches 2.11 dev versions, which report as >= 2.10.
if is_torch_version(">=", "2.10"):
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
else:
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
torch.use_deterministic_algorithms(True)
# Enable CUDNN deterministic mode