Compare commits

..

3 Commits

Author SHA1 Message Date
DN6
c167fe335e update 2026-02-20 19:37:38 +05:30
DN6
e340b52a92 update 2026-02-20 19:20:24 +05:30
DN6
71ce634d1e update 2026-02-20 14:04:58 +05:30
104 changed files with 519 additions and 978 deletions

View File

@@ -92,6 +92,7 @@ jobs:
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu
name: ${{ matrix.config.name }}
runs-on:
@@ -114,7 +115,8 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
@@ -216,6 +218,8 @@ jobs:
run_lora_tests:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
name: LoRA tests with PEFT main
@@ -243,8 +247,9 @@ jobs:
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
uv pip install -U tokenizers
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
python utils/print_env.py
@@ -270,6 +275,6 @@ jobs:
if: ${{ always() }}
uses: actions/upload-artifact@v6
with:
name: pr_lora_test_reports
name: pr_main_test_reports
path: reports

View File

@@ -131,7 +131,8 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
@@ -201,7 +202,8 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
@@ -262,7 +264,8 @@ jobs:
nvidia-smi
- name: Install dependencies
run: |
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip install -e ".[quality,training]"
- name: Environment

View File

@@ -76,7 +76,8 @@ jobs:
run: |
uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
python utils/print_env.py
@@ -128,7 +129,8 @@ jobs:
uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
@@ -180,7 +182,8 @@ jobs:
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment
run: |
python utils/print_env.py

View File

@@ -17,9 +17,6 @@ import logging
import os
import sys
import tempfile
import unittest
from diffusers.utils import is_transformers_version
sys.path.append("..")
@@ -33,7 +30,6 @@ stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
class CustomDiffusion(ExamplesTestsAccelerate):
def test_custom_diffusion(self):
with tempfile.TemporaryDirectory() as tmpdir:

View File

@@ -48,7 +48,6 @@ _GO_LC_SUPPORTED_PYTORCH_LAYERS = (
torch.nn.ConvTranspose2d,
torch.nn.ConvTranspose3d,
torch.nn.Linear,
torch.nn.Embedding,
# TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX
# because of double invocation of the same norm layer in CogVideoXLayerNorm
)

View File

@@ -5472,10 +5472,6 @@ class Flux2LoraLoaderMixin(LoraBaseMixin):
logger.warning(warn_msg)
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
is_peft_format = any(k.startswith("base_model.model.") for k in state_dict)
if is_peft_format:
state_dict = {k.replace("base_model.model.", "diffusion_model."): v for k, v in state_dict.items()}
is_ai_toolkit = any(k.startswith("diffusion_model.") for k in state_dict)
if is_ai_toolkit:
state_dict = _convert_non_diffusers_flux2_lora_to_diffusers(state_dict)

View File

@@ -22,12 +22,7 @@ from tokenizers import Tokenizer as TokenizerFast
from torch import nn
from ..models.modeling_utils import load_state_dict
from ..utils import (
_get_model_file,
is_accelerate_available,
is_transformers_available,
logging,
)
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
if is_transformers_available():

View File

@@ -30,126 +30,10 @@ class AutoModel(ConfigMixin):
def __init__(self, *args, **kwargs):
raise EnvironmentError(
f"{self.__class__.__name__} is designed to be instantiated "
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)`, "
f"`{self.__class__.__name__}.from_config(config)`, or "
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
f"`{self.__class__.__name__}.from_pipe(pipeline)` methods."
)
@classmethod
def from_config(cls, pretrained_model_name_or_path_or_dict: str | os.PathLike | dict | None = None, **kwargs):
r"""
Instantiate a model from a config dictionary or a pretrained model configuration file with random weights (no
pretrained weights are loaded).
Parameters:
pretrained_model_name_or_path_or_dict (`str`, `os.PathLike`, or `dict`):
Can be either:
- A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model
configuration hosted on the Hub.
- A path to a *directory* (for example `./my_model_directory`) containing a model configuration
file.
- A config dictionary.
cache_dir (`Union[str, os.PathLike]`, *optional*):
Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
is not used.
force_download (`bool`, *optional*, defaults to `False`):
Whether or not to force the (re-)download of the model configuration, overriding the cached version if
it exists.
proxies (`Dict[str, str]`, *optional*):
A dictionary of proxy servers to use by protocol or endpoint.
local_files_only(`bool`, *optional*, defaults to `False`):
Whether to only load local model configuration files or not.
token (`str` or *bool*, *optional*):
The token to use as HTTP bearer authorization for remote files.
revision (`str`, *optional*, defaults to `"main"`):
The specific model version to use.
trust_remote_code (`bool`, *optional*, defaults to `False`):
Whether to trust remote code.
subfolder (`str`, *optional*, defaults to `""`):
The subfolder location of a model file within a larger model repository on the Hub or locally.
Returns:
A model object instantiated from the config with random weights.
Example:
```py
from diffusers import AutoModel
model = AutoModel.from_config("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
```
"""
subfolder = kwargs.pop("subfolder", None)
trust_remote_code = kwargs.pop("trust_remote_code", False)
hub_kwargs_names = [
"cache_dir",
"force_download",
"local_files_only",
"proxies",
"revision",
"token",
]
hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names}
if pretrained_model_name_or_path_or_dict is None:
raise ValueError(
"Please provide a `pretrained_model_name_or_path_or_dict` as the first positional argument."
)
if isinstance(pretrained_model_name_or_path_or_dict, (str, os.PathLike)):
pretrained_model_name_or_path = pretrained_model_name_or_path_or_dict
config = cls.load_config(pretrained_model_name_or_path, subfolder=subfolder, **hub_kwargs)
else:
config = pretrained_model_name_or_path_or_dict
pretrained_model_name_or_path = config.get("_name_or_path", None)
has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
trust_remote_code = resolve_trust_remote_code(
trust_remote_code, pretrained_model_name_or_path, has_remote_code
)
if has_remote_code and trust_remote_code:
class_ref = config["auto_map"][cls.__name__]
module_file, class_name = class_ref.split(".")
module_file = module_file + ".py"
model_cls = get_class_from_dynamic_module(
pretrained_model_name_or_path,
subfolder=subfolder,
module_file=module_file,
class_name=class_name,
**hub_kwargs,
)
else:
if "_class_name" in config:
class_name = config["_class_name"]
library = "diffusers"
elif "model_type" in config:
class_name = "AutoModel"
library = "transformers"
else:
raise ValueError(
f"Couldn't find a model class associated with the config: {config}. Make sure the config "
"contains a `_class_name` or `model_type` key."
)
from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates
model_cls, _ = get_class_obj_and_candidates(
library_name=library,
class_name=class_name,
importable_classes=ALL_IMPORTABLE_CLASSES,
pipelines=None,
is_pipeline_module=False,
)
if model_cls is None:
raise ValueError(f"AutoModel can't find a model linked to {class_name}.")
return model_cls.from_config(config, **kwargs)
@classmethod
@validate_hf_hub_args
def from_pretrained(cls, pretrained_model_or_path: str | os.PathLike | None = None, **kwargs):

View File

@@ -14,6 +14,7 @@
import importlib
import inspect
import os
import sys
import traceback
import warnings
from collections import OrderedDict
@@ -28,10 +29,16 @@ from tqdm.auto import tqdm
from typing_extensions import Self
from ..configuration_utils import ConfigMixin, FrozenDict
from ..pipelines.pipeline_loading_utils import _fetch_class_library_tuple, simple_get_class_obj
from ..pipelines.pipeline_loading_utils import (
LOADABLE_CLASSES,
_fetch_class_library_tuple,
_unwrap_model,
simple_get_class_obj,
)
from ..utils import PushToHubMixin, is_accelerate_available, logging
from ..utils.dynamic_modules_utils import get_class_from_dynamic_module, resolve_trust_remote_code
from ..utils.hub_utils import load_or_create_model_card, populate_model_card
from ..utils.torch_utils import is_compiled_module
from .components_manager import ComponentsManager
from .modular_pipeline_utils import (
MODULAR_MODEL_CARD_TEMPLATE,
@@ -1819,29 +1826,111 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
)
return pipeline
def save_pretrained(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs):
def save_pretrained(
self,
save_directory: str | os.PathLike,
safe_serialization: bool = True,
variant: str | None = None,
max_shard_size: int | str | None = None,
push_to_hub: bool = False,
**kwargs,
):
"""
Save the pipeline to a directory. It does not save components, you need to save them separately.
Save the pipeline and all its components to a directory, so that it can be re-loaded using the
[`~ModularPipeline.from_pretrained`] class method.
Args:
save_directory (`str` or `os.PathLike`):
Path to the directory where the pipeline will be saved.
push_to_hub (`bool`, optional):
Whether to push the pipeline to the huggingface hub.
**kwargs: Additional arguments passed to `save_config()` method
Directory to save the pipeline to. Will be created if it doesn't exist.
safe_serialization (`bool`, *optional*, defaults to `True`):
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
variant (`str`, *optional*):
If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
max_shard_size (`int` or `str`, defaults to `None`):
The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size
lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5GB"`).
If expressed as an integer, the unit is bytes.
push_to_hub (`bool`, *optional*, defaults to `False`):
Whether to push the pipeline to the Hugging Face model hub after saving it.
**kwargs: Additional keyword arguments passed along to the push to hub method.
"""
overwrite_modular_index = kwargs.pop("overwrite_modular_index", False)
repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
for component_name, component_spec in self._component_specs.items():
sub_model = getattr(self, component_name, None)
if sub_model is None:
continue
model_cls = sub_model.__class__
if is_compiled_module(sub_model):
sub_model = _unwrap_model(sub_model)
model_cls = sub_model.__class__
save_method_name = None
for library_name, library_classes in LOADABLE_CLASSES.items():
if library_name in sys.modules:
library = importlib.import_module(library_name)
else:
logger.info(
f"{library_name} is not installed. Cannot save {component_name} as {library_classes} from {library_name}"
)
continue
for base_class, save_load_methods in library_classes.items():
class_candidate = getattr(library, base_class, None)
if class_candidate is not None and issubclass(model_cls, class_candidate):
save_method_name = save_load_methods[0]
break
if save_method_name is not None:
break
if save_method_name is None:
logger.warning(f"self.{component_name}={sub_model} of type {type(sub_model)} cannot be saved.")
continue
save_method = getattr(sub_model, save_method_name)
save_method_signature = inspect.signature(save_method)
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
save_method_accept_variant = "variant" in save_method_signature.parameters
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
save_kwargs = {}
if save_method_accept_safe:
save_kwargs["safe_serialization"] = safe_serialization
if save_method_accept_variant:
save_kwargs["variant"] = variant
if save_method_accept_max_shard_size and max_shard_size is not None:
save_kwargs["max_shard_size"] = max_shard_size
save_method(os.path.join(save_directory, component_name), **save_kwargs)
if push_to_hub:
commit_message = kwargs.pop("commit_message", None)
private = kwargs.pop("private", None)
create_pr = kwargs.pop("create_pr", False)
token = kwargs.pop("token", None)
repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id
# Generate modular pipeline card content
card_content = generate_modular_model_card_content(self.blocks)
if overwrite_modular_index:
for component_name, component_spec in self._component_specs.items():
if component_spec.default_creation_method != "from_pretrained":
continue
if component_name not in self.config:
continue
# Create a new empty model card and eventually tag it
library, class_name, component_spec_dict = self.config[component_name]
component_spec_dict["pretrained_model_name_or_path"] = repo_id
component_spec_dict["subfolder"] = component_name
if variant is not None and "variant" in component_spec_dict:
component_spec_dict["variant"] = variant
self.register_to_config(**{component_name: (library, class_name, component_spec_dict)})
self.save_config(save_directory=save_directory)
if push_to_hub:
card_content = generate_modular_model_card_content(self.blocks)
model_card = load_or_create_model_card(
repo_id,
token=token,
@@ -1850,13 +1939,8 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
is_modular=True,
)
model_card = populate_model_card(model_card, tags=card_content["tags"])
model_card.save(os.path.join(save_directory, "README.md"))
# YiYi TODO: maybe order the json file to make it more readable: configs first, then components
self.save_config(save_directory=save_directory)
if push_to_hub:
self._upload_folder(
save_directory,
repo_id,

View File

@@ -502,10 +502,6 @@ class AudioLDM2Pipeline(DiffusionPipeline):
text_input_ids,
attention_mask=attention_mask,
)
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
# otherwise use it directly (Transformers v4)
if hasattr(prompt_embeds, "pooler_output"):
prompt_embeds = prompt_embeds.pooler_output
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
prompt_embeds = prompt_embeds[:, None, :]
# make sure that we attend to this single hidden-state
@@ -614,10 +610,6 @@ class AudioLDM2Pipeline(DiffusionPipeline):
uncond_input_ids,
attention_mask=negative_attention_mask,
)
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
# otherwise use it directly (Transformers v4)
if hasattr(negative_prompt_embeds, "pooler_output"):
negative_prompt_embeds = negative_prompt_embeds.pooler_output
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
negative_prompt_embeds = negative_prompt_embeds[:, None, :]
# make sure that we attend to this single hidden-state

View File

@@ -287,9 +287,6 @@ class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)

View File

@@ -262,9 +262,6 @@ class Cosmos2_5_TransferPipeline(DiffusionPipeline):
truncation=True,
padding="max_length",
)
input_ids = (
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
)
input_ids = torch.LongTensor(input_ids)
input_ids_batch.append(input_ids)

View File

@@ -20,8 +20,6 @@ class MultilingualCLIP(PreTrainedModel):
self.LinearTransformation = torch.nn.Linear(
in_features=config.transformerDimensions, out_features=config.numDims
)
if hasattr(self, "post_init"):
self.post_init()
def forward(self, input_ids, attention_mask):
embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0]

View File

@@ -781,9 +781,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
self.prefix_encoder = PrefixEncoder(config)
self.dropout = torch.nn.Dropout(0.1)
if hasattr(self, "post_init"):
self.post_init()
def get_input_embeddings(self):
return self.embedding.word_embeddings
@@ -813,7 +810,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
batch_size, seq_length = input_ids.shape

View File

@@ -341,7 +341,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
save_method_accept_variant = "variant" in save_method_signature.parameters
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters
save_kwargs = {}
if save_method_accept_safe:
@@ -351,11 +350,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
if save_method_accept_max_shard_size and max_shard_size is not None:
# max_shard_size is expected to not be None in ModelMixin
save_kwargs["max_shard_size"] = max_shard_size
if save_method_accept_peft_format:
# Set save_peft_format=False for transformers>=5.0.0 compatibility
# In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix
# to adapter keys, but from_pretrained expects keys without this prefix
save_kwargs["save_peft_format"] = False
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)

View File

@@ -24,25 +24,14 @@ except OptionalDependencyNotAvailable:
else:
_import_structure["pipeline_prx"] = ["PRXPipeline"]
# Wrap T5GemmaEncoder to pass config.encoder (T5GemmaModuleConfig) instead of the
# composite T5GemmaConfig, which lacks flat attributes expected by T5GemmaEncoder.__init__.
# Import T5GemmaEncoder for pipeline loading compatibility
try:
if is_transformers_available():
import transformers
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder as _T5GemmaEncoder
class T5GemmaEncoder(_T5GemmaEncoder):
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
if "config" not in kwargs:
from transformers.models.t5gemma.configuration_t5gemma import T5GemmaConfig
config = T5GemmaConfig.from_pretrained(pretrained_model_name_or_path)
if hasattr(config, "encoder"):
kwargs["config"] = config.encoder
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder
_additional_imports["T5GemmaEncoder"] = T5GemmaEncoder
# Patch transformers module directly for serialization
if not hasattr(transformers, "T5GemmaEncoder"):
transformers.T5GemmaEncoder = T5GemmaEncoder
except ImportError:

View File

@@ -17,7 +17,7 @@ from typing import Any, Callable
import regex as re
import torch
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from transformers import AutoTokenizer, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -132,7 +132,7 @@ class SkyReelsV2Pipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixin):
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: T5EncoderModel | UMT5EncoderModel,
text_encoder: UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -19,7 +19,7 @@ from copy import deepcopy
from typing import Any, Callable
import torch
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from transformers import AutoTokenizer, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -153,7 +153,7 @@ class SkyReelsV2DiffusionForcingPipeline(DiffusionPipeline, SkyReelsV2LoraLoader
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: T5EncoderModel | UMT5EncoderModel,
text_encoder: UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -20,7 +20,7 @@ from typing import Any, Callable
import PIL
import torch
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from transformers import AutoTokenizer, UMT5EncoderModel
from diffusers.image_processor import PipelineImageInput
from diffusers.utils.torch_utils import randn_tensor
@@ -158,7 +158,7 @@ class SkyReelsV2DiffusionForcingImageToVideoPipeline(DiffusionPipeline, SkyReels
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: T5EncoderModel | UMT5EncoderModel,
text_encoder: UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -21,7 +21,7 @@ from typing import Any, Callable
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
from transformers import AutoTokenizer, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import SkyReelsV2LoraLoaderMixin
@@ -214,7 +214,7 @@ class SkyReelsV2DiffusionForcingVideoToVideoPipeline(DiffusionPipeline, SkyReels
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: T5EncoderModel | UMT5EncoderModel,
text_encoder: UMT5EncoderModel,
transformer: SkyReelsV2Transformer3DModel,
vae: AutoencoderKLWan,
scheduler: UniPCMultistepScheduler,

View File

@@ -18,7 +18,7 @@ from typing import Any, Callable
import PIL
import regex as re
import torch
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, T5EncoderModel, UMT5EncoderModel
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, UMT5EncoderModel
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...image_processor import PipelineImageInput
@@ -157,7 +157,7 @@ class SkyReelsV2ImageToVideoPipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixi
def __init__(
self,
tokenizer: AutoTokenizer,
text_encoder: T5EncoderModel | UMT5EncoderModel,
text_encoder: UMT5EncoderModel,
image_encoder: CLIPVisionModelWithProjection,
image_processor: CLIPProcessor,
transformer: SkyReelsV2Transformer3DModel,

View File

@@ -112,8 +112,6 @@ def _load_transformers_model_from_dduf(
tensors = safetensors.torch.load(mmap)
# Update the state dictionary with tensors
state_dict.update(tensors)
# `from_pretrained` sets the model to eval mode by default, which is the
# correct behavior for inference. Do not call `model.train()` here.
return cls.from_pretrained(
pretrained_model_name_or_path=None,
config=config,

View File

@@ -1,5 +1,5 @@
import unittest
from unittest.mock import MagicMock, patch
from unittest.mock import patch
from transformers import CLIPTextModel, LongformerModel
@@ -20,9 +20,7 @@ class TestAutoModel(unittest.TestCase):
side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}],
)
def test_load_from_config_transformers_with_subfolder(self, mock_load_config):
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
assert isinstance(model, CLIPTextModel)
def test_load_from_config_without_subfolder(self):
@@ -30,73 +28,5 @@ class TestAutoModel(unittest.TestCase):
assert isinstance(model, LongformerModel)
def test_load_from_model_index(self):
model = AutoModel.from_pretrained(
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
)
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
assert isinstance(model, CLIPTextModel)
class TestAutoModelFromConfig(unittest.TestCase):
@patch(
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
return_value=(MagicMock(), None),
)
def test_from_config_with_dict_diffusers_class(self, mock_get_class):
config = {"_class_name": "UNet2DConditionModel", "sample_size": 64}
mock_model = MagicMock()
mock_get_class.return_value[0].from_config.return_value = mock_model
result = AutoModel.from_config(config)
mock_get_class.assert_called_once_with(
library_name="diffusers",
class_name="UNet2DConditionModel",
importable_classes=unittest.mock.ANY,
pipelines=None,
is_pipeline_module=False,
)
mock_get_class.return_value[0].from_config.assert_called_once_with(config)
assert result is mock_model
@patch(
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
return_value=(MagicMock(), None),
)
@patch("diffusers.models.AutoModel.load_config", return_value={"_class_name": "UNet2DConditionModel"})
def test_from_config_with_string_path(self, mock_load_config, mock_get_class):
mock_model = MagicMock()
mock_get_class.return_value[0].from_config.return_value = mock_model
result = AutoModel.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet")
mock_load_config.assert_called_once()
assert result is mock_model
def test_from_config_raises_on_missing_class_info(self):
config = {"some_key": "some_value"}
with self.assertRaises(ValueError, msg="Couldn't find a model class"):
AutoModel.from_config(config)
@patch(
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
return_value=(MagicMock(), None),
)
def test_from_config_with_model_type_routes_to_transformers(self, mock_get_class):
config = {"model_type": "clip_text_model"}
mock_model = MagicMock()
mock_get_class.return_value[0].from_config.return_value = mock_model
result = AutoModel.from_config(config)
mock_get_class.assert_called_once_with(
library_name="transformers",
class_name="AutoModel",
importable_classes=unittest.mock.ANY,
pipelines=None,
is_pipeline_module=False,
)
assert result is mock_model
def test_from_config_raises_on_none(self):
with self.assertRaises(ValueError, msg="Please provide a `pretrained_model_name_or_path_or_dict`"):
AutoModel.from_config(None)

View File

@@ -1,4 +1,4 @@
from .attention import AttentionBackendTesterMixin, AttentionTesterMixin
from .attention import AttentionTesterMixin
from .cache import (
CacheTesterMixin,
FasterCacheConfigMixin,
@@ -38,7 +38,6 @@ from .training import TrainingTesterMixin
__all__ = [
"AttentionBackendTesterMixin",
"AttentionTesterMixin",
"BaseModelTesterConfig",
"BitsAndBytesCompileTesterMixin",

View File

@@ -14,105 +14,22 @@
# limitations under the License.
import gc
import logging
import pytest
import torch
from diffusers.models.attention import AttentionModuleMixin
from diffusers.models.attention_dispatch import AttentionBackendName, _AttentionBackendRegistry, attention_backend
from diffusers.models.attention_processor import AttnProcessor
from diffusers.utils import is_kernels_available, is_torch_version
from ...testing_utils import assert_tensors_close, backend_empty_cache, is_attention, torch_device
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Module-level backend parameter sets for AttentionBackendTesterMixin
# ---------------------------------------------------------------------------
_CUDA_AVAILABLE = torch.cuda.is_available()
_KERNELS_AVAILABLE = is_kernels_available()
_PARAM_NATIVE = pytest.param(AttentionBackendName.NATIVE, id="native")
_PARAM_NATIVE_CUDNN = pytest.param(
AttentionBackendName._NATIVE_CUDNN,
id="native_cudnn",
marks=pytest.mark.skipif(
not _CUDA_AVAILABLE,
reason="CUDA is required for _native_cudnn backend.",
),
from diffusers.models.attention_processor import (
AttnProcessor,
)
_PARAM_FLASH_HUB = pytest.param(
AttentionBackendName.FLASH_HUB,
id="flash_hub",
marks=[
pytest.mark.skipif(not _CUDA_AVAILABLE, reason="CUDA is required for flash_hub backend."),
pytest.mark.skipif(
not _KERNELS_AVAILABLE,
reason="`kernels` package is required for flash_hub backend. Install with `pip install kernels`.",
),
],
from ...testing_utils import (
assert_tensors_close,
backend_empty_cache,
is_attention,
torch_device,
)
_PARAM_FLASH_3_HUB = pytest.param(
AttentionBackendName._FLASH_3_HUB,
id="flash_3_hub",
marks=[
pytest.mark.skipif(not _CUDA_AVAILABLE, reason="CUDA is required for _flash_3_hub backend."),
pytest.mark.skipif(
not _KERNELS_AVAILABLE,
reason="`kernels` package is required for _flash_3_hub backend. Install with `pip install kernels`.",
),
],
)
# All backends under test.
_ALL_BACKEND_PARAMS = [_PARAM_NATIVE, _PARAM_NATIVE_CUDNN, _PARAM_FLASH_HUB, _PARAM_FLASH_3_HUB]
# Backends that only accept bf16/fp16 inputs; models and inputs must be cast before running them.
_BF16_REQUIRED_BACKENDS = {
AttentionBackendName._NATIVE_CUDNN,
AttentionBackendName.FLASH_HUB,
AttentionBackendName._FLASH_3_HUB,
}
# Backends that perform non-deterministic operations and therefore cannot run when
# torch.use_deterministic_algorithms(True) is active (e.g. after enable_full_determinism()).
_NON_DETERMINISTIC_BACKENDS = {AttentionBackendName._NATIVE_CUDNN}
def _maybe_cast_to_bf16(backend, model, inputs_dict):
"""Cast model and floating-point inputs to bfloat16 when the backend requires it."""
if backend not in _BF16_REQUIRED_BACKENDS:
return model, inputs_dict
model = model.to(dtype=torch.bfloat16)
inputs_dict = {
k: v.to(dtype=torch.bfloat16) if isinstance(v, torch.Tensor) and v.is_floating_point() else v
for k, v in inputs_dict.items()
}
return model, inputs_dict
def _skip_if_backend_requires_nondeterminism(backend):
"""Skip at runtime when torch.use_deterministic_algorithms(True) blocks the backend.
This check is intentionally deferred to test execution time because
enable_full_determinism() is typically called at module level in test files *after*
the module-level pytest.param() objects in this file have already been evaluated,
making it impossible to catch via a collection-time skipif condition.
"""
if backend in _NON_DETERMINISTIC_BACKENDS and torch.are_deterministic_algorithms_enabled():
pytest.skip(
f"Backend '{backend.value}' performs non-deterministic operations and cannot run "
f"while `torch.use_deterministic_algorithms(True)` is active."
)
@is_attention
class AttentionTesterMixin:
@@ -122,6 +39,7 @@ class AttentionTesterMixin:
Tests functionality from AttentionModuleMixin including:
- Attention processor management (set/get)
- QKV projection fusion/unfusion
- Attention backends (XFormers, NPU, etc.)
Expected from config mixin:
- model_class: The model class to test
@@ -261,208 +179,3 @@ class AttentionTesterMixin:
model.set_attn_processor(wrong_processors)
assert "number of processors" in str(exc_info.value).lower(), "Error should mention processor count mismatch"
@is_attention
class AttentionBackendTesterMixin:
"""
Mixin class for testing attention backends on models. Following things are tested:
1. Backends can be set with the `attention_backend` context manager and with
`set_attention_backend()` method.
2. SDPA outputs don't deviate too much from backend outputs.
3. Backend works with (regional) compilation.
4. Backends can be restored.
Tests the backends using the model provided by the host test class. The backends to test
are defined in `_ALL_BACKEND_PARAMS`.
Expected from the host test class:
- model_class: The model class to instantiate.
Expected methods from the host test class:
- get_init_dict(): Returns dict of kwargs to construct the model.
- get_dummy_inputs(): Returns dict of inputs for the model's forward pass.
Pytest mark: attention
Use `pytest -m "not attention"` to skip these tests.
"""
# -----------------------------------------------------------------------
# Tolerance attributes — override in host class to loosen/tighten checks.
# -----------------------------------------------------------------------
# test_output_close_to_native: alternate backends (flash, cuDNN) may
# accumulate small numerical errors vs the reference PyTorch SDPA kernel.
backend_vs_native_atol: float = 1e-2
backend_vs_native_rtol: float = 1e-2
# test_compile: regional compilation introduces the same kind of numerical
# error as the non-compiled backend path, so the same loose tolerance applies.
compile_vs_native_atol: float = 1e-2
compile_vs_native_rtol: float = 1e-2
def setup_method(self):
gc.collect()
backend_empty_cache(torch_device)
def teardown_method(self):
gc.collect()
backend_empty_cache(torch_device)
@torch.no_grad()
@pytest.mark.parametrize("backend", _ALL_BACKEND_PARAMS)
def test_set_attention_backend_matches_context_manager(self, backend):
"""set_attention_backend() and the attention_backend() context manager must yield identical outputs."""
_skip_if_backend_requires_nondeterminism(backend)
init_dict = self.get_init_dict()
inputs_dict = self.get_dummy_inputs()
model = self.model_class(**init_dict)
model.to(torch_device)
model.eval()
model, inputs_dict = _maybe_cast_to_bf16(backend, model, inputs_dict)
with attention_backend(backend):
ctx_output = model(**inputs_dict, return_dict=False)[0]
initial_registry_backend, _ = _AttentionBackendRegistry.get_active_backend()
try:
model.set_attention_backend(backend.value)
except Exception as e:
logger.warning("Skipping test for backend '%s': %s", backend.value, e)
pytest.skip(str(e))
try:
set_output = model(**inputs_dict, return_dict=False)[0]
finally:
model.reset_attention_backend()
_AttentionBackendRegistry.set_active_backend(initial_registry_backend)
assert_tensors_close(
set_output,
ctx_output,
atol=0,
rtol=0,
msg=(
f"Output from model.set_attention_backend('{backend.value}') should be identical "
f"to the output from `with attention_backend('{backend.value}'):`."
),
)
@torch.no_grad()
@pytest.mark.parametrize("backend", _ALL_BACKEND_PARAMS)
def test_output_close_to_native(self, backend):
"""All backends should produce model output numerically close to the native SDPA reference."""
_skip_if_backend_requires_nondeterminism(backend)
init_dict = self.get_init_dict()
inputs_dict = self.get_dummy_inputs()
model = self.model_class(**init_dict)
model.to(torch_device)
model.eval()
model, inputs_dict = _maybe_cast_to_bf16(backend, model, inputs_dict)
with attention_backend(AttentionBackendName.NATIVE):
native_output = model(**inputs_dict, return_dict=False)[0]
initial_registry_backend, _ = _AttentionBackendRegistry.get_active_backend()
try:
model.set_attention_backend(backend.value)
except Exception as e:
logger.warning("Skipping test for backend '%s': %s", backend.value, e)
pytest.skip(str(e))
try:
backend_output = model(**inputs_dict, return_dict=False)[0]
finally:
model.reset_attention_backend()
_AttentionBackendRegistry.set_active_backend(initial_registry_backend)
assert_tensors_close(
backend_output,
native_output,
atol=self.backend_vs_native_atol,
rtol=self.backend_vs_native_rtol,
msg=f"Output from {backend} should be numerically close to native SDPA.",
)
@pytest.mark.parametrize("backend", _ALL_BACKEND_PARAMS)
def test_context_manager_switches_and_restores_backend(self, backend):
"""attention_backend() should activate the requested backend and restore the previous one on exit."""
initial_backend, _ = _AttentionBackendRegistry.get_active_backend()
with attention_backend(backend):
active_backend, _ = _AttentionBackendRegistry.get_active_backend()
assert active_backend == backend, (
f"Backend should be {backend} inside the context manager, got {active_backend}."
)
restored_backend, _ = _AttentionBackendRegistry.get_active_backend()
assert restored_backend == initial_backend, (
f"Backend should be restored to {initial_backend} after exiting the context manager, "
f"got {restored_backend}."
)
@pytest.mark.parametrize("backend", _ALL_BACKEND_PARAMS)
def test_compile(self, backend):
"""
`torch.compile` tests checking for recompilation, graph breaks, forward can run, etc.
For speed, we use regional compilation here (`model.compile_repeated_blocks()`
as opposed to `model.compile`).
"""
_skip_if_backend_requires_nondeterminism(backend)
if getattr(self.model_class, "_repeated_blocks", None) is None:
pytest.skip("Skipping tests as regional compilation is not supported.")
if backend == AttentionBackendName.NATIVE and not is_torch_version(">=", "2.9.0"):
pytest.xfail(
"test_compile with the native backend requires torch >= 2.9.0 for stable "
"fullgraph compilation with error_on_recompile=True."
)
init_dict = self.get_init_dict()
inputs_dict = self.get_dummy_inputs()
model = self.model_class(**init_dict)
model.to(torch_device)
model.eval()
model, inputs_dict = _maybe_cast_to_bf16(backend, model, inputs_dict)
with torch.no_grad(), attention_backend(AttentionBackendName.NATIVE):
native_output = model(**inputs_dict, return_dict=False)[0]
initial_registry_backend, _ = _AttentionBackendRegistry.get_active_backend()
try:
model.set_attention_backend(backend.value)
except Exception as e:
logger.warning("Skipping test for backend '%s': %s", backend.value, e)
pytest.skip(str(e))
try:
model.compile_repeated_blocks(fullgraph=True)
torch.compiler.reset()
with (
torch._inductor.utils.fresh_inductor_cache(),
torch._dynamo.config.patch(error_on_recompile=True),
):
with torch.no_grad():
compile_output = model(**inputs_dict, return_dict=False)[0]
model(**inputs_dict, return_dict=False)
finally:
model.reset_attention_backend()
_AttentionBackendRegistry.set_active_backend(initial_registry_backend)
assert_tensors_close(
compile_output,
native_output,
atol=self.compile_vs_native_atol,
rtol=self.compile_vs_native_rtol,
msg=f"Compiled output with backend '{backend.value}' should be numerically close to eager native SDPA.",
)

View File

@@ -25,7 +25,6 @@ from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ..testing_utils import (
AttentionBackendTesterMixin,
AttentionTesterMixin,
BaseModelTesterConfig,
BitsAndBytesCompileTesterMixin,
@@ -225,10 +224,6 @@ class TestFluxTransformerAttention(FluxTransformerTesterConfig, AttentionTesterM
"""Attention processor tests for Flux Transformer."""
class TestFluxTransformerAttentionBackend(FluxTransformerTesterConfig, AttentionBackendTesterMixin):
"""Attention backend tests for Flux Transformer."""
class TestFluxTransformerContextParallel(FluxTransformerTesterConfig, ContextParallelTesterMixin):
"""Context Parallel inference tests for Flux Transformer"""

View File

@@ -0,0 +1,163 @@
"""
This test suite exists for the maintainers currently. It's not run in our CI at the moment.
Once attention backends become more mature, we can consider including this in our CI.
To run this test suite:
```bash
export RUN_ATTENTION_BACKEND_TESTS=yes
pytest tests/others/test_attention_backends.py
```
Tests were conducted on an H100 with PyTorch 2.8.0 (CUDA 12.9). Slices for the compilation tests in
"native" variants were obtained with a torch nightly version (2.10.0.dev20250924+cu128).
Tests for aiter backend were conducted and slices for the aiter backend tests collected on a MI355X
with torch 2025-09-25 nightly version (ad2f7315ca66b42497047bb7951f696b50f1e81b) and
aiter 0.1.5.post4.dev20+ga25e55e79.
"""
import os
import pytest
import torch
pytestmark = pytest.mark.skipif(
os.getenv("RUN_ATTENTION_BACKEND_TESTS", "false") == "false", reason="Feature not mature enough."
)
from diffusers import FluxPipeline # noqa: E402
from diffusers.utils import is_torch_version # noqa: E402
# fmt: off
FORWARD_CASES = [
(
"flash_hub",
torch.tensor([0.0820, 0.0859, 0.0918, 0.1016, 0.0957, 0.0996, 0.0996, 0.1016, 0.2188, 0.2266, 0.2363, 0.2500, 0.2539, 0.2461, 0.2422, 0.2695], dtype=torch.bfloat16)
),
(
"_flash_3_hub",
torch.tensor([0.0820, 0.0859, 0.0938, 0.1016, 0.0977, 0.0996, 0.1016, 0.1016, 0.2188, 0.2246, 0.2344, 0.2480, 0.2539, 0.2480, 0.2441, 0.2715], dtype=torch.bfloat16),
),
(
"native",
torch.tensor([0.0820, 0.0859, 0.0938, 0.1016, 0.0957, 0.0996, 0.0996, 0.1016, 0.2188, 0.2266, 0.2363, 0.2500, 0.2539, 0.2480, 0.2461, 0.2734], dtype=torch.bfloat16)
),
(
"_native_cudnn",
torch.tensor([0.0781, 0.0840, 0.0879, 0.0957, 0.0898, 0.0957, 0.0957, 0.0977, 0.2168, 0.2246, 0.2324, 0.2500, 0.2539, 0.2480, 0.2441, 0.2695], dtype=torch.bfloat16),
),
(
"aiter",
torch.tensor([0.0781, 0.0820, 0.0879, 0.0957, 0.0898, 0.0938, 0.0957, 0.0957, 0.2285, 0.2363, 0.2461, 0.2637, 0.2695, 0.2617, 0.2617, 0.2891], dtype=torch.bfloat16),
)
]
COMPILE_CASES = [
(
"flash_hub",
torch.tensor([0.0410, 0.0410, 0.0449, 0.0508, 0.0488, 0.0586, 0.0605, 0.0586, 0.2324, 0.2422, 0.2539, 0.2734, 0.2832, 0.2812, 0.2773, 0.3047], dtype=torch.bfloat16),
True
),
(
"_flash_3_hub",
torch.tensor([0.0410, 0.0410, 0.0449, 0.0508, 0.0508, 0.0605, 0.0625, 0.0605, 0.2344, 0.2461, 0.2578, 0.2734, 0.2852, 0.2812, 0.2773, 0.3047], dtype=torch.bfloat16),
True,
),
(
"native",
torch.tensor([0.0410, 0.0410, 0.0449, 0.0508, 0.0508, 0.0605, 0.0605, 0.0605, 0.2344, 0.2461, 0.2578, 0.2773, 0.2871, 0.2832, 0.2773, 0.3066], dtype=torch.bfloat16),
True,
),
(
"_native_cudnn",
torch.tensor([0.0410, 0.0410, 0.0430, 0.0508, 0.0488, 0.0586, 0.0605, 0.0586, 0.2344, 0.2461, 0.2578, 0.2773, 0.2871, 0.2832, 0.2793, 0.3086], dtype=torch.bfloat16),
True,
),
(
"aiter",
torch.tensor([0.0391, 0.0391, 0.0430, 0.0488, 0.0469, 0.0566, 0.0586, 0.0566, 0.2402, 0.2539, 0.2637, 0.2812, 0.2930, 0.2910, 0.2891, 0.3164], dtype=torch.bfloat16),
True,
)
]
# fmt: on
INFER_KW = {
"prompt": "dance doggo dance",
"height": 256,
"width": 256,
"num_inference_steps": 2,
"guidance_scale": 3.5,
"max_sequence_length": 128,
"output_type": "pt",
}
def _backend_is_probably_supported(pipe, name: str):
try:
pipe.transformer.set_attention_backend(name)
return pipe, True
except Exception:
return False
def _check_if_slices_match(output, expected_slice):
img = output.images.detach().cpu()
generated_slice = img.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
assert torch.allclose(generated_slice, expected_slice, atol=1e-4)
@pytest.fixture(scope="session")
def device():
if not torch.cuda.is_available():
pytest.skip("CUDA is required for these tests.")
return torch.device("cuda:0")
@pytest.fixture(scope="session")
def pipe(device):
repo_id = "black-forest-labs/FLUX.1-dev"
pipe = FluxPipeline.from_pretrained(repo_id, torch_dtype=torch.bfloat16).to(device)
pipe.set_progress_bar_config(disable=True)
return pipe
@pytest.mark.parametrize("backend_name,expected_slice", FORWARD_CASES, ids=[c[0] for c in FORWARD_CASES])
def test_forward(pipe, backend_name, expected_slice):
out = _backend_is_probably_supported(pipe, backend_name)
if isinstance(out, bool):
pytest.xfail(f"Backend '{backend_name}' not supported in this environment.")
modified_pipe = out[0]
out = modified_pipe(**INFER_KW, generator=torch.manual_seed(0))
_check_if_slices_match(out, expected_slice)
@pytest.mark.parametrize(
"backend_name,expected_slice,error_on_recompile",
COMPILE_CASES,
ids=[c[0] for c in COMPILE_CASES],
)
def test_forward_with_compile(pipe, backend_name, expected_slice, error_on_recompile):
if "native" in backend_name and error_on_recompile and not is_torch_version(">=", "2.9.0"):
pytest.xfail(f"Test with {backend_name=} is compatible with a higher version of torch.")
out = _backend_is_probably_supported(pipe, backend_name)
if isinstance(out, bool):
pytest.xfail(f"Backend '{backend_name}' not supported in this environment.")
modified_pipe = out[0]
modified_pipe.transformer.compile(fullgraph=True)
torch.compiler.reset()
with (
torch._inductor.utils.fresh_inductor_cache(),
torch._dynamo.config.patch(error_on_recompile=error_on_recompile),
):
out = modified_pipe(**INFER_KW, generator=torch.manual_seed(0))
_check_if_slices_match(out, expected_slice)

View File

@@ -282,8 +282,6 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_inputs = text_inputs["input_ids"].to(torch_device)
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
if hasattr(clap_prompt_embeds, "pooler_output"):
clap_prompt_embeds = clap_prompt_embeds.pooler_output
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
text_inputs = audioldm_pipe.tokenizer_2(
@@ -343,8 +341,6 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_inputs = text_inputs["input_ids"].to(torch_device)
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
if hasattr(clap_prompt_embeds, "pooler_output"):
clap_prompt_embeds = clap_prompt_embeds.pooler_output
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
text_inputs = audioldm_pipe.tokenizer_2(

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast
from transformers import T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -89,8 +89,7 @@ class BriaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = FlowMatchEulerDiscreteScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -41,8 +41,7 @@ class ChromaPipelineFastTests(
)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
@@ -42,8 +42,7 @@ class ChromaImg2ImgPipelineFastTests(
)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,6 @@ import unittest
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -72,8 +71,7 @@ class ChronoEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -117,8 +117,7 @@ class CogVideoXPipelineFastTests(
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -236,9 +235,6 @@ class CogVideoXPipelineFastTests(
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler
@@ -104,8 +104,7 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -229,9 +228,6 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -113,8 +113,7 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -238,9 +237,6 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler
@@ -99,8 +99,7 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel
@@ -89,8 +89,7 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = CogVideoXDDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -108,7 +108,7 @@ class CogView4PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generator = torch.Generator(device=device).manual_seed(seed)
inputs = {
"prompt": "dance monkey",
"negative_prompt": "bad",
"negative_prompt": "",
"generator": generator,
"num_inference_steps": 2,
"guidance_scale": 6.0,

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
@@ -122,8 +122,7 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -249,9 +248,6 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKL,
@@ -97,8 +97,7 @@ class FluxControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMixin, Fl
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -2,7 +2,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -13,7 +13,9 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import torch_device
from ...testing_utils import (
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist
@@ -68,8 +70,7 @@ class FluxControlNetImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,15 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
# torch_device, # {{ edit_1 }} Removed unused import
from transformers import (
AutoTokenizer,
CLIPTextConfig,
CLIPTextModel,
CLIPTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKL,
@@ -14,7 +22,11 @@ from diffusers import (
)
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
from ...testing_utils import (
enable_full_determinism,
floats_tensor,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -73,8 +85,7 @@ class FluxControlNetInpaintPipelineTests(unittest.TestCase, PipelineTesterMixin)
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -96,10 +96,7 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,14 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -35,7 +28,10 @@ from diffusers import (
from diffusers.models import SD3ControlNetModel
from diffusers.utils.torch_utils import randn_tensor
from ...testing_utils import enable_full_determinism, torch_device
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -107,8 +103,7 @@ class StableDiffusion3ControlInpaintNetPipelineFastTests(unittest.TestCase, Pipe
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -18,14 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -124,8 +117,7 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler
@@ -107,8 +107,7 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
rho=7.0,
final_sigmas_type="sigma_min",
)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -233,9 +232,6 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -95,8 +95,7 @@ class Cosmos2TextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLWan,
@@ -96,8 +96,7 @@ class Cosmos2VideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -21,7 +21,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler
@@ -108,8 +108,7 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
rho=7.0,
final_sigmas_type="sigma_min",
)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -246,9 +245,6 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -2,7 +2,7 @@ import tempfile
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import DDPMScheduler, UNet2DConditionModel
from diffusers.models.attention_processor import AttnAddedKVProcessor
@@ -18,8 +18,7 @@ from ..test_pipelines_common import to_np
class IFPipelineTesterMixin:
def _get_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -76,8 +75,7 @@ class IFPipelineTesterMixin:
def _get_superresolution_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -252,9 +250,6 @@ class IFPipelineTesterMixin:
# This should be handled in the base test and then this method can be removed.
def _test_save_load_local(self):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)

View File

@@ -18,7 +18,9 @@ import unittest
import torch
from diffusers import IFPipeline
from diffusers import (
IFPipeline,
)
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available

View File

@@ -4,7 +4,7 @@ import unittest
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -93,8 +93,7 @@ class FluxPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel
@@ -53,8 +53,7 @@ class FluxControlPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -57,8 +57,7 @@ class FluxControlImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -58,8 +58,7 @@ class FluxControlInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel
@@ -58,8 +58,7 @@ class FluxFillPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel
@@ -55,8 +55,7 @@ class FluxImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel
@@ -55,8 +55,7 @@ class FluxInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import PIL.Image
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,8 +79,7 @@ class FluxKontextPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -79,8 +79,7 @@ class FluxKontextInpaintPipelineFastTests(
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -16,7 +16,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel
from diffusers.utils import is_transformers_version
@@ -57,8 +57,7 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
glm_config = GlmImageConfig(

View File

@@ -18,7 +18,6 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
@@ -95,8 +94,7 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
@@ -151,12 +149,12 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (128, 128, 3))
# fmt: off
expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969])
expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=5e-3))
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=1e-3))
def test_inference_batch_single_identical(self):
super().test_inference_batch_single_identical(expected_max_diff=3e-4)

View File

@@ -223,7 +223,7 @@ class HunyuanImagePipelineFastTests(
self.assertEqual(generated_image.shape, (3, 16, 16))
expected_slice_np = np.array(
[0.6068114, 0.48716035, 0.5984431, 0.60241306, 0.48849544, 0.5624479, 0.53696984, 0.58964247, 0.54248774]
[0.61494756, 0.49616697, 0.60327923, 0.6115793, 0.49047345, 0.56977504, 0.53066164, 0.58880305, 0.5570612]
)
output_slice = generated_image[0, -3:, -3:].flatten().cpu().numpy()

View File

@@ -233,7 +233,7 @@ class HunyuanVideoImageToVideoPipelineFastTests(
self.assertEqual(generated_video.shape, (5, 3, 16, 16))
# fmt: off
expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127])
expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128])
# fmt: on
generated_slice = generated_video.flatten()

View File

@@ -15,14 +15,7 @@
import unittest
import torch
from transformers import (
AutoConfig,
ByT5Tokenizer,
Qwen2_5_VLTextConfig,
Qwen2_5_VLTextModel,
Qwen2Tokenizer,
T5EncoderModel,
)
from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLHunyuanVideo15,
@@ -121,8 +114,7 @@ class HunyuanVideo15PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = ByT5Tokenizer()
guider = ClassifierFreeGuidance(guidance_scale=1.0)

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline
@@ -74,9 +74,7 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -108,8 +108,7 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config).eval()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -156,9 +155,9 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 16, 16, 3)
expected_slice = np.array([0.3944, 0.3680, 0.4842, 0.5333, 0.4412, 0.4812, 0.5089, 0.5381, 0.5578])
expected_slice = np.array([0.3768, 0.4373, 0.4865, 0.4890, 0.4299, 0.5122, 0.4921, 0.4924, 0.5599])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
)

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoPipelineForImage2Image,
@@ -119,8 +119,7 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
movq = self.dummy_movq
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config).eval()
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -156,7 +155,10 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
return inputs
def test_dict_tuple_outputs_equivalent(self):
super().test_dict_tuple_outputs_equivalent()
expected_slice = None
if torch_device == "cpu":
expected_slice = np.array([0.5762, 0.6112, 0.4150, 0.6018, 0.6167, 0.4626, 0.5426, 0.5641, 0.6536])
super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
def test_kandinsky3_img2img(self):
device = "cpu"
@@ -175,9 +177,11 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5725, 0.6248, 0.4355, 0.5732, 0.6105, 0.5267, 0.5470, 0.5512, 0.6618])
expected_slice = np.array(
[0.576259, 0.6132097, 0.41703486, 0.603196, 0.62062526, 0.4655338, 0.5434324, 0.5660727, 0.65433365]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
)

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -109,8 +109,7 @@ class LattePipelineFastTests(
vae = AutoencoderKL()
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
@@ -88,8 +88,7 @@ class LTXPipelineFastTests(PipelineTesterMixin, FirstBlockCacheTesterMixin, unit
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -92,8 +92,7 @@ class LTXConditionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -17,7 +17,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKLLTXVideo,
@@ -91,8 +91,7 @@ class LTXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
@@ -89,8 +89,7 @@ class MochiPipelineFastTests(
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {
@@ -208,9 +207,6 @@ class MochiPipelineFastTests(
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
from transformers import AutoTokenizer, BertModel, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -67,9 +67,7 @@ class HunyuanDiTPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
scheduler = DDPMScheduler()
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
components = {

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -80,8 +80,7 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, PipelineTesterMixi
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -5,14 +5,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -91,9 +84,7 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -77,10 +77,7 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -19,7 +19,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -83,10 +83,7 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae = AutoencoderKL()
scheduler = DDIMScheduler()
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -92,7 +92,7 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
}
encoder_config = T5GemmaModuleConfig(**encoder_params)
text_encoder_config = T5GemmaConfig(encoder=encoder_config, is_encoder_decoder=False, **encoder_params)
text_encoder = T5GemmaEncoder(text_encoder_config.encoder)
text_encoder = T5GemmaEncoder(text_encoder_config)
return {
"transformer": transformer,
@@ -256,27 +256,3 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
expected_image = torch.zeros(3, 32, 32)
max_diff = np.abs(generated_image - expected_image).max()
self.assertLessEqual(max_diff, 1e10)
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_dduf(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_loading_with_variants(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_pipeline_with_accelerator_device_map(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_local(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_save_load_optional_components(self):
pass
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
def test_torch_dtype_dict(self):
pass

View File

@@ -113,7 +113,7 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vision_start_token_id=151652,
vision_token_id=151654,
)
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
components = {
@@ -160,12 +160,12 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([0.5633, 0.6368, 0.6015, 0.5637, 0.5817, 0.5528, 0.5718, 0.6326, 0.4147, 0.3556, 0.5623, 0.4833, 0.4971, 0.5262, 0.4087, 0.5021])
expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)

View File

@@ -211,7 +211,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
def test_qwen_controlnet_multicondition(self):
device = "cpu"
@@ -255,7 +255,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
def test_attention_slicing_forward_pass(
self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3

View File

@@ -115,7 +115,7 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vision_start_token_id=151652,
vision_token_id=151654,
)
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
tokenizer = Qwen2Tokenizer.from_pretrained(tiny_ckpt_id)
components = {
@@ -163,12 +163,12 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986])
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
# fmt: on
generated_slice = generated_image.flatten()
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)

View File

@@ -164,7 +164,7 @@ class QwenImageEditPlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
self.assertEqual(generated_image.shape, (3, 32, 32))
# fmt: off
expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987])
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
# fmt: on
generated_slice = generated_image.flatten()

View File

@@ -18,11 +18,20 @@ import numpy as np
import torch
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, SkyReelsV2Pipeline, SkyReelsV2Transformer3DModel, UniPCMultistepScheduler
from diffusers import (
AutoencoderKLWan,
SkyReelsV2Pipeline,
SkyReelsV2Transformer3DModel,
UniPCMultistepScheduler,
)
from ...testing_utils import enable_full_determinism
from ...testing_utils import (
enable_full_determinism,
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
from ..test_pipelines_common import (
PipelineTesterMixin,
)
enable_full_determinism()

View File

@@ -25,9 +25,13 @@ from diffusers import (
UniPCMultistepScheduler,
)
from ...testing_utils import enable_full_determinism
from ...testing_utils import (
enable_full_determinism,
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
from ..test_pipelines_common import (
PipelineTesterMixin,
)
enable_full_determinism()

View File

@@ -17,7 +17,10 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoTokenizer, T5EncoderModel
from transformers import (
AutoTokenizer,
T5EncoderModel,
)
from diffusers import (
AutoencoderKLWan,

View File

@@ -27,9 +27,14 @@ from diffusers import (
UniPCMultistepScheduler,
)
from ...testing_utils import enable_full_determinism, torch_device
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..pipeline_params import TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
from ..test_pipelines_common import (
PipelineTesterMixin,
)
enable_full_determinism()

View File

@@ -19,7 +19,10 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, T5EncoderModel, T5Tokenizer
from transformers import (
T5EncoderModel,
T5Tokenizer,
)
from diffusers import (
AutoencoderOobleck,
@@ -108,8 +111,7 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
torch.manual_seed(0)
t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
config = AutoConfig.from_pretrained(t5_repo_id)
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained(t5_repo_id)
tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25)
torch.manual_seed(0)

View File

@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
@@ -79,9 +72,7 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -4,14 +4,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -3,14 +3,7 @@ import unittest
import numpy as np
import torch
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPTextConfig,
CLIPTextModelWithProjection,
CLIPTokenizer,
T5EncoderModel,
)
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
@@ -80,9 +73,7 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_3 = T5EncoderModel(config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

View File

@@ -1157,9 +1157,6 @@ class PipelineTesterMixin:
def test_save_load_local(self, expected_max_difference=5e-4):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1298,9 +1295,6 @@ class PipelineTesterMixin:
additional_params_copy_to_batched_inputs=["num_inference_steps"],
):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for components in pipe.components.values():
if hasattr(components, "set_default_attn_processor"):
@@ -1351,9 +1345,6 @@ class PipelineTesterMixin:
def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1486,9 +1477,6 @@ class PipelineTesterMixin:
if not self.pipeline_class._optional_components:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -1569,9 +1557,6 @@ class PipelineTesterMixin:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
@@ -2080,16 +2065,7 @@ class PipelineTesterMixin:
for component_name in model_components_pipe:
pipe_component = model_components_pipe[component_name]
pipe_loaded_component = model_components_pipe_loaded[component_name]
model_loaded_params = dict(pipe_loaded_component.named_parameters())
model_original_params = dict(pipe_component.named_parameters())
for name, p1 in model_original_params.items():
# Skip tied weights that aren't saved with variants (transformers v5 behavior)
if name not in model_loaded_params:
continue
p2 = model_loaded_params[name]
for p1, p2 in zip(pipe_component.parameters(), pipe_loaded_component.parameters()):
# nan check for luminanext (mps).
if not (is_nan(p1) and is_nan(p2)):
self.assertTrue(torch.equal(p1, p2))
@@ -2113,9 +2089,6 @@ class PipelineTesterMixin:
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
# We initialize the pipeline with only text encoders and tokenizers,
# mimicking a real-world scenario.
@@ -2247,9 +2220,6 @@ class PipelineTesterMixin:
from huggingface_hub import export_folder_as_dduf
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -2387,11 +2357,6 @@ class PipelineTesterMixin:
def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-4):
components = self.get_dummy_components()
# Set text encoders to eval mode to match from_pretrained behavior
# This ensures deterministic outputs when models are loaded with device_map
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -2715,9 +2680,6 @@ class PyramidAttentionBroadcastTesterMixin:
device = "cpu" # ensure determinism for the device-dependent torch.Generator
num_layers = 2
components = self.get_dummy_components(num_layers=num_layers)
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
pipe = pipe.to(device)
pipe.set_progress_bar_config(disable=None)

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
@@ -77,8 +77,7 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
@@ -216,7 +215,7 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
def test_callback_cfg(self):
pass
def test_save_load_local(self, expected_max_difference=1e-3):
def test_save_load_local(self, expected_max_difference=5e-4):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
@@ -261,9 +260,6 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
if not hasattr(self.pipeline_class, "_optional_components"):
return
components = self.get_dummy_components()
for key in components:
if "text_encoder" in key and hasattr(components[key], "eval"):
components[key].eval()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
import diffusers
from diffusers import (
@@ -79,8 +79,7 @@ class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMi
text_encoder = CLIPTextModel(clip_text_encoder_config)
torch.manual_seed(0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder_2 = T5EncoderModel(config)
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel
@@ -68,8 +68,7 @@ class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -17,11 +17,14 @@ import unittest
import numpy as np
import torch
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel
from ...testing_utils import enable_full_determinism, torch_device
from ...testing_utils import (
enable_full_determinism,
torch_device,
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -60,8 +63,7 @@ class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -233,8 +235,7 @@ class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -18,7 +18,7 @@ import unittest
import numpy as np
import torch
from PIL import Image
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel
@@ -64,8 +64,7 @@ class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -249,8 +248,7 @@ class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCas
torch.manual_seed(0)
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,7 +19,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -79,8 +78,7 @@ class WanAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

View File

@@ -19,7 +19,6 @@ import numpy as np
import torch
from PIL import Image
from transformers import (
AutoConfig,
AutoTokenizer,
CLIPImageProcessor,
CLIPVisionConfig,
@@ -69,8 +68,7 @@ class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
@@ -241,8 +239,7 @@ class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
# TODO: impl FlowDPMSolverMultistepScheduler
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
text_encoder = T5EncoderModel(config)
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)

Some files were not shown because too many files have changed in this diff Show More