mirror of
https://github.com/huggingface/diffusers.git
synced 2026-02-26 21:00:41 +08:00
Compare commits
7 Commits
remove-non
...
custom-dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
96f97f8214 | ||
|
|
f8b95ff263 | ||
|
|
5e94d62eb4 | ||
|
|
7ab2011759 | ||
|
|
4890e9bf70 | ||
|
|
f1e5914120 | ||
|
|
cfff46069d |
13
.github/workflows/pr_tests.yml
vendored
13
.github/workflows/pr_tests.yml
vendored
@@ -92,7 +92,6 @@ jobs:
|
||||
runner: aws-general-8-plus
|
||||
image: diffusers/diffusers-pytorch-cpu
|
||||
report: torch_example_cpu
|
||||
|
||||
name: ${{ matrix.config.name }}
|
||||
|
||||
runs-on:
|
||||
@@ -115,8 +114,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv pip install -e ".[quality]"
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
|
||||
|
||||
- name: Environment
|
||||
@@ -218,8 +216,6 @@ jobs:
|
||||
|
||||
run_lora_tests:
|
||||
needs: [check_code_quality, check_repository_consistency]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
||||
name: LoRA tests with PEFT main
|
||||
|
||||
@@ -247,9 +243,8 @@ jobs:
|
||||
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
|
||||
uv pip install -U tokenizers
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
@@ -275,6 +270,6 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: pr_main_test_reports
|
||||
name: pr_lora_test_reports
|
||||
path: reports
|
||||
|
||||
|
||||
9
.github/workflows/pr_tests_gpu.yml
vendored
9
.github/workflows/pr_tests_gpu.yml
vendored
@@ -131,8 +131,7 @@ jobs:
|
||||
run: |
|
||||
uv pip install -e ".[quality]"
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
@@ -202,8 +201,7 @@ jobs:
|
||||
uv pip install -e ".[quality]"
|
||||
uv pip install peft@git+https://github.com/huggingface/peft.git
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
@@ -264,8 +262,7 @@ jobs:
|
||||
nvidia-smi
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip install -e ".[quality,training]"
|
||||
|
||||
- name: Environment
|
||||
|
||||
9
.github/workflows/push_tests.yml
vendored
9
.github/workflows/push_tests.yml
vendored
@@ -76,8 +76,7 @@ jobs:
|
||||
run: |
|
||||
uv pip install -e ".[quality]"
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
- name: Environment
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
@@ -129,8 +128,7 @@ jobs:
|
||||
uv pip install -e ".[quality]"
|
||||
uv pip install peft@git+https://github.com/huggingface/peft.git
|
||||
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
@@ -182,8 +180,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv pip install -e ".[quality,training]"
|
||||
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
|
||||
uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
|
||||
- name: Environment
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
@@ -17,6 +17,9 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from diffusers.utils import is_transformers_version
|
||||
|
||||
|
||||
sys.path.append("..")
|
||||
@@ -30,6 +33,7 @@ stream_handler = logging.StreamHandler(sys.stdout)
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
|
||||
@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch")
|
||||
class CustomDiffusion(ExamplesTestsAccelerate):
|
||||
def test_custom_diffusion(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
|
||||
@@ -48,6 +48,7 @@ _GO_LC_SUPPORTED_PYTORCH_LAYERS = (
|
||||
torch.nn.ConvTranspose2d,
|
||||
torch.nn.ConvTranspose3d,
|
||||
torch.nn.Linear,
|
||||
torch.nn.Embedding,
|
||||
# TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX
|
||||
# because of double invocation of the same norm layer in CogVideoXLayerNorm
|
||||
)
|
||||
|
||||
@@ -22,7 +22,12 @@ from tokenizers import Tokenizer as TokenizerFast
|
||||
from torch import nn
|
||||
|
||||
from ..models.modeling_utils import load_state_dict
|
||||
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
|
||||
from ..utils import (
|
||||
_get_model_file,
|
||||
is_accelerate_available,
|
||||
is_transformers_available,
|
||||
logging,
|
||||
)
|
||||
|
||||
|
||||
if is_transformers_available():
|
||||
|
||||
@@ -60,6 +60,16 @@ class ContextParallelConfig:
|
||||
rotate_method (`str`, *optional*, defaults to `"allgather"`):
|
||||
Method to use for rotating key/value states across devices in ring attention. Currently, only `"allgather"`
|
||||
is supported.
|
||||
ulysses_anything (`bool`, *optional*, defaults to `False`):
|
||||
Whether to enable "Ulysses Anything" mode, which supports arbitrary sequence lengths and head counts that
|
||||
are not evenly divisible by `ulysses_degree`. When enabled, `ulysses_degree` must be greater than 1 and
|
||||
`ring_degree` must be 1.
|
||||
mesh (`torch.distributed.device_mesh.DeviceMesh`, *optional*):
|
||||
A custom device mesh to use for context parallelism. If provided, this mesh will be used instead of
|
||||
creating a new one. This is useful when combining context parallelism with other parallelism strategies
|
||||
(e.g., FSDP, tensor parallelism) that share the same device mesh. The mesh must have both "ring" and
|
||||
"ulysses" dimensions. Use size 1 for dimensions not being used (e.g., `mesh_shape=(2, 1, 4)` with
|
||||
`mesh_dim_names=("ring", "ulysses", "fsdp")` for ring attention only with FSDP).
|
||||
|
||||
"""
|
||||
|
||||
@@ -68,6 +78,7 @@ class ContextParallelConfig:
|
||||
convert_to_fp32: bool = True
|
||||
# TODO: support alltoall
|
||||
rotate_method: Literal["allgather", "alltoall"] = "allgather"
|
||||
mesh: torch.distributed.device_mesh.DeviceMesh | None = None
|
||||
# Whether to enable ulysses anything attention to support
|
||||
# any sequence lengths and any head numbers.
|
||||
ulysses_anything: bool = False
|
||||
@@ -124,7 +135,7 @@ class ContextParallelConfig:
|
||||
f"The product of `ring_degree` ({self.ring_degree}) and `ulysses_degree` ({self.ulysses_degree}) must not exceed the world size ({world_size})."
|
||||
)
|
||||
|
||||
self._flattened_mesh = self._mesh._flatten()
|
||||
self._flattened_mesh = self._mesh["ring", "ulysses"]._flatten()
|
||||
self._ring_mesh = self._mesh["ring"]
|
||||
self._ulysses_mesh = self._mesh["ulysses"]
|
||||
self._ring_local_rank = self._ring_mesh.get_local_rank()
|
||||
|
||||
@@ -30,10 +30,126 @@ class AutoModel(ConfigMixin):
|
||||
def __init__(self, *args, **kwargs):
|
||||
raise EnvironmentError(
|
||||
f"{self.__class__.__name__} is designed to be instantiated "
|
||||
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
|
||||
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)`, "
|
||||
f"`{self.__class__.__name__}.from_config(config)`, or "
|
||||
f"`{self.__class__.__name__}.from_pipe(pipeline)` methods."
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, pretrained_model_name_or_path_or_dict: str | os.PathLike | dict | None = None, **kwargs):
|
||||
r"""
|
||||
Instantiate a model from a config dictionary or a pretrained model configuration file with random weights (no
|
||||
pretrained weights are loaded).
|
||||
|
||||
Parameters:
|
||||
pretrained_model_name_or_path_or_dict (`str`, `os.PathLike`, or `dict`):
|
||||
Can be either:
|
||||
|
||||
- A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model
|
||||
configuration hosted on the Hub.
|
||||
- A path to a *directory* (for example `./my_model_directory`) containing a model configuration
|
||||
file.
|
||||
- A config dictionary.
|
||||
|
||||
cache_dir (`Union[str, os.PathLike]`, *optional*):
|
||||
Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
|
||||
is not used.
|
||||
force_download (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to force the (re-)download of the model configuration, overriding the cached version if
|
||||
it exists.
|
||||
proxies (`Dict[str, str]`, *optional*):
|
||||
A dictionary of proxy servers to use by protocol or endpoint.
|
||||
local_files_only(`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model configuration files or not.
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
The specific model version to use.
|
||||
trust_remote_code (`bool`, *optional*, defaults to `False`):
|
||||
Whether to trust remote code.
|
||||
subfolder (`str`, *optional*, defaults to `""`):
|
||||
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
||||
|
||||
Returns:
|
||||
A model object instantiated from the config with random weights.
|
||||
|
||||
Example:
|
||||
|
||||
```py
|
||||
from diffusers import AutoModel
|
||||
|
||||
model = AutoModel.from_config("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
|
||||
```
|
||||
"""
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
trust_remote_code = kwargs.pop("trust_remote_code", False)
|
||||
|
||||
hub_kwargs_names = [
|
||||
"cache_dir",
|
||||
"force_download",
|
||||
"local_files_only",
|
||||
"proxies",
|
||||
"revision",
|
||||
"token",
|
||||
]
|
||||
hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names}
|
||||
|
||||
if pretrained_model_name_or_path_or_dict is None:
|
||||
raise ValueError(
|
||||
"Please provide a `pretrained_model_name_or_path_or_dict` as the first positional argument."
|
||||
)
|
||||
|
||||
if isinstance(pretrained_model_name_or_path_or_dict, (str, os.PathLike)):
|
||||
pretrained_model_name_or_path = pretrained_model_name_or_path_or_dict
|
||||
config = cls.load_config(pretrained_model_name_or_path, subfolder=subfolder, **hub_kwargs)
|
||||
else:
|
||||
config = pretrained_model_name_or_path_or_dict
|
||||
pretrained_model_name_or_path = config.get("_name_or_path", None)
|
||||
|
||||
has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
|
||||
trust_remote_code = resolve_trust_remote_code(
|
||||
trust_remote_code, pretrained_model_name_or_path, has_remote_code
|
||||
)
|
||||
|
||||
if has_remote_code and trust_remote_code:
|
||||
class_ref = config["auto_map"][cls.__name__]
|
||||
module_file, class_name = class_ref.split(".")
|
||||
module_file = module_file + ".py"
|
||||
model_cls = get_class_from_dynamic_module(
|
||||
pretrained_model_name_or_path,
|
||||
subfolder=subfolder,
|
||||
module_file=module_file,
|
||||
class_name=class_name,
|
||||
**hub_kwargs,
|
||||
)
|
||||
else:
|
||||
if "_class_name" in config:
|
||||
class_name = config["_class_name"]
|
||||
library = "diffusers"
|
||||
elif "model_type" in config:
|
||||
class_name = "AutoModel"
|
||||
library = "transformers"
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Couldn't find a model class associated with the config: {config}. Make sure the config "
|
||||
"contains a `_class_name` or `model_type` key."
|
||||
)
|
||||
|
||||
from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates
|
||||
|
||||
model_cls, _ = get_class_obj_and_candidates(
|
||||
library_name=library,
|
||||
class_name=class_name,
|
||||
importable_classes=ALL_IMPORTABLE_CLASSES,
|
||||
pipelines=None,
|
||||
is_pipeline_module=False,
|
||||
)
|
||||
|
||||
if model_cls is None:
|
||||
raise ValueError(f"AutoModel can't find a model linked to {class_name}.")
|
||||
|
||||
return model_cls.from_config(config, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_or_path: str | os.PathLike | None = None, **kwargs):
|
||||
|
||||
@@ -1567,7 +1567,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
mesh = None
|
||||
if config.context_parallel_config is not None:
|
||||
cp_config = config.context_parallel_config
|
||||
mesh = torch.distributed.device_mesh.init_device_mesh(
|
||||
mesh = cp_config.mesh or torch.distributed.device_mesh.init_device_mesh(
|
||||
device_type=device_type,
|
||||
mesh_shape=cp_config.mesh_shape,
|
||||
mesh_dim_names=cp_config.mesh_dim_names,
|
||||
|
||||
@@ -502,6 +502,10 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
||||
text_input_ids,
|
||||
attention_mask=attention_mask,
|
||||
)
|
||||
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
|
||||
# otherwise use it directly (Transformers v4)
|
||||
if hasattr(prompt_embeds, "pooler_output"):
|
||||
prompt_embeds = prompt_embeds.pooler_output
|
||||
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
|
||||
prompt_embeds = prompt_embeds[:, None, :]
|
||||
# make sure that we attend to this single hidden-state
|
||||
@@ -610,6 +614,10 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
||||
uncond_input_ids,
|
||||
attention_mask=negative_attention_mask,
|
||||
)
|
||||
# Extract the pooler output if it's a BaseModelOutputWithPooling (Transformers v5+)
|
||||
# otherwise use it directly (Transformers v4)
|
||||
if hasattr(negative_prompt_embeds, "pooler_output"):
|
||||
negative_prompt_embeds = negative_prompt_embeds.pooler_output
|
||||
# append the seq-len dim: (bs, hidden_size) -> (bs, seq_len, hidden_size)
|
||||
negative_prompt_embeds = negative_prompt_embeds[:, None, :]
|
||||
# make sure that we attend to this single hidden-state
|
||||
|
||||
@@ -287,6 +287,9 @@ class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
|
||||
truncation=True,
|
||||
padding="max_length",
|
||||
)
|
||||
input_ids = (
|
||||
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
|
||||
)
|
||||
input_ids = torch.LongTensor(input_ids)
|
||||
input_ids_batch.append(input_ids)
|
||||
|
||||
|
||||
@@ -262,6 +262,9 @@ class Cosmos2_5_TransferPipeline(DiffusionPipeline):
|
||||
truncation=True,
|
||||
padding="max_length",
|
||||
)
|
||||
input_ids = (
|
||||
input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids
|
||||
)
|
||||
input_ids = torch.LongTensor(input_ids)
|
||||
input_ids_batch.append(input_ids)
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ class MultilingualCLIP(PreTrainedModel):
|
||||
self.LinearTransformation = torch.nn.Linear(
|
||||
in_features=config.transformerDimensions, out_features=config.numDims
|
||||
)
|
||||
if hasattr(self, "post_init"):
|
||||
self.post_init()
|
||||
|
||||
def forward(self, input_ids, attention_mask):
|
||||
embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0]
|
||||
|
||||
@@ -781,6 +781,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
|
||||
self.prefix_encoder = PrefixEncoder(config)
|
||||
self.dropout = torch.nn.Dropout(0.1)
|
||||
|
||||
if hasattr(self, "post_init"):
|
||||
self.post_init()
|
||||
|
||||
def get_input_embeddings(self):
|
||||
return self.embedding.word_embeddings
|
||||
|
||||
@@ -810,7 +813,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
|
||||
output_hidden_states = (
|
||||
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
||||
)
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None)
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
batch_size, seq_length = input_ids.shape
|
||||
|
||||
@@ -341,6 +341,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
|
||||
save_method_accept_variant = "variant" in save_method_signature.parameters
|
||||
save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters
|
||||
save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters
|
||||
|
||||
save_kwargs = {}
|
||||
if save_method_accept_safe:
|
||||
@@ -350,6 +351,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
if save_method_accept_max_shard_size and max_shard_size is not None:
|
||||
# max_shard_size is expected to not be None in ModelMixin
|
||||
save_kwargs["max_shard_size"] = max_shard_size
|
||||
if save_method_accept_peft_format:
|
||||
# Set save_peft_format=False for transformers>=5.0.0 compatibility
|
||||
# In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix
|
||||
# to adapter keys, but from_pretrained expects keys without this prefix
|
||||
save_kwargs["save_peft_format"] = False
|
||||
|
||||
save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)
|
||||
|
||||
|
||||
@@ -24,14 +24,25 @@ except OptionalDependencyNotAvailable:
|
||||
else:
|
||||
_import_structure["pipeline_prx"] = ["PRXPipeline"]
|
||||
|
||||
# Import T5GemmaEncoder for pipeline loading compatibility
|
||||
# Wrap T5GemmaEncoder to pass config.encoder (T5GemmaModuleConfig) instead of the
|
||||
# composite T5GemmaConfig, which lacks flat attributes expected by T5GemmaEncoder.__init__.
|
||||
try:
|
||||
if is_transformers_available():
|
||||
import transformers
|
||||
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder
|
||||
from transformers.models.t5gemma.modeling_t5gemma import T5GemmaEncoder as _T5GemmaEncoder
|
||||
|
||||
class T5GemmaEncoder(_T5GemmaEncoder):
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
|
||||
if "config" not in kwargs:
|
||||
from transformers.models.t5gemma.configuration_t5gemma import T5GemmaConfig
|
||||
|
||||
config = T5GemmaConfig.from_pretrained(pretrained_model_name_or_path)
|
||||
if hasattr(config, "encoder"):
|
||||
kwargs["config"] = config.encoder
|
||||
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
|
||||
|
||||
_additional_imports["T5GemmaEncoder"] = T5GemmaEncoder
|
||||
# Patch transformers module directly for serialization
|
||||
if not hasattr(transformers, "T5GemmaEncoder"):
|
||||
transformers.T5GemmaEncoder = T5GemmaEncoder
|
||||
except ImportError:
|
||||
|
||||
@@ -17,7 +17,7 @@ from typing import Any, Callable
|
||||
|
||||
import regex as re
|
||||
import torch
|
||||
from transformers import AutoTokenizer, UMT5EncoderModel
|
||||
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
|
||||
|
||||
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import SkyReelsV2LoraLoaderMixin
|
||||
@@ -132,7 +132,7 @@ class SkyReelsV2Pipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixin):
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: AutoTokenizer,
|
||||
text_encoder: UMT5EncoderModel,
|
||||
text_encoder: T5EncoderModel | UMT5EncoderModel,
|
||||
transformer: SkyReelsV2Transformer3DModel,
|
||||
vae: AutoencoderKLWan,
|
||||
scheduler: UniPCMultistepScheduler,
|
||||
|
||||
@@ -19,7 +19,7 @@ from copy import deepcopy
|
||||
from typing import Any, Callable
|
||||
|
||||
import torch
|
||||
from transformers import AutoTokenizer, UMT5EncoderModel
|
||||
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
|
||||
|
||||
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import SkyReelsV2LoraLoaderMixin
|
||||
@@ -153,7 +153,7 @@ class SkyReelsV2DiffusionForcingPipeline(DiffusionPipeline, SkyReelsV2LoraLoader
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: AutoTokenizer,
|
||||
text_encoder: UMT5EncoderModel,
|
||||
text_encoder: T5EncoderModel | UMT5EncoderModel,
|
||||
transformer: SkyReelsV2Transformer3DModel,
|
||||
vae: AutoencoderKLWan,
|
||||
scheduler: UniPCMultistepScheduler,
|
||||
|
||||
@@ -20,7 +20,7 @@ from typing import Any, Callable
|
||||
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import AutoTokenizer, UMT5EncoderModel
|
||||
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
|
||||
|
||||
from diffusers.image_processor import PipelineImageInput
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
@@ -158,7 +158,7 @@ class SkyReelsV2DiffusionForcingImageToVideoPipeline(DiffusionPipeline, SkyReels
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: AutoTokenizer,
|
||||
text_encoder: UMT5EncoderModel,
|
||||
text_encoder: T5EncoderModel | UMT5EncoderModel,
|
||||
transformer: SkyReelsV2Transformer3DModel,
|
||||
vae: AutoencoderKLWan,
|
||||
scheduler: UniPCMultistepScheduler,
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import Any, Callable
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, UMT5EncoderModel
|
||||
from transformers import AutoTokenizer, T5EncoderModel, UMT5EncoderModel
|
||||
|
||||
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import SkyReelsV2LoraLoaderMixin
|
||||
@@ -214,7 +214,7 @@ class SkyReelsV2DiffusionForcingVideoToVideoPipeline(DiffusionPipeline, SkyReels
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: AutoTokenizer,
|
||||
text_encoder: UMT5EncoderModel,
|
||||
text_encoder: T5EncoderModel | UMT5EncoderModel,
|
||||
transformer: SkyReelsV2Transformer3DModel,
|
||||
vae: AutoencoderKLWan,
|
||||
scheduler: UniPCMultistepScheduler,
|
||||
|
||||
@@ -18,7 +18,7 @@ from typing import Any, Callable
|
||||
import PIL
|
||||
import regex as re
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, UMT5EncoderModel
|
||||
from transformers import AutoTokenizer, CLIPProcessor, CLIPVisionModelWithProjection, T5EncoderModel, UMT5EncoderModel
|
||||
|
||||
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...image_processor import PipelineImageInput
|
||||
@@ -157,7 +157,7 @@ class SkyReelsV2ImageToVideoPipeline(DiffusionPipeline, SkyReelsV2LoraLoaderMixi
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: AutoTokenizer,
|
||||
text_encoder: UMT5EncoderModel,
|
||||
text_encoder: T5EncoderModel | UMT5EncoderModel,
|
||||
image_encoder: CLIPVisionModelWithProjection,
|
||||
image_processor: CLIPProcessor,
|
||||
transformer: SkyReelsV2Transformer3DModel,
|
||||
|
||||
@@ -112,6 +112,8 @@ def _load_transformers_model_from_dduf(
|
||||
tensors = safetensors.torch.load(mmap)
|
||||
# Update the state dictionary with tensors
|
||||
state_dict.update(tensors)
|
||||
# `from_pretrained` sets the model to eval mode by default, which is the
|
||||
# correct behavior for inference. Do not call `model.train()` here.
|
||||
return cls.from_pretrained(
|
||||
pretrained_model_name_or_path=None,
|
||||
config=config,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from transformers import CLIPTextModel, LongformerModel
|
||||
|
||||
@@ -20,7 +20,9 @@ class TestAutoModel(unittest.TestCase):
|
||||
side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}],
|
||||
)
|
||||
def test_load_from_config_transformers_with_subfolder(self, mock_load_config):
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
|
||||
model = AutoModel.from_pretrained(
|
||||
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
|
||||
)
|
||||
assert isinstance(model, CLIPTextModel)
|
||||
|
||||
def test_load_from_config_without_subfolder(self):
|
||||
@@ -28,5 +30,73 @@ class TestAutoModel(unittest.TestCase):
|
||||
assert isinstance(model, LongformerModel)
|
||||
|
||||
def test_load_from_model_index(self):
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder")
|
||||
model = AutoModel.from_pretrained(
|
||||
"hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False
|
||||
)
|
||||
assert isinstance(model, CLIPTextModel)
|
||||
|
||||
|
||||
class TestAutoModelFromConfig(unittest.TestCase):
|
||||
@patch(
|
||||
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
|
||||
return_value=(MagicMock(), None),
|
||||
)
|
||||
def test_from_config_with_dict_diffusers_class(self, mock_get_class):
|
||||
config = {"_class_name": "UNet2DConditionModel", "sample_size": 64}
|
||||
mock_model = MagicMock()
|
||||
mock_get_class.return_value[0].from_config.return_value = mock_model
|
||||
|
||||
result = AutoModel.from_config(config)
|
||||
|
||||
mock_get_class.assert_called_once_with(
|
||||
library_name="diffusers",
|
||||
class_name="UNet2DConditionModel",
|
||||
importable_classes=unittest.mock.ANY,
|
||||
pipelines=None,
|
||||
is_pipeline_module=False,
|
||||
)
|
||||
mock_get_class.return_value[0].from_config.assert_called_once_with(config)
|
||||
assert result is mock_model
|
||||
|
||||
@patch(
|
||||
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
|
||||
return_value=(MagicMock(), None),
|
||||
)
|
||||
@patch("diffusers.models.AutoModel.load_config", return_value={"_class_name": "UNet2DConditionModel"})
|
||||
def test_from_config_with_string_path(self, mock_load_config, mock_get_class):
|
||||
mock_model = MagicMock()
|
||||
mock_get_class.return_value[0].from_config.return_value = mock_model
|
||||
|
||||
result = AutoModel.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet")
|
||||
|
||||
mock_load_config.assert_called_once()
|
||||
assert result is mock_model
|
||||
|
||||
def test_from_config_raises_on_missing_class_info(self):
|
||||
config = {"some_key": "some_value"}
|
||||
with self.assertRaises(ValueError, msg="Couldn't find a model class"):
|
||||
AutoModel.from_config(config)
|
||||
|
||||
@patch(
|
||||
"diffusers.pipelines.pipeline_loading_utils.get_class_obj_and_candidates",
|
||||
return_value=(MagicMock(), None),
|
||||
)
|
||||
def test_from_config_with_model_type_routes_to_transformers(self, mock_get_class):
|
||||
config = {"model_type": "clip_text_model"}
|
||||
mock_model = MagicMock()
|
||||
mock_get_class.return_value[0].from_config.return_value = mock_model
|
||||
|
||||
result = AutoModel.from_config(config)
|
||||
|
||||
mock_get_class.assert_called_once_with(
|
||||
library_name="transformers",
|
||||
class_name="AutoModel",
|
||||
importable_classes=unittest.mock.ANY,
|
||||
pipelines=None,
|
||||
is_pipeline_module=False,
|
||||
)
|
||||
assert result is mock_model
|
||||
|
||||
def test_from_config_raises_on_none(self):
|
||||
with self.assertRaises(ValueError, msg="Please provide a `pretrained_model_name_or_path_or_dict`"):
|
||||
AutoModel.from_config(None)
|
||||
|
||||
@@ -60,12 +60,7 @@ def _context_parallel_worker(rank, world_size, master_port, model_class, init_di
|
||||
model.eval()
|
||||
|
||||
# Move inputs to device
|
||||
inputs_on_device = {}
|
||||
for key, value in inputs_dict.items():
|
||||
if isinstance(value, torch.Tensor):
|
||||
inputs_on_device[key] = value.to(device)
|
||||
else:
|
||||
inputs_on_device[key] = value
|
||||
inputs_on_device = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs_dict.items()}
|
||||
|
||||
# Enable context parallelism
|
||||
cp_config = ContextParallelConfig(**cp_dict)
|
||||
@@ -89,6 +84,59 @@ def _context_parallel_worker(rank, world_size, master_port, model_class, init_di
|
||||
dist.destroy_process_group()
|
||||
|
||||
|
||||
def _custom_mesh_worker(
|
||||
rank,
|
||||
world_size,
|
||||
master_port,
|
||||
model_class,
|
||||
init_dict,
|
||||
cp_dict,
|
||||
mesh_shape,
|
||||
mesh_dim_names,
|
||||
inputs_dict,
|
||||
return_dict,
|
||||
):
|
||||
"""Worker function for context parallel testing with a user-provided custom DeviceMesh."""
|
||||
try:
|
||||
os.environ["MASTER_ADDR"] = "localhost"
|
||||
os.environ["MASTER_PORT"] = str(master_port)
|
||||
os.environ["RANK"] = str(rank)
|
||||
os.environ["WORLD_SIZE"] = str(world_size)
|
||||
|
||||
dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)
|
||||
|
||||
torch.cuda.set_device(rank)
|
||||
device = torch.device(f"cuda:{rank}")
|
||||
|
||||
model = model_class(**init_dict)
|
||||
model.to(device)
|
||||
model.eval()
|
||||
|
||||
inputs_on_device = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs_dict.items()}
|
||||
|
||||
# DeviceMesh must be created after init_process_group, inside each worker process.
|
||||
mesh = torch.distributed.device_mesh.init_device_mesh(
|
||||
"cuda", mesh_shape=mesh_shape, mesh_dim_names=mesh_dim_names
|
||||
)
|
||||
cp_config = ContextParallelConfig(**cp_dict, mesh=mesh)
|
||||
model.enable_parallelism(config=cp_config)
|
||||
|
||||
with torch.no_grad():
|
||||
output = model(**inputs_on_device, return_dict=False)[0]
|
||||
|
||||
if rank == 0:
|
||||
return_dict["status"] = "success"
|
||||
return_dict["output_shape"] = list(output.shape)
|
||||
|
||||
except Exception as e:
|
||||
if rank == 0:
|
||||
return_dict["status"] = "error"
|
||||
return_dict["error"] = str(e)
|
||||
finally:
|
||||
if dist.is_initialized():
|
||||
dist.destroy_process_group()
|
||||
|
||||
|
||||
@is_context_parallel
|
||||
@require_torch_multi_accelerator
|
||||
class ContextParallelTesterMixin:
|
||||
@@ -126,3 +174,48 @@ class ContextParallelTesterMixin:
|
||||
assert return_dict.get("status") == "success", (
|
||||
f"Context parallel inference failed: {return_dict.get('error', 'Unknown error')}"
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"cp_type,mesh_shape,mesh_dim_names",
|
||||
[
|
||||
("ring_degree", (2, 1, 1), ("ring", "ulysses", "fsdp")),
|
||||
("ulysses_degree", (1, 2, 1), ("ring", "ulysses", "fsdp")),
|
||||
],
|
||||
ids=["ring-3d-fsdp", "ulysses-3d-fsdp"],
|
||||
)
|
||||
def test_context_parallel_custom_mesh(self, cp_type, mesh_shape, mesh_dim_names):
|
||||
if not torch.distributed.is_available():
|
||||
pytest.skip("torch.distributed is not available.")
|
||||
|
||||
if not hasattr(self.model_class, "_cp_plan") or self.model_class._cp_plan is None:
|
||||
pytest.skip("Model does not have a _cp_plan defined for context parallel inference.")
|
||||
|
||||
world_size = 2
|
||||
init_dict = self.get_init_dict()
|
||||
inputs_dict = {k: v.cpu() if isinstance(v, torch.Tensor) else v for k, v in self.get_dummy_inputs().items()}
|
||||
cp_dict = {cp_type: world_size}
|
||||
|
||||
master_port = _find_free_port()
|
||||
manager = mp.Manager()
|
||||
return_dict = manager.dict()
|
||||
|
||||
mp.spawn(
|
||||
_custom_mesh_worker,
|
||||
args=(
|
||||
world_size,
|
||||
master_port,
|
||||
self.model_class,
|
||||
init_dict,
|
||||
cp_dict,
|
||||
mesh_shape,
|
||||
mesh_dim_names,
|
||||
inputs_dict,
|
||||
return_dict,
|
||||
),
|
||||
nprocs=world_size,
|
||||
join=True,
|
||||
)
|
||||
|
||||
assert return_dict.get("status") == "success", (
|
||||
f"Custom mesh context parallel inference failed: {return_dict.get('error', 'Unknown error')}"
|
||||
)
|
||||
|
||||
@@ -282,6 +282,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
text_inputs = text_inputs["input_ids"].to(torch_device)
|
||||
|
||||
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
|
||||
if hasattr(clap_prompt_embeds, "pooler_output"):
|
||||
clap_prompt_embeds = clap_prompt_embeds.pooler_output
|
||||
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
|
||||
|
||||
text_inputs = audioldm_pipe.tokenizer_2(
|
||||
@@ -341,6 +343,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
text_inputs = text_inputs["input_ids"].to(torch_device)
|
||||
|
||||
clap_prompt_embeds = audioldm_pipe.text_encoder.get_text_features(text_inputs)
|
||||
if hasattr(clap_prompt_embeds, "pooler_output"):
|
||||
clap_prompt_embeds = clap_prompt_embeds.pooler_output
|
||||
clap_prompt_embeds = clap_prompt_embeds[:, None, :]
|
||||
|
||||
text_inputs = audioldm_pipe.tokenizer_2(
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import T5EncoderModel, T5TokenizerFast
|
||||
from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -89,7 +89,8 @@ class BriaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -2,7 +2,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
|
||||
|
||||
@@ -41,7 +41,8 @@ class ChromaPipelineFastTests(
|
||||
)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler
|
||||
|
||||
@@ -42,7 +42,8 @@ class ChromaImg2ImgPipelineFastTests(
|
||||
)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ import unittest
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPImageProcessor,
|
||||
CLIPVisionConfig,
|
||||
@@ -71,7 +72,8 @@ class ChronoEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
# TODO: impl FlowDPMSolverMultistepScheduler
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler
|
||||
|
||||
@@ -117,7 +117,8 @@ class CogVideoXPipelineFastTests(
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -235,6 +236,9 @@ class CogVideoXPipelineFastTests(
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler
|
||||
|
||||
@@ -104,7 +104,8 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -228,6 +229,9 @@ class CogVideoXFunControlPipelineFastTests(PipelineTesterMixin, unittest.TestCas
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
|
||||
from diffusers.utils import load_image
|
||||
@@ -113,7 +113,8 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -237,6 +238,9 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler
|
||||
|
||||
@@ -99,7 +99,8 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel
|
||||
|
||||
@@ -89,7 +89,8 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = CogVideoXDDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -108,7 +108,7 @@ class CogView4PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
generator = torch.Generator(device=device).manual_seed(seed)
|
||||
inputs = {
|
||||
"prompt": "dance monkey",
|
||||
"negative_prompt": "",
|
||||
"negative_prompt": "bad",
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler
|
||||
from diffusers.utils import load_image
|
||||
@@ -122,7 +122,8 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -248,6 +249,9 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
|
||||
from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -97,7 +97,8 @@ class FluxControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMixin, Fl
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -2,7 +2,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -13,9 +13,7 @@ from diffusers import (
|
||||
)
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
from ...testing_utils import (
|
||||
torch_device,
|
||||
)
|
||||
from ...testing_utils import torch_device
|
||||
from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist
|
||||
|
||||
|
||||
@@ -70,7 +68,8 @@ class FluxControlNetImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMi
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,15 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
# torch_device, # {{ edit_1 }} Removed unused import
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModel,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -22,11 +14,7 @@ from diffusers import (
|
||||
)
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
@@ -85,7 +73,8 @@ class FluxControlNetInpaintPipelineTests(unittest.TestCase, PipelineTesterMixin)
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, BertModel, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -96,7 +96,10 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
|
||||
scheduler = DDPMScheduler()
|
||||
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -17,7 +17,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -28,10 +35,7 @@ from diffusers import (
|
||||
from diffusers.models import SD3ControlNetModel
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
torch_device,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism, torch_device
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
@@ -103,7 +107,8 @@ class StableDiffusion3ControlInpaintNetPipelineFastTests(unittest.TestCase, Pipe
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -18,7 +18,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -117,7 +124,8 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -20,7 +20,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler
|
||||
|
||||
@@ -107,7 +107,8 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
rho=7.0,
|
||||
final_sigmas_type="sigma_min",
|
||||
)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -232,6 +233,9 @@ class CosmosTextToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -20,7 +20,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
@@ -95,7 +95,8 @@ class Cosmos2TextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
@@ -96,7 +96,8 @@ class Cosmos2VideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCas
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler
|
||||
|
||||
@@ -108,7 +108,8 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
|
||||
rho=7.0,
|
||||
final_sigmas_type="sigma_min",
|
||||
)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -245,6 +246,9 @@ class CosmosVideoToWorldPipelineFastTests(PipelineTesterMixin, unittest.TestCase
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -2,7 +2,7 @@ import tempfile
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import DDPMScheduler, UNet2DConditionModel
|
||||
from diffusers.models.attention_processor import AttnAddedKVProcessor
|
||||
@@ -18,7 +18,8 @@ from ..test_pipelines_common import to_np
|
||||
class IFPipelineTesterMixin:
|
||||
def _get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
@@ -75,7 +76,8 @@ class IFPipelineTesterMixin:
|
||||
|
||||
def _get_superresolution_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
@@ -250,6 +252,9 @@ class IFPipelineTesterMixin:
|
||||
# This should be handled in the base test and then this method can be removed.
|
||||
def _test_save_load_local(self):
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -18,9 +18,7 @@ import unittest
|
||||
|
||||
import torch
|
||||
|
||||
from diffusers import (
|
||||
IFPipeline,
|
||||
)
|
||||
from diffusers import IFPipeline
|
||||
from diffusers.models.attention_processor import AttnAddedKVProcessor
|
||||
from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -93,7 +93,8 @@ class FluxPipelineFastTests(
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel
|
||||
|
||||
@@ -53,7 +53,8 @@ class FluxControlPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -57,7 +57,8 @@ class FluxControlImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -58,7 +58,8 @@ class FluxControlInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel
|
||||
|
||||
@@ -58,7 +58,8 @@ class FluxFillPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel
|
||||
|
||||
@@ -55,7 +55,8 @@ class FluxImg2ImgPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel
|
||||
|
||||
@@ -55,7 +55,8 @@ class FluxInpaintPipelineFastTests(unittest.TestCase, PipelineTesterMixin, FluxI
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -79,7 +79,8 @@ class FluxKontextPipelineFastTests(
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -3,7 +3,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -79,7 +79,8 @@ class FluxKontextInpaintPipelineFastTests(
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -16,7 +16,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel
|
||||
from diffusers.utils import is_transformers_version
|
||||
@@ -57,7 +57,8 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
def get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
glm_config = GlmImageConfig(
|
||||
|
||||
@@ -18,6 +18,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
@@ -94,7 +95,8 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
|
||||
@@ -149,12 +151,12 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(generated_image.shape, (128, 128, 3))
|
||||
|
||||
# fmt: off
|
||||
expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983])
|
||||
expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969])
|
||||
# fmt: on
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])
|
||||
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=1e-3))
|
||||
self.assertTrue(np.allclose(generated_slice, expected_slice, atol=5e-3))
|
||||
|
||||
def test_inference_batch_single_identical(self):
|
||||
super().test_inference_batch_single_identical(expected_max_diff=3e-4)
|
||||
|
||||
@@ -223,7 +223,7 @@ class HunyuanImagePipelineFastTests(
|
||||
self.assertEqual(generated_image.shape, (3, 16, 16))
|
||||
|
||||
expected_slice_np = np.array(
|
||||
[0.61494756, 0.49616697, 0.60327923, 0.6115793, 0.49047345, 0.56977504, 0.53066164, 0.58880305, 0.5570612]
|
||||
[0.6068114, 0.48716035, 0.5984431, 0.60241306, 0.48849544, 0.5624479, 0.53696984, 0.58964247, 0.54248774]
|
||||
)
|
||||
output_slice = generated_image[0, -3:, -3:].flatten().cpu().numpy()
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ class HunyuanVideoImageToVideoPipelineFastTests(
|
||||
self.assertEqual(generated_video.shape, (5, 3, 16, 16))
|
||||
|
||||
# fmt: off
|
||||
expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128])
|
||||
expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127])
|
||||
# fmt: on
|
||||
|
||||
generated_slice = generated_video.flatten()
|
||||
|
||||
@@ -15,7 +15,14 @@
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
ByT5Tokenizer,
|
||||
Qwen2_5_VLTextConfig,
|
||||
Qwen2_5_VLTextModel,
|
||||
Qwen2Tokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLHunyuanVideo15,
|
||||
@@ -114,7 +121,8 @@ class HunyuanVideo15PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
tokenizer_2 = ByT5Tokenizer()
|
||||
|
||||
guider = ClassifierFreeGuidance(guidance_scale=1.0)
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, BertModel, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline
|
||||
|
||||
@@ -74,7 +74,9 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
scheduler = DDPMScheduler()
|
||||
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoPipelineForImage2Image,
|
||||
@@ -108,7 +108,8 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
movq = self.dummy_movq
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config).eval()
|
||||
|
||||
torch.manual_seed(0)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
@@ -155,9 +156,9 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
assert image.shape == (1, 16, 16, 3)
|
||||
|
||||
expected_slice = np.array([0.3768, 0.4373, 0.4865, 0.4890, 0.4299, 0.5122, 0.4921, 0.4924, 0.5599])
|
||||
expected_slice = np.array([0.3944, 0.3680, 0.4842, 0.5333, 0.4412, 0.4812, 0.5089, 0.5381, 0.5578])
|
||||
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
|
||||
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
|
||||
)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoPipelineForImage2Image,
|
||||
@@ -119,7 +119,8 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
torch.manual_seed(0)
|
||||
movq = self.dummy_movq
|
||||
torch.manual_seed(0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config).eval()
|
||||
|
||||
torch.manual_seed(0)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
@@ -155,10 +156,7 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
return inputs
|
||||
|
||||
def test_dict_tuple_outputs_equivalent(self):
|
||||
expected_slice = None
|
||||
if torch_device == "cpu":
|
||||
expected_slice = np.array([0.5762, 0.6112, 0.4150, 0.6018, 0.6167, 0.4626, 0.5426, 0.5641, 0.6536])
|
||||
super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
|
||||
super().test_dict_tuple_outputs_equivalent()
|
||||
|
||||
def test_kandinsky3_img2img(self):
|
||||
device = "cpu"
|
||||
@@ -177,11 +175,9 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
|
||||
assert image.shape == (1, 64, 64, 3)
|
||||
|
||||
expected_slice = np.array(
|
||||
[0.576259, 0.6132097, 0.41703486, 0.603196, 0.62062526, 0.4655338, 0.5434324, 0.5660727, 0.65433365]
|
||||
)
|
||||
expected_slice = np.array([0.5725, 0.6248, 0.4355, 0.5732, 0.6105, 0.5267, 0.5470, 0.5512, 0.6618])
|
||||
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, (
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1, (
|
||||
f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
|
||||
)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -109,7 +109,8 @@ class LattePipelineFastTests(
|
||||
vae = AutoencoderKL()
|
||||
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
|
||||
|
||||
@@ -88,7 +88,8 @@ class LTXPipelineFastTests(PipelineTesterMixin, FirstBlockCacheTesterMixin, unit
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -17,7 +17,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLLTXVideo,
|
||||
@@ -92,7 +92,8 @@ class LTXConditionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -17,7 +17,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLLTXVideo,
|
||||
@@ -91,7 +91,8 @@ class LTXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
|
||||
|
||||
@@ -89,7 +89,8 @@ class MochiPipelineFastTests(
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
@@ -207,6 +208,9 @@ class MochiPipelineFastTests(
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, BertModel, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -67,7 +67,9 @@ class HunyuanDiTPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
scheduler = DDPMScheduler()
|
||||
text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel")
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
components = {
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
import diffusers
|
||||
from diffusers import (
|
||||
@@ -80,7 +80,8 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
vae = AutoencoderKL()
|
||||
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -3,7 +3,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -73,7 +80,9 @@ class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, PipelineTesterMixi
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -5,7 +5,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -84,7 +91,9 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -77,7 +77,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
vae = AutoencoderKL()
|
||||
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -83,7 +83,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
vae = AutoencoderKL()
|
||||
|
||||
scheduler = DDIMScheduler()
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
|
||||
@@ -92,7 +92,7 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
}
|
||||
encoder_config = T5GemmaModuleConfig(**encoder_params)
|
||||
text_encoder_config = T5GemmaConfig(encoder=encoder_config, is_encoder_decoder=False, **encoder_params)
|
||||
text_encoder = T5GemmaEncoder(text_encoder_config)
|
||||
text_encoder = T5GemmaEncoder(text_encoder_config.encoder)
|
||||
|
||||
return {
|
||||
"transformer": transformer,
|
||||
@@ -256,3 +256,27 @@ class PRXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
expected_image = torch.zeros(3, 32, 32)
|
||||
max_diff = np.abs(generated_image - expected_image).max()
|
||||
self.assertLessEqual(max_diff, 1e10)
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_save_load_dduf(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_loading_with_variants(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_pipeline_with_accelerator_device_map(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_save_load_local(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_save_load_optional_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Custom T5GemmaEncoder not compatible with transformers v5.")
|
||||
def test_torch_dtype_dict(self):
|
||||
pass
|
||||
|
||||
@@ -113,7 +113,7 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
vision_start_token_id=151652,
|
||||
vision_token_id=151654,
|
||||
)
|
||||
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
|
||||
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
|
||||
tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
|
||||
|
||||
components = {
|
||||
@@ -160,12 +160,12 @@ class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(generated_image.shape, (3, 32, 32))
|
||||
|
||||
# fmt: off
|
||||
expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208])
|
||||
expected_slice = torch.tensor([0.5633, 0.6368, 0.6015, 0.5637, 0.5817, 0.5528, 0.5718, 0.6326, 0.4147, 0.3556, 0.5623, 0.4833, 0.4971, 0.5262, 0.4087, 0.5021])
|
||||
# fmt: on
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
|
||||
|
||||
def test_inference_batch_single_identical(self):
|
||||
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)
|
||||
|
||||
@@ -211,7 +211,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
|
||||
|
||||
def test_qwen_controlnet_multicondition(self):
|
||||
device = "cpu"
|
||||
@@ -255,7 +255,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
|
||||
|
||||
def test_attention_slicing_forward_pass(
|
||||
self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
|
||||
|
||||
@@ -115,7 +115,7 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
vision_start_token_id=151652,
|
||||
vision_token_id=151654,
|
||||
)
|
||||
text_encoder = Qwen2_5_VLForConditionalGeneration(config)
|
||||
text_encoder = Qwen2_5_VLForConditionalGeneration(config).eval()
|
||||
tokenizer = Qwen2Tokenizer.from_pretrained(tiny_ckpt_id)
|
||||
|
||||
components = {
|
||||
@@ -163,12 +163,12 @@ class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(generated_image.shape, (3, 32, 32))
|
||||
|
||||
# fmt: off
|
||||
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
|
||||
expected_slice = torch.tensor([0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986])
|
||||
# fmt: on
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
|
||||
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=5e-3))
|
||||
|
||||
def test_inference_batch_single_identical(self):
|
||||
self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1)
|
||||
|
||||
@@ -164,7 +164,7 @@ class QwenImageEditPlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
self.assertEqual(generated_image.shape, (3, 32, 32))
|
||||
|
||||
# fmt: off
|
||||
expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]])
|
||||
expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987])
|
||||
# fmt: on
|
||||
|
||||
generated_slice = generated_image.flatten()
|
||||
|
||||
@@ -18,20 +18,11 @@ import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
SkyReelsV2Pipeline,
|
||||
SkyReelsV2Transformer3DModel,
|
||||
UniPCMultistepScheduler,
|
||||
)
|
||||
from diffusers import AutoencoderKLWan, SkyReelsV2Pipeline, SkyReelsV2Transformer3DModel, UniPCMultistepScheduler
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism
|
||||
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import (
|
||||
PipelineTesterMixin,
|
||||
)
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
@@ -25,13 +25,9 @@ from diffusers import (
|
||||
UniPCMultistepScheduler,
|
||||
)
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism
|
||||
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import (
|
||||
PipelineTesterMixin,
|
||||
)
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
@@ -17,10 +17,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
|
||||
@@ -27,14 +27,9 @@ from diffusers import (
|
||||
UniPCMultistepScheduler,
|
||||
)
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
torch_device,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism, torch_device
|
||||
from ..pipeline_params import TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import (
|
||||
PipelineTesterMixin,
|
||||
)
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
@@ -19,10 +19,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import (
|
||||
T5EncoderModel,
|
||||
T5Tokenizer,
|
||||
)
|
||||
from transformers import AutoConfig, T5EncoderModel, T5Tokenizer
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderOobleck,
|
||||
@@ -111,7 +108,8 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
)
|
||||
torch.manual_seed(0)
|
||||
t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
|
||||
text_encoder = T5EncoderModel.from_pretrained(t5_repo_id)
|
||||
config = AutoConfig.from_pretrained(t5_repo_id)
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25)
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -3,7 +3,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
|
||||
|
||||
@@ -72,7 +79,9 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -4,7 +4,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -73,7 +80,9 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -3,7 +3,14 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPTextConfig,
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPTokenizer,
|
||||
T5EncoderModel,
|
||||
)
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKL,
|
||||
@@ -73,7 +80,9 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
|
||||
|
||||
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
torch.manual_seed(0)
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_3 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
|
||||
@@ -1157,6 +1157,9 @@ class PipelineTesterMixin:
|
||||
|
||||
def test_save_load_local(self, expected_max_difference=5e-4):
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
@@ -1295,6 +1298,9 @@ class PipelineTesterMixin:
|
||||
additional_params_copy_to_batched_inputs=["num_inference_steps"],
|
||||
):
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for components in pipe.components.values():
|
||||
if hasattr(components, "set_default_attn_processor"):
|
||||
@@ -1345,6 +1351,9 @@ class PipelineTesterMixin:
|
||||
|
||||
def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
@@ -1477,6 +1486,9 @@ class PipelineTesterMixin:
|
||||
if not self.pipeline_class._optional_components:
|
||||
return
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
@@ -1557,6 +1569,9 @@ class PipelineTesterMixin:
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
@@ -2065,7 +2080,16 @@ class PipelineTesterMixin:
|
||||
for component_name in model_components_pipe:
|
||||
pipe_component = model_components_pipe[component_name]
|
||||
pipe_loaded_component = model_components_pipe_loaded[component_name]
|
||||
for p1, p2 in zip(pipe_component.parameters(), pipe_loaded_component.parameters()):
|
||||
|
||||
model_loaded_params = dict(pipe_loaded_component.named_parameters())
|
||||
model_original_params = dict(pipe_component.named_parameters())
|
||||
|
||||
for name, p1 in model_original_params.items():
|
||||
# Skip tied weights that aren't saved with variants (transformers v5 behavior)
|
||||
if name not in model_loaded_params:
|
||||
continue
|
||||
|
||||
p2 = model_loaded_params[name]
|
||||
# nan check for luminanext (mps).
|
||||
if not (is_nan(p1) and is_nan(p2)):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
@@ -2089,6 +2113,9 @@ class PipelineTesterMixin:
|
||||
return
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
|
||||
# We initialize the pipeline with only text encoders and tokenizers,
|
||||
# mimicking a real-world scenario.
|
||||
@@ -2220,6 +2247,9 @@ class PipelineTesterMixin:
|
||||
from huggingface_hub import export_folder_as_dduf
|
||||
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -2357,6 +2387,11 @@ class PipelineTesterMixin:
|
||||
|
||||
def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-4):
|
||||
components = self.get_dummy_components()
|
||||
# Set text encoders to eval mode to match from_pretrained behavior
|
||||
# This ensures deterministic outputs when models are loaded with device_map
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -2680,6 +2715,9 @@ class PyramidAttentionBroadcastTesterMixin:
|
||||
device = "cpu" # ensure determinism for the device-dependent torch.Generator
|
||||
num_layers = 2
|
||||
components = self.get_dummy_components(num_layers=num_layers)
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
pipe = pipe.to(device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -5,7 +5,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
import diffusers
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
|
||||
@@ -77,7 +77,8 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
@@ -215,7 +216,7 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
def test_callback_cfg(self):
|
||||
pass
|
||||
|
||||
def test_save_load_local(self, expected_max_difference=5e-4):
|
||||
def test_save_load_local(self, expected_max_difference=1e-3):
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
@@ -260,6 +261,9 @@ class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
|
||||
if not hasattr(self.pipeline_class, "_optional_components"):
|
||||
return
|
||||
components = self.get_dummy_components()
|
||||
for key in components:
|
||||
if "text_encoder" in key and hasattr(components[key], "eval"):
|
||||
components[key].eval()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
if hasattr(component, "set_default_attn_processor"):
|
||||
|
||||
@@ -5,7 +5,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
import diffusers
|
||||
from diffusers import (
|
||||
@@ -79,7 +79,8 @@ class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMi
|
||||
text_encoder = CLIPTextModel(clip_text_encoder_config)
|
||||
|
||||
torch.manual_seed(0)
|
||||
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder_2 = T5EncoderModel(config)
|
||||
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel
|
||||
|
||||
@@ -68,7 +68,8 @@ class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
# TODO: impl FlowDPMSolverMultistepScheduler
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -17,14 +17,11 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
torch_device,
|
||||
)
|
||||
from ...testing_utils import enable_full_determinism, torch_device
|
||||
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
@@ -63,7 +60,8 @@ class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
@@ -235,7 +233,8 @@ class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel
|
||||
|
||||
@@ -64,7 +64,8 @@ class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
@@ -248,7 +249,8 @@ class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCas
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -19,6 +19,7 @@ import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPImageProcessor,
|
||||
CLIPVisionConfig,
|
||||
@@ -78,7 +79,8 @@ class WanAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -19,6 +19,7 @@ import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoTokenizer,
|
||||
CLIPImageProcessor,
|
||||
CLIPVisionConfig,
|
||||
@@ -68,7 +69,8 @@ class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
# TODO: impl FlowDPMSolverMultistepScheduler
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
@@ -239,7 +241,8 @@ class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
# TODO: impl FlowDPMSolverMultistepScheduler
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import (
|
||||
AutoencoderKLWan,
|
||||
@@ -67,7 +67,8 @@ class WanVACEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -16,7 +16,7 @@ import unittest
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
from transformers import AutoConfig, AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline
|
||||
|
||||
@@ -62,7 +62,8 @@ class WanVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
torch.manual_seed(0)
|
||||
scheduler = UniPCMultistepScheduler(flow_shift=3.0)
|
||||
text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
text_encoder = T5EncoderModel(config)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
@@ -1357,7 +1357,12 @@ def enable_full_determinism():
|
||||
# variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set,
|
||||
# depending on the CUDA version, so we set them both here
|
||||
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
|
||||
# Use larger workspace size for PyTorch 2.10+ to avoid CUBLAS_STATUS_NOT_INITIALIZED errors
|
||||
# (catches 2.11 dev versions which report as >= 2.10)
|
||||
if is_torch_version(">=", "2.10"):
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
||||
else:
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
|
||||
torch.use_deterministic_algorithms(True)
|
||||
|
||||
# Enable CUDNN deterministic mode
|
||||
|
||||
Reference in New Issue
Block a user