Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-13 16:04:41 +08:00)

Compare commits: update-not...gpu-test-t (1 commit)

Commit e7697e7a0a

.github/workflows/push_tests.yml (vendored, 2 changes)
@@ -137,7 +137,7 @@ jobs:
      - name: Run PyTorch CUDA tests
        env:
-         HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+         HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
docs/source/en/using-diffusers/other-formats.md

@@ -240,46 +240,6 @@ Benefits of using a single-file layout include:

1. Easy compatibility with diffusion interfaces such as [ComfyUI](https://github.com/comfyanonymous/ComfyUI) or [Automatic1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) which commonly use a single-file layout.
2. Easier to manage (download and share) a single file.

### DDUF

> [!WARNING]
> DDUF is an experimental file format and APIs related to it can change in the future.

DDUF (**D**DUF **D**iffusion **U**nified **F**ormat) is a file format designed to make storing, distributing, and using diffusion models much easier. Built on the ZIP file format, DDUF offers a standardized, efficient, and flexible way to package all parts of a diffusion model into a single, easy-to-manage file. It provides a balance between the Diffusers multi-folder format and the widely popular single-file format.

Learn more about DDUF in the Hugging Face Hub [documentation](https://huggingface.co/docs/hub/dduf).
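
Because DDUF builds on ZIP (entries are stored without compression so they can be memory-mapped), a `.dduf` archive can be inspected with standard tooling. A minimal sketch, assuming a local `FLUX.1-dev.dduf` file:

```py
import zipfile

# A .dduf file is a ZIP archive; the standard library can list its contents.
with zipfile.ZipFile("FLUX.1-dev.dduf") as zf:
    for info in zf.infolist()[:5]:
        print(info.filename, info.file_size)
```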

Pass a checkpoint to the `dduf_file` parameter to load it in [`DiffusionPipeline`].

```py
from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "DDUF/FLUX.1-dev-DDUF", dduf_file="FLUX.1-dev.dduf", torch_dtype=torch.bfloat16
).to("cuda")
image = pipe(
    "photo of a cat holding a sign that says Diffusers", num_inference_steps=50, guidance_scale=3.5
).images[0]
image.save("cat.png")
```
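
`huggingface_hub` also exposes `read_dduf_file` for inspecting a DDUF checkpoint without instantiating a pipeline; it returns a mapping of entry names to `DDUFEntry` objects. A minimal sketch, assuming the archive has already been downloaded locally:

```py
from huggingface_hub import read_dduf_file

# Map of entry path -> DDUFEntry, read from the archive's table of contents.
entries = read_dduf_file("FLUX.1-dev.dduf")
print(list(entries)[:5])

# Text entries such as configs can be read directly.
print(entries["model_index.json"].read_text())
```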

To save a pipeline as a `.dduf` checkpoint, use the [`~huggingface_hub.export_folder_as_dduf`] utility, which takes care of all the necessary file-level validations.

```py
from huggingface_hub import export_folder_as_dduf
from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

save_folder = "flux-dev"
pipe.save_pretrained(save_folder)
export_folder_as_dduf("flux-dev.dduf", folder_path=save_folder)
```

> [!TIP]
> Packaging and loading quantized checkpoints in the DDUF format is supported as long as they respect the multi-folder structure.
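
One possible flow is sketched below with bitsandbytes 4-bit quantization of the FLUX transformer. Treat it as an illustration rather than an official recipe; it assumes your installed diffusers/bitsandbytes versions support serializing 4-bit weights through `save_pretrained`:

```py
import torch
from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel
from huggingface_hub import export_folder_as_dduf

# Quantize only the transformer; the rest of the pipeline stays in bf16.
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
)

# save_pretrained produces the usual multi-folder layout, which DDUF requires.
pipe.save_pretrained("flux-dev-4bit")
export_folder_as_dduf("flux-dev-4bit.dduf", folder_path="flux-dev-4bit")
```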

## Convert layout and files

Diffusers provides many scripts and methods to convert storage layouts and file formats to enable broader support across the diffusion ecosystem.

@@ -158,9 +158,6 @@ def log_validation(
        f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
        f" {args.validation_prompt}."
    )
-   if args.enable_vae_tiling:
-       pipeline.vae.enable_tiling(tile_sample_min_height=1024, tile_sample_stride_width=1024)
-
    pipeline.text_encoder = pipeline.text_encoder.to(torch.bfloat16)
    pipeline = pipeline.to(accelerator.device)
    pipeline.set_progress_bar_config(disable=True)

@@ -600,7 +597,6 @@ def parse_args(input_args=None):
        help="Whether to offload the VAE and the text encoder to CPU when they are not used.",
    )
    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
-   parser.add_argument("--enable_vae_tiling", action="store_true", help="Enable vae tiling in log validation")

    if input_args is not None:
        args = parser.parse_args(input_args)
setup.py (7 changes)

@@ -74,9 +74,8 @@ To create the package for PyPI.
   twine upload dist/* -r pypi

10. Prepare the release notes and publish them on GitHub once everything is looking hunky-dory. You can use the following
-   Space to fetch all the commits applicable for the release: https://huggingface.co/spaces/sayakpaul/auto-release-notes-diffusers.
-   It automatically fetches the correct tag and branch but also provides the option to configure them.
-   `tag` should be the previous release tag (v0.26.1, for example), and `branch` should be
+   Space to fetch all the commits applicable for the release: https://huggingface.co/spaces/lysandre/github-release. Repo should
+   be `huggingface/diffusers`. `tag` should be the previous release tag (v0.26.1, for example), and `branch` should be
    the latest release branch (v0.27.0-release, for example). It denotes all commits that have happened on branch
    v0.27.0-release after the tag v0.26.1 was created.

@@ -102,7 +101,7 @@ _deps = [
    "filelock",
    "flax>=0.4.1",
    "hf-doc-builder>=0.3.0",
-   "huggingface-hub>=0.27.0",
+   "huggingface-hub>=0.23.2",
    "requests-mock==1.10.0",
    "importlib_metadata",
    "invisible-watermark>=0.2.0",
src/diffusers/configuration_utils.py

@@ -24,10 +24,10 @@ import os
import re
from collections import OrderedDict
from pathlib import Path
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Tuple, Union

import numpy as np
-from huggingface_hub import DDUFEntry, create_repo, hf_hub_download
+from huggingface_hub import create_repo, hf_hub_download
from huggingface_hub.utils import (
    EntryNotFoundError,
    RepositoryNotFoundError,

@@ -347,7 +347,6 @@ class ConfigMixin:
        _ = kwargs.pop("mirror", None)
        subfolder = kwargs.pop("subfolder", None)
        user_agent = kwargs.pop("user_agent", {})
-       dduf_entries: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_entries", None)

        user_agent = {**user_agent, "file_type": "config"}
        user_agent = http_user_agent(user_agent)

@@ -359,15 +358,8 @@ class ConfigMixin:
                "`self.config_name` is not defined. Note that one should not load a config from "
                "`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`"
            )
-       # Custom path for now
-       if dduf_entries:
-           if subfolder is not None:
-               raise ValueError(
-                   "DDUF file only allow for 1 level of directory (e.g transformer/model1/model.safetensors is not allowed). "
-                   "Please check the DDUF structure"
-               )
-           config_file = cls._get_config_file_from_dduf(pretrained_model_name_or_path, dduf_entries)
-       elif os.path.isfile(pretrained_model_name_or_path):
+       if os.path.isfile(pretrained_model_name_or_path):
            config_file = pretrained_model_name_or_path
        elif os.path.isdir(pretrained_model_name_or_path):
            if subfolder is not None and os.path.isfile(

@@ -434,8 +426,10 @@ class ConfigMixin:
                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                f"containing a {cls.config_name} file"
            )

        try:
-           config_dict = cls._dict_from_json_file(config_file, dduf_entries=dduf_entries)
+           # Load config dict
+           config_dict = cls._dict_from_json_file(config_file)

            commit_hash = extract_commit_hash(config_file)
        except (json.JSONDecodeError, UnicodeDecodeError):

@@ -558,14 +552,9 @@ class ConfigMixin:
        return init_dict, unused_kwargs, hidden_config_dict

    @classmethod
-   def _dict_from_json_file(
-       cls, json_file: Union[str, os.PathLike], dduf_entries: Optional[Dict[str, DDUFEntry]] = None
-   ):
-       if dduf_entries:
-           text = dduf_entries[json_file].read_text()
-       else:
-           with open(json_file, "r", encoding="utf-8") as reader:
-               text = reader.read()
+   def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
+       with open(json_file, "r", encoding="utf-8") as reader:
+           text = reader.read()
        return json.loads(text)

    def __repr__(self):

@@ -627,20 +616,6 @@ class ConfigMixin:
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(self.to_json_string())

-   @classmethod
-   def _get_config_file_from_dduf(cls, pretrained_model_name_or_path: str, dduf_entries: Dict[str, DDUFEntry]):
-       # paths inside a DDUF file must always be "/"
-       config_file = (
-           cls.config_name
-           if pretrained_model_name_or_path == ""
-           else "/".join([pretrained_model_name_or_path, cls.config_name])
-       )
-       if config_file not in dduf_entries:
-           raise ValueError(
-               f"We did not manage to find the file {config_file} in the dduf file. We only have the following files {dduf_entries.keys()}"
-           )
-       return config_file
-

def register_to_config(init):
    r"""
src/diffusers/dependency_versions_table.py

@@ -9,7 +9,7 @@ deps = {
    "filelock": "filelock",
    "flax": "flax>=0.4.1",
    "hf-doc-builder": "hf-doc-builder>=0.3.0",
-   "huggingface-hub": "huggingface-hub>=0.27.0",
+   "huggingface-hub": "huggingface-hub>=0.23.2",
    "requests-mock": "requests-mock==1.10.0",
    "importlib_metadata": "importlib_metadata",
    "invisible-watermark": "invisible-watermark>=0.2.0",
src/diffusers/loaders/lora_pipeline.py

@@ -21,7 +21,6 @@ from huggingface_hub.utils import validate_hf_hub_args
from ..utils import (
    USE_PEFT_BACKEND,
    deprecate,
-   get_submodule_by_name,
    is_peft_available,
    is_peft_version,
    is_torch_version,

@@ -1982,17 +1981,10 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
            in_features = state_dict[lora_A_weight_name].shape[1]
            out_features = state_dict[lora_B_weight_name].shape[0]

-           # Model may be loaded with different quantization schemes which may flatten the params.
-           # `bitsandbytes`, for example, flattens the weights when using 4bit. 8bit bnb models
-           # preserve weight shape.
-           module_weight_shape = cls._calculate_module_shape(model=transformer, base_module=module)
-
            # This means there's no need for an expansion in the params, so we simply skip.
-           if tuple(module_weight_shape) == (out_features, in_features):
+           if tuple(module_weight.shape) == (out_features, in_features):
                continue

            # TODO (sayakpaul): We still need to consider if the module we're expanding is
            # quantized and handle it accordingly if that is the case.
            module_out_features, module_in_features = module_weight.shape
            debug_message = ""
            if in_features > module_in_features:

@@ -2088,16 +2080,13 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
                base_weight_param = transformer_state_dict[base_param_name]
                lora_A_param = lora_state_dict[f"{prefix}{k}.lora_A.weight"]

-               # TODO (sayakpaul): Handle the cases when we actually need to expand when using quantization.
-               base_module_shape = cls._calculate_module_shape(model=transformer, base_weight_param_name=base_param_name)
-
-               if base_module_shape[1] > lora_A_param.shape[1]:
+               if base_weight_param.shape[1] > lora_A_param.shape[1]:
                    shape = (lora_A_param.shape[0], base_weight_param.shape[1])
                    expanded_state_dict_weight = torch.zeros(shape, device=base_weight_param.device)
                    expanded_state_dict_weight[:, : lora_A_param.shape[1]].copy_(lora_A_param)
                    lora_state_dict[f"{prefix}{k}.lora_A.weight"] = expanded_state_dict_weight
                    expanded_module_names.add(k)
-               elif base_module_shape[1] < lora_A_param.shape[1]:
+               elif base_weight_param.shape[1] < lora_A_param.shape[1]:
                    raise NotImplementedError(
                        f"This LoRA param ({k}.lora_A.weight) has an incompatible shape {lora_A_param.shape}. Please open an issue to file for a feature request - https://github.com/huggingface/diffusers/issues/new."
                    )

@@ -2109,28 +2098,6 @@ class FluxLoraLoaderMixin(LoraBaseMixin):

        return lora_state_dict

-   @staticmethod
-   def _calculate_module_shape(
-       model: "torch.nn.Module",
-       base_module: "torch.nn.Linear" = None,
-       base_weight_param_name: str = None,
-   ) -> "torch.Size":
-       def _get_weight_shape(weight: torch.Tensor):
-           return weight.quant_state.shape if weight.__class__.__name__ == "Params4bit" else weight.shape
-
-       if base_module is not None:
-           return _get_weight_shape(base_module.weight)
-       elif base_weight_param_name is not None:
-           if not base_weight_param_name.endswith(".weight"):
-               raise ValueError(
-                   f"Invalid `base_weight_param_name` passed as it does not end with '.weight' {base_weight_param_name=}."
-               )
-           module_path = base_weight_param_name.rsplit(".weight", 1)[0]
-           submodule = get_submodule_by_name(model, module_path)
-           return _get_weight_shape(submodule.weight)
-
-       raise ValueError("Either `base_module` or `base_weight_param_name` must be provided.")
-

# The reason why we subclass from `StableDiffusionLoraLoaderMixin` here is because Amused initially
# relied on `StableDiffusionLoraLoaderMixin` for its LoRA support.
src/diffusers/loaders/textual_inversion.py

@@ -40,7 +40,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
    force_download = kwargs.pop("force_download", False)
    proxies = kwargs.pop("proxies", None)
    local_files_only = kwargs.pop("local_files_only", None)
-   hf_token = kwargs.pop("hf_token", None)
+   token = kwargs.pop("token", None)
    revision = kwargs.pop("revision", None)
    subfolder = kwargs.pop("subfolder", None)
    weight_name = kwargs.pop("weight_name", None)

@@ -73,7 +73,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
-               token=hf_token,
+               token=token,
                revision=revision,
                subfolder=subfolder,
                user_agent=user_agent,

@@ -93,7 +93,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
-               token=hf_token,
+               token=token,
                revision=revision,
                subfolder=subfolder,
                user_agent=user_agent,

@@ -312,7 +312,7 @@ class TextualInversionLoaderMixin:
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
-           hf_token (`str` or *bool*, *optional*):
+           token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
src/diffusers/models/model_loading_utils.py

@@ -20,11 +20,10 @@ import os
from array import array
from collections import OrderedDict
from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import List, Optional, Union

import safetensors
import torch
-from huggingface_hub import DDUFEntry
from huggingface_hub.utils import EntryNotFoundError

from ..utils import (

@@ -133,10 +132,7 @@ def _fetch_remapped_cls_from_config(config, old_class):


def load_state_dict(
-   checkpoint_file: Union[str, os.PathLike],
-   variant: Optional[str] = None,
-   dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
-   disable_mmap: bool = False,
+   checkpoint_file: Union[str, os.PathLike], variant: Optional[str] = None, disable_mmap: bool = False
):
    """
    Reads a checkpoint file, returning properly formatted errors if they arise.

@@ -148,10 +144,6 @@ def load_state_dict(
    try:
        file_extension = os.path.basename(checkpoint_file).split(".")[-1]
        if file_extension == SAFETENSORS_FILE_EXTENSION:
-           if dduf_entries:
-               # tensors are loaded on cpu
-               with dduf_entries[checkpoint_file].as_mmap() as mm:
-                   return safetensors.torch.load(mm)
            if disable_mmap:
                return safetensors.torch.load(open(checkpoint_file, "rb").read())
            else:

@@ -292,7 +284,6 @@ def _fetch_index_file(
    revision,
    user_agent,
    commit_hash,
-   dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
):
    if is_local:
        index_file = Path(

@@ -318,10 +309,8 @@ def _fetch_index_file(
                subfolder=None,
                user_agent=user_agent,
                commit_hash=commit_hash,
-               dduf_entries=dduf_entries,
            )
-           if not dduf_entries:
-               index_file = Path(index_file)
+           index_file = Path(index_file)
        except (EntryNotFoundError, EnvironmentError):
            index_file = None

@@ -330,9 +319,7 @@ def _fetch_index_file(

# Adapted from
# https://github.com/bghira/SimpleTuner/blob/cea2457ab063f6dedb9e697830ae68a96be90641/helpers/training/save_hooks.py#L64
-def _merge_sharded_checkpoints(
-   sharded_ckpt_cached_folder, sharded_metadata, dduf_entries: Optional[Dict[str, DDUFEntry]] = None
-):
+def _merge_sharded_checkpoints(sharded_ckpt_cached_folder, sharded_metadata):
    weight_map = sharded_metadata.get("weight_map", None)
    if weight_map is None:
        raise KeyError("'weight_map' key not found in the shard index file.")

@@ -345,23 +332,14 @@ def _merge_sharded_checkpoints(
    # Load tensors from each unique file
    for file_name in files_to_load:
        part_file_path = os.path.join(sharded_ckpt_cached_folder, file_name)
-       if dduf_entries:
-           if part_file_path not in dduf_entries:
-               raise FileNotFoundError(f"Part file {file_name} not found.")
-       else:
-           if not os.path.exists(part_file_path):
-               raise FileNotFoundError(f"Part file {file_name} not found.")
+       if not os.path.exists(part_file_path):
+           raise FileNotFoundError(f"Part file {file_name} not found.")

        if is_safetensors:
-           if dduf_entries:
-               with dduf_entries[part_file_path].as_mmap() as mm:
-                   tensors = safetensors.torch.load(mm)
-                   merged_state_dict.update(tensors)
-           else:
-               with safetensors.safe_open(part_file_path, framework="pt", device="cpu") as f:
-                   for tensor_key in f.keys():
-                       if tensor_key in weight_map:
-                           merged_state_dict[tensor_key] = f.get_tensor(tensor_key)
+           with safetensors.safe_open(part_file_path, framework="pt", device="cpu") as f:
+               for tensor_key in f.keys():
+                   if tensor_key in weight_map:
+                       merged_state_dict[tensor_key] = f.get_tensor(tensor_key)
        else:
            merged_state_dict.update(torch.load(part_file_path, weights_only=True, map_location="cpu"))

@@ -382,7 +360,6 @@ def _fetch_index_file_legacy(
    revision,
    user_agent,
    commit_hash,
-   dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
):
    if is_local:
        index_file = Path(

@@ -423,7 +400,6 @@ def _fetch_index_file_legacy(
                subfolder=None,
                user_agent=user_agent,
                commit_hash=commit_hash,
-               dduf_entries=dduf_entries,
            )
            index_file = Path(index_file)
        deprecation_message = f"This serialization format is now deprecated to standardize the serialization format between `transformers` and `diffusers`. We recommend you to remove the existing files associated with the current variant ({variant}) and re-obtain them by running a `save_pretrained()`."
src/diffusers/models/modeling_utils.py

@@ -23,11 +23,11 @@ import re
from collections import OrderedDict
from functools import partial, wraps
from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union

import safetensors
import torch
-from huggingface_hub import DDUFEntry, create_repo, split_torch_state_dict_into_shards
+from huggingface_hub import create_repo, split_torch_state_dict_into_shards
from huggingface_hub.utils import validate_hf_hub_args
from torch import Tensor, nn

@@ -607,7 +607,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
        variant = kwargs.pop("variant", None)
        use_safetensors = kwargs.pop("use_safetensors", None)
        quantization_config = kwargs.pop("quantization_config", None)
-       dduf_entries: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_entries", None)
        disable_mmap = kwargs.pop("disable_mmap", False)

        allow_pickle = False

@@ -701,7 +700,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
            revision=revision,
            subfolder=subfolder,
            user_agent=user_agent,
-           dduf_entries=dduf_entries,
            **kwargs,
        )
        # no in-place modification of the original config.

@@ -778,14 +776,13 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                "revision": revision,
                "user_agent": user_agent,
                "commit_hash": commit_hash,
-               "dduf_entries": dduf_entries,
            }
            index_file = _fetch_index_file(**index_file_kwargs)
            # In case the index file was not found we still have to consider the legacy format.
            # this becomes applicable when the variant is not None.
            if variant is not None and (index_file is None or not os.path.exists(index_file)):
                index_file = _fetch_index_file_legacy(**index_file_kwargs)
-           if index_file is not None and (dduf_entries or index_file.is_file()):
+           if index_file is not None and index_file.is_file():
                is_sharded = True

        if is_sharded and from_flax:

@@ -814,7 +811,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):

            model = load_flax_checkpoint_in_pytorch_model(model, model_file)
        else:
-           # in the case it is sharded, we have already the index
            if is_sharded:
                sharded_ckpt_cached_folder, sharded_metadata = _get_checkpoint_shard_files(
                    pretrained_model_name_or_path,

@@ -826,13 +822,10 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                    user_agent=user_agent,
                    revision=revision,
                    subfolder=subfolder or "",
-                   dduf_entries=dduf_entries,
                )
                # TODO: https://github.com/huggingface/diffusers/issues/10013
-               if hf_quantizer is not None or dduf_entries:
-                   model_file = _merge_sharded_checkpoints(
-                       sharded_ckpt_cached_folder, sharded_metadata, dduf_entries=dduf_entries
-                   )
+               if hf_quantizer is not None:
+                   model_file = _merge_sharded_checkpoints(sharded_ckpt_cached_folder, sharded_metadata)
                    logger.info("Merged sharded checkpoints as `hf_quantizer` is not None.")
                    is_sharded = False

@@ -850,7 +843,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                    subfolder=subfolder,
                    user_agent=user_agent,
                    commit_hash=commit_hash,
-                   dduf_entries=dduf_entries,
                )

        except IOError as e:

@@ -874,7 +866,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                    subfolder=subfolder,
                    user_agent=user_agent,
                    commit_hash=commit_hash,
-                   dduf_entries=dduf_entries,
                )

        if low_cpu_mem_usage:

@@ -896,9 +887,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                # TODO (sayakpaul, SunMarc): remove this after model loading refactor
                else:
                    param_device = torch.device(torch.cuda.current_device())
-               state_dict = load_state_dict(
-                   model_file, variant=variant, dduf_entries=dduf_entries, disable_mmap=disable_mmap
-               )
+               state_dict = load_state_dict(model_file, variant=variant, disable_mmap=disable_mmap)
                model._convert_deprecated_attention_blocks(state_dict)

                # move the params from meta device to cpu

@@ -994,9 +983,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
            else:
                model = cls.from_config(config, **unused_kwargs)

-               state_dict = load_state_dict(
-                   model_file, variant=variant, dduf_entries=dduf_entries, disable_mmap=disable_mmap
-               )
+               state_dict = load_state_dict(model_file, variant=variant, disable_mmap=disable_mmap)
                model._convert_deprecated_attention_blocks(state_dict)

                model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model(
src/diffusers/pipelines/mochi/pipeline_mochi.py

@@ -21,7 +21,7 @@ from transformers import T5EncoderModel, T5TokenizerFast

from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...loaders import Mochi1LoraLoaderMixin
-from ...models.autoencoders import AutoencoderKLMochi
+from ...models.autoencoders import AutoencoderKL
from ...models.transformers import MochiTransformer3DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (

@@ -151,8 +151,8 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
            Conditional Transformer architecture to denoise the encoded video latents.
        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
-       vae ([`AutoencoderKLMochi`]):
-           Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.
+       vae ([`AutoencoderKL`]):
+           Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`T5EncoderModel`]):
            [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically
            the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.

@@ -171,7 +171,7 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
    def __init__(
        self,
        scheduler: FlowMatchEulerDiscreteScheduler,
-       vae: AutoencoderKLMochi,
+       vae: AutoencoderKL,
        text_encoder: T5EncoderModel,
        tokenizer: T5TokenizerFast,
        transformer: MochiTransformer3DModel,
src/diffusers/pipelines/pag/pipeline_pag_sana.py

@@ -16,7 +16,6 @@ import html
import inspect
import re
import urllib.parse as ul
-import warnings
from typing import Callable, Dict, List, Optional, Tuple, Union

import torch

@@ -42,7 +41,6 @@ from ..pixart_alpha.pipeline_pixart_alpha import (
    ASPECT_RATIO_1024_BIN,
)
from ..pixart_alpha.pipeline_pixart_sigma import ASPECT_RATIO_2048_BIN
-from ..sana.pipeline_sana import ASPECT_RATIO_4096_BIN
from .pag_utils import PAGMixin


@@ -641,7 +639,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
        negative_prompt_attention_mask: Optional[torch.Tensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
-       clean_caption: bool = False,
+       clean_caption: bool = True,
        use_resolution_binning: bool = True,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],

@@ -757,9 +755,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
            callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs

        if use_resolution_binning:
-           if self.transformer.config.sample_size == 128:
-               aspect_ratio_bin = ASPECT_RATIO_4096_BIN
-           elif self.transformer.config.sample_size == 64:
+           if self.transformer.config.sample_size == 64:
                aspect_ratio_bin = ASPECT_RATIO_2048_BIN
            elif self.transformer.config.sample_size == 32:
                aspect_ratio_bin = ASPECT_RATIO_1024_BIN

@@ -916,14 +912,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
            image = latents
        else:
            latents = latents.to(self.vae.dtype)
-           try:
-               image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-           except torch.cuda.OutOfMemoryError as e:
-               warnings.warn(
-                   f"{e}. \n"
-                   f"Try to use VAE tiling for large images. For example: \n"
-                   f"pipe.vae.enable_tiling(tile_sample_min_width=512, tile_sample_min_height=512)"
-               )
+           image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
        if use_resolution_binning:
            image = self.image_processor.resize_and_crop_tensor(image, orig_width, orig_height)
src/diffusers/pipelines/pipeline_loading_utils.py

@@ -12,19 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import importlib
import os
import re
import warnings
from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union

import requests
import torch
-from huggingface_hub import DDUFEntry, ModelCard, model_info, snapshot_download
-from huggingface_hub.utils import OfflineModeIsEnabled, validate_hf_hub_args
+from huggingface_hub import ModelCard, model_info
+from huggingface_hub.utils import validate_hf_hub_args
from packaging import version
from requests.exceptions import HTTPError

from .. import __version__
from ..utils import (

@@ -38,16 +38,14 @@ from ..utils import (
    is_accelerate_available,
    is_peft_available,
    is_transformers_available,
-   is_transformers_version,
    logging,
)
from ..utils.torch_utils import is_compiled_module
-from .transformers_loading_utils import _load_tokenizer_from_dduf, _load_transformers_model_from_dduf


if is_transformers_available():
    import transformers
-   from transformers import PreTrainedModel, PreTrainedTokenizerBase
+   from transformers import PreTrainedModel
    from transformers.utils import FLAX_WEIGHTS_NAME as TRANSFORMERS_FLAX_WEIGHTS_NAME
    from transformers.utils import SAFE_WEIGHTS_NAME as TRANSFORMERS_SAFE_WEIGHTS_NAME
    from transformers.utils import WEIGHTS_NAME as TRANSFORMERS_WEIGHTS_NAME

@@ -629,7 +627,6 @@ def load_sub_model(
    low_cpu_mem_usage: bool,
    cached_folder: Union[str, os.PathLike],
    use_safetensors: bool,
-   dduf_entries: Optional[Dict[str, DDUFEntry]],
):
    """Helper method to load the module `name` from `library_name` and `class_name`"""

@@ -666,7 +663,7 @@ def load_sub_model(
            f" any of the loading methods defined in {ALL_IMPORTABLE_CLASSES}."
        )

-   load_method = _get_load_method(class_obj, load_method_name, is_dduf=dduf_entries is not None)
+   load_method = getattr(class_obj, load_method_name)

    # add kwargs to loading method
    diffusers_module = importlib.import_module(__name__.split(".")[0])

@@ -724,10 +721,7 @@ def load_sub_model(
        loading_kwargs["low_cpu_mem_usage"] = False

    # check if the module is in a subdirectory
-   if dduf_entries:
-       loading_kwargs["dduf_entries"] = dduf_entries
-       loaded_sub_model = load_method(name, **loading_kwargs)
-   elif os.path.isdir(os.path.join(cached_folder, name)):
+   if os.path.isdir(os.path.join(cached_folder, name)):
        loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs)
    else:
        # else load from the root directory

@@ -752,22 +746,6 @@ def load_sub_model(
    return loaded_sub_model


-def _get_load_method(class_obj: object, load_method_name: str, is_dduf: bool) -> Callable:
-   """
-   Return the method to load the sub model.
-
-   In practice, this method will return the `"from_pretrained"` (or `load_method_name`) method of the class object
-   except if loading from a DDUF checkpoint. In that case, transformers models and tokenizers have a specific loading
-   method that we need to use.
-   """
-   if is_dduf:
-       if issubclass(class_obj, PreTrainedTokenizerBase):
-           return lambda *args, **kwargs: _load_tokenizer_from_dduf(class_obj, *args, **kwargs)
-       if issubclass(class_obj, PreTrainedModel):
-           return lambda *args, **kwargs: _load_transformers_model_from_dduf(class_obj, *args, **kwargs)
-   return getattr(class_obj, load_method_name)
-
-
def _fetch_class_library_tuple(module):
    # import it here to avoid circular import
    diffusers_module = importlib.import_module(__name__.split(".")[0])

@@ -990,70 +968,3 @@ def _get_ignore_patterns(
        )

    return ignore_patterns
-
-
-def _download_dduf_file(
-   pretrained_model_name: str,
-   dduf_file: str,
-   pipeline_class_name: str,
-   cache_dir: str,
-   proxies: str,
-   local_files_only: bool,
-   token: str,
-   revision: str,
-):
-   model_info_call_error = None
-   if not local_files_only:
-       try:
-           info = model_info(pretrained_model_name, token=token, revision=revision)
-       except (HTTPError, OfflineModeIsEnabled, requests.ConnectionError) as e:
-           logger.warning(f"Couldn't connect to the Hub: {e}.\nWill try to load from local cache.")
-           local_files_only = True
-           model_info_call_error = e  # save error to reraise it if model is not cached locally
-
-   if (
-       not local_files_only
-       and dduf_file is not None
-       and dduf_file not in (sibling.rfilename for sibling in info.siblings)
-   ):
-       raise ValueError(f"Requested {dduf_file} file is not available in {pretrained_model_name}.")
-
-   try:
-       user_agent = {"pipeline_class": pipeline_class_name, "dduf": True}
-       cached_folder = snapshot_download(
-           pretrained_model_name,
-           cache_dir=cache_dir,
-           proxies=proxies,
-           local_files_only=local_files_only,
-           token=token,
-           revision=revision,
-           allow_patterns=[dduf_file],
-           user_agent=user_agent,
-       )
-       return cached_folder
-   except FileNotFoundError:
-       # Means we tried to load pipeline with `local_files_only=True` but the files have not been found in local cache.
-       # This can happen in two cases:
-       # 1. If the user passed `local_files_only=True` => we raise the error directly
-       # 2. If we forced `local_files_only=True` when `model_info` failed => we raise the initial error
-       if model_info_call_error is None:
-           # 1. user passed `local_files_only=True`
-           raise
-       else:
-           # 2. we forced `local_files_only=True` when `model_info` failed
-           raise EnvironmentError(
-               f"Cannot load model {pretrained_model_name}: model is not cached locally and an error occurred"
-               " while trying to fetch metadata from the Hub. Please check out the root cause in the stacktrace"
-               " above."
-           ) from model_info_call_error
-
-
-def _maybe_raise_error_for_incorrect_transformers(config_dict):
-   has_transformers_component = False
-   for k in config_dict:
-       if isinstance(config_dict[k], list):
-           has_transformers_component = config_dict[k][0] == "transformers"
-           if has_transformers_component:
-               break
-   if has_transformers_component and not is_transformers_version(">", "4.47.1"):
-       raise ValueError("Please upgrade your `transformers` installation to the latest version to use DDUF.")
src/diffusers/pipelines/pipeline_utils.py

@@ -29,12 +29,10 @@ import PIL.Image
import requests
import torch
from huggingface_hub import (
-   DDUFEntry,
    ModelCard,
    create_repo,
    hf_hub_download,
    model_info,
-   read_dduf_file,
    snapshot_download,
)
from huggingface_hub.utils import OfflineModeIsEnabled, validate_hf_hub_args

@@ -74,7 +72,6 @@ from .pipeline_loading_utils import (
    CONNECTED_PIPES_KEYS,
    CUSTOM_PIPELINE_FILE_NAME,
    LOADABLE_CLASSES,
-   _download_dduf_file,
    _fetch_class_library_tuple,
    _get_custom_components_and_folders,
    _get_custom_pipeline_class,

@@ -82,7 +79,6 @@ from .pipeline_loading_utils import (
    _get_ignore_patterns,
    _get_pipeline_class,
    _identify_model_variants,
-   _maybe_raise_error_for_incorrect_transformers,
    _maybe_raise_warning_for_inpainting,
    _resolve_custom_pipeline_and_cls,
    _unwrap_model,

@@ -222,7 +218,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
-
            kwargs (`Dict[str, Any]`, *optional*):
                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """

@@ -536,7 +531,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights
                saved using
                [`~DiffusionPipeline.save_pretrained`].
-           - A path to a *directory* (for example `./my_pipeline_directory/`) containing a dduf file
        torch_dtype (`str` or `torch.dtype`, *optional*):
            Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
            dtype is automatically derived from the model's weights.

@@ -631,8 +625,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            variant (`str`, *optional*):
                Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when
                loading `from_flax`.
-           dduf_file(`str`, *optional*):
-               Load weights from the specified dduf file.

        <Tip>

@@ -682,7 +674,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        offload_state_dict = kwargs.pop("offload_state_dict", False)
        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
        variant = kwargs.pop("variant", None)
-       dduf_file = kwargs.pop("dduf_file", None)
        use_safetensors = kwargs.pop("use_safetensors", None)
        use_onnx = kwargs.pop("use_onnx", None)
        load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)

@@ -731,12 +722,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                " dispatching. Please make sure to set `low_cpu_mem_usage=True`."
            )

-       if dduf_file:
-           if custom_pipeline:
-               raise NotImplementedError("Custom pipelines are not supported with DDUF at the moment.")
-           if load_connected_pipeline:
-               raise NotImplementedError("Connected pipelines are not supported with DDUF at the moment.")
-
        # 1. Download the checkpoints and configs
        # use snapshot download here to get it working from from_pretrained
        if not os.path.isdir(pretrained_model_name_or_path):

@@ -759,7 +744,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                custom_pipeline=custom_pipeline,
                custom_revision=custom_revision,
                variant=variant,
-               dduf_file=dduf_file,
                load_connected_pipeline=load_connected_pipeline,
                **kwargs,
            )

@@ -781,17 +765,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            )
            logger.warning(warn_msg)

-       dduf_entries = None
-       if dduf_file:
-           dduf_file_path = os.path.join(cached_folder, dduf_file)
-           dduf_entries = read_dduf_file(dduf_file_path)
-           # The reader contains already all the files needed, no need to check it again
-           cached_folder = ""
-
-       config_dict = cls.load_config(cached_folder, dduf_entries=dduf_entries)
-
-       if dduf_file:
-           _maybe_raise_error_for_incorrect_transformers(config_dict)
+       config_dict = cls.load_config(cached_folder)

        # pop out "_ignore_files" as it is only needed for download
        config_dict.pop("_ignore_files", None)

@@ -969,7 +943,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                low_cpu_mem_usage=low_cpu_mem_usage,
                cached_folder=cached_folder,
                use_safetensors=use_safetensors,
-               dduf_entries=dduf_entries,
            )
            logger.info(
                f"Loaded {name} as {class_name} from `{name}` subfolder of {pretrained_model_name_or_path}."
            )

@@ -1283,8 +1256,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            variant (`str`, *optional*):
                Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when
                loading `from_flax`.
-           dduf_file(`str`, *optional*):
-               Load weights from the specified DDUF file.
            use_safetensors (`bool`, *optional*, defaults to `None`):
                If set to `None`, the safetensors weights are downloaded if they're available **and** if the
                safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors

@@ -1325,23 +1296,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        use_onnx = kwargs.pop("use_onnx", None)
        load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)
        trust_remote_code = kwargs.pop("trust_remote_code", False)
-       dduf_file: Optional[Dict[str, DDUFEntry]] = kwargs.pop("dduf_file", None)
-
-       if dduf_file:
-           if custom_pipeline:
-               raise NotImplementedError("Custom pipelines are not supported with DDUF at the moment.")
-           if load_connected_pipeline:
-               raise NotImplementedError("Connected pipelines are not supported with DDUF at the moment.")
-           return _download_dduf_file(
-               pretrained_model_name=pretrained_model_name,
-               dduf_file=dduf_file,
-               pipeline_class_name=cls.__name__,
-               cache_dir=cache_dir,
-               proxies=proxies,
-               local_files_only=local_files_only,
-               token=token,
-               revision=revision,
-           )

        allow_pickle = False
        if use_safetensors is None:

@@ -1421,6 +1375,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        allow_patterns += [f"{custom_pipeline}.py"] if f"{custom_pipeline}.py" in filenames else []
        # also allow downloading config.json files with the model
        allow_patterns += [os.path.join(k, "config.json") for k in model_folder_names]
+
        allow_patterns += [
            SCHEDULER_CONFIG_NAME,
            CONFIG_NAME,

@@ -1516,6 +1471,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            user_agent=user_agent,
        )
+
        # retrieve pipeline class from local file
        cls_name = cls.load_config(os.path.join(cached_folder, "model_index.json")).get("_class_name", None)
        cls_name = cls_name[4:] if isinstance(cls_name, str) and cls_name.startswith("Flax") else cls_name
src/diffusers/pipelines/sana/pipeline_sana.py

@@ -16,7 +16,6 @@ import html
import inspect
import re
import urllib.parse as ul
-import warnings
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch

@@ -954,14 +953,7 @@ class SanaPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
            image = latents
        else:
            latents = latents.to(self.vae.dtype)
-           try:
-               image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-           except torch.cuda.OutOfMemoryError as e:
-               warnings.warn(
-                   f"{e}. \n"
-                   f"Try to use VAE tiling for large images. For example: \n"
-                   f"pipe.vae.enable_tiling(tile_sample_min_width=512, tile_sample_min_height=512)"
-               )
+           image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
        if use_resolution_binning:
            image = self.image_processor.resize_and_crop_tensor(image, orig_width, orig_height)
src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py

@@ -13,21 +13,19 @@
# limitations under the License.

import inspect
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Callable, Dict, List, Optional, Union

import torch
from transformers import (
-   BaseImageProcessor,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
-   PreTrainedModel,
    T5EncoderModel,
    T5TokenizerFast,
)

from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, SD3IPAdapterMixin, SD3LoraLoaderMixin
+from ...loaders import FromSingleFileMixin, SD3LoraLoaderMixin
from ...models.autoencoders import AutoencoderKL
from ...models.transformers import SD3Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler

@@ -164,7 +162,7 @@ def retrieve_timesteps(
    return timesteps, num_inference_steps


-class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingleFileMixin, SD3IPAdapterMixin):
+class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingleFileMixin):
    r"""
    Args:
        transformer ([`SD3Transformer2DModel`]):

@@ -196,14 +194,10 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
        tokenizer_3 (`T5TokenizerFast`):
            Tokenizer of class
            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
-       image_encoder (`PreTrainedModel`, *optional*):
-           Pre-trained Vision Model for IP Adapter.
-       feature_extractor (`BaseImageProcessor`, *optional*):
-           Image processor for IP Adapter.
    """

-   model_cpu_offload_seq = "text_encoder->text_encoder_2->text_encoder_3->image_encoder->transformer->vae"
-   _optional_components = ["image_encoder", "feature_extractor"]
+   model_cpu_offload_seq = "text_encoder->text_encoder_2->text_encoder_3->transformer->vae"
+   _optional_components = []
    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "negative_pooled_prompt_embeds"]

    def __init__(

@@ -217,8 +211,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
        tokenizer_2: CLIPTokenizer,
        text_encoder_3: T5EncoderModel,
        tokenizer_3: T5TokenizerFast,
-       image_encoder: PreTrainedModel = None,
-       feature_extractor: BaseImageProcessor = None,
    ):
        super().__init__()

@@ -232,8 +224,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
            tokenizer_3=tokenizer_3,
            transformer=transformer,
            scheduler=scheduler,
-           image_encoder=image_encoder,
-           feature_extractor=feature_extractor,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16

@@ -828,10 +818,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
    def do_classifier_free_guidance(self):
        return self._guidance_scale > 1

-   @property
-   def joint_attention_kwargs(self):
-       return self._joint_attention_kwargs
-
    @property
    def num_timesteps(self):
        return self._num_timesteps

@@ -840,84 +826,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
    def interrupt(self):
        return self._interrupt

-   # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.encode_image
-   def encode_image(self, image: PipelineImageInput, device: torch.device) -> torch.Tensor:
-       """Encodes the given image into a feature representation using a pre-trained image encoder.
-
-       Args:
-           image (`PipelineImageInput`):
-               Input image to be encoded.
-           device: (`torch.device`):
-               Torch device.
-
-       Returns:
-           `torch.Tensor`: The encoded image feature representation.
-       """
-       if not isinstance(image, torch.Tensor):
-           image = self.feature_extractor(image, return_tensors="pt").pixel_values
-
-       image = image.to(device=device, dtype=self.dtype)
-
-       return self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
-
-   # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_ip_adapter_image_embeds
-   def prepare_ip_adapter_image_embeds(
-       self,
-       ip_adapter_image: Optional[PipelineImageInput] = None,
-       ip_adapter_image_embeds: Optional[torch.Tensor] = None,
-       device: Optional[torch.device] = None,
-       num_images_per_prompt: int = 1,
-       do_classifier_free_guidance: bool = True,
-   ) -> torch.Tensor:
-       """Prepares image embeddings for use in the IP-Adapter.
-
-       Either `ip_adapter_image` or `ip_adapter_image_embeds` must be passed.
-
-       Args:
-           ip_adapter_image (`PipelineImageInput`, *optional*):
-               The input image to extract features from for IP-Adapter.
-           ip_adapter_image_embeds (`torch.Tensor`, *optional*):
-               Precomputed image embeddings.
-           device: (`torch.device`, *optional*):
-               Torch device.
-           num_images_per_prompt (`int`, defaults to 1):
-               Number of images that should be generated per prompt.
-           do_classifier_free_guidance (`bool`, defaults to True):
-               Whether to use classifier free guidance or not.
-       """
-       device = device or self._execution_device
-
-       if ip_adapter_image_embeds is not None:
-           if do_classifier_free_guidance:
-               single_negative_image_embeds, single_image_embeds = ip_adapter_image_embeds.chunk(2)
-           else:
-               single_image_embeds = ip_adapter_image_embeds
-       elif ip_adapter_image is not None:
-           single_image_embeds = self.encode_image(ip_adapter_image, device)
-           if do_classifier_free_guidance:
-               single_negative_image_embeds = torch.zeros_like(single_image_embeds)
-       else:
-           raise ValueError("Neither `ip_adapter_image` nor `ip_adapter_image_embeds` were provided.")
-
-       image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
-
-       if do_classifier_free_guidance:
-           negative_image_embeds = torch.cat([single_negative_image_embeds] * num_images_per_prompt, dim=0)
-           image_embeds = torch.cat([negative_image_embeds, image_embeds], dim=0)
-
-       return image_embeds.to(device=device)
-
-   # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.enable_sequential_cpu_offload
-   def enable_sequential_cpu_offload(self, *args, **kwargs):
-       if self.image_encoder is not None and "image_encoder" not in self._exclude_from_cpu_offload:
-           logger.warning(
-               "`pipe.enable_sequential_cpu_offload()` might fail for `image_encoder` if it uses "
-               "`torch.nn.MultiheadAttention`. You can exclude `image_encoder` from CPU offloading by calling "
-               "`pipe._exclude_from_cpu_offload.append('image_encoder')` before `pipe.enable_sequential_cpu_offload()`."
-           )
-
-       super().enable_sequential_cpu_offload(*args, **kwargs)
-
    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(

@@ -945,11 +853,8 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
        negative_prompt_embeds: Optional[torch.Tensor] = None,
        pooled_prompt_embeds: Optional[torch.Tensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
-       ip_adapter_image: Optional[PipelineImageInput] = None,
-       ip_adapter_image_embeds: Optional[torch.Tensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
-       joint_attention_kwargs: Optional[Dict[str, Any]] = None,
        clip_skip: Optional[int] = None,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],

@@ -985,9 +890,9 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
            mask_image_latent (`torch.Tensor`, `List[torch.Tensor]`):
                `Tensor` representing an image batch to mask `image` generated by VAE. If not provided, the mask
                latents tensor will be generated by `mask_image`.
-           height (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
+           height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. This is set to 1024 by default for the best results.
-           width (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
+           width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated image. This is set to 1024 by default for the best results.
            padding_mask_crop (`int`, *optional*, defaults to `None`):
                The size of margin in the crop to be applied to the image and masking. If `None`, no crop is applied to

@@ -1048,22 +953,12 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
-           ip_adapter_image (`PipelineImageInput`, *optional*):
-               Optional image input to work with IP Adapters.
-           ip_adapter_image_embeds (`torch.Tensor`, *optional*):
-               Pre-generated image embeddings for IP-Adapter. Should be a tensor of shape `(batch_size, num_images,
-               emb_dim)`. It should contain the negative image embedding if `do_classifier_free_guidance` is set to
-               `True`. If not provided, embeddings are computed from the `ip_adapter_image` input argument.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] instead of
                a plain tuple.
-           joint_attention_kwargs (`dict`, *optional*):
-               A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
-               `self.processor` in
-               [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            callback_on_step_end (`Callable`, *optional*):
                A function that is called at the end of each denoising step during inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,

@@ -1111,7 +1006,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro

        self._guidance_scale = guidance_scale
        self._clip_skip = clip_skip
-       self._joint_attention_kwargs = joint_attention_kwargs
        self._interrupt = False

        # 2. Define call parameters

@@ -1266,22 +1160,7 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
                f"The transformer {self.transformer.__class__} should have 16 input channels or 33 input channels, not {self.transformer.config.in_channels}."
            )

-       # 7. Prepare image embeddings
-       if (ip_adapter_image is not None and self.is_ip_adapter_active) or ip_adapter_image_embeds is not None:
-           ip_adapter_image_embeds = self.prepare_ip_adapter_image_embeds(
-               ip_adapter_image,
-               ip_adapter_image_embeds,
-               device,
-               batch_size * num_images_per_prompt,
-               self.do_classifier_free_guidance,
-           )
-
-           if self.joint_attention_kwargs is None:
-               self._joint_attention_kwargs = {"ip_adapter_image_embeds": ip_adapter_image_embeds}
-           else:
-               self._joint_attention_kwargs.update(ip_adapter_image_embeds=ip_adapter_image_embeds)
-
-       # 8. Denoising loop
+       # 7. Denoising loop
        num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
        self._num_timesteps = len(timesteps)
        with self.progress_bar(total=num_inference_steps) as progress_bar:

@@ -1302,7 +1181,6 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
                    timestep=timestep,
                    encoder_hidden_states=prompt_embeds,
                    pooled_projections=pooled_prompt_embeds,
-                   joint_attention_kwargs=self.joint_attention_kwargs,
                    return_dict=False,
                )[0]
@@ -1,121 +0,0 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
import tempfile
from typing import TYPE_CHECKING, Dict

from huggingface_hub import DDUFEntry
from tqdm import tqdm

from ..utils import is_safetensors_available, is_transformers_available, is_transformers_version


if TYPE_CHECKING:
    from transformers import PreTrainedModel, PreTrainedTokenizer

if is_transformers_available():
    from transformers import PreTrainedModel, PreTrainedTokenizer

if is_safetensors_available():
    import safetensors.torch


def _load_tokenizer_from_dduf(
    cls: "PreTrainedTokenizer", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
) -> "PreTrainedTokenizer":
    """
    Load a tokenizer from a DDUF archive.

    In practice, `transformers` does not provide a way to load a tokenizer from a DDUF archive. This function is a
    workaround: it extracts the tokenizer files from the DDUF archive and loads the tokenizer from the extracted
    files. Extraction has an extra cost, but the impact is limited since tokenizer files are usually small.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        for entry_name, entry in dduf_entries.items():
            if entry_name.startswith(name + "/"):
                tmp_entry_path = os.path.join(tmp_dir, *entry_name.split("/"))
                # need to create intermediary directories if they don't exist
                os.makedirs(os.path.dirname(tmp_entry_path), exist_ok=True)
                with open(tmp_entry_path, "wb") as f:
                    with entry.as_mmap() as mm:
                        f.write(mm)
        return cls.from_pretrained(os.path.dirname(tmp_entry_path), **kwargs)


def _load_transformers_model_from_dduf(
    cls: "PreTrainedModel", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
) -> "PreTrainedModel":
    """
    Load a transformers model from a DDUF archive.

    In practice, `transformers` does not provide a way to load a model from a DDUF archive. This function is a
    workaround: it instantiates a model from the config file and loads the weights from the DDUF archive directly.
    """
    config_file = dduf_entries.get(f"{name}/config.json")
    if config_file is None:
        raise EnvironmentError(
            f"Could not find a config.json file for component {name} in DDUF file (contains {dduf_entries.keys()})."
        )
    generation_config = dduf_entries.get(f"{name}/generation_config.json", None)

    weight_files = [
        entry
        for entry_name, entry in dduf_entries.items()
        if entry_name.startswith(f"{name}/") and entry_name.endswith(".safetensors")
    ]
    if not weight_files:
        raise EnvironmentError(
            f"Could not find any weight file for component {name} in DDUF file (contains {dduf_entries.keys()})."
        )
    if not is_safetensors_available():
        raise EnvironmentError(
            "Safetensors is not available, cannot load model from DDUF. Please `pip install safetensors`."
        )
    if is_transformers_version("<", "4.47.0"):
        raise ImportError(
            "You need to install `transformers>=4.47.0` in order to load a transformers model from a DDUF file. "
            "You can install it with: `pip install --upgrade transformers`"
        )

    with tempfile.TemporaryDirectory() as tmp_dir:
        from transformers import AutoConfig, GenerationConfig

        tmp_config_file = os.path.join(tmp_dir, "config.json")
        with open(tmp_config_file, "w") as f:
            f.write(config_file.read_text())
        config = AutoConfig.from_pretrained(tmp_config_file)
        if generation_config is not None:
            tmp_generation_config_file = os.path.join(tmp_dir, "generation_config.json")
            with open(tmp_generation_config_file, "w") as f:
                f.write(generation_config.read_text())
            generation_config = GenerationConfig.from_pretrained(tmp_generation_config_file)
        state_dict = {}
        with contextlib.ExitStack() as stack:
            for entry in tqdm(weight_files, desc="Loading state_dict"):  # Loop over safetensors files
                # Memory-map the safetensors file
                mmap = stack.enter_context(entry.as_mmap())
                # Load tensors from the memory-mapped file
                tensors = safetensors.torch.load(mmap)
                # Update the state dictionary with tensors
                state_dict.update(tensors)
            return cls.from_pretrained(
                pretrained_model_name_or_path=None,
                config=config,
                generation_config=generation_config,
                state_dict=state_dict,
                **kwargs,
            )
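For orientation, a minimal sketch of how these (now removed) helpers were meant to be driven, assuming a local `flux.dduf` archive containing `tokenizer/` and `text_encoder/` entries; the archive name and component names are illustrative:

```python
# Illustrative sketch, not part of the diff: drive the helpers above by hand.
from huggingface_hub import read_dduf_file
from transformers import AutoModel, AutoTokenizer

# read_dduf_file returns a Dict[str, DDUFEntry] keyed by archive path.
dduf_entries = read_dduf_file("flux.dduf")

# Each helper takes the loading class, the component's folder name inside the
# archive, and the entry mapping.
tokenizer = _load_tokenizer_from_dduf(AutoTokenizer, "tokenizer", dduf_entries)
text_encoder = _load_transformers_model_from_dduf(AutoModel, "text_encoder", dduf_entries)
```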
@@ -70,7 +70,6 @@ from .import_utils import (
    is_gguf_available,
    is_gguf_version,
    is_google_colab,
    is_hf_hub_version,
    is_inflect_available,
    is_invisible_watermark_available,
    is_k_diffusion_available,
@@ -101,7 +100,7 @@ from .import_utils import (
    is_xformers_available,
    requires_backends,
)
from .loading_utils import get_module_from_name, get_submodule_by_name, load_image, load_video
from .loading_utils import get_module_from_name, load_image, load_video
from .logging import get_logger
from .outputs import BaseOutput
from .peft_utils import (

@@ -26,7 +26,6 @@ from typing import Dict, List, Optional, Union
from uuid import uuid4

from huggingface_hub import (
    DDUFEntry,
    ModelCard,
    ModelCardData,
    create_repo,
@@ -292,26 +291,9 @@ def _get_model_file(
    user_agent: Optional[Union[Dict, str]] = None,
    revision: Optional[str] = None,
    commit_hash: Optional[str] = None,
    dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
):
    pretrained_model_name_or_path = str(pretrained_model_name_or_path)

    if dduf_entries:
        if subfolder is not None:
            raise ValueError(
                "DDUF files only allow one level of directory (e.g. transformer/model1/model.safetensors is not allowed). "
                "Please check the DDUF structure."
            )
        model_file = (
            weights_name
            if pretrained_model_name_or_path == ""
            else "/".join([pretrained_model_name_or_path, weights_name])
        )
        if model_file in dduf_entries:
            return model_file
        else:
            raise EnvironmentError(f"No file named {weights_name} found in archive {dduf_entries.keys()}.")
    elif os.path.isfile(pretrained_model_name_or_path):
    if os.path.isfile(pretrained_model_name_or_path):
        return pretrained_model_name_or_path
    elif os.path.isdir(pretrained_model_name_or_path):
        if os.path.isfile(os.path.join(pretrained_model_name_or_path, weights_name)):
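As a reading aid (not part of the diff): inside a DDUF archive, entries are addressed by flat `component/file` keys, so the lookup above is a plain dictionary membership test. A small sketch with made-up entry names:

```python
# Illustrative entry names in a DDUF archive (one directory level only):
dduf_entries = {
    "transformer/config.json": ...,
    "transformer/diffusion_pytorch_model.safetensors": ...,
}
# _get_model_file joins the component name and weight name into a flat key:
model_file = "/".join(["transformer", "diffusion_pytorch_model.safetensors"])
assert model_file in dduf_entries
# A nested path such as "transformer/blocks/model.safetensors" would be rejected.
```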
@@ -437,7 +419,6 @@ def _get_checkpoint_shard_files(
    user_agent=None,
    revision=None,
    subfolder="",
    dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
):
    """
    For a given model:
@@ -449,18 +430,11 @@ def _get_checkpoint_shard_files(
    For the description of each arg, see [`PreTrainedModel.from_pretrained`]. `index_filename` is the full path to the
    index (downloaded and cached if `pretrained_model_name_or_path` is a model ID on the Hub).
    """
    if dduf_entries:
        if index_filename not in dduf_entries:
            raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.")
    else:
        if not os.path.isfile(index_filename):
            raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.")
    if not os.path.isfile(index_filename):
        raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.")

    if dduf_entries:
        index = json.loads(dduf_entries[index_filename].read_text())
    else:
        with open(index_filename, "r") as f:
            index = json.loads(f.read())
    with open(index_filename, "r") as f:
        index = json.loads(f.read())

    original_shard_filenames = sorted(set(index["weight_map"].values()))
    sharded_metadata = index["metadata"]
@@ -474,8 +448,6 @@ def _get_checkpoint_shard_files(
        pretrained_model_name_or_path, subfolder=subfolder, original_shard_filenames=original_shard_filenames
    )
        return shards_path, sharded_metadata
    elif dduf_entries:
        return shards_path, sharded_metadata

    # At this stage pretrained_model_name_or_path is a model identifier on the Hub
    allow_patterns = original_shard_filenames

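For context (added as a reading aid): the index file parsed above is the standard sharded-checkpoint index, a JSON document with a `metadata` block and a `weight_map` from parameter names to shard files. A minimal illustrative shape, with made-up values:

```python
# Minimal illustrative sharded-checkpoint index (values are invented):
index = {
    "metadata": {"total_size": 28632},
    "weight_map": {
        "transformer_blocks.0.attn.to_q.weight": "diffusion_pytorch_model-00001-of-00002.safetensors",
        "transformer_blocks.1.attn.to_q.weight": "diffusion_pytorch_model-00002-of-00002.safetensors",
    },
}
# The code above derives the shard list and metadata from it:
original_shard_filenames = sorted(set(index["weight_map"].values()))
sharded_metadata = index["metadata"]
```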
@@ -115,13 +115,6 @@ try:
except importlib_metadata.PackageNotFoundError:
    _transformers_available = False

_hf_hub_available = importlib.util.find_spec("huggingface_hub") is not None
try:
    _hf_hub_version = importlib_metadata.version("huggingface_hub")
    logger.debug(f"Successfully imported huggingface_hub version {_hf_hub_version}")
except importlib_metadata.PackageNotFoundError:
    _hf_hub_available = False


_inflect_available = importlib.util.find_spec("inflect") is not None
try:
@@ -774,21 +767,6 @@ def is_transformers_version(operation: str, version: str):
    return compare_versions(parse(_transformers_version), operation, version)


def is_hf_hub_version(operation: str, version: str):
    """
    Compares the current Hugging Face Hub version to a given reference with an operation.

    Args:
        operation (`str`):
            A string representation of an operator, such as `">"` or `"<="`
        version (`str`):
            A version string
    """
    if not _hf_hub_available:
        return False
    return compare_versions(parse(_hf_hub_version), operation, version)

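A quick usage note (illustrative, not part of the diff): the helper compares the installed `huggingface_hub` version against a reference and degrades to `False` when the package is missing, so callers can gate code paths on it:

```python
# Sketch: gate DDUF-related code paths on the installed huggingface_hub version.
if is_hf_hub_version(">=", "0.26.5"):
    ...  # DDUF entry points are available
else:
    ...  # fall back or raise a helpful error
```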
def is_accelerate_version(operation: str, version: str):
    """
    Compares the current Accelerate version to a given reference with an operation.

@@ -148,15 +148,3 @@ def get_module_from_name(module, tensor_name: str) -> Tuple[Any, str]:
        module = new_module
    tensor_name = splits[-1]
    return module, tensor_name


def get_submodule_by_name(root_module, module_path: str):
    current = root_module
    parts = module_path.split(".")
    for part in parts:
        if part.isdigit():
            idx = int(part)
            current = current[idx]  # e.g., for nn.ModuleList or nn.Sequential
        else:
            current = getattr(current, part)
    return current

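To make the traversal concrete, a small sketch (the `Block` module here is invented for illustration): dotted paths may mix attribute names with integer indices into `nn.ModuleList`/`nn.Sequential` containers.

```python
import torch.nn as nn

class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 4)])

block = Block()
# "layers.1" resolves to block.layers[1] because "1" is treated as an index.
second_linear = get_submodule_by_name(block, "layers.1")
assert second_linear is block.layers[1]
```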
@@ -478,18 +478,6 @@ def require_bitsandbytes_version_greater(bnb_version):
    return decorator


def require_hf_hub_version_greater(hf_hub_version):
    def decorator(test_case):
        correct_hf_hub_version = version.parse(
            version.parse(importlib.metadata.version("huggingface_hub")).base_version
        ) > version.parse(hf_hub_version)
        return unittest.skipUnless(
            correct_hf_hub_version, f"Test requires huggingface_hub with the version greater than {hf_hub_version}."
        )(test_case)

    return decorator


def require_gguf_version_greater_or_equal(gguf_version):
    def decorator(test_case):
        correct_gguf_version = is_gguf_available() and version.parse(

@@ -33,7 +33,6 @@ class CogVideoXTransformerTests(ModelTesterMixin, unittest.TestCase):
    model_class = CogVideoXTransformer3DModel
    main_input_name = "hidden_states"
    uses_custom_attn_processor = True
    model_split_percents = [0.7, 0.7, 0.8]

    @property
    def dummy_input(self):

@@ -33,7 +33,6 @@ class CogView3PlusTransformerTests(ModelTesterMixin, unittest.TestCase):
    model_class = CogView3PlusTransformer2DModel
    main_input_name = "hidden_states"
    uses_custom_attn_processor = True
    model_split_percents = [0.7, 0.6, 0.6]

    @property
    def dummy_input(self):

@@ -14,8 +14,6 @@

import gc
import inspect
import os
import tempfile
import unittest

import numpy as np
@@ -26,9 +24,7 @@ from diffusers import AllegroPipeline, AllegroTransformer3DModel, AutoencoderKLA
from diffusers.utils.testing_utils import (
    enable_full_determinism,
    numpy_cosine_similarity_distance,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    slow,
    torch_device,
)
@@ -301,35 +297,6 @@ class AllegroPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
            "VAE tiling should not affect the inference results",
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        # reimplement because it needs `enable_tiling()` on the loaded pipe.
        from huggingface_hub import export_folder_as_dduf

        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device="cpu")
        inputs.pop("generator")
        inputs["generator"] = torch.manual_seed(0)

        pipeline_out = pipe(**inputs)[0].cpu()

        with tempfile.TemporaryDirectory() as tmpdir:
            dduf_filename = os.path.join(tmpdir, f"{pipe.__class__.__name__.lower()}.dduf")
            pipe.save_pretrained(tmpdir, safe_serialization=True)
            export_folder_as_dduf(dduf_filename, folder_path=tmpdir)
            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, dduf_file=dduf_filename).to(torch_device)

            loaded_pipe.vae.enable_tiling()
            inputs["generator"] = torch.manual_seed(0)
            loaded_pipeline_out = loaded_pipe(**inputs)[0].cpu()

        assert np.allclose(pipeline_out, loaded_pipeline_out)


@slow
@require_torch_gpu

@@ -63,8 +63,6 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        ]
    )

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -70,8 +70,6 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        ]
    )

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = AudioLDM2UNet2DConditionModel(

@@ -60,8 +60,6 @@ class BlipDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        "prompt_reps",
    ]

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(

@@ -291,8 +291,6 @@ class StableDiffusionMultiControlNetPipelineFastTests(
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: add image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
@@ -525,8 +523,6 @@ class StableDiffusionMultiControlNetOneModelPipelineFastTests(
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: add image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -68,8 +68,6 @@ class BlipDiffusionControlNetPipelineFastTests(PipelineTesterMixin, unittest.Tes
        "prompt_reps",
    ]

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(

@@ -198,8 +198,6 @@ class StableDiffusionMultiControlNetPipelineFastTests(
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: add image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -257,8 +257,6 @@ class MultiControlNetInpaintPipelineFastTests(
    params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS
    batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -78,8 +78,6 @@ class ControlNetPipelineSDXLFastTests(
        }
    )

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -487,8 +487,6 @@ class StableDiffusionXLMultiControlNetPipelineFastTests(
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: add image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
@@ -694,8 +692,6 @@ class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: add image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -26,9 +26,7 @@ from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -91,11 +89,6 @@ class IFPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, unittest.T
    def test_xformers_attention_forwardGenerator_pass(self):
        self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3)

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -26,9 +26,7 @@ from diffusers.utils.testing_utils import (
    floats_tensor,
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -102,11 +100,6 @@ class IFImg2ImgPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, uni
            expected_max_diff=1e-2,
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -26,9 +26,7 @@ from diffusers.utils.testing_utils import (
    floats_tensor,
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -99,11 +97,6 @@ class IFImg2ImgSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineT
            expected_max_diff=1e-2,
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -26,9 +26,7 @@ from diffusers.utils.testing_utils import (
    floats_tensor,
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -99,11 +97,6 @@ class IFInpaintingPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin,
            expected_max_diff=1e-2,
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -26,9 +26,7 @@ from diffusers.utils.testing_utils import (
    floats_tensor,
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -101,11 +99,6 @@ class IFInpaintingSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipeli
            expected_max_diff=1e-2,
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -26,9 +26,7 @@ from diffusers.utils.testing_utils import (
    floats_tensor,
    load_numpy,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch_gpu,
    require_transformers_version_greater,
    skip_mps,
    slow,
    torch_device,
@@ -94,11 +92,6 @@ class IFSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMi
            expected_max_diff=1e-2,
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self):
        super().test_save_load_dduf(atol=1e-2, rtol=1e-2)


@slow
@require_torch_gpu

@@ -59,8 +59,6 @@ class I2VGenXLPipelineFastTests(SDFunctionTesterMixin, PipelineTesterMixin, unit
    # No `output_type`.
    required_optional_params = frozenset(["num_inference_steps", "generator", "latents", "return_dict"])

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        scheduler = DDIMScheduler(

@@ -204,8 +204,6 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Dummies()
        return dummy.get_dummy_components()

@@ -52,8 +52,6 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
    ]
    test_xformers_attention = True

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Dummies()
        prior_dummy = PriorDummies()
@@ -162,8 +160,6 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Img2ImgDummies()
        prior_dummy = PriorDummies()
@@ -273,8 +269,6 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummy = InpaintDummies()
        prior_dummy = PriorDummies()

@@ -226,8 +226,6 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()

@@ -220,8 +220,6 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()

@@ -184,8 +184,6 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Dummies()
        return dummy.get_dummy_components()

@@ -57,8 +57,6 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
    test_xformers_attention = True
    callback_cfg_params = ["image_embds"]

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Dummies()
        prior_dummy = PriorDummies()
@@ -183,8 +181,6 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
    test_xformers_attention = False
    callback_cfg_params = ["image_embds"]

    supports_dduf = False

    def get_dummy_components(self):
        dummy = Img2ImgDummies()
        prior_dummy = PriorDummies()
@@ -306,8 +302,6 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
    ]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummy = InpaintDummies()
        prior_dummy = PriorDummies()

@@ -186,8 +186,6 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
    callback_cfg_params = ["prompt_embeds", "text_encoder_hidden_states", "text_mask"]
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()

@@ -59,8 +59,6 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te
    ]
    test_xformers_attention = False

    supports_dduf = False

    @property
    def text_embedder_hidden_size(self):
        return 32

@@ -47,8 +47,6 @@ class KolorsPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"})

    supports_dduf = False

    def get_dummy_components(self, time_cond_proj_dim=None):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -51,8 +51,6 @@ class KolorsPipelineImg2ImgFastTests(PipelineTesterMixin, unittest.TestCase):
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"})

    supports_dduf = False

    # Copied from tests.pipelines.kolors.test_kolors.KolorsPipelineFastTests.get_dummy_components
    def get_dummy_components(self, time_cond_proj_dim=None):
        torch.manual_seed(0)

@@ -31,8 +31,6 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterM
    )
    batch_params = frozenset(["prompt", "negative_prompt"])

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        transformer = LuminaNextDiT2DModel(

@@ -65,8 +65,6 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        ]
    )

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -56,8 +56,6 @@ class KolorsPAGPipelineFastTests(
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"})

    supports_dduf = False

    # Copied from tests.pipelines.kolors.test_kolors.KolorsPipelineFastTests.get_dummy_components
    def get_dummy_components(self, time_cond_proj_dim=None):
        torch.manual_seed(0)

@@ -53,8 +53,6 @@ class SanaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    )
    test_xformers_attention = False

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        transformer = SanaTransformer2DModel(

@@ -82,8 +82,6 @@ class StableDiffusionXLPAGImg2ImgPipelineFastTests(
        {"add_text_embeds", "add_time_ids", "add_neg_time_ids"}
    )

    supports_dduf = False

    # based on tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl_img2img_pipeline.get_dummy_components
    def get_dummy_components(
        self, skip_first_text_encoder=False, time_cond_proj_dim=None, requires_aesthetics_score=False

@@ -82,8 +82,6 @@ class StableDiffusionXLPAGInpaintPipelineFastTests(
        {"add_text_embeds", "add_time_ids", "mask", "masked_image_latents"}
    )

    supports_dduf = False

    # based on tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl_inpaint.StableDiffusionXLInpaintPipelineFastTests.get_dummy_components
    def get_dummy_components(
        self, skip_first_text_encoder=False, time_cond_proj_dim=None, requires_aesthetics_score=False

@@ -46,8 +46,6 @@ class PaintByExamplePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    batch_params = IMAGE_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS
    image_params = frozenset([])  # TO_DO: update the image_params once refactored VaeImageProcessor.preprocess

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -50,8 +50,6 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    ]
    test_xformers_attention = False

    supports_dduf = False

    @property
    def text_embedder_hidden_size(self):
        return 16

@@ -70,7 +70,6 @@ class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    )
    # There is no xformers version of the StableAudioPipeline custom attention processor
    test_xformers_attention = False
    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)

@@ -76,8 +76,6 @@ class StableDiffusionDepth2ImgPipelineFastTests(
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"depth_mask"})

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -106,8 +106,6 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
            "tokenizer_3": tokenizer_3,
            "transformer": transformer,
            "vae": vae,
            "image_encoder": None,
            "feature_extractor": None,
        }

    def get_dummy_inputs(self, device, seed=0):

@@ -389,8 +389,6 @@ class StableDiffusionLightAdapterPipelineFastTests(AdapterTests, PipelineTesterM


class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterMixin, unittest.TestCase):
    supports_dduf = False

    def get_dummy_components(self, time_cond_proj_dim=None):
        return super().get_dummy_components("multi_adapter", time_cond_proj_dim=time_cond_proj_dim)


@@ -66,8 +66,6 @@ class GligenTextImagePipelineFastTests(
    image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -58,8 +58,6 @@ class StableDiffusionImageVariationPipelineFastTests(
    # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess
    image_latents_params = frozenset([])

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -422,8 +422,6 @@ class StableDiffusionXLAdapterPipelineFastTests(
class StableDiffusionXLMultiAdapterPipelineFastTests(
    StableDiffusionXLAdapterPipelineFastTests, PipelineTesterMixin, unittest.TestCase
):
    supports_dduf = False

    def get_dummy_components(self, time_cond_proj_dim=None):
        return super().get_dummy_components("multi_adapter", time_cond_proj_dim=time_cond_proj_dim)


@@ -77,8 +77,6 @@ class StableDiffusionXLImg2ImgPipelineFastTests(
        {"add_text_embeds", "add_time_ids", "add_neg_time_ids"}
    )

    supports_dduf = False

    def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -72,8 +72,6 @@ class StableDiffusionXLInpaintPipelineFastTests(
        }
    )

    supports_dduf = False

    def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(

@@ -51,8 +51,6 @@ class StableUnCLIPImg2ImgPipelineFastTests(
    )  # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess
    image_latents_params = frozenset([])

    supports_dduf = False

    def get_dummy_components(self):
        embedder_hidden_size = 32
        embedder_projection_dim = embedder_hidden_size

@@ -58,8 +58,6 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
        ]
    )

    supports_dduf = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNetSpatioTemporalConditionModel(

@@ -75,11 +75,9 @@ from diffusers.utils.testing_utils import (
    nightly,
    require_compel,
    require_flax,
    require_hf_hub_version_greater,
    require_onnxruntime,
    require_torch_2,
    require_torch_gpu,
    require_transformers_version_greater,
    run_test_in_subprocess,
    slow,
    torch_device,
@@ -983,18 +981,6 @@ class DownloadTests(unittest.TestCase):
        assert not any(f in ["vae/diffusion_pytorch_model.bin", "text_encoder/config.json"] for f in files)
        assert len(files) == 14

    def test_download_dduf_with_custom_pipeline_raises_error(self):
        with self.assertRaises(NotImplementedError):
            _ = DiffusionPipeline.download(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", custom_pipeline="my_pipeline"
            )

    def test_download_dduf_with_connected_pipeline_raises_error(self):
        with self.assertRaises(NotImplementedError):
            _ = DiffusionPipeline.download(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", load_connected_pipeline=True
            )

    def test_get_pipeline_class_from_flax(self):
        flax_config = {"_class_name": "FlaxStableDiffusionPipeline"}
        config = {"_class_name": "StableDiffusionPipeline"}
@@ -1816,55 +1802,6 @@ class PipelineFastTests(unittest.TestCase):
        sd.maybe_free_model_hooks()
        assert sd._offload_gpu_id == 5

    @parameterized.expand([torch.float32, torch.float16])
    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_load_dduf_from_hub(self, dtype):
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe = DiffusionPipeline.from_pretrained(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", cache_dir=tmpdir, torch_dtype=dtype
            ).to(torch_device)
            out_1 = pipe(prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np").images

            pipe.save_pretrained(tmpdir)
            loaded_pipe = DiffusionPipeline.from_pretrained(tmpdir, torch_dtype=dtype).to(torch_device)

            out_2 = loaded_pipe(
                prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np"
            ).images

            self.assertTrue(np.allclose(out_1, out_2, atol=1e-4, rtol=1e-4))

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_load_dduf_from_hub_local_files_only(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe = DiffusionPipeline.from_pretrained(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", cache_dir=tmpdir
            ).to(torch_device)
            out_1 = pipe(prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np").images

            local_files_pipe = DiffusionPipeline.from_pretrained(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", cache_dir=tmpdir, local_files_only=True
            ).to(torch_device)
            out_2 = local_files_pipe(
                prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np"
            ).images

            self.assertTrue(np.allclose(out_1, out_2, atol=1e-4, rtol=1e-4))

    def test_dduf_raises_error_with_custom_pipeline(self):
        with self.assertRaises(NotImplementedError):
            _ = DiffusionPipeline.from_pretrained(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", custom_pipeline="my_pipeline"
            )

    def test_dduf_raises_error_with_connected_pipeline(self):
        with self.assertRaises(NotImplementedError):
            _ = DiffusionPipeline.from_pretrained(
                "DDUF/tiny-flux-dev-pipe-dduf", dduf_file="fluxpipeline.dduf", load_connected_pipeline=True
            )

    def test_wrong_model(self):
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
        with self.assertRaises(ValueError) as error_context:
@@ -1875,27 +1812,6 @@ class PipelineFastTests(unittest.TestCase):
        assert "is of type" in str(error_context.exception)
        assert "but should be" in str(error_context.exception)

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_dduf_load_sharded_checkpoint_diffusion_model(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe = DiffusionPipeline.from_pretrained(
                "hf-internal-testing/tiny-flux-dev-pipe-sharded-checkpoint-DDUF",
                dduf_file="tiny-flux-dev-pipe-sharded-checkpoint.dduf",
                cache_dir=tmpdir,
            ).to(torch_device)

            out_1 = pipe(prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np").images

            pipe.save_pretrained(tmpdir)
            loaded_pipe = DiffusionPipeline.from_pretrained(tmpdir).to(torch_device)

            out_2 = loaded_pipe(
                prompt="dog", num_inference_steps=5, generator=torch.manual_seed(0), output_type="np"
            ).images

            self.assertTrue(np.allclose(out_1, out_2, atol=1e-4, rtol=1e-4))


@slow
@require_torch_gpu

@@ -43,9 +43,7 @@ from diffusers.utils.testing_utils import (
    CaptureLogger,
    require_accelerate_version_greater,
    require_accelerator,
    require_hf_hub_version_greater,
    require_torch,
    require_transformers_version_greater,
    skip_mps,
    torch_device,
)
@@ -988,8 +986,6 @@ class PipelineTesterMixin:

    test_xformers_attention = True

    supports_dduf = True

    def get_generator(self, seed):
        device = torch_device if torch_device != "mps" else "cpu"
        generator = torch.Generator(device).manual_seed(seed)
@@ -1994,39 +1990,6 @@ class PipelineTesterMixin:
            )
        )

    @require_hf_hub_version_greater("0.26.5")
    @require_transformers_version_greater("4.47.1")
    def test_save_load_dduf(self, atol=1e-4, rtol=1e-4):
        if not self.supports_dduf:
            return

        from huggingface_hub import export_folder_as_dduf

        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device="cpu")
        inputs.pop("generator")
        inputs["generator"] = torch.manual_seed(0)

        pipeline_out = pipe(**inputs)[0]

        with tempfile.TemporaryDirectory() as tmpdir:
            dduf_filename = os.path.join(tmpdir, f"{pipe.__class__.__name__.lower()}.dduf")
            pipe.save_pretrained(tmpdir, safe_serialization=True)
            export_folder_as_dduf(dduf_filename, folder_path=tmpdir)
            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, dduf_file=dduf_filename).to(torch_device)

            inputs["generator"] = torch.manual_seed(0)
            loaded_pipeline_out = loaded_pipe(**inputs)[0]

        if isinstance(pipeline_out, np.ndarray) and isinstance(loaded_pipeline_out, np.ndarray):
            assert np.allclose(pipeline_out, loaded_pipeline_out, atol=atol, rtol=rtol)
        elif isinstance(pipeline_out, torch.Tensor) and isinstance(loaded_pipeline_out, torch.Tensor):
            assert torch.allclose(pipeline_out, loaded_pipeline_out, atol=atol, rtol=rtol)

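A usage note (illustrative, not part of the diff): pipeline test classes inherit `supports_dduf = True` from this mixin, and the many `supports_dduf = False` hunks earlier in the diff are subclasses opting out, which turns `test_save_load_dduf` into an early return for those pipelines:

```python
# Sketch of how a pipeline test class opts out of the shared DDUF round-trip test.
# "MyPipelineFastTests" is a hypothetical class name for illustration.
class MyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    supports_dduf = False  # test_save_load_dduf returns early for this pipeline
```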
@is_staging_test
class PipelinePushToHubTester(unittest.TestCase):

@@ -66,7 +66,6 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
        "super_res_num_inference_steps",
    ]
    test_xformers_attention = False
    supports_dduf = False

    @property
    def text_embedder_hidden_size(self):

@@ -86,8 +86,6 @@ class UniDiffuserPipelineFastTests(
    # vae_latents, not latents, is the argument that corresponds to VAE latent inputs
    image_latents_params = frozenset(["vae_latents"])

    supports_dduf = False

    def get_dummy_components(self):
        unet = UniDiffuserModel.from_pretrained(
            "hf-internal-testing/unidiffuser-diffusers-test",

@@ -20,7 +20,6 @@ import unittest
import numpy as np
import pytest
import safetensors.torch
from huggingface_hub import hf_hub_download

from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel, SD3Transformer2DModel
from diffusers.utils import is_accelerate_version, logging
@@ -569,27 +568,6 @@ class SlowBnb4BitFluxTests(Base4bitTests):
        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        self.assertTrue(max_diff < 1e-3)

    def test_lora_loading(self):
        self.pipeline_4bit.load_lora_weights(
            hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd"
        )
        self.pipeline_4bit.set_adapters("hyper-sd", adapter_weights=0.125)

        output = self.pipeline_4bit(
            prompt=self.prompt,
            height=256,
            width=256,
            max_sequence_length=64,
            output_type="np",
            num_inference_steps=8,
            generator=torch.Generator().manual_seed(42),
        ).images
        out_slice = output[0, -3:, -3:, -1].flatten()
        expected_slice = np.array([0.5347, 0.5342, 0.5283, 0.5093, 0.4988, 0.5093, 0.5044, 0.5015, 0.4946])

        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        self.assertTrue(max_diff < 1e-3)


@slow
class BaseBnb4BitSerializationTests(Base4bitTests):

@@ -18,7 +18,6 @@ import unittest

import numpy as np
import pytest
from huggingface_hub import hf_hub_download

from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel, SD3Transformer2DModel, logging
from diffusers.utils import is_accelerate_version
@@ -31,7 +30,6 @@ from diffusers.utils.testing_utils import (
    numpy_cosine_similarity_distance,
    require_accelerate,
    require_bitsandbytes_version_greater,
    require_peft_version_greater,
    require_torch,
    require_torch_gpu,
    require_transformers_version_greater,
@@ -511,29 +509,6 @@ class SlowBnb8bitFluxTests(Base8bitTests):
        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        self.assertTrue(max_diff < 1e-3)

    @require_peft_version_greater("0.14.0")
    def test_lora_loading(self):
        self.pipeline_8bit.load_lora_weights(
            hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd"
        )
        self.pipeline_8bit.set_adapters("hyper-sd", adapter_weights=0.125)

        output = self.pipeline_8bit(
            prompt=self.prompt,
            height=256,
            width=256,
            max_sequence_length=64,
            output_type="np",
            num_inference_steps=8,
            generator=torch.manual_seed(42),
        ).images
        out_slice = output[0, -3:, -3:, -1].flatten()

        expected_slice = np.array([0.3916, 0.3916, 0.3887, 0.4243, 0.4155, 0.4233, 0.4570, 0.4531, 0.4248])

        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        self.assertTrue(max_diff < 1e-3)


@slow
class BaseBnb8bitSerializationTests(Base8bitTests):