mirror of
https://github.com/huggingface/diffusers.git
synced 2025-12-09 22:14:43 +08:00
Compare commits
3 Commits
revert_sch
...
harmonize-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be139feda5 | ||
|
|
6b7716d358 | ||
|
|
c1b2289529 |
8
.github/workflows/pr_test_fetcher.yml
vendored
8
.github/workflows/pr_test_fetcher.yml
vendored
@@ -1,6 +1,12 @@
|
||||
name: Fast tests for PRs - Test Fetcher
|
||||
|
||||
on: workflow_dispatch
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
push:
|
||||
branches:
|
||||
- ci-*
|
||||
|
||||
env:
|
||||
DIFFUSERS_IS_CI: yes
|
||||
|
||||
@@ -20,9 +20,6 @@ An attention processor is a class for applying different types of attention mech
|
||||
## AttnProcessor2_0
|
||||
[[autodoc]] models.attention_processor.AttnProcessor2_0
|
||||
|
||||
## FusedAttnProcessor2_0
|
||||
[[autodoc]] models.attention_processor.FusedAttnProcessor2_0
|
||||
|
||||
## LoRAAttnProcessor
|
||||
[[autodoc]] models.attention_processor.LoRAAttnProcessor
|
||||
|
||||
|
||||
@@ -174,10 +174,4 @@ Set `private=True` in the [`~diffusers.utils.PushToHubMixin.push_to_hub`] functi
|
||||
controlnet.push_to_hub("my-controlnet-model-private", private=True)
|
||||
```
|
||||
|
||||
Private repositories are only visible to you, and other users won't be able to clone the repository and your repository won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for.`
|
||||
|
||||
To load a model, scheduler, or pipeline from private or gated repositories, set `use_auth_token=True`:
|
||||
|
||||
```py
|
||||
model = ControlNetModel.from_pretrained("your-namespace/my-controlnet-model-private", use_auth_token=True)
|
||||
```
|
||||
Private repositories are only visible to you, and other users won't be able to clone the repository and your repository won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for`. You must be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) to load a model from a private repository.
|
||||
@@ -133,7 +133,7 @@ def save_model_card(
|
||||
diffusers_imports_pivotal = """from huggingface_hub import hf_hub_download
|
||||
from safetensors.torch import load_file
|
||||
"""
|
||||
diffusers_example_pivotal = f"""embedding_path = hf_hub_download(repo_id='{repo_id}', filename="embeddings.safetensors", repo_type="model")
|
||||
diffusers_example_pivotal = f"""embedding_path = hf_hub_download(repo_id="{repo_id}", filename="embeddings.safetensors", repo_type="model")
|
||||
state_dict = load_file(embedding_path)
|
||||
pipeline.load_textual_inversion(state_dict["clip_l"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer)
|
||||
pipeline.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2)
|
||||
@@ -145,7 +145,8 @@ pipeline.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], te
|
||||
to trigger concept `{key}` → use `{tokens}` in your prompt \n
|
||||
"""
|
||||
|
||||
yaml = f"""---
|
||||
yaml = f"""
|
||||
---
|
||||
tags:
|
||||
- stable-diffusion-xl
|
||||
- stable-diffusion-xl-diffusers
|
||||
@@ -158,7 +159,7 @@ base_model: {base_model}
|
||||
instance_prompt: {instance_prompt}
|
||||
license: openrail++
|
||||
---
|
||||
"""
|
||||
"""
|
||||
|
||||
model_card = f"""
|
||||
# SDXL LoRA DreamBooth - {repo_id}
|
||||
@@ -169,6 +170,14 @@ license: openrail++
|
||||
|
||||
### These are {repo_id} LoRA adaption weights for {base_model}.
|
||||
|
||||
The weights were trained using [DreamBooth](https://dreambooth.github.io/).
|
||||
|
||||
LoRA for the text encoder was enabled: {train_text_encoder}.
|
||||
|
||||
Pivotal tuning was enabled: {train_text_encoder_ti}.
|
||||
|
||||
Special VAE used for training: {vae_path}.
|
||||
|
||||
## Trigger words
|
||||
|
||||
{trigger_str}
|
||||
@@ -187,24 +196,11 @@ image = pipeline('{validation_prompt if validation_prompt else instance_prompt}'
|
||||
|
||||
For more details, including weighting, merging and fusing LoRAs, check the [documentation on loading LoRAs in diffusers](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters)
|
||||
|
||||
## Download model
|
||||
## Download model (use it with UIs such as AUTO1111, Comfy, SD.Next, Invoke)
|
||||
|
||||
### Use it with UIs such as AUTOMATIC1111, Comfy UI, SD.Next, Invoke
|
||||
Weights for this model are available in Safetensors format.
|
||||
|
||||
- Download the LoRA *.safetensors [here](/{repo_id}/blob/main/pytorch_lora_weights.safetensors). Rename it and place it on your Lora folder.
|
||||
- Download the text embeddings *.safetensors [here](/{repo_id}/blob/main/embeddings.safetensors). Rename it and place it on it on your embeddings folder.
|
||||
|
||||
All [Files & versions](/{repo_id}/tree/main).
|
||||
|
||||
## Details
|
||||
|
||||
The weights were trained using [🧨 diffusers Advanced Dreambooth Training Script](https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py).
|
||||
|
||||
LoRA for the text encoder was enabled. {train_text_encoder}.
|
||||
|
||||
Pivotal tuning was enabled: {train_text_encoder_ti}.
|
||||
|
||||
Special VAE used for training: {vae_path}.
|
||||
[Download]({repo_id}/tree/main) them in the Files & versions tab.
|
||||
|
||||
"""
|
||||
with open(os.path.join(repo_folder, "README.md"), "w") as f:
|
||||
@@ -671,12 +667,6 @@ def parse_args(input_args=None):
|
||||
default=4,
|
||||
help=("The dimension of the LoRA update matrices."),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cache_latents",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Cache the VAE latents",
|
||||
)
|
||||
|
||||
if input_args is not None:
|
||||
args = parser.parse_args(input_args)
|
||||
@@ -1180,7 +1170,6 @@ def main(args):
|
||||
revision=args.revision,
|
||||
variant=args.variant,
|
||||
)
|
||||
vae_scaling_factor = vae.config.scaling_factor
|
||||
unet = UNet2DConditionModel.from_pretrained(
|
||||
args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
|
||||
)
|
||||
@@ -1611,20 +1600,6 @@ def main(args):
|
||||
args.validation_prompt = args.validation_prompt.replace(token_abs, "".join(token_replacement))
|
||||
print("validation prompt:", args.validation_prompt)
|
||||
|
||||
if args.cache_latents:
|
||||
latents_cache = []
|
||||
for batch in tqdm(train_dataloader, desc="Caching latents"):
|
||||
with torch.no_grad():
|
||||
batch["pixel_values"] = batch["pixel_values"].to(
|
||||
accelerator.device, non_blocking=True, dtype=torch.float32
|
||||
)
|
||||
latents_cache.append(vae.encode(batch["pixel_values"]).latent_dist)
|
||||
|
||||
if args.validation_prompt is None:
|
||||
del vae
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
# Scheduler and math around the number of training steps.
|
||||
overrode_max_train_steps = False
|
||||
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
|
||||
@@ -1740,7 +1715,9 @@ def main(args):
|
||||
unet.train()
|
||||
for step, batch in enumerate(train_dataloader):
|
||||
with accelerator.accumulate(unet):
|
||||
pixel_values = batch["pixel_values"].to(dtype=vae.dtype)
|
||||
prompts = batch["prompts"]
|
||||
# print(prompts)
|
||||
# encode batch prompts when custom prompts are provided for each image -
|
||||
if train_dataset.custom_instance_prompts:
|
||||
if freeze_text_encoder:
|
||||
@@ -1752,13 +1729,9 @@ def main(args):
|
||||
tokens_one = tokenize_prompt(tokenizer_one, prompts, add_special_tokens)
|
||||
tokens_two = tokenize_prompt(tokenizer_two, prompts, add_special_tokens)
|
||||
|
||||
if args.cache_latents:
|
||||
model_input = latents_cache[step].sample()
|
||||
else:
|
||||
pixel_values = batch["pixel_values"].to(dtype=vae.dtype)
|
||||
model_input = vae.encode(pixel_values).latent_dist.sample()
|
||||
|
||||
model_input = model_input * vae_scaling_factor
|
||||
# Convert images to latent space
|
||||
model_input = vae.encode(pixel_values).latent_dist.sample()
|
||||
model_input = model_input * vae.config.scaling_factor
|
||||
if args.pretrained_vae_model_name_or_path is None:
|
||||
model_input = model_input.to(weight_dtype)
|
||||
|
||||
|
||||
@@ -512,7 +512,6 @@ device = torch.device('cpu' if not has_cuda else 'cuda')
|
||||
pipe = DiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-4",
|
||||
safety_checker=None,
|
||||
use_auth_token=True,
|
||||
custom_pipeline="imagic_stable_diffusion",
|
||||
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
|
||||
).to(device)
|
||||
@@ -552,7 +551,6 @@ device = th.device('cpu' if not has_cuda else 'cuda')
|
||||
|
||||
pipe = DiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-4",
|
||||
use_auth_token=True,
|
||||
custom_pipeline="seed_resize_stable_diffusion"
|
||||
).to(device)
|
||||
|
||||
@@ -588,7 +586,6 @@ generator = th.Generator("cuda").manual_seed(0)
|
||||
|
||||
pipe = DiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-4",
|
||||
use_auth_token=True,
|
||||
custom_pipeline="/home/mark/open_source/diffusers/examples/community/"
|
||||
).to(device)
|
||||
|
||||
@@ -607,7 +604,6 @@ image.save('./seed_resize/seed_resize_{w}_{h}_image.png'.format(w=width, h=heigh
|
||||
|
||||
pipe_compare = DiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-4",
|
||||
use_auth_token=True,
|
||||
custom_pipeline="/home/mark/open_source/diffusers/examples/community/"
|
||||
).to(device)
|
||||
|
||||
|
||||
@@ -5,10 +5,11 @@ from typing import Dict, List, Union
|
||||
import safetensors.torch
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from diffusers import DiffusionPipeline, __version__
|
||||
from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
|
||||
from diffusers.utils import CONFIG_NAME, DIFFUSERS_CACHE, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
|
||||
from diffusers.utils import CONFIG_NAME, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
|
||||
|
||||
|
||||
class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
@@ -57,6 +58,7 @@ class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
return (temp_dict, meta_keys)
|
||||
|
||||
@torch.no_grad()
|
||||
@validate_hf_hub_args
|
||||
def merge(self, pretrained_model_name_or_path_list: List[Union[str, os.PathLike]], **kwargs):
|
||||
"""
|
||||
Returns a new pipeline object of the class 'DiffusionPipeline' with the merged checkpoints(weights) of the models passed
|
||||
@@ -69,7 +71,7 @@ class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
**kwargs:
|
||||
Supports all the default DiffusionPipeline.get_config_dict kwargs viz..
|
||||
|
||||
cache_dir, resume_download, force_download, proxies, local_files_only, use_auth_token, revision, torch_dtype, device_map.
|
||||
cache_dir, resume_download, force_download, proxies, local_files_only, token, revision, torch_dtype, device_map.
|
||||
|
||||
alpha - The interpolation parameter. Ranges from 0 to 1. It affects the ratio in which the checkpoints are merged. A 0.8 alpha
|
||||
would mean that the first model checkpoints would affect the final result far less than an alpha of 0.2
|
||||
@@ -81,12 +83,12 @@ class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
|
||||
"""
|
||||
# Default kwargs from DiffusionPipeline
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
torch_dtype = kwargs.pop("torch_dtype", None)
|
||||
device_map = kwargs.pop("device_map", None)
|
||||
@@ -123,7 +125,7 @@ class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
)
|
||||
config_dicts.append(config_dict)
|
||||
@@ -159,7 +161,7 @@ class CheckpointMergerPipeline(DiffusionPipeline):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
allow_patterns=allow_patterns,
|
||||
user_agent=user_agent,
|
||||
|
||||
@@ -28,6 +28,7 @@ import PIL.Image
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from onnx import shape_inference
|
||||
from polygraphy import cuda
|
||||
from polygraphy.backend.common import bytes_from_path
|
||||
@@ -50,7 +51,7 @@ from diffusers.pipelines.stable_diffusion import (
|
||||
StableDiffusionSafetyChecker,
|
||||
)
|
||||
from diffusers.schedulers import DDIMScheduler
|
||||
from diffusers.utils import DIFFUSERS_CACHE, logging
|
||||
from diffusers.utils import logging
|
||||
|
||||
|
||||
"""
|
||||
@@ -778,12 +779,13 @@ class TensorRTStableDiffusionImg2ImgPipeline(StableDiffusionImg2ImgPipeline):
|
||||
self.models["vae_encoder"] = make_VAEEncoder(self.vae, **models_args)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def set_cached_folder(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
cls.cached_folder = (
|
||||
@@ -795,7 +797,7 @@ class TensorRTStableDiffusionImg2ImgPipeline(StableDiffusionImg2ImgPipeline):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -28,6 +28,7 @@ import PIL.Image
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from onnx import shape_inference
|
||||
from polygraphy import cuda
|
||||
from polygraphy.backend.common import bytes_from_path
|
||||
@@ -51,7 +52,7 @@ from diffusers.pipelines.stable_diffusion import (
|
||||
)
|
||||
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import prepare_mask_and_masked_image
|
||||
from diffusers.schedulers import DDIMScheduler
|
||||
from diffusers.utils import DIFFUSERS_CACHE, logging
|
||||
from diffusers.utils import logging
|
||||
|
||||
|
||||
"""
|
||||
@@ -779,12 +780,13 @@ class TensorRTStableDiffusionInpaintPipeline(StableDiffusionInpaintPipeline):
|
||||
self.models["vae_encoder"] = make_VAEEncoder(self.vae, **models_args)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def set_cached_folder(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
cls.cached_folder = (
|
||||
@@ -796,7 +798,7 @@ class TensorRTStableDiffusionInpaintPipeline(StableDiffusionInpaintPipeline):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -27,6 +27,7 @@ import onnx_graphsurgeon as gs
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from onnx import shape_inference
|
||||
from polygraphy import cuda
|
||||
from polygraphy.backend.common import bytes_from_path
|
||||
@@ -49,7 +50,7 @@ from diffusers.pipelines.stable_diffusion import (
|
||||
StableDiffusionSafetyChecker,
|
||||
)
|
||||
from diffusers.schedulers import DDIMScheduler
|
||||
from diffusers.utils import DIFFUSERS_CACHE, logging
|
||||
from diffusers.utils import logging
|
||||
|
||||
|
||||
"""
|
||||
@@ -691,12 +692,13 @@ class TensorRTStableDiffusionPipeline(StableDiffusionPipeline):
|
||||
self.models["vae"] = make_VAE(self.vae, **models_args)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def set_cached_folder(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
cls.cached_folder = (
|
||||
@@ -708,7 +710,7 @@ class TensorRTStableDiffusionPipeline(StableDiffusionPipeline):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Latent Consistency Distillation Example:
|
||||
|
||||
[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is a method to distill a latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use latent consistency distillation to distill stable-diffusion-v1.5 for inference with few timesteps.
|
||||
[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is method to distill latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use the latent consistency distillation to distill stable-diffusion-v1.5 for less timestep inference.
|
||||
|
||||
## Full model distillation
|
||||
|
||||
@@ -24,7 +24,7 @@ Then cd in the example folder and run
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
And initialize an [🤗 Accelerate](https://github.com/huggingface/accelerate/) environment with:
|
||||
And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with:
|
||||
|
||||
```bash
|
||||
accelerate config
|
||||
@@ -46,16 +46,12 @@ write_basic_config()
|
||||
When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups.
|
||||
|
||||
|
||||
#### Example
|
||||
|
||||
The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example, and for illustrative purposes only. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/). You may also need to search the hyperparameter space according to the dataset you use.
|
||||
#### Example with LAION-A6+ dataset
|
||||
|
||||
```bash
|
||||
export MODEL_NAME="runwayml/stable-diffusion-v1-5"
|
||||
export OUTPUT_DIR="path/to/saved/model"
|
||||
|
||||
accelerate launch train_lcm_distill_sd_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_NAME \
|
||||
runwayml/stable-diffusion-v1-5
|
||||
PROGRAM="train_lcm_distill_sd_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_DIR \
|
||||
--output_dir=$OUTPUT_DIR \
|
||||
--mixed_precision=fp16 \
|
||||
--resolution=512 \
|
||||
@@ -63,7 +59,7 @@ accelerate launch train_lcm_distill_sd_wds.py \
|
||||
--max_train_steps=1000 \
|
||||
--max_train_samples=4000000 \
|
||||
--dataloader_num_workers=8 \
|
||||
--train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
|
||||
--train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
|
||||
--validation_steps=200 \
|
||||
--checkpointing_steps=200 --checkpoints_total_limit=10 \
|
||||
--train_batch_size=12 \
|
||||
@@ -73,23 +69,19 @@ accelerate launch train_lcm_distill_sd_wds.py \
|
||||
--resume_from_checkpoint=latest \
|
||||
--report_to=wandb \
|
||||
--seed=453645634 \
|
||||
--push_to_hub
|
||||
--push_to_hub \
|
||||
```
|
||||
|
||||
## LCM-LoRA
|
||||
|
||||
Instead of fine-tuning the full model, we can also just train a LoRA that can be injected into any SDXL model.
|
||||
|
||||
### Example
|
||||
|
||||
The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/).
|
||||
|
||||
### Example with LAION-A6+ dataset
|
||||
|
||||
```bash
|
||||
export MODEL_NAME="runwayml/stable-diffusion-v1-5"
|
||||
export OUTPUT_DIR="path/to/saved/model"
|
||||
|
||||
accelerate launch train_lcm_distill_lora_sd_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_NAME \
|
||||
runwayml/stable-diffusion-v1-5
|
||||
PROGRAM="train_lcm_distill_lora_sd_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_DIR \
|
||||
--output_dir=$OUTPUT_DIR \
|
||||
--mixed_precision=fp16 \
|
||||
--resolution=512 \
|
||||
@@ -98,7 +90,7 @@ accelerate launch train_lcm_distill_lora_sd_wds.py \
|
||||
--max_train_steps=1000 \
|
||||
--max_train_samples=4000000 \
|
||||
--dataloader_num_workers=8 \
|
||||
--train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
|
||||
--train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
|
||||
--validation_steps=200 \
|
||||
--checkpointing_steps=200 --checkpoints_total_limit=10 \
|
||||
--train_batch_size=12 \
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Latent Consistency Distillation Example:
|
||||
|
||||
[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is a method to distill a latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use latent consistency distillation to distill SDXL for inference with few timesteps.
|
||||
[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is method to distill latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use the latent consistency distillation to distill SDXL for less timestep inference.
|
||||
|
||||
## Full model distillation
|
||||
|
||||
@@ -24,7 +24,7 @@ Then cd in the example folder and run
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
And initialize an [🤗 Accelerate](https://github.com/huggingface/accelerate/) environment with:
|
||||
And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with:
|
||||
|
||||
```bash
|
||||
accelerate config
|
||||
@@ -46,16 +46,12 @@ write_basic_config()
|
||||
When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups.
|
||||
|
||||
|
||||
#### Example
|
||||
|
||||
The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example, and for illustrative purposes only. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/). You may also need to search the hyperparameter space according to the dataset you use.
|
||||
#### Example with LAION-A6+ dataset
|
||||
|
||||
```bash
|
||||
export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
|
||||
export OUTPUT_DIR="path/to/saved/model"
|
||||
|
||||
accelerate launch train_lcm_distill_sdxl_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_NAME \
|
||||
export MODEL_DIR="stabilityai/stable-diffusion-xl-base-1.0"
|
||||
PROGRAM="train_lcm_distill_sdxl_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_DIR \
|
||||
--pretrained_vae_model_name_or_path=madebyollin/sdxl-vae-fp16-fix \
|
||||
--output_dir=$OUTPUT_DIR \
|
||||
--mixed_precision=fp16 \
|
||||
@@ -64,7 +60,7 @@ accelerate launch train_lcm_distill_sdxl_wds.py \
|
||||
--max_train_steps=1000 \
|
||||
--max_train_samples=4000000 \
|
||||
--dataloader_num_workers=8 \
|
||||
--train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
|
||||
--train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
|
||||
--validation_steps=200 \
|
||||
--checkpointing_steps=200 --checkpoints_total_limit=10 \
|
||||
--train_batch_size=12 \
|
||||
@@ -81,15 +77,11 @@ accelerate launch train_lcm_distill_sdxl_wds.py \
|
||||
|
||||
Instead of fine-tuning the full model, we can also just train a LoRA that can be injected into any SDXL model.
|
||||
|
||||
### Example
|
||||
|
||||
The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/).
|
||||
|
||||
### Example with LAION-A6+ dataset
|
||||
|
||||
```bash
|
||||
export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
|
||||
export OUTPUT_DIR="path/to/saved/model"
|
||||
|
||||
accelerate launch train_lcm_distill_lora_sdxl_wds.py \
|
||||
export MODEL_DIR="stabilityai/stable-diffusion-xl-base-1.0"
|
||||
PROGRAM="train_lcm_distill_lora_sdxl_wds.py \
|
||||
--pretrained_teacher_model=$MODEL_DIR \
|
||||
--pretrained_vae_model_name_or_path=madebyollin/sdxl-vae-fp16-fix \
|
||||
--output_dir=$OUTPUT_DIR \
|
||||
@@ -100,7 +92,7 @@ accelerate launch train_lcm_distill_lora_sdxl_wds.py \
|
||||
--max_train_steps=1000 \
|
||||
--max_train_samples=4000000 \
|
||||
--dataloader_num_workers=8 \
|
||||
--train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
|
||||
--train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
|
||||
--validation_steps=200 \
|
||||
--checkpointing_steps=200 --checkpoints_total_limit=10 \
|
||||
--train_batch_size=12 \
|
||||
|
||||
@@ -423,7 +423,7 @@ def import_model_class_from_model_name_or_path(
|
||||
pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
|
||||
):
|
||||
text_encoder_config = PretrainedConfig.from_pretrained(
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision, use_auth_token=True
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision
|
||||
)
|
||||
model_class = text_encoder_config.architectures[0]
|
||||
|
||||
@@ -1123,7 +1123,7 @@ def main(args):
|
||||
for epoch in range(first_epoch, args.num_train_epochs):
|
||||
for step, batch in enumerate(train_dataloader):
|
||||
with accelerator.accumulate(unet):
|
||||
image, text = batch
|
||||
image, text, _, _ = batch
|
||||
|
||||
image = image.to(accelerator.device, non_blocking=True)
|
||||
encoded_text = compute_embeddings_fn(text)
|
||||
|
||||
@@ -68,11 +68,6 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
MAX_SEQ_LENGTH = 77
|
||||
|
||||
# Adjust for your dataset
|
||||
WDS_JSON_WIDTH = "width" # original_width for LAION
|
||||
WDS_JSON_HEIGHT = "height" # original_height for LAION
|
||||
MIN_SIZE = 700 # ~960 for LAION, ideal: 1024 if the dataset contains large images
|
||||
|
||||
if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
@@ -151,10 +146,10 @@ class WebdatasetFilter:
|
||||
try:
|
||||
if "json" in x:
|
||||
x_json = json.loads(x["json"])
|
||||
filter_size = (x_json.get(WDS_JSON_WIDTH, 0.0) or 0.0) >= self.min_size and x_json.get(
|
||||
WDS_JSON_HEIGHT, 0
|
||||
filter_size = (x_json.get("original_width", 0.0) or 0.0) >= self.min_size and x_json.get(
|
||||
"original_height", 0
|
||||
) >= self.min_size
|
||||
filter_watermark = (x_json.get("pwatermark", 0.0) or 0.0) <= self.max_pwatermark
|
||||
filter_watermark = (x_json.get("pwatermark", 1.0) or 1.0) <= self.max_pwatermark
|
||||
return filter_size and filter_watermark
|
||||
else:
|
||||
return False
|
||||
@@ -185,7 +180,7 @@ class Text2ImageDataset:
|
||||
if use_fix_crop_and_size:
|
||||
return (resolution, resolution)
|
||||
else:
|
||||
return (int(json.get(WDS_JSON_WIDTH, 0.0)), int(json.get(WDS_JSON_HEIGHT, 0.0)))
|
||||
return (int(json.get("original_width", 0.0)), int(json.get("original_height", 0.0)))
|
||||
|
||||
def transform(example):
|
||||
# resize image
|
||||
@@ -217,7 +212,7 @@ class Text2ImageDataset:
|
||||
pipeline = [
|
||||
wds.ResampledShards(train_shards_path_or_url),
|
||||
tarfile_to_samples_nothrow,
|
||||
wds.select(WebdatasetFilter(min_size=MIN_SIZE)),
|
||||
wds.select(WebdatasetFilter(min_size=960)),
|
||||
wds.shuffle(shuffle_buffer_size),
|
||||
*processing_pipeline,
|
||||
wds.batched(per_gpu_batch_size, partial=False, collation_fn=default_collate),
|
||||
@@ -397,7 +392,7 @@ def import_model_class_from_model_name_or_path(
|
||||
pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
|
||||
):
|
||||
text_encoder_config = PretrainedConfig.from_pretrained(
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision, use_auth_token=True
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision
|
||||
)
|
||||
model_class = text_encoder_config.architectures[0]
|
||||
|
||||
|
||||
@@ -400,7 +400,7 @@ def import_model_class_from_model_name_or_path(
|
||||
pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
|
||||
):
|
||||
text_encoder_config = PretrainedConfig.from_pretrained(
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision, use_auth_token=True
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision
|
||||
)
|
||||
model_class = text_encoder_config.architectures[0]
|
||||
|
||||
@@ -1106,7 +1106,7 @@ def main(args):
|
||||
for epoch in range(first_epoch, args.num_train_epochs):
|
||||
for step, batch in enumerate(train_dataloader):
|
||||
with accelerator.accumulate(unet):
|
||||
image, text = batch
|
||||
image, text, _, _ = batch
|
||||
|
||||
image = image.to(accelerator.device, non_blocking=True)
|
||||
encoded_text = compute_embeddings_fn(text)
|
||||
|
||||
@@ -67,11 +67,6 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
MAX_SEQ_LENGTH = 77
|
||||
|
||||
# Adjust for your dataset
|
||||
WDS_JSON_WIDTH = "width" # original_width for LAION
|
||||
WDS_JSON_HEIGHT = "height" # original_height for LAION
|
||||
MIN_SIZE = 700 # ~960 for LAION, ideal: 1024 if the dataset contains large images
|
||||
|
||||
if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
@@ -133,10 +128,10 @@ class WebdatasetFilter:
|
||||
try:
|
||||
if "json" in x:
|
||||
x_json = json.loads(x["json"])
|
||||
filter_size = (x_json.get(WDS_JSON_WIDTH, 0.0) or 0.0) >= self.min_size and x_json.get(
|
||||
WDS_JSON_HEIGHT, 0
|
||||
filter_size = (x_json.get("original_width", 0.0) or 0.0) >= self.min_size and x_json.get(
|
||||
"original_height", 0
|
||||
) >= self.min_size
|
||||
filter_watermark = (x_json.get("pwatermark", 0.0) or 0.0) <= self.max_pwatermark
|
||||
filter_watermark = (x_json.get("pwatermark", 1.0) or 1.0) <= self.max_pwatermark
|
||||
return filter_size and filter_watermark
|
||||
else:
|
||||
return False
|
||||
@@ -167,7 +162,7 @@ class Text2ImageDataset:
|
||||
if use_fix_crop_and_size:
|
||||
return (resolution, resolution)
|
||||
else:
|
||||
return (int(json.get(WDS_JSON_WIDTH, 0.0)), int(json.get(WDS_JSON_HEIGHT, 0.0)))
|
||||
return (int(json.get("original_width", 0.0)), int(json.get("original_height", 0.0)))
|
||||
|
||||
def transform(example):
|
||||
# resize image
|
||||
@@ -199,7 +194,7 @@ class Text2ImageDataset:
|
||||
pipeline = [
|
||||
wds.ResampledShards(train_shards_path_or_url),
|
||||
tarfile_to_samples_nothrow,
|
||||
wds.select(WebdatasetFilter(min_size=MIN_SIZE)),
|
||||
wds.select(WebdatasetFilter(min_size=960)),
|
||||
wds.shuffle(shuffle_buffer_size),
|
||||
*processing_pipeline,
|
||||
wds.batched(per_gpu_batch_size, partial=False, collation_fn=default_collate),
|
||||
@@ -419,7 +414,7 @@ def import_model_class_from_model_name_or_path(
|
||||
pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
|
||||
):
|
||||
text_encoder_config = PretrainedConfig.from_pretrained(
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision, use_auth_token=True
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision
|
||||
)
|
||||
model_class = text_encoder_config.architectures[0]
|
||||
|
||||
|
||||
@@ -420,7 +420,7 @@ def import_model_class_from_model_name_or_path(
|
||||
pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
|
||||
):
|
||||
text_encoder_config = PretrainedConfig.from_pretrained(
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision, use_auth_token=True
|
||||
pretrained_model_name_or_path, subfolder=subfolder, revision=revision
|
||||
)
|
||||
model_class = text_encoder_config.architectures[0]
|
||||
|
||||
@@ -975,7 +975,7 @@ def main(args):
|
||||
revision=args.revision,
|
||||
)
|
||||
unet = UNet2DConditionModel.from_pretrained(
|
||||
args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, use_auth_token=True
|
||||
args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision
|
||||
)
|
||||
|
||||
if args.controlnet_model_name_or_path:
|
||||
|
||||
@@ -19,6 +19,7 @@ Usage example:
|
||||
|
||||
import glob
|
||||
import json
|
||||
import warnings
|
||||
from argparse import ArgumentParser, Namespace
|
||||
from importlib import import_module
|
||||
|
||||
@@ -32,12 +33,12 @@ from . import BaseDiffusersCLICommand
|
||||
|
||||
|
||||
def conversion_command_factory(args: Namespace):
|
||||
return FP16SafetensorsCommand(
|
||||
args.ckpt_id,
|
||||
args.fp16,
|
||||
args.use_safetensors,
|
||||
args.use_auth_token,
|
||||
)
|
||||
if args.use_auth_token:
|
||||
warnings.warn(
|
||||
"The `--use_auth_token` flag is deprecated and will be removed in a future version. Authentication is now"
|
||||
" handled automatically if user is logged in."
|
||||
)
|
||||
return FP16SafetensorsCommand(args.ckpt_id, args.fp16, args.use_safetensors)
|
||||
|
||||
|
||||
class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
@@ -62,7 +63,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
)
|
||||
conversion_parser.set_defaults(func=conversion_command_factory)
|
||||
|
||||
def __init__(self, ckpt_id: str, fp16: bool, use_safetensors: bool, use_auth_token: bool):
|
||||
def __init__(self, ckpt_id: str, fp16: bool, use_safetensors: bool):
|
||||
self.logger = logging.get_logger("diffusers-cli/fp16_safetensors")
|
||||
self.ckpt_id = ckpt_id
|
||||
self.local_ckpt_dir = f"/tmp/{ckpt_id}"
|
||||
@@ -75,8 +76,6 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
"When `use_safetensors` and `fp16` both are False, then this command is of no use."
|
||||
)
|
||||
|
||||
self.use_auth_token = use_auth_token
|
||||
|
||||
def run(self):
|
||||
if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"):
|
||||
raise ImportError(
|
||||
@@ -87,7 +86,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
from huggingface_hub import create_commit
|
||||
from huggingface_hub._commit_api import CommitOperationAdd
|
||||
|
||||
model_index = hf_hub_download(repo_id=self.ckpt_id, filename="model_index.json", token=self.use_auth_token)
|
||||
model_index = hf_hub_download(repo_id=self.ckpt_id, filename="model_index.json")
|
||||
with open(model_index, "r") as f:
|
||||
pipeline_class_name = json.load(f)["_class_name"]
|
||||
pipeline_class = getattr(import_module("diffusers"), pipeline_class_name)
|
||||
@@ -96,7 +95,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
# Load the appropriate pipeline. We could have use `DiffusionPipeline`
|
||||
# here, but just to avoid any rough edge cases.
|
||||
pipeline = pipeline_class.from_pretrained(
|
||||
self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32, use_auth_token=self.use_auth_token
|
||||
self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32
|
||||
)
|
||||
pipeline.save_pretrained(
|
||||
self.local_ckpt_dir,
|
||||
|
||||
@@ -27,12 +27,16 @@ from typing import Any, Dict, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub import create_repo, hf_hub_download
|
||||
from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
|
||||
from huggingface_hub.utils import (
|
||||
EntryNotFoundError,
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
validate_hf_hub_args,
|
||||
)
|
||||
from requests import HTTPError
|
||||
|
||||
from . import __version__
|
||||
from .utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
|
||||
DummyObject,
|
||||
deprecate,
|
||||
@@ -275,6 +279,7 @@ class ConfigMixin:
|
||||
return cls.load_config(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def load_config(
|
||||
cls,
|
||||
pretrained_model_name_or_path: Union[str, os.PathLike],
|
||||
@@ -311,7 +316,7 @@ class ConfigMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -329,11 +334,11 @@ class ConfigMixin:
|
||||
A dictionary of all the parameters stored in a JSON configuration file.
|
||||
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
revision = kwargs.pop("revision", None)
|
||||
_ = kwargs.pop("mirror", None)
|
||||
@@ -376,7 +381,7 @@ class ConfigMixin:
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
user_agent=user_agent,
|
||||
subfolder=subfolder,
|
||||
revision=revision,
|
||||
@@ -385,8 +390,7 @@ class ConfigMixin:
|
||||
raise EnvironmentError(
|
||||
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier"
|
||||
" listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a"
|
||||
" token having permission to this repo with `use_auth_token` or log in with `huggingface-cli"
|
||||
" login`."
|
||||
" token having permission to this repo with `token` or log in with `huggingface-cli login`."
|
||||
)
|
||||
except RevisionNotFoundError:
|
||||
raise EnvironmentError(
|
||||
|
||||
@@ -15,11 +15,10 @@ import os
|
||||
from typing import Dict, Union
|
||||
|
||||
import torch
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from safetensors import safe_open
|
||||
|
||||
from ..utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
_get_model_file,
|
||||
is_transformers_available,
|
||||
logging,
|
||||
@@ -43,6 +42,7 @@ logger = logging.get_logger(__name__)
|
||||
class IPAdapterMixin:
|
||||
"""Mixin for handling IP Adapters."""
|
||||
|
||||
@validate_hf_hub_args
|
||||
def load_ip_adapter(
|
||||
self,
|
||||
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
||||
@@ -77,7 +77,7 @@ class IPAdapterMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -88,12 +88,12 @@ class IPAdapterMixin:
|
||||
"""
|
||||
|
||||
# Load the main state dict first.
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
user_agent = {
|
||||
@@ -110,7 +110,7 @@ class IPAdapterMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
|
||||
@@ -18,14 +18,13 @@ from typing import Callable, Dict, List, Optional, Union
|
||||
import safetensors
|
||||
import torch
|
||||
from huggingface_hub import model_info
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from packaging import version
|
||||
from torch import nn
|
||||
|
||||
from .. import __version__
|
||||
from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
|
||||
from ..utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
USE_PEFT_BACKEND,
|
||||
_get_model_file,
|
||||
convert_state_dict_to_diffusers,
|
||||
@@ -132,6 +131,7 @@ class LoraLoaderMixin:
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def lora_state_dict(
|
||||
cls,
|
||||
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
||||
@@ -174,7 +174,7 @@ class LoraLoaderMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -195,12 +195,12 @@ class LoraLoaderMixin:
|
||||
"""
|
||||
# Load the main state dict first which has the LoRA layers for either of
|
||||
# UNet and text encoder or both.
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
weight_name = kwargs.pop("weight_name", None)
|
||||
@@ -239,7 +239,7 @@ class LoraLoaderMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -265,7 +265,7 @@ class LoraLoaderMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
|
||||
@@ -18,10 +18,9 @@ from pathlib import Path
|
||||
import requests
|
||||
import torch
|
||||
from huggingface_hub import hf_hub_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from ..utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
deprecate,
|
||||
is_accelerate_available,
|
||||
is_omegaconf_available,
|
||||
@@ -52,6 +51,7 @@ class FromSingleFileMixin:
|
||||
return cls.from_single_file(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiate a [`DiffusionPipeline`] from pretrained pipeline weights saved in the `.ckpt` or `.safetensors`
|
||||
@@ -81,7 +81,7 @@ class FromSingleFileMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -154,12 +154,12 @@ class FromSingleFileMixin:
|
||||
|
||||
original_config_file = kwargs.pop("original_config_file", None)
|
||||
config_files = kwargs.pop("config_files", None)
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
extract_ema = kwargs.pop("extract_ema", False)
|
||||
image_size = kwargs.pop("image_size", None)
|
||||
@@ -253,7 +253,7 @@ class FromSingleFileMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
force_download=force_download,
|
||||
)
|
||||
@@ -293,6 +293,7 @@ class FromOriginalVAEMixin:
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiate a [`AutoencoderKL`] from pretrained ControlNet weights saved in the original `.ckpt` or
|
||||
@@ -322,7 +323,7 @@ class FromOriginalVAEMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to True, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -379,12 +380,12 @@ class FromOriginalVAEMixin:
|
||||
)
|
||||
|
||||
config_file = kwargs.pop("config_file", None)
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
image_size = kwargs.pop("image_size", None)
|
||||
scaling_factor = kwargs.pop("scaling_factor", None)
|
||||
@@ -425,7 +426,7 @@ class FromOriginalVAEMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
force_download=force_download,
|
||||
)
|
||||
@@ -490,6 +491,7 @@ class FromOriginalControlnetMixin:
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiate a [`ControlNetModel`] from pretrained ControlNet weights saved in the original `.ckpt` or
|
||||
@@ -519,7 +521,7 @@ class FromOriginalControlnetMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to True, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -555,12 +557,12 @@ class FromOriginalControlnetMixin:
|
||||
from ..pipelines.stable_diffusion.convert_from_ckpt import download_controlnet_from_original_ckpt
|
||||
|
||||
config_file = kwargs.pop("config_file", None)
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
num_in_channels = kwargs.pop("num_in_channels", None)
|
||||
use_linear_projection = kwargs.pop("use_linear_projection", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
@@ -603,7 +605,7 @@ class FromOriginalControlnetMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
force_download=force_download,
|
||||
)
|
||||
|
||||
@@ -15,16 +15,10 @@ from typing import Dict, List, Optional, Union
|
||||
|
||||
import safetensors
|
||||
import torch
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from torch import nn
|
||||
|
||||
from ..utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
_get_model_file,
|
||||
is_accelerate_available,
|
||||
is_transformers_available,
|
||||
logging,
|
||||
)
|
||||
from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging
|
||||
|
||||
|
||||
if is_transformers_available():
|
||||
@@ -39,13 +33,14 @@ TEXT_INVERSION_NAME = "learned_embeds.bin"
|
||||
TEXT_INVERSION_NAME_SAFE = "learned_embeds.safetensors"
|
||||
|
||||
|
||||
@validate_hf_hub_args
|
||||
def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs):
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
weight_name = kwargs.pop("weight_name", None)
|
||||
@@ -79,7 +74,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -100,7 +95,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -267,6 +262,7 @@ class TextualInversionLoaderMixin:
|
||||
|
||||
return all_tokens, all_embeddings
|
||||
|
||||
@validate_hf_hub_args
|
||||
def load_textual_inversion(
|
||||
self,
|
||||
pretrained_model_name_or_path: Union[str, List[str], Dict[str, torch.Tensor], List[Dict[str, torch.Tensor]]],
|
||||
@@ -320,7 +316,7 @@ class TextualInversionLoaderMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
|
||||
@@ -19,13 +19,12 @@ from typing import Callable, Dict, List, Optional, Union
|
||||
import safetensors
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from torch import nn
|
||||
|
||||
from ..models.embeddings import ImageProjection, Resampler
|
||||
from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
|
||||
from ..utils import (
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
USE_PEFT_BACKEND,
|
||||
_get_model_file,
|
||||
delete_adapter_layers,
|
||||
@@ -62,6 +61,7 @@ class UNet2DConditionLoadersMixin:
|
||||
text_encoder_name = TEXT_ENCODER_NAME
|
||||
unet_name = UNET_NAME
|
||||
|
||||
@validate_hf_hub_args
|
||||
def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
|
||||
r"""
|
||||
Load pretrained attention processor layers into [`UNet2DConditionModel`]. Attention processor layers have to be
|
||||
@@ -95,7 +95,7 @@ class UNet2DConditionLoadersMixin:
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
|
||||
@@ -130,12 +130,12 @@ class UNet2DConditionLoadersMixin:
|
||||
from ..models.attention_processor import CustomDiffusionAttnProcessor
|
||||
from ..models.lora import LoRACompatibleConv, LoRACompatibleLinear, LoRAConv2dLayer, LoRALinearLayer
|
||||
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
weight_name = kwargs.pop("weight_name", None)
|
||||
@@ -184,7 +184,7 @@ class UNet2DConditionLoadersMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -204,7 +204,7 @@ class UNet2DConditionLoadersMixin:
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
|
||||
@@ -113,14 +113,12 @@ class Attention(nn.Module):
|
||||
):
|
||||
super().__init__()
|
||||
self.inner_dim = out_dim if out_dim is not None else dim_head * heads
|
||||
self.query_dim = query_dim
|
||||
self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim
|
||||
self.upcast_attention = upcast_attention
|
||||
self.upcast_softmax = upcast_softmax
|
||||
self.rescale_output_factor = rescale_output_factor
|
||||
self.residual_connection = residual_connection
|
||||
self.dropout = dropout
|
||||
self.fused_projections = False
|
||||
self.out_dim = out_dim if out_dim is not None else query_dim
|
||||
|
||||
# we make use of this private variable to know whether this class is loaded
|
||||
@@ -182,7 +180,6 @@ class Attention(nn.Module):
|
||||
else:
|
||||
linear_cls = LoRACompatibleLinear
|
||||
|
||||
self.linear_cls = linear_cls
|
||||
self.to_q = linear_cls(query_dim, self.inner_dim, bias=bias)
|
||||
|
||||
if not self.only_cross_attention:
|
||||
@@ -695,32 +692,6 @@ class Attention(nn.Module):
|
||||
|
||||
return encoder_hidden_states
|
||||
|
||||
@torch.no_grad()
|
||||
def fuse_projections(self, fuse=True):
|
||||
is_cross_attention = self.cross_attention_dim != self.query_dim
|
||||
device = self.to_q.weight.data.device
|
||||
dtype = self.to_q.weight.data.dtype
|
||||
|
||||
if not is_cross_attention:
|
||||
# fetch weight matrices.
|
||||
concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data])
|
||||
in_features = concatenated_weights.shape[1]
|
||||
out_features = concatenated_weights.shape[0]
|
||||
|
||||
# create a new single projection layer and copy over the weights.
|
||||
self.to_qkv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype)
|
||||
self.to_qkv.weight.copy_(concatenated_weights)
|
||||
|
||||
else:
|
||||
concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data])
|
||||
in_features = concatenated_weights.shape[1]
|
||||
out_features = concatenated_weights.shape[0]
|
||||
|
||||
self.to_kv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype)
|
||||
self.to_kv.weight.copy_(concatenated_weights)
|
||||
|
||||
self.fused_projections = fuse
|
||||
|
||||
|
||||
class AttnProcessor:
|
||||
r"""
|
||||
@@ -1213,6 +1184,9 @@ class AttnProcessor2_0:
|
||||
scale: float = 1.0,
|
||||
) -> torch.FloatTensor:
|
||||
residual = hidden_states
|
||||
|
||||
args = () if USE_PEFT_BACKEND else (scale,)
|
||||
|
||||
if attn.spatial_norm is not None:
|
||||
hidden_states = attn.spatial_norm(hidden_states, temb)
|
||||
|
||||
@@ -1279,103 +1253,6 @@ class AttnProcessor2_0:
|
||||
return hidden_states
|
||||
|
||||
|
||||
class FusedAttnProcessor2_0:
|
||||
r"""
|
||||
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
|
||||
It uses fused projection layers. For self-attention modules, all projection matrices (i.e., query,
|
||||
key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This API is currently 🧪 experimental in nature and can change in future.
|
||||
|
||||
</Tip>
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
if not hasattr(F, "scaled_dot_product_attention"):
|
||||
raise ImportError(
|
||||
"FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0."
|
||||
)
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
attn: Attention,
|
||||
hidden_states: torch.FloatTensor,
|
||||
encoder_hidden_states: Optional[torch.FloatTensor] = None,
|
||||
attention_mask: Optional[torch.FloatTensor] = None,
|
||||
temb: Optional[torch.FloatTensor] = None,
|
||||
scale: float = 1.0,
|
||||
) -> torch.FloatTensor:
|
||||
residual = hidden_states
|
||||
if attn.spatial_norm is not None:
|
||||
hidden_states = attn.spatial_norm(hidden_states, temb)
|
||||
|
||||
input_ndim = hidden_states.ndim
|
||||
|
||||
if input_ndim == 4:
|
||||
batch_size, channel, height, width = hidden_states.shape
|
||||
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
|
||||
|
||||
batch_size, sequence_length, _ = (
|
||||
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
|
||||
)
|
||||
|
||||
if attention_mask is not None:
|
||||
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
|
||||
# scaled_dot_product_attention expects attention_mask shape to be
|
||||
# (batch, heads, source_length, target_length)
|
||||
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
|
||||
|
||||
if attn.group_norm is not None:
|
||||
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
|
||||
|
||||
args = () if USE_PEFT_BACKEND else (scale,)
|
||||
if encoder_hidden_states is None:
|
||||
qkv = attn.to_qkv(hidden_states, *args)
|
||||
split_size = qkv.shape[-1] // 3
|
||||
query, key, value = torch.split(qkv, split_size, dim=-1)
|
||||
else:
|
||||
if attn.norm_cross:
|
||||
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
|
||||
query = attn.to_q(hidden_states, *args)
|
||||
|
||||
kv = attn.to_kv(encoder_hidden_states, *args)
|
||||
split_size = kv.shape[-1] // 2
|
||||
key, value = torch.split(kv, split_size, dim=-1)
|
||||
|
||||
inner_dim = key.shape[-1]
|
||||
head_dim = inner_dim // attn.heads
|
||||
|
||||
query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
|
||||
key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
|
||||
value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
|
||||
|
||||
# the output of sdp = (batch, num_heads, seq_len, head_dim)
|
||||
# TODO: add support for attn.scale when we move to Torch 2.1
|
||||
hidden_states = F.scaled_dot_product_attention(
|
||||
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
|
||||
)
|
||||
|
||||
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
|
||||
hidden_states = hidden_states.to(query.dtype)
|
||||
|
||||
# linear proj
|
||||
hidden_states = attn.to_out[0](hidden_states, *args)
|
||||
# dropout
|
||||
hidden_states = attn.to_out[1](hidden_states)
|
||||
|
||||
if input_ndim == 4:
|
||||
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
|
||||
|
||||
if attn.residual_connection:
|
||||
hidden_states = hidden_states + residual
|
||||
|
||||
hidden_states = hidden_states / attn.rescale_output_factor
|
||||
|
||||
return hidden_states
|
||||
|
||||
|
||||
class CustomDiffusionXFormersAttnProcessor(nn.Module):
|
||||
r"""
|
||||
Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method.
|
||||
@@ -2374,7 +2251,6 @@ CROSS_ATTENTION_PROCESSORS = (
|
||||
AttentionProcessor = Union[
|
||||
AttnProcessor,
|
||||
AttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
XFormersAttnProcessor,
|
||||
SlicedAttnProcessor,
|
||||
AttnAddedKVProcessor,
|
||||
|
||||
@@ -22,7 +22,6 @@ from ..utils.accelerate_utils import apply_forward_hook
|
||||
from .attention_processor import (
|
||||
ADDED_KV_ATTENTION_PROCESSORS,
|
||||
CROSS_ATTENTION_PROCESSORS,
|
||||
Attention,
|
||||
AttentionProcessor,
|
||||
AttnAddedKVProcessor,
|
||||
AttnProcessor,
|
||||
@@ -449,41 +448,3 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
|
||||
return (dec,)
|
||||
|
||||
return DecoderOutput(sample=dec)
|
||||
|
||||
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
|
||||
def fuse_qkv_projections(self):
    """
    Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
    key, value) are fused. For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    The current attention processors are stored in `self.original_attn_processors` so that
    `unfuse_qkv_projections()` can restore them later.

    Raises:
        ValueError: If any attention processor's class name contains `"Added"` (added KV projections).
            In that case `self.original_attn_processors` is left as `None` and no module is fused.
    """
    # Reset first so a failed validation never leaves a stale snapshot behind.
    self.original_attn_processors = None

    # Read `attn_processors` once and reuse it for both the validation and the snapshot.
    attn_processors = self.attn_processors

    for attn_processor in attn_processors.values():
        if "Added" in attn_processor.__class__.__name__:
            raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")

    self.original_attn_processors = attn_processors

    for module in self.modules():
        if isinstance(module, Attention):
            module.fuse_projections(fuse=True)
|
||||
|
||||
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
|
||||
def unfuse_qkv_projections(self):
    """Disables the fused QKV projection if enabled.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    Restores the attention processors saved in `self.original_attn_processors` by passing them to
    `set_attn_processor()`. Does nothing when no processors were saved, including when
    `fuse_qkv_projections()` was never called on this model.
    """
    # In the code visible here the attribute is only assigned by `fuse_qkv_projections()`, so look it
    # up defensively: calling this method first should be a no-op, not an `AttributeError`.
    original_attn_processors = getattr(self, "original_attn_processors", None)
    if original_attn_processors is not None:
        self.set_attn_processor(original_attn_processors)
|
||||
|
||||
@@ -24,13 +24,17 @@ from flax.core.frozen_dict import FrozenDict, unfreeze
|
||||
from flax.serialization import from_bytes, to_bytes
|
||||
from flax.traverse_util import flatten_dict, unflatten_dict
|
||||
from huggingface_hub import create_repo, hf_hub_download
|
||||
from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
|
||||
from huggingface_hub.utils import (
|
||||
EntryNotFoundError,
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
validate_hf_hub_args,
|
||||
)
|
||||
from requests import HTTPError
|
||||
|
||||
from .. import __version__, is_torch_available
|
||||
from ..utils import (
|
||||
CONFIG_NAME,
|
||||
DIFFUSERS_CACHE,
|
||||
FLAX_WEIGHTS_NAME,
|
||||
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
|
||||
WEIGHTS_NAME,
|
||||
@@ -197,6 +201,7 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
raise NotImplementedError(f"init_weights method has to be implemented for {self}")
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(
|
||||
cls,
|
||||
pretrained_model_name_or_path: Union[str, os.PathLike],
|
||||
@@ -288,13 +293,13 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
```
|
||||
"""
|
||||
config = kwargs.pop("config", None)
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
from_pt = kwargs.pop("from_pt", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
|
||||
@@ -314,7 +319,7 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
**kwargs,
|
||||
@@ -359,7 +364,7 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
user_agent=user_agent,
|
||||
subfolder=subfolder,
|
||||
revision=revision,
|
||||
@@ -369,7 +374,7 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
raise EnvironmentError(
|
||||
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
|
||||
"listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
|
||||
"token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
|
||||
"token having permission to this repo with `token` or log in with `huggingface-cli "
|
||||
"login`."
|
||||
)
|
||||
except RevisionNotFoundError:
|
||||
|
||||
@@ -25,14 +25,13 @@ from typing import Any, Callable, List, Optional, Tuple, Union
|
||||
import safetensors
|
||||
import torch
|
||||
from huggingface_hub import create_repo
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from torch import Tensor, nn
|
||||
|
||||
from .. import __version__
|
||||
from ..utils import (
|
||||
CONFIG_NAME,
|
||||
DIFFUSERS_CACHE,
|
||||
FLAX_WEIGHTS_NAME,
|
||||
HF_HUB_OFFLINE,
|
||||
MIN_PEFT_VERSION,
|
||||
SAFETENSORS_WEIGHTS_NAME,
|
||||
WEIGHTS_NAME,
|
||||
@@ -535,6 +534,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
r"""
|
||||
Instantiate a pretrained PyTorch model from a pretrained model configuration.
|
||||
@@ -571,7 +571,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
local_files_only(`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -640,15 +640,15 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
from_flax = kwargs.pop("from_flax", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
output_loading_info = kwargs.pop("output_loading_info", False)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
torch_dtype = kwargs.pop("torch_dtype", None)
|
||||
subfolder = kwargs.pop("subfolder", None)
|
||||
@@ -718,7 +718,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
device_map=device_map,
|
||||
@@ -740,7 +740,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -763,7 +763,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
@@ -782,7 +782,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
subfolder=subfolder,
|
||||
user_agent=user_agent,
|
||||
|
||||
@@ -25,7 +25,6 @@ from .activations import get_activation
|
||||
from .attention_processor import (
|
||||
ADDED_KV_ATTENTION_PROCESSORS,
|
||||
CROSS_ATTENTION_PROCESSORS,
|
||||
Attention,
|
||||
AttentionProcessor,
|
||||
AttnAddedKVProcessor,
|
||||
AttnProcessor,
|
||||
@@ -795,42 +794,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
|
||||
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
|
||||
setattr(upsample_block, k, None)
|
||||
|
||||
def fuse_qkv_projections(self):
    """
    Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
    key, value) are fused. For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    The current attention processors are stored in `self.original_attn_processors` so that
    `unfuse_qkv_projections()` can restore them later.

    Raises:
        ValueError: If any attention processor's class name contains `"Added"` (added KV projections).
            In that case `self.original_attn_processors` is left as `None` and no module is fused.
    """
    # Reset first so a failed validation never leaves a stale snapshot behind.
    self.original_attn_processors = None

    # Read `attn_processors` once and reuse it for both the validation and the snapshot.
    attn_processors = self.attn_processors

    for attn_processor in attn_processors.values():
        if "Added" in attn_processor.__class__.__name__:
            raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")

    self.original_attn_processors = attn_processors

    for module in self.modules():
        if isinstance(module, Attention):
            module.fuse_projections(fuse=True)
|
||||
|
||||
def unfuse_qkv_projections(self):
    """Disables the fused QKV projection if enabled.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    Restores the attention processors saved in `self.original_attn_processors` by passing them to
    `set_attn_processor()`. Does nothing when no processors were saved, including when
    `fuse_qkv_projections()` was never called on this model.
    """
    # In the code visible here the attribute is only assigned by `fuse_qkv_projections()`, so look it
    # up defensively: calling this method first should be a no-op, not an `AttributeError`.
    original_attn_processors = getattr(self, "original_attn_processors", None)
    if original_attn_processors is not None:
        self.set_attn_processor(original_attn_processors)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
sample: torch.FloatTensor,
|
||||
|
||||
@@ -16,8 +16,9 @@
|
||||
import inspect
|
||||
from collections import OrderedDict
|
||||
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from ..configuration_utils import ConfigMixin
|
||||
from ..utils import DIFFUSERS_CACHE
|
||||
from .controlnet import (
|
||||
StableDiffusionControlNetImg2ImgPipeline,
|
||||
StableDiffusionControlNetInpaintPipeline,
|
||||
@@ -195,6 +196,7 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiates a text-to-image Pytorch diffusion pipeline from pretrained pipeline weight.
|
||||
@@ -246,7 +248,7 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -310,11 +312,11 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
>>> image = pipeline(prompt).images[0]
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
@@ -323,7 +325,7 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
"force_download": force_download,
|
||||
"resume_download": resume_download,
|
||||
"proxies": proxies,
|
||||
"use_auth_token": use_auth_token,
|
||||
"token": token,
|
||||
"local_files_only": local_files_only,
|
||||
"revision": revision,
|
||||
}
|
||||
@@ -466,6 +468,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiates a image-to-image Pytorch diffusion pipeline from pretrained pipeline weight.
|
||||
@@ -518,7 +521,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -582,11 +585,11 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
>>> image = pipeline(prompt, image).images[0]
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
@@ -595,7 +598,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
"force_download": force_download,
|
||||
"resume_download": resume_download,
|
||||
"proxies": proxies,
|
||||
"use_auth_token": use_auth_token,
|
||||
"token": token,
|
||||
"local_files_only": local_files_only,
|
||||
"revision": revision,
|
||||
}
|
||||
@@ -742,6 +745,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_or_path, **kwargs):
|
||||
r"""
|
||||
Instantiates a inpainting Pytorch diffusion pipeline from pretrained pipeline weight.
|
||||
@@ -793,7 +797,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -857,11 +861,11 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
>>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0]
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
revision = kwargs.pop("revision", None)
|
||||
|
||||
@@ -870,7 +874,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
"force_download": force_download,
|
||||
"resume_download": resume_download,
|
||||
"proxies": proxies,
|
||||
"use_auth_token": use_auth_token,
|
||||
"token": token,
|
||||
"local_files_only": local_files_only,
|
||||
"revision": revision,
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub import hf_hub_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from ..utils import ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, is_onnx_available, logging
|
||||
|
||||
@@ -130,10 +131,11 @@ class OnnxRuntimeModel:
|
||||
self._save_pretrained(save_directory, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def _from_pretrained(
|
||||
cls,
|
||||
model_id: Union[str, Path],
|
||||
use_auth_token: Optional[Union[bool, str, None]] = None,
|
||||
token: Optional[Union[bool, str, None]] = None,
|
||||
revision: Optional[Union[str, None]] = None,
|
||||
force_download: bool = False,
|
||||
cache_dir: Optional[str] = None,
|
||||
@@ -148,7 +150,7 @@ class OnnxRuntimeModel:
|
||||
Arguments:
|
||||
model_id (`str` or `Path`):
|
||||
Directory from which to load
|
||||
use_auth_token (`str` or `bool`):
|
||||
token (`str` or `bool`):
|
||||
Is needed to load models from a private or gated repository
|
||||
revision (`str`):
|
||||
Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id
|
||||
@@ -179,7 +181,7 @@ class OnnxRuntimeModel:
|
||||
model_cache_path = hf_hub_download(
|
||||
repo_id=model_id,
|
||||
filename=model_file_name,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
@@ -190,11 +192,12 @@ class OnnxRuntimeModel:
|
||||
return cls(model=model, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(
|
||||
cls,
|
||||
model_id: Union[str, Path],
|
||||
force_download: bool = True,
|
||||
use_auth_token: Optional[str] = None,
|
||||
token: Optional[str] = None,
|
||||
cache_dir: Optional[str] = None,
|
||||
**model_kwargs,
|
||||
):
|
||||
@@ -207,6 +210,6 @@ class OnnxRuntimeModel:
|
||||
revision=revision,
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
**model_kwargs,
|
||||
)
|
||||
|
||||
@@ -24,6 +24,7 @@ import numpy as np
|
||||
import PIL.Image
|
||||
from flax.core.frozen_dict import FrozenDict
|
||||
from huggingface_hub import create_repo, snapshot_download
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from PIL import Image
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
@@ -32,7 +33,6 @@ from ..models.modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
|
||||
from ..schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin
|
||||
from ..utils import (
|
||||
CONFIG_NAME,
|
||||
DIFFUSERS_CACHE,
|
||||
BaseOutput,
|
||||
PushToHubMixin,
|
||||
http_user_agent,
|
||||
@@ -227,6 +227,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
r"""
|
||||
Instantiate a Flax-based diffusion pipeline from pretrained pipeline weights.
|
||||
@@ -264,7 +265,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -314,11 +315,11 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
>>> dpm_params["scheduler"] = dpmpp_state
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
from_pt = kwargs.pop("from_pt", False)
|
||||
use_memory_efficient_attention = kwargs.pop("use_memory_efficient_attention", False)
|
||||
@@ -334,7 +335,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
)
|
||||
# make sure we only download sub-folders and `diffusers` filenames
|
||||
@@ -365,7 +366,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
allow_patterns=allow_patterns,
|
||||
ignore_patterns=ignore_patterns,
|
||||
|
||||
@@ -28,7 +28,14 @@ from typing import Any, Callable, Dict, List, Optional, Union
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
from huggingface_hub import ModelCard, create_repo, hf_hub_download, model_info, snapshot_download
|
||||
from huggingface_hub import (
|
||||
ModelCard,
|
||||
create_repo,
|
||||
hf_hub_download,
|
||||
model_info,
|
||||
snapshot_download,
|
||||
)
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from packaging import version
|
||||
from requests.exceptions import HTTPError
|
||||
from tqdm.auto import tqdm
|
||||
@@ -40,8 +47,6 @@ from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
|
||||
from ..utils import (
|
||||
CONFIG_NAME,
|
||||
DEPRECATED_REVISION_ARGS,
|
||||
DIFFUSERS_CACHE,
|
||||
HF_HUB_OFFLINE,
|
||||
SAFETENSORS_WEIGHTS_NAME,
|
||||
WEIGHTS_NAME,
|
||||
BaseOutput,
|
||||
@@ -249,10 +254,11 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
|
||||
return usable_filenames, variant_filenames
|
||||
|
||||
|
||||
def warn_deprecated_model_variant(pretrained_model_name_or_path, use_auth_token, variant, revision, model_filenames):
|
||||
@validate_hf_hub_args
|
||||
def warn_deprecated_model_variant(pretrained_model_name_or_path, token, variant, revision, model_filenames):
|
||||
info = model_info(
|
||||
pretrained_model_name_or_path,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=None,
|
||||
)
|
||||
filenames = {sibling.rfilename for sibling in info.siblings}
|
||||
@@ -375,7 +381,6 @@ def _get_pipeline_class(
|
||||
custom_pipeline,
|
||||
module_file=file_name,
|
||||
class_name=class_name,
|
||||
repo_id=repo_id,
|
||||
cache_dir=cache_dir,
|
||||
revision=revision,
|
||||
)
|
||||
@@ -909,6 +914,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
return torch.float32
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
r"""
|
||||
Instantiate a PyTorch diffusion pipeline from pretrained pipeline weights.
|
||||
@@ -976,7 +982,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -1056,12 +1062,12 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
>>> pipeline.scheduler = scheduler
|
||||
```
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
from_flax = kwargs.pop("from_flax", False)
|
||||
torch_dtype = kwargs.pop("torch_dtype", None)
|
||||
@@ -1094,7 +1100,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
from_flax=from_flax,
|
||||
use_safetensors=use_safetensors,
|
||||
@@ -1299,7 +1305,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
"force_download": force_download,
|
||||
"proxies": proxies,
|
||||
"local_files_only": local_files_only,
|
||||
"use_auth_token": use_auth_token,
|
||||
"token": token,
|
||||
"revision": revision,
|
||||
"torch_dtype": torch_dtype,
|
||||
"custom_pipeline": custom_pipeline,
|
||||
@@ -1529,6 +1535,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
cpu_offload(model, device, offload_buffers=offload_buffers)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
|
||||
r"""
|
||||
Download and cache a PyTorch diffusion pipeline from pretrained pipeline weights.
|
||||
@@ -1576,7 +1583,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
local_files_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -1619,12 +1626,12 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
</Tip>
|
||||
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", None)
|
||||
token = kwargs.pop("token", None)
|
||||
revision = kwargs.pop("revision", None)
|
||||
from_flax = kwargs.pop("from_flax", False)
|
||||
custom_pipeline = kwargs.pop("custom_pipeline", None)
|
||||
@@ -1646,11 +1653,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
model_info_call_error: Optional[Exception] = None
|
||||
if not local_files_only:
|
||||
try:
|
||||
info = model_info(
|
||||
pretrained_model_name,
|
||||
use_auth_token=use_auth_token,
|
||||
revision=revision,
|
||||
)
|
||||
info = model_info(pretrained_model_name, token=token, revision=revision)
|
||||
except HTTPError as e:
|
||||
logger.warn(f"Couldn't connect to the Hub: {e}.\nWill try to load from local cache.")
|
||||
local_files_only = True
|
||||
@@ -1665,7 +1668,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
proxies=proxies,
|
||||
force_download=force_download,
|
||||
resume_download=resume_download,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
)
|
||||
|
||||
config_dict = cls._dict_from_json_file(config_file)
|
||||
@@ -1715,9 +1718,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
if revision in DEPRECATED_REVISION_ARGS and version.parse(
|
||||
version.parse(__version__).base_version
|
||||
) >= version.parse("0.22.0"):
|
||||
warn_deprecated_model_variant(
|
||||
pretrained_model_name, use_auth_token, variant, revision, model_filenames
|
||||
)
|
||||
warn_deprecated_model_variant(pretrained_model_name, token, variant, revision, model_filenames)
|
||||
|
||||
model_folder_names = {os.path.split(f)[0] for f in model_filenames if os.path.split(f)[0] in folder_names}
|
||||
|
||||
@@ -1859,7 +1860,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
allow_patterns=allow_patterns,
|
||||
ignore_patterns=ignore_patterns,
|
||||
@@ -1883,7 +1884,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
"force_download": force_download,
|
||||
"proxies": proxies,
|
||||
"local_files_only": local_files_only,
|
||||
"use_auth_token": use_auth_token,
|
||||
"token": token,
|
||||
"variant": variant,
|
||||
"use_safetensors": use_safetensors,
|
||||
}
|
||||
|
||||
@@ -446,9 +446,8 @@ def convert_ldm_unet_checkpoint(
|
||||
new_checkpoint["add_embedding.linear_2.bias"] = unet_state_dict["label_emb.0.2.bias"]
|
||||
|
||||
# Relevant to StableDiffusionUpscalePipeline
|
||||
if "num_class_embeds" in config:
|
||||
if (config["num_class_embeds"] is not None) and ("label_emb.weight" in unet_state_dict):
|
||||
new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]
|
||||
if (config["num_class_embeds"] is not None) and ("label_emb.weight" in unet_state_dict):
|
||||
new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]
|
||||
|
||||
new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
|
||||
new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]
|
||||
|
||||
@@ -34,7 +34,6 @@ from ...loaders import (
|
||||
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
|
||||
from ...models.attention_processor import (
|
||||
AttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
LoRAAttnProcessor2_0,
|
||||
LoRAXFormersAttnProcessor,
|
||||
XFormersAttnProcessor,
|
||||
@@ -682,6 +681,7 @@ class StableDiffusionXLPipeline(
|
||||
add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
|
||||
return add_time_ids
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
|
||||
def upcast_vae(self):
|
||||
dtype = self.vae.dtype
|
||||
self.vae.to(dtype=torch.float32)
|
||||
@@ -692,7 +692,6 @@ class StableDiffusionXLPipeline(
|
||||
XFormersAttnProcessor,
|
||||
LoRAXFormersAttnProcessor,
|
||||
LoRAAttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
),
|
||||
)
|
||||
# if xformers or torch_2_0 is used attention block does not need
|
||||
@@ -730,65 +729,6 @@ class StableDiffusionXLPipeline(
|
||||
"""Disables the FreeU mechanism if enabled."""
|
||||
self.unet.disable_freeu()
|
||||
|
||||
def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
    """
    Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
    key, value) are fused. For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    Args:
        unet (`bool`, defaults to `True`): To apply fusion on the UNet.
        vae (`bool`, defaults to `True`): To apply fusion on the VAE.

    Raises:
        ValueError: If `vae` is `True` and `self.vae` is not an `AutoencoderKL`. The check runs before
            anything is fused, so a failed call leaves the pipeline unchanged.
    """
    # Validate first: raising after the UNet was already fused would leave the pipeline half-fused.
    if vae and not isinstance(self.vae, AutoencoderKL):
        raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")

    # Keep flags recorded by an earlier call instead of resetting them; otherwise a later call with
    # `unet=False` (or `vae=False`) would make `unfuse_qkv_projections()` skip a component that is
    # still fused.
    self.fusing_unet = getattr(self, "fusing_unet", False)
    self.fusing_vae = getattr(self, "fusing_vae", False)

    if unet:
        self.fusing_unet = True
        self.unet.fuse_qkv_projections()
        self.unet.set_attn_processor(FusedAttnProcessor2_0())

    if vae:
        self.fusing_vae = True
        self.vae.fuse_qkv_projections()
        self.vae.set_attn_processor(FusedAttnProcessor2_0())
|
||||
|
||||
def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
    """Disable QKV projection fusion if enabled.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    Args:
        unet (`bool`, defaults to `True`): To undo fusion on the UNet.
        vae (`bool`, defaults to `True`): To undo fusion on the VAE.

    """
    if unet:
        # The flag may not exist yet if `fuse_qkv_projections()` was never called; treat that as
        # "not fused" so the warning is logged instead of an `AttributeError` being raised.
        if not getattr(self, "fusing_unet", False):
            logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
        else:
            self.unet.unfuse_qkv_projections()
            self.fusing_unet = False

    if vae:
        # Same guard as for the UNet flag above.
        if not getattr(self, "fusing_vae", False):
            logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
        else:
            self.vae.unfuse_qkv_projections()
            self.fusing_vae = False
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
"""
|
||||
|
||||
@@ -24,7 +24,6 @@ from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, Te
|
||||
from ...models import AutoencoderKL, UNet2DConditionModel
|
||||
from ...models.attention_processor import (
|
||||
AttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
LoRAAttnProcessor2_0,
|
||||
LoRAXFormersAttnProcessor,
|
||||
XFormersAttnProcessor,
|
||||
@@ -611,7 +610,6 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
||||
XFormersAttnProcessor,
|
||||
LoRAXFormersAttnProcessor,
|
||||
LoRAAttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
),
|
||||
)
|
||||
# if xformers or torch_2_0 is used attention block does not need
|
||||
|
||||
@@ -10,10 +10,10 @@ from diffusers.utils import deprecate
|
||||
from ...configuration_utils import ConfigMixin, register_to_config
|
||||
from ...models import ModelMixin
|
||||
from ...models.activations import get_activation
|
||||
from ...models.attention import Attention
|
||||
from ...models.attention_processor import (
|
||||
ADDED_KV_ATTENTION_PROCESSORS,
|
||||
CROSS_ATTENTION_PROCESSORS,
|
||||
Attention,
|
||||
AttentionProcessor,
|
||||
AttnAddedKVProcessor,
|
||||
AttnAddedKVProcessor2_0,
|
||||
@@ -1000,42 +1000,6 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
|
||||
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
|
||||
setattr(upsample_block, k, None)
|
||||
|
||||
def fuse_qkv_projections(self):
    """
    Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
    key, value) are fused. For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    Raises:
        ValueError: If any current attention processor's class name contains `"Added"`. In that case
            `self.original_attn_processors` is left as `None` and no module is fused.
    """
    self.original_attn_processors = None

    # Read the processors once so the mapping that is validated is the same one that gets stored.
    processors = self.attn_processors
    for attn_processor in processors.values():
        if "Added" in attn_processor.__class__.__name__:
            raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")

    # Remembered so that `unfuse_qkv_projections()` can restore these processors.
    self.original_attn_processors = processors

    for module in self.modules():
        if isinstance(module, Attention):
            module.fuse_projections(fuse=True)
|
||||
|
||||
def unfuse_qkv_projections(self):
    """Disables the fused QKV projection if enabled.

    Restores the attention processors saved in `self.original_attn_processors`, if any. Does nothing
    when no processors were saved.

    <Tip warning={true}>

    This API is 🧪 experimental.

    </Tip>

    """
    # The attribute may not exist if `fuse_qkv_projections()` was never called; treat that the same
    # as `None` (nothing to restore) rather than raising an `AttributeError`.
    if getattr(self, "original_attn_processors", None) is not None:
        self.set_attn_processor(self.original_attn_processors)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
sample: torch.FloatTensor,
|
||||
|
||||
@@ -191,11 +191,10 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
@property
|
||||
def init_noise_sigma(self):
|
||||
# standard deviation of the initial noise distribution
|
||||
max_sigma = max(self.sigmas) if isinstance(self.sigmas, list) else self.sigmas.max()
|
||||
if self.config.timestep_spacing in ["linspace", "trailing"]:
|
||||
return max_sigma
|
||||
return self.sigmas.max()
|
||||
|
||||
return (max_sigma**2 + 1) ** 0.5
|
||||
return (self.sigmas.max() ** 2 + 1) ** 0.5
|
||||
|
||||
@property
|
||||
def step_index(self):
|
||||
|
||||
@@ -18,6 +18,7 @@ from enum import Enum
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from ..utils import BaseOutput, PushToHubMixin
|
||||
|
||||
@@ -81,6 +82,7 @@ class SchedulerMixin(PushToHubMixin):
|
||||
has_compatibles = True
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(
|
||||
cls,
|
||||
pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None,
|
||||
@@ -120,7 +122,7 @@ class SchedulerMixin(PushToHubMixin):
|
||||
local_files_only(`bool`, *optional*, defaults to `False`):
|
||||
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
||||
won't be downloaded from the Hub.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
||||
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
|
||||
@@ -20,6 +20,7 @@ from typing import Optional, Tuple, Union
|
||||
|
||||
import flax
|
||||
import jax.numpy as jnp
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
|
||||
from ..utils import BaseOutput, PushToHubMixin
|
||||
|
||||
@@ -70,6 +71,7 @@ class FlaxSchedulerMixin(PushToHubMixin):
|
||||
has_compatibles = True
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(
|
||||
cls,
|
||||
pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None,
|
||||
@@ -110,7 +112,7 @@ class FlaxSchedulerMixin(PushToHubMixin):
|
||||
Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
|
||||
local_files_only(`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to only look at local files (i.e., do not try to download the model).
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
|
||||
when running `transformers-cli login` (stored in `~/.huggingface`).
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
|
||||
@@ -21,7 +21,6 @@ from .. import __version__
|
||||
from .constants import (
|
||||
CONFIG_NAME,
|
||||
DEPRECATED_REVISION_ARGS,
|
||||
DIFFUSERS_CACHE,
|
||||
DIFFUSERS_DYNAMIC_MODULE_NAME,
|
||||
FLAX_WEIGHTS_NAME,
|
||||
HF_MODULES_CACHE,
|
||||
@@ -38,7 +37,6 @@ from .doc_utils import replace_example_docstring
|
||||
from .dynamic_modules_utils import get_class_from_dynamic_module
|
||||
from .export_utils import export_to_gif, export_to_obj, export_to_ply, export_to_video
|
||||
from .hub_utils import (
|
||||
HF_HUB_OFFLINE,
|
||||
PushToHubMixin,
|
||||
_add_variant,
|
||||
_get_model_file,
|
||||
|
||||
@@ -14,15 +14,13 @@
|
||||
import importlib
|
||||
import os
|
||||
|
||||
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home
|
||||
from huggingface_hub.constants import HF_HOME
|
||||
from packaging import version
|
||||
|
||||
from ..dependency_versions_check import dep_version_check
|
||||
from .import_utils import ENV_VARS_TRUE_VALUES, is_peft_available, is_transformers_available
|
||||
|
||||
|
||||
default_cache_path = HUGGINGFACE_HUB_CACHE
|
||||
|
||||
MIN_PEFT_VERSION = "0.6.0"
|
||||
MIN_TRANSFORMERS_VERSION = "4.34.0"
|
||||
_CHECK_PEFT = os.environ.get("_CHECK_PEFT", "1") in ENV_VARS_TRUE_VALUES
|
||||
@@ -35,9 +33,8 @@ ONNX_WEIGHTS_NAME = "model.onnx"
|
||||
SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors"
|
||||
ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb"
|
||||
HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
|
||||
DIFFUSERS_CACHE = default_cache_path
|
||||
DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"
|
||||
HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules"))
|
||||
HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(HF_HOME, "modules"))
|
||||
DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"]
|
||||
|
||||
# Below should be `True` if the current version of `peft` and `transformers` are compatible with
|
||||
|
||||
@@ -25,7 +25,8 @@ from pathlib import Path
|
||||
from typing import Dict, Optional, Union
|
||||
from urllib import request
|
||||
|
||||
from huggingface_hub import HfFolder, cached_download, hf_hub_download, model_info
|
||||
from huggingface_hub import cached_download, hf_hub_download, model_info
|
||||
from huggingface_hub.utils import validate_hf_hub_args
|
||||
from packaging import version
|
||||
|
||||
from .. import __version__
|
||||
@@ -194,6 +195,7 @@ def find_pipeline_class(loaded_module):
|
||||
return pipeline_class
|
||||
|
||||
|
||||
@validate_hf_hub_args
|
||||
def get_cached_module_file(
|
||||
pretrained_model_name_or_path: Union[str, os.PathLike],
|
||||
module_file: str,
|
||||
@@ -201,7 +203,7 @@ def get_cached_module_file(
|
||||
force_download: bool = False,
|
||||
resume_download: bool = False,
|
||||
proxies: Optional[Dict[str, str]] = None,
|
||||
use_auth_token: Optional[Union[bool, str]] = None,
|
||||
token: Optional[Union[bool, str]] = None,
|
||||
revision: Optional[str] = None,
|
||||
local_files_only: bool = False,
|
||||
):
|
||||
@@ -232,7 +234,7 @@ def get_cached_module_file(
|
||||
proxies (`Dict[str, str]`, *optional*):
|
||||
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
||||
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
||||
use_auth_token (`str` or *bool*, *optional*):
|
||||
token (`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
|
||||
when running `transformers-cli login` (stored in `~/.huggingface`).
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -244,7 +246,7 @@ def get_cached_module_file(
|
||||
|
||||
<Tip>
|
||||
|
||||
You may pass a token in `use_auth_token` if you are not logged in (`huggingface-cli long`) and want to use private
|
||||
You may pass a token in `token` if you are not logged in (`huggingface-cli login`) and want to use private
|
||||
or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models).
|
||||
|
||||
</Tip>
|
||||
@@ -289,7 +291,7 @@ def get_cached_module_file(
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=False,
|
||||
token=False,
|
||||
)
|
||||
submodule = "git"
|
||||
module_file = pretrained_model_name_or_path + ".py"
|
||||
@@ -307,7 +309,7 @@ def get_cached_module_file(
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
)
|
||||
submodule = os.path.join("local", "--".join(pretrained_model_name_or_path.split("/")))
|
||||
except EnvironmentError:
|
||||
@@ -332,13 +334,6 @@ def get_cached_module_file(
|
||||
else:
|
||||
# Get the commit hash
|
||||
# TODO: we will get this info in the etag soon, so retrieve it from there and not here.
|
||||
if isinstance(use_auth_token, str):
|
||||
token = use_auth_token
|
||||
elif use_auth_token is True:
|
||||
token = HfFolder.get_token()
|
||||
else:
|
||||
token = None
|
||||
|
||||
commit_hash = model_info(pretrained_model_name_or_path, revision=revision, token=token).sha
|
||||
|
||||
# The module file will end up being placed in a subfolder with the git hash of the repo. This way we get the
|
||||
@@ -359,13 +354,14 @@ def get_cached_module_file(
|
||||
force_download=force_download,
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
local_files_only=local_files_only,
|
||||
)
|
||||
return os.path.join(full_submodule, module_file)
|
||||
|
||||
|
||||
@validate_hf_hub_args
|
||||
def get_class_from_dynamic_module(
|
||||
pretrained_model_name_or_path: Union[str, os.PathLike],
|
||||
module_file: str,
|
||||
@@ -374,7 +370,7 @@ def get_class_from_dynamic_module(
|
||||
force_download: bool = False,
|
||||
resume_download: bool = False,
|
||||
proxies: Optional[Dict[str, str]] = None,
|
||||
use_auth_token: Optional[Union[bool, str]] = None,
|
||||
token: Optional[Union[bool, str]] = None,
|
||||
revision: Optional[str] = None,
|
||||
local_files_only: bool = False,
|
||||
**kwargs,
|
||||
@@ -414,7 +410,7 @@ def get_class_from_dynamic_module(
|
||||
proxies (`Dict[str, str]`, *optional*):
|
||||
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
||||
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
||||
use_auth_token (`str` or `bool`, *optional*):
|
||||
token (`str` or `bool`, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
|
||||
when running `transformers-cli login` (stored in `~/.huggingface`).
|
||||
revision (`str`, *optional*, defaults to `"main"`):
|
||||
@@ -426,7 +422,7 @@ def get_class_from_dynamic_module(
|
||||
|
||||
<Tip>
|
||||
|
||||
You may pass a token in `use_auth_token` if you are not logged in (`huggingface-cli long`) and want to use private
|
||||
You may pass a token in `token` if you are not logged in (`huggingface-cli login`) and want to use private
|
||||
or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models).
|
||||
|
||||
</Tip>
|
||||
@@ -449,7 +445,7 @@ def get_class_from_dynamic_module(
|
||||
force_download=force_download,
|
||||
resume_download=resume_download,
|
||||
proxies=proxies,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
revision=revision,
|
||||
local_files_only=local_files_only,
|
||||
)
|
||||
|
||||
@@ -25,20 +25,21 @@ from typing import Dict, Optional, Union
|
||||
from uuid import uuid4
|
||||
|
||||
from huggingface_hub import (
|
||||
HfFolder,
|
||||
ModelCard,
|
||||
ModelCardData,
|
||||
create_repo,
|
||||
get_full_repo_name,
|
||||
hf_hub_download,
|
||||
upload_folder,
|
||||
whoami,
|
||||
)
|
||||
from huggingface_hub.constants import HF_HUB_CACHE, HF_HUB_DISABLE_TELEMETRY, HF_HUB_OFFLINE
|
||||
from huggingface_hub.file_download import REGEX_COMMIT_HASH
|
||||
from huggingface_hub.utils import (
|
||||
EntryNotFoundError,
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
is_jinja_available,
|
||||
validate_hf_hub_args,
|
||||
)
|
||||
from packaging import version
|
||||
from requests import HTTPError
|
||||
@@ -46,7 +47,6 @@ from requests import HTTPError
|
||||
from .. import __version__
|
||||
from .constants import (
|
||||
DEPRECATED_REVISION_ARGS,
|
||||
DIFFUSERS_CACHE,
|
||||
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
|
||||
SAFETENSORS_WEIGHTS_NAME,
|
||||
WEIGHTS_NAME,
|
||||
@@ -69,9 +69,6 @@ logger = get_logger(__name__)
|
||||
|
||||
MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "model_card_template.md"
|
||||
SESSION_ID = uuid4().hex
|
||||
HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
HUGGINGFACE_CO_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/api/telemetry/"
|
||||
|
||||
|
||||
def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
|
||||
@@ -79,7 +76,7 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
|
||||
Formats a user-agent string with basic info about a request.
|
||||
"""
|
||||
ua = f"diffusers/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}"
|
||||
if DISABLE_TELEMETRY or HF_HUB_OFFLINE:
|
||||
if HF_HUB_DISABLE_TELEMETRY or HF_HUB_OFFLINE:
|
||||
return ua + "; telemetry/off"
|
||||
if is_torch_available():
|
||||
ua += f"; torch/{_torch_version}"
|
||||
@@ -98,16 +95,6 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
|
||||
return ua
|
||||
|
||||
|
||||
def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None) -> str:
    """Return the full repository name `<namespace>/<model_id>`.

    Args:
        model_id: Name of the repository without its namespace.
        organization: Namespace to use. When given, it is used as-is and no token is needed.
        token: Token passed to `whoami` to look up the user name when `organization` is `None`.
            If `None`, the token returned by `HfFolder.get_token()` is used.

    Returns:
        `f"{organization}/{model_id}"` when `organization` is given, otherwise
        `f"{username}/{model_id}"` where `username` is `whoami(token)["name"]`.
    """
    if organization is not None:
        # The namespace is explicit, so neither the stored token nor a `whoami` call is needed.
        return f"{organization}/{model_id}"

    if token is None:
        token = HfFolder.get_token()
    username = whoami(token)["name"]
    return f"{username}/{model_id}"
|
||||
|
||||
|
||||
def create_model_card(args, model_name):
|
||||
if not is_jinja_available():
|
||||
raise ValueError(
|
||||
@@ -183,7 +170,7 @@ old_diffusers_cache = os.path.join(hf_cache_home, "diffusers")
|
||||
|
||||
def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str] = None) -> None:
|
||||
if new_cache_dir is None:
|
||||
new_cache_dir = DIFFUSERS_CACHE
|
||||
new_cache_dir = HF_HUB_CACHE
|
||||
if old_cache_dir is None:
|
||||
old_cache_dir = old_diffusers_cache
|
||||
|
||||
@@ -203,7 +190,7 @@ def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str]
|
||||
# At this point, old_cache_dir contains symlinks to the new cache (it can still be used).
|
||||
|
||||
|
||||
cache_version_file = os.path.join(DIFFUSERS_CACHE, "version_diffusers_cache.txt")
|
||||
cache_version_file = os.path.join(HF_HUB_CACHE, "version_diffusers_cache.txt")
|
||||
if not os.path.isfile(cache_version_file):
|
||||
cache_version = 0
|
||||
else:
|
||||
@@ -233,12 +220,12 @@ if cache_version < 1:
|
||||
|
||||
if cache_version < 1:
|
||||
try:
|
||||
os.makedirs(DIFFUSERS_CACHE, exist_ok=True)
|
||||
os.makedirs(HF_HUB_CACHE, exist_ok=True)
|
||||
with open(cache_version_file, "w") as f:
|
||||
f.write("1")
|
||||
except Exception:
|
||||
logger.warning(
|
||||
f"There was a problem when trying to write in your cache folder ({DIFFUSERS_CACHE}). Please, ensure "
|
||||
f"There was a problem when trying to write in your cache folder ({HF_HUB_CACHE}). Please, ensure "
|
||||
"the directory exists and can be written to."
|
||||
)
|
||||
|
||||
@@ -252,20 +239,21 @@ def _add_variant(weights_name: str, variant: Optional[str] = None) -> str:
|
||||
return weights_name
|
||||
|
||||
|
||||
@validate_hf_hub_args
|
||||
def _get_model_file(
|
||||
pretrained_model_name_or_path,
|
||||
pretrained_model_name_or_path: Union[str, Path],
|
||||
*,
|
||||
weights_name,
|
||||
subfolder,
|
||||
cache_dir,
|
||||
force_download,
|
||||
proxies,
|
||||
resume_download,
|
||||
local_files_only,
|
||||
use_auth_token,
|
||||
user_agent,
|
||||
revision,
|
||||
commit_hash=None,
|
||||
weights_name: str,
|
||||
subfolder: Optional[str],
|
||||
cache_dir: Optional[str],
|
||||
force_download: bool,
|
||||
proxies: Optional[Dict],
|
||||
resume_download: bool,
|
||||
local_files_only: bool,
|
||||
token: Optional[str],
|
||||
user_agent: Union[Dict, str, None],
|
||||
revision: Optional[str],
|
||||
commit_hash: Optional[str] = None,
|
||||
):
|
||||
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
|
||||
if os.path.isfile(pretrained_model_name_or_path):
|
||||
@@ -300,7 +288,7 @@ def _get_model_file(
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
user_agent=user_agent,
|
||||
subfolder=subfolder,
|
||||
revision=revision or commit_hash,
|
||||
@@ -325,7 +313,7 @@ def _get_model_file(
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
token=token,
|
||||
user_agent=user_agent,
|
||||
subfolder=subfolder,
|
||||
revision=revision or commit_hash,
|
||||
@@ -336,7 +324,7 @@ def _get_model_file(
|
||||
raise EnvironmentError(
|
||||
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
|
||||
"listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
|
||||
"token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
|
||||
"token having permission to this repo with `token` or log in with `huggingface-cli "
|
||||
"login`."
|
||||
)
|
||||
except RevisionNotFoundError:
|
||||
|
||||
@@ -938,37 +938,6 @@ class StableDiffusionXLPipelineFastTests(
|
||||
|
||||
assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
|
||||
|
||||
def test_stable_diffusion_xl_with_fused_qkv_projections(self):
    """Check that fusing, then unfusing, QKV projections leaves the pipeline output unchanged."""
    device = "cpu"  # ensure determinism for the device-dependent torch.Generator
    pipeline = StableDiffusionXLPipeline(**self.get_dummy_components()).to(device)
    pipeline.set_progress_bar_config(disable=None)

    def run_and_slice():
        # Fresh dummy inputs are requested for every run, exactly as each stage did originally.
        return pipeline(**self.get_dummy_inputs(device)).images[0, -3:, -3:, -1]

    # Stage 1: untouched pipeline.
    original_image_slice = run_and_slice()

    # Stage 2: with fused projections.
    pipeline.fuse_qkv_projections()
    image_slice_fused = run_and_slice()

    # Stage 3: after undoing the fusion.
    pipeline.unfuse_qkv_projections()
    image_slice_disabled = run_and_slice()

    assert np.allclose(
        original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
    ), "Fusion of QKV projections shouldn't affect the outputs."
    assert np.allclose(
        image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2
    ), "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
    assert np.allclose(
        original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
    ), "Original outputs should match when fused QKV projections are disabled."
|
||||
|
||||
|
||||
@slow
|
||||
class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user