mirror of https://github.com/huggingface/diffusers.git
synced 2025-12-08 21:44:27 +08:00

Compare commits: cache-late...rev-fix (1 commit, 6cb2178a91)

.github/workflows/pr_test_fetcher.yml (vendored, 8 lines changed)
@@ -1,6 +1,12 @@
 name: Fast tests for PRs - Test Fetcher

-on: workflow_dispatch
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - ci-*

 env:
   DIFFUSERS_IS_CI: yes
@@ -133,7 +133,7 @@ def save_model_card(
         diffusers_imports_pivotal = """from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
         """
-        diffusers_example_pivotal = f"""embedding_path = hf_hub_download(repo_id='{repo_id}', filename="embeddings.safetensors", repo_type="model")
+        diffusers_example_pivotal = f"""embedding_path = hf_hub_download(repo_id="{repo_id}", filename="embeddings.safetensors", repo_type="model")
 state_dict = load_file(embedding_path)
 pipeline.load_textual_inversion(state_dict["clip_l"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer)
 pipeline.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2)
@@ -145,7 +145,8 @@ pipeline.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], te
 to trigger concept `{key}` → use `{tokens}` in your prompt \n
 """

-    yaml = f"""---
+    yaml = f"""
+---
 tags:
 - stable-diffusion-xl
 - stable-diffusion-xl-diffusers
@@ -158,7 +159,7 @@ base_model: {base_model}
 instance_prompt: {instance_prompt}
 license: openrail++
 ---
-"""
+"""

     model_card = f"""
 # SDXL LoRA DreamBooth - {repo_id}
@@ -169,6 +170,14 @@ license: openrail++

 ### These are {repo_id} LoRA adaption weights for {base_model}.

+The weights were trained using [DreamBooth](https://dreambooth.github.io/).
+
+LoRA for the text encoder was enabled: {train_text_encoder}.
+
+Pivotal tuning was enabled: {train_text_encoder_ti}.
+
+Special VAE used for training: {vae_path}.
+
 ## Trigger words

 {trigger_str}
@@ -187,24 +196,11 @@ image = pipeline('{validation_prompt if validation_prompt else instance_prompt}'

 For more details, including weighting, merging and fusing LoRAs, check the [documentation on loading LoRAs in diffusers](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters)

-## Download model
+## Download model (use it with UIs such as AUTO1111, Comfy, SD.Next, Invoke)

-### Use it with UIs such as AUTOMATIC1111, Comfy UI, SD.Next, Invoke
-
 Weights for this model are available in Safetensors format.

-- Download the LoRA *.safetensors [here](/{repo_id}/blob/main/pytorch_lora_weights.safetensors). Rename it and place it on your Lora folder.
-- Download the text embeddings *.safetensors [here](/{repo_id}/blob/main/embeddings.safetensors). Rename it and place it on it on your embeddings folder.
-
-All [Files & versions](/{repo_id}/tree/main).
-
-## Details
-
-The weights were trained using [🧨 diffusers Advanced Dreambooth Training Script](https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py).
-
-LoRA for the text encoder was enabled. {train_text_encoder}.
-
-Pivotal tuning was enabled: {train_text_encoder_ti}.
-
-Special VAE used for training: {vae_path}.
+[Download]({repo_id}/tree/main) them in the Files & versions tab.

 """
     with open(os.path.join(repo_folder, "README.md"), "w") as f:
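For reference, here is a self-contained sketch of the inference flow that the generated model card documents: load the trained LoRA plus the pivotal-tuning embeddings into an SDXL pipeline. The repo id is a placeholder; the file names (`pytorch_lora_weights.safetensors`, `embeddings.safetensors`) and the `<s0>`/`<s1>` tokens follow the card text in the hunks above.

```python
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

repo_id = "your-username/your-lora-repo"  # placeholder, stands in for {repo_id}

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights(repo_id, weight_name="pytorch_lora_weights.safetensors")

# Pivotal tuning: the learned token embeddings are stored separately and loaded
# into both SDXL text encoders under the trained tokens <s0>, <s1>.
embedding_path = hf_hub_download(repo_id=repo_id, filename="embeddings.safetensors", repo_type="model")
state_dict = load_file(embedding_path)
pipe.load_textual_inversion(
    state_dict["clip_l"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer
)
pipe.load_textual_inversion(
    state_dict["clip_g"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2
)

image = pipe("a photo of <s0><s1>").images[0]
```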
@@ -671,12 +667,6 @@ def parse_args(input_args=None):
         default=4,
         help=("The dimension of the LoRA update matrices."),
     )
-    parser.add_argument(
-        "--cache_latents",
-        action="store_true",
-        default=False,
-        help="Cache the VAE latents",
-    )

     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -1180,7 +1170,6 @@ def main(args):
         revision=args.revision,
         variant=args.variant,
     )
-    vae_scaling_factor = vae.config.scaling_factor
     unet = UNet2DConditionModel.from_pretrained(
         args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
     )
@@ -1611,20 +1600,6 @@ def main(args):
                 args.validation_prompt = args.validation_prompt.replace(token_abs, "".join(token_replacement))
         print("validation prompt:", args.validation_prompt)

-    if args.cache_latents:
-        latents_cache = []
-        for batch in tqdm(train_dataloader, desc="Caching latents"):
-            with torch.no_grad():
-                batch["pixel_values"] = batch["pixel_values"].to(
-                    accelerator.device, non_blocking=True, dtype=torch.float32
-                )
-                latents_cache.append(vae.encode(batch["pixel_values"]).latent_dist)
-
-        if args.validation_prompt is None:
-            del vae
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
     # Scheduler and math around the number of training steps.
     overrode_max_train_steps = False
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
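For context, the caching path in this hunk follows a common memory/speed trade-off: run every training image through the VAE encoder once up front, keep the latent distributions, and sample from the cache each step instead of re-encoding. A minimal standalone sketch of that pattern (the `vae` and `train_dataloader` names mirror the script; this is not an excerpt of it):

```python
import torch
from tqdm import tqdm

@torch.no_grad()
def cache_vae_latents(vae, train_dataloader, device):
    """Encode every image once and keep the latent distributions in memory."""
    latents_cache = []
    for batch in tqdm(train_dataloader, desc="Caching latents"):
        pixel_values = batch["pixel_values"].to(device, non_blocking=True, dtype=torch.float32)
        latents_cache.append(vae.encode(pixel_values).latent_dist)
    return latents_cache

# In the training loop, a cached distribution replaces the per-step VAE encode:
#   model_input = latents_cache[step].sample() * vae.config.scaling_factor
# instead of:
#   model_input = vae.encode(batch["pixel_values"]).latent_dist.sample() * vae.config.scaling_factor
```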
@@ -1740,7 +1715,9 @@ def main(args):
         unet.train()
         for step, batch in enumerate(train_dataloader):
             with accelerator.accumulate(unet):
+                pixel_values = batch["pixel_values"].to(dtype=vae.dtype)
                 prompts = batch["prompts"]
+                # print(prompts)
                 # encode batch prompts when custom prompts are provided for each image -
                 if train_dataset.custom_instance_prompts:
                     if freeze_text_encoder:
@@ -1752,13 +1729,9 @@ def main(args):
                         tokens_one = tokenize_prompt(tokenizer_one, prompts, add_special_tokens)
                         tokens_two = tokenize_prompt(tokenizer_two, prompts, add_special_tokens)

-                if args.cache_latents:
-                    model_input = latents_cache[step].sample()
-                else:
-                    pixel_values = batch["pixel_values"].to(dtype=vae.dtype)
-                    model_input = vae.encode(pixel_values).latent_dist.sample()
-
-                model_input = model_input * vae_scaling_factor
+                # Convert images to latent space
+                model_input = vae.encode(pixel_values).latent_dist.sample()
+                model_input = model_input * vae.config.scaling_factor
                 if args.pretrained_vae_model_name_or_path is None:
                     model_input = model_input.to(weight_dtype)
@@ -1,6 +1,6 @@
 # Latent Consistency Distillation Example:

-[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is a method to distill a latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use latent consistency distillation to distill stable-diffusion-v1.5 for inference with few timesteps.
+[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is method to distill latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use the latent consistency distillation to distill stable-diffusion-v1.5 for less timestep inference.

 ## Full model distillation
@@ -24,7 +24,7 @@ Then cd in the example folder and run
 pip install -r requirements.txt
 ```

-And initialize an [🤗 Accelerate](https://github.com/huggingface/accelerate/) environment with:
+And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with:

 ```bash
 accelerate config
@@ -46,16 +46,12 @@ write_basic_config()
 When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups.

-#### Example
-
-The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example, and for illustrative purposes only. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/). You may also need to search the hyperparameter space according to the dataset you use.
+#### Example with LAION-A6+ dataset

 ```bash
-export MODEL_NAME="runwayml/stable-diffusion-v1-5"
+export MODEL_DIR="runwayml/stable-diffusion-v1-5"
 export OUTPUT_DIR="path/to/saved/model"

-accelerate launch train_lcm_distill_sd_wds.py \
-    --pretrained_teacher_model=$MODEL_NAME \
+PROGRAM="train_lcm_distill_sd_wds.py \
+    --pretrained_teacher_model=$MODEL_DIR \
     --output_dir=$OUTPUT_DIR \
     --mixed_precision=fp16 \
     --resolution=512 \
@@ -63,7 +59,7 @@ accelerate launch train_lcm_distill_sd_wds.py \
     --max_train_steps=1000 \
     --max_train_samples=4000000 \
     --dataloader_num_workers=8 \
-    --train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
+    --train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
     --validation_steps=200 \
     --checkpointing_steps=200 --checkpoints_total_limit=10 \
     --train_batch_size=12 \
@@ -73,23 +69,19 @@ accelerate launch train_lcm_distill_sd_wds.py \
     --resume_from_checkpoint=latest \
     --report_to=wandb \
     --seed=453645634 \
-    --push_to_hub
+    --push_to_hub \
 ```

 ## LCM-LoRA

 Instead of fine-tuning the full model, we can also just train a LoRA that can be injected into any SDXL model.

-### Example
-
-The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/).
+### Example with LAION-A6+ dataset

 ```bash
-export MODEL_NAME="runwayml/stable-diffusion-v1-5"
+export MODEL_DIR="runwayml/stable-diffusion-v1-5"
 export OUTPUT_DIR="path/to/saved/model"

-accelerate launch train_lcm_distill_lora_sd_wds.py \
-    --pretrained_teacher_model=$MODEL_NAME \
+PROGRAM="train_lcm_distill_lora_sd_wds.py \
+    --pretrained_teacher_model=$MODEL_DIR \
     --output_dir=$OUTPUT_DIR \
     --mixed_precision=fp16 \
     --resolution=512 \
@@ -98,7 +90,7 @@ accelerate launch train_lcm_distill_lora_sd_wds.py \
     --max_train_steps=1000 \
     --max_train_samples=4000000 \
     --dataloader_num_workers=8 \
-    --train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
+    --train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
     --validation_steps=200 \
     --checkpointing_steps=200 --checkpoints_total_limit=10 \
     --train_batch_size=12 \
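For reference, once a LoRA distillation run like the ones in this README has produced weights, few-step inference looks roughly like the sketch below. The hub repo id is a placeholder; `LCMScheduler`, `load_lora_weights`, and the low-step/low-guidance settings are standard diffusers usage for LCM-LoRA.

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)  # swap in the LCM sampler
pipe.load_lora_weights("your-username/lcm-lora-sd-v1-5")          # placeholder repo id for the trained LoRA

# LCMs need only a handful of steps and little to no classifier-free guidance.
image = pipe("a photo of an astronaut riding a horse", num_inference_steps=4, guidance_scale=1.0).images[0]
```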
@@ -1,6 +1,6 @@
 # Latent Consistency Distillation Example:

-[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is a method to distill a latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use latent consistency distillation to distill SDXL for inference with few timesteps.
+[Latent Consistency Models (LCMs)](https://arxiv.org/abs/2310.04378) is method to distill latent diffusion model to enable swift inference with minimal steps. This example demonstrates how to use the latent consistency distillation to distill SDXL for less timestep inference.

 ## Full model distillation
@@ -24,7 +24,7 @@ Then cd in the example folder and run
 pip install -r requirements.txt
 ```

-And initialize an [🤗 Accelerate](https://github.com/huggingface/accelerate/) environment with:
+And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with:

 ```bash
 accelerate config
@@ -46,16 +46,12 @@ write_basic_config()
 When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups.

-#### Example
-
-The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example, and for illustrative purposes only. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/). You may also need to search the hyperparameter space according to the dataset you use.
+#### Example with LAION-A6+ dataset

 ```bash
-export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
+export MODEL_DIR="stabilityai/stable-diffusion-xl-base-1.0"
 export OUTPUT_DIR="path/to/saved/model"

-accelerate launch train_lcm_distill_sdxl_wds.py \
-    --pretrained_teacher_model=$MODEL_NAME \
+PROGRAM="train_lcm_distill_sdxl_wds.py \
+    --pretrained_teacher_model=$MODEL_DIR \
     --pretrained_vae_model_name_or_path=madebyollin/sdxl-vae-fp16-fix \
     --output_dir=$OUTPUT_DIR \
     --mixed_precision=fp16 \
@@ -64,7 +60,7 @@ accelerate launch train_lcm_distill_sdxl_wds.py \
     --max_train_steps=1000 \
     --max_train_samples=4000000 \
     --dataloader_num_workers=8 \
-    --train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
+    --train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
     --validation_steps=200 \
     --checkpointing_steps=200 --checkpoints_total_limit=10 \
     --train_batch_size=12 \
@@ -81,15 +77,11 @@ accelerate launch train_lcm_distill_sdxl_wds.py \

 Instead of fine-tuning the full model, we can also just train a LoRA that can be injected into any SDXL model.

-### Example
-
-The following uses the [Conceptual Captions 12M (CC12M) dataset](https://github.com/google-research-datasets/conceptual-12m) as an example. For best results you may consider large and high-quality text-image datasets such as [LAION](https://laion.ai/blog/laion-400-open-dataset/).
+### Example with LAION-A6+ dataset

 ```bash
-export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
+export MODEL_DIR="stabilityai/stable-diffusion-xl-base-1.0"
 export OUTPUT_DIR="path/to/saved/model"

-accelerate launch train_lcm_distill_lora_sdxl_wds.py \
-    --pretrained_teacher_model=$MODEL_NAME \
+PROGRAM="train_lcm_distill_lora_sdxl_wds.py \
+    --pretrained_teacher_model=$MODEL_DIR \
     --pretrained_vae_model_name_or_path=madebyollin/sdxl-vae-fp16-fix \
     --output_dir=$OUTPUT_DIR \
@@ -100,7 +92,7 @@ accelerate launch train_lcm_distill_lora_sdxl_wds.py \
     --max_train_steps=1000 \
     --max_train_samples=4000000 \
     --dataloader_num_workers=8 \
-    --train_shards_path_or_url="pipe:curl -L -s https://huggingface.co/datasets/laion/conceptual-captions-12m-webdataset/resolve/main/data/{00000..01099}.tar?download=true" \
+    --train_shards_path_or_url='pipe:aws s3 cp s3://muse-datasets/laion-aesthetic6plus-min512-data/{00000..01210}.tar -' \
     --validation_steps=200 \
     --checkpointing_steps=200 --checkpoints_total_limit=10 \
     --train_batch_size=12 \
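For reference, after a full-model SDXL distillation run like the one above, the distilled UNet can be dropped into an SDXL pipeline with `LCMScheduler` for few-step sampling. A sketch only: the checkpoint path is a placeholder, and the exact save layout depends on how the training script writes its output.

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler, UNet2DConditionModel

# Placeholder: wherever the distillation run wrote the distilled UNet weights.
distilled_unet = UNet2DConditionModel.from_pretrained("path/to/saved/model", torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", unet=distilled_unet, torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Consistency-distilled models sample in a handful of steps with low guidance.
image = pipe("a close-up photo of a fox in the snow", num_inference_steps=4, guidance_scale=1.0).images[0]
```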
@@ -1123,7 +1123,7 @@ def main(args):
     for epoch in range(first_epoch, args.num_train_epochs):
         for step, batch in enumerate(train_dataloader):
             with accelerator.accumulate(unet):
-                image, text = batch
+                image, text, _, _ = batch

                 image = image.to(accelerator.device, non_blocking=True)
                 encoded_text = compute_embeddings_fn(text)
@@ -68,11 +68,6 @@ from diffusers.utils.import_utils import is_xformers_available

 MAX_SEQ_LENGTH = 77

-# Adjust for your dataset
-WDS_JSON_WIDTH = "width" # original_width for LAION
-WDS_JSON_HEIGHT = "height" # original_height for LAION
-MIN_SIZE = 700 # ~960 for LAION, ideal: 1024 if the dataset contains large images
-
 if is_wandb_available():
     import wandb
@@ -151,10 +146,10 @@ class WebdatasetFilter:
         try:
             if "json" in x:
                 x_json = json.loads(x["json"])
-                filter_size = (x_json.get(WDS_JSON_WIDTH, 0.0) or 0.0) >= self.min_size and x_json.get(
-                    WDS_JSON_HEIGHT, 0
+                filter_size = (x_json.get("original_width", 0.0) or 0.0) >= self.min_size and x_json.get(
+                    "original_height", 0
                 ) >= self.min_size
-                filter_watermark = (x_json.get("pwatermark", 0.0) or 0.0) <= self.max_pwatermark
+                filter_watermark = (x_json.get("pwatermark", 1.0) or 1.0) <= self.max_pwatermark
                 return filter_size and filter_watermark
             else:
                 return False
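For reference, a standalone sketch of the filtering pattern shown in this hunk: a callable handed to webdataset's `wds.select(...)` that keeps only samples whose LAION-style metadata reports a large enough image and a low watermark probability. The thresholds are placeholders.

```python
import json

class WebdatasetFilter:
    """Predicate over raw webdataset samples, applied before decoding."""

    def __init__(self, min_size=1024, max_pwatermark=0.5):
        self.min_size = min_size
        self.max_pwatermark = max_pwatermark

    def __call__(self, x):
        try:
            if "json" not in x:
                return False
            meta = json.loads(x["json"])
            big_enough = (meta.get("original_width", 0) or 0) >= self.min_size and (
                meta.get("original_height", 0) or 0
            ) >= self.min_size
            low_watermark = (meta.get("pwatermark", 1.0) or 1.0) <= self.max_pwatermark
            return big_enough and low_watermark
        except Exception:
            # Malformed metadata: drop the sample rather than crash the pipeline.
            return False
```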
@@ -185,7 +180,7 @@ class Text2ImageDataset:
             if use_fix_crop_and_size:
                 return (resolution, resolution)
             else:
-                return (int(json.get(WDS_JSON_WIDTH, 0.0)), int(json.get(WDS_JSON_HEIGHT, 0.0)))
+                return (int(json.get("original_width", 0.0)), int(json.get("original_height", 0.0)))

         def transform(example):
             # resize image
@@ -217,7 +212,7 @@ class Text2ImageDataset:
         pipeline = [
             wds.ResampledShards(train_shards_path_or_url),
             tarfile_to_samples_nothrow,
-            wds.select(WebdatasetFilter(min_size=MIN_SIZE)),
+            wds.select(WebdatasetFilter(min_size=960)),
             wds.shuffle(shuffle_buffer_size),
             *processing_pipeline,
             wds.batched(per_gpu_batch_size, partial=False, collation_fn=default_collate),
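For reference, a stripped-down sketch of how a webdataset pipeline like the one above is assembled and consumed, reusing the filter class sketched earlier. The shard URL, sample keys, resize size, and batch size are placeholders rather than values from the scripts.

```python
import torchvision.transforms.functional as TF
import webdataset as wds
from torch.utils.data import default_collate

shards = "pipe:curl -L -s https://example.com/data/{00000..00099}.tar"  # placeholder shard URL

def to_fixed_tensor(sample):
    image, caption = sample
    # Resize so default_collate can stack the batch; 512 is an arbitrary choice here.
    return TF.to_tensor(TF.resize(image, [512, 512])), caption

dataset = wds.DataPipeline(
    wds.ResampledShards(shards),                 # sample shards with replacement
    wds.tarfile_to_samples(),                    # tar members -> dicts like {"jpg": ..., "txt": ..., "json": ...}
    wds.select(WebdatasetFilter(min_size=960)),  # drop small or watermarked samples (class sketched above)
    wds.shuffle(1000),                           # in-memory shuffle buffer
    wds.decode("pil"),                           # decode images to PIL
    wds.to_tuple("jpg", "txt"),                  # keep (image, caption) pairs
    wds.map(to_fixed_tensor),
    wds.batched(8, partial=False, collation_fn=default_collate),
)

images, captions = next(iter(dataset))  # one batch, just to show the iteration pattern
```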
@@ -1106,7 +1106,7 @@ def main(args):
     for epoch in range(first_epoch, args.num_train_epochs):
         for step, batch in enumerate(train_dataloader):
             with accelerator.accumulate(unet):
-                image, text = batch
+                image, text, _, _ = batch

                 image = image.to(accelerator.device, non_blocking=True)
                 encoded_text = compute_embeddings_fn(text)
@@ -67,11 +67,6 @@ from diffusers.utils.import_utils import is_xformers_available

 MAX_SEQ_LENGTH = 77

-# Adjust for your dataset
-WDS_JSON_WIDTH = "width" # original_width for LAION
-WDS_JSON_HEIGHT = "height" # original_height for LAION
-MIN_SIZE = 700 # ~960 for LAION, ideal: 1024 if the dataset contains large images
-
 if is_wandb_available():
     import wandb
@@ -133,10 +128,10 @@ class WebdatasetFilter:
         try:
             if "json" in x:
                 x_json = json.loads(x["json"])
-                filter_size = (x_json.get(WDS_JSON_WIDTH, 0.0) or 0.0) >= self.min_size and x_json.get(
-                    WDS_JSON_HEIGHT, 0
+                filter_size = (x_json.get("original_width", 0.0) or 0.0) >= self.min_size and x_json.get(
+                    "original_height", 0
                 ) >= self.min_size
-                filter_watermark = (x_json.get("pwatermark", 0.0) or 0.0) <= self.max_pwatermark
+                filter_watermark = (x_json.get("pwatermark", 1.0) or 1.0) <= self.max_pwatermark
                 return filter_size and filter_watermark
             else:
                 return False
@@ -167,7 +162,7 @@ class Text2ImageDataset:
             if use_fix_crop_and_size:
                 return (resolution, resolution)
             else:
-                return (int(json.get(WDS_JSON_WIDTH, 0.0)), int(json.get(WDS_JSON_HEIGHT, 0.0)))
+                return (int(json.get("original_width", 0.0)), int(json.get("original_height", 0.0)))

         def transform(example):
             # resize image
@@ -199,7 +194,7 @@ class Text2ImageDataset:
         pipeline = [
             wds.ResampledShards(train_shards_path_or_url),
             tarfile_to_samples_nothrow,
-            wds.select(WebdatasetFilter(min_size=MIN_SIZE)),
+            wds.select(WebdatasetFilter(min_size=960)),
             wds.shuffle(shuffle_buffer_size),
             *processing_pipeline,
             wds.batched(per_gpu_batch_size, partial=False, collation_fn=default_collate),
@@ -446,9 +446,8 @@ def convert_ldm_unet_checkpoint(
         new_checkpoint["add_embedding.linear_2.bias"] = unet_state_dict["label_emb.0.2.bias"]

     # Relevant to StableDiffusionUpscalePipeline
-    if "num_class_embeds" in config:
-        if (config["num_class_embeds"] is not None) and ("label_emb.weight" in unet_state_dict):
-            new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]
+    if (config["num_class_embeds"] is not None) and ("label_emb.weight" in unet_state_dict):
+        new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]

     new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
     new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]
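The difference in this hunk is whether `config["num_class_embeds"]` is read behind an `"num_class_embeds" in config` membership check; without the check, a UNet config that simply lacks the key raises `KeyError`. A minimal illustration with plain dicts (values are hypothetical):

```python
config_without = {"in_channels": 4}                              # e.g. a config with no class embeddings
config_with = {"in_channels": 4, "num_class_embeds": 1000}

# Unguarded access fails for the first config:
#   config_without["num_class_embeds"]  -> KeyError: 'num_class_embeds'

# Guarded form works for both:
for config in (config_without, config_with):
    if "num_class_embeds" in config:
        if config["num_class_embeds"] is not None:
            print("has class embeddings:", config["num_class_embeds"])
```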