Compare commits

...

8 Commits

Author SHA1 Message Date
Dhruv Nair
e8d40b3d5d update 2024-02-17 09:59:51 +00:00
Dhruv Nair
d699d686c0 update 2024-02-13 05:59:30 +00:00
Alex Umnov
e7696e20f9 Updated lora inference instructions (#6913)
* Updated lora inference instructions

* Update examples/dreambooth/README.md

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

* Update README.md

* Update README.md

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2024-02-13 09:35:20 +05:30
Piyush Thakur
4b89aeffe1 [Type annotations] fixed in save_model_card (#6948)
fixed type annotations

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2024-02-13 08:56:45 +05:30
Steven Liu
0a1daadef8 [docs] Community pipelines (#6929)
fix
2024-02-12 10:38:13 -08:00
Sayak Paul
371f765908 [Diffusers -> Original SD conversion] fix things (#6933)
* fix: bias loading bug

* fixes for SDXL

* apply changes to the conversion script to match single_file_utils.py

* do transpose to match the single file loading logic.
2024-02-12 17:30:22 +05:30
Piyush Thakur
75aee39eac [Model Card] standardize T2I Adapter Sdxl model card (#6947)
standardize model card template t21-adapter-sdxl
2024-02-12 16:43:20 +05:30
Dhruv Nair
215e6804d3 Unpin torch versions in CI (#6945)
* update

* update

* update

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2024-02-12 16:01:05 +05:30
17 changed files with 137 additions and 71 deletions

View File

@@ -26,9 +26,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
python3.9 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3.9 -m pip install --no-cache-dir \
accelerate \

View File

@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
python3 -m pip install --no-cache-dir \

View File

@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3 -m pip install --no-cache-dir \
accelerate \

View File

@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3 -m pip install --no-cache-dir \
accelerate \

View File

@@ -56,6 +56,60 @@ pipeline = DiffusionPipeline.from_pretrained(
)
```
### Load from a local file
Community pipelines can also be loaded from a local file if you pass a file path instead. The path to the passed directory must contain a `pipeline.py` file that contains the pipeline class in order to successfully load it.
```py
pipeline = DiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
custom_pipeline="./path/to/pipeline_directory/",
clip_model=clip_model,
feature_extractor=feature_extractor,
use_safetensors=True,
)
```
### Load from a specific version
By default, community pipelines are loaded from the latest stable version of Diffusers. To load a community pipeline from another version, use the `custom_revision` parameter.
<hfoptions id="version">
<hfoption id="main">
For example, to load from the `main` branch:
```py
pipeline = DiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
custom_pipeline="clip_guided_stable_diffusion",
custom_revision="main",
clip_model=clip_model,
feature_extractor=feature_extractor,
use_safetensors=True,
)
```
</hfoption>
<hfoption id="older version">
For example, to load from a previous version of Diffusers like `v0.25.0`:
```py
pipeline = DiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
custom_pipeline="clip_guided_stable_diffusion",
custom_revision="v0.25.0",
clip_model=clip_model,
feature_extractor=feature_extractor,
use_safetensors=True,
)
```
</hfoption>
</hfoptions>
For more information about community pipelines, take a look at the [Community pipelines](custom_pipeline_examples) guide for how to use them and if you're interested in adding a community pipeline check out the [How to contribute a community pipeline](contribute_pipeline) guide!
## Community components

View File

@@ -376,18 +376,14 @@ After training, LoRA weights can be loaded very easily into the original pipelin
load the original pipeline:
```python
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
import torch
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
from diffusers import DiffusionPipeline
pipe = DiffusionPipeline.from_pretrained("base-model-name").to("cuda")
```
Next, we can load the adapter layers into the UNet with the [`load_attn_procs` function](https://huggingface.co/docs/diffusers/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs).
Next, we can load the adapter layers into the pipeline with the [`load_lora_weights` function](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters#lora).
```python
pipe.unet.load_attn_procs("patrickvonplaten/lora_dreambooth_dog_example")
pipe.load_lora_weights("path-to-the-lora-checkpoint")
```
Finally, we can run the model in inference.

View File

@@ -49,6 +49,7 @@ from diffusers import (
)
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version, is_wandb_available
from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.torch_utils import is_compiled_module
@@ -195,7 +196,7 @@ def import_model_class_from_model_name_or_path(
raise ValueError(f"{model_class} is not supported.")
def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=None):
def save_model_card(repo_id: str, image_logs: dict = None, base_model: str = None, repo_folder: str = None):
img_str = ""
if image_logs is not None:
img_str = "You can find some example images below.\n"
@@ -209,27 +210,25 @@ def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=N
image_grid(images, 1, len(images)).save(os.path.join(repo_folder, f"images_{i}.png"))
img_str += f"![images_{i})](./images_{i}.png)\n"
yaml = f"""
---
license: creativeml-openrail-m
base_model: {base_model}
tags:
- stable-diffusion-xl
- stable-diffusion-xl-diffusers
- text-to-image
- diffusers
- t2iadapter
inference: true
---
"""
model_card = f"""
model_description = f"""
# t2iadapter-{repo_id}
These are t2iadapter weights trained on {base_model} with new type of conditioning.
{img_str}
"""
with open(os.path.join(repo_folder, "README.md"), "w") as f:
f.write(yaml + model_card)
model_card = load_or_create_model_card(
repo_id_or_path=repo_id,
from_training=True,
license="creativeml-openrail-m",
base_model=base_model,
model_description=model_description,
inference=True,
)
tags = ["stable-diffusion-xl", "stable-diffusion-xl-diffusers", "text-to-image", "diffusers", "t2iadapter"]
model_card = populate_model_card(model_card, tags=tags)
model_card.save(os.path.join(repo_folder, "README.md"))
def parse_args(input_args=None):

View File

@@ -67,8 +67,8 @@ DATASET_NAME_MAPPING = {
def save_model_card(
args,
repo_id: str,
images=None,
repo_folder=None,
images: list = None,
repo_folder: str = None,
):
img_str = ""
if len(images) > 0:

View File

@@ -56,7 +56,9 @@ check_min_version("0.27.0.dev0")
logger = get_logger(__name__, log_level="INFO")
def save_model_card(repo_id: str, images=None, base_model=str, dataset_name=str, repo_folder=None):
def save_model_card(
repo_id: str, images: list = None, base_model: str = None, dataset_name: str = None, repo_folder: str = None
):
img_str = ""
for i, image in enumerate(images):
image.save(os.path.join(repo_folder, f"image_{i}.png"))

View File

@@ -66,12 +66,12 @@ DATASET_NAME_MAPPING = {
def save_model_card(
repo_id: str,
images=None,
validation_prompt=None,
base_model=str,
dataset_name=str,
repo_folder=None,
vae_path=None,
images: list = None,
validation_prompt: str = None,
base_model: str = None,
dataset_name: str = None,
repo_folder: str = None,
vae_path: str = None,
):
img_str = ""
for i, image in enumerate(images):

View File

@@ -167,7 +167,10 @@ vae_conversion_map_attn = [
def reshape_weight_for_sd(w):
# convert HF linear weights to SD conv2d weights
return w.reshape(*w.shape, 1, 1)
if not w.ndim == 1:
return w.reshape(*w.shape, 1, 1)
else:
return w
def convert_vae_state_dict(vae_state_dict):
@@ -321,11 +324,18 @@ if __name__ == "__main__":
vae_state_dict = convert_vae_state_dict(vae_state_dict)
vae_state_dict = {"first_stage_model." + k: v for k, v in vae_state_dict.items()}
# Convert text encoder 1
text_enc_dict = convert_openai_text_enc_state_dict(text_enc_dict)
text_enc_dict = {"conditioner.embedders.0.transformer." + k: v for k, v in text_enc_dict.items()}
# Convert text encoder 2
text_enc_2_dict = convert_openclip_text_enc_state_dict(text_enc_2_dict)
text_enc_2_dict = {"conditioner.embedders.1.model." + k: v for k, v in text_enc_2_dict.items()}
# We call the `.T.contiguous()` to match what's done in
# https://github.com/huggingface/diffusers/blob/84905ca7287876b925b6bf8e9bb92fec21c78764/src/diffusers/loaders/single_file_utils.py#L1085
text_enc_2_dict["conditioner.embedders.1.model.text_projection"] = text_enc_2_dict.pop(
"conditioner.embedders.1.model.text_projection.weight"
).T.contiguous()
# Put together new checkpoint
state_dict = {**unet_state_dict, **vae_state_dict, **text_enc_dict, **text_enc_2_dict}

View File

@@ -170,7 +170,10 @@ vae_extra_conversion_map = [
def reshape_weight_for_sd(w):
# convert HF linear weights to SD conv2d weights
return w.reshape(*w.shape, 1, 1)
if not w.ndim == 1:
return w.reshape(*w.shape, 1, 1)
else:
return w
def convert_vae_state_dict(vae_state_dict):

View File

@@ -126,8 +126,8 @@ _deps = [
"regex!=2019.12.17",
"requests",
"tensorboard",
"torch>=1.4,<2.2.0",
"torchvision<0.17",
"torch>=1.4",
"torchvision",
"transformers>=4.25.1",
"urllib3<=2.0.0",
]

View File

@@ -38,8 +38,8 @@ deps = {
"regex": "regex!=2019.12.17",
"requests": "requests",
"tensorboard": "tensorboard",
"torch": "torch>=1.4,<2.2.0",
"torchvision": "torchvision<0.17",
"torch": "torch>=1.4",
"torchvision": "torchvision",
"transformers": "transformers>=4.25.1",
"urllib3": "urllib3<=2.0.0",
}

View File

@@ -1112,7 +1112,6 @@ def create_text_encoder_from_open_clip_checkpoint(
text_model_dict[diffusers_key + ".q_proj.bias"] = weight_value[:text_proj_dim]
text_model_dict[diffusers_key + ".k_proj.bias"] = weight_value[text_proj_dim : text_proj_dim * 2]
text_model_dict[diffusers_key + ".v_proj.bias"] = weight_value[text_proj_dim * 2 :]
else:
text_model_dict[diffusers_key] = checkpoint[key]

View File

@@ -981,10 +981,9 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
revision (`str`, *optional*, defaults to `"main"`):
The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
allowed by Git.
custom_revision (`str`, *optional*, defaults to `"main"`):
custom_revision (`str`, *optional*):
The specific model version to use. It can be a branch name, a tag name, or a commit id similar to
`revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a
custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub.
`revision` when loading a custom pipeline from the Hub. Defaults to the latest stable 🤗 Diffusers version.
mirror (`str`, *optional*):
Mirror source to resolve accessibility issues if youre downloading a model in China. We do not
guarantee the timeliness or safety of the source, and you should refer to the mirror site for more

View File

@@ -27,7 +27,13 @@ from diffusers import (
PixArtAlphaPipeline,
Transformer2DModel,
)
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
slow,
torch_device,
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin, to_np
@@ -332,37 +338,35 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
torch.cuda.empty_cache()
def test_pixart_1024(self):
generator = torch.manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)
pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
prompt = self.prompt
image = pipe(prompt, generator=generator, output_type="np").images
image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
image_slice = image[0, -3:, -3:, -1]
expected_slice = np.array([0.0742, 0.0835, 0.2114, 0.0295, 0.0784, 0.2361, 0.1738, 0.2251, 0.3589])
expected_slice = np.array([0.1941, 0.2117, 0.2188, 0.1946, 0.218, 0.2124, 0.199, 0.2437, 0.2583])
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
self.assertLessEqual(max_diff, 1e-3)
max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice)
self.assertLessEqual(max_diff, 1e-4)
def test_pixart_512(self):
generator = torch.manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)
pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
prompt = self.prompt
image = pipe(prompt, generator=generator, output_type="np").images
image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
image_slice = image[0, -3:, -3:, -1]
expected_slice = np.array([0.3477, 0.3882, 0.4541, 0.3413, 0.3821, 0.4463, 0.4001, 0.4409, 0.4958])
expected_slice = np.array([0.2637, 0.291, 0.2939, 0.207, 0.2512, 0.2783, 0.2168, 0.2324, 0.2817])
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
self.assertLessEqual(max_diff, 1e-3)
max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice)
self.assertLessEqual(max_diff, 1e-4)
def test_pixart_1024_without_resolution_binning(self):
generator = torch.manual_seed(0)
@@ -372,7 +376,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
prompt = self.prompt
height, width = 1024, 768
num_inference_steps = 10
num_inference_steps = 2
image = pipe(
prompt,
@@ -406,7 +410,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
prompt = self.prompt
height, width = 512, 768
num_inference_steps = 10
num_inference_steps = 2
image = pipe(
prompt,