Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-07 13:04:15 +08:00)

Compare commits (11 commits): use-hf-tok ... single-fil
| SHA1 |
|---|
| 60a5e05683 |
| 626c68371f |
| edf5ba6a17 |
| 9941f1f61b |
| 46a9db0336 |
| 370146e4e0 |
| 5cd45c24bf |
| 67b3fe0aae |
| baab065679 |
| 509741aea7 |
| e1df77ee1e |
.github/workflows/build_docker_images.yml (vendored, 9 changed lines)
@@ -25,17 +25,17 @@ jobs:
      steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Check out code
        uses: actions/checkout@v3

      - name: Find Changed Dockerfiles
        id: file_changes
        uses: jitterbit/get-changed-files@v1
        with:
          format: 'space-delimited'
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Build Changed Docker Images
        run: |
          CHANGED_FILES="${{ steps.file_changes.outputs.all }}"
@@ -52,7 +52,7 @@ jobs:
  build-and-push-docker-images:
    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
    if: github.event_name != 'pull_request'

    permissions:
      contents: read
      packages: write
@@ -69,6 +69,7 @@ jobs:
          - diffusers-flax-tpu
          - diffusers-onnxruntime-cpu
          - diffusers-onnxruntime-cuda
+         - diffusers-doc-builder

    steps:
      - name: Checkout repository
.github/workflows/build_documentation.yml (vendored, 2 changed lines)
@@ -21,7 +21,7 @@ jobs:
      package: diffusers
      notebook_folder: diffusers_doc
      languages: en ko zh ja pt

      custom_container: diffusers/diffusers-doc-builder
    secrets:
      token: ${{ secrets.HUGGINGFACE_PUSH }}
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
.github/workflows/build_pr_documentation.yml (vendored, 1 changed line)
@@ -20,3 +20,4 @@ jobs:
      install_libgl1: true
      package: diffusers
      languages: en ko zh ja pt
+     custom_container: diffusers/diffusers-doc-builder
docker/diffusers-doc-builder/Dockerfile (new file, 50 lines)
@@ -0,0 +1,50 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa

RUN apt install -y bash \
    build-essential \
    git \
    git-lfs \
    curl \
    ca-certificates \
    libsndfile1-dev \
    python3.10 \
    python3-pip \
    libgl1 \
    python3.10-venv && \
    rm -rf /var/lib/apt/lists

# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3.10 -m uv pip install --no-cache-dir \
        torch \
        torchvision \
        torchaudio \
        invisible_watermark \
        --extra-index-url https://download.pytorch.org/whl/cpu && \
    python3.10 -m uv pip install --no-cache-dir \
        accelerate \
        datasets \
        hf-doc-builder \
        huggingface-hub \
        Jinja2 \
        librosa \
        numpy \
        scipy \
        tensorboard \
        transformers \
        matplotlib \
        setuptools==69.5.1

CMD ["/bin/bash"]
@@ -12,9 +12,9 @@ specific language governing permissions and limitations under the License.

# Loading Pipelines and Models via `from_single_file`

-The `from_single_file` method allows you to load supported pipelines using a single checkpoint file as opposed to the folder format used by Diffusers. This is useful if you are working with many of the Stable Diffusion Web UI's (such as A1111) that extensively rely on a single file to distribute all the components of a diffusion model.
+The `from_single_file` method allows you to load supported pipelines using a single checkpoint file as opposed to Diffusers' multiple folders format. This is useful if you are working with Stable Diffusion Web UI's (such as A1111) that rely on a single file format to distribute all the components of a model.

-The `from_single_file` method also supports loading models in their originally distributed format. This means that supported models that have been finetuned with other services can be loaded directly into supported Diffusers model objects and pipelines.
+The `from_single_file` method also supports loading models in their originally distributed format. This means that supported models that have been finetuned with other services can be loaded directly into Diffusers model objects and pipelines.
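For orientation, a minimal sketch of the call this guide documents; the checkpoint URL below is an assumption for illustration:

```python
from diffusers import StableDiffusionXLPipeline

# Assumed single-file checkpoint URL, for illustration only
ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path)
```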
## Pipelines that currently support `from_single_file` loading

@@ -59,7 +59,7 @@ pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path)

## Setting components in a Pipeline using `from_single_file`

-Swap components of the pipeline by passing them directly to the `from_single_file` method. e.g If you would like use a different scheduler than the pipeline default.
+Set components of a pipeline by passing them directly to the `from_single_file` method. For example, here we are swapping out the pipeline's default scheduler with the `DDIMScheduler`.
```python
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
@@ -71,13 +71,15 @@ pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, scheduler=scheduler

```
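The hunk above collapses the setup lines of this example; a hedged completion, assuming a default-initialized `DDIMScheduler` and an illustrative checkpoint URL:

```python
from diffusers import StableDiffusionXLPipeline, DDIMScheduler

# Assumed checkpoint URL, for illustration only
ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
scheduler = DDIMScheduler()
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, scheduler=scheduler)
```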
Here we are passing in a ControlNet model to the `StableDiffusionControlNetPipeline`.

```python
-from diffusers import StableDiffusionPipeline, ControlNetModel
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel

ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"

-controlnet = ControlNetModel.from_pretrained("https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors")
-pipe = StableDiffusionPipeline.from_single_file(ckpt_path, controlnet=controlnet)
+controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
+pipe = StableDiffusionControlNetPipeline.from_single_file(ckpt_path, controlnet=controlnet)

```
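As a usage note, the resulting pipeline is then called with a conditioning image; a hedged sketch continuing from the block above (the canny ControlNet expects an edge map, and the image URL is hypothetical):

```python
from diffusers.utils import load_image

# Hypothetical URL of a precomputed canny edge map, for illustration only
conditioning_image = load_image("https://example.com/canny_edge_map.png")
image = pipe("a painting of a landscape", image=conditioning_image, num_inference_steps=20).images[0]
```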
@@ -93,7 +95,7 @@ model = StableCascadeUNet.from_single_file(ckpt_path)

## Using a Diffusers model repository to configure single file loading

-Under the hood, `from_single_file` will try to determine a model repository to use to configure the components of the pipeline. You can also pass in a repository id to the `config` argument of the `from_single_file` method to explicitly set the repository to use.
+Under the hood, `from_single_file` will try to automatically determine a model repository to use to configure the components of a pipeline. You can also explicitly set the model repository to configure the pipeline with the `config` argument.
```python
from diffusers import StableDiffusionXLPipeline
@@ -105,9 +107,19 @@ pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, config=repo_id)

```
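Filling in the collapsed setup, a minimal sketch consistent with the `segmind/SSD-1B` example discussed just below (the checkpoint filename is an assumption):

```python
from diffusers import StableDiffusionXLPipeline

# Assumed checkpoint location inside the repository, for illustration only
ckpt_path = "https://huggingface.co/segmind/SSD-1B/blob/main/SSD-1B.safetensors"
repo_id = "segmind/SSD-1B"
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, config=repo_id)
```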
+In the example above, since we explicitly passed `repo_id="segmind/SSD-1B"` to the `config` argument, it will use this [configuration file](https://huggingface.co/segmind/SSD-1B/blob/main/unet/config.json) from the `unet` subfolder in `"segmind/SSD-1B"` to configure the `unet` component of the pipeline; similarly, it will use the `config.json` file from the `vae` subfolder to configure the `vae` model, the `config.json` file from the `text_encoder` folder to configure the `text_encoder`, and so on.

+<Tip>

+Most of the time you do not need to explicitly set a `config` argument. `from_single_file` will automatically map the checkpoint to the appropriate model repository. However, this option can be useful in cases where model components in the checkpoint might have been changed from what was originally distributed, or in cases where a checkpoint file might not have the necessary metadata to correctly determine the configuration to use for the pipeline.

+</Tip>

## Override configuration options when using single file loading

-Override the default model or pipeline configuration options when using `from_single_file` by passing in the relevant arguments directly to the `from_single_file` method. Any argument that is supported by the model or pipeline class can be configured in this way:
+Override the default model or pipeline configuration options by providing the relevant arguments directly to the `from_single_file` method. Any argument supported by the model or pipeline class can be configured in this way:

### Setting a pipeline configuration option
```python
from diffusers import StableDiffusionXLInstructPix2PixPipeline
@@ -117,6 +129,8 @@ pipe = StableDiffusionXLInstructPix2PixPipeline.from_single_file(ckpt_path, conf

```
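The call is truncated in the hunk header above; one plausible hedged completion, in which the checkpoint URL, the config repository, and the `is_cosxl_edit` argument are all assumptions for illustration:

```python
from diffusers import StableDiffusionXLInstructPix2PixPipeline

# All values below are assumptions for illustration
ckpt_path = "https://huggingface.co/stabilityai/cosxl/blob/main/cosxl_edit.safetensors"
pipe = StableDiffusionXLInstructPix2PixPipeline.from_single_file(
    ckpt_path,
    config="diffusers/sdxl-instructpix2pix-768",
    is_cosxl_edit=True,  # assumed pipeline-level configuration option
)
```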
### Setting a model configuration option

```python
from diffusers import UNet2DConditionModel

@@ -125,10 +139,6 @@ model = UNet2DConditionModel.from_single_file(ckpt_path, upcast_attention=True)

```
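A hedged completion of the model-level example above (the checkpoint URL is an assumption):

```python
from diffusers import UNet2DConditionModel

# Assumed checkpoint URL, for illustration only
ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
model = UNet2DConditionModel.from_single_file(ckpt_path, upcast_attention=True)
```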
-In the example above, since we explicitly passed `repo_id="segmind/SSD-1B"`, it will use this [configuration file](https://huggingface.co/segmind/SSD-1B/blob/main/unet/config.json) from the "unet" subfolder in `"segmind/SSD-1B"` to configure the unet component included in the checkpoint; Similarly, it will use the `config.json` file from `"vae"` subfolder to configure the vae model, `config.json` file from text_encoder folder to configure text_encoder and so on.

-Note that most of the time you do not need to explicitly a `config` argument, `from_single_file` will automatically map the checkpoint to a repo id (we will discuss this in more details in next section). However, this can be useful in cases where model components might have been changed from what was originally distributed or in cases where a checkpoint file might not have the necessary metadata to correctly determine the configuration to use for the pipeline.

<Tip>
To learn more about how to load single file weights, see the [Load different Stable Diffusion formats](../../using-diffusers/other-formats) loading guide.
@@ -137,9 +147,11 @@ To learn more about how to load single file weights, see the [Load different Sta

## Working with local files

-As of `diffusers>=0.28.0` the `from_single_file` method will attempt to configure a pipeline or model by first inferring the model type from the checkpoint file and then using the model type to determine the appropriate model repo configuration to use from the Hugging Face Hub. For example, any single file checkpoint based on the Stable Diffusion XL base model will use the [`stabilityai/stable-diffusion-xl-base-1.0`](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) model repo to configure the pipeline.
+As of `diffusers>=0.28.0` the `from_single_file` method will attempt to configure a pipeline or model by first inferring the model type from the keys in the checkpoint file. This inferred model type is then used to determine the appropriate model repository on the Hugging Face Hub to configure the model or pipeline.

+For example, any single file checkpoint based on the Stable Diffusion XL base model will use the [`stabilityai/stable-diffusion-xl-base-1.0`](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) model repository to configure the pipeline.

-If you are working in an environment with restricted internet access, it is recommended to download the config files and checkpoints for the model to your preferred directory and pass the local paths to the `pretrained_model_link_or_path` and `config` arguments of the `from_single_file` method.
+If you are working in an environment with restricted internet access, it is recommended that you download the config files and checkpoints for the model to your preferred directory and pass the local paths to the `pretrained_model_link_or_path` and `config` arguments of the `from_single_file` method.
```python
from huggingface_hub import hf_hub_download, snapshot_download
@@ -211,13 +223,14 @@ pipe = StableDiffusionXLPipeline.from_single_file(my_local_checkpoint_path, conf

```
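The download calls are collapsed in the diff; a hedged sketch of the pattern this section describes, with an assumed repository and filename:

```python
from huggingface_hub import hf_hub_download, snapshot_download
from diffusers import StableDiffusionXLPipeline

# Assumed repository and checkpoint filename, for illustration only
my_local_checkpoint_path = hf_hub_download(
    repo_id="segmind/SSD-1B",
    filename="SSD-1B.safetensors",
)
# Fetch only the lightweight config files, skipping the large weight files
my_local_config_path = snapshot_download(
    repo_id="segmind/SSD-1B",
    allow_patterns=["*.json", "**/*.json", "*.txt", "**/*.txt"],
)

pipe = StableDiffusionXLPipeline.from_single_file(
    my_local_checkpoint_path, config=my_local_config_path, local_files_only=True
)
```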
<Tip>
Disabling symlinking means that the `huggingface_hub` caching mechanism has no way to determine whether a file has already been downloaded to the local directory. This means that the `hf_hub_download` and `snapshot_download` functions will download files to the local directory each time they are executed. If you are disabling symlinking, it is recommended that you separate the model download and loading steps to avoid downloading the same file multiple times.

As of `huggingface_hub>=0.23.0` the `local_dir_use_symlinks` argument isn't necessary for the `hf_hub_download` and `snapshot_download` functions.

</Tip>
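A hedged illustration of separating the download and load steps when symlinking is disabled (the directory, repository, and filename are assumptions):

```python
import os

from huggingface_hub import hf_hub_download
from diffusers import StableDiffusionXLPipeline

# Step 1: download once into a plain local directory
local_dir = "./checkpoints"  # assumed
ckpt_file = os.path.join(local_dir, "SSD-1B.safetensors")
if not os.path.exists(ckpt_file):
    hf_hub_download(repo_id="segmind/SSD-1B", filename="SSD-1B.safetensors", local_dir=local_dir)

# Step 2: load from the local path, so re-running this step never re-downloads
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_file)
```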
## Using the original configuration file of a model

-If you would like to configure the parameters of the model components in the pipeline using the orignal YAML configuration file, you can pass a local path or url to the original configuration file to the `original_config` argument of the `from_single_file` method.
+If you would like to configure the model components in a pipeline using the original YAML configuration file, you can pass a local path or URL to the original configuration file via the `original_config` argument.
```python
from diffusers import StableDiffusionXLPipeline
@@ -229,13 +242,12 @@ original_config = "https://raw.githubusercontent.com/Stability-AI/generative-mod
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, original_config=original_config)
```
In the example above, the `original_config` file is only used to configure the parameters of the individual model components of the pipeline. For example, it will be used to configure parameters such as the `in_channels` of the `vae` model and `unet` model. It is not used to determine the type of component objects in the pipeline.

<Tip>

-When using `original_config` with local_files_only=True`, Diffusers will attempt to infer the components based on the type signatures of pipeline class, rather than attempting to fetch the pipeline config from the Hugging Face Hub. This is to prevent backwards breaking changes in existing code that might not be able to connect to the internet to fetch the necessary pipeline config files.
-This is not as reliable as providing a path to a local config repo and might lead to errors when configuring the pipeline. To avoid this, please run the pipeline with `local_files_only=False` once to download the appropriate pipeline config files to the local cache.

+When using `original_config` with `local_files_only=True`, Diffusers will attempt to infer the components of the pipeline based on the type signatures of the pipeline class, rather than attempting to fetch the configuration files from a model repository on the Hugging Face Hub. This is to prevent backward breaking changes in existing code that might not be able to connect to the internet to fetch the necessary configuration files.

+This is not as reliable as providing a path to a local model repository using the `config` argument and might lead to errors when configuring the pipeline. To avoid this, please run the pipeline with `local_files_only=False` once to download the appropriate pipeline configuration files to the local cache.

</Tip>
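A hedged sketch of the cache-priming pattern the tip recommends (the local checkpoint path is an assumption):

```python
from diffusers import StableDiffusionXLPipeline

ckpt_path = "./sd_xl_base_1.0.safetensors"  # assumed local checkpoint

# First run, with network access: downloads and caches the pipeline config files
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, local_files_only=False)

# Later runs can then resolve everything from the local cache
pipe = StableDiffusionXLPipeline.from_single_file(ckpt_path, local_files_only=True)
```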
@@ -69,6 +69,7 @@ Please also check out our [Community Scripts](https://github.com/huggingface/dif
| UFOGen Scheduler | Scheduler for UFOGen Model (compatible with Stable Diffusion pipelines) | [UFOGen Scheduler](#ufogen-scheduler) | - | [dg845](https://github.com/dg845) |
| Stable Diffusion XL IPEX Pipeline | Accelerate Stable Diffusion XL inference pipeline with BF16/FP32 precision on Intel Xeon CPUs with [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [Stable Diffusion XL on IPEX](#stable-diffusion-xl-on-ipex) | - | [Dan Li](https://github.com/ustcuna/) |
| Stable Diffusion BoxDiff Pipeline | Training-free controlled generation with bounding boxes using [BoxDiff](https://github.com/showlab/BoxDiff) | [Stable Diffusion BoxDiff Pipeline](#stable-diffusion-boxdiff) | - | [Jingyang Zhang](https://github.com/zjysteven/) |
+| FRESCO V2V Pipeline | Implementation of [[CVPR 2024] FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation](https://arxiv.org/abs/2403.12962) | [FRESCO V2V Pipeline](#fresco) | - | [Yifan Zhou](https://github.com/SingleZombie) |

To load a custom pipeline, just pass the `custom_pipeline` argument to `DiffusionPipeline`, set to the name of one of the files in `diffusers/examples/community`, as sketched below. Feel free to send a PR with your own pipelines; we will merge them quickly.
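A minimal sketch of that mechanism, mirroring the FRESCO example further down (the base model and ControlNet are taken from that example, not prescriptions):

```python
from diffusers import ControlNetModel, DiffusionPipeline

# Values taken from the FRESCO example below, for illustration
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = DiffusionPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V2.0",
    controlnet=controlnet,
    custom_pipeline="fresco_v2v",  # resolves to examples/community/fresco_v2v.py
)
```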
@@ -4035,6 +4036,93 @@ onestep_image = pipe(prompt, num_inference_steps=1).images[0]
multistep_image = pipe(prompt, num_inference_steps=4).images[0]
```

### FRESCO

This is the Diffusers implementation of the zero-shot video-to-video translation pipeline [FRESCO](https://github.com/williamyang1991/FRESCO) (without Ebsynth postprocessing and background smoothing). To run the code, please install gmflow and set the path in `gmflow_dir`. After that, you can run the pipeline with:

```py
from PIL import Image
import cv2
import torch
import numpy as np

from diffusers import ControlNetModel, DDIMScheduler, DiffusionPipeline
import sys

gmflow_dir = "/path/to/gmflow"
sys.path.insert(0, gmflow_dir)


def video_to_frame(video_path: str, interval: int):
    vidcap = cv2.VideoCapture(video_path)
    success = True

    count = 0
    res = []
    while success:
        count += 1
        success, image = vidcap.read()
        if count % interval != 1:
            continue
        if image is not None:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            res.append(image)
        if len(res) >= 8:
            break

    vidcap.release()
    return res


input_video_path = 'https://github.com/williamyang1991/FRESCO/raw/main/data/car-turn.mp4'
output_video_path = 'car.gif'

# You can use any finetuned SD here
model_path = 'SG161222/Realistic_Vision_V2.0'

prompt = 'a red car turns in the winter'
a_prompt = ', RAW photo, subject, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3, '
n_prompt = '(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation'

input_interval = 5
frames = video_to_frame(
    input_video_path, input_interval)

control_frames = []
# get canny image
for frame in frames:
    image = cv2.Canny(frame, 50, 100)
    np_image = np.array(image)
    np_image = np_image[:, :, None]
    np_image = np.concatenate([np_image, np_image, np_image], axis=2)
    canny_image = Image.fromarray(np_image)
    control_frames.append(canny_image)

# You can use any ControlNet here
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny").to('cuda')

pipe = DiffusionPipeline.from_pretrained(
    model_path, controlnet=controlnet, custom_pipeline='fresco_v2v').to('cuda')
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

generator = torch.manual_seed(0)
frames = [Image.fromarray(frame) for frame in frames]

output_frames = pipe(
    prompt + a_prompt,
    frames,
    control_frames,
    num_inference_steps=20,
    strength=0.75,
    controlnet_conditioning_scale=0.7,
    generator=generator,
    negative_prompt=n_prompt
).images

output_frames[0].save(output_video_path, save_all=True,
                      append_images=output_frames[1:], duration=100, loop=0)
```

# Perturbed-Attention Guidance

[Project](https://ku-cvlab.github.io/Perturbed-Attention-Guidance/) / [arXiv](https://arxiv.org/abs/2403.17377) / [GitHub](https://github.com/KU-CVLAB/Perturbed-Attention-Guidance)
examples/community/fresco_v2v.py (new file, 2511 lines)
File diff suppressed because it is too large.
@@ -31,6 +31,7 @@ from ..utils import (
    is_transformers_available,
    is_xformers_available,
)
+from ..utils.testing_utils import get_python_version
from . import BaseDiffusersCLICommand


@@ -105,6 +106,11 @@ class EnvironmentCommand(BaseDiffusersCLICommand):

            xformers_version = xformers.__version__

+        if get_python_version() >= (3, 10):
+            platform_info = f"{platform.freedesktop_os_release().get('PRETTY_NAME', None)} - {platform.platform()}"
+        else:
+            platform_info = platform.platform()
+
        is_notebook_str = "Yes" if is_notebook() else "No"

        is_google_colab_str = "Yes" if is_google_colab() else "No"
@@ -152,7 +158,7 @@ class EnvironmentCommand(BaseDiffusersCLICommand):

        info = {
            "🤗 Diffusers version": version,
-            "Platform": f"{platform.freedesktop_os_release().get('PRETTY_NAME', None)} - {platform.platform()}",
+            "Platform": platform_info,
            "Running on a notebook?": is_notebook_str,
            "Running on Google Colab?": is_google_colab_str,
            "Python version": platform.python_version(),
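(For context on the guard above: `platform.freedesktop_os_release()` was only added in Python 3.10, so calling it unconditionally raises `AttributeError` on older interpreters.)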
@@ -340,7 +340,7 @@ class FromSingleFileMixin:
            deprecate("original_config_file", "1.0.0", deprecation_message)
            original_config = original_config_file

-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        token = kwargs.pop("token", None)

@@ -166,7 +166,7 @@ class FromOriginalModelMixin:
                "`from_single_file` cannot accept both `config` and `original_config` arguments. Please provide only one of these arguments"
            )

-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        token = kwargs.pop("token", None)
@@ -826,8 +826,8 @@ def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs)

    # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
    if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
-        logger.warninging("Checkpoint has both EMA and non-EMA weights.")
-        logger.warninging(
+        logger.warning("Checkpoint has both EMA and non-EMA weights.")
+        logger.warning(
            "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
            " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
        )
@@ -837,7 +837,7 @@ def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs)
            unet_state_dict[key.replace(unet_key, "")] = checkpoint.get(flat_ema_key)
    else:
        if sum(k.startswith("model_ema") for k in keys) > 100:
-            logger.warninging(
+            logger.warning(
                "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
                " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
            )
@@ -178,7 +178,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
            feature_extractor=feature_extractor,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
+        self.video_processor = VideoProcessor(do_resize=True, vae_scale_factor=self.vae_scale_factor)

    def _encode_image(
        self,
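As a side note on the `vae_scale_factor` line above, a small worked example, assuming the common 4-level VAE of the Stable Diffusion family:

```python
# Assumed block_out_channels of a typical Stable Diffusion VAE
block_out_channels = [128, 256, 512, 512]
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8  # each spatial dimension is downsampled 8x in latent space
```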
@@ -1,191 +0,0 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import torch

from diffusers import StableCascadeUNet
from diffusers.utils import logging
from diffusers.utils.testing_utils import (
    enable_full_determinism,
    numpy_cosine_similarity_distance,
    require_torch_gpu,
    slow,
)
from diffusers.utils.torch_utils import randn_tensor


logger = logging.get_logger(__name__)

enable_full_determinism()


@slow
class StableCascadeUNetModelSlowTests(unittest.TestCase):
    def tearDown(self) -> None:
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_stable_cascade_unet_prior_single_file_components(self):
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url)

        single_file_unet_config = single_file_unet.config
        del single_file_unet
        gc.collect()
        torch.cuda.empty_cache()

        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder="prior", variant="bf16")
        unet_config = unet.config
        del unet
        gc.collect()
        torch.cuda.empty_cache()

        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
        for param_name, param_value in single_file_unet_config.items():
            if param_name in PARAMS_TO_IGNORE:
                continue

            assert unet_config[param_name] == param_value

    def test_stable_cascade_unet_decoder_single_file_components(self):
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url)

        single_file_unet_config = single_file_unet.config
        del single_file_unet
        gc.collect()
        torch.cuda.empty_cache()

        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder="decoder", variant="bf16")
        unet_config = unet.config
        del unet
        gc.collect()
        torch.cuda.empty_cache()

        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
        for param_name, param_value in single_file_unet_config.items():
            if param_name in PARAMS_TO_IGNORE:
                continue

            assert unet_config[param_name] == param_value

    def test_stable_cascade_unet_config_loading(self):
        config = StableCascadeUNet.load_config(
            pretrained_model_name_or_path="diffusers/stable-cascade-configs", subfolder="prior"
        )
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"

        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, config=config)
        single_file_unet_config = single_file_unet.config
        del single_file_unet
        gc.collect()
        torch.cuda.empty_cache()

        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
        for param_name, param_value in config.items():
            if param_name in PARAMS_TO_IGNORE:
                continue

            assert single_file_unet_config[param_name] == param_value

    @require_torch_gpu
    def test_stable_cascade_unet_single_file_prior_forward_pass(self):
        dtype = torch.bfloat16
        generator = torch.Generator("cpu")

        model_inputs = {
            "sample": randn_tensor((1, 16, 24, 24), generator=generator.manual_seed(0)).to("cuda", dtype),
            "timestep_ratio": torch.tensor([1]).to("cuda", dtype),
            "clip_text_pooled": randn_tensor((1, 1, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_text": randn_tensor((1, 77, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_img": randn_tensor((1, 1, 768), generator=generator.manual_seed(0)).to("cuda", dtype),
            "pixels": randn_tensor((1, 3, 8, 8), generator=generator.manual_seed(0)).to("cuda", dtype),
        }

        unet = StableCascadeUNet.from_pretrained(
            "stabilityai/stable-cascade-prior",
            subfolder="prior",
            revision="refs/pr/2",
            variant="bf16",
            torch_dtype=dtype,
        )
        unet.to("cuda")
        with torch.no_grad():
            prior_output = unet(**model_inputs).sample.float().cpu().numpy()

        # Remove UNet from GPU memory before loading the single file UNet model
        del unet
        gc.collect()
        torch.cuda.empty_cache()

        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, torch_dtype=dtype)
        single_file_unet.to("cuda")
        with torch.no_grad():
            prior_single_file_output = single_file_unet(**model_inputs).sample.float().cpu().numpy()

        # Remove UNet from GPU memory before loading the single file UNet model
        del single_file_unet
        gc.collect()
        torch.cuda.empty_cache()

        max_diff = numpy_cosine_similarity_distance(prior_output.flatten(), prior_single_file_output.flatten())
        assert max_diff < 8e-3

    @require_torch_gpu
    def test_stable_cascade_unet_single_file_decoder_forward_pass(self):
        dtype = torch.float32
        generator = torch.Generator("cpu")

        model_inputs = {
            "sample": randn_tensor((1, 4, 256, 256), generator=generator.manual_seed(0)).to("cuda", dtype),
            "timestep_ratio": torch.tensor([1]).to("cuda", dtype),
            "clip_text": randn_tensor((1, 77, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_text_pooled": randn_tensor((1, 1, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "pixels": randn_tensor((1, 3, 8, 8), generator=generator.manual_seed(0)).to("cuda", dtype),
        }

        unet = StableCascadeUNet.from_pretrained(
            "stabilityai/stable-cascade",
            subfolder="decoder",
            revision="refs/pr/44",
            torch_dtype=dtype,
        )
        unet.to("cuda")
        with torch.no_grad():
            prior_output = unet(**model_inputs).sample.float().cpu().numpy()

        # Remove UNet from GPU memory before loading the single file UNet model
        del unet
        gc.collect()
        torch.cuda.empty_cache()

        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, torch_dtype=dtype)
        single_file_unet.to("cuda")
        with torch.no_grad():
            prior_single_file_output = single_file_unet(**model_inputs).sample.float().cpu().numpy()

        # Remove UNet from GPU memory before loading the single file UNet model
        del single_file_unet
        gc.collect()
        torch.cuda.empty_cache()

        max_diff = numpy_cosine_similarity_distance(prior_output.flatten(), prior_single_file_output.flatten())
        assert max_diff < 1e-4