Compare commits


1 commit

Author: sayakpaul
SHA1: a43b260c7a
Message: fix single_file example.
Date: 2025-07-01 21:07:16 +05:30

1065 changed files with 22502 additions and 81231 deletions


@@ -11,21 +11,20 @@ env:
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
BASE_PATH: benchmark_outputs
jobs:
torch_models_cuda_benchmark_tests:
torch_pipelines_cuda_benchmark_tests:
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_BENCHMARK }}
name: Torch Core Models CUDA Benchmarking Tests
name: Torch Core Pipelines CUDA Benchmarking Tests
strategy:
fail-fast: false
max-parallel: 1
runs-on:
group: aws-g6e-4xlarge
group: aws-g6-4xlarge-plus
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -36,47 +35,27 @@ jobs:
nvidia-smi
- name: Install dependencies
run: |
apt update
apt install -y libpq-dev postgresql-client
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install -r benchmarks/requirements.txt
python -m uv pip install pandas peft
python -m uv pip uninstall transformers && python -m uv pip install transformers==4.48.0
- name: Environment
run: |
python utils/print_env.py
- name: Diffusers Benchmarking
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
BASE_PATH: benchmark_outputs
run: |
cd benchmarks && python run_all.py
- name: Push results to the Hub
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
run: |
cd benchmarks && python push_results.py
mkdir $BASE_PATH && cp *.csv $BASE_PATH
export TOTAL_GPU_MEMORY=$(python -c "import torch; print(torch.cuda.get_device_properties(0).total_memory / (1024**3))")
cd benchmarks && mkdir ${BASE_PATH} && python run_all.py && python push_results.py
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: benchmark_test_reports
path: benchmarks/${{ env.BASE_PATH }}
# TODO: enable this once the connection problem has been resolved.
- name: Update benchmarking results to DB
env:
PGDATABASE: metrics
PGHOST: ${{ secrets.DIFFUSERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.DIFFUSERS_BENCHMARKS_PGPASSWORD }}
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
run: |
git config --global --add safe.directory /__w/diffusers/diffusers
commit_id=$GITHUB_SHA
commit_msg=$(git show -s --format=%s "$commit_id" | cut -c1-70)
cd benchmarks && python populate_into_db.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
path: benchmarks/benchmark_outputs
- name: Report success status
if: ${{ success() }}


@@ -79,14 +79,14 @@ jobs:
# Check secret is set
- name: whoami
run: hf auth whoami
run: huggingface-cli whoami
env:
HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}
# Push to HF! (under subfolder based on checkout ref)
# https://huggingface.co/datasets/diffusers/community-pipelines-mirror
- name: Mirror community pipeline to HF
run: hf upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
run: huggingface-cli upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
env:
PATH_IN_REPO: ${{ env.PATH_IN_REPO }}
HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}


@@ -61,7 +61,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -107,7 +107,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
@@ -178,7 +178,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
@@ -222,7 +222,7 @@ jobs:
group: aws-g6e-xlarge-plus
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -248,7 +248,7 @@ jobs:
BIG_GPU_MEMORY: 40
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-m "big_accelerator" \
-m "big_gpu_with_torch_cuda" \
--make-reports=tests_big_gpu_torch_cuda \
--report-log=tests_big_gpu_torch_cuda.log \
tests/
@@ -270,7 +270,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-minimum-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
@@ -333,21 +333,18 @@ jobs:
additional_deps: ["peft"]
- backend: "gguf"
test_location: "gguf"
additional_deps: ["peft", "kernels"]
additional_deps: ["peft"]
- backend: "torchao"
test_location: "torchao"
additional_deps: []
- backend: "optimum_quanto"
test_location: "quanto"
additional_deps: []
- backend: "nvidia_modelopt"
test_location: "modelopt"
additional_deps: []
runs-on:
group: aws-g6e-xlarge-plus
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "20gb" --ipc host --gpus all
options: --shm-size "20gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -399,7 +396,7 @@ jobs:
group: aws-g6e-xlarge-plus
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "20gb" --ipc host --gpus all
options: --shm-size "20gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3


@@ -0,0 +1,38 @@
name: Run Flax dependency tests
on:
pull_request:
branches:
- main
paths:
- "src/diffusers/**.py"
push:
branches:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check_flax_dependencies:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pip install --upgrade pip uv
python -m uv pip install -e .
python -m uv pip install "jax[cpu]>=0.2.16,!=0.3.2"
python -m uv pip install "flax>=0.4.1"
python -m uv pip install "jaxlib>=0.1.65"
python -m uv pip install pytest
- name: Check for soft dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest tests/others/test_dependencies.py


@@ -1,141 +0,0 @@
name: Fast PR tests for Modular
on:
pull_request:
branches: [main]
paths:
- "src/diffusers/modular_pipelines/**.py"
- "src/diffusers/models/modeling_utils.py"
- "src/diffusers/models/model_loading_utils.py"
- "src/diffusers/pipelines/pipeline_utils.py"
- "src/diffusers/pipeline_loading_utils.py"
- "src/diffusers/loaders/lora_base.py"
- "src/diffusers/loaders/lora_pipeline.py"
- "src/diffusers/loaders/peft.py"
- "tests/modular_pipelines/**.py"
- ".github/**.yml"
- "utils/**.py"
- "setup.py"
push:
branches:
- ci-*
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
DIFFUSERS_IS_CI: yes
HF_HUB_ENABLE_HF_TRANSFER: 1
OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60
jobs:
check_code_quality:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[quality]
- name: Check quality
run: make quality
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
check_repository_consistency:
needs: check_code_quality
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[quality]
- name: Check repo consistency
run: |
python utils/check_copies.py
python utils/check_dummies.py
python utils/check_support_list.py
make deps_table_check_updated
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
run_fast_tests:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
config:
- name: Fast PyTorch Modular Pipeline CPU tests
framework: pytorch_pipelines
runner: aws-highmemory-32-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu_modular_pipelines
name: ${{ matrix.config.name }}
runs-on:
group: ${{ matrix.config.runner }}
container:
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py
- name: Run fast PyTorch Pipeline CPU tests
if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pytest -n 8 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/modular_pipelines
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
path: reports


@@ -13,7 +13,6 @@ on:
- "src/diffusers/loaders/peft.py"
- "tests/pipelines/test_pipelines_common.py"
- "tests/models/test_modeling_common.py"
- "examples/**/*.py"
workflow_dispatch:
concurrency:
@@ -118,7 +117,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -183,13 +182,13 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
strategy:
fail-fast: false
max-parallel: 4
max-parallel: 2
matrix:
module: [models, schedulers, lora, others]
steps:
@@ -253,7 +252,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
uses: actions/checkout@v3


@@ -64,7 +64,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -109,7 +109,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
@@ -167,7 +167,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
@@ -210,7 +210,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
@@ -252,7 +252,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
uses: actions/checkout@v3


@@ -62,7 +62,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -107,7 +107,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
@@ -163,7 +163,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-minimum-cuda
options: --shm-size "16gb" --ipc host --gpus all
options: --shm-size "16gb" --ipc host --gpus 0
defaults:
run:
shell: bash
@@ -222,7 +222,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
@@ -265,7 +265,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
@@ -307,7 +307,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers


@@ -30,7 +30,7 @@ jobs:
group: aws-g4dn-2xlarge
container:
image: ${{ github.event.inputs.docker_image }}
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus 0 --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Validate test files input


@@ -31,7 +31,7 @@ jobs:
group: "${{ github.event.inputs.runner_type }}"
container:
image: ${{ github.event.inputs.docker_image }}
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus all --privileged
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
steps:
- name: Checkout diffusers


@@ -37,7 +37,7 @@ limitations under the License.
## Installation
We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation.
We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/) and [Flax](https://flax.readthedocs.io/en/latest/#installation), please refer to their official documentation.
### PyTorch
@@ -53,6 +53,14 @@ With `conda` (maintained by the community):
conda install -c conda-forge diffusers
```
### Flax
With `pip` (official package):
```bash
pip install --upgrade diffusers[flax]
```
### Apple Silicon (M1/M2) support
Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide.


@@ -1,69 +0,0 @@
# Diffusers Benchmarks
Welcome to Diffusers Benchmarks. These benchmarks are used to obtain latency and memory information of the most popular models across different scenarios such as:
* Base case i.e., when using `torch.bfloat16` and `torch.nn.functional.scaled_dot_product_attention`.
* Base + `torch.compile()`
* NF4 quantization
* Layerwise upcasting
Instead of full diffusion pipelines, only the forward pass of the respective model classes (such as `FluxTransformer2DModel`) is tested with the real checkpoints (such as `"black-forest-labs/FLUX.1-dev"`).
The entrypoint to running all the currently available benchmarks is in `run_all.py`. However, one can run the individual benchmarks, too, e.g., `python benchmarking_flux.py`. It should produce a CSV file containing various information about the benchmarks run.
The benchmarks are run on a weekly basis and the CI is defined in [benchmark.yml](../.github/workflows/benchmark.yml).
## Running the benchmarks manually
First set up `torch` and install `diffusers` from the root of the repository:
```sh
pip install -e ".[quality,test]"
```
Then make sure the other dependencies are installed:
```sh
cd benchmarks/
pip install -r requirements.txt
```
We need to be authenticated to access some of the checkpoints used during benchmarking:
```sh
hf auth login
```
We use an L40 GPU with 128GB RAM to run the benchmark CI. As such, the benchmarks are configured to run on NVIDIA GPUs. So, make sure you have access to a similar machine (or modify the benchmarking scripts accordingly).
Then you can either launch the entire benchmarking suite by running:
```sh
python run_all.py
```
Or, you can run the individual benchmarks.
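For example, to run only the Flux benchmarks mentioned above:
```sh
python benchmarking_flux.py
```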
## Customizing the benchmarks
We define "scenarios" to cover the most common ways in which these models are used. You can define a new scenario by modifying an existing benchmark file:
```py
BenchmarkScenario(
name=f"{CKPT_ID}-bnb-8bit",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
"quantization_config": BitsAndBytesConfig(load_in_8bit=True),
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
)
```
You can also configure a new model-level benchmark and add it to the existing suite. To do so, it should be enough to define a valid benchmarking file like `benchmarking_flux.py`, as sketched below.
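For illustration, here is a minimal sketch of what such a file can look like. It reuses the `BenchmarkScenario`, `BenchmarkMixin`, and `model_init_fn` utilities from `benchmarking_utils.py` and mirrors the SDXL UNet benchmark shown elsewhere in this comparison, trimmed to a single bf16 scenario; the file name and result CSV name are illustrative placeholders.

```py
# benchmarking_my_model.py -- hypothetical sketch of a new model-level benchmark file.
# Shapes and kwargs mirror the SDXL UNet benchmark (1024x1024, max_sequence_length 77).
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import UNet2DConditionModel
from diffusers.utils.testing_utils import torch_device

CKPT_ID = "stabilityai/stable-diffusion-xl-base-1.0"
RESULT_FILENAME = "my_model.csv"  # placeholder; each benchmark file writes its own CSV


def get_input_dict(**device_dtype_kwargs):
    # Dummy forward-pass inputs; adapt the shapes to the model being benchmarked.
    return {
        "sample": torch.randn(1, 4, 128, 128, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, 77, 2048, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
        "added_cond_kwargs": {
            "text_embeds": torch.randn(1, 1280, **device_dtype_kwargs),
            "time_ids": torch.ones(1, 6, **device_dtype_kwargs),
        },
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=UNet2DConditionModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "unet",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
            compile_kwargs={"fullgraph": True},
        ),
    ]
    runner = BenchmarkMixin()
    # Method name as defined in benchmarking_utils.py.
    runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
```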
Happy benchmarking 🧨

benchmarks/base_classes.py (new file, +346 lines)

@@ -0,0 +1,346 @@
import os
import sys
import torch
from diffusers import (
AutoPipelineForImage2Image,
AutoPipelineForInpainting,
AutoPipelineForText2Image,
ControlNetModel,
LCMScheduler,
StableDiffusionAdapterPipeline,
StableDiffusionControlNetPipeline,
StableDiffusionXLAdapterPipeline,
StableDiffusionXLControlNetPipeline,
T2IAdapter,
WuerstchenCombinedPipeline,
)
from diffusers.utils import load_image
sys.path.append(".")
from utils import ( # noqa: E402
BASE_PATH,
PROMPT,
BenchmarkInfo,
benchmark_fn,
bytes_to_giga_bytes,
flush,
generate_csv_dict,
write_to_csv,
)
RESOLUTION_MAPPING = {
"Lykon/DreamShaper": (512, 512),
"lllyasviel/sd-controlnet-canny": (512, 512),
"diffusers/controlnet-canny-sdxl-1.0": (1024, 1024),
"TencentARC/t2iadapter_canny_sd14v1": (512, 512),
"TencentARC/t2i-adapter-canny-sdxl-1.0": (1024, 1024),
"stabilityai/stable-diffusion-2-1": (768, 768),
"stabilityai/stable-diffusion-xl-base-1.0": (1024, 1024),
"stabilityai/stable-diffusion-xl-refiner-1.0": (1024, 1024),
"stabilityai/sdxl-turbo": (512, 512),
}
class BaseBenchmak:
pipeline_class = None
def __init__(self, args):
super().__init__()
def run_inference(self, args):
raise NotImplementedError
def benchmark(self, args):
raise NotImplementedError
def get_result_filepath(self, args):
pipeline_class_name = str(self.pipe.__class__.__name__)
name = (
args.ckpt.replace("/", "_")
+ "_"
+ pipeline_class_name
+ f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath
class TextToImageBenchmark(BaseBenchmak):
pipeline_class = AutoPipelineForText2Image
def __init__(self, args):
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
if args.run_compile:
if not isinstance(pipe, WuerstchenCombinedPipeline):
pipe.unet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
if hasattr(pipe, "movq") and getattr(pipe, "movq", None) is not None:
pipe.movq.to(memory_format=torch.channels_last)
pipe.movq = torch.compile(pipe.movq, mode="reduce-overhead", fullgraph=True)
else:
print("Run torch compile")
pipe.decoder = torch.compile(pipe.decoder, mode="reduce-overhead", fullgraph=True)
pipe.vqgan = torch.compile(pipe.vqgan, mode="reduce-overhead", fullgraph=True)
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
def benchmark(self, args):
flush()
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)
pipeline_class_name = str(self.pipe.__class__.__name__)
flush()
csv_dict = generate_csv_dict(
pipeline_cls=pipeline_class_name, ckpt=args.ckpt, args=args, benchmark_info=benchmark_info
)
filepath = self.get_result_filepath(args)
write_to_csv(filepath, csv_dict)
print(f"Logs written to: {filepath}")
flush()
class TurboTextToImageBenchmark(TextToImageBenchmark):
def __init__(self, args):
super().__init__(args)
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=0.0,
)
class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
lora_id = "latent-consistency/lcm-lora-sdxl"
def __init__(self, args):
super().__init__(args)
self.pipe.load_lora_weights(self.lora_id)
self.pipe.fuse_lora()
self.pipe.unload_lora_weights()
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
def get_result_filepath(self, args):
pipeline_class_name = str(self.pipe.__class__.__name__)
name = (
self.lora_id.replace("/", "_")
+ "_"
+ pipeline_class_name
+ f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=1.0,
)
def benchmark(self, args):
flush()
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)
pipeline_class_name = str(self.pipe.__class__.__name__)
flush()
csv_dict = generate_csv_dict(
pipeline_cls=pipeline_class_name, ckpt=self.lora_id, args=args, benchmark_info=benchmark_info
)
filepath = self.get_result_filepath(args)
write_to_csv(filepath, csv_dict)
print(f"Logs written to: {filepath}")
flush()
class ImageToImageBenchmark(TextToImageBenchmark):
pipeline_class = AutoPipelineForImage2Image
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/1665_Girl_with_a_Pearl_Earring.jpg"
image = load_image(url).convert("RGB")
def __init__(self, args):
super().__init__(args)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class TurboImageToImageBenchmark(ImageToImageBenchmark):
def __init__(self, args):
super().__init__(args)
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=0.0,
strength=0.5,
)
class InpaintingBenchmark(ImageToImageBenchmark):
pipeline_class = AutoPipelineForInpainting
mask_url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/overture-creations-5sI6fQgYIuo_mask.png"
mask = load_image(mask_url).convert("RGB")
def __init__(self, args):
super().__init__(args)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
self.mask = self.mask.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
mask_image=self.mask,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
image = load_image(url)
def __init__(self, args):
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
pipe.load_ip_adapter(
args.ip_adapter_id[0],
subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
weight_name=args.ip_adapter_id[1],
)
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
ip_adapter_image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class ControlNetBenchmark(TextToImageBenchmark):
pipeline_class = StableDiffusionControlNetPipeline
aux_network_class = ControlNetModel
root_ckpt = "Lykon/DreamShaper"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_image_condition.png"
image = load_image(url).convert("RGB")
def __init__(self, args):
aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = self.pipeline_class.from_pretrained(self.root_ckpt, controlnet=aux_network, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
pipe.controlnet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.controlnet = torch.compile(pipe.controlnet, mode="reduce-overhead", fullgraph=True)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class ControlNetSDXLBenchmark(ControlNetBenchmark):
pipeline_class = StableDiffusionXLControlNetPipeline
root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
def __init__(self, args):
super().__init__(args)
class T2IAdapterBenchmark(ControlNetBenchmark):
pipeline_class = StableDiffusionAdapterPipeline
aux_network_class = T2IAdapter
root_ckpt = "Lykon/DreamShaper"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter.png"
image = load_image(url).convert("L")
def __init__(self, args):
aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = self.pipeline_class.from_pretrained(self.root_ckpt, adapter=aux_network, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
pipe.adapter.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.adapter = torch.compile(pipe.adapter, mode="reduce-overhead", fullgraph=True)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):
pipeline_class = StableDiffusionXLAdapterPipeline
root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter_sdxl.png"
image = load_image(url)
def __init__(self, args):
super().__init__(args)


@@ -0,0 +1,26 @@
import argparse
import sys
sys.path.append(".")
from base_classes import ControlNetBenchmark, ControlNetSDXLBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="lllyasviel/sd-controlnet-canny",
choices=["lllyasviel/sd-controlnet-canny", "diffusers/controlnet-canny-sdxl-1.0"],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = (
ControlNetBenchmark(args) if args.ckpt == "lllyasviel/sd-controlnet-canny" else ControlNetSDXLBenchmark(args)
)
benchmark_pipe.benchmark(args)


@@ -0,0 +1,33 @@
import argparse
import sys
sys.path.append(".")
from base_classes import IPAdapterTextToImageBenchmark # noqa: E402
IP_ADAPTER_CKPTS = {
# because original SD v1.5 has been taken down.
"Lykon/DreamShaper": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
"stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="rstabilityai/stable-diffusion-xl-base-1.0",
choices=list(IP_ADAPTER_CKPTS.keys()),
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
benchmark_pipe = IPAdapterTextToImageBenchmark(args)
args.ckpt = f"{args.ckpt} (IP-Adapter)"
benchmark_pipe.benchmark(args)


@@ -0,0 +1,29 @@
import argparse
import sys
sys.path.append(".")
from base_classes import ImageToImageBenchmark, TurboImageToImageBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=[
"Lykon/DreamShaper",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-refiner-1.0",
"stabilityai/sdxl-turbo",
],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = ImageToImageBenchmark(args) if "turbo" not in args.ckpt else TurboImageToImageBenchmark(args)
benchmark_pipe.benchmark(args)


@@ -0,0 +1,28 @@
import argparse
import sys
sys.path.append(".")
from base_classes import InpaintingBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=[
"Lykon/DreamShaper",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-base-1.0",
],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = InpaintingBenchmark(args)
benchmark_pipe.benchmark(args)


@@ -0,0 +1,28 @@
import argparse
import sys
sys.path.append(".")
from base_classes import T2IAdapterBenchmark, T2IAdapterSDXLBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="TencentARC/t2iadapter_canny_sd14v1",
choices=["TencentARC/t2iadapter_canny_sd14v1", "TencentARC/t2i-adapter-canny-sdxl-1.0"],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = (
T2IAdapterBenchmark(args)
if args.ckpt == "TencentARC/t2iadapter_canny_sd14v1"
else T2IAdapterSDXLBenchmark(args)
)
benchmark_pipe.benchmark(args)


@@ -0,0 +1,23 @@
import argparse
import sys
sys.path.append(".")
from base_classes import LCMLoRATextToImageBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="stabilityai/stable-diffusion-xl-base-1.0",
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=4)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = LCMLoRATextToImageBenchmark(args)
benchmark_pipe.benchmark(args)


@@ -0,0 +1,40 @@
import argparse
import sys
sys.path.append(".")
from base_classes import TextToImageBenchmark, TurboTextToImageBenchmark # noqa: E402
ALL_T2I_CKPTS = [
"Lykon/DreamShaper",
"segmind/SSD-1B",
"stabilityai/stable-diffusion-xl-base-1.0",
"kandinsky-community/kandinsky-2-2-decoder",
"warp-ai/wuerstchen",
"stabilityai/sdxl-turbo",
]
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=ALL_T2I_CKPTS,
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_cls = None
if "turbo" in args.ckpt:
benchmark_cls = TurboTextToImageBenchmark
else:
benchmark_cls = TextToImageBenchmark
benchmark_pipe = benchmark_cls(args)
benchmark_pipe.benchmark(args)


@@ -1,98 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "black-forest-labs/FLUX.1-dev"
RESULT_FILENAME = "flux.csv"
def get_input_dict(**device_dtype_kwargs):
# resolution: 1024x1024
# maximum sequence length 512
hidden_states = torch.randn(1, 4096, 64, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
pooled_prompt_embeds = torch.randn(1, 768, **device_dtype_kwargs)
image_ids = torch.ones(512, 3, **device_dtype_kwargs)
text_ids = torch.ones(4096, 3, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
guidance = torch.tensor([1.0], **device_dtype_kwargs)
return {
"hidden_states": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"img_ids": image_ids,
"txt_ids": text_ids,
"pooled_projections": pooled_prompt_embeds,
"timestep": timestep,
"guidance": guidance,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-bnb-nf4",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
"quantization_config": BitsAndBytesConfig(
load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4"
),
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)


@@ -1,80 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import LTXVideoTransformer3DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "Lightricks/LTX-Video-0.9.7-dev"
RESULT_FILENAME = "ltx.csv"
def get_input_dict(**device_dtype_kwargs):
# 512x704 (161 frames)
# `max_sequence_length`: 256
hidden_states = torch.randn(1, 7392, 128, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 256, 4096, **device_dtype_kwargs)
encoder_attention_mask = torch.ones(1, 256, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
video_coords = torch.randn(1, 3, 7392, **device_dtype_kwargs)
return {
"hidden_states": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"encoder_attention_mask": encoder_attention_mask,
"timestep": timestep,
"video_coords": video_coords,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)


@@ -1,82 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import UNet2DConditionModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "stabilityai/stable-diffusion-xl-base-1.0"
RESULT_FILENAME = "sdxl.csv"
def get_input_dict(**device_dtype_kwargs):
# height: 1024
# width: 1024
# max_sequence_length: 77
hidden_states = torch.randn(1, 4, 128, 128, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 77, 2048, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
added_cond_kwargs = {
"text_embeds": torch.randn(1, 1280, **device_dtype_kwargs),
"time_ids": torch.ones(1, 6, **device_dtype_kwargs),
}
return {
"sample": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"timestep": timestep,
"added_cond_kwargs": added_cond_kwargs,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)


@@ -1,244 +0,0 @@
import gc
import inspect
import logging
import os
import queue
import threading
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional, Union
import pandas as pd
import torch
import torch.utils.benchmark as benchmark
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.testing_utils import require_torch_gpu, torch_device
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)
NUM_WARMUP_ROUNDS = 5
def benchmark_fn(f, *args, **kwargs):
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)",
globals={"args": args, "kwargs": kwargs, "f": f},
num_threads=1,
)
return float(f"{(t0.blocked_autorange().mean):.3f}")
def flush():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
# Adapted from https://github.com/lucasb-eyer/cnn_vit_benchmarks/blob/15b665ff758e8062131353076153905cae00a71f/main.py
def calculate_flops(model, input_dict):
try:
from torchprofile import profile_macs
except ModuleNotFoundError:
raise
# This is a hacky way to convert the kwargs to args as `profile_macs` cries about kwargs.
sig = inspect.signature(model.forward)
param_names = [
p.name
for p in sig.parameters.values()
if p.kind
in (
inspect.Parameter.POSITIONAL_ONLY,
inspect.Parameter.POSITIONAL_OR_KEYWORD,
)
and p.name != "self"
]
bound = sig.bind_partial(**input_dict)
bound.apply_defaults()
args = tuple(bound.arguments[name] for name in param_names)
model.eval()
with torch.no_grad():
macs = profile_macs(model, args)
flops = 2 * macs # 1 MAC operation = 2 FLOPs (1 multiplication + 1 addition)
return flops
def calculate_params(model):
return sum(p.numel() for p in model.parameters())
# Users can define their own in case this doesn't suffice. For most cases,
# it should be sufficient.
def model_init_fn(model_cls, group_offload_kwargs=None, layerwise_upcasting=False, **init_kwargs):
model = model_cls.from_pretrained(**init_kwargs).eval()
if group_offload_kwargs and isinstance(group_offload_kwargs, dict):
model.enable_group_offload(**group_offload_kwargs)
else:
model.to(torch_device)
if layerwise_upcasting:
model.enable_layerwise_casting(
storage_dtype=torch.float8_e4m3fn, compute_dtype=init_kwargs.get("torch_dtype", torch.bfloat16)
)
return model
@dataclass
class BenchmarkScenario:
name: str
model_cls: ModelMixin
model_init_kwargs: Dict[str, Any]
model_init_fn: Callable
get_model_input_dict: Callable
compile_kwargs: Optional[Dict[str, Any]] = None
@require_torch_gpu
class BenchmarkMixin:
def pre_benchmark(self):
flush()
torch.compiler.reset()
def post_benchmark(self, model):
model.cpu()
flush()
torch.compiler.reset()
@torch.no_grad()
def run_benchmark(self, scenario: BenchmarkScenario):
# 0) Basic stats
logger.info(f"Running scenario: {scenario.name}.")
try:
model = model_init_fn(scenario.model_cls, **scenario.model_init_kwargs)
num_params = round(calculate_params(model) / 1e9, 2)
try:
flops = round(calculate_flops(model, input_dict=scenario.get_model_input_dict()) / 1e9, 2)
except Exception as e:
logger.info(f"Problem in calculating FLOPs:\n{e}")
flops = None
model.cpu()
del model
except Exception as e:
logger.info(f"Error while initializing the model and calculating FLOPs:\n{e}")
return {}
self.pre_benchmark()
# 1) plain stats
results = {}
plain = None
try:
plain = self._run_phase(
model_cls=scenario.model_cls,
init_fn=scenario.model_init_fn,
init_kwargs=scenario.model_init_kwargs,
get_input_fn=scenario.get_model_input_dict,
compile_kwargs=None,
)
except Exception as e:
logger.info(f"Benchmark could not be run with the following error:\n{e}")
return results
# 2) compiled stats (if any)
compiled = {"time": None, "memory": None}
if scenario.compile_kwargs:
try:
compiled = self._run_phase(
model_cls=scenario.model_cls,
init_fn=scenario.model_init_fn,
init_kwargs=scenario.model_init_kwargs,
get_input_fn=scenario.get_model_input_dict,
compile_kwargs=scenario.compile_kwargs,
)
except Exception as e:
logger.info(f"Compilation benchmark could not be run with the following error\n: {e}")
if plain is None:
return results
# 3) merge
result = {
"scenario": scenario.name,
"model_cls": scenario.model_cls.__name__,
"num_params_B": num_params,
"flops_G": flops,
"time_plain_s": plain["time"],
"mem_plain_GB": plain["memory"],
"time_compile_s": compiled["time"],
"mem_compile_GB": compiled["memory"],
}
if scenario.compile_kwargs:
result["fullgraph"] = scenario.compile_kwargs.get("fullgraph", False)
result["mode"] = scenario.compile_kwargs.get("mode", "default")
else:
result["fullgraph"], result["mode"] = None, None
return result
def run_bencmarks_and_collate(self, scenarios: Union[BenchmarkScenario, list[BenchmarkScenario]], filename: str):
if not isinstance(scenarios, list):
scenarios = [scenarios]
record_queue = queue.Queue()
stop_signal = object()
def _writer_thread():
while True:
item = record_queue.get()
if item is stop_signal:
break
df_row = pd.DataFrame([item])
write_header = not os.path.exists(filename)
df_row.to_csv(filename, mode="a", header=write_header, index=False)
record_queue.task_done()
record_queue.task_done()
writer = threading.Thread(target=_writer_thread, daemon=True)
writer.start()
for s in scenarios:
try:
record = self.run_benchmark(s)
if record:
record_queue.put(record)
else:
logger.info(f"Record empty from scenario: {s.name}.")
except Exception as e:
logger.info(f"Running scenario ({s.name}) led to error:\n{e}")
record_queue.put(stop_signal)
logger.info(f"Results serialized to {filename=}.")
def _run_phase(
self,
*,
model_cls: ModelMixin,
init_fn: Callable,
init_kwargs: Dict[str, Any],
get_input_fn: Callable,
compile_kwargs: Optional[Dict[str, Any]],
) -> Dict[str, float]:
# setup
self.pre_benchmark()
# init & (optional) compile
model = init_fn(model_cls, **init_kwargs)
if compile_kwargs:
model.compile(**compile_kwargs)
# build inputs
inp = get_input_fn()
# measure
run_ctx = torch._inductor.utils.fresh_inductor_cache() if compile_kwargs else nullcontext()
with run_ctx:
for _ in range(NUM_WARMUP_ROUNDS):
_ = model(**inp)
time_s = benchmark_fn(lambda m, d: m(**d), model, inp)
mem_gb = torch.cuda.max_memory_allocated() / (1024**3)
mem_gb = round(mem_gb, 2)
# teardown
self.post_benchmark(model)
del model
return {"time": time_s, "memory": mem_gb}


@@ -1,74 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import WanTransformer3DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
RESULT_FILENAME = "wan.csv"
def get_input_dict(**device_dtype_kwargs):
# height: 480
# width: 832
# num_frames: 81
# max_sequence_length: 512
hidden_states = torch.randn(1, 16, 21, 60, 104, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
return {"hidden_states": hidden_states, "encoder_hidden_states": encoder_hidden_states, "timestep": timestep}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)


@@ -1,166 +0,0 @@
import argparse
import os
import sys
import gpustat
import pandas as pd
import psycopg2
import psycopg2.extras
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json
register_adapter(dict, Json)
FINAL_CSV_FILENAME = "collated_results.csv"
# https://github.com/huggingface/transformers/blob/593e29c5e2a9b17baec010e8dc7c1431fed6e841/benchmark/init_db.sql#L27
BENCHMARKS_TABLE_NAME = "benchmarks"
MEASUREMENTS_TABLE_NAME = "model_measurements"
def _init_benchmark(conn, branch, commit_id, commit_msg):
gpu_stats = gpustat.GPUStatCollection.new_query()
metadata = {"gpu_name": gpu_stats[0]["name"]}
repository = "huggingface/diffusers"
with conn.cursor() as cur:
cur.execute(
f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
(repository, branch, commit_id, commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
print(f"Initialised benchmark #{benchmark_id}")
return benchmark_id
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
try:
conn = psycopg2.connect(
host=os.getenv("PGHOST"),
database=os.getenv("PGDATABASE"),
user=os.getenv("PGUSER"),
password=os.getenv("PGPASSWORD"),
)
print("DB connection established successfully.")
except Exception as e:
print(f"Problem during DB init: {e}")
sys.exit(1)
try:
benchmark_id = _init_benchmark(
conn=conn,
branch=args.branch,
commit_id=args.commit_id,
commit_msg=args.commit_msg,
)
except Exception as e:
print(f"Problem during initializing benchmark: {e}")
sys.exit(1)
cur = conn.cursor()
df = pd.read_csv(FINAL_CSV_FILENAME)
# Helper to cast values (or None) given a dtype
def _cast_value(val, dtype: str):
if pd.isna(val):
return None
if dtype == "text":
return str(val).strip()
if dtype == "float":
try:
return float(val)
except ValueError:
return None
if dtype == "bool":
s = str(val).strip().lower()
if s in ("true", "t", "yes", "1"):
return True
if s in ("false", "f", "no", "0"):
return False
if val in (1, 1.0):
return True
if val in (0, 0.0):
return False
return None
return val
try:
rows_to_insert = []
for _, row in df.iterrows():
scenario = _cast_value(row.get("scenario"), "text")
model_cls = _cast_value(row.get("model_cls"), "text")
num_params_B = _cast_value(row.get("num_params_B"), "float")
flops_G = _cast_value(row.get("flops_G"), "float")
time_plain_s = _cast_value(row.get("time_plain_s"), "float")
mem_plain_GB = _cast_value(row.get("mem_plain_GB"), "float")
time_compile_s = _cast_value(row.get("time_compile_s"), "float")
mem_compile_GB = _cast_value(row.get("mem_compile_GB"), "float")
fullgraph = _cast_value(row.get("fullgraph"), "bool")
mode = _cast_value(row.get("mode"), "text")
# If "github_sha" column exists in the CSV, cast it; else default to None
if "github_sha" in df.columns:
github_sha = _cast_value(row.get("github_sha"), "text")
else:
github_sha = None
measurements = {
"scenario": scenario,
"model_cls": model_cls,
"num_params_B": num_params_B,
"flops_G": flops_G,
"time_plain_s": time_plain_s,
"mem_plain_GB": mem_plain_GB,
"time_compile_s": time_compile_s,
"mem_compile_GB": mem_compile_GB,
"fullgraph": fullgraph,
"mode": mode,
"github_sha": github_sha,
}
rows_to_insert.append((benchmark_id, measurements))
# Batch-insert all rows
insert_sql = f"""
INSERT INTO {MEASUREMENTS_TABLE_NAME} (
benchmark_id,
measurements
)
VALUES (%s, %s);
"""
psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
conn.commit()
cur.close()
conn.close()
except Exception as e:
print(f"Exception: {e}")
sys.exit(1)


@@ -1,19 +1,19 @@
import os
import glob
import sys
import pandas as pd
from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils import EntryNotFoundError
REPO_ID = "diffusers/benchmarks"
sys.path.append(".")
from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv # noqa: E402
def has_previous_benchmark() -> str:
from run_all import FINAL_CSV_FILENAME
csv_path = None
try:
csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILENAME)
csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
except EntryNotFoundError:
csv_path = None
return csv_path
@@ -26,50 +26,46 @@ def filter_float(value):
def push_to_hf_dataset():
from run_all import FINAL_CSV_FILENAME, GITHUB_SHA
all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
collate_csv(all_csvs, FINAL_CSV_FILE)
# If there's an existing benchmark file, we should report the changes.
csv_path = has_previous_benchmark()
if csv_path is not None:
current_results = pd.read_csv(FINAL_CSV_FILENAME)
current_results = pd.read_csv(FINAL_CSV_FILE)
previous_results = pd.read_csv(csv_path)
numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
numeric_columns = [
c for c in numeric_columns if c not in ["batch_size", "num_inference_steps", "actual_gpu_memory (gbs)"]
]
for column in numeric_columns:
# get previous values as floats, aligned to current index
prev_vals = previous_results[column].map(filter_float).reindex(current_results.index)
previous_results[column] = previous_results[column].map(lambda x: filter_float(x))
# get current values as floats
curr_vals = current_results[column].astype(float)
# Calculate the percentage change
current_results[column] = current_results[column].astype(float)
previous_results[column] = previous_results[column].astype(float)
percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100
# stringify the current values
curr_str = curr_vals.map(str)
# build an appendage only when prev exists and differs
append_str = prev_vals.where(prev_vals.notnull() & (prev_vals != curr_vals), other=pd.NA).map(
lambda x: f" ({x})" if pd.notnull(x) else ""
# Format the values with '+' or '-' sign and append to original values
current_results[column] = current_results[column].map(str) + percent_change.map(
lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
)
# There might be newly added rows. So, filter out the NaNs.
current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))
# combine
current_results[column] = curr_str + append_str
os.remove(FINAL_CSV_FILENAME)
current_results.to_csv(FINAL_CSV_FILENAME, index=False)
# Overwrite the current result file.
current_results.to_csv(FINAL_CSV_FILE, index=False)
commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
upload_file(
repo_id=REPO_ID,
path_in_repo=FINAL_CSV_FILENAME,
path_or_fileobj=FINAL_CSV_FILENAME,
path_in_repo=FINAL_CSV_FILE,
path_or_fileobj=FINAL_CSV_FILE,
repo_type="dataset",
commit_message=commit_message,
)
upload_file(
repo_id="diffusers/benchmark-analyzer",
path_in_repo=FINAL_CSV_FILENAME,
path_or_fileobj=FINAL_CSV_FILENAME,
repo_type="space",
commit_message=commit_message,
)
if __name__ == "__main__":
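The hunk above interleaves the old formatting (percent change appended to each value) with the new one (previous value appended in parentheses when it differs). A small pandas sketch of the new behaviour, assuming two aligned numeric Series, is shown below; the function name is illustrative.

```python
# Hedged sketch of the "append previous value when it differs" formatting.
import pandas as pd


def annotate_with_previous(current: pd.Series, previous: pd.Series) -> pd.Series:
    prev = previous.reindex(current.index)  # align to the current rows
    curr = current.astype(float)
    suffix = prev.where(prev.notnull() & (prev != curr)).map(
        lambda x: f" ({x})" if pd.notnull(x) else ""
    )
    return curr.map(str) + suffix  # e.g. "1.234 (1.250)"
```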

View File

@@ -1,6 +0,0 @@
pandas
psutil
gpustat
torchprofile
bitsandbytes
psycopg2==2.9.9

View File

@@ -1,84 +1,101 @@
import glob
import logging
import os
import subprocess
import pandas as pd
import sys
from typing import List
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)
sys.path.append(".")
from benchmark_text_to_image import ALL_T2I_CKPTS # noqa: E402
PATTERN = "benchmarking_*.py"
FINAL_CSV_FILENAME = "collated_results.csv"
GITHUB_SHA = os.getenv("GITHUB_SHA", None)
PATTERN = "benchmark_*.py"
class SubprocessCallException(Exception):
pass
def run_command(command: list[str], return_stdout=False):
# Taken from `test_examples_utils.py`
def run_command(command: List[str], return_stdout=False):
"""
Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
if an error occurred while running `command`
"""
try:
output = subprocess.check_output(command, stderr=subprocess.STDOUT)
if return_stdout and hasattr(output, "decode"):
return output.decode("utf-8")
if return_stdout:
if hasattr(output, "decode"):
output = output.decode("utf-8")
return output
except subprocess.CalledProcessError as e:
raise SubprocessCallException(f"Command `{' '.join(command)}` failed with:\n{e.output.decode()}") from e
raise SubprocessCallException(
f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
) from e
def merge_csvs(final_csv: str = "collated_results.csv"):
all_csvs = glob.glob("*.csv")
all_csvs = [f for f in all_csvs if f != final_csv]
if not all_csvs:
logger.info("No result CSVs found to merge.")
return
df_list = []
for f in all_csvs:
try:
d = pd.read_csv(f)
except pd.errors.EmptyDataError:
# If a file existed but was zero bytes or corrupted, skip it
continue
df_list.append(d)
if not df_list:
logger.info("All result CSVs were empty or invalid; nothing to merge.")
return
final_df = pd.concat(df_list, ignore_index=True)
if GITHUB_SHA is not None:
final_df["github_sha"] = GITHUB_SHA
final_df.to_csv(final_csv, index=False)
logger.info(f"Merged {len(all_csvs)} partial CSVs → {final_csv}.")
def run_scripts():
python_files = sorted(glob.glob(PATTERN))
python_files = [f for f in python_files if f != "benchmarking_utils.py"]
def main():
python_files = glob.glob(PATTERN)
for file in python_files:
script_name = file.split(".py")[0].split("_")[-1] # example: benchmarking_foo.py -> foo
logger.info(f"\n****** Running file: {file} ******")
print(f"****** Running file: {file} ******")
partial_csv = f"{script_name}.csv"
if os.path.exists(partial_csv):
logger.info(f"Found {partial_csv}. Removing for safer numbers and duplication.")
os.remove(partial_csv)
# Run with canonical settings.
if file != "benchmark_text_to_image.py" and file != "benchmark_ip_adapters.py":
command = f"python {file}"
run_command(command.split())
command = ["python", file]
try:
run_command(command)
logger.info(f"{file} finished normally.")
except SubprocessCallException as e:
logger.info(f"Error running {file}:\n{e}")
finally:
logger.info(f"→ Merging partial CSVs after {file}")
merge_csvs(final_csv=FINAL_CSV_FILENAME)
command += " --run_compile"
run_command(command.split())
logger.info(f"\nAll scripts attempted. Final collated CSV: {FINAL_CSV_FILENAME}")
# Run variants.
for file in python_files:
# See: https://github.com/pytorch/pytorch/issues/129637
if file == "benchmark_ip_adapters.py":
continue
if file == "benchmark_text_to_image.py":
for ckpt in ALL_T2I_CKPTS:
command = f"python {file} --ckpt {ckpt}"
if "turbo" in ckpt:
command += " --num_inference_steps 1"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file == "benchmark_sd_img.py":
for ckpt in ["stabilityai/stable-diffusion-xl-refiner-1.0", "stabilityai/sdxl-turbo"]:
command = f"python {file} --ckpt {ckpt}"
if ckpt == "stabilityai/sdxl-turbo":
command += " --num_inference_steps 2"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
command = f"python {file} --ckpt {sdxl_ckpt}"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file in ["benchmark_controlnet.py", "benchmark_t2i_adapter.py"]:
sdxl_ckpt = (
"diffusers/controlnet-canny-sdxl-1.0"
if "controlnet" in file
else "TencentARC/t2i-adapter-canny-sdxl-1.0"
)
command = f"python {file} --ckpt {sdxl_ckpt}"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
if __name__ == "__main__":
run_scripts()
main()
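A hypothetical usage of the helpers above, assuming the file is importable as `run_all` and that a `benchmarking_foo.py` script exists (both names are illustrative):

```python
# Run one benchmark script, capture its stdout, then fold its partial CSVs into
# the collated results file.
from run_all import FINAL_CSV_FILENAME, merge_csvs, run_command

stdout = run_command(["python", "benchmarking_foo.py"], return_stdout=True)
print(stdout)
merge_csvs(final_csv=FINAL_CSV_FILENAME)
```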

benchmarks/utils.py Normal file
View File

@@ -0,0 +1,98 @@
import argparse
import csv
import gc
import os
from dataclasses import dataclass
from typing import Dict, List, Union
import torch
import torch.utils.benchmark as benchmark
GITHUB_SHA = os.getenv("GITHUB_SHA", None)
BENCHMARK_FIELDS = [
"pipeline_cls",
"ckpt_id",
"batch_size",
"num_inference_steps",
"model_cpu_offload",
"run_compile",
"time (secs)",
"memory (gbs)",
"actual_gpu_memory (gbs)",
"github_sha",
]
PROMPT = "ghibli style, a fantasy landscape with castles"
BASE_PATH = os.getenv("BASE_PATH", ".")
TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))
REPO_ID = "diffusers/benchmarks"
FINAL_CSV_FILE = "collated_results.csv"
@dataclass
class BenchmarkInfo:
time: float
memory: float
def flush():
"""Wipes off memory."""
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
def bytes_to_giga_bytes(bytes):
return f"{(bytes / 1024 / 1024 / 1024):.3f}"
def benchmark_fn(f, *args, **kwargs):
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)",
globals={"args": args, "kwargs": kwargs, "f": f},
num_threads=torch.get_num_threads(),
)
return f"{(t0.blocked_autorange().mean):.3f}"
def generate_csv_dict(
pipeline_cls: str, ckpt: str, args: argparse.Namespace, benchmark_info: BenchmarkInfo
) -> Dict[str, Union[str, bool, float]]:
"""Packs benchmarking data into a dictionary for latter serialization."""
data_dict = {
"pipeline_cls": pipeline_cls,
"ckpt_id": ckpt,
"batch_size": args.batch_size,
"num_inference_steps": args.num_inference_steps,
"model_cpu_offload": args.model_cpu_offload,
"run_compile": args.run_compile,
"time (secs)": benchmark_info.time,
"memory (gbs)": benchmark_info.memory,
"actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
"github_sha": GITHUB_SHA,
}
return data_dict
def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
writer.writeheader()
writer.writerow(data_dict)
def collate_csv(input_files: List[str], output_file: str):
"""Collates multiple identically structured CSVs into a single CSV file."""
with open(output_file, mode="w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=BENCHMARK_FIELDS)
writer.writeheader()
for file in input_files:
with open(file, mode="r") as infile:
reader = csv.DictReader(infile)
for row in reader:
writer.writerow(row)
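A hypothetical end-to-end use of the helpers above, assuming the file is importable as `utils` and a CUDA device is available; the pipeline stand-in and checkpoint id are illustrative.

```python
import argparse

import torch

from utils import (
    BenchmarkInfo,
    FINAL_CSV_FILE,
    benchmark_fn,
    bytes_to_giga_bytes,
    collate_csv,
    flush,
    generate_csv_dict,
    write_to_csv,
)


def dummy_pipeline():
    # Stand-in for an actual pipeline call.
    return torch.randn(1, 4, 64, 64, device="cuda")


flush()
args = argparse.Namespace(batch_size=1, num_inference_steps=30, model_cpu_offload=False, run_compile=False)
info = BenchmarkInfo(
    time=benchmark_fn(dummy_pipeline),
    memory=bytes_to_giga_bytes(torch.cuda.max_memory_allocated()),
)
csv_dict = generate_csv_dict("DummyPipeline", "org/dummy-ckpt", args, info)
write_to_csv("dummy.csv", csv_dict)
collate_csv(["dummy.csv"], FINAL_CSV_FILE)
```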

View File

@@ -47,10 +47,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
tensorboard \
transformers \
matplotlib \
setuptools==69.5.1 \
bitsandbytes \
torchao \
gguf \
optimum-quanto
setuptools==69.5.1
CMD ["/bin/bash"]

View File

@@ -0,0 +1,49 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"jax[cpu]>=0.2.16,!=0.3.2" \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -0,0 +1,51 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m pip install --no-cache-dir \
"jax[tpu]>=0.2.16,!=0.3.2" \
-f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -1,37 +1,36 @@
- title: Get started
sections:
- sections:
- local: index
title: Diffusers
title: 🧨 Diffusers
- local: quicktour
title: Quicktour
- local: stable_diffusion
title: Effective and efficient diffusion
- local: installation
title: Installation
- local: quicktour
title: Quickstart
- local: stable_diffusion
title: Basic performance
- title: Pipelines
isExpanded: false
sections:
- local: using-diffusers/loading
title: DiffusionPipeline
title: Get started
- sections:
- local: tutorials/tutorial_overview
title: Overview
- local: using-diffusers/write_own_pipeline
title: Understanding pipelines, models and schedulers
- local: tutorials/autopipeline
title: AutoPipeline
- local: tutorials/basic_training
title: Train a diffusion model
title: Tutorials
- sections:
- local: using-diffusers/loading
title: Load pipelines
- local: using-diffusers/custom_pipeline_overview
title: Community pipelines and components
- local: using-diffusers/callback
title: Pipeline callbacks
- local: using-diffusers/reusing_seeds
title: Reproducibility
title: Load community pipelines and components
- local: using-diffusers/schedulers
title: Schedulers
title: Load schedulers and models
- local: using-diffusers/other-formats
title: Model files and layouts
- local: using-diffusers/push_to_hub
title: Sharing pipelines and models
- title: Adapters
isExpanded: false
sections:
title: Push files to the Hub
title: Load pipelines and adapters
- sections:
- local: tutorials/using_peft_for_inference
title: LoRA
- local: using-diffusers/ip_adapter
@@ -44,54 +43,45 @@
title: DreamBooth
- local: using-diffusers/textual_inversion_inference
title: Textual inversion
- title: Inference
title: Adapters
isExpanded: false
sections:
- local: using-diffusers/weighted_prompts
title: Prompt techniques
- sections:
- local: using-diffusers/unconditional_image_generation
title: Unconditional image generation
- local: using-diffusers/conditional_image_generation
title: Text-to-image
- local: using-diffusers/img2img
title: Image-to-image
- local: using-diffusers/inpaint
title: Inpainting
- local: using-diffusers/text-img2vid
title: Video generation
- local: using-diffusers/depth2img
title: Depth-to-image
title: Generative tasks
- sections:
- local: using-diffusers/overview_techniques
title: Overview
- local: using-diffusers/create_a_server
title: Create a server
- local: using-diffusers/batched_inference
title: Batch inference
- local: training/distributed_inference
title: Distributed inference
- title: Inference optimization
isExpanded: false
sections:
- local: optimization/fp16
title: Accelerate inference
- local: optimization/cache
title: Caching
- local: optimization/attention_backends
title: Attention backends
- local: optimization/memory
title: Reduce memory usage
- local: optimization/speed-memory-optims
title: Compiling and offloading quantized models
- title: Community optimizations
sections:
- local: optimization/pruna
title: Pruna
- local: optimization/xformers
title: xFormers
- local: optimization/tome
title: Token merging
- local: optimization/deepcache
title: DeepCache
- local: optimization/tgate
title: TGATE
- local: optimization/xdit
title: xDiT
- local: optimization/para_attn
title: ParaAttention
- local: using-diffusers/image_quality
title: FreeU
- title: Hybrid Inference
isExpanded: false
sections:
- local: using-diffusers/scheduler_features
title: Scheduler features
- local: using-diffusers/callback
title: Pipeline callbacks
- local: using-diffusers/reusing_seeds
title: Reproducible pipelines
- local: using-diffusers/image_quality
title: Controlling image quality
- local: using-diffusers/weighted_prompts
title: Prompt techniques
title: Inference techniques
- sections:
- local: advanced_inference/outpaint
title: Outpainting
title: Advanced inference
- sections:
- local: hybrid_inference/overview
title: Overview
- local: hybrid_inference/vae_decode
@@ -100,112 +90,8 @@
title: VAE Encode
- local: hybrid_inference/api_reference
title: API Reference
- title: Modular Diffusers
isExpanded: false
sections:
- local: modular_diffusers/overview
title: Overview
- local: modular_diffusers/quickstart
title: Quickstart
- local: modular_diffusers/modular_diffusers_states
title: States
- local: modular_diffusers/pipeline_block
title: ModularPipelineBlocks
- local: modular_diffusers/sequential_pipeline_blocks
title: SequentialPipelineBlocks
- local: modular_diffusers/loop_sequential_pipeline_blocks
title: LoopSequentialPipelineBlocks
- local: modular_diffusers/auto_pipeline_blocks
title: AutoPipelineBlocks
- local: modular_diffusers/modular_pipeline
title: ModularPipeline
- local: modular_diffusers/components_manager
title: ComponentsManager
- local: modular_diffusers/guiders
title: Guiders
- title: Training
isExpanded: false
sections:
- local: training/overview
title: Overview
- local: training/create_dataset
title: Create a dataset for training
- local: training/adapt_a_model
title: Adapt a model to a new task
- local: tutorials/basic_training
title: Train a diffusion model
- title: Models
sections:
- local: training/unconditional_training
title: Unconditional image generation
- local: training/text2image
title: Text-to-image
- local: training/sdxl
title: Stable Diffusion XL
- local: training/kandinsky
title: Kandinsky 2.2
- local: training/wuerstchen
title: Wuerstchen
- local: training/controlnet
title: ControlNet
- local: training/t2i_adapters
title: T2I-Adapters
- local: training/instructpix2pix
title: InstructPix2Pix
- local: training/cogvideox
title: CogVideoX
- title: Methods
sections:
- local: training/text_inversion
title: Textual Inversion
- local: training/dreambooth
title: DreamBooth
- local: training/lora
title: LoRA
- local: training/custom_diffusion
title: Custom Diffusion
- local: training/lcm_distill
title: Latent Consistency Distillation
- local: training/ddpo
title: Reinforcement learning training with DDPO
- title: Quantization
isExpanded: false
sections:
- local: quantization/overview
title: Getting started
- local: quantization/bitsandbytes
title: bitsandbytes
- local: quantization/gguf
title: gguf
- local: quantization/torchao
title: torchao
- local: quantization/quanto
title: quanto
- local: quantization/modelopt
title: NVIDIA ModelOpt
- title: Model accelerators and hardware
isExpanded: false
sections:
- local: optimization/onnx
title: ONNX
- local: optimization/open_vino
title: OpenVINO
- local: optimization/coreml
title: Core ML
- local: optimization/mps
title: Metal Performance Shaders (MPS)
- local: optimization/habana
title: Intel Gaudi
- local: optimization/neuron
title: AWS Neuron
- title: Specific pipeline examples
isExpanded: false
sections:
title: Hybrid Inference
- sections:
- local: using-diffusers/consisid
title: ConsisID
- local: using-diffusers/sdxl
@@ -230,30 +116,106 @@
title: Stable Video Diffusion
- local: using-diffusers/marigold_usage
title: Marigold Computer Vision
- title: Resources
isExpanded: false
sections:
- title: Task recipes
title: Specific pipeline examples
- sections:
- local: training/overview
title: Overview
- local: training/create_dataset
title: Create a dataset for training
- local: training/adapt_a_model
title: Adapt a model to a new task
- isExpanded: false
sections:
- local: using-diffusers/unconditional_image_generation
- local: training/unconditional_training
title: Unconditional image generation
- local: using-diffusers/conditional_image_generation
- local: training/text2image
title: Text-to-image
- local: using-diffusers/img2img
title: Image-to-image
- local: using-diffusers/inpaint
title: Inpainting
- local: advanced_inference/outpaint
title: Outpainting
- local: using-diffusers/text-img2vid
title: Video generation
- local: using-diffusers/depth2img
title: Depth-to-image
- local: using-diffusers/write_own_pipeline
title: Understanding pipelines, models and schedulers
- local: community_projects
title: Projects built with Diffusers
- local: training/sdxl
title: Stable Diffusion XL
- local: training/kandinsky
title: Kandinsky 2.2
- local: training/wuerstchen
title: Wuerstchen
- local: training/controlnet
title: ControlNet
- local: training/t2i_adapters
title: T2I-Adapters
- local: training/instructpix2pix
title: InstructPix2Pix
- local: training/cogvideox
title: CogVideoX
title: Models
- isExpanded: false
sections:
- local: training/text_inversion
title: Textual Inversion
- local: training/dreambooth
title: DreamBooth
- local: training/lora
title: LoRA
- local: training/custom_diffusion
title: Custom Diffusion
- local: training/lcm_distill
title: Latent Consistency Distillation
- local: training/ddpo
title: Reinforcement learning training with DDPO
title: Methods
title: Training
- sections:
- local: quantization/overview
title: Getting Started
- local: quantization/bitsandbytes
title: bitsandbytes
- local: quantization/gguf
title: gguf
- local: quantization/torchao
title: torchao
- local: quantization/quanto
title: quanto
title: Quantization Methods
- sections:
- local: optimization/fp16
title: Accelerate inference
- local: optimization/cache
title: Caching
- local: optimization/memory
title: Reduce memory usage
- local: optimization/speed-memory-optims
title: Compile and offloading quantized models
- local: optimization/pruna
title: Pruna
- local: optimization/xformers
title: xFormers
- local: optimization/tome
title: Token merging
- local: optimization/deepcache
title: DeepCache
- local: optimization/tgate
title: TGATE
- local: optimization/xdit
title: xDiT
- local: optimization/para_attn
title: ParaAttention
- sections:
- local: using-diffusers/stable_diffusion_jax_how_to
title: JAX/Flax
- local: optimization/onnx
title: ONNX
- local: optimization/open_vino
title: OpenVINO
- local: optimization/coreml
title: Core ML
title: Optimized model formats
- sections:
- local: optimization/mps
title: Metal Performance Shaders (MPS)
- local: optimization/habana
title: Intel Gaudi
- local: optimization/neuron
title: AWS Neuron
title: Optimized hardware
title: Accelerate inference and reduce memory
- sections:
- local: conceptual/philosophy
title: Philosophy
- local: using-diffusers/controlling_generation
@@ -264,11 +226,13 @@
title: Diffusers' Ethical Guidelines
- local: conceptual/evaluation
title: Evaluating Diffusion Models
- title: API
isExpanded: false
sections:
- title: Main Classes
title: Conceptual Guides
- sections:
- local: community_projects
title: Projects built with Diffusers
title: Community Projects
- sections:
- isExpanded: false
sections:
- local: api/configuration
title: Configuration
@@ -278,19 +242,8 @@
title: Outputs
- local: api/quantization
title: Quantization
- title: Modular
sections:
- local: api/modular_diffusers/pipeline
title: Pipeline
- local: api/modular_diffusers/pipeline_blocks
title: Blocks
- local: api/modular_diffusers/pipeline_states
title: States
- local: api/modular_diffusers/pipeline_components
title: Components and configs
- local: api/modular_diffusers/guiders
title: Guiders
- title: Loaders
title: Main Classes
- isExpanded: false
sections:
- local: api/loaders/ip_adapter
title: IP-Adapter
@@ -306,14 +259,14 @@
title: SD3Transformer2D
- local: api/loaders/peft
title: PEFT
- title: Models
title: Loaders
- isExpanded: false
sections:
- local: api/models/overview
title: Overview
- local: api/models/auto_model
title: AutoModel
- title: ControlNets
sections:
- sections:
- local: api/models/controlnet
title: ControlNetModel
- local: api/models/controlnet_union
@@ -328,14 +281,12 @@
title: SD3ControlNetModel
- local: api/models/controlnet_sparsectrl
title: SparseControlNetModel
- title: Transformers
sections:
title: ControlNets
- sections:
- local: api/models/allegro_transformer3d
title: AllegroTransformer3DModel
- local: api/models/aura_flow_transformer2d
title: AuraFlowTransformer2DModel
- local: api/models/bria_transformer
title: BriaTransformer2DModel
- local: api/models/chroma_transformer
title: ChromaTransformer2DModel
- local: api/models/cogvideox_transformer3d
@@ -376,14 +327,10 @@
title: PixArtTransformer2DModel
- local: api/models/prior_transformer
title: PriorTransformer
- local: api/models/qwenimage_transformer2d
title: QwenImageTransformer2DModel
- local: api/models/sana_transformer2d
title: SanaTransformer2DModel
- local: api/models/sd3_transformer2d
title: SD3Transformer2DModel
- local: api/models/skyreels_v2_transformer_3d
title: SkyReelsV2Transformer3DModel
- local: api/models/stable_audio_transformer
title: StableAudioDiTModel
- local: api/models/transformer2d
@@ -392,8 +339,8 @@
title: TransformerTemporalModel
- local: api/models/wan_transformer_3d
title: WanTransformer3DModel
- title: UNets
sections:
title: Transformers
- sections:
- local: api/models/stable_cascade_unet
title: StableCascadeUNet
- local: api/models/unet
@@ -408,8 +355,8 @@
title: UNetMotionModel
- local: api/models/uvit2d
title: UViT2DModel
- title: VAEs
sections:
title: UNets
- sections:
- local: api/models/asymmetricautoencoderkl
title: AsymmetricAutoencoderKL
- local: api/models/autoencoder_dc
@@ -430,8 +377,6 @@
title: AutoencoderKLMagvit
- local: api/models/autoencoderkl_mochi
title: AutoencoderKLMochi
- local: api/models/autoencoderkl_qwenimage
title: AutoencoderKLQwenImage
- local: api/models/autoencoder_kl_wan
title: AutoencoderKLWan
- local: api/models/consistency_decoder_vae
@@ -442,7 +387,9 @@
title: Tiny AutoEncoder
- local: api/models/vq
title: VQModel
- title: Pipelines
title: VAEs
title: Models
- isExpanded: false
sections:
- local: api/pipelines/overview
title: Overview
@@ -464,8 +411,6 @@
title: AutoPipeline
- local: api/pipelines/blip_diffusion
title: BLIP-Diffusion
- local: api/pipelines/bria_3_2
title: Bria 3.2
- local: api/pipelines/chroma
title: Chroma
- local: api/pipelines/cogvideox
@@ -570,8 +515,6 @@
title: PixArt-α
- local: api/pipelines/pixart_sigma
title: PixArt-Σ
- local: api/pipelines/qwenimage
title: QwenImage
- local: api/pipelines/sana
title: Sana
- local: api/pipelines/sana_sprint
@@ -582,14 +525,11 @@
title: Semantic Guidance
- local: api/pipelines/shap_e
title: Shap-E
- local: api/pipelines/skyreels_v2
title: SkyReels-V2
- local: api/pipelines/stable_audio
title: Stable Audio
- local: api/pipelines/stable_cascade
title: Stable Cascade
- title: Stable Diffusion
sections:
- sections:
- local: api/pipelines/stable_diffusion/overview
title: Overview
- local: api/pipelines/stable_diffusion/depth2img
@@ -626,6 +566,7 @@
title: T2I-Adapter
- local: api/pipelines/stable_diffusion/text2img
title: Text-to-image
title: Stable Diffusion
- local: api/pipelines/stable_unclip
title: Stable unCLIP
- local: api/pipelines/text_to_video
@@ -644,7 +585,8 @@
title: Wan
- local: api/pipelines/wuerstchen
title: Wuerstchen
- title: Schedulers
title: Pipelines
- isExpanded: false
sections:
- local: api/schedulers/overview
title: Overview
@@ -714,7 +656,8 @@
title: UniPCMultistepScheduler
- local: api/schedulers/vq_diffusion
title: VQDiffusionScheduler
- title: Internal classes
title: Schedulers
- isExpanded: false
sections:
- local: api/internal_classes_overview
title: Overview
@@ -732,3 +675,5 @@
title: VAE Image Processor
- local: api/video_processor
title: Video Processor
title: Internal classes
title: API

View File

@@ -28,9 +28,3 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
[[autodoc]] FasterCacheConfig
[[autodoc]] apply_faster_cache
### FirstBlockCacheConfig
[[autodoc]] FirstBlockCacheConfig
[[autodoc]] apply_first_block_cache

View File

@@ -16,7 +16,7 @@ Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from
<Tip>
To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf auth login`.
To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `huggingface-cli login`.
</Tip>

View File

@@ -20,12 +20,6 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or Nu
[[autodoc]] image_processor.VaeImageProcessor
## InpaintProcessor
The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.
[[autodoc]] image_processor.InpaintProcessor
## VaeImageProcessorLDM3D
The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.

View File

@@ -26,11 +26,9 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
- [`HunyuanVideoLoraLoaderMixin`] provides similar functions for [HunyuanVideo](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hunyuan_video).
- [`Lumina2LoraLoaderMixin`] provides similar functions for [Lumina2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/lumina2).
- [`WanLoraLoaderMixin`] provides similar functions for [Wan](https://huggingface.co/docs/diffusers/main/en/api/pipelines/wan).
- [`SkyReelsV2LoraLoaderMixin`] provides similar functions for [SkyReels-V2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/skyreels_v2).
- [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4).
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
- [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream)
- [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen)
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload, LoRAs and more.
<Tip>
@@ -94,10 +92,6 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse
[[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin
## SkyReelsV2LoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.SkyReelsV2LoraLoaderMixin
## AmusedLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin
@@ -106,10 +100,6 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse
[[autodoc]] loaders.lora_pipeline.HiDreamImageLoraLoaderMixin
## QwenImageLoraLoaderMixin
## WanLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin
## LoraBaseMixin
[[autodoc]] loaders.lora_base.LoraBaseMixin
[[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin

View File

@@ -44,3 +44,15 @@ model = AutoencoderKL.from_single_file(url)
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput
## FlaxAutoencoderKL
[[autodoc]] FlaxAutoencoderKL
## FlaxAutoencoderKLOutput
[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput
## FlaxDecoderOutput
[[autodoc]] models.vae_flax.FlaxDecoderOutput

View File

@@ -1,35 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLQwenImage
The model can be loaded with the following code snippet.
```python
from diffusers import AutoencoderKLQwenImage
vae = AutoencoderKLQwenImage.from_pretrained("Qwen/QwenImage-20B", subfolder="vae")
```
## AutoencoderKLQwenImage
[[autodoc]] AutoencoderKLQwenImage
- decode
- encode
- all
## AutoencoderKLOutput
[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -40,3 +40,11 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
## ControlNetOutput
[[autodoc]] models.controlnets.controlnet.ControlNetOutput
## FlaxControlNetModel
[[autodoc]] FlaxControlNetModel
## FlaxControlNetOutput
[[autodoc]] models.controlnets.controlnet_flax.FlaxControlNetOutput

View File

@@ -19,6 +19,10 @@ All models are built from the base [`ModelMixin`] class which is a [`torch.nn.Mo
## ModelMixin
[[autodoc]] ModelMixin
## FlaxModelMixin
[[autodoc]] FlaxModelMixin
## PushToHubMixin
[[autodoc]] utils.PushToHubMixin

View File

@@ -1,28 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# QwenImageTransformer2DModel
The model can be loaded with the following code snippet.
```python
from diffusers import QwenImageTransformer2DModel
transformer = QwenImageTransformer2DModel.from_pretrained("Qwen/QwenImage-20B", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## QwenImageTransformer2DModel
[[autodoc]] QwenImageTransformer2DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# SkyReelsV2Transformer3DModel
A Diffusion Transformer model for 3D video-like data was introduced in [SkyReels-V2](https://github.com/SkyworkAI/SkyReels-V2) by Skywork AI.
The model can be loaded with the following code snippet.
```python
from diffusers import SkyReelsV2Transformer3DModel
transformer = SkyReelsV2Transformer3DModel.from_pretrained("Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## SkyReelsV2Transformer3DModel
[[autodoc]] SkyReelsV2Transformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -23,3 +23,9 @@ The abstract from the paper is:
## UNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput
## FlaxUNet2DConditionModel
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionModel
## FlaxUNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput

View File

@@ -1,39 +0,0 @@
# Guiders
Guiders are components in Modular Diffusers that control how the diffusion process is guided during generation. They implement various guidance techniques to improve generation quality and control.
## BaseGuidance
[[autodoc]] diffusers.guiders.guider_utils.BaseGuidance
## ClassifierFreeGuidance
[[autodoc]] diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance
## ClassifierFreeZeroStarGuidance
[[autodoc]] diffusers.guiders.classifier_free_zero_star_guidance.ClassifierFreeZeroStarGuidance
## SkipLayerGuidance
[[autodoc]] diffusers.guiders.skip_layer_guidance.SkipLayerGuidance
## SmoothedEnergyGuidance
[[autodoc]] diffusers.guiders.smoothed_energy_guidance.SmoothedEnergyGuidance
## PerturbedAttentionGuidance
[[autodoc]] diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance
## AdaptiveProjectedGuidance
[[autodoc]] diffusers.guiders.adaptive_projected_guidance.AdaptiveProjectedGuidance
## AutoGuidance
[[autodoc]] diffusers.guiders.auto_guidance.AutoGuidance
## TangentialClassifierFreeGuidance
[[autodoc]] diffusers.guiders.tangential_classifier_free_guidance.TangentialClassifierFreeGuidance

View File

@@ -1,5 +0,0 @@
# Pipeline
## ModularPipeline
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipeline

View File

@@ -1,17 +0,0 @@
# Pipeline blocks
## ModularPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks
## SequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks
## LoopSequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.LoopSequentialPipelineBlocks
## AutoPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks

View File

@@ -1,17 +0,0 @@
# Components and configs
## ComponentSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ComponentSpec
## ConfigSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec
## ComponentsManager
[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager
## InsertableDict
[[autodoc]] diffusers.modular_pipelines.modular_pipeline_utils.InsertableDict

View File

@@ -1,9 +0,0 @@
# Pipeline states
## PipelineState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineState
## BlockState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.BlockState

View File

@@ -54,6 +54,10 @@ To check a specific pipeline or model output, refer to its corresponding API doc
[[autodoc]] pipelines.ImagePipelineOutput
## FlaxImagePipelineOutput
[[autodoc]] pipelines.pipeline_flax_utils.FlaxImagePipelineOutput
## AudioPipelineOutput
[[autodoc]] pipelines.AudioPipelineOutput

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# aMUSEd
aMUSEd was introduced in [aMUSEd: An Open MUSE Reproduction](https://huggingface.co/papers/2401.01808) by Suraj Patil, William Berman, Robin Rombach, and Patrick von Platen.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Attend-and-Excite
Attend-and-Excite for Stable Diffusion was proposed in [Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models](https://attendandexcite.github.io/Attend-and-Excite/) and provides textual attention control over image generation.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# AudioLDM
AudioLDM was proposed in [AudioLDM: Text-to-Audio Generation with Latent Diffusion Models](https://huggingface.co/papers/2301.12503) by Haohe Liu et al. Inspired by [Stable Diffusion](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/overview), AudioLDM

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# BLIP-Diffusion
BLIP-Diffusion was proposed in [BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing](https://huggingface.co/papers/2305.14720). It enables zero-shot subject-driven generation and control-guided zero-shot generation.

View File

@@ -1,44 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Bria 3.2
Bria 3.2 is the next-generation commercial-ready text-to-image model. With just 4 billion parameters, it provides exceptional aesthetics and text rendering, evaluated to deliver results on par with leading open-source models while outperforming other licensed models.
In addition to being built entirely on licensed data, 3.2 provides several advantages for enterprise and commercial use:
- Efficient Compute - the model is 3x smaller than equivalent models on the market (4B parameters vs. 12B parameters for other open-source models)
- Architecture Consistency: Same architecture as 3.1—ideal for users looking to upgrade without disruption.
- Fine-tuning Speedup: 2x faster fine-tuning on L40S and A100.
Original model checkpoints for Bria 3.2 can be found [here](https://huggingface.co/briaai/BRIA-3.2).
Github repo for Bria 3.2 can be found [here](https://github.com/Bria-AI/BRIA-3.2).
If you want to learn more about the Bria platform and get free trial access, please visit [bria.ai](https://bria.ai).
## Usage
_As the model is gated, before using it with diffusers you first need to go to the [Bria 3.2 Hugging Face page](https://huggingface.co/briaai/BRIA-3.2), fill in the form and accept the gate. Once you are in, you need to log in so that your system knows you've accepted the gate._
Use the command below to log in:
```bash
hf auth login
```
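Once logged in, a minimal generation sketch might look like the following; this is an illustrative example rather than an official snippet, and any argument beyond `prompt` should be checked against the pipeline reference below.

```python
import torch
from diffusers import BriaPipeline

pipe = BriaPipeline.from_pretrained("briaai/BRIA-3.2", torch_dtype=torch.bfloat16).to("cuda")
image = pipe(prompt="a photo of a lighthouse at sunset").images[0]
image.save("bria_example.png")
```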
## BriaPipeline
[[autodoc]] BriaPipeline
- all
- __call__

View File

@@ -36,7 +36,7 @@ import torch
from diffusers import ChromaPipeline
pipe = ChromaPipeline.from_pretrained("lodestones/Chroma", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()
pipe.enabe_model_cpu_offload()
prompt = [
"A high-fashion close-up portrait of a blonde woman in clear sunglasses. The image uses a bold teal and red color split for dramatic lighting. The background is a simple teal-green. The photo is sharp and well-composed, and is designed for viewing with anaglyph 3D glasses for optimal effect. It looks professionally done."

View File

@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
pipeline_quant_config = PipelineQuantizationConfig(
quant_backend="torchao",
quant_kwargs={"quant_type": "int8wo"},
components_to_quantize="transformer"
components_to_quantize=["transformer"]
)
# fp8 layerwise weight-casting

View File

@@ -72,3 +72,11 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
## StableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
## FlaxStableDiffusionControlNetPipeline
[[autodoc]] FlaxStableDiffusionControlNetPipeline
- all
- __call__
## FlaxStableDiffusionControlNetPipelineOutput
[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# ControlNet-XS
<div class="flex flex-wrap space-x-1">

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# ControlNet-XS with Stable Diffusion XL
ControlNet-XS was introduced in [ControlNet-XS](https://vislearn.github.io/ControlNet-XS/) by Denis Zavadski and Carsten Rother. It is based on the observation that the control model in the [original ControlNet](https://huggingface.co/papers/2302.05543) can be made much smaller and still produce good results.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Dance Diffusion
[Dance Diffusion](https://github.com/Harmonai-org/sample-generator) is by Zach Evans.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# DiffEdit
[DiffEdit: Diffusion-based semantic image editing with mask guidance](https://huggingface.co/papers/2210.11427) is by Guillaume Couairon, Jakob Verbeek, Holger Schwenk, and Matthieu Cord.

View File

@@ -25,8 +25,6 @@ Original model checkpoints for Flux can be found [here](https://huggingface.co/b
Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).
[Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
</Tip>
Flux comes in the following variants:
@@ -316,67 +314,6 @@ if integrity_checker.test_image(image_):
raise ValueError("Your image has been flagged. Choose another prompt/image or try again.")
```
### Kontext Inpainting
`FluxKontextInpaintPipeline` enables image modification within a fixed mask region. It currently supports both text-based conditioning and image-reference conditioning.
<hfoptions id="kontext-inpaint">
<hfoption id="text-only">
```python
import torch
from diffusers import FluxKontextInpaintPipeline
from diffusers.utils import load_image
prompt = "Change the yellow dinosaur to green one"
img_url = (
"https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/dinosaur_input.jpeg?raw=true"
)
mask_url = (
"https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/dinosaur_mask.png?raw=true"
)
source = load_image(img_url)
mask = load_image(mask_url)
pipe = FluxKontextInpaintPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
)
pipe.to("cuda")
image = pipe(prompt=prompt, image=source, mask_image=mask, strength=1.0).images[0]
image.save("kontext_inpainting_normal.png")
```
</hfoption>
<hfoption id="image conditioning">
```python
import torch
from diffusers import FluxKontextInpaintPipeline
from diffusers.utils import load_image
pipe = FluxKontextInpaintPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
)
pipe.to("cuda")
prompt = "Replace this ball"
img_url = "https://images.pexels.com/photos/39362/the-ball-stadion-football-the-pitch-39362.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500"
mask_url = "https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/ball_mask.png?raw=true"
image_reference_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTah3x6OL_ECMBaZ5ZlJJhNsyC-OSMLWAI-xw&s"
source = load_image(img_url)
mask = load_image(mask_url)
image_reference = load_image(image_reference_url)
mask = pipe.mask_processor.blur(mask, blur_factor=12)
image = pipe(
prompt=prompt, image=source, mask_image=mask, image_reference=image_reference, strength=1.0
).images[0]
image.save("kontext_inpainting_ref.png")
```
</hfoption>
</hfoptions>
## Combining Flux Turbo LoRAs with Flux Control, Fill, and Redux
We can combine Flux Turbo LoRAs with Flux Control and other pipelines like Fill and Redux to enable few-step inference. The example below shows how to do that for the Flux Control depth LoRA and the turbo LoRA from [`ByteDance/Hyper-SD`](https://hf.co/ByteDance/Hyper-SD).
@@ -707,15 +644,3 @@ image.save("flux-fp8-dev.png")
[[autodoc]] FluxFillPipeline
- all
- __call__
## FluxKontextPipeline
[[autodoc]] FluxKontextPipeline
- all
- __call__
## FluxKontextInpaintPipeline
[[autodoc]] FluxKontextInpaintPipeline
- all
- __call__

View File

@@ -18,7 +18,7 @@
<Tip>
[Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>

View File

@@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16
},
components_to_quantize="transformer"
components_to_quantize=["transformer"]
)
pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16
},
components_to_quantize="transformer"
components_to_quantize=["transformer"]
)
pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15)
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16
},
components_to_quantize="transformer"
components_to_quantize=["transformer"]
)
pipeline = HunyuanVideoPipeline.from_pretrained(

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# I2VGen-XL
[I2VGen-XL: High-Quality Image-to-Video Synthesis via Cascaded Diffusion Models](https://hf.co/papers/2311.04145.pdf) by Shiwei Zhang, Jiayu Wang, Yingya Zhang, Kang Zhao, Hangjie Yuan, Zhiwu Qin, Xiang Wang, Deli Zhao, and Jingren Zhou.

View File

@@ -88,7 +88,7 @@ export_to_video(video, "output.mp4", fps=24)
</hfoption>
<hfoption id="inference speed">
[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster. [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
```py
import torch

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# MusicLDM
MusicLDM was proposed in [MusicLDM: Enhancing Novelty in Text-to-Music Generation Using Beat-Synchronous Mixup Strategies](https://huggingface.co/papers/2308.01546) by Ke Chen, Yusong Wu, Haohe Liu, Marianna Nezhurina, Taylor Berg-Kirkpatrick, Shlomo Dubnov.

View File

@@ -37,7 +37,6 @@ The table below lists all the pipelines currently available in 🤗 Diffusers an
| [AudioLDM2](audioldm2) | text2audio |
| [AuraFlow](auraflow) | text2image |
| [BLIP Diffusion](blip_diffusion) | text2image |
| [Bria 3.2](bria_3_2) | text2image |
| [CogVideoX](cogvideox) | text2video |
| [Consistency Models](consistency_models) | unconditional image generation |
| [ControlNet](controlnet) | text2image, image2image, inpainting |
@@ -106,20 +105,10 @@ The table below lists all the pipelines currently available in 🤗 Diffusers an
[[autodoc]] pipelines.StableDiffusionMixin.disable_freeu
## FlaxDiffusionPipeline
[[autodoc]] pipelines.pipeline_flax_utils.FlaxDiffusionPipeline
## PushToHubMixin
[[autodoc]] utils.PushToHubMixin
## Callbacks
[[autodoc]] callbacks.PipelineCallback
[[autodoc]] callbacks.SDCFGCutoffCallback
[[autodoc]] callbacks.SDXLCFGCutoffCallback
[[autodoc]] callbacks.SDXLControlnetCFGCutoffCallback
[[autodoc]] callbacks.IPAdapterScaleCutoffCallback
[[autodoc]] callbacks.SD3CFGCutoffCallback

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Paint by Example
[Paint by Example: Exemplar-based Image Editing with Diffusion Models](https://huggingface.co/papers/2211.13227) is by Binxin Yang, Shuyang Gu, Bo Zhang, Ting Zhang, Xuejin Chen, Xiaoyan Sun, Dong Chen, Fang Wen.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# MultiDiffusion
<div class="flex flex-wrap space-x-1">

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Image-to-Video Generation with PIA (Personalized Image Animator)
<div class="flex flex-wrap space-x-1">

View File

@@ -1,167 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->
# QwenImage
<div class="flex flex-wrap space-x-1">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
</div>
Qwen-Image from the Qwen team is an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing. Experiments show strong general capabilities in both image generation and editing, with exceptional performance in text rendering, especially for Chinese.
Qwen-Image comes in the following variants:
| model type | model id |
|:----------:|:--------:|
| Qwen-Image | [`Qwen/Qwen-Image`](https://huggingface.co/Qwen/Qwen-Image) |
| Qwen-Image-Edit | [`Qwen/Qwen-Image-Edit`](https://huggingface.co/Qwen/Qwen-Image-Edit) |
| Qwen-Image-Edit Plus | [Qwen/Qwen-Image-Edit-2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) |
<Tip>
[Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
</Tip>
## LoRA for faster inference
Use a LoRA from `lightx2v/Qwen-Image-Lightning` to speed up inference by reducing the
number of steps. Refer to the code snippet below:
<details>
<summary>Code</summary>
```py
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
import torch
import math
ckpt_id = "Qwen/Qwen-Image"
# From
# https://github.com/ModelTC/Qwen-Image-Lightning/blob/342260e8f5468d2f24d084ce04f55e101007118b/generate_with_diffusers.py#L82C9-L97C10
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
pipe = DiffusionPipeline.from_pretrained(
ckpt_id, scheduler=scheduler, torch_dtype=torch.bfloat16
).to("cuda")
pipe.load_lora_weights(
"lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V1.0.safetensors"
)
prompt = "a tiny astronaut hatching from an egg on the moon, Ultra HD, 4K, cinematic composition."
negative_prompt = " "
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
width=1024,
height=1024,
num_inference_steps=8,
true_cfg_scale=1.0,
generator=torch.manual_seed(0),
).images[0]
image.save("qwen_fewsteps.png")
```
</details>
<Tip>
The `guidance_scale` parameter in the pipeline exists to support future guidance-distilled models; passing it to the pipeline currently has no effect. To enable classifier-free guidance, pass `true_cfg_scale` together with a `negative_prompt` (even an empty negative prompt like " " enables the classifier-free guidance computation).
</Tip>
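For example, here is a minimal sketch that turns classifier-free guidance on, reusing the `Qwen/Qwen-Image` checkpoint from above. The `true_cfg_scale=4.0` value is illustrative, not a tuned recommendation.
```py
# A minimal sketch: enabling classifier-free guidance with true_cfg_scale.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16).to("cuda")

image = pipe(
    prompt="a tiny astronaut hatching from an egg on the moon",
    negative_prompt=" ",   # even an empty negative prompt enables the CFG computation
    true_cfg_scale=4.0,    # values > 1.0 turn classifier-free guidance on
    num_inference_steps=50,
    generator=torch.manual_seed(0),
).images[0]
image.save("qwen_cfg.png")
```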
## Multi-image reference with QwenImageEditPlusPipeline
With [`QwenImageEditPlusPipeline`], one can provide multiple images as input reference.
```py
import torch
from PIL import Image
from diffusers import QwenImageEditPlusPipeline
from diffusers.utils import load_image
pipe = QwenImageEditPlusPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit-2509", torch_dtype=torch.bfloat16
).to("cuda")
image_1 = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/grumpy.jpg")
image_2 = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/peng.png")
image = pipe(
image=[image_1, image_2],
prompt="put the penguin and the cat at a game show called "Qwen Edit Plus Games"",
num_inference_steps=50
).images[0]
```
## QwenImagePipeline
[[autodoc]] QwenImagePipeline
- all
- __call__
## QwenImageImg2ImgPipeline
[[autodoc]] QwenImageImg2ImgPipeline
- all
- __call__
## QwenImageInpaintPipeline
[[autodoc]] QwenImageInpaintPipeline
- all
- __call__
## QwenImageEditPipeline
[[autodoc]] QwenImageEditPipeline
- all
- __call__
## QwenImageEditInpaintPipeline
[[autodoc]] QwenImageEditInpaintPipeline
- all
- __call__
## QwenImageControlNetPipeline
[[autodoc]] QwenImageControlNetPipeline
- all
- __call__
## QwenImageEditPlusPipeline
[[autodoc]] QwenImageEditPlusPipeline
- all
- __call__
## QwenImagePipelineOutput
[[autodoc]] pipelines.qwenimage.pipeline_output.QwenImagePipelineOutput

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Self-Attention Guidance
[Improving Sample Quality of Diffusion Models Using Self-Attention Guidance](https://huggingface.co/papers/2210.00939) is by Susung Hong et al.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Semantic Guidance
Semantic Guidance for Diffusion Models was proposed in [SEGA: Instructing Text-to-Image Models using Semantic Guidance](https://huggingface.co/papers/2301.12247) and provides strong semantic control over image generation.

View File

@@ -1,346 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->
<div style="float: right;">
<div class="flex flex-wrap space-x-1">
<a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
</a>
</div>
</div>
# SkyReels-V2: Infinite-length Film Generative model
[SkyReels-V2](https://huggingface.co/papers/2504.13074) by the SkyReels Team from Skywork AI.
*Recent advances in video generation have been driven by diffusion models and autoregressive frameworks, yet critical challenges persist in harmonizing prompt adherence, visual quality, motion dynamics, and duration: compromises in motion dynamics to enhance temporal visual quality, constrained video duration (5-10 seconds) to prioritize resolution, and inadequate shot-aware generation stemming from general-purpose MLLMs' inability to interpret cinematic grammar, such as shot composition, actor expressions, and camera motions. These intertwined limitations hinder realistic long-form synthesis and professional film-style generation. To address these limitations, we propose SkyReels-V2, an Infinite-length Film Generative Model, that synergizes Multi-modal Large Language Model (MLLM), Multi-stage Pretraining, Reinforcement Learning, and Diffusion Forcing Framework. Firstly, we design a comprehensive structural representation of video that combines the general descriptions by the Multi-modal LLM and the detailed shot language by sub-expert models. Aided with human annotation, we then train a unified Video Captioner, named SkyCaptioner-V1, to efficiently label the video data. Secondly, we establish progressive-resolution pretraining for the fundamental video generation, followed by a four-stage post-training enhancement: Initial concept-balanced Supervised Fine-Tuning (SFT) improves baseline quality; Motion-specific Reinforcement Learning (RL) training with human-annotated and synthetic distortion data addresses dynamic artifacts; Our diffusion forcing framework with non-decreasing noise schedules enables long-video synthesis in an efficient search space; Final high-quality SFT refines visual fidelity. All the code and models are available at [this https URL](https://github.com/SkyworkAI/SkyReels-V2).*
You can find all the original SkyReels-V2 checkpoints under the [Skywork](https://huggingface.co/collections/Skywork/skyreels-v2-6801b1b93df627d441d0d0d9) organization.
The following SkyReels-V2 models are supported in Diffusers:
- [SkyReels-V2 DF 1.3B - 540P](https://huggingface.co/Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers)
- [SkyReels-V2 DF 14B - 540P](https://huggingface.co/Skywork/SkyReels-V2-DF-14B-540P-Diffusers)
- [SkyReels-V2 DF 14B - 720P](https://huggingface.co/Skywork/SkyReels-V2-DF-14B-720P-Diffusers)
- [SkyReels-V2 T2V 14B - 540P](https://huggingface.co/Skywork/SkyReels-V2-T2V-14B-540P-Diffusers)
- [SkyReels-V2 T2V 14B - 720P](https://huggingface.co/Skywork/SkyReels-V2-T2V-14B-720P-Diffusers)
- [SkyReels-V2 I2V 1.3B - 540P](https://huggingface.co/Skywork/SkyReels-V2-I2V-1.3B-540P-Diffusers)
- [SkyReels-V2 I2V 14B - 540P](https://huggingface.co/Skywork/SkyReels-V2-I2V-14B-540P-Diffusers)
- [SkyReels-V2 I2V 14B - 720P](https://huggingface.co/Skywork/SkyReels-V2-I2V-14B-720P-Diffusers)
- [SkyReels-V2 FLF2V 1.3B - 540P](https://huggingface.co/Skywork/SkyReels-V2-FLF2V-1.3B-540P-Diffusers)
> [!TIP]
> Click on the SkyReels-V2 models in the right sidebar for more examples of video generation.
### A _Visual_ Demonstration
The example below has the following parameters:
- `base_num_frames=97`
- `num_frames=97`
- `num_inference_steps=30`
- `ar_step=5`
- `causal_block_size=5`
With `vae_scale_factor_temporal=4`, expect `5` blocks of `5` frames each as calculated by:
`num_latent_frames: (97-1)//vae_scale_factor_temporal+1 = 25 frames -> 5 blocks of 5 frames each`
And the maximum context length in the latent space is calculated with `base_num_latent_frames`:
`base_num_latent_frames = (97-1)//vae_scale_factor_temporal+1 = 25 -> 25//5 = 5 blocks`
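The frame-to-block arithmetic above can be checked with a short sketch (all values come from the example parameters):
```py
# A small sketch reproducing the frame/block arithmetic above.
num_frames = 97
vae_scale_factor_temporal = 4
causal_block_size = 5

num_latent_frames = (num_frames - 1) // vae_scale_factor_temporal + 1  # 25
num_blocks = num_latent_frames // causal_block_size                    # 5
print(num_latent_frames, num_blocks)  # 25 5
```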
Asynchronous Processing Timeline:
```text
┌─────────────────────────────────────────────────────────────────┐
│ Steps: 1 6 11 16 21 26 31 36 41 46 50 │
│ Block 1: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
│ Block 2: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
│ Block 3: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
│ Block 4: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
│ Block 5: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
└─────────────────────────────────────────────────────────────────┘
```
For Long Videos (`num_frames` > `base_num_frames`):
`base_num_frames` acts as the "sliding window size" for processing long videos.
Example: `257`-frame video with `base_num_frames=97`, `overlap_history=17`
```text
┌──── Iteration 1 (frames 1-97) ────┐
│ Processing window: 97 frames │ → 5 blocks,
│ Generates: frames 1-97 │ async processing
└───────────────────────────────────┘
┌────── Iteration 2 (frames 81-177) ──────┐
│ Processing window: 97 frames │
│ Overlap: 17 frames (81-97) from prev │ → 5 blocks,
│ Generates: frames 98-177 │ async processing
└─────────────────────────────────────────┘
┌────── Iteration 3 (frames 161-257) ──────┐
│ Processing window: 97 frames │
│ Overlap: 17 frames (161-177) from prev │ → 5 blocks,
│ Generates: frames 178-257 │ async processing
└──────────────────────────────────────────┘
```
Each iteration independently runs the asynchronous processing with its own `5` blocks.
`base_num_frames` controls:
1. Memory usage (larger window = more VRAM)
2. Model context length (must match training constraints)
3. Number of blocks per iteration (`base_num_latent_frames // causal_block_size`)
Each block takes `30` steps to complete denoising.
Block N starts at step: `1 + (N-1) x ar_step`
Total steps: `30 + (5-1) x 5 = 50` steps
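As a quick sanity check, the block start steps and the total step count follow directly from these formulas; the sketch below just evaluates them for the example values:
```py
# A short sketch of the asynchronous block scheduling arithmetic (illustrative only).
num_inference_steps = 30
ar_step = 5
num_blocks = 5  # from the frame/block calculation above

start_steps = [1 + n * ar_step for n in range(num_blocks)]       # [1, 6, 11, 16, 21]
total_steps = num_inference_steps + (num_blocks - 1) * ar_step   # 50
print(start_steps, total_steps)
```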
Synchronous mode (`ar_step=0`) would process all blocks/frames simultaneously:
```text
┌──────────────────────────────────────────────┐
│ Steps: 1 ... 30 │
│ All blocks: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
└──────────────────────────────────────────────┘
```
Total steps: `30` steps
An example on how the step matrix is constructed for asynchronous processing:
Given the parameters: (`num_inference_steps=30, flow_shift=8, num_frames=97, ar_step=5, causal_block_size=5`)
```
- num_latent_frames = (97 frames - 1) // (4 temporal downsampling) + 1 = 25
- step_template = [999, 995, 991, 986, 980, 975, 969, 963, 956, 948,
941, 932, 922, 912, 901, 888, 874, 859, 841, 822,
799, 773, 743, 708, 666, 615, 551, 470, 363, 216]
```
The algorithm creates a `50x25` `step_matrix` where:
```
- Row 1: [999×5, 999×5, 999×5, 999×5, 999×5]
- Row 2: [995×5, 999×5, 999×5, 999×5, 999×5]
- Row 3: [991×5, 999×5, 999×5, 999×5, 999×5]
- ...
- Row 7: [969×5, 995×5, 999×5, 999×5, 999×5]
- ...
- Row 21: [799×5, 888×5, 941×5, 975×5, 999×5]
- ...
- Row 35: [ 0×5, 216×5, 666×5, 822×5, 901×5]
- ...
- Row 42: [ 0×5, 0×5, 0×5, 551×5, 773×5]
- ...
- Row 50: [ 0×5, 0×5, 0×5, 0×5, 216×5]
```
Detailed Row `6` Analysis:
```
- step_matrix[5]: [ 975×5, 999×5, 999×5, 999×5, 999×5]
- step_index[5]: [ 6×5, 1×5, 0×5, 0×5, 0×5]
- step_update_mask[5]: [True×5, True×5, False×5, False×5, False×5]
- valid_interval[5]: (0, 25)
```
Key Pattern: Block `i` lags behind Block `i-1` by exactly `ar_step=5` timesteps, creating the
staggered "diffusion forcing" effect where later blocks condition on cleaner earlier blocks.
### Text-to-Video Generation
The example below demonstrates how to generate a video from text.
<hfoptions id="T2V usage">
<hfoption id="T2V memory">
Refer to the [Reduce memory usage](../../optimization/memory) guide for more details about the various memory saving techniques.
From the original repo:
>You can use `--ar_step 5` to enable asynchronous inference. When running asynchronous inference, `--causal_block_size 5` is recommended, while it should not be set for synchronous generation... Asynchronous inference takes more steps to diffuse the whole sequence, which means it will be SLOWER than synchronous mode. In our experiments, asynchronous inference may improve instruction following and visual consistency.
```py
import torch
from diffusers import AutoModel, SkyReelsV2DiffusionForcingPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
model_id = "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers"
vae = AutoModel.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
model_id,
vae=vae,
torch_dtype=torch.bfloat16,
)
pipeline.to("cuda")
flow_shift = 8.0 # 8.0 for T2V, 5.0 for I2V
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
prompt = "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."
output = pipeline(
prompt=prompt,
num_inference_steps=30,
height=544, # 720 for 720P
width=960, # 1280 for 720P
num_frames=97,
base_num_frames=97, # 121 for 720P
ar_step=5, # Controls asynchronous inference (0 for synchronous mode)
causal_block_size=5, # Number of frames in each block for asynchronous processing
overlap_history=None, # Number of frames to overlap for smooth transitions in long videos; 17 for long video generations
addnoise_condition=20, # Improves consistency in long video generation
).frames[0]
export_to_video(output, "video.mp4", fps=24, quality=8)
```
</hfoption>
</hfoptions>
### First-Last-Frame-to-Video Generation
The example below demonstrates how to use the image-to-video pipeline to generate a video using a text description, a starting frame, and an ending frame.
<hfoptions id="FLF2V usage">
<hfoption id="usage">
```python
import numpy as np
import torch
import torchvision.transforms.functional as TF
from diffusers import AutoencoderKLWan, SkyReelsV2DiffusionForcingImageToVideoPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video, load_image
model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipeline = SkyReelsV2DiffusionForcingImageToVideoPipeline.from_pretrained(
model_id, vae=vae, torch_dtype=torch.bfloat16
)
pipeline.to("cuda")
flow_shift = 5.0 # 8.0 for T2V, 5.0 for I2V
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
first_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png")
last_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png")
def aspect_ratio_resize(image, pipeline, max_area=720 * 1280):
aspect_ratio = image.height / image.width
mod_value = pipeline.vae_scale_factor_spatial * pipeline.transformer.config.patch_size[1]
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
image = image.resize((width, height))
return image, height, width
def center_crop_resize(image, height, width):
# Calculate resize ratio to match first frame dimensions
resize_ratio = max(width / image.width, height / image.height)
# Resize the image
width = round(image.width * resize_ratio)
height = round(image.height * resize_ratio)
size = [width, height]
image = TF.center_crop(image, size)
return image, height, width
first_frame, height, width = aspect_ratio_resize(first_frame, pipeline)
if last_frame.size != first_frame.size:
last_frame, _, _ = center_crop_resize(last_frame, height, width)
prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
output = pipeline(
image=first_frame, last_image=last_frame, prompt=prompt, height=height, width=width, guidance_scale=5.0
).frames[0]
export_to_video(output, "video.mp4", fps=24, quality=8)
```
</hfoption>
</hfoptions>
### Video-to-Video Generation
<hfoptions id="V2V usage">
<hfoption id="usage">
`SkyReelsV2DiffusionForcingVideoToVideoPipeline` extends a given video.
```python
import numpy as np
import torch
import torchvision.transforms.functional as TF
from diffusers import AutoencoderKLWan, SkyReelsV2DiffusionForcingVideoToVideoPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video, load_video
model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipeline = SkyReelsV2DiffusionForcingVideoToVideoPipeline.from_pretrained(
model_id, vae=vae, torch_dtype=torch.bfloat16
)
pipeline.to("cuda")
flow_shift = 5.0 # 8.0 for T2V, 5.0 for I2V
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
video = load_video("input_video.mp4")
prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
output = pipeline(
video=video, prompt=prompt, height=720, width=1280, guidance_scale=5.0, overlap_history=17,
num_inference_steps=30, num_frames=257, base_num_frames=121#, ar_step=5, causal_block_size=5,
).frames[0]
export_to_video(output, "video.mp4", fps=24, quality=8)
# Total frames will be the number of frames of the given video + 257
```
</hfoption>
</hfoptions>
## Notes
- SkyReels-V2 supports LoRAs with [`~loaders.SkyReelsV2LoraLoaderMixin.load_lora_weights`] (see the sketch below).
- `SkyReelsV2Pipeline` and `SkyReelsV2ImageToVideoPipeline` are also available without the Diffusion Forcing framework applied.
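As a rough sketch of the LoRA note above (the LoRA repository id and weight name below are placeholders, not specific recommendations):
```py
# A rough sketch of loading a LoRA into a SkyReels-V2 pipeline.
# "<lora-repo-id>" and "<weight-name>.safetensors" are placeholders.
import torch
from diffusers import SkyReelsV2DiffusionForcingPipeline

pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
    "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers", torch_dtype=torch.bfloat16
).to("cuda")
pipeline.load_lora_weights("<lora-repo-id>", weight_name="<weight-name>.safetensors")
```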
## SkyReelsV2DiffusionForcingPipeline
[[autodoc]] SkyReelsV2DiffusionForcingPipeline
- all
- __call__
## SkyReelsV2DiffusionForcingImageToVideoPipeline
[[autodoc]] SkyReelsV2DiffusionForcingImageToVideoPipeline
- all
- __call__
## SkyReelsV2DiffusionForcingVideoToVideoPipeline
[[autodoc]] SkyReelsV2DiffusionForcingVideoToVideoPipeline
- all
- __call__
## SkyReelsV2Pipeline
[[autodoc]] SkyReelsV2Pipeline
- all
- __call__
## SkyReelsV2ImageToVideoPipeline
[[autodoc]] SkyReelsV2ImageToVideoPipeline
- all
- __call__
## SkyReelsV2PipelineOutput
[[autodoc]] pipelines.skyreels_v2.pipeline_output.SkyReelsV2PipelineOutput

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# GLIGEN (Grounded Language-to-Image Generation)
The GLIGEN model was created by researchers and engineers from [University of Wisconsin-Madison, Columbia University, and Microsoft](https://github.com/gligen/GLIGEN). The [`StableDiffusionGLIGENPipeline`] and [`StableDiffusionGLIGENTextImagePipeline`] can generate photorealistic images conditioned on grounding inputs. Along with text and bounding boxes with [`StableDiffusionGLIGENPipeline`], if input images are given, [`StableDiffusionGLIGENTextImagePipeline`] can insert objects described by text at the region defined by bounding boxes. Otherwise, it'll generate an image described by the caption/prompt and insert objects described by text at the region defined by bounding boxes. It's trained on COCO2014D and COCO2014CD datasets, and the model uses a frozen CLIP ViT-L/14 text encoder to condition itself on grounding inputs.

View File

@@ -47,3 +47,13 @@ Make sure to check out the Stable Diffusion [Tips](overview#tips) section to lea
## StableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
## FlaxStableDiffusionImg2ImgPipeline
[[autodoc]] FlaxStableDiffusionImg2ImgPipeline
- all
- __call__
## FlaxStableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput

View File

@@ -49,3 +49,13 @@ If you're interested in using one of the official checkpoints for a task, explor
## StableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
## FlaxStableDiffusionInpaintPipeline
[[autodoc]] FlaxStableDiffusionInpaintPipeline
- all
- __call__
## FlaxStableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# K-Diffusion
[k-diffusion](https://github.com/crowsonkb/k-diffusion) is a popular library created by [Katherine Crowson](https://github.com/crowsonkb/). We provide `StableDiffusionKDiffusionPipeline` and `StableDiffusionXLKDiffusionPipeline` that allow you to run Stable Diffusion with samplers from k-diffusion.

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Text-to-(RGB, depth)
<div class="flex flex-wrap space-x-1">

View File

@@ -31,7 +31,7 @@ _As the model is gated, before using it with diffusers you first need to go to t
Use the command below to log in:
```bash
hf auth login
huggingface-cli login
```
<Tip>

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Safe Stable Diffusion
Safe Stable Diffusion was proposed in [Safe Latent Diffusion: Mitigating Inappropriate Degeneration in Diffusion Models](https://huggingface.co/papers/2211.05105) and mitigates inappropriate degeneration from Stable Diffusion models because they're trained on unfiltered web-crawled datasets. For instance, Stable Diffusion may unexpectedly generate nudity, violence, images depicting self-harm, and otherwise offensive content. Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces this type of content.

View File

@@ -51,3 +51,13 @@ If you're interested in using one of the official checkpoints for a task, explor
## StableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
## FlaxStableDiffusionPipeline
[[autodoc]] FlaxStableDiffusionPipeline
- all
- __call__
## FlaxStableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput

View File

@@ -10,8 +10,11 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
<Tip warning={true}>
🧪 This pipeline is for research purposes only.
</Tip>
# Text-to-video

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Text2Video-Zero
<div class="flex flex-wrap space-x-1">

View File

@@ -7,9 +7,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# unCLIP
[Hierarchical Text-Conditional Image Generation with CLIP Latents](https://huggingface.co/papers/2204.06125) is by Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, Mark Chen. The unCLIP model in 🤗 Diffusers comes from kakaobrain's [karlo](https://github.com/kakaobrain/karlo).

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# UniDiffuser
<div class="flex flex-wrap space-x-1">

View File

@@ -20,7 +20,7 @@
</div>
</div>
# Wan
# Wan2.1
[Wan-2.1](https://huggingface.co/papers/2503.20314) by the Wan Team.
@@ -29,7 +29,6 @@
You can find all the original Wan2.1 checkpoints under the [Wan-AI](https://huggingface.co/Wan-AI) organization.
The following Wan models are supported in Diffusers:
- [Wan 2.1 T2V 1.3B](https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B-Diffusers)
- [Wan 2.1 T2V 14B](https://huggingface.co/Wan-AI/Wan2.1-T2V-14B-Diffusers)
- [Wan 2.1 I2V 14B - 480P](https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers)
@@ -37,12 +36,9 @@ The following Wan models are supported in Diffusers:
- [Wan 2.1 FLF2V 14B - 720P](https://huggingface.co/Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers)
- [Wan 2.1 VACE 1.3B](https://huggingface.co/Wan-AI/Wan2.1-VACE-1.3B-diffusers)
- [Wan 2.1 VACE 14B](https://huggingface.co/Wan-AI/Wan2.1-VACE-14B-diffusers)
- [Wan 2.2 T2V 14B](https://huggingface.co/Wan-AI/Wan2.2-T2V-A14B-Diffusers)
- [Wan 2.2 I2V 14B](https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers)
- [Wan 2.2 TI2V 5B](https://huggingface.co/Wan-AI/Wan2.2-TI2V-5B-Diffusers)
> [!TIP]
> Click on the Wan models in the right sidebar for more examples of video generation.
> Click on the Wan2.1 models in the right sidebar for more examples of video generation.
### Text-to-Video Generation
@@ -119,7 +115,7 @@ export_to_video(output, "output.mp4", fps=16)
</hfoption>
<hfoption id="T2V inference speed">
[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster. [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
```py
# pip install ftfy
@@ -331,10 +327,6 @@ The general rule of thumb to keep in mind when preparing inputs for the VACE pip
- Try lower `shift` values (`2.0` to `5.0`) for lower resolution videos and higher `shift` values (`7.0` to `12.0`) for higher resolution videos.
- Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involved. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more.
- Wan 2.2 has two denoisers. By default, LoRAs are only loaded into the first denoiser. One can set `load_into_transformer_2=True` to load LoRAs into the second denoiser. Refer to [this](https://github.com/huggingface/diffusers/pull/12074#issue-3292620048) and [this](https://github.com/huggingface/diffusers/pull/12074#issuecomment-3155896144) examples to learn more.
## WanPipeline
[[autodoc]] WanPipeline

View File

@@ -12,9 +12,6 @@ specific language governing permissions and limitations under the License.
# Würstchen
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
<div class="flex flex-wrap space-x-1">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
</div>

View File

@@ -27,19 +27,19 @@ Learn how to quantize models in the [Quantization](../quantization/overview) gui
## BitsAndBytesConfig
[[autodoc]] quantizers.quantization_config.BitsAndBytesConfig
[[autodoc]] BitsAndBytesConfig
## GGUFQuantizationConfig
[[autodoc]] quantizers.quantization_config.GGUFQuantizationConfig
[[autodoc]] GGUFQuantizationConfig
## QuantoConfig
[[autodoc]] quantizers.quantization_config.QuantoConfig
[[autodoc]] QuantoConfig
## TorchAoConfig
[[autodoc]] quantizers.quantization_config.TorchAoConfig
[[autodoc]] TorchAoConfig
## DiffusersQuantizer

View File

@@ -12,24 +12,37 @@ specific language governing permissions and limitations under the License.
<p align="center">
<br>
<img src="https://raw.githubusercontent.com/huggingface/diffusers/77aadfee6a891ab9fcfb780f87c693f7a5beeb8e/docs/source/imgs/diffusers_library.jpg" width="400" style="border: none;"/>
<img src="https://raw.githubusercontent.com/huggingface/diffusers/77aadfee6a891ab9fcfb780f87c693f7a5beeb8e/docs/source/imgs/diffusers_library.jpg" width="400"/>
<br>
</p>
# Diffusers
Diffusers is a library of state-of-the-art pretrained diffusion models for generating videos, images, and audio.
🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or want to train your own diffusion model, 🤗 Diffusers is a modular toolbox that supports both. Our library is designed with a focus on [usability over performance](conceptual/philosophy#usability-over-performance), [simple over easy](conceptual/philosophy#simple-over-easy), and [customizability over abstractions](conceptual/philosophy#tweakable-contributorfriendly-over-abstraction).
The library revolves around the [`DiffusionPipeline`], an API designed for:
The library has three main components:
- easy inference with only a few lines of code
- flexibility to mix-and-match pipeline components (models, schedulers)
- loading and using adapters like LoRA
- State-of-the-art diffusion pipelines for inference with just a few lines of code. There are many pipelines in 🤗 Diffusers, check out the table in the pipeline [overview](api/pipelines/overview) for a complete list of available pipelines and the task they solve.
- Interchangeable [noise schedulers](api/schedulers/overview) for balancing trade-offs between generation speed and quality.
- Pretrained [models](api/models) that can be used as building blocks, and combined with schedulers, for creating your own end-to-end diffusion systems.
Diffusers also comes with optimizations - such as offloading and quantization - to ensure even the largest models are accessible on memory-constrained devices. If memory is not an issue, Diffusers supports torch.compile to boost inference speed.
Get started right away with a Diffusers model on the [Hub](https://huggingface.co/models?library=diffusers&sort=trending) today!
## Learn
If you're a beginner, we recommend starting with the [Hugging Face Diffusion Models Course](https://huggingface.co/learn/diffusion-course/unit0/1). You'll learn the theory behind diffusion models, and learn how to use the Diffusers library to generate images, fine-tune your own models, and more.
<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./tutorials/tutorial_overview"
><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div>
<p class="text-gray-700">Learn the fundamental skills you need to start generating outputs, build your own diffusion system, and train a diffusion model. We recommend starting here if you're using 🤗 Diffusers for the first time!</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./using-diffusers/loading_overview"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div>
<p class="text-gray-700">Practical guides for helping you load pipelines, models, and schedulers. You'll also learn how to use pipelines for specific tasks, control how outputs are generated, optimize for inference speed, and different training techniques.</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./conceptual/philosophy"
><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div>
<p class="text-gray-700">Understand why the library was designed the way it was, and learn more about the ethical guidelines and safety implementations for using the library.</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./api/models/overview"
><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div>
<p class="text-gray-700">Technical descriptions of how 🤗 Diffusers classes and methods work.</p>
</a>
</div>
</div>

View File

@@ -12,135 +12,183 @@ specific language governing permissions and limitations under the License.
# Installation
Diffusers is tested on Python 3.8+ and PyTorch 1.4+. Install [PyTorch](https://pytorch.org/get-started/locally/) according to your system and setup.
🤗 Diffusers is tested on Python 3.8+, PyTorch 1.7.0+, and Flax. Follow the installation instructions below for the deep learning library you are using:
Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/) for easier management of separate projects and to avoid compatibility issues between dependencies. Use [uv](https://docs.astral.sh/uv/), a Rust-based Python package and project manager, to create a virtual environment and install Diffusers.
- [PyTorch](https://pytorch.org/get-started/locally/) installation instructions
- [Flax](https://flax.readthedocs.io/en/latest/) installation instructions
## Install with pip
You should install 🤗 Diffusers in a [virtual environment](https://docs.python.org/3/library/venv.html).
If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
A virtual environment makes it easier to manage different projects and avoid compatibility issues between dependencies.
Create a virtual environment with Python or [uv](https://docs.astral.sh/uv/) (refer to [Installation](https://docs.astral.sh/uv/getting-started/installation/) for installation instructions), a fast Rust-based Python package and project manager.
<hfoptions id="install">
<hfoption id="uv">
```bash
uv venv my-env
source my-env/bin/activate
```
Install Diffusers with one of the following methods.
<hfoptions id="install">
<hfoption id="pip">
PyTorch only supports Python 3.8 - 3.11 on Windows.
```bash
uv pip install diffusers["torch"] transformers
```
</hfoption>
<hfoption id="conda">
<hfoption id="Python">
```bash
conda install -c conda-forge diffusers
```
</hfoption>
<hfoption id="source">
A source install installs the `main` version instead of the latest `stable` version. The `main` version is useful for staying updated with the latest changes but it may not always be stable. If you run into a problem, open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) and we will try to resolve it as soon as possible.
Make sure [Accelerate](https://huggingface.co/docs/accelerate/index) is installed.
```bash
uv pip install accelerate
```
Install Diffusers from source with the command below.
```bash
uv pip install git+https://github.com/huggingface/diffusers
python -m venv my-env
source my-env/bin/activate
```
</hfoption>
</hfoptions>
You should also install 🤗 Transformers because 🤗 Diffusers relies on its models.
<frameworkcontent>
<pt>
PyTorch only supports Python 3.8 - 3.11 on Windows. Install Diffusers with uv.
```bash
uv pip install diffusers["torch"] transformers
```
You can also install Diffusers with pip.
```bash
pip install diffusers["torch"] transformers
```
</pt>
<jax>
Install Diffusers with uv.
```bash
uv pip install diffusers["flax"] transformers
```
You can also install Diffusers with pip.
```bash
pip install diffusers["flax"] transformers
```
</jax>
</frameworkcontent>
## Install with conda
After activating your virtual environment, install 🤗 Diffusers with `conda` (maintained by the community):
```bash
conda install -c conda-forge diffusers
```
## Install from source
Before installing 🤗 Diffusers from source, make sure you have PyTorch and 🤗 Accelerate installed.
To install 🤗 Accelerate:
```bash
pip install accelerate
```
Then install 🤗 Diffusers from source:
```bash
pip install git+https://github.com/huggingface/diffusers
```
This command installs the bleeding edge `main` version rather than the latest `stable` version.
The `main` version is useful for staying up-to-date with the latest developments.
For instance, if a bug has been fixed since the last official release but a new release hasn't been rolled out yet.
However, this means the `main` version may not always be stable.
We strive to keep the `main` version operational, and most issues are usually resolved within a few hours or a day.
If you run into a problem, please open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) so we can fix it even sooner!
## Editable install
An editable install is recommended for development workflows or if you're using the `main` version of the source code. A special link is created between the cloned repository and the Python library paths. This avoids reinstalling a package after every change.
You will need an editable install if you'd like to:
Clone the repository and install Diffusers with the following commands.
* Use the `main` version of the source code.
* Contribute to 🤗 Diffusers and need to test changes in the code.
Clone the repository and install 🤗 Diffusers with the following commands:
```bash
git clone https://github.com/huggingface/diffusers.git
cd diffusers
uv pip install -e ".[torch]"
```
> [!WARNING]
> You must keep the `diffusers` folder if you want to keep using the library with the editable install.
<frameworkcontent>
<pt>
```bash
pip install -e ".[torch]"
```
</pt>
<jax>
```bash
pip install -e ".[flax]"
```
</jax>
</frameworkcontent>
Update your cloned repository to the latest version of Diffusers with the command below.
These commands link the cloned repository folder with your Python library paths.
Python will now look inside the folder you cloned to in addition to the normal library paths.
For example, if your Python packages are typically installed in `~/anaconda3/envs/main/lib/python3.10/site-packages/`, Python will also search the `~/diffusers/` folder you cloned to.
<Tip warning={true}>
You must keep the `diffusers` folder if you want to keep using the library.
</Tip>
Now you can easily update your clone to the latest version of 🤗 Diffusers with the following command:
```bash
cd ~/diffusers/
git pull
```
Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
## Cache
Model weights and files are downloaded from the Hub to a cache, which is usually your home directory. Change the cache location with the [HF_HOME](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhome) or [HF_HUB_CACHE](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhubcache) environment variables or configuring the `cache_dir` parameter in methods like [`~DiffusionPipeline.from_pretrained`].
Model weights and files are downloaded from the Hub to a cache which is usually your home directory. You can change the cache location by specifying the `HF_HOME` or `HUGGINGFACE_HUB_CACHE` environment variables or configuring the `cache_dir` parameter in methods like [`~DiffusionPipeline.from_pretrained`].
<hfoptions id="cache">
<hfoption id="env variable">
```bash
export HF_HOME="/path/to/your/cache"
export HF_HUB_CACHE="/path/to/your/hub/cache"
```
</hfoption>
<hfoption id="from_pretrained">
```py
from diffusers import DiffusionPipeline
pipeline = DiffusionPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
cache_dir="/path/to/your/cache"
)
```
</hfoption>
</hfoptions>
Cached files allow you to use Diffusers offline. Set the [HF_HUB_OFFLINE](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhuboffline) environment variable to `1` to prevent Diffusers from connecting to the internet.
Cached files allow you to run 🤗 Diffusers offline. To prevent 🤗 Diffusers from connecting to the internet, set the `HF_HUB_OFFLINE` environment variable to `1` and 🤗 Diffusers will only load previously downloaded files in the cache.
```shell
export HF_HUB_OFFLINE=1
```
For more details about managing and cleaning the cache, take a look at the [Understand caching](https://huggingface.co/docs/huggingface_hub/guides/manage-cache) guide.
For more details about managing and cleaning the cache, take a look at the [caching](https://huggingface.co/docs/huggingface_hub/guides/manage-cache) guide.
## Telemetry logging
Diffusers gathers telemetry information during [`~DiffusionPipeline.from_pretrained`] requests.
The data gathered includes the Diffusers and PyTorch version, the requested model or pipeline class,
and the path to a pretrained checkpoint if it is hosted on the Hub.
Our library gathers telemetry information during [`~DiffusionPipeline.from_pretrained`] requests.
The data gathered includes the version of 🤗 Diffusers and PyTorch/Flax, the requested model or pipeline class,
and the path to a pretrained checkpoint if it is hosted on the Hugging Face Hub.
This usage data helps us debug issues and prioritize new features.
Telemetry is only sent when loading models and pipelines from the Hub,
and it is not collected if you're loading local files.
Opt-out and disable telemetry collection with the [HF_HUB_DISABLE_TELEMETRY](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhubdisabletelemetry) environment variable.
We understand that not everyone wants to share additional information, and we respect your privacy.
You can disable telemetry collection by setting the `HF_HUB_DISABLE_TELEMETRY` environment variable from your terminal:
<hfoptions id="telemetry">
<hfoption id="Linux/macOS">
On Linux/MacOS:
```bash
export HF_HUB_DISABLE_TELEMETRY=1
```
</hfoption>
<hfoption id="Windows">
On Windows:
```bash
set HF_HUB_DISABLE_TELEMETRY=1
```
</hfoption>
</hfoptions>

View File

@@ -1,156 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# AutoPipelineBlocks
[`~modular_pipelines.AutoPipelineBlocks`] are a multi-block type containing blocks that support different workflows. It automatically selects which sub-blocks to run based on the input provided at runtime. This is typically used to package multiple workflows - text-to-image, image-to-image, inpaint - into a single pipeline for convenience.
This guide shows how to create [`~modular_pipelines.AutoPipelineBlocks`].
Create three [`~modular_pipelines.ModularPipelineBlocks`] for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline.
<hfoptions id="auto">
<hfoption id="text-to-image">
```py
import torch
from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam
class TextToImageBlock(ModularPipelineBlocks):
model_name = "text2img"
@property
def inputs(self):
return [InputParam(name="prompt")]
@property
def intermediate_outputs(self):
return []
@property
def description(self):
return "I'm a text-to-image workflow!"
def __call__(self, components, state):
block_state = self.get_block_state(state)
print("running the text-to-image workflow")
# Add your text-to-image logic here
# For example: generate image from prompt
self.set_block_state(state, block_state)
return components, state
```
</hfoption>
<hfoption id="image-to-image">
```py
class ImageToImageBlock(ModularPipelineBlocks):
model_name = "img2img"
@property
def inputs(self):
return [InputParam(name="prompt"), InputParam(name="image")]
@property
def intermediate_outputs(self):
return []
@property
def description(self):
return "I'm an image-to-image workflow!"
def __call__(self, components, state):
block_state = self.get_block_state(state)
print("running the image-to-image workflow")
# Add your image-to-image logic here
# For example: transform input image based on prompt
self.set_block_state(state, block_state)
return components, state
```
</hfoption>
<hfoption id="inpaint">
```py
class InpaintBlock(ModularPipelineBlocks):
model_name = "inpaint"
@property
def inputs(self):
return [InputParam(name="prompt"), InputParam(name="image"), InputParam(name="mask")]
@property
def intermediate_outputs(self):
return []
@property
def description(self):
return "I'm an inpaint workflow!"
def __call__(self, components, state):
block_state = self.get_block_state(state)
print("running the inpaint workflow")
# Add your inpainting logic here
# For example: fill masked areas based on prompt
self.set_block_state(state, block_state)
return components, state
```
</hfoption>
</hfoptions>
Create an [`~modular_pipelines.AutoPipelineBlocks`] class that includes a list of the sub-block classes and their corresponding block names.
You also need to include `block_trigger_inputs`, a list of input names that trigger the corresponding block. If a trigger input is provided at runtime, then that block is selected to run. Use `None` to specify the default block to run if no trigger inputs are detected.
Lastly, it is important to include a `description` that clearly explains which inputs trigger which workflow. This helps users understand how to run specific workflows.
```py
from diffusers.modular_pipelines import AutoPipelineBlocks
class AutoImageBlocks(AutoPipelineBlocks):
# List of sub-block classes to choose from
block_classes = [InpaintBlock, ImageToImageBlock, TextToImageBlock]
# Names for each block in the same order
block_names = ["inpaint", "img2img", "text2img"]
# Trigger inputs that determine which block to run
# - "mask" triggers inpaint workflow
# - "image" triggers img2img workflow (but only if mask is not provided)
# - if none of above, runs the text2img workflow (default)
block_trigger_inputs = ["mask", "image", None]
# Description is extremely important for AutoPipelineBlocks
@property
def description(self):
return (
"Pipeline generates images given different types of conditions!\n"
+ "This is an auto pipeline block that works for text2img, img2img and inpainting tasks.\n"
+ " - inpaint workflow is run when `mask` is provided.\n"
+ " - img2img workflow is run when `image` is provided (but only when `mask` is not provided).\n"
+ " - text2img workflow is run when neither `image` nor `mask` is provided.\n"
)
```
It is **very** important to include a `description` to avoid any confusion over how to run a block and what inputs are required. While [`~modular_pipelines.AutoPipelineBlocks`] is convenient, its conditional logic may be difficult to follow if it isn't properly explained.
Create an instance of `AutoImageBlocks`.
```py
auto_blocks = AutoImageBlocks()
```
For more complex compositions, such as nested [`~modular_pipelines.AutoPipelineBlocks`] blocks when they're used as sub-blocks in larger pipelines, use the [`~modular_pipelines.SequentialPipelineBlocks.get_execution_blocks`] method to extract the block that actually runs based on your input.
```py
auto_blocks.get_execution_blocks("mask")
```

View File

@@ -1,190 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# ComponentsManager
The [`ComponentsManager`] is a model registry and management system for Modular Diffusers. It adds and tracks models, stores useful metadata (model size, device placement, adapters), prevents duplicate model instances, and supports offloading.
This guide will show you how to use [`ComponentsManager`] to manage components and device memory.
## Add a component
The [`ComponentsManager`] should be created alongside a [`ModularPipeline`] in either [`~ModularPipeline.from_pretrained`] or [`~ModularPipelineBlocks.init_pipeline`].
> [!TIP]
> The `collection` parameter is optional but makes it easier to organize and manage components.
<hfoptions id="create">
<hfoption id="from_pretrained">
```py
from diffusers import ModularPipeline, ComponentsManager
comp = ComponentsManager()
pipe = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test1")
```
</hfoption>
<hfoption id="init_pipeline">
```py
from diffusers import ComponentsManager
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS
t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
components = ComponentsManager()
t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=components)
```
</hfoption>
</hfoptions>
Components are only loaded and registered when [`~ModularPipeline.load_components`] is called. The example below uses [`~ModularPipeline.load_components`] to load the first pipeline's components, and then creates a second pipeline that reuses all of them and assigns it to a different collection.
```py
pipe.load_components()
pipe2 = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test2")
```
Use the [`~ModularPipeline.null_component_names`] property to identify any components that need to be loaded, retrieve them with [`~ComponentsManager.get_components_by_names`], and then call [`~ModularPipeline.update_components`] to add the missing components.
```py
pipe2.null_component_names
['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'image_encoder', 'unet', 'vae', 'scheduler', 'controlnet']
comp_dict = comp.get_components_by_names(names=pipe2.null_component_names)
pipe2.update_components(**comp_dict)
```
To add individual components, use the [`~ComponentsManager.add`] method. This registers a component with a unique id.
```py
from diffusers import AutoModel
text_encoder = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder")
component_id = comp.add("text_encoder", text_encoder)
comp
```
Use [`~ComponentsManager.remove`] to remove a component using its id.
```py
comp.remove("text_encoder_139917733042864")
```
## Retrieve a component
The [`ComponentsManager`] provides several methods to retrieve registered components.
### get_one
The [`~ComponentsManager.get_one`] method returns a single component and supports pattern matching for the `name` parameter. If multiple components match, [`~ComponentsManager.get_one`] returns an error.
| Pattern | Example | Description |
|-------------|----------------------------------|-------------------------------------------|
| exact | `comp.get_one(name="unet")` | exact name match |
| wildcard | `comp.get_one(name="unet*")` | names starting with "unet" |
| exclusion | `comp.get_one(name="!unet")` | exclude components named "unet" |
| or | `comp.get_one(name="unet&#124;vae")` | name is "unet" or "vae" |
[`~ComponentsManager.get_one`] also filters components by the `collection` argument or `load_id` argument.
```py
comp.get_one(name="unet", collection="sdxl")
```
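For illustration, the patterns from the table look like this in code, assuming each lookup resolves to exactly one registered component (otherwise [`~ComponentsManager.get_one`] raises an error):
```py
# Illustrative lookups; each call must resolve to exactly one component.
unet = comp.get_one(name="unet")             # exact match
unet_like = comp.get_one(name="unet*")       # wildcard: names starting with "unet"
not_unet = comp.get_one(name="!unet")        # exclusion: any component not named "unet"
unet_or_vae = comp.get_one(name="unet|vae")  # OR: named either "unet" or "vae"
```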
### get_components_by_names
The [`~ComponentsManager.get_components_by_names`] method accepts a list of names and returns a dictionary mapping names to components. This is especially useful with [`ModularPipeline`] since they provide lists of required component names and the returned dictionary can be passed directly to [`~ModularPipeline.update_components`].
```py
component_dict = comp.get_components_by_names(names=["text_encoder", "unet", "vae"])
{"text_encoder": component1, "unet": component2, "vae": component3}
```
## Duplicate detection
It is recommended to load model components with [`ComponentSpec`] to assign each component a unique id that encodes its loading parameters. This allows [`ComponentsManager`] to automatically detect and prevent duplicate model instances even when different objects represent the same underlying checkpoint.
```py
from diffusers import AutoModel, ComponentSpec, ComponentsManager
from transformers import CLIPTextModel
comp = ComponentsManager()
# Create ComponentSpec for the first text encoder
spec = ComponentSpec(name="text_encoder", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder", type_hint=AutoModel)
# Create ComponentSpec for a duplicate text encoder (it is same checkpoint, from the same repo/subfolder)
spec_duplicated = ComponentSpec(name="text_encoder_duplicated", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder", type_hint=CLIPTextModel)
# Load and add both components - the manager will detect they're the same model
comp.add("text_encoder", spec.load())
comp.add("text_encoder_duplicated", spec_duplicated.load())
```
This returns a warning with instructions for removing the duplicate.
```py
ComponentsManager: adding component 'text_encoder_duplicated_139917580682672', but it has duplicate load_id 'stabilityai/stable-diffusion-xl-base-1.0|text_encoder|null|null' with existing components: text_encoder_139918506246832. To remove a duplicate, call `components_manager.remove('<component_id>')`.
'text_encoder_duplicated_139917580682672'
```
You can also add a component without using [`ComponentSpec`], and duplicate detection still works in most cases, even if you add the same component under a different name.
However, [`ComponentsManager`] can't detect duplicates when you load the same component into different objects. In this case, you should load the model with [`ComponentSpec`].
```py
text_encoder_2 = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder")
comp.add("text_encoder", text_encoder_2)
'text_encoder_139917732983664'
```
## Collections
Collections are labels assigned to components for better organization and management. Add a component to a collection with the `collection` argument in [`~ComponentsManager.add`].
Only one component per name is allowed in each collection. Adding a second component with the same name automatically removes the first component.
```py
from diffusers import AutoModel, ComponentSpec, ComponentsManager
comp = ComponentsManager()
# Create ComponentSpec for the first UNet
spec = ComponentSpec(name="unet", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", type_hint=AutoModel)
# Create ComponentSpec for a different UNet
spec2 = ComponentSpec(name="unet", repo="RunDiffusion/Juggernaut-XL-v9", subfolder="unet", type_hint=AutoModel, variant="fp16")
# Add both UNets to the same collection - the second one will replace the first
comp.add("unet", spec.load(), collection="sdxl")
comp.add("unet", spec2.load(), collection="sdxl")
```
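A quick check with [`~ComponentsManager.get_one`] confirms only the most recently added UNet remains in the collection:
```py
# returns the Juggernaut-XL-v9 UNet because it replaced the first "unet" in the "sdxl" collection
comp.get_one(name="unet", collection="sdxl")
```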
This makes it convenient to work with node-based systems because you can:
- Mark all models as loaded from one node with the `collection` label.
- Automatically replace models when new checkpoints are loaded under the same name.
- Batch delete all models in a collection when a node is removed, as sketched below.
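A minimal sketch of that workflow, assuming a hypothetical node named `node_1` and keeping track of the component ids returned by [`~ComponentsManager.add`] so they can be removed later:
```py
from diffusers import AutoModel, ComponentSpec, ComponentsManager

comp = ComponentsManager()

# every model loaded by this node is tagged with the same collection label
unet_spec = ComponentSpec(name="unet", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", type_hint=AutoModel)
vae_spec = ComponentSpec(name="vae", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="vae", type_hint=AutoModel)
node_1_ids = [
    comp.add("unet", unet_spec.load(), collection="node_1"),
    comp.add("vae", vae_spec.load(), collection="node_1"),
]

# batch delete everything the node loaded when the node is removed
for component_id in node_1_ids:
    comp.remove(component_id)
```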
## Offloading
The [`~ComponentsManager.enable_auto_cpu_offload`] method is a global offloading strategy that works across all models regardless of which pipeline is using them. Once enabled, you don't need to worry about device placement if you add or remove components.
```py
comp.enable_auto_cpu_offload(device="cuda")
```
All models begin on the CPU. [`ComponentsManager`] moves a model to the appropriate device right before it is needed and moves other models back to the CPU when GPU memory runs low.
You can set your own rules for which models to offload first.

View File

@@ -1,175 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Guiders
[Classifier-free guidance](https://huggingface.co/papers/2207.12598) steers generation toward outputs that better match a prompt and is commonly used to improve generation quality, control, and prompt adherence. There are different types of guidance methods, and in Diffusers, they are known as *guiders*. Like blocks, it is easy to switch between guiders for different use cases without rewriting the pipeline.
This guide will show you how to switch guiders, adjust guider parameters, and load and share them on the Hub.
## Switching guiders
[`ClassifierFreeGuidance`] is the default guider and is created when a pipeline is initialized with [`~ModularPipelineBlocks.init_pipeline`]. Because it is created with `from_config`, it doesn't require a loading specification from a modular repository, and it isn't listed in `modular_model_index.json`.
Use [`~ModularPipeline.get_component_spec`] to inspect a guider.
```py
t2i_pipeline.get_component_spec("guider")
ComponentSpec(name='guider', type_hint=<class 'diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance'>, description=None, config=FrozenDict([('guidance_scale', 7.5), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['start', 'guidance_rescale', 'stop', 'use_original_formulation'])]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config')
```
Switch to a different guider by passing the new guider to [`~ModularPipeline.update_components`].
> [!TIP]
> Changing guiders prints a message letting you know you're changing the guider type.
> ```bash
> ModularPipeline.update_components: adding guider with new type: PerturbedAttentionGuidance, previous type: ClassifierFreeGuidance
> ```
```py
from diffusers import LayerSkipConfig, PerturbedAttentionGuidance
config = LayerSkipConfig(indices=[2, 9], fqn="mid_block.attentions.0.transformer_blocks", skip_attention=False, skip_attention_scores=True, skip_ff=False)
guider = PerturbedAttentionGuidance(
    guidance_scale=5.0, perturbed_guidance_scale=2.5, perturbed_guidance_config=config
)
t2i_pipeline.update_components(guider=guider)
```
Use [`~ModularPipeline.get_component_spec`] again to verify the guider type is different.
```py
t2i_pipeline.get_component_spec("guider")
ComponentSpec(name='guider', type_hint=<class 'diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance'>, description=None, config=FrozenDict([('guidance_scale', 5.0), ('perturbed_guidance_scale', 2.5), ('perturbed_guidance_start', 0.01), ('perturbed_guidance_stop', 0.2), ('perturbed_guidance_layers', None), ('perturbed_guidance_config', LayerSkipConfig(indices=[2, 9], fqn='mid_block.attentions.0.transformer_blocks', skip_attention=False, skip_attention_scores=True, skip_ff=False, dropout=1.0)), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['perturbed_guidance_start', 'use_original_formulation', 'perturbed_guidance_layers', 'stop', 'start', 'guidance_rescale', 'perturbed_guidance_stop']), ('_class_name', 'PerturbedAttentionGuidance'), ('_diffusers_version', '0.35.0.dev0')]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config')
```
## Loading custom guiders
Guiders saved on the Hub with a loading specification in a `modular_model_index.json` file are treated as `from_pretrained` components instead of `from_config` components.
```json
{
  "guider": [
    null,
    null,
    {
      "repo": "YiYiXu/modular-loader-t2i-guider",
      "revision": null,
      "subfolder": "pag_guider",
      "type_hint": [
        "diffusers",
        "PerturbedAttentionGuidance"
      ],
      "variant": null
    }
  ]
}
```
The guider is only created after calling [`~ModularPipeline.load_components`] based on the loading specification in `modular_model_index.json`.
```py
t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-doc-guider")
# not created during init
assert t2i_pipeline.guider is None
t2i_pipeline.load_components()
# loaded as PAG guider
t2i_pipeline.guider
```
## Changing guider parameters
Guider parameters can be adjusted with either the [`~ComponentSpec.create`] method or [`~ModularPipeline.update_components`]. The example below changes the `guidance_scale` value.
<hfoptions id="switch">
<hfoption id="create">
```py
guider_spec = t2i_pipeline.get_component_spec("guider")
guider = guider_spec.create(guidance_scale=10)
t2i_pipeline.update_components(guider=guider)
```
</hfoption>
<hfoption id="update_components">
```py
guider_spec = t2i_pipeline.get_component_spec("guider")
guider_spec.config["guidance_scale"] = 10
t2i_pipeline.update_components(guider=guider_spec)
```
</hfoption>
</hfoptions>
## Uploading custom guiders
Call the [`~utils.PushToHubMixin.push_to_hub`] method on a custom guider to share it on the Hub.
```py
guider.push_to_hub("YiYiXu/modular-loader-t2i-guider", subfolder="pag_guider")
```
To make this guider available to the pipeline, either modify the `modular_model_index.json` file or use the [`~ModularPipeline.update_components`] method.
<hfoptions id="upload">
<hfoption id="modular_model_index.json">
Edit the `modular_model_index.json` file and add a loading specification for the guider by pointing to a folder containing the guider config.
```json
{
  "guider": [
    "diffusers",
    "PerturbedAttentionGuidance",
    {
      "repo": "YiYiXu/modular-loader-t2i-guider",
      "revision": null,
      "subfolder": "pag_guider",
      "type_hint": [
        "diffusers",
        "PerturbedAttentionGuidance"
      ],
      "variant": null
    }
  ]
}
```
</hfoption>
<hfoption id="update_components">
Change the [`~ComponentSpec.default_creation_method`] to `from_pretrained` and use [`~ModularPipeline.update_components`] to update the guider and component specifications as well as the pipeline config.
> [!TIP]
> Changing the creation method prints a message letting you know you're changing the creation type to `from_pretrained`.
> ```bash
> ModularPipeline.update_components: changing the default_creation_method of guider from from_config to from_pretrained.
> ```
```py
guider_spec = t2i_pipeline.get_component_spec("guider")
guider_spec.default_creation_method = "from_pretrained"
guider_spec.repo = "YiYiXu/modular-loader-t2i-guider"
guider_spec.subfolder = "pag_guider"
pag_guider = guider_spec.load()
t2i_pipeline.update_components(guider=pag_guider)
```
To make it the default guider for a pipeline, call [`~utils.PushToHubMixin.push_to_hub`]. This is an optional step and not necessary if you are only experimenting locally.
```py
t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider")
```
</hfoption>
</hfoptions>

View File

@@ -1,93 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# LoopSequentialPipelineBlocks
[`~modular_pipelines.LoopSequentialPipelineBlocks`] is a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block runs iteratively. This is typically used to create a denoising loop, which is iterative by default.
This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
## Loop wrapper
A [`~modular_pipelines.LoopSequentialPipelineBlocks`] is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, you need the following variables.
- `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.ModularPipelineBlocks.inputs`].
- `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_inputs`].
- `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. It is equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_outputs`].
- `__call__` method defines the loop structure and iteration logic.
```py
import torch
from diffusers.modular_pipelines import LoopSequentialPipelineBlocks, ModularPipelineBlocks, InputParam, OutputParam
class LoopWrapper(LoopSequentialPipelineBlocks):
    model_name = "test"

    @property
    def description(self):
        return "I'm a loop!!"

    @property
    def loop_inputs(self):
        return [InputParam(name="num_steps")]

    @torch.no_grad()
    def __call__(self, components, state):
        block_state = self.get_block_state(state)
        # Loop structure - can be customized to your needs
        for i in range(block_state.num_steps):
            # loop_step executes all registered blocks in sequence
            components, block_state = self.loop_step(components, block_state, i=i)
        self.set_block_state(state, block_state)
        return components, state
```
The loop wrapper can pass additional arguments, like the current iteration index, to the loop blocks.
## Loop blocks
A loop block is a [`~modular_pipelines.ModularPipelineBlocks`], but the `__call__` method behaves differently.
- It receives the iteration variable from the loop wrapper.
- It works directly with the [`~modular_pipelines.BlockState`] instead of the [`~modular_pipelines.PipelineState`].
- It doesn't require retrieving or updating the [`~modular_pipelines.BlockState`].
Loop blocks share the same [`~modular_pipelines.BlockState`] to allow values to accumulate and change for each iteration in the loop.
```py
class LoopBlock(ModularPipelineBlocks):
    model_name = "test"

    @property
    def inputs(self):
        return [InputParam(name="x")]

    @property
    def intermediate_outputs(self):
        # outputs produced by this block
        return [OutputParam(name="x")]

    @property
    def description(self):
        return "I'm a block used inside the `LoopWrapper` class"

    def __call__(self, components, block_state, i: int):
        block_state.x += 1
        return components, block_state
```
## LoopSequentialPipelineBlocks
Use the [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`] method to add the loop block to the loop wrapper and create a [`~modular_pipelines.LoopSequentialPipelineBlocks`].
```py
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock})
```
Add more loop blocks to run within each iteration with [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`]. This allows you to modify the blocks without changing the loop logic itself.
```py
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock(), "block2": LoopBlock})
```
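To run the composed loop, turn it into a pipeline. A minimal sketch, assuming the blocks above use no pretrained components so [`~ModularPipelineBlocks.init_pipeline`] can be called without a modular repository:
```py
pipe = loop.init_pipeline()
# each registered LoopBlock increments x once per iteration,
# so with two blocks and num_steps=3 the returned value is 6
x = pipe(x=0, num_steps=3, output="x")
```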

View File

@@ -1,75 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# States
Blocks rely on the [`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] data structures for communicating and sharing data.
| State | Description |
|-------|-------------|
| [`~modular_pipelines.PipelineState`] | Maintains the overall data required for a pipeline's execution and allows blocks to read and update its data. |
| [`~modular_pipelines.BlockState`] | Provides a block with a local view of the data it needs from [`~modular_pipelines.PipelineState`] to perform its computation. |
This guide explains how states work and how they connect blocks.
## PipelineState
The [`~modular_pipelines.PipelineState`] is a global state container for all blocks. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data.
There are two dicts in [`~modular_pipelines.PipelineState`] for structuring data.
- The `values` dict is a **mutable** state containing a copy of user-provided input values and intermediate output values generated by blocks. If a block modifies an `input`, the change is reflected in the `values` dict after calling `set_block_state`.
```py
PipelineState(
    values={
        'prompt': 'a cat'
        'guidance_scale': 7.0
        'num_inference_steps': 25
        'prompt_embeds': Tensor(dtype=torch.float32, shape=torch.Size([1, 1, 1, 1]))
        'negative_prompt_embeds': None
    },
)
```
## BlockState
The [`~modular_pipelines.BlockState`] is a local view of the relevant variables an individual block needs from [`~modular_pipelines.PipelineState`] to perform its computations.
Access these variables directly as attributes like `block_state.image`.
```py
BlockState(
    image: <PIL.Image.Image image mode=RGB size=512x512 at 0x7F3ECC494640>
)
```
When a block's `__call__` method is executed, it retrieves the [`BlockState`] with `self.get_block_state(state)`, performs its operations, and updates [`~modular_pipelines.PipelineState`] with `self.set_block_state(state, block_state)`.
```py
def __call__(self, components, state):
    # retrieve BlockState
    block_state = self.get_block_state(state)
    # computation logic on inputs
    # update PipelineState
    self.set_block_state(state, block_state)
    return components, state
```
## State interaction
How [`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] interact is defined by a block's `inputs` and `intermediate_outputs`.
- `inputs`: a block can modify an input, like `block_state.image`, and the change is propagated globally to [`~modular_pipelines.PipelineState`] when `set_block_state` is called.
- `intermediate_outputs`: a new variable that a block creates. It is added to the [`~modular_pipelines.PipelineState`]'s `values` dict and is available to subsequent blocks or to users as a final output from the pipeline. Both behaviors are shown in the sketch below.
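A minimal sketch of a block that modifies an input and creates a new intermediate output (the `image`, `scale_factor`, and `scaled_image` names are hypothetical and only for illustration):
```py
from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam

class ScaleImageBlock(ModularPipelineBlocks):
    model_name = "test"

    @property
    def inputs(self):
        return [InputParam(name="image"), InputParam(name="scale_factor")]

    @property
    def intermediate_outputs(self):
        return [OutputParam(name="scaled_image")]

    def __call__(self, components, state):
        block_state = self.get_block_state(state)
        # create a new intermediate output from the inputs
        width, height = block_state.image.size
        block_state.scaled_image = block_state.image.resize(
            (width * block_state.scale_factor, height * block_state.scale_factor)
        )
        # propagate the changes back to PipelineState
        self.set_block_state(state, block_state)
        return components, state
```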

View File

@@ -1,358 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# ModularPipeline
[`ModularPipeline`] converts [`~modular_pipelines.ModularPipelineBlocks`] into an executable pipeline that loads models and performs the computation steps defined in the blocks. It is the main interface for running a pipeline and is very similar to the [`DiffusionPipeline`] API.
The main difference is that you specify the expected `output` argument when calling the pipeline.
<hfoptions id="example">
<hfoption id="text-to-image">
```py
import torch
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS
blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
pipeline = blocks.init_pipeline(modular_repo_id)
pipeline.load_components(torch_dtype=torch.float16)
pipeline.to("cuda")
image = pipeline(prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", output="images")[0]
image.save("modular_t2i_out.png")
```
</hfoption>
<hfoption id="image-to-image">
```py
import torch
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS
from diffusers.utils import load_image
blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS)
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
pipeline = blocks.init_pipeline(modular_repo_id)
pipeline.load_components(torch_dtype=torch.float16)
pipeline.to("cuda")
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
init_image = load_image(url)
prompt = "a dog catching a frisbee in the jungle"
image = pipeline(prompt=prompt, image=init_image, strength=0.8, output="images")[0]
image.save("modular_i2i_out.png")
```
</hfoption>
<hfoption id="inpainting">
```py
import torch
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS
from diffusers.utils import load_image
blocks = SequentialPipelineBlocks.from_blocks_dict(INPAINT_BLOCKS)
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
pipeline = blocks.init_pipeline(modular_repo_id)
pipeline.load_components(torch_dtype=torch.float16)
pipeline.to("cuda")
img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
mask_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-inpaint-mask.png"
init_image = load_image(img_url)
mask_image = load_image(mask_url)
prompt = "A deep sea diver floating"
image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, strength=0.85, output="images")[0]
image.save("moduar_inpaint_out.png")
```
</hfoption>
</hfoptions>
This guide will show you how to create a [`ModularPipeline`] and manage the components in it.
## Adding blocks
Block presets and the `sub_blocks` attribute are [`InsertableDict`] objects, so blocks can be inserted at specific positions, providing a flexible way to mix-and-match blocks.
Use [`~modular_pipelines.modular_pipeline_utils.InsertableDict.insert`] on either a block preset or the `sub_blocks` attribute to add a block.
```py
# BLOCKS is a dict of block classes, so add a block class to it
BLOCKS.insert("block_name", BlockClass, index)
# the sub_blocks attribute contains block instances, so add a block instance to it
t2i_blocks.sub_blocks.insert("block_name", block_instance, index)
```
Use [`~modular_pipelines.modular_pipeline_utils.InsertableDict.pop`] on either a block preset or the `sub_blocks` attribute to remove a block.
```py
# remove a block class from preset
BLOCKS.pop("text_encoder")
# split out a block instance on its own
text_encoder_block = t2i_blocks.sub_blocks.pop("text_encoder")
```
Swap blocks by assigning a new block to an existing block's key.
```py
# Replace block class in preset
BLOCKS["prepare_latents"] = CustomPrepareLatents
# Replace in sub_blocks attribute using a block instance
t2i_blocks.sub_blocks["prepare_latents"] = CustomPrepareLatents()
```
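After modifying a preset, rebuild the multi-block so the changes take effect. A minimal sketch, assuming the SDXL text-to-image preset and the hypothetical `CustomPrepareLatents` block from above:
```py
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS

# swap in the custom block, then rebuild the blocks from the preset
TEXT2IMAGE_BLOCKS["prepare_latents"] = CustomPrepareLatents
t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
```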
## Creating a pipeline
There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeline from [`ModularPipelineBlocks`] or load an existing pipeline with [`~ModularPipeline.from_pretrained`].
You should also initialize a [`ComponentsManager`] to handle device placement, memory, and component management.
> [!TIP]
> Refer to the [ComponentsManager](./components_manager) doc for more details about how it can help manage components across different workflows.
<hfoptions id="create">
<hfoption id="ModularPipelineBlocks">
Use the [`~ModularPipelineBlocks.init_pipeline`] method to create a [`ModularPipeline`] from the component and configuration specifications. This method loads the *specifications* from a `modular_model_index.json` file, but it doesn't load the *models* yet.
```py
from diffusers import ComponentsManager
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS
t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
components = ComponentsManager()
t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=components)
```
</hfoption>
<hfoption id="from_pretrained">
The [`~ModularPipeline.from_pretrained`] method creates a [`ModularPipeline`] from a modular repository on the Hub.
```py
from diffusers import ModularPipeline, ComponentsManager
components = ComponentsManager()
pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-loader-t2i-0704", components_manager=components)
```
Add the `trust_remote_code` argument to load a custom [`ModularPipeline`].
```py
from diffusers import ModularPipeline, ComponentsManager
components = ComponentsManager()
modular_repo_id = "YiYiXu/modular-diffdiff-0704"
diffdiff_pipeline = ModularPipeline.from_pretrained(modular_repo_id, trust_remote_code=True, components_manager=components)
```
</hfoption>
</hfoptions>
## Loading components
A [`ModularPipeline`] doesn't automatically instantiate its components. It only loads the configuration and component specifications. You can load all components at once with [`~ModularPipeline.load_components`] or only specific components by passing their names.
<hfoptions id="load">
<hfoption id="load_components">
```py
import torch
t2i_pipeline.load_components(torch_dtype=torch.float16)
t2i_pipeline.to("cuda")
```
</hfoption>
<hfoption id="load_components">
The example below only loads the UNet and VAE.
```py
import torch
t2i_pipeline.load_components(names=["unet", "vae"], torch_dtype=torch.float16)
```
</hfoption>
</hfoptions>
Print the pipeline to inspect the loaded pretrained components.
```py
t2i_pipeline
```
This should match the `modular_model_index.json` file from the modular repository a pipeline is initialized from. If a pipeline doesn't need a component, it won't be included even if it exists in the modular repository.
To modify where components are loaded from, edit the `modular_model_index.json` file in the repository and change it to your desired loading path. The example below loads a UNet from a different repository.
```json
# original
"unet": [
  null, null,
  {
    "repo": "stabilityai/stable-diffusion-xl-base-1.0",
    "subfolder": "unet",
    "variant": "fp16"
  }
]

# modified
"unet": [
  null, null,
  {
    "repo": "RunDiffusion/Juggernaut-XL-v9",
    "subfolder": "unet",
    "variant": "fp16"
  }
]
```
### Component loading status
The pipeline properties below provide more information about which components are loaded.
Use `component_names` to return all expected components.
```py
t2i_pipeline.component_names
['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'guider', 'scheduler', 'unet', 'vae', 'image_processor']
```
Use `null_component_names` to return components that aren't loaded yet. Load these components with [`~ModularPipeline.load_components`].
```py
t2i_pipeline.null_component_names
['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler']
```
Use `pretrained_component_names` to return components that will be loaded from pretrained models.
```py
t2i_pipeline.pretrained_component_names
['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler', 'unet', 'vae']
```
Use `config_component_names` to return components that are created with the default config (not loaded from a modular repository). These components are already initialized during pipeline creation, which is why they aren't listed in `null_component_names`.
```py
t2i_pipeline.config_component_names
['guider', 'image_processor']
```
## Updating components
How a component is updated depends on whether it is a *pretrained component* or a *config component*.
> [!WARNING]
> A component may change from pretrained to config when updating a component. The component type is initially defined in a block's `expected_components` field.
A pretrained component is updated with [`ComponentSpec`] whereas a config component is updated by either passing the object directly or with [`ComponentSpec`].
The [`ComponentSpec`] shows `default_creation_method="from_pretrained"` for a pretrained component and `default_creation_method="from_config"` for a config component.
To update a pretrained component, create a [`ComponentSpec`] with the name of the component and where to load it from. Use the [`~ComponentSpec.load`] method to load the component.
```py
import torch
from diffusers import ComponentSpec, UNet2DConditionModel
unet_spec = ComponentSpec(name="unet", type_hint=UNet2DConditionModel, repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16")
unet2 = unet_spec.load(torch_dtype=torch.float16)
```
The [`~ModularPipeline.update_components`] method replaces the component with a new one.
```py
t2i_pipeline.update_components(unet=unet2)
```
When a component is updated, the loading specifications are also updated in the pipeline config.
### Component extraction and modification
When you use [`~ComponentSpec.load`], the new component maintains its loading specifications. This makes it possible to extract the specification and recreate the component.
```py
spec = ComponentSpec.from_component("unet", unet2)
spec
ComponentSpec(name='unet', type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>, description=None, config=None, repo='stabilityai/stable-diffusion-xl-base-1.0', subfolder='unet', variant='fp16', revision=None, default_creation_method='from_pretrained')
unet2_recreated = spec.load(torch_dtype=torch.float16)
```
The [`~ModularPipeline.get_component_spec`] method gets a copy of the current component specification to modify or update.
```py
unet_spec = t2i_pipeline.get_component_spec("unet")
unet_spec
ComponentSpec(
    name='unet',
    type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>,
    repo='RunDiffusion/Juggernaut-XL-v9',
    subfolder='unet',
    variant='fp16',
    default_creation_method='from_pretrained'
)
# modify to load from a different repository
unet_spec.repo = "stabilityai/stable-diffusion-xl-base-1.0"
# load component with modified spec
unet = unet_spec.load(torch_dtype=torch.float16)
```
## Modular repository
A repository is required if the pipeline blocks use *pretrained components*. The repository supplies loading specifications and metadata.
[`ModularPipeline`] specifically requires *modular repositories* (see [example repository](https://huggingface.co/YiYiXu/modular-diffdiff)), which are more flexible than a typical repository. A modular repository contains a `modular_model_index.json` file with the following three elements.
- `library` and `class` show which library the component was loaded from and its class. If `null`, the component hasn't been loaded yet.
- `loading_specs_dict` contains the information required to load the component such as the repository and subfolder it is loaded from.
Unlike standard repositories, a modular repository can fetch components from different repositories based on the `loading_specs_dict`. Components don't need to exist in the same repository.
A modular repository may contain custom code for loading a [`ModularPipeline`]. This allows you to use specialized blocks that aren't native to Diffusers.
```
modular-diffdiff-0704/
├── block.py # Custom pipeline blocks implementation
├── config.json # Pipeline configuration and auto_map
└── modular_model_index.json # Component loading specifications
```
The [config.json](https://huggingface.co/YiYiXu/modular-diffdiff-0704/blob/main/config.json) file contains an `auto_map` key that points to where a custom block is defined in `block.py`.
```json
{
  "_class_name": "DiffDiffBlocks",
  "auto_map": {
    "ModularPipelineBlocks": "block.DiffDiffBlocks"
  }
}
```

Some files were not shown because too many files have changed in this diff.