Compare commits

..

26 Commits

Author SHA1 Message Date
DN6  289c385d73  update  2025-06-19 21:57:21 +05:30
DN6  a2dd339a4d  Merge branch 'main' into group-offload-refactor  2025-06-19 21:55:44 +05:30
DN6  ebde934317  update  2025-06-19 21:46:03 +05:30
DN6  4854309f51  update  2025-06-19 21:44:30 +05:30
DN6  a9e9ef5be2  update  2025-06-19 21:39:18 +05:30
DN6  b60bf2dacc  update  2025-06-19 21:31:44 +05:30
DN6  22e6fc180c  update  2025-06-19 21:26:48 +05:30
DN6  1d38a30001  update  2025-06-19 21:24:25 +05:30
DN6  135934893e  update  2025-06-19 19:22:06 +05:30
DN6  ace698aa96  update  2025-06-19 18:26:10 +05:30
sayakpaul  90e546ada1  update more docs.  2025-06-19 16:16:16 +05:30
sayakpaul  7d2295567f  update todos.  2025-06-19 16:10:39 +05:30
sayakpaul  da11656af4  updates  2025-06-19 15:59:48 +05:30
sayakpaul  68f7580656  Merge branch 'main' into group-offloading-with-disk  2025-06-19 15:54:54 +05:30
Sayak Paul  357226327f  Merge branch 'main' into group-offloading-with-disk  2025-06-19 09:29:44 +05:30
Sayak Paul  3c69c5ecc0  Merge branch 'main' into group-offloading-with-disk  2025-06-18 09:43:35 +05:30
Sayak Paul  2d3056180a  Merge branch 'main' into group-offloading-with-disk  2025-06-14 07:31:38 +05:30
Sayak Paul  a018ee1e70  Merge branch 'main' into group-offloading-with-disk  2025-06-12 11:08:56 +05:30
sayakpaul  8029cd7ef0  add test and clarify.  2025-06-12 11:08:19 +05:30
sayakpaul  4e4842fb0b  check if safetensors already exist.  2025-06-12 10:48:57 +05:30
sayakpaul  d8179b10d3  offload_to_disk_path  2025-06-12 10:27:48 +05:30
Sayak Paul  0bf55a99d3  Merge branch 'main' into group-offloading-with-disk  2025-06-10 10:29:54 +05:30
Sayak Paul  d32a2c6879  Merge branch 'main' into group-offloading-with-disk  2025-06-09 14:36:43 +05:30
sayakpaul  278cbc2e47  updates.patch  2025-06-09 12:03:01 +05:30
sayakpaul  49ac665460  delete diff file.  2025-06-09 10:26:00 +05:30
sayakpaul  e0d5079f9c  start implementing disk offloading in group.  2025-06-09 10:25:45 +05:30
1460 changed files with 32994 additions and 134139 deletions

View File

@@ -7,25 +7,24 @@ on:
  env:
  DIFFUSERS_IS_CI: yes
- HF_XET_HIGH_PERFORMANCE: 1
+ HF_HUB_ENABLE_HF_TRANSFER: 1
  HF_HOME: /mnt/cache
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
- BASE_PATH: benchmark_outputs
  jobs:
- torch_models_cuda_benchmark_tests:
+ torch_pipelines_cuda_benchmark_tests:
  env:
  SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_BENCHMARK }}
- name: Torch Core Models CUDA Benchmarking Tests
+ name: Torch Core Pipelines CUDA Benchmarking Tests
  strategy:
  fail-fast: false
  max-parallel: 1
  runs-on:
- group: aws-g6e-4xlarge
+ group: aws-g6-4xlarge-plus
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -36,46 +35,27 @@ jobs:
  nvidia-smi
  - name: Install dependencies
  run: |
- apt update
- apt install -y libpq-dev postgresql-client
- uv pip install -e ".[quality]"
- uv pip install -r benchmarks/requirements.txt
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install pandas peft
+ python -m uv pip uninstall transformers && python -m uv pip install transformers==4.48.0
  - name: Environment
  run: |
  python utils/print_env.py
  - name: Diffusers Benchmarking
  env:
- HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+ HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
+ BASE_PATH: benchmark_outputs
  run: |
- cd benchmarks && python run_all.py
+ export TOTAL_GPU_MEMORY=$(python -c "import torch; print(torch.cuda.get_device_properties(0).total_memory / (1024**3))")
+ cd benchmarks && mkdir ${BASE_PATH} && python run_all.py && python push_results.py
- - name: Push results to the Hub
- env:
- HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
- run: |
- cd benchmarks && python push_results.py
- mkdir $BASE_PATH && cp *.csv $BASE_PATH
  - name: Test suite reports artifacts
  if: ${{ always() }}
  uses: actions/upload-artifact@v4
  with:
  name: benchmark_test_reports
- path: benchmarks/${{ env.BASE_PATH }}
+ path: benchmarks/benchmark_outputs
- # TODO: enable this once the connection problem has been resolved.
- - name: Update benchmarking results to DB
- env:
- PGDATABASE: metrics
- PGHOST: ${{ secrets.DIFFUSERS_BENCHMARKS_PGHOST }}
- PGUSER: transformers_benchmarks
- PGPASSWORD: ${{ secrets.DIFFUSERS_BENCHMARKS_PGPASSWORD }}
- BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
- run: |
- git config --global --add safe.directory /__w/diffusers/diffusers
- commit_id=$GITHUB_SHA
- commit_msg=$(git show -s --format=%s "$commit_id" | cut -c1-70)
- cd benchmarks && python populate_into_db.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
  - name: Report success status
  if: ${{ success() }}

View File

@@ -42,39 +42,18 @@ jobs:
  CHANGED_FILES: ${{ steps.file_changes.outputs.all }}
  run: |
  echo "$CHANGED_FILES"
- ALLOWED_IMAGES=(
- diffusers-pytorch-cpu
- diffusers-pytorch-cuda
- diffusers-pytorch-xformers-cuda
- diffusers-pytorch-minimum-cuda
- diffusers-doc-builder
- )
- declare -A IMAGES_TO_BUILD=()
- for FILE in $CHANGED_FILES; do
+ for FILE in $CHANGED_FILES; do
  # skip anything that isn't still on disk
- if [[ ! -e "$FILE" ]]; then
+ if [[ ! -f "$FILE" ]]; then
  echo "Skipping removed file $FILE"
  continue
  fi
+ if [[ "$FILE" == docker/*Dockerfile ]]; then
+ DOCKER_PATH="${FILE%/Dockerfile}"
+ DOCKER_TAG=$(basename "$DOCKER_PATH")
+ echo "Building Docker image for $DOCKER_TAG"
+ docker build -t "$DOCKER_TAG" "$DOCKER_PATH"
+ fi
- for IMAGE in "${ALLOWED_IMAGES[@]}"; do
- if [[ "$FILE" == docker/${IMAGE}/* ]]; then
- IMAGES_TO_BUILD["$IMAGE"]=1
- fi
- done
- done
- if [[ ${#IMAGES_TO_BUILD[@]} -eq 0 ]]; then
- echo "No relevant Docker changes detected."
- exit 0
- fi
- for IMAGE in "${!IMAGES_TO_BUILD[@]}"; do
- DOCKER_PATH="docker/${IMAGE}"
- echo "Building Docker image for $IMAGE"
- docker build -t "$IMAGE" "$DOCKER_PATH"
  done
  if: steps.file_changes.outputs.all != ''
@@ -93,8 +72,13 @@ jobs:
  image-name:
  - diffusers-pytorch-cpu
  - diffusers-pytorch-cuda
+ - diffusers-pytorch-cuda
  - diffusers-pytorch-xformers-cuda
  - diffusers-pytorch-minimum-cuda
+ - diffusers-flax-cpu
+ - diffusers-flax-tpu
+ - diffusers-onnxruntime-cpu
+ - diffusers-onnxruntime-cuda
  - diffusers-doc-builder
  steps:

View File

@@ -12,33 +12,7 @@ concurrency:
  cancel-in-progress: true
  jobs:
- check-links:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.10'
- - name: Install uv
- run: |
- curl -LsSf https://astral.sh/uv/install.sh | sh
- echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- - name: Install doc-builder
- run: |
- uv pip install --system git+https://github.com/huggingface/doc-builder.git@main
- - name: Check documentation links
- run: |
- uv run doc-builder check-links docs/source/en
  build:
- needs: check-links
  uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
  with:
  commit_sha: ${{ github.event.pull_request.head.sha }}

View File

@@ -74,19 +74,19 @@ jobs:
  python-version: "3.10"
  - name: Install dependencies
  run: |
- pip install --upgrade pip
+ python -m pip install --upgrade pip
  pip install --upgrade huggingface_hub
  # Check secret is set
  - name: whoami
- run: hf auth whoami
+ run: huggingface-cli whoami
  env:
  HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}
  # Push to HF! (under subfolder based on checkout ref)
  # https://huggingface.co/datasets/diffusers/community-pipelines-mirror
  - name: Mirror community pipeline to HF
- run: hf upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
+ run: huggingface-cli upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
  env:
  PATH_IN_REPO: ${{ env.PATH_IN_REPO }}
  HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}

View File

@@ -7,7 +7,7 @@ on:
  env:
  DIFFUSERS_IS_CI: yes
- HF_XET_HIGH_PERFORMANCE: 1
+ HF_HUB_ENABLE_HF_TRANSFER: 1
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 600
@@ -61,7 +61,7 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -71,11 +71,10 @@ jobs:
  run: nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip install pytest-reportlog
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ python -m uv pip install pytest-reportlog
  - name: Environment
  run: |
  python utils/print_env.py
@@ -85,8 +84,8 @@ jobs:
  # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
  CUBLAS_WORKSPACE_CONFIG: :16:8
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx" \
+ -s -v -k "not Flax and not Onnx" \
  --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
  --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
  tests/pipelines/${{ matrix.module }}
@@ -108,7 +107,7 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  defaults:
  run:
  shell: bash
@@ -125,12 +124,11 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install peft@git+https://github.com/huggingface/peft.git
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip install pytest-reportlog
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ python -m uv pip install pytest-reportlog
  - name: Environment
  run: python utils/print_env.py
@@ -141,8 +139,8 @@ jobs:
  # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
  CUBLAS_WORKSPACE_CONFIG: :16:8
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx" \
+ -s -v -k "not Flax and not Onnx" \
  --make-reports=tests_torch_${{ matrix.module }}_cuda \
  --report-log=tests_torch_${{ matrix.module }}_cuda.log \
  tests/${{ matrix.module }}
@@ -154,8 +152,8 @@ jobs:
  # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
  CUBLAS_WORKSPACE_CONFIG: :16:8
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- --make-reports=examples_torch_cuda \
+ -s -v --make-reports=examples_torch_cuda \
  --report-log=examples_torch_cuda.log \
  examples/
@@ -180,7 +178,7 @@ jobs:
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --gpus all --shm-size "16gb" --ipc host
+ options: --gpus 0 --shm-size "16gb" --ipc host
  steps:
  - name: Checkout diffusers
@@ -193,9 +191,8 @@ jobs:
  nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality,training]"
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test,training]
  - name: Environment
  run: |
  python utils/print_env.py
@@ -204,7 +201,7 @@ jobs:
  HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
  RUN_COMPILE: yes
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
  - name: Failure short reports
  if: ${{ failure() }}
  run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -225,7 +222,7 @@ jobs:
  group: aws-g6e-xlarge-plus
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -235,12 +232,11 @@ jobs:
  run: nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install peft@git+https://github.com/huggingface/peft.git
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip install pytest-reportlog
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ python -m uv pip install pytest-reportlog
  - name: Environment
  run: |
  python utils/print_env.py
@@ -251,8 +247,8 @@ jobs:
  CUBLAS_WORKSPACE_CONFIG: :16:8
  BIG_GPU_MEMORY: 40
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -m "big_accelerator" \
+ -m "big_gpu_with_torch_cuda" \
  --make-reports=tests_big_gpu_torch_cuda \
  --report-log=tests_big_gpu_torch_cuda.log \
  tests/
@@ -274,7 +270,7 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-minimum-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  defaults:
  run:
  shell: bash
@@ -286,11 +282,10 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install peft@git+https://github.com/huggingface/peft.git
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
  - name: Environment
  run: |
@@ -302,8 +297,8 @@ jobs:
  # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
  CUBLAS_WORKSPACE_CONFIG: :16:8
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx" \
+ -s -v -k "not Flax and not Onnx" \
  --make-reports=tests_torch_minimum_version_cuda \
  tests/models/test_modeling_common.py \
  tests/pipelines/test_pipelines_common.py \
@@ -326,6 +321,55 @@ jobs:
  name: torch_minimum_version_cuda_test_reports
  path: reports
+ run_nightly_onnx_tests:
+ name: Nightly ONNXRuntime CUDA tests on Ubuntu
+ runs-on:
+ group: aws-g4dn-2xlarge
+ container:
+ image: diffusers/diffusers-onnxruntime-cuda
+ options: --gpus 0 --shm-size "16gb" --ipc host
+ steps:
+ - name: Checkout diffusers
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 2
+ - name: NVIDIA-SMI
+ run: nvidia-smi
+ - name: Install dependencies
+ run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ python -m uv pip install pytest-reportlog
+ - name: Environment
+ run: python utils/print_env.py
+ - name: Run Nightly ONNXRuntime CUDA tests
+ env:
+ HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+ run: |
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ -s -v -k "Onnx" \
+ --make-reports=tests_onnx_cuda \
+ --report-log=tests_onnx_cuda.log \
+ tests/
+ - name: Failure short reports
+ if: ${{ failure() }}
+ run: |
+ cat reports/tests_onnx_cuda_stats.txt
+ cat reports/tests_onnx_cuda_failures_short.txt
+ - name: Test suite reports artifacts
+ if: ${{ always() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: tests_onnx_cuda_reports
+ path: reports
  run_nightly_quantization_tests:
  name: Torch quantization nightly tests
  strategy:
@@ -338,21 +382,18 @@ jobs:
  additional_deps: ["peft"]
  - backend: "gguf"
  test_location: "gguf"
- additional_deps: ["peft", "kernels"]
+ additional_deps: ["peft"]
  - backend: "torchao"
  test_location: "torchao"
  additional_deps: []
  - backend: "optimum_quanto"
  test_location: "quanto"
  additional_deps: []
- - backend: "nvidia_modelopt"
- test_location: "modelopt"
- additional_deps: []
  runs-on:
  group: aws-g6e-xlarge-plus
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "20gb" --ipc host --gpus all
+ options: --shm-size "20gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -362,14 +403,13 @@ jobs:
  run: nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install -U ${{ matrix.config.backend }}
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install -U ${{ matrix.config.backend }}
  if [ "${{ join(matrix.config.additional_deps, ' ') }}" != "" ]; then
- uv pip install ${{ join(matrix.config.additional_deps, ' ') }}
+ python -m uv pip install ${{ join(matrix.config.additional_deps, ' ') }}
  fi
- uv pip install pytest-reportlog
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m uv pip install pytest-reportlog
  - name: Environment
  run: |
  python utils/print_env.py
@@ -380,7 +420,7 @@ jobs:
  CUBLAS_WORKSPACE_CONFIG: :16:8
  BIG_GPU_MEMORY: 40
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
  --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
  --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
  tests/quantization/${{ matrix.config.test_location }}
@@ -405,7 +445,7 @@ jobs:
  group: aws-g6e-xlarge-plus
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "20gb" --ipc host --gpus all
+ options: --shm-size "20gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -415,11 +455,10 @@ jobs:
  run: nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install -U bitsandbytes optimum_quanto
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip install pytest-reportlog
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install -U bitsandbytes optimum_quanto
+ python -m uv pip install pytest-reportlog
  - name: Environment
  run: |
  python utils/print_env.py
@@ -430,7 +469,7 @@ jobs:
  CUBLAS_WORKSPACE_CONFIG: :16:8
  BIG_GPU_MEMORY: 40
  run: |
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
  --make-reports=tests_pipeline_level_quant_torch_cuda \
  --report-log=tests_pipeline_level_quant_torch_cuda.log \
  tests/quantization/test_pipeline_level_quantization.py
@@ -446,6 +485,57 @@ jobs:
  name: torch_cuda_pipeline_level_quant_reports
  path: reports
+ run_flax_tpu_tests:
+ name: Nightly Flax TPU Tests
+ runs-on:
+ group: gcp-ct5lp-hightpu-8t
+ if: github.event_name == 'schedule'
+ container:
+ image: diffusers/diffusers-flax-tpu
+ options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
+ defaults:
+ run:
+ shell: bash
+ steps:
+ - name: Checkout diffusers
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 2
+ - name: Install dependencies
+ run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ python -m uv pip install pytest-reportlog
+ - name: Environment
+ run: python utils/print_env.py
+ - name: Run nightly Flax TPU tests
+ env:
+ HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+ run: |
+ python -m pytest -n 0 \
+ -s -v -k "Flax" \
+ --make-reports=tests_flax_tpu \
+ --report-log=tests_flax_tpu.log \
+ tests/
+ - name: Failure short reports
+ if: ${{ failure() }}
+ run: |
+ cat reports/tests_flax_tpu_stats.txt
+ cat reports/tests_flax_tpu_failures_short.txt
+ - name: Test suite reports artifacts
+ if: ${{ always() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: flax_tpu_test_reports
+ path: reports
  generate_consolidated_report:
  name: Generate Consolidated Test Report
  needs: [
@@ -455,9 +545,9 @@ jobs:
  run_big_gpu_torch_tests,
  run_nightly_quantization_tests,
  run_nightly_pipeline_level_quantization_tests,
- # run_nightly_onnx_tests,
+ run_nightly_onnx_tests,
  torch_minimum_version_cuda_tests,
- # run_flax_tpu_tests
+ run_flax_tpu_tests
  ]
  if: always()
  runs-on:
@@ -530,11 +620,11 @@ jobs:
  # - name: Install dependencies
  # shell: arch -arch arm64 bash {0}
  # run: |
- # ${CONDA_RUN} pip install --upgrade pip uv
+ # ${CONDA_RUN} python -m pip install --upgrade pip uv
- # ${CONDA_RUN} uv pip install -e ".[quality]"
+ # ${CONDA_RUN} python -m uv pip install -e [quality,test]
- # ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+ # ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- # ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
+ # ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
- # ${CONDA_RUN} uv pip install pytest-reportlog
+ # ${CONDA_RUN} python -m uv pip install pytest-reportlog
  # - name: Environment
  # shell: arch -arch arm64 bash {0}
  # run: |
@@ -545,7 +635,7 @@ jobs:
  # HF_HOME: /System/Volumes/Data/mnt/cache
  # HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
  # run: |
- # ${CONDA_RUN} pytest -n 1 --make-reports=tests_torch_mps \
+ # ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
  # --report-log=tests_torch_mps.log \
  # tests/
  # - name: Failure short reports
@@ -586,11 +676,11 @@ jobs:
  # - name: Install dependencies
  # shell: arch -arch arm64 bash {0}
  # run: |
- # ${CONDA_RUN} pip install --upgrade pip uv
+ # ${CONDA_RUN} python -m pip install --upgrade pip uv
- # ${CONDA_RUN} uv pip install -e ".[quality]"
+ # ${CONDA_RUN} python -m uv pip install -e [quality,test]
- # ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+ # ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- # ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
+ # ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
- # ${CONDA_RUN} uv pip install pytest-reportlog
+ # ${CONDA_RUN} python -m uv pip install pytest-reportlog
  # - name: Environment
  # shell: arch -arch arm64 bash {0}
  # run: |
@@ -601,7 +691,7 @@ jobs:
  # HF_HOME: /System/Volumes/Data/mnt/cache
  # HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
  # run: |
- # ${CONDA_RUN} pytest -n 1 --make-reports=tests_torch_mps \
+ # ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
  # --report-log=tests_torch_mps.log \
  # tests/
  # - name: Failure short reports

View File

@@ -25,8 +25,11 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install -e .
- pip install pytest
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pip install --upgrade pip uv
+ python -m uv pip install -e .
+ python -m uv pip install pytest
  - name: Check for soft dependencies
  run: |
- pytest tests/others/test_dependencies.py
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ pytest tests/others/test_dependencies.py

View File

@@ -0,0 +1,38 @@
name: Run Flax dependency tests
on:
pull_request:
branches:
- main
paths:
- "src/diffusers/**.py"
push:
branches:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check_flax_dependencies:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pip install --upgrade pip uv
python -m uv pip install -e .
python -m uv pip install "jax[cpu]>=0.2.16,!=0.3.2"
python -m uv pip install "flax>=0.4.1"
python -m uv pip install "jaxlib>=0.1.65"
python -m uv pip install pytest
- name: Check for soft dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest tests/others/test_dependencies.py

View File

@@ -1,139 +0,0 @@
name: Fast PR tests for Modular
on:
pull_request:
branches: [main]
paths:
- "src/diffusers/modular_pipelines/**.py"
- "src/diffusers/models/modeling_utils.py"
- "src/diffusers/models/model_loading_utils.py"
- "src/diffusers/pipelines/pipeline_utils.py"
- "src/diffusers/pipeline_loading_utils.py"
- "src/diffusers/loaders/lora_base.py"
- "src/diffusers/loaders/lora_pipeline.py"
- "src/diffusers/loaders/peft.py"
- "tests/modular_pipelines/**.py"
- ".github/**.yml"
- "utils/**.py"
- "setup.py"
push:
branches:
- ci-*
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
DIFFUSERS_IS_CI: yes
HF_XET_HIGH_PERFORMANCE: 1
OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60
jobs:
check_code_quality:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
pip install --upgrade pip
pip install .[quality]
- name: Check quality
run: make quality
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
check_repository_consistency:
needs: check_code_quality
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
pip install --upgrade pip
pip install .[quality]
- name: Check repo consistency
run: |
python utils/check_copies.py
python utils/check_dummies.py
python utils/check_support_list.py
make deps_table_check_updated
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
run_fast_tests:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
config:
- name: Fast PyTorch Modular Pipeline CPU tests
framework: pytorch_pipelines
runner: aws-highmemory-32-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu_modular_pipelines
name: ${{ matrix.config.name }}
runs-on:
group: ${{ matrix.config.runner }}
container:
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
run: |
python utils/print_env.py
- name: Run fast PyTorch Pipeline CPU tests
if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
run: |
pytest -n 8 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/modular_pipelines
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
path: reports

View File

@@ -33,7 +33,8 @@ jobs:
  fetch-depth: 0
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
  - name: Environment
  run: |
  python utils/print_env.py
@@ -89,16 +90,19 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install accelerate
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pip install -e [quality,test]
+ python -m pip install accelerate
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run all selected tests on CPU
  run: |
- pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }}
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }}
  - name: Failure short reports
  if: ${{ failure() }}
@@ -144,16 +148,19 @@ jobs:
  - name: Install dependencies
  run: |
- pip install -e [quality]
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pip install -e [quality,test]
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run Hub tests for models, schedulers, and pipelines on a staging env
  if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
  run: |
- HUGGINGFACE_CO_STAGING=true pytest \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ HUGGINGFACE_CO_STAGING=true python -m pytest \
  -m "is_staging_test" \
  --make-reports=tests_${{ matrix.config.report }} \
  tests

View File

@@ -22,7 +22,7 @@ concurrency:
  env:
  DIFFUSERS_IS_CI: yes
- HF_XET_HIGH_PERFORMANCE: 1
+ HF_HUB_ENABLE_HF_TRANSFER: 1
  OMP_NUM_THREADS: 4
  MKL_NUM_THREADS: 4
  PYTEST_TIMEOUT: 60
@@ -38,7 +38,7 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install --upgrade pip
+ python -m pip install --upgrade pip
  pip install .[quality]
  - name: Check quality
  run: make quality
@@ -58,7 +58,7 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install --upgrade pip
+ python -m pip install --upgrade pip
  pip install .[quality]
  - name: Check repo consistency
  run: |
@@ -87,6 +87,11 @@ jobs:
  runner: aws-general-8-plus
  image: diffusers/diffusers-pytorch-cpu
  report: torch_cpu_models_schedulers
+ - name: Fast Flax CPU tests
+ framework: flax
+ runner: aws-general-8-plus
+ image: diffusers/diffusers-flax-cpu
+ report: flax_cpu
  - name: PyTorch Example CPU tests
  framework: pytorch_examples
  runner: aws-general-8-plus
@@ -114,36 +119,49 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run fast PyTorch Pipeline CPU tests
  if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
  run: |
- pytest -n 8 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx" \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pytest -n 8 --max-worker-restart=0 --dist=loadfile \
+ -s -v -k "not Flax and not Onnx" \
  --make-reports=tests_${{ matrix.config.report }} \
  tests/pipelines
  - name: Run fast PyTorch Model Scheduler CPU tests
  if: ${{ matrix.config.framework == 'pytorch_models' }}
  run: |
- pytest -n 4 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx and not Dependency" \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+ -s -v -k "not Flax and not Onnx and not Dependency" \
  --make-reports=tests_${{ matrix.config.report }} \
  tests/models tests/schedulers tests/others
+ - name: Run fast Flax TPU tests
+ if: ${{ matrix.config.framework == 'flax' }}
+ run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+ -s -v -k "Flax" \
+ --make-reports=tests_${{ matrix.config.report }} \
+ tests
  - name: Run example PyTorch CPU tests
  if: ${{ matrix.config.framework == 'pytorch_examples' }}
  run: |
- uv pip install ".[training]"
- pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install peft timm
+ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
  --make-reports=tests_${{ matrix.config.report }} \
  examples
@@ -191,16 +209,19 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run Hub tests for models, schedulers, and pipelines on a staging env
  if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
  run: |
- HUGGINGFACE_CO_STAGING=true pytest \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ HUGGINGFACE_CO_STAGING=true python -m pytest \
  -m "is_staging_test" \
  --make-reports=tests_${{ matrix.config.report }} \
  tests
@@ -242,26 +263,28 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
  # TODO (sayakpaul, DN6): revisit `--no-deps`
- uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
- uv pip install -U tokenizers
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
+ python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
+ python -m uv pip install -U tokenizers
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run fast PyTorch LoRA tests with PEFT
  run: |
- pytest -n 4 --max-worker-restart=0 --dist=loadfile \
- \
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+ -s -v \
  --make-reports=tests_peft_main \
  tests/lora/
- pytest -n 4 --max-worker-restart=0 --dist=loadfile \
- \
+ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+ -s -v \
  --make-reports=tests_models_lora_peft_main \
  tests/models/ -k "lora"

View File

@@ -1,4 +1,4 @@
  name: Fast GPU Tests on PR
  on:
  pull_request:
@@ -13,7 +13,6 @@ on:
  - "src/diffusers/loaders/peft.py"
  - "tests/pipelines/test_pipelines_common.py"
  - "tests/models/test_modeling_common.py"
- - "examples/**/*.py"
  workflow_dispatch:
  concurrency:
@@ -24,7 +23,7 @@ env:
  DIFFUSERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
- HF_XET_HIGH_PERFORMANCE: 1
+ HF_HUB_ENABLE_HF_TRANSFER: 1
  PYTEST_TIMEOUT: 600
  PIPELINE_USAGE_CUTOFF: 1000000000 # set high cutoff so that only always-test pipelines run
@@ -39,7 +38,7 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install --upgrade pip
+ python -m pip install --upgrade pip
  pip install .[quality]
  - name: Check quality
  run: make quality
@@ -59,7 +58,7 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install --upgrade pip
+ python -m pip install --upgrade pip
  pip install .[quality]
  - name: Check repo consistency
  run: |
@@ -71,7 +70,7 @@ jobs:
  if: ${{ failure() }}
  run: |
  echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
  setup_torch_cuda_pipeline_matrix:
  needs: [check_code_quality, check_repository_consistency]
  name: Setup Torch Pipelines CUDA Slow Tests Matrix
@@ -88,7 +87,8 @@ jobs:
  fetch-depth: 2
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
  - name: Environment
  run: |
  python utils/print_env.py
@@ -117,7 +117,7 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -129,10 +129,10 @@ jobs:
  nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
  - name: Environment
  run: |
@@ -150,18 +150,18 @@ jobs:
  # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
  CUBLAS_WORKSPACE_CONFIG: :16:8
  run: |
  if [ "${{ matrix.module }}" = "ip_adapters" ]; then
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx" \
+ -s -v -k "not Flax and not Onnx" \
  --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
  tests/pipelines/${{ matrix.module }}
  else
  pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
- pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
- -k "not Flax and not Onnx and $pattern" \
+ -s -v -k "not Flax and not Onnx and $pattern" \
  --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
  tests/pipelines/${{ matrix.module }}
  fi
  - name: Failure short reports
  if: ${{ failure() }}
@@ -182,13 +182,13 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  defaults:
  run:
  shell: bash
  strategy:
  fail-fast: false
- max-parallel: 4
+ max-parallel: 2
  matrix:
  module: [models, schedulers, lora, others]
  steps:
@@ -199,11 +199,11 @@ jobs:
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip install peft@git+https://github.com/huggingface/peft.git
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+ pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
  - name: Environment
  run: |
@@ -224,11 +224,11 @@ jobs:
  run: |
  pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
  if [ -z "$pattern" ]; then
- pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \
+ python -m pytest -n 1 -sv --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \
  --make-reports=tests_torch_cuda_${{ matrix.module }}
  else
- pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \
+ python -m pytest -n 1 -sv --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \
  --make-reports=tests_torch_cuda_${{ matrix.module }}
  fi
  - name: Failure short reports
@@ -252,7 +252,7 @@ jobs:
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --gpus all --shm-size "16gb" --ipc host
+ options: --gpus 0 --shm-size "16gb" --ipc host
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -264,20 +264,22 @@ jobs:
  nvidia-smi
  - name: Install dependencies
  run: |
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- uv pip install -e ".[quality,training]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
+ python -m uv pip install -e [quality,test,training]
  - name: Environment
  run: |
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
  python utils/print_env.py
  - name: Run example tests on GPU
  env:
  HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
  run: |
- uv pip install ".[training]"
- pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install timm
+ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
  - name: Failure short reports
  if: ${{ failure() }}

View File

@@ -25,8 +25,12 @@ jobs:
  python-version: "3.8"
  - name: Install dependencies
  run: |
- pip install -e .
- pip install torch torchvision torchaudio pytest
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m pip install --upgrade pip uv
+ python -m uv pip install -e .
+ python -m uv pip install torch torchvision torchaudio
+ python -m uv pip install pytest
  - name: Check for soft dependencies
  run: |
- pytest tests/others/test_dependencies.py
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ pytest tests/others/test_dependencies.py

View File

@@ -14,7 +14,7 @@ env:
  DIFFUSERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
- HF_XET_HIGH_PERFORMANCE: 1
+ HF_HUB_ENABLE_HF_TRANSFER: 1
  PYTEST_TIMEOUT: 600
  PIPELINE_USAGE_CUTOFF: 50000
@@ -34,7 +34,8 @@ jobs:
  fetch-depth: 2
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
  - name: Environment
  run: |
  python utils/print_env.py
@@ -63,7 +64,7 @@ jobs:
  group: aws-g4dn-2xlarge
  container:
  image: diffusers/diffusers-pytorch-cuda
- options: --shm-size "16gb" --ipc host --gpus all
+ options: --shm-size "16gb" --ipc host --gpus 0
  steps:
  - name: Checkout diffusers
  uses: actions/checkout@v3
@@ -74,10 +75,9 @@ jobs:
  nvidia-smi
  - name: Install dependencies
  run: |
- uv pip install -e ".[quality]"
- uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+ python -m uv pip install -e [quality,test]
+ pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
  - name: Environment
  run: |
  python utils/print_env.py
@@ -87,8 +87,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
- name: Failure short reports - name: Failure short reports
@@ -109,7 +109,7 @@ jobs:
group: aws-g4dn-2xlarge group: aws-g4dn-2xlarge
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all options: --shm-size "16gb" --ipc host --gpus 0
defaults: defaults:
run: run:
shell: bash shell: bash
@@ -126,11 +126,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
@@ -142,8 +141,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_cuda_${{ matrix.module }} \ --make-reports=tests_torch_cuda_${{ matrix.module }} \
tests/${{ matrix.module }} tests/${{ matrix.module }}
@@ -160,6 +159,102 @@ jobs:
name: torch_cuda_test_reports_${{ matrix.module }} name: torch_cuda_test_reports_${{ matrix.module }}
path: reports path: reports
flax_tpu_tests:
name: Flax TPU Tests
runs-on:
group: gcp-ct5lp-hightpu-8t
container:
image: diffusers/diffusers-flax-tpu
options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment
run: |
python utils/print_env.py
- name: Run Flax TPU tests
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: |
python -m pytest -n 0 \
-s -v -k "Flax" \
--make-reports=tests_flax_tpu \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_flax_tpu_stats.txt
cat reports/tests_flax_tpu_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: flax_tpu_test_reports
path: reports
onnx_cuda_tests:
name: ONNX CUDA Tests
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment
run: |
python utils/print_env.py
- name: Run ONNXRuntime CUDA tests
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_onnx_cuda \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_onnx_cuda_stats.txt
cat reports/tests_onnx_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: onnx_cuda_test_reports
path: reports
run_torch_compile_tests: run_torch_compile_tests:
name: PyTorch Compile CUDA tests name: PyTorch Compile CUDA tests
@@ -168,7 +263,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
@@ -181,9 +276,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m uv pip install -e [quality,test,training]
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -192,7 +286,7 @@ jobs:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
RUN_COMPILE: yes RUN_COMPILE: yes
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_compile_cuda_failures_short.txt run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -212,7 +306,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-xformers-cuda image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
@@ -225,7 +319,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -233,7 +328,7 @@ jobs:
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_xformers_cuda_failures_short.txt run: cat reports/tests_torch_xformers_cuda_failures_short.txt
@@ -253,7 +348,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
uses: actions/checkout@v3 uses: actions/checkout@v3
@@ -265,18 +360,21 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run example tests on GPU - name: Run example tests on GPU
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/ python -m uv pip install timm
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -18,7 +18,7 @@ env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
RUN_SLOW: no RUN_SLOW: no
@@ -33,6 +33,16 @@ jobs:
runner: aws-general-8-plus runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu image: diffusers/diffusers-pytorch-cpu
report: torch_cpu report: torch_cpu
- name: Fast Flax CPU tests on Ubuntu
framework: flax
runner: aws-general-8-plus
image: diffusers/diffusers-flax-cpu
report: flax_cpu
- name: Fast ONNXRuntime CPU tests on Ubuntu
framework: onnxruntime
runner: aws-general-8-plus
image: diffusers/diffusers-onnxruntime-cpu
report: onnx_cpu
- name: PyTorch Example CPU tests on Ubuntu - name: PyTorch Example CPU tests on Ubuntu
framework: pytorch_examples framework: pytorch_examples
runner: aws-general-8-plus runner: aws-general-8-plus
@@ -60,25 +70,47 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run fast PyTorch CPU tests - name: Run fast PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch' }} if: ${{ matrix.config.framework == 'pytorch' }}
run: | run: |
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-k "not Flax and not Onnx" \ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run fast Flax TPU tests
if: ${{ matrix.config.framework == 'flax' }}
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Flax" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run fast ONNXRuntime CPU tests
if: ${{ matrix.config.framework == 'onnxruntime' }}
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests/ tests/
- name: Run example PyTorch CPU tests - name: Run example PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch_examples' }} if: ${{ matrix.config.framework == 'pytorch_examples' }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m uv pip install peft timm
python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
examples examples

View File

@@ -1,14 +1,19 @@
name: Fast mps tests on main name: Fast mps tests on main
on: on:
workflow_dispatch: push:
branches:
- main
paths:
- "src/diffusers/**.py"
- "tests/**.py"
env: env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
RUN_SLOW: no RUN_SLOW: no
@@ -57,7 +62,7 @@ jobs:
HF_HOME: /System/Volumes/Data/mnt/cache HF_HOME: /System/Volumes/Data/mnt/cache
HF_TOKEN: ${{ secrets.HF_TOKEN }} HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: | run: |
${CONDA_RUN} python -m pytest -n 0 --make-reports=tests_torch_mps tests/ ${CONDA_RUN} python -m pytest -n 0 -s -v --make-reports=tests_torch_mps tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -32,7 +32,8 @@ jobs:
fetch-depth: 2 fetch-depth: 2
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -61,7 +62,7 @@ jobs:
group: aws-g4dn-2xlarge group: aws-g4dn-2xlarge
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all options: --shm-size "16gb" --ipc host --gpus 0
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
uses: actions/checkout@v3 uses: actions/checkout@v3
@@ -72,8 +73,9 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -83,8 +85,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
- name: Failure short reports - name: Failure short reports
@@ -105,7 +107,7 @@ jobs:
group: aws-g4dn-2xlarge group: aws-g4dn-2xlarge
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus all options: --shm-size "16gb" --ipc host --gpus 0
defaults: defaults:
run: run:
shell: bash shell: bash
@@ -122,9 +124,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
@@ -136,8 +139,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_${{ matrix.module }}_cuda \ --make-reports=tests_torch_${{ matrix.module }}_cuda \
tests/${{ matrix.module }} tests/${{ matrix.module }}
@@ -160,7 +163,7 @@ jobs:
group: aws-g4dn-2xlarge group: aws-g4dn-2xlarge
container: container:
image: diffusers/diffusers-pytorch-minimum-cuda image: diffusers/diffusers-pytorch-minimum-cuda
options: --shm-size "16gb" --ipc host --gpus all options: --shm-size "16gb" --ipc host --gpus 0
defaults: defaults:
run: run:
shell: bash shell: bash
@@ -172,9 +175,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
@@ -186,8 +190,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_minimum_cuda \ --make-reports=tests_torch_minimum_cuda \
tests/models/test_modeling_common.py \ tests/models/test_modeling_common.py \
tests/pipelines/test_pipelines_common.py \ tests/pipelines/test_pipelines_common.py \
@@ -209,6 +213,101 @@ jobs:
with: with:
name: torch_minimum_version_cuda_test_reports name: torch_minimum_version_cuda_test_reports
path: reports path: reports
flax_tpu_tests:
name: Flax TPU Tests
runs-on: docker-tpu
container:
image: diffusers/diffusers-flax-tpu
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --privileged
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment
run: |
python utils/print_env.py
- name: Run slow Flax TPU tests
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: |
python -m pytest -n 0 \
-s -v -k "Flax" \
--make-reports=tests_flax_tpu \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_flax_tpu_stats.txt
cat reports/tests_flax_tpu_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: flax_tpu_test_reports
path: reports
onnx_cuda_tests:
name: ONNX CUDA Tests
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment
run: |
python utils/print_env.py
- name: Run slow ONNXRuntime CUDA tests
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_onnx_cuda \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_onnx_cuda_stats.txt
cat reports/tests_onnx_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: onnx_cuda_test_reports
path: reports
run_torch_compile_tests: run_torch_compile_tests:
name: PyTorch Compile CUDA tests name: PyTorch Compile CUDA tests
@@ -218,7 +317,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
@@ -231,7 +330,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -240,7 +340,7 @@ jobs:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
RUN_COMPILE: yes RUN_COMPILE: yes
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_compile_cuda_failures_short.txt run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -260,7 +360,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-xformers-cuda image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
@@ -273,7 +373,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -281,7 +382,7 @@ jobs:
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_xformers_cuda_failures_short.txt run: cat reports/tests_torch_xformers_cuda_failures_short.txt
@@ -301,7 +402,7 @@ jobs:
container: container:
image: diffusers/diffusers-pytorch-cuda image: diffusers/diffusers-pytorch-cuda
options: --gpus all --shm-size "16gb" --ipc host options: --gpus 0 --shm-size "16gb" --ipc host
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers
@@ -315,18 +416,21 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run example tests on GPU - name: Run example tests on GPU
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/ python -m uv pip install timm
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -30,7 +30,7 @@ jobs:
group: aws-g4dn-2xlarge group: aws-g4dn-2xlarge
container: container:
image: ${{ github.event.inputs.docker_image }} image: ${{ github.event.inputs.docker_image }}
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: Validate test files input - name: Validate test files input
@@ -63,8 +63,9 @@ jobs:
- name: Install pytest - name: Install pytest
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft python -m uv pip install -e [quality,test]
python -m uv pip install peft
- name: Run tests - name: Run tests
env: env:

View File

@@ -31,7 +31,7 @@ jobs:
group: "${{ github.event.inputs.runner_type }}" group: "${{ github.event.inputs.runner_type }}"
container: container:
image: ${{ github.event.inputs.docker_image }} image: ${{ github.event.inputs.docker_image }}
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus all --privileged options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
steps: steps:
- name: Checkout diffusers - name: Checkout diffusers

.gitignore (vendored, 3 changes)
View File

@@ -125,9 +125,6 @@ dmypy.json
.vs .vs
.vscode .vscode
# Cursor
.cursor
# Pycharm # Pycharm
.idea .idea

View File

@@ -37,7 +37,7 @@ limitations under the License.
## Installation ## Installation
We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation. We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/) and [Flax](https://flax.readthedocs.io/en/latest/#installation), please refer to their official documentation.
### PyTorch ### PyTorch
@@ -53,6 +53,14 @@ With `conda` (maintained by the community):
conda install -c conda-forge diffusers conda install -c conda-forge diffusers
``` ```
### Flax
With `pip` (official package):
```bash
pip install --upgrade diffusers[flax]
```
### Apple Silicon (M1/M2) support ### Apple Silicon (M1/M2) support
Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide. Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide.
@@ -171,7 +179,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
<tr style="border-top: 2px solid black"> <tr style="border-top: 2px solid black">
<td>Text-guided Image Inpainting</td> <td>Text-guided Image Inpainting</td>
<td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint">Stable Diffusion Inpainting</a></td> <td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint">Stable Diffusion Inpainting</a></td>
<td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting"> stable-diffusion-v1-5/stable-diffusion-inpainting </a></td> <td><a href="https://huggingface.co/runwayml/stable-diffusion-inpainting"> runwayml/stable-diffusion-inpainting </a></td>
</tr> </tr>
<tr style="border-top: 2px solid black"> <tr style="border-top: 2px solid black">
<td>Image Variation</td> <td>Image Variation</td>

View File

@@ -1,69 +0,0 @@
# Diffusers Benchmarks
Welcome to Diffusers Benchmarks. These benchmarks are used to obtain latency and memory information of the most popular models across different scenarios such as:
* Base case i.e., when using `torch.bfloat16` and `torch.nn.functional.scaled_dot_product_attention`.
* Base + `torch.compile()`
* NF4 quantization
* Layerwise upcasting
Instead of full diffusion pipelines, only the forward pass of the respective model classes (such as `FluxTransformer2DModel`) is tested with the real checkpoints (such as `"black-forest-labs/FLUX.1-dev"`).
The entrypoint for running all the currently available benchmarks is `run_all.py`. However, you can also run individual benchmarks, e.g., `python benchmarking_flux.py`. Each run produces a CSV file containing various information about the benchmarks.
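To make the model-level idea concrete, here is a minimal hand-rolled sketch (not part of the suite) that times a single forward pass of the SDXL UNet with random inputs; it assumes an NVIDIA GPU with enough memory and uses plain CUDA events instead of the suite's timing helpers:
```py
# Minimal sketch: time one forward pass of a model class with random inputs.
import torch
from diffusers import UNet2DConditionModel

device, dtype = "cuda", torch.bfloat16
unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", torch_dtype=dtype
).to(device)

# Random inputs shaped like a 1024x1024 generation with a 77-token prompt.
inputs = {
    "sample": torch.randn(1, 4, 128, 128, device=device, dtype=dtype),
    "encoder_hidden_states": torch.randn(1, 77, 2048, device=device, dtype=dtype),
    "timestep": torch.tensor([1.0], device=device, dtype=dtype),
    "added_cond_kwargs": {
        "text_embeds": torch.randn(1, 1280, device=device, dtype=dtype),
        "time_ids": torch.ones(1, 6, device=device, dtype=dtype),
    },
}

with torch.no_grad():
    _ = unet(**inputs)  # warmup pass
    torch.cuda.synchronize()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    _ = unet(**inputs)
    end.record()
    torch.cuda.synchronize()

print(f"forward: {start.elapsed_time(end) / 1000:.3f}s | peak memory: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
```
The actual suite wraps this pattern in reusable helpers (`benchmark_fn`, `BenchmarkScenario`, and friends) defined in `benchmarking_utils.py`.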
The benchmarks are run on a weekly basis and the CI is defined in [benchmark.yml](../.github/workflows/benchmark.yml).
## Running the benchmarks manually
First set up `torch` and install `diffusers` from the root of the repository:
```py
pip install -e ".[quality,test]"
```
Then make sure the other dependencies are installed:
```sh
cd benchmarks/
pip install -r requirements.txt
```
We need to be authenticated to access some of the checkpoints used during benchmarking:
```sh
hf auth login
```
The benchmark CI runs on an L40 GPU with 128GB RAM, so the benchmarks are configured for NVIDIA GPUs. Make sure you have access to a similar machine, or modify the benchmarking scripts accordingly.
Then you can either launch the entire benchmarking suite by running:
```sh
python run_all.py
```
Or, you can run the individual benchmarks.
## Customizing the benchmarks
We define "scenarios" to cover the most common ways in which these models are used. You can
define a new scenario, modifying an existing benchmark file:
```py
BenchmarkScenario(
name=f"{CKPT_ID}-bnb-8bit",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
"quantization_config": BitsAndBytesConfig(load_in_8bit=True),
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
)
```
You can also configure a new model-level benchmark and add it to the existing suite. To do so, defining a valid benchmarking file like `benchmarking_flux.py` is enough, as sketched below.
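For illustration, a hypothetical standalone file (call it `benchmarking_my_flux_8bit.py`; the file name and output filename are made up) built around the bnb-8bit scenario above could look roughly like this, reusing the input shapes from the Flux benchmark and the helpers from `benchmarking_utils.py`:
```py
# Hypothetical standalone benchmarking file (a sketch, not part of the suite).
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import BitsAndBytesConfig, FluxTransformer2DModel
from diffusers.utils.testing_utils import torch_device

CKPT_ID = "black-forest-labs/FLUX.1-dev"
RESULT_FILENAME = "flux_bnb_8bit.csv"


def get_input_dict(**device_dtype_kwargs):
    # Random inputs shaped like a 1024x1024 generation with a 512-token prompt.
    return {
        "hidden_states": torch.randn(1, 4096, 64, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, 512, 4096, **device_dtype_kwargs),
        "pooled_projections": torch.randn(1, 768, **device_dtype_kwargs),
        "img_ids": torch.ones(512, 3, **device_dtype_kwargs),
        "txt_ids": torch.ones(4096, 3, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
        "guidance": torch.tensor([1.0], **device_dtype_kwargs),
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bnb-8bit",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
                "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
        ),
    ]
    # Same collation helper the other benchmarking files call.
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
```
Once such a file exists, it can be run on its own just like the other `benchmarking_*.py` files.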
Happy benchmarking 🧨

benchmarks/base_classes.py (new file, 346 additions)
View File

@@ -0,0 +1,346 @@
import os
import sys
import torch
from diffusers import (
AutoPipelineForImage2Image,
AutoPipelineForInpainting,
AutoPipelineForText2Image,
ControlNetModel,
LCMScheduler,
StableDiffusionAdapterPipeline,
StableDiffusionControlNetPipeline,
StableDiffusionXLAdapterPipeline,
StableDiffusionXLControlNetPipeline,
T2IAdapter,
WuerstchenCombinedPipeline,
)
from diffusers.utils import load_image
sys.path.append(".")
from utils import ( # noqa: E402
BASE_PATH,
PROMPT,
BenchmarkInfo,
benchmark_fn,
bytes_to_giga_bytes,
flush,
generate_csv_dict,
write_to_csv,
)
RESOLUTION_MAPPING = {
"Lykon/DreamShaper": (512, 512),
"lllyasviel/sd-controlnet-canny": (512, 512),
"diffusers/controlnet-canny-sdxl-1.0": (1024, 1024),
"TencentARC/t2iadapter_canny_sd14v1": (512, 512),
"TencentARC/t2i-adapter-canny-sdxl-1.0": (1024, 1024),
"stabilityai/stable-diffusion-2-1": (768, 768),
"stabilityai/stable-diffusion-xl-base-1.0": (1024, 1024),
"stabilityai/stable-diffusion-xl-refiner-1.0": (1024, 1024),
"stabilityai/sdxl-turbo": (512, 512),
}
class BaseBenchmark:
pipeline_class = None
def __init__(self, args):
super().__init__()
def run_inference(self, args):
raise NotImplementedError
def benchmark(self, args):
raise NotImplementedError
def get_result_filepath(self, args):
pipeline_class_name = str(self.pipe.__class__.__name__)
name = (
args.ckpt.replace("/", "_")
+ "_"
+ pipeline_class_name
+ f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath
class TextToImageBenchmark(BaseBenchmark):
pipeline_class = AutoPipelineForText2Image
def __init__(self, args):
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
if args.run_compile:
if not isinstance(pipe, WuerstchenCombinedPipeline):
pipe.unet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
if hasattr(pipe, "movq") and getattr(pipe, "movq", None) is not None:
pipe.movq.to(memory_format=torch.channels_last)
pipe.movq = torch.compile(pipe.movq, mode="reduce-overhead", fullgraph=True)
else:
print("Run torch compile")
pipe.decoder = torch.compile(pipe.decoder, mode="reduce-overhead", fullgraph=True)
pipe.vqgan = torch.compile(pipe.vqgan, mode="reduce-overhead", fullgraph=True)
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
def benchmark(self, args):
flush()
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)
pipeline_class_name = str(self.pipe.__class__.__name__)
flush()
csv_dict = generate_csv_dict(
pipeline_cls=pipeline_class_name, ckpt=args.ckpt, args=args, benchmark_info=benchmark_info
)
filepath = self.get_result_filepath(args)
write_to_csv(filepath, csv_dict)
print(f"Logs written to: {filepath}")
flush()
class TurboTextToImageBenchmark(TextToImageBenchmark):
def __init__(self, args):
super().__init__(args)
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=0.0,
)
class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
lora_id = "latent-consistency/lcm-lora-sdxl"
def __init__(self, args):
super().__init__(args)
self.pipe.load_lora_weights(self.lora_id)
self.pipe.fuse_lora()
self.pipe.unload_lora_weights()
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
def get_result_filepath(self, args):
pipeline_class_name = str(self.pipe.__class__.__name__)
name = (
self.lora_id.replace("/", "_")
+ "_"
+ pipeline_class_name
+ f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=1.0,
)
def benchmark(self, args):
flush()
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)
pipeline_class_name = str(self.pipe.__class__.__name__)
flush()
csv_dict = generate_csv_dict(
pipeline_cls=pipeline_class_name, ckpt=self.lora_id, args=args, benchmark_info=benchmark_info
)
filepath = self.get_result_filepath(args)
write_to_csv(filepath, csv_dict)
print(f"Logs written to: {filepath}")
flush()
class ImageToImageBenchmark(TextToImageBenchmark):
pipeline_class = AutoPipelineForImage2Image
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/1665_Girl_with_a_Pearl_Earring.jpg"
image = load_image(url).convert("RGB")
def __init__(self, args):
super().__init__(args)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class TurboImageToImageBenchmark(ImageToImageBenchmark):
def __init__(self, args):
super().__init__(args)
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
guidance_scale=0.0,
strength=0.5,
)
class InpaintingBenchmark(ImageToImageBenchmark):
pipeline_class = AutoPipelineForInpainting
mask_url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/overture-creations-5sI6fQgYIuo_mask.png"
mask = load_image(mask_url).convert("RGB")
def __init__(self, args):
super().__init__(args)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
self.mask = self.mask.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
mask_image=self.mask,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
image = load_image(url)
def __init__(self, args):
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
pipe.load_ip_adapter(
args.ip_adapter_id[0],
subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
weight_name=args.ip_adapter_id[1],
)
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
ip_adapter_image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class ControlNetBenchmark(TextToImageBenchmark):
pipeline_class = StableDiffusionControlNetPipeline
aux_network_class = ControlNetModel
root_ckpt = "Lykon/DreamShaper"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_image_condition.png"
image = load_image(url).convert("RGB")
def __init__(self, args):
aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = self.pipeline_class.from_pretrained(self.root_ckpt, controlnet=aux_network, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
pipe.controlnet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.controlnet = torch.compile(pipe.controlnet, mode="reduce-overhead", fullgraph=True)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class ControlNetSDXLBenchmark(ControlNetBenchmark):
pipeline_class = StableDiffusionXLControlNetPipeline
root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
def __init__(self, args):
super().__init__(args)
class T2IAdapterBenchmark(ControlNetBenchmark):
pipeline_class = StableDiffusionAdapterPipeline
aux_network_class = T2IAdapter
root_ckpt = "Lykon/DreamShaper"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter.png"
image = load_image(url).convert("L")
def __init__(self, args):
aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
pipe = self.pipeline_class.from_pretrained(self.root_ckpt, adapter=aux_network, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
pipe.adapter.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.adapter = torch.compile(pipe.adapter, mode="reduce-overhead", fullgraph=True)
self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):
pipeline_class = StableDiffusionXLAdapterPipeline
root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter_sdxl.png"
image = load_image(url)
def __init__(self, args):
super().__init__(args)

View File

@@ -0,0 +1,26 @@
import argparse
import sys
sys.path.append(".")
from base_classes import ControlNetBenchmark, ControlNetSDXLBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="lllyasviel/sd-controlnet-canny",
choices=["lllyasviel/sd-controlnet-canny", "diffusers/controlnet-canny-sdxl-1.0"],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = (
ControlNetBenchmark(args) if args.ckpt == "lllyasviel/sd-controlnet-canny" else ControlNetSDXLBenchmark(args)
)
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,33 @@
import argparse
import sys
sys.path.append(".")
from base_classes import IPAdapterTextToImageBenchmark # noqa: E402
IP_ADAPTER_CKPTS = {
# because original SD v1.5 has been taken down.
"Lykon/DreamShaper": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
"stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="rstabilityai/stable-diffusion-xl-base-1.0",
choices=list(IP_ADAPTER_CKPTS.keys()),
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
benchmark_pipe = IPAdapterTextToImageBenchmark(args)
args.ckpt = f"{args.ckpt} (IP-Adapter)"
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,29 @@
import argparse
import sys
sys.path.append(".")
from base_classes import ImageToImageBenchmark, TurboImageToImageBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=[
"Lykon/DreamShaper",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-refiner-1.0",
"stabilityai/sdxl-turbo",
],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = ImageToImageBenchmark(args) if "turbo" not in args.ckpt else TurboImageToImageBenchmark(args)
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,28 @@
import argparse
import sys
sys.path.append(".")
from base_classes import InpaintingBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=[
"Lykon/DreamShaper",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-base-1.0",
],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = InpaintingBenchmark(args)
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,28 @@
import argparse
import sys
sys.path.append(".")
from base_classes import T2IAdapterBenchmark, T2IAdapterSDXLBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="TencentARC/t2iadapter_canny_sd14v1",
choices=["TencentARC/t2iadapter_canny_sd14v1", "TencentARC/t2i-adapter-canny-sdxl-1.0"],
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = (
T2IAdapterBenchmark(args)
if args.ckpt == "TencentARC/t2iadapter_canny_sd14v1"
else T2IAdapterSDXLBenchmark(args)
)
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,23 @@
import argparse
import sys
sys.path.append(".")
from base_classes import LCMLoRATextToImageBenchmark # noqa: E402
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="stabilityai/stable-diffusion-xl-base-1.0",
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=4)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_pipe = LCMLoRATextToImageBenchmark(args)
benchmark_pipe.benchmark(args)

View File

@@ -0,0 +1,40 @@
import argparse
import sys
sys.path.append(".")
from base_classes import TextToImageBenchmark, TurboTextToImageBenchmark # noqa: E402
ALL_T2I_CKPTS = [
"Lykon/DreamShaper",
"segmind/SSD-1B",
"stabilityai/stable-diffusion-xl-base-1.0",
"kandinsky-community/kandinsky-2-2-decoder",
"warp-ai/wuerstchen",
"stabilityai/sdxl-turbo",
]
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="Lykon/DreamShaper",
choices=ALL_T2I_CKPTS,
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
benchmark_cls = None
if "turbo" in args.ckpt:
benchmark_cls = TurboTextToImageBenchmark
else:
benchmark_cls = TextToImageBenchmark
benchmark_pipe = benchmark_cls(args)
benchmark_pipe.benchmark(args)

View File

@@ -1,98 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "black-forest-labs/FLUX.1-dev"
RESULT_FILENAME = "flux.csv"
def get_input_dict(**device_dtype_kwargs):
# resolution: 1024x1024
# maximum sequence length 512
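# i.e., 1024x1024 pixels -> 128x128 latents (VAE x8 downsampling) -> 64x64 = 4096 packed tokens of 64 channels (16 latent channels x 2x2 patch)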
hidden_states = torch.randn(1, 4096, 64, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
pooled_prompt_embeds = torch.randn(1, 768, **device_dtype_kwargs)
image_ids = torch.ones(512, 3, **device_dtype_kwargs)
text_ids = torch.ones(4096, 3, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
guidance = torch.tensor([1.0], **device_dtype_kwargs)
return {
"hidden_states": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"img_ids": image_ids,
"txt_ids": text_ids,
"pooled_projections": pooled_prompt_embeds,
"timestep": timestep,
"guidance": guidance,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-bnb-nf4",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
"quantization_config": BitsAndBytesConfig(
load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4"
),
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=FluxTransformer2DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)

View File

@@ -1,80 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import LTXVideoTransformer3DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "Lightricks/LTX-Video-0.9.7-dev"
RESULT_FILENAME = "ltx.csv"
def get_input_dict(**device_dtype_kwargs):
# 512x704 (161 frames)
# `max_sequence_length`: 256
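# i.e., (512/32) x (704/32) spatial positions x 21 latent frames ((161 - 1)/8 + 1) = 16 x 22 x 21 = 7392 tokens of 128 channels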
hidden_states = torch.randn(1, 7392, 128, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 256, 4096, **device_dtype_kwargs)
encoder_attention_mask = torch.ones(1, 256, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
video_coords = torch.randn(1, 3, 7392, **device_dtype_kwargs)
return {
"hidden_states": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"encoder_attention_mask": encoder_attention_mask,
"timestep": timestep,
"video_coords": video_coords,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=LTXVideoTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)

View File

@@ -1,82 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import UNet2DConditionModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "stabilityai/stable-diffusion-xl-base-1.0"
RESULT_FILENAME = "sdxl.csv"
def get_input_dict(**device_dtype_kwargs):
# height: 1024
# width: 1024
# max_sequence_length: 77
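# i.e., 1024x1024 pixels -> 128x128 latents (SDXL VAE, x8 downsampling) with 4 latent channels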
hidden_states = torch.randn(1, 4, 128, 128, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 77, 2048, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
added_cond_kwargs = {
"text_embeds": torch.randn(1, 1280, **device_dtype_kwargs),
"time_ids": torch.ones(1, 6, **device_dtype_kwargs),
}
return {
"sample": hidden_states,
"encoder_hidden_states": encoder_hidden_states,
"timestep": timestep,
"added_cond_kwargs": added_cond_kwargs,
}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=UNet2DConditionModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "unet",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)

View File

@@ -1,244 +0,0 @@
import gc
import inspect
import logging
import os
import queue
import threading
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional, Union
import pandas as pd
import torch
import torch.utils.benchmark as benchmark
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.testing_utils import require_torch_gpu, torch_device
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)
NUM_WARMUP_ROUNDS = 5
def benchmark_fn(f, *args, **kwargs):
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)",
globals={"args": args, "kwargs": kwargs, "f": f},
num_threads=1,
)
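# blocked_autorange() picks the number of measurement runs adaptively; we report the mean per-run time in seconds.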
return float(f"{(t0.blocked_autorange().mean):.3f}")
def flush():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
# Adapted from https://github.com/lucasb-eyer/cnn_vit_benchmarks/blob/15b665ff758e8062131353076153905cae00a71f/main.py
def calculate_flops(model, input_dict):
try:
from torchprofile import profile_macs
except ModuleNotFoundError:
raise
# This is a hacky way to convert the kwargs to args as `profile_macs` cries about kwargs.
sig = inspect.signature(model.forward)
param_names = [
p.name
for p in sig.parameters.values()
if p.kind
in (
inspect.Parameter.POSITIONAL_ONLY,
inspect.Parameter.POSITIONAL_OR_KEYWORD,
)
and p.name != "self"
]
bound = sig.bind_partial(**input_dict)
bound.apply_defaults()
args = tuple(bound.arguments[name] for name in param_names)
model.eval()
with torch.no_grad():
macs = profile_macs(model, args)
flops = 2 * macs # 1 MAC operation = 2 FLOPs (1 multiplication + 1 addition)
return flops
def calculate_params(model):
return sum(p.numel() for p in model.parameters())
# Users can define their own init function if this one doesn't suffice; for most
# cases, this default should be enough.
def model_init_fn(model_cls, group_offload_kwargs=None, layerwise_upcasting=False, **init_kwargs):
model = model_cls.from_pretrained(**init_kwargs).eval()
if group_offload_kwargs and isinstance(group_offload_kwargs, dict):
model.enable_group_offload(**group_offload_kwargs)
else:
model.to(torch_device)
if layerwise_upcasting:
model.enable_layerwise_casting(
storage_dtype=torch.float8_e4m3fn, compute_dtype=init_kwargs.get("torch_dtype", torch.bfloat16)
)
return model
@dataclass
class BenchmarkScenario:
name: str
model_cls: ModelMixin
model_init_kwargs: Dict[str, Any]
model_init_fn: Callable
get_model_input_dict: Callable
compile_kwargs: Optional[Dict[str, Any]] = None
@require_torch_gpu
class BenchmarkMixin:
def pre_benchmark(self):
flush()
torch.compiler.reset()
def post_benchmark(self, model):
model.cpu()
flush()
torch.compiler.reset()
@torch.no_grad()
def run_benchmark(self, scenario: BenchmarkScenario):
# 0) Basic stats
logger.info(f"Running scenario: {scenario.name}.")
try:
model = model_init_fn(scenario.model_cls, **scenario.model_init_kwargs)
num_params = round(calculate_params(model) / 1e9, 2)
try:
flops = round(calculate_flops(model, input_dict=scenario.get_model_input_dict()) / 1e9, 2)
except Exception as e:
logger.info(f"Problem in calculating FLOPs:\n{e}")
flops = None
model.cpu()
del model
except Exception as e:
logger.info(f"Error while initializing the model and calculating FLOPs:\n{e}")
return {}
self.pre_benchmark()
# 1) plain stats
results = {}
plain = None
try:
plain = self._run_phase(
model_cls=scenario.model_cls,
init_fn=scenario.model_init_fn,
init_kwargs=scenario.model_init_kwargs,
get_input_fn=scenario.get_model_input_dict,
compile_kwargs=None,
)
except Exception as e:
logger.info(f"Benchmark could not be run with the following error:\n{e}")
return results
# 2) compiled stats (if any)
compiled = {"time": None, "memory": None}
if scenario.compile_kwargs:
try:
compiled = self._run_phase(
model_cls=scenario.model_cls,
init_fn=scenario.model_init_fn,
init_kwargs=scenario.model_init_kwargs,
get_input_fn=scenario.get_model_input_dict,
compile_kwargs=scenario.compile_kwargs,
)
except Exception as e:
logger.info(f"Compilation benchmark could not be run with the following error\n: {e}")
if plain is None:
return results
# 3) merge
result = {
"scenario": scenario.name,
"model_cls": scenario.model_cls.__name__,
"num_params_B": num_params,
"flops_G": flops,
"time_plain_s": plain["time"],
"mem_plain_GB": plain["memory"],
"time_compile_s": compiled["time"],
"mem_compile_GB": compiled["memory"],
}
if scenario.compile_kwargs:
result["fullgraph"] = scenario.compile_kwargs.get("fullgraph", False)
result["mode"] = scenario.compile_kwargs.get("mode", "default")
else:
result["fullgraph"], result["mode"] = None, None
return result
def run_bencmarks_and_collate(self, scenarios: Union[BenchmarkScenario, list[BenchmarkScenario]], filename: str):
if not isinstance(scenarios, list):
scenarios = [scenarios]
record_queue = queue.Queue()
stop_signal = object()
def _writer_thread():
while True:
item = record_queue.get()
if item is stop_signal:
break
df_row = pd.DataFrame([item])
write_header = not os.path.exists(filename)
df_row.to_csv(filename, mode="a", header=write_header, index=False)
record_queue.task_done()
record_queue.task_done()
writer = threading.Thread(target=_writer_thread, daemon=True)
writer.start()
for s in scenarios:
try:
record = self.run_benchmark(s)
if record:
record_queue.put(record)
else:
logger.info(f"Record empty from scenario: {s.name}.")
except Exception as e:
logger.info(f"Running scenario ({s.name}) led to error:\n{e}")
record_queue.put(stop_signal)
logger.info(f"Results serialized to {filename=}.")
def _run_phase(
self,
*,
model_cls: ModelMixin,
init_fn: Callable,
init_kwargs: Dict[str, Any],
get_input_fn: Callable,
compile_kwargs: Optional[Dict[str, Any]],
) -> Dict[str, float]:
# setup
self.pre_benchmark()
# init & (optional) compile
model = init_fn(model_cls, **init_kwargs)
if compile_kwargs:
model.compile(**compile_kwargs)
# build inputs
inp = get_input_fn()
# measure
run_ctx = torch._inductor.utils.fresh_inductor_cache() if compile_kwargs else nullcontext()
with run_ctx:
for _ in range(NUM_WARMUP_ROUNDS):
_ = model(**inp)
time_s = benchmark_fn(lambda m, d: m(**d), model, inp)
mem_gb = torch.cuda.max_memory_allocated() / (1024**3)
mem_gb = round(mem_gb, 2)
# teardown
self.post_benchmark(model)
del model
return {"time": time_s, "memory": mem_gb}

View File

@@ -1,74 +0,0 @@
from functools import partial
import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
from diffusers import WanTransformer3DModel
from diffusers.utils.testing_utils import torch_device
CKPT_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
RESULT_FILENAME = "wan.csv"
def get_input_dict(**device_dtype_kwargs):
# height: 480
# width: 832
# num_frames: 81
# max_sequence_length: 512
hidden_states = torch.randn(1, 16, 21, 60, 104, **device_dtype_kwargs)
encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
timestep = torch.tensor([1.0], **device_dtype_kwargs)
return {"hidden_states": hidden_states, "encoder_hidden_states": encoder_hidden_states, "timestep": timestep}
if __name__ == "__main__":
scenarios = [
BenchmarkScenario(
name=f"{CKPT_ID}-bf16",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=model_init_fn,
compile_kwargs={"fullgraph": True},
),
BenchmarkScenario(
name=f"{CKPT_ID}-layerwise-upcasting",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
),
BenchmarkScenario(
name=f"{CKPT_ID}-group-offload-leaf",
model_cls=WanTransformer3DModel,
model_init_kwargs={
"pretrained_model_name_or_path": CKPT_ID,
"torch_dtype": torch.bfloat16,
"subfolder": "transformer",
},
get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
model_init_fn=partial(
model_init_fn,
group_offload_kwargs={
"onload_device": torch_device,
"offload_device": torch.device("cpu"),
"offload_type": "leaf_level",
"use_stream": True,
"non_blocking": True,
},
),
),
]
runner = BenchmarkMixin()
runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
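
A hedged sanity check of the latent shape used in get_input_dict above: the (1, 16, 21, 60, 104) tensor is consistent with the commented resolution and frame count if one assumes the usual Wan VAE compression factors (8x spatial, 4x temporal on the frames after the first one).

# Assumed compression factors; only the height/width/num_frames values come from the script above.
height, width, num_frames = 480, 832, 81
latent_height = height // 8                 # 60
latent_width = width // 8                   # 104
latent_frames = (num_frames - 1) // 4 + 1   # 21
print(latent_frames, latent_height, latent_width)  # 21 60 104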

View File

@@ -1,166 +0,0 @@
import argparse
import os
import sys
import gpustat
import pandas as pd
import psycopg2
import psycopg2.extras
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json
register_adapter(dict, Json)
FINAL_CSV_FILENAME = "collated_results.csv"
# https://github.com/huggingface/transformers/blob/593e29c5e2a9b17baec010e8dc7c1431fed6e841/benchmark/init_db.sql#L27
BENCHMARKS_TABLE_NAME = "benchmarks"
MEASUREMENTS_TABLE_NAME = "model_measurements"
def _init_benchmark(conn, branch, commit_id, commit_msg):
gpu_stats = gpustat.GPUStatCollection.new_query()
metadata = {"gpu_name": gpu_stats[0]["name"]}
repository = "huggingface/diffusers"
with conn.cursor() as cur:
cur.execute(
f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
(repository, branch, commit_id, commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
print(f"Initialised benchmark #{benchmark_id}")
return benchmark_id
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
try:
conn = psycopg2.connect(
host=os.getenv("PGHOST"),
database=os.getenv("PGDATABASE"),
user=os.getenv("PGUSER"),
password=os.getenv("PGPASSWORD"),
)
print("DB connection established successfully.")
except Exception as e:
print(f"Problem during DB init: {e}")
sys.exit(1)
try:
benchmark_id = _init_benchmark(
conn=conn,
branch=args.branch,
commit_id=args.commit_id,
commit_msg=args.commit_msg,
)
except Exception as e:
print(f"Problem during initializing benchmark: {e}")
sys.exit(1)
cur = conn.cursor()
df = pd.read_csv(FINAL_CSV_FILENAME)
# Helper to cast values (or None) given a dtype
def _cast_value(val, dtype: str):
if pd.isna(val):
return None
if dtype == "text":
return str(val).strip()
if dtype == "float":
try:
return float(val)
except ValueError:
return None
if dtype == "bool":
s = str(val).strip().lower()
if s in ("true", "t", "yes", "1"):
return True
if s in ("false", "f", "no", "0"):
return False
if val in (1, 1.0):
return True
if val in (0, 0.0):
return False
return None
return val
try:
rows_to_insert = []
for _, row in df.iterrows():
scenario = _cast_value(row.get("scenario"), "text")
model_cls = _cast_value(row.get("model_cls"), "text")
num_params_B = _cast_value(row.get("num_params_B"), "float")
flops_G = _cast_value(row.get("flops_G"), "float")
time_plain_s = _cast_value(row.get("time_plain_s"), "float")
mem_plain_GB = _cast_value(row.get("mem_plain_GB"), "float")
time_compile_s = _cast_value(row.get("time_compile_s"), "float")
mem_compile_GB = _cast_value(row.get("mem_compile_GB"), "float")
fullgraph = _cast_value(row.get("fullgraph"), "bool")
mode = _cast_value(row.get("mode"), "text")
# If "github_sha" column exists in the CSV, cast it; else default to None
if "github_sha" in df.columns:
github_sha = _cast_value(row.get("github_sha"), "text")
else:
github_sha = None
measurements = {
"scenario": scenario,
"model_cls": model_cls,
"num_params_B": num_params_B,
"flops_G": flops_G,
"time_plain_s": time_plain_s,
"mem_plain_GB": mem_plain_GB,
"time_compile_s": time_compile_s,
"mem_compile_GB": mem_compile_GB,
"fullgraph": fullgraph,
"mode": mode,
"github_sha": github_sha,
}
rows_to_insert.append((benchmark_id, measurements))
# Batch-insert all rows
insert_sql = f"""
INSERT INTO {MEASUREMENTS_TABLE_NAME} (
benchmark_id,
measurements
)
VALUES (%s, %s);
"""
psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
conn.commit()
cur.close()
conn.close()
except Exception as e:
print(f"Exception: {e}")
sys.exit(1)
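
For completeness, a hedged sketch of reading the inserted rows back out. The table and column names come from the INSERT statements above; the benchmark_id value and everything else is illustrative.

import os
import psycopg2

conn = psycopg2.connect(
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE"),
    user=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
)
with conn.cursor() as cur:
    # measurements holds the JSON payload assembled by the script above.
    cur.execute(
        "SELECT measurements FROM model_measurements WHERE benchmark_id = %s",
        (1,),  # illustrative benchmark_id
    )
    for (measurements,) in cur.fetchall():
        print(measurements)
conn.close()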

View File

@@ -1,19 +1,19 @@
import os import glob
import sys
import pandas as pd import pandas as pd
from huggingface_hub import hf_hub_download, upload_file from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils import EntryNotFoundError from huggingface_hub.utils import EntryNotFoundError
REPO_ID = "diffusers/benchmarks" sys.path.append(".")
from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv # noqa: E402
def has_previous_benchmark() -> str: def has_previous_benchmark() -> str:
from run_all import FINAL_CSV_FILENAME
csv_path = None csv_path = None
try: try:
csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILENAME) csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
except EntryNotFoundError: except EntryNotFoundError:
csv_path = None csv_path = None
return csv_path return csv_path
@@ -26,50 +26,46 @@ def filter_float(value):
def push_to_hf_dataset(): def push_to_hf_dataset():
from run_all import FINAL_CSV_FILENAME, GITHUB_SHA all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
collate_csv(all_csvs, FINAL_CSV_FILE)
# If there's an existing benchmark file, we should report the changes.
csv_path = has_previous_benchmark() csv_path = has_previous_benchmark()
if csv_path is not None: if csv_path is not None:
current_results = pd.read_csv(FINAL_CSV_FILENAME) current_results = pd.read_csv(FINAL_CSV_FILE)
previous_results = pd.read_csv(csv_path) previous_results = pd.read_csv(csv_path)
numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
numeric_columns = [
c for c in numeric_columns if c not in ["batch_size", "num_inference_steps", "actual_gpu_memory (gbs)"]
]
for column in numeric_columns: for column in numeric_columns:
# get previous values as floats, aligned to current index previous_results[column] = previous_results[column].map(lambda x: filter_float(x))
prev_vals = previous_results[column].map(filter_float).reindex(current_results.index)
# get current values as floats # Calculate the percentage change
curr_vals = current_results[column].astype(float) current_results[column] = current_results[column].astype(float)
previous_results[column] = previous_results[column].astype(float)
percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100
# stringify the current values # Format the values with '+' or '-' sign and append to original values
curr_str = curr_vals.map(str) current_results[column] = current_results[column].map(str) + percent_change.map(
lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
# build an appendage only when prev exists and differs
append_str = prev_vals.where(prev_vals.notnull() & (prev_vals != curr_vals), other=pd.NA).map(
lambda x: f" ({x})" if pd.notnull(x) else ""
) )
# There might be newly added rows. So, filter out the NaNs.
current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))
# combine # Overwrite the current result file.
current_results[column] = curr_str + append_str current_results.to_csv(FINAL_CSV_FILE, index=False)
os.remove(FINAL_CSV_FILENAME)
current_results.to_csv(FINAL_CSV_FILENAME, index=False)
commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results" commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
upload_file( upload_file(
repo_id=REPO_ID, repo_id=REPO_ID,
path_in_repo=FINAL_CSV_FILENAME, path_in_repo=FINAL_CSV_FILE,
path_or_fileobj=FINAL_CSV_FILENAME, path_or_fileobj=FINAL_CSV_FILE,
repo_type="dataset", repo_type="dataset",
commit_message=commit_message, commit_message=commit_message,
) )
upload_file(
repo_id="diffusers/benchmark-analyzer",
path_in_repo=FINAL_CSV_FILENAME,
path_or_fileobj=FINAL_CSV_FILENAME,
repo_type="space",
commit_message=commit_message,
)
if __name__ == "__main__": if __name__ == "__main__":
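
To make the numeric-column handling in this diff concrete: one version of push_results annotates each current value by appending the previous run's value in parentheses when it differs, while the other appends a percentage change. A minimal pandas sketch of the parenthetical annotation, with made-up numbers:

import pandas as pd

curr_vals = pd.Series([0.903, 1.210])  # current run (illustrative)
prev_vals = pd.Series([0.915, 1.210])  # previous run, aligned to the same index
append_str = prev_vals.where(prev_vals.notnull() & (prev_vals != curr_vals), other=pd.NA).map(
    lambda x: f" ({x})" if pd.notnull(x) else ""
)
annotated = curr_vals.map(str) + append_str
# annotated -> ["0.903 (0.915)", "1.21"]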

View File

@@ -1,6 +0,0 @@
pandas
psutil
gpustat
torchprofile
bitsandbytes
psycopg2==2.9.9

View File

@@ -1,84 +1,101 @@
import glob import glob
import logging
import os
import subprocess import subprocess
import sys
import pandas as pd from typing import List
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s") sys.path.append(".")
logger = logging.getLogger(__name__) from benchmark_text_to_image import ALL_T2I_CKPTS # noqa: E402
PATTERN = "benchmarking_*.py"
FINAL_CSV_FILENAME = "collated_results.csv" PATTERN = "benchmark_*.py"
GITHUB_SHA = os.getenv("GITHUB_SHA", None)
class SubprocessCallException(Exception): class SubprocessCallException(Exception):
pass pass
def run_command(command: list[str], return_stdout=False): # Taken from `test_examples_utils.py`
def run_command(command: List[str], return_stdout=False):
"""
Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
if an error occurred while running `command`
"""
try: try:
output = subprocess.check_output(command, stderr=subprocess.STDOUT) output = subprocess.check_output(command, stderr=subprocess.STDOUT)
if return_stdout and hasattr(output, "decode"): if return_stdout:
return output.decode("utf-8") if hasattr(output, "decode"):
output = output.decode("utf-8")
return output
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise SubprocessCallException(f"Command `{' '.join(command)}` failed with:\n{e.output.decode()}") from e raise SubprocessCallException(
f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
) from e
def merge_csvs(final_csv: str = "collated_results.csv"): def main():
all_csvs = glob.glob("*.csv") python_files = glob.glob(PATTERN)
all_csvs = [f for f in all_csvs if f != final_csv]
if not all_csvs:
logger.info("No result CSVs found to merge.")
return
df_list = []
for f in all_csvs:
try:
d = pd.read_csv(f)
except pd.errors.EmptyDataError:
# If a file exists but is zero bytes or corrupted, skip it
continue
df_list.append(d)
if not df_list:
logger.info("All result CSVs were empty or invalid; nothing to merge.")
return
final_df = pd.concat(df_list, ignore_index=True)
if GITHUB_SHA is not None:
final_df["github_sha"] = GITHUB_SHA
final_df.to_csv(final_csv, index=False)
logger.info(f"Merged {len(all_csvs)} partial CSVs → {final_csv}.")
def run_scripts():
python_files = sorted(glob.glob(PATTERN))
python_files = [f for f in python_files if f != "benchmarking_utils.py"]
for file in python_files: for file in python_files:
script_name = file.split(".py")[0].split("_")[-1] # example: benchmarking_foo.py -> foo print(f"****** Running file: {file} ******")
logger.info(f"\n****** Running file: {file} ******")
partial_csv = f"{script_name}.csv" # Run with canonical settings.
if os.path.exists(partial_csv): if file != "benchmark_text_to_image.py" and file != "benchmark_ip_adapters.py":
logger.info(f"Found {partial_csv}. Removing for safer numbers and duplication.") command = f"python {file}"
os.remove(partial_csv) run_command(command.split())
command = ["python", file] command += " --run_compile"
try: run_command(command.split())
run_command(command)
logger.info(f"{file} finished normally.")
except SubprocessCallException as e:
logger.info(f"Error running {file}:\n{e}")
finally:
logger.info(f"→ Merging partial CSVs after {file}")
merge_csvs(final_csv=FINAL_CSV_FILENAME)
logger.info(f"\nAll scripts attempted. Final collated CSV: {FINAL_CSV_FILENAME}") # Run variants.
for file in python_files:
# See: https://github.com/pytorch/pytorch/issues/129637
if file == "benchmark_ip_adapters.py":
continue
if file == "benchmark_text_to_image.py":
for ckpt in ALL_T2I_CKPTS:
command = f"python {file} --ckpt {ckpt}"
if "turbo" in ckpt:
command += " --num_inference_steps 1"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file == "benchmark_sd_img.py":
for ckpt in ["stabilityai/stable-diffusion-xl-refiner-1.0", "stabilityai/sdxl-turbo"]:
command = f"python {file} --ckpt {ckpt}"
if ckpt == "stabilityai/sdxl-turbo":
command += " --num_inference_steps 2"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
command = f"python {file} --ckpt {sdxl_ckpt}"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
elif file in ["benchmark_controlnet.py", "benchmark_t2i_adapter.py"]:
sdxl_ckpt = (
"diffusers/controlnet-canny-sdxl-1.0"
if "controlnet" in file
else "TencentARC/t2i-adapter-canny-sdxl-1.0"
)
command = f"python {file} --ckpt {sdxl_ckpt}"
run_command(command.split())
command += " --run_compile"
run_command(command.split())
if __name__ == "__main__": if __name__ == "__main__":
run_scripts() main()

benchmarks/utils.py (new file, 98 lines)
View File

@@ -0,0 +1,98 @@
import argparse
import csv
import gc
import os
from dataclasses import dataclass
from typing import Dict, List, Union
import torch
import torch.utils.benchmark as benchmark
GITHUB_SHA = os.getenv("GITHUB_SHA", None)
BENCHMARK_FIELDS = [
"pipeline_cls",
"ckpt_id",
"batch_size",
"num_inference_steps",
"model_cpu_offload",
"run_compile",
"time (secs)",
"memory (gbs)",
"actual_gpu_memory (gbs)",
"github_sha",
]
PROMPT = "ghibli style, a fantasy landscape with castles"
BASE_PATH = os.getenv("BASE_PATH", ".")
TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))
REPO_ID = "diffusers/benchmarks"
FINAL_CSV_FILE = "collated_results.csv"
@dataclass
class BenchmarkInfo:
time: float
memory: float
def flush():
"""Wipes off memory."""
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
def bytes_to_giga_bytes(bytes):
return f"{(bytes / 1024 / 1024 / 1024):.3f}"
def benchmark_fn(f, *args, **kwargs):
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)",
globals={"args": args, "kwargs": kwargs, "f": f},
num_threads=torch.get_num_threads(),
)
return f"{(t0.blocked_autorange().mean):.3f}"
def generate_csv_dict(
pipeline_cls: str, ckpt: str, args: argparse.Namespace, benchmark_info: BenchmarkInfo
) -> Dict[str, Union[str, bool, float]]:
"""Packs benchmarking data into a dictionary for latter serialization."""
data_dict = {
"pipeline_cls": pipeline_cls,
"ckpt_id": ckpt,
"batch_size": args.batch_size,
"num_inference_steps": args.num_inference_steps,
"model_cpu_offload": args.model_cpu_offload,
"run_compile": args.run_compile,
"time (secs)": benchmark_info.time,
"memory (gbs)": benchmark_info.memory,
"actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
"github_sha": GITHUB_SHA,
}
return data_dict
def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
writer.writeheader()
writer.writerow(data_dict)
def collate_csv(input_files: List[str], output_file: str):
"""Collates multiple identically structured CSVs into a single CSV file."""
with open(output_file, mode="w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=BENCHMARK_FIELDS)
writer.writeheader()
for file in input_files:
with open(file, mode="r") as infile:
reader = csv.DictReader(infile)
for row in reader:
writer.writerow(row)
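
Taken together, a hedged sketch of how these helpers compose inside a benchmark script (the scripts first do sys.path.append(".") so utils is importable; the pipeline name, checkpoint, and argument values below are illustrative):

import argparse
import sys

import torch

sys.path.append(".")
from utils import BenchmarkInfo, benchmark_fn, bytes_to_giga_bytes, generate_csv_dict, write_to_csv  # noqa: E402

args = argparse.Namespace(batch_size=1, num_inference_steps=50, model_cpu_offload=False, run_compile=False)

def call_pipeline():
    ...  # run the pipeline under test here

time_s = benchmark_fn(call_pipeline)
memory_gb = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
row = generate_csv_dict(
    pipeline_cls="StableDiffusionXLPipeline",
    ckpt="stabilityai/stable-diffusion-xl-base-1.0",
    args=args,
    benchmark_info=BenchmarkInfo(time=time_s, memory=memory_gb),
)
write_to_csv("sdxl_benchmark.csv", row)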

View File

@@ -1,45 +1,52 @@
FROM python:3.10-slim FROM ubuntu:20.04
ENV PYTHONDONTWRITEBYTECODE=1
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update && apt-get install -y bash \ RUN apt-get -y update \
build-essential \ && apt-get install -y software-properties-common \
git \ && add-apt-repository ppa:deadsnakes/ppa
git-lfs \
curl \
ca-certificates \
libglib2.0-0 \
libsndfile1-dev \
libgl1 \
zip \
wget
ENV UV_PYTHON=/usr/local/bin/python RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
python3.10 \
python3-pip \
libgl1 \
zip \
wget \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN pip install uv RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
RUN uv pip install --no-cache-dir \ python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio \ torchaudio \
--extra-index-url https://download.pytorch.org/whl/cpu invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m uv pip install --no-cache-dir \
accelerate \
# Extra dependencies datasets \
RUN uv pip install --no-cache-dir \ hf-doc-builder \
accelerate \ huggingface-hub \
numpy==1.26.4 \ Jinja2 \
hf_xet \ librosa \
setuptools==69.5.1 \ numpy==1.26.4 \
bitsandbytes \ scipy \
torchao \ tensorboard \
gguf \ transformers \
optimum-quanto matplotlib \
setuptools==69.5.1
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -0,0 +1,49 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"jax[cpu]>=0.2.16,!=0.3.2" \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -0,0 +1,51 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m pip install --no-cache-dir \
"jax[tpu]>=0.2.16,!=0.3.2" \
-f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -44,6 +44,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \ scipy \
tensorboard \ tensorboard \
transformers \ transformers \
hf_xet hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -38,12 +38,13 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \ datasets \
hf-doc-builder \ hf-doc-builder \
huggingface-hub \ huggingface-hub \
hf_xet \ hf_transfer \
Jinja2 \ Jinja2 \
librosa \ librosa \
numpy==1.26.4 \ numpy==1.26.4 \
scipy \ scipy \
tensorboard \ tensorboard \
transformers transformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -0,0 +1,50 @@
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3.10-dev \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3.10 -m uv pip install --no-cache-dir \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3.10 -m pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -1,38 +1,50 @@
FROM python:3.10-slim FROM ubuntu:20.04
ENV PYTHONDONTWRITEBYTECODE=1
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update && apt-get install -y bash \ RUN apt-get -y update \
build-essential \ && apt-get install -y software-properties-common \
git \ && add-apt-repository ppa:deadsnakes/ppa
git-lfs \
curl \
ca-certificates \
libglib2.0-0 \
libsndfile1-dev \
libgl1
ENV UV_PYTHON=/usr/local/bin/python RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
python3.10 \
python3.10-dev \
python3-pip \
libgl1 \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN pip install uv RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
RUN uv pip install --no-cache-dir \ python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio \ torchaudio \
--extra-index-url https://download.pytorch.org/whl/cpu invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m uv pip install --no-cache-dir \
accelerate \
# Extra dependencies datasets \
RUN uv pip install --no-cache-dir \ hf-doc-builder \
accelerate \ huggingface-hub \
numpy==1.26.4 \ Jinja2 \
hf_xet librosa \
numpy==1.26.4 \
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean scipy \
tensorboard \
transformers matplotlib \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,13 +2,11 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
@@ -16,34 +14,38 @@ RUN apt install -y bash \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \
libsndfile1-dev \ libsndfile1-dev \
libgl1 \ libgl1 \
python3 \ python3.10 \
python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio torchaudio \
invisible_watermark && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m pip install --no-cache-dir \
# Extra dependencies
RUN uv pip install --no-cache-dir \
accelerate \ accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \ numpy==1.26.4 \
pytorch-lightning \ scipy \
hf_xet tensorboard \
transformers \
pytorch-lightning \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,7 +2,6 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.10
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0" ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0"
ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0" ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0"
@@ -10,8 +9,7 @@ ENV MINIMUM_SUPPORTED_TORCHAUDIO_VERSION="2.1.0"
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
@@ -19,34 +17,37 @@ RUN apt install -y bash \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \
libsndfile1-dev \ libsndfile1-dev \
libgl1 \ libgl1 \
python3 \ python3.10 \
python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3.10 -m uv pip install --no-cache-dir \
torch==$MINIMUM_SUPPORTED_TORCH_VERSION \ torch==$MINIMUM_SUPPORTED_TORCH_VERSION \
torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \ torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \
torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION \
invisible_watermark && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m pip install --no-cache-dir \
# Extra dependencies
RUN uv pip install --no-cache-dir \
accelerate \ accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \ numpy==1.26.4 \
pytorch-lightning \ scipy \
hf_xet tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,49 +2,50 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
git \ git \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \ libsndfile1-dev \
libsndfile1-dev \ libgl1 \
libgl1 \ python3.10 \
python3 \ python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
torch \ python3.10 -m pip install --no-cache-dir \
torchvision \ torch \
torchaudio torchvision \
torchaudio \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" invisible_watermark && \
python3.10 -m uv pip install --no-cache-dir \
# Extra dependencies accelerate \
RUN uv pip install --no-cache-dir \ datasets \
accelerate \ hf-doc-builder \
numpy==1.26.4 \ huggingface-hub \
pytorch-lightning \ hf_transfer \
hf_xet \ Jinja2 \
xformers librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
xformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -1,36 +1,36 @@
- sections: - sections:
- local: index - local: index
title: Diffusers title: 🧨 Diffusers
- local: quicktour
title: Quicktour
- local: stable_diffusion
title: Effective and efficient diffusion
- local: installation - local: installation
title: Installation title: Installation
- local: quicktour
title: Quickstart
- local: stable_diffusion
title: Basic performance
title: Get started title: Get started
- isExpanded: false - sections:
sections: - local: tutorials/tutorial_overview
- local: using-diffusers/loading title: Overview
title: DiffusionPipeline - local: using-diffusers/write_own_pipeline
title: Understanding pipelines, models and schedulers
- local: tutorials/autopipeline - local: tutorials/autopipeline
title: AutoPipeline title: AutoPipeline
- local: tutorials/basic_training
title: Train a diffusion model
title: Tutorials
- sections:
- local: using-diffusers/loading
title: Load pipelines
- local: using-diffusers/custom_pipeline_overview - local: using-diffusers/custom_pipeline_overview
title: Community pipelines and components title: Load community pipelines and components
- local: using-diffusers/callback
title: Pipeline callbacks
- local: using-diffusers/reusing_seeds
title: Reproducibility
- local: using-diffusers/schedulers - local: using-diffusers/schedulers
title: Schedulers title: Load schedulers and models
- local: using-diffusers/automodel
title: AutoModel
- local: using-diffusers/other-formats - local: using-diffusers/other-formats
title: Model formats title: Model files and layouts
- local: using-diffusers/push_to_hub - local: using-diffusers/push_to_hub
title: Sharing pipelines and models title: Push files to the Hub
title: Pipelines title: Load pipelines and adapters
- isExpanded: false - sections:
sections:
- local: tutorials/using_peft_for_inference - local: tutorials/using_peft_for_inference
title: LoRA title: LoRA
- local: using-diffusers/ip_adapter - local: using-diffusers/ip_adapter
@@ -44,52 +44,44 @@
- local: using-diffusers/textual_inversion_inference - local: using-diffusers/textual_inversion_inference
title: Textual inversion title: Textual inversion
title: Adapters title: Adapters
- isExpanded: false isExpanded: false
sections: - sections:
- local: using-diffusers/weighted_prompts - local: using-diffusers/unconditional_image_generation
title: Prompting title: Unconditional image generation
- local: using-diffusers/conditional_image_generation
title: Text-to-image
- local: using-diffusers/img2img
title: Image-to-image
- local: using-diffusers/inpaint
title: Inpainting
- local: using-diffusers/text-img2vid
title: Video generation
- local: using-diffusers/depth2img
title: Depth-to-image
title: Generative tasks
- sections:
- local: using-diffusers/overview_techniques
title: Overview
- local: using-diffusers/create_a_server - local: using-diffusers/create_a_server
title: Create a server title: Create a server
- local: using-diffusers/batched_inference
title: Batch inference
- local: training/distributed_inference - local: training/distributed_inference
title: Distributed inference title: Distributed inference
title: Inference - local: using-diffusers/scheduler_features
- isExpanded: false title: Scheduler features
sections: - local: using-diffusers/callback
- local: optimization/fp16 title: Pipeline callbacks
title: Accelerate inference - local: using-diffusers/reusing_seeds
- local: optimization/cache title: Reproducible pipelines
title: Caching - local: using-diffusers/image_quality
- local: optimization/attention_backends title: Controlling image quality
title: Attention backends - local: using-diffusers/weighted_prompts
- local: optimization/memory title: Prompt techniques
title: Reduce memory usage title: Inference techniques
- local: optimization/speed-memory-optims - sections:
title: Compiling and offloading quantized models - local: advanced_inference/outpaint
- sections: title: Outpainting
- local: optimization/pruna title: Advanced inference
title: Pruna - sections:
- local: optimization/xformers
title: xFormers
- local: optimization/tome
title: Token merging
- local: optimization/deepcache
title: DeepCache
- local: optimization/cache_dit
title: CacheDiT
- local: optimization/tgate
title: TGATE
- local: optimization/xdit
title: xDiT
- local: optimization/para_attn
title: ParaAttention
- local: using-diffusers/image_quality
title: FreeU
title: Community optimizations
title: Inference optimization
- isExpanded: false
sections:
- local: hybrid_inference/overview - local: hybrid_inference/overview
title: Overview title: Overview
- local: hybrid_inference/vae_decode - local: hybrid_inference/vae_decode
@@ -99,108 +91,7 @@
- local: hybrid_inference/api_reference - local: hybrid_inference/api_reference
title: API Reference title: API Reference
title: Hybrid Inference title: Hybrid Inference
- isExpanded: false - sections:
sections:
- local: modular_diffusers/overview
title: Overview
- local: modular_diffusers/quickstart
title: Quickstart
- local: modular_diffusers/modular_diffusers_states
title: States
- local: modular_diffusers/pipeline_block
title: ModularPipelineBlocks
- local: modular_diffusers/sequential_pipeline_blocks
title: SequentialPipelineBlocks
- local: modular_diffusers/loop_sequential_pipeline_blocks
title: LoopSequentialPipelineBlocks
- local: modular_diffusers/auto_pipeline_blocks
title: AutoPipelineBlocks
- local: modular_diffusers/modular_pipeline
title: ModularPipeline
- local: modular_diffusers/components_manager
title: ComponentsManager
- local: modular_diffusers/guiders
title: Guiders
- local: modular_diffusers/custom_blocks
title: Building Custom Blocks
title: Modular Diffusers
- isExpanded: false
sections:
- local: training/overview
title: Overview
- local: training/create_dataset
title: Create a dataset for training
- local: training/adapt_a_model
title: Adapt a model to a new task
- local: tutorials/basic_training
title: Train a diffusion model
- sections:
- local: training/unconditional_training
title: Unconditional image generation
- local: training/text2image
title: Text-to-image
- local: training/sdxl
title: Stable Diffusion XL
- local: training/kandinsky
title: Kandinsky 2.2
- local: training/wuerstchen
title: Wuerstchen
- local: training/controlnet
title: ControlNet
- local: training/t2i_adapters
title: T2I-Adapters
- local: training/instructpix2pix
title: InstructPix2Pix
- local: training/cogvideox
title: CogVideoX
title: Models
- sections:
- local: training/text_inversion
title: Textual Inversion
- local: training/dreambooth
title: DreamBooth
- local: training/lora
title: LoRA
- local: training/custom_diffusion
title: Custom Diffusion
- local: training/lcm_distill
title: Latent Consistency Distillation
- local: training/ddpo
title: Reinforcement learning training with DDPO
title: Methods
title: Training
- isExpanded: false
sections:
- local: quantization/overview
title: Getting started
- local: quantization/bitsandbytes
title: bitsandbytes
- local: quantization/gguf
title: gguf
- local: quantization/torchao
title: torchao
- local: quantization/quanto
title: quanto
- local: quantization/modelopt
title: NVIDIA ModelOpt
title: Quantization
- isExpanded: false
sections:
- local: optimization/onnx
title: ONNX
- local: optimization/open_vino
title: OpenVINO
- local: optimization/coreml
title: Core ML
- local: optimization/mps
title: Metal Performance Shaders (MPS)
- local: optimization/habana
title: Intel Gaudi
- local: optimization/neuron
title: AWS Neuron
title: Model accelerators and hardware
- isExpanded: false
sections:
- local: using-diffusers/consisid - local: using-diffusers/consisid
title: ConsisID title: ConsisID
- local: using-diffusers/sdxl - local: using-diffusers/sdxl
@@ -226,28 +117,103 @@
- local: using-diffusers/marigold_usage - local: using-diffusers/marigold_usage
title: Marigold Computer Vision title: Marigold Computer Vision
title: Specific pipeline examples title: Specific pipeline examples
- isExpanded: false - sections:
sections: - local: training/overview
- sections: title: Overview
- local: using-diffusers/unconditional_image_generation - local: training/create_dataset
title: Create a dataset for training
- local: training/adapt_a_model
title: Adapt a model to a new task
- isExpanded: false
sections:
- local: training/unconditional_training
title: Unconditional image generation title: Unconditional image generation
- local: using-diffusers/conditional_image_generation - local: training/text2image
title: Text-to-image title: Text-to-image
- local: using-diffusers/img2img - local: training/sdxl
title: Image-to-image title: Stable Diffusion XL
- local: using-diffusers/inpaint - local: training/kandinsky
title: Inpainting title: Kandinsky 2.2
- local: advanced_inference/outpaint - local: training/wuerstchen
title: Outpainting title: Wuerstchen
- local: using-diffusers/text-img2vid - local: training/controlnet
title: Video generation title: ControlNet
- local: using-diffusers/depth2img - local: training/t2i_adapters
title: Depth-to-image title: T2I-Adapters
title: Task recipes - local: training/instructpix2pix
- local: using-diffusers/write_own_pipeline title: InstructPix2Pix
title: Understanding pipelines, models and schedulers - local: training/cogvideox
- local: community_projects title: CogVideoX
title: Projects built with Diffusers title: Models
- isExpanded: false
sections:
- local: training/text_inversion
title: Textual Inversion
- local: training/dreambooth
title: DreamBooth
- local: training/lora
title: LoRA
- local: training/custom_diffusion
title: Custom Diffusion
- local: training/lcm_distill
title: Latent Consistency Distillation
- local: training/ddpo
title: Reinforcement learning training with DDPO
title: Methods
title: Training
- sections:
- local: quantization/overview
title: Getting Started
- local: quantization/bitsandbytes
title: bitsandbytes
- local: quantization/gguf
title: gguf
- local: quantization/torchao
title: torchao
- local: quantization/quanto
title: quanto
title: Quantization Methods
- sections:
- local: optimization/fp16
title: Accelerate inference
- local: optimization/cache
title: Caching
- local: optimization/memory
title: Reduce memory usage
- local: optimization/pruna
title: Pruna
- local: optimization/xformers
title: xFormers
- local: optimization/tome
title: Token merging
- local: optimization/deepcache
title: DeepCache
- local: optimization/tgate
title: TGATE
- local: optimization/xdit
title: xDiT
- local: optimization/para_attn
title: ParaAttention
- sections:
- local: using-diffusers/stable_diffusion_jax_how_to
title: JAX/Flax
- local: optimization/onnx
title: ONNX
- local: optimization/open_vino
title: OpenVINO
- local: optimization/coreml
title: Core ML
title: Optimized model formats
- sections:
- local: optimization/mps
title: Metal Performance Shaders (MPS)
- local: optimization/habana
title: Intel Gaudi
- local: optimization/neuron
title: AWS Neuron
title: Optimized hardware
title: Accelerate inference and reduce memory
- sections:
- local: conceptual/philosophy - local: conceptual/philosophy
title: Philosophy title: Philosophy
- local: using-diffusers/controlling_generation - local: using-diffusers/controlling_generation
@@ -258,10 +224,14 @@
title: Diffusers' Ethical Guidelines title: Diffusers' Ethical Guidelines
- local: conceptual/evaluation - local: conceptual/evaluation
title: Evaluating Diffusion Models title: Evaluating Diffusion Models
title: Resources title: Conceptual Guides
- isExpanded: false - sections:
sections: - local: community_projects
- sections: title: Projects built with Diffusers
title: Community Projects
- sections:
- isExpanded: false
sections:
- local: api/configuration - local: api/configuration
title: Configuration title: Configuration
- local: api/logging - local: api/logging
@@ -270,22 +240,9 @@
title: Outputs title: Outputs
- local: api/quantization - local: api/quantization
title: Quantization title: Quantization
- local: api/parallel
title: Parallel inference
title: Main Classes title: Main Classes
- sections: - isExpanded: false
- local: api/modular_diffusers/pipeline sections:
title: Pipeline
- local: api/modular_diffusers/pipeline_blocks
title: Blocks
- local: api/modular_diffusers/pipeline_states
title: States
- local: api/modular_diffusers/pipeline_components
title: Components and configs
- local: api/modular_diffusers/guiders
title: Guiders
title: Modular
- sections:
- local: api/loaders/ip_adapter - local: api/loaders/ip_adapter
title: IP-Adapter title: IP-Adapter
- local: api/loaders/lora - local: api/loaders/lora
@@ -301,7 +258,8 @@
- local: api/loaders/peft - local: api/loaders/peft
title: PEFT title: PEFT
title: Loaders title: Loaders
- sections: - isExpanded: false
sections:
- local: api/models/overview - local: api/models/overview
title: Overview title: Overview
- local: api/models/auto_model - local: api/models/auto_model
@@ -327,14 +285,8 @@
title: AllegroTransformer3DModel title: AllegroTransformer3DModel
- local: api/models/aura_flow_transformer2d - local: api/models/aura_flow_transformer2d
title: AuraFlowTransformer2DModel title: AuraFlowTransformer2DModel
- local: api/models/transformer_bria_fibo
title: BriaFiboTransformer2DModel
- local: api/models/bria_transformer
title: BriaTransformer2DModel
- local: api/models/chroma_transformer - local: api/models/chroma_transformer
title: ChromaTransformer2DModel title: ChromaTransformer2DModel
- local: api/models/chronoedit_transformer_3d
title: ChronoEditTransformer3DModel
- local: api/models/cogvideox_transformer3d - local: api/models/cogvideox_transformer3d
title: CogVideoXTransformer3DModel title: CogVideoXTransformer3DModel
- local: api/models/cogview3plus_transformer2d - local: api/models/cogview3plus_transformer2d
@@ -349,16 +301,12 @@
title: DiTTransformer2DModel title: DiTTransformer2DModel
- local: api/models/easyanimate_transformer3d - local: api/models/easyanimate_transformer3d
title: EasyAnimateTransformer3DModel title: EasyAnimateTransformer3DModel
- local: api/models/flux2_transformer
title: Flux2Transformer2DModel
- local: api/models/flux_transformer - local: api/models/flux_transformer
title: FluxTransformer2DModel title: FluxTransformer2DModel
- local: api/models/hidream_image_transformer - local: api/models/hidream_image_transformer
title: HiDreamImageTransformer2DModel title: HiDreamImageTransformer2DModel
- local: api/models/hunyuan_transformer2d - local: api/models/hunyuan_transformer2d
title: HunyuanDiT2DModel title: HunyuanDiT2DModel
- local: api/models/hunyuanimage_transformer_2d
title: HunyuanImageTransformer2DModel
- local: api/models/hunyuan_video_transformer_3d - local: api/models/hunyuan_video_transformer_3d
title: HunyuanVideoTransformer3DModel title: HunyuanVideoTransformer3DModel
- local: api/models/latte_transformer3d - local: api/models/latte_transformer3d
@@ -377,24 +325,16 @@
title: PixArtTransformer2DModel title: PixArtTransformer2DModel
- local: api/models/prior_transformer - local: api/models/prior_transformer
title: PriorTransformer title: PriorTransformer
- local: api/models/qwenimage_transformer2d
title: QwenImageTransformer2DModel
- local: api/models/sana_transformer2d - local: api/models/sana_transformer2d
title: SanaTransformer2DModel title: SanaTransformer2DModel
- local: api/models/sana_video_transformer3d
title: SanaVideoTransformer3DModel
- local: api/models/sd3_transformer2d - local: api/models/sd3_transformer2d
title: SD3Transformer2DModel title: SD3Transformer2DModel
- local: api/models/skyreels_v2_transformer_3d
title: SkyReelsV2Transformer3DModel
- local: api/models/stable_audio_transformer - local: api/models/stable_audio_transformer
title: StableAudioDiTModel title: StableAudioDiTModel
- local: api/models/transformer2d - local: api/models/transformer2d
title: Transformer2DModel title: Transformer2DModel
- local: api/models/transformer_temporal - local: api/models/transformer_temporal
title: TransformerTemporalModel title: TransformerTemporalModel
- local: api/models/wan_animate_transformer_3d
title: WanAnimateTransformer3DModel
- local: api/models/wan_transformer_3d - local: api/models/wan_transformer_3d
title: WanTransformer3DModel title: WanTransformer3DModel
title: Transformers title: Transformers
@@ -427,10 +367,6 @@
title: AutoencoderKLCogVideoX title: AutoencoderKLCogVideoX
- local: api/models/autoencoderkl_cosmos - local: api/models/autoencoderkl_cosmos
title: AutoencoderKLCosmos title: AutoencoderKLCosmos
- local: api/models/autoencoder_kl_hunyuanimage
title: AutoencoderKLHunyuanImage
- local: api/models/autoencoder_kl_hunyuanimage_refiner
title: AutoencoderKLHunyuanImageRefiner
- local: api/models/autoencoder_kl_hunyuan_video - local: api/models/autoencoder_kl_hunyuan_video
title: AutoencoderKLHunyuanVideo title: AutoencoderKLHunyuanVideo
- local: api/models/autoencoderkl_ltx_video - local: api/models/autoencoderkl_ltx_video
@@ -439,8 +375,6 @@
title: AutoencoderKLMagvit title: AutoencoderKLMagvit
- local: api/models/autoencoderkl_mochi - local: api/models/autoencoderkl_mochi
title: AutoencoderKLMochi title: AutoencoderKLMochi
- local: api/models/autoencoderkl_qwenimage
title: AutoencoderKLQwenImage
- local: api/models/autoencoder_kl_wan - local: api/models/autoencoder_kl_wan
title: AutoencoderKLWan title: AutoencoderKLWan
- local: api/models/consistency_decoder_vae - local: api/models/consistency_decoder_vae
@@ -453,230 +387,205 @@
title: VQModel title: VQModel
title: VAEs title: VAEs
title: Models title: Models
- sections: - isExpanded: false
sections:
- local: api/pipelines/overview - local: api/pipelines/overview
title: Overview title: Overview
- local: api/pipelines/allegro
title: Allegro
- local: api/pipelines/amused
title: aMUSEd
- local: api/pipelines/animatediff
title: AnimateDiff
- local: api/pipelines/attend_and_excite
title: Attend-and-Excite
- local: api/pipelines/audioldm
title: AudioLDM
- local: api/pipelines/audioldm2
title: AudioLDM 2
- local: api/pipelines/aura_flow
title: AuraFlow
- local: api/pipelines/auto_pipeline - local: api/pipelines/auto_pipeline
title: AutoPipeline title: AutoPipeline
- local: api/pipelines/blip_diffusion
title: BLIP-Diffusion
- local: api/pipelines/chroma
title: Chroma
- local: api/pipelines/cogvideox
title: CogVideoX
- local: api/pipelines/cogview3
title: CogView3
- local: api/pipelines/cogview4
title: CogView4
- local: api/pipelines/consisid
title: ConsisID
- local: api/pipelines/consistency_models
title: Consistency Models
- local: api/pipelines/controlnet
title: ControlNet
- local: api/pipelines/controlnet_flux
title: ControlNet with Flux.1
- local: api/pipelines/controlnet_hunyuandit
title: ControlNet with Hunyuan-DiT
- local: api/pipelines/controlnet_sd3
title: ControlNet with Stable Diffusion 3
- local: api/pipelines/controlnet_sdxl
title: ControlNet with Stable Diffusion XL
- local: api/pipelines/controlnet_sana
title: ControlNet-Sana
- local: api/pipelines/controlnetxs
title: ControlNet-XS
- local: api/pipelines/controlnetxs_sdxl
title: ControlNet-XS with Stable Diffusion XL
- local: api/pipelines/controlnet_union
title: ControlNetUnion
- local: api/pipelines/cosmos
title: Cosmos
- local: api/pipelines/dance_diffusion
title: Dance Diffusion
- local: api/pipelines/ddim
title: DDIM
- local: api/pipelines/ddpm
title: DDPM
- local: api/pipelines/deepfloyd_if
title: DeepFloyd IF
- local: api/pipelines/diffedit
title: DiffEdit
- local: api/pipelines/dit
title: DiT
- local: api/pipelines/easyanimate
title: EasyAnimate
- local: api/pipelines/flux
title: Flux
- local: api/pipelines/control_flux_inpaint
title: FluxControlInpaint
- local: api/pipelines/framepack
title: Framepack
- local: api/pipelines/hidream
title: HiDream-I1
- local: api/pipelines/hunyuandit
title: Hunyuan-DiT
- local: api/pipelines/hunyuan_video
title: HunyuanVideo
- local: api/pipelines/i2vgenxl
title: I2VGen-XL
- local: api/pipelines/pix2pix
title: InstructPix2Pix
- local: api/pipelines/kandinsky
title: Kandinsky 2.1
- local: api/pipelines/kandinsky_v22
title: Kandinsky 2.2
- local: api/pipelines/kandinsky3
title: Kandinsky 3
- local: api/pipelines/kolors
title: Kolors
- local: api/pipelines/latent_consistency_models
title: Latent Consistency Models
- local: api/pipelines/latent_diffusion
title: Latent Diffusion
- local: api/pipelines/latte
title: Latte
- local: api/pipelines/ledits_pp
title: LEDITS++
- local: api/pipelines/ltx_video
title: LTXVideo
- local: api/pipelines/lumina2
title: Lumina 2.0
- local: api/pipelines/lumina
title: Lumina-T2X
- local: api/pipelines/marigold
title: Marigold
- local: api/pipelines/mochi
title: Mochi
- local: api/pipelines/panorama
title: MultiDiffusion
- local: api/pipelines/musicldm
title: MusicLDM
- local: api/pipelines/omnigen
title: OmniGen
- local: api/pipelines/pag
title: PAG
- local: api/pipelines/paint_by_example
title: Paint by Example
- local: api/pipelines/pia
title: Personalized Image Animator (PIA)
- local: api/pipelines/pixart
title: PixArt-α
- local: api/pipelines/pixart_sigma
title: PixArt-Σ
- local: api/pipelines/sana
title: Sana
- local: api/pipelines/sana_sprint
title: Sana Sprint
- local: api/pipelines/self_attention_guidance
title: Self-Attention Guidance
- local: api/pipelines/semantic_stable_diffusion
title: Semantic Guidance
- local: api/pipelines/shap_e
title: Shap-E
- local: api/pipelines/stable_audio
title: Stable Audio
- local: api/pipelines/stable_cascade
title: Stable Cascade
- sections: - sections:
- local: api/pipelines/audioldm - local: api/pipelines/stable_diffusion/overview
title: AudioLDM title: Overview
- local: api/pipelines/audioldm2 - local: api/pipelines/stable_diffusion/depth2img
title: AudioLDM 2 title: Depth-to-image
- local: api/pipelines/dance_diffusion - local: api/pipelines/stable_diffusion/gligen
title: Dance Diffusion title: GLIGEN (Grounded Language-to-Image Generation)
- local: api/pipelines/musicldm - local: api/pipelines/stable_diffusion/image_variation
title: MusicLDM title: Image variation
- local: api/pipelines/stable_audio - local: api/pipelines/stable_diffusion/img2img
title: Stable Audio title: Image-to-image
title: Audio
- sections:
- local: api/pipelines/amused
title: aMUSEd
- local: api/pipelines/animatediff
title: AnimateDiff
- local: api/pipelines/attend_and_excite
title: Attend-and-Excite
- local: api/pipelines/aura_flow
title: AuraFlow
- local: api/pipelines/blip_diffusion
title: BLIP-Diffusion
- local: api/pipelines/bria_3_2
title: Bria 3.2
- local: api/pipelines/bria_fibo
title: Bria Fibo
- local: api/pipelines/chroma
title: Chroma
- local: api/pipelines/cogview3
title: CogView3
- local: api/pipelines/cogview4
title: CogView4
- local: api/pipelines/consistency_models
title: Consistency Models
- local: api/pipelines/controlnet
title: ControlNet
- local: api/pipelines/controlnet_flux
title: ControlNet with Flux.1
- local: api/pipelines/controlnet_hunyuandit
title: ControlNet with Hunyuan-DiT
- local: api/pipelines/controlnet_sd3
title: ControlNet with Stable Diffusion 3
- local: api/pipelines/controlnet_sdxl
title: ControlNet with Stable Diffusion XL
- local: api/pipelines/controlnet_sana
title: ControlNet-Sana
- local: api/pipelines/controlnetxs
title: ControlNet-XS
- local: api/pipelines/controlnetxs_sdxl
title: ControlNet-XS with Stable Diffusion XL
- local: api/pipelines/controlnet_union
title: ControlNetUnion
- local: api/pipelines/cosmos
title: Cosmos
- local: api/pipelines/ddim
title: DDIM
- local: api/pipelines/ddpm
title: DDPM
- local: api/pipelines/deepfloyd_if
title: DeepFloyd IF
- local: api/pipelines/diffedit
title: DiffEdit
- local: api/pipelines/dit
title: DiT
- local: api/pipelines/easyanimate
title: EasyAnimate
- local: api/pipelines/flux
title: Flux
- local: api/pipelines/flux2
title: Flux2
- local: api/pipelines/control_flux_inpaint
title: FluxControlInpaint
- local: api/pipelines/hidream
title: HiDream-I1
- local: api/pipelines/hunyuandit
title: Hunyuan-DiT
- local: api/pipelines/hunyuanimage21
title: HunyuanImage2.1
- local: api/pipelines/pix2pix
title: InstructPix2Pix
- local: api/pipelines/kandinsky
title: Kandinsky 2.1
- local: api/pipelines/kandinsky_v22
title: Kandinsky 2.2
- local: api/pipelines/kandinsky3
title: Kandinsky 3
- local: api/pipelines/kolors
title: Kolors
- local: api/pipelines/latent_consistency_models
title: Latent Consistency Models
- local: api/pipelines/latent_diffusion
title: Latent Diffusion
- local: api/pipelines/ledits_pp
title: LEDITS++
- local: api/pipelines/lumina2
title: Lumina 2.0
- local: api/pipelines/lumina
title: Lumina-T2X
- local: api/pipelines/marigold
title: Marigold
- local: api/pipelines/panorama
title: MultiDiffusion
- local: api/pipelines/omnigen
title: OmniGen
- local: api/pipelines/pag
title: PAG
- local: api/pipelines/paint_by_example
title: Paint by Example
- local: api/pipelines/pixart
title: PixArt-α
- local: api/pipelines/pixart_sigma
title: PixArt-Σ
- local: api/pipelines/prx
title: PRX
- local: api/pipelines/qwenimage
title: QwenImage
- local: api/pipelines/sana
title: Sana
- local: api/pipelines/sana_sprint
title: Sana Sprint
- local: api/pipelines/sana_video
title: Sana Video
- local: api/pipelines/self_attention_guidance
title: Self-Attention Guidance
- local: api/pipelines/semantic_stable_diffusion
title: Semantic Guidance
- local: api/pipelines/shap_e
title: Shap-E
- local: api/pipelines/stable_cascade
title: Stable Cascade
- sections:
- local: api/pipelines/stable_diffusion/overview
title: Overview
- local: api/pipelines/stable_diffusion/depth2img
title: Depth-to-image
- local: api/pipelines/stable_diffusion/gligen
title: GLIGEN (Grounded Language-to-Image Generation)
- local: api/pipelines/stable_diffusion/image_variation
title: Image variation
- local: api/pipelines/stable_diffusion/img2img
title: Image-to-image
- local: api/pipelines/stable_diffusion/inpaint
title: Inpainting
- local: api/pipelines/stable_diffusion/k_diffusion
title: K-Diffusion
- local: api/pipelines/stable_diffusion/latent_upscale
title: Latent upscaler
- local: api/pipelines/stable_diffusion/ldm3d_diffusion
title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D Upscaler
- local: api/pipelines/stable_diffusion/stable_diffusion_safe
title: Safe Stable Diffusion
- local: api/pipelines/stable_diffusion/sdxl_turbo
title: SDXL Turbo
- local: api/pipelines/stable_diffusion/stable_diffusion_2
title: Stable Diffusion 2
- local: api/pipelines/stable_diffusion/stable_diffusion_3
title: Stable Diffusion 3
- local: api/pipelines/stable_diffusion/stable_diffusion_xl
title: Stable Diffusion XL
- local: api/pipelines/stable_diffusion/upscale
title: Super-resolution
- local: api/pipelines/stable_diffusion/adapter
title: T2I-Adapter
- local: api/pipelines/stable_diffusion/text2img
title: Text-to-image
title: Stable Diffusion
- local: api/pipelines/stable_unclip
title: Stable unCLIP
- local: api/pipelines/unclip
title: unCLIP
- local: api/pipelines/unidiffuser
title: UniDiffuser
- local: api/pipelines/value_guided_sampling
title: Value-guided sampling
- local: api/pipelines/visualcloze
title: VisualCloze
- local: api/pipelines/wuerstchen
title: Wuerstchen
title: Image
- sections:
- local: api/pipelines/allegro
title: Allegro
- local: api/pipelines/chronoedit
title: ChronoEdit
- local: api/pipelines/cogvideox
title: CogVideoX
- local: api/pipelines/consisid
title: ConsisID
- local: api/pipelines/framepack
title: Framepack
- local: api/pipelines/hunyuan_video
title: HunyuanVideo
- local: api/pipelines/i2vgenxl
title: I2VGen-XL
- local: api/pipelines/kandinsky5_video
title: Kandinsky 5.0 Video
- local: api/pipelines/latte
title: Latte
- local: api/pipelines/ltx_video
title: LTXVideo
- local: api/pipelines/mochi
title: Mochi
- local: api/pipelines/pia
title: Personalized Image Animator (PIA)
- local: api/pipelines/skyreels_v2
title: SkyReels-V2
- local: api/pipelines/stable_diffusion/svd - local: api/pipelines/stable_diffusion/svd
title: Stable Video Diffusion title: Image-to-video
- local: api/pipelines/text_to_video - local: api/pipelines/stable_diffusion/inpaint
title: Text-to-video title: Inpainting
- local: api/pipelines/text_to_video_zero - local: api/pipelines/stable_diffusion/k_diffusion
title: Text2Video-Zero title: K-Diffusion
- local: api/pipelines/wan - local: api/pipelines/stable_diffusion/latent_upscale
title: Wan title: Latent upscaler
title: Video - local: api/pipelines/stable_diffusion/ldm3d_diffusion
title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D Upscaler
- local: api/pipelines/stable_diffusion/stable_diffusion_safe
title: Safe Stable Diffusion
- local: api/pipelines/stable_diffusion/sdxl_turbo
title: SDXL Turbo
- local: api/pipelines/stable_diffusion/stable_diffusion_2
title: Stable Diffusion 2
- local: api/pipelines/stable_diffusion/stable_diffusion_3
title: Stable Diffusion 3
- local: api/pipelines/stable_diffusion/stable_diffusion_xl
title: Stable Diffusion XL
- local: api/pipelines/stable_diffusion/upscale
title: Super-resolution
- local: api/pipelines/stable_diffusion/adapter
title: T2I-Adapter
- local: api/pipelines/stable_diffusion/text2img
title: Text-to-image
title: Stable Diffusion
- local: api/pipelines/stable_unclip
title: Stable unCLIP
- local: api/pipelines/text_to_video
title: Text-to-video
- local: api/pipelines/text_to_video_zero
title: Text2Video-Zero
- local: api/pipelines/unclip
title: unCLIP
- local: api/pipelines/unidiffuser
title: UniDiffuser
- local: api/pipelines/value_guided_sampling
title: Value-guided sampling
- local: api/pipelines/visualcloze
title: VisualCloze
- local: api/pipelines/wan
title: Wan
- local: api/pipelines/wuerstchen
title: Wuerstchen
title: Pipelines title: Pipelines
- sections: - isExpanded: false
sections:
- local: api/schedulers/overview - local: api/schedulers/overview
title: Overview title: Overview
- local: api/schedulers/cm_stochastic_iterative - local: api/schedulers/cm_stochastic_iterative
@@ -746,7 +655,8 @@
- local: api/schedulers/vq_diffusion - local: api/schedulers/vq_diffusion
title: VQDiffusionScheduler title: VQDiffusionScheduler
title: Schedulers title: Schedulers
- sections: - isExpanded: false
sections:
- local: api/internal_classes_overview - local: api/internal_classes_overview
title: Overview title: Overview
- local: api/attnprocessor - local: api/attnprocessor

View File

@@ -28,9 +28,3 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
[[autodoc]] FasterCacheConfig [[autodoc]] FasterCacheConfig
[[autodoc]] apply_faster_cache [[autodoc]] apply_faster_cache
## FirstBlockCacheConfig
[[autodoc]] FirstBlockCacheConfig
[[autodoc]] apply_first_block_cache

View File

@@ -14,8 +14,11 @@ specific language governing permissions and limitations under the License.
Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which stores all the parameters that are passed to their respective `__init__` methods in a JSON-configuration file. Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which stores all the parameters that are passed to their respective `__init__` methods in a JSON-configuration file.
> [!TIP] <Tip>
> To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf auth login`.
To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `huggingface-cli login`.
</Tip>
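For a quick look at what gets stored, a minimal sketch (the checkpoint name is only an example):

```python
from diffusers import DDIMScheduler

# ConfigMixin records the __init__ arguments in `scheduler.config`
# and serializes them to `scheduler_config.json` on `save_pretrained`.
scheduler = DDIMScheduler.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler"
)
print(scheduler.config.num_train_timesteps)
```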
## ConfigMixin ## ConfigMixin

View File

@@ -20,12 +20,6 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or Nu
[[autodoc]] image_processor.VaeImageProcessor [[autodoc]] image_processor.VaeImageProcessor
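A minimal sketch of the preprocess/postprocess round trip with the default settings:

```python
from PIL import Image
from diffusers.image_processor import VaeImageProcessor

processor = VaeImageProcessor()

# PIL image -> torch.Tensor normalized to [-1, 1] with shape (1, 3, H, W)
image = Image.new("RGB", (512, 512))
tensor = processor.preprocess(image)

# torch.Tensor -> list of PIL images, denormalized back to [0, 255]
pil_images = processor.postprocess(tensor, output_type="pil")
```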
## InpaintProcessor
The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.
[[autodoc]] image_processor.InpaintProcessor
## VaeImageProcessorLDM3D ## VaeImageProcessorLDM3D
The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs. The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.

View File

@@ -14,8 +14,11 @@ specific language governing permissions and limitations under the License.
[IP-Adapter](https://hf.co/papers/2308.06721) is a lightweight adapter that enables prompting a diffusion model with an image. This method decouples the cross-attention layers of the image and text features. The image features are generated from an image encoder. [IP-Adapter](https://hf.co/papers/2308.06721) is a lightweight adapter that enables prompting a diffusion model with an image. This method decouples the cross-attention layers of the image and text features. The image features are generated from an image encoder.
> [!TIP] <Tip>
> Learn how to load and use an IP-Adapter checkpoint and image in the [IP-Adapter](../../using-diffusers/ip_adapter) guide.
Learn how to load an IP-Adapter checkpoint and image in the IP-Adapter [loading](../../using-diffusers/loading_adapters#ip-adapter) guide, and you can see how to use it in the [usage](../../using-diffusers/ip_adapter) guide.
</Tip>
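A minimal image-prompting sketch with the commonly used SD 1.5 IP-Adapter weights (the reference image path is a placeholder):

```python
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.utils import load_image

pipe = AutoPipelineForText2Image.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Load the decoupled cross-attention weights and the image encoder
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
pipe.set_ip_adapter_scale(0.6)  # balance image prompt vs. text prompt

ip_image = load_image("reference.png")  # placeholder: any reference image
image = pipe(prompt="a polar bear", ip_adapter_image=ip_image).images[0]
```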
## IPAdapterMixin ## IPAdapterMixin

View File

@@ -26,20 +26,16 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
- [`HunyuanVideoLoraLoaderMixin`] provides similar functions for [HunyuanVideo](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hunyuan_video). - [`HunyuanVideoLoraLoaderMixin`] provides similar functions for [HunyuanVideo](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hunyuan_video).
- [`Lumina2LoraLoaderMixin`] provides similar functions for [Lumina2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/lumina2). - [`Lumina2LoraLoaderMixin`] provides similar functions for [Lumina2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/lumina2).
- [`WanLoraLoaderMixin`] provides similar functions for [Wan](https://huggingface.co/docs/diffusers/main/en/api/pipelines/wan). - [`WanLoraLoaderMixin`] provides similar functions for [Wan](https://huggingface.co/docs/diffusers/main/en/api/pipelines/wan).
- [`SkyReelsV2LoraLoaderMixin`] provides similar functions for [SkyReels-V2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/skyreels_v2).
- [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4). - [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4).
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`]. - [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
- [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream) - [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream)
- [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen).
- [`Flux2LoraLoaderMixin`] provides similar functions for [Flux2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux2).
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, and unload LoRAs, and more. - [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, and unload LoRAs, and more.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
## LoraBaseMixin To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
[[autodoc]] loaders.lora_base.LoraBaseMixin </Tip>
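A minimal loading sketch with `load_lora_weights` (the checkpoint path, weight name, and adapter name are placeholders):

```python
import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# `load_lora_weights` comes from the pipeline's LoRA loader mixin
pipe.load_lora_weights("path/to/lora", weight_name="pytorch_lora_weights.safetensors", adapter_name="my_lora")
pipe.set_adapters("my_lora", adapter_weights=0.8)

image = pipe("a cute corgi, pixel art style").images[0]
```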
## StableDiffusionLoraLoaderMixin ## StableDiffusionLoraLoaderMixin
@@ -57,10 +53,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
[[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin
## Flux2LoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.Flux2LoraLoaderMixin
## CogVideoXLoraLoaderMixin ## CogVideoXLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin
@@ -96,10 +88,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
[[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin
## SkyReelsV2LoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.SkyReelsV2LoraLoaderMixin
## AmusedLoraLoaderMixin ## AmusedLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin
@@ -108,13 +96,10 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
[[autodoc]] loaders.lora_pipeline.HiDreamImageLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.HiDreamImageLoraLoaderMixin
## QwenImageLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin
## KandinskyLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.KandinskyLoraLoaderMixin
## LoraBaseMixin ## LoraBaseMixin
[[autodoc]] loaders.lora_base.LoraBaseMixin [[autodoc]] loaders.lora_base.LoraBaseMixin
## WanLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin

View File

@@ -12,10 +12,13 @@ specific language governing permissions and limitations under the License.
# PEFT # PEFT
Diffusers supports loading adapters such as [LoRA](../../tutorials/using_peft_for_inference) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter. Diffusers supports loading adapters such as [LoRA](../../using-diffusers/loading_adapters) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter.
> [!TIP] <Tip>
> Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
</Tip>
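A minimal sketch of attaching a fresh LoRA adapter to a model with `add_adapter` (the rank and target modules are illustrative):

```python
from peft import LoraConfig
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet"
)

# Illustrative LoRA config targeting the attention projections
lora_config = LoraConfig(
    r=4,
    lora_alpha=4,
    init_lora_weights="gaussian",
    target_modules=["to_k", "to_q", "to_v", "to_out.0"],
)
unet.add_adapter(lora_config)  # provided by PeftAdapterMixin
```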
## PeftAdapterMixin ## PeftAdapterMixin

View File

@@ -16,8 +16,11 @@ Textual Inversion is a training method for personalizing models by learning new
[`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings. [`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings.
> [!TIP] <Tip>
> To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/textual_inversion_inference) loading guide.
To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/loading_adapters#textual-inversion) loading guide.
</Tip>
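A minimal sketch using the `sd-concepts-library/cat-toy` embedding, whose activation token is `<cat-toy>`:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Loads the learned embedding and registers its activation token
pipe.load_textual_inversion("sd-concepts-library/cat-toy")

image = pipe("a <cat-toy> sitting on a bookshelf").images[0]
```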
## TextualInversionLoaderMixin ## TextualInversionLoaderMixin

View File

@@ -16,8 +16,11 @@ This class is useful when *only* loading weights into a [`SD3Transformer2DModel`
The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs. The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
</Tip>
## SD3Transformer2DLoadersMixin ## SD3Transformer2DLoadersMixin

View File

@@ -16,8 +16,11 @@ Some training methods - like LoRA and Custom Diffusion - typically target the UN
The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters. The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) guide.
To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
</Tip>
## UNet2DConditionLoadersMixin ## UNet2DConditionLoadersMixin

View File

@@ -39,7 +39,7 @@ mask_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images
original_image = load_image(img_url).resize((512, 512)) original_image = load_image(img_url).resize((512, 512))
mask_image = load_image(mask_url).resize((512, 512)) mask_image = load_image(mask_url).resize((512, 512))
pipe = StableDiffusionInpaintPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-inpainting") pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
pipe.vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") pipe.vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5")
pipe.to("cuda") pipe.to("cuda")

View File

@@ -12,7 +12,15 @@ specific language governing permissions and limitations under the License.
# AutoModel # AutoModel
[`AutoModel`] automatically retrieves the correct model class from the checkpoint `config.json` file. The `AutoModel` is designed to make it easy to load a checkpoint without needing to know the specific model class. `AutoModel` automatically retrieves the correct model class from the checkpoint `config.json` file.
```python
from diffusers import AutoModel, AutoPipelineForText2Image
unet = AutoModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
pipe = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", unet=unet)
```
## AutoModel ## AutoModel

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLHunyuanImage
The 2D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import AutoencoderKLHunyuanImage
vae = AutoencoderKLHunyuanImage.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
```
## AutoencoderKLHunyuanImage
[[autodoc]] AutoencoderKLHunyuanImage
- decode
- all
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLHunyuanImageRefiner
The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) for its refiner pipeline.
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import AutoencoderKLHunyuanImageRefiner
vae = AutoencoderKLHunyuanImageRefiner.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
```
## AutoencoderKLHunyuanImageRefiner
[[autodoc]] AutoencoderKLHunyuanImageRefiner
- decode
- all
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -44,3 +44,15 @@ model = AutoencoderKL.from_single_file(url)
## DecoderOutput ## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput [[autodoc]] models.autoencoders.vae.DecoderOutput
## FlaxAutoencoderKL
[[autodoc]] FlaxAutoencoderKL
## FlaxAutoencoderKLOutput
[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput
## FlaxDecoderOutput
[[autodoc]] models.vae_flax.FlaxDecoderOutput

View File

@@ -1,35 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLQwenImage
The model can be loaded with the following code snippet.
```python
from diffusers import AutoencoderKLQwenImage
vae = AutoencoderKLQwenImage.from_pretrained("Qwen/QwenImage-20B", subfolder="vae")
```
## AutoencoderKLQwenImage
[[autodoc]] AutoencoderKLQwenImage
- decode
- encode
- all
## AutoencoderKLOutput
[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# BriaTransformer2DModel
A modified flux Transformer model from [Bria](https://huggingface.co/briaai/BRIA-3.2)
## BriaTransformer2DModel
[[autodoc]] BriaTransformer2DModel

View File

@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
# ChromaTransformer2DModel # ChromaTransformer2DModel
A modified flux Transformer model from [Chroma](https://huggingface.co/lodestones/Chroma1-HD) A modified flux Transformer model from [Chroma](https://huggingface.co/lodestones/Chroma)
## ChromaTransformer2DModel ## ChromaTransformer2DModel

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The ChronoEdit Team and HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# ChronoEditTransformer3DModel
A Diffusion Transformer model for 3D video-like data introduced in [ChronoEdit: Towards Temporal Reasoning for Image Editing and World Simulation](https://huggingface.co/papers/2510.04290) from NVIDIA and the University of Toronto, by Jay Zhangjie Wu, Xuanchi Ren, Tianchang Shen, Tianshi Cao, Kai He, Yifan Lu, Ruiyuan Gao, Enze Xie, Shiyi Lan, Jose M. Alvarez, Jun Gao, Sanja Fidler, Zian Wang, and Huan Ling.
> **TL;DR:** ChronoEdit reframes image editing as a video generation task, using input and edited images as start/end frames to leverage pretrained video models with temporal consistency. A temporal reasoning stage introduces reasoning tokens to ensure physically plausible edits and visualize the editing trajectory.
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import ChronoEditTransformer3DModel
transformer = ChronoEditTransformer3DModel.from_pretrained("nvidia/ChronoEdit-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## ChronoEditTransformer3DModel
[[autodoc]] ChronoEditTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -16,8 +16,11 @@ Consistency decoder can be used to decode the latents from the denoising UNet in
The original codebase can be found at [openai/consistencydecoder](https://github.com/openai/consistencydecoder). The original codebase can be found at [openai/consistencydecoder](https://github.com/openai/consistencydecoder).
> [!WARNING] <Tip warning={true}>
> Inference is only supported for 2 iterations as of now.
Inference is only supported for 2 iterations as of now.
</Tip>
The pipeline could not have been contributed without the help of [madebyollin](https://github.com/madebyollin) and [mrsteyk](https://github.com/mrsteyk) from [this issue](https://github.com/openai/consistencydecoder/issues/1). The pipeline could not have been contributed without the help of [madebyollin](https://github.com/madebyollin) and [mrsteyk](https://github.com/mrsteyk) from [this issue](https://github.com/openai/consistencydecoder/issues/1).
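A minimal sketch of swapping the consistency decoder in as the pipeline VAE:

```python
import torch
from diffusers import ConsistencyDecoderVAE, StableDiffusionPipeline

vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16)
pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, torch_dtype=torch.float16
).to("cuda")

image = pipe("horse", generator=torch.manual_seed(0)).images[0]
```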

View File

@@ -40,3 +40,11 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
## ControlNetOutput ## ControlNetOutput
[[autodoc]] models.controlnets.controlnet.ControlNetOutput [[autodoc]] models.controlnets.controlnet.ControlNetOutput
## FlaxControlNetModel
[[autodoc]] FlaxControlNetModel
## FlaxControlNetOutput
[[autodoc]] models.controlnets.controlnet_flax.FlaxControlNetOutput

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Flux2Transformer2DModel
A Transformer model for image-like data from [Flux2](https://hf.co/black-forest-labs/FLUX.2-dev).
## Flux2Transformer2DModel
[[autodoc]] Flux2Transformer2DModel

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# HunyuanImageTransformer2DModel
A Diffusion Transformer model for [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import HunyuanImageTransformer2DModel
transformer = HunyuanImageTransformer2DModel.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## HunyuanImageTransformer2DModel
[[autodoc]] HunyuanImageTransformer2DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -19,6 +19,10 @@ All models are built from the base [`ModelMixin`] class which is a [`torch.nn.Mo
## ModelMixin ## ModelMixin
[[autodoc]] ModelMixin [[autodoc]] ModelMixin
## FlaxModelMixin
[[autodoc]] FlaxModelMixin
## PushToHubMixin ## PushToHubMixin
[[autodoc]] utils.PushToHubMixin [[autodoc]] utils.PushToHubMixin

View File

@@ -1,28 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# QwenImageTransformer2DModel
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import QwenImageTransformer2DModel
transformer = QwenImageTransformer2DModel.from_pretrained("Qwen/QwenImage-20B", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## QwenImageTransformer2DModel
[[autodoc]] QwenImageTransformer2DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,36 +0,0 @@
<!-- Copyright 2025 The SANA-Video Authors and HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# SanaVideoTransformer3DModel
A Diffusion Transformer model for 3D data (video) introduced in [SANA-Video: Efficient Video Generation with Block Linear Diffusion Transformer](https://huggingface.co/papers/2509.24695) from NVIDIA and MIT HAN Lab, by Junsong Chen, Yuyang Zhao, Jincheng Yu, Ruihang Chu, Junyu Chen, Shuai Yang, Xianbang Wang, Yicheng Pan, Daquan Zhou, Huan Ling, Haozhe Liu, Hongwei Yi, Hao Zhang, Muyang Li, Yukang Chen, Han Cai, Sanja Fidler, Ping Luo, Song Han, and Enze Xie.
The abstract from the paper is:
*We introduce SANA-Video, a small diffusion model that can efficiently generate videos up to 720x1280 resolution and minute-length duration. SANA-Video synthesizes high-resolution, high-quality and long videos with strong text-video alignment at a remarkably fast speed, deployable on RTX 5090 GPU. Two core designs ensure our efficient, effective and long video generation: (1) Linear DiT: We leverage linear attention as the core operation, which is more efficient than vanilla attention given the large number of tokens processed in video generation. (2) Constant-Memory KV cache for Block Linear Attention: we design block-wise autoregressive approach for long video generation by employing a constant-memory state, derived from the cumulative properties of linear attention. This KV cache provides the Linear DiT with global context at a fixed memory cost, eliminating the need for a traditional KV cache and enabling efficient, minute-long video generation. In addition, we explore effective data filters and model training strategies, narrowing the training cost to 12 days on 64 H100 GPUs, which is only 1% of the cost of MovieGen. Given its low cost, SANA-Video achieves competitive performance compared to modern state-of-the-art small diffusion models (e.g., Wan 2.1-1.3B and SkyReel-V2-1.3B) while being 16x faster in measured latency. Moreover, SANA-Video can be deployed on RTX 5090 GPUs with NVFP4 precision, accelerating the inference speed of generating a 5-second 720p video from 71s to 29s (2.4x speedup). In summary, SANA-Video enables low-cost, high-quality video generation.*
The model can be loaded with the following code snippet.
```python
from diffusers import SanaVideoTransformer3DModel
import torch
transformer = SanaVideoTransformer3DModel.from_pretrained("Efficient-Large-Model/SANA-Video_2B_480p_diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## SanaVideoTransformer3DModel
[[autodoc]] SanaVideoTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# SkyReelsV2Transformer3DModel
A Diffusion Transformer model for 3D video-like data was introduced in [SkyReels-V2](https://github.com/SkyworkAI/SkyReels-V2) by Skywork AI.
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import SkyReelsV2Transformer3DModel
transformer = SkyReelsV2Transformer3DModel.from_pretrained("Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## SkyReelsV2Transformer3DModel
[[autodoc]] SkyReelsV2Transformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -22,8 +22,11 @@ When the input is **continuous**:
When the input is **discrete**: When the input is **discrete**:
> [!TIP] <Tip>
> It is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised image don't contain a prediction for the masked pixel because the unnoised image cannot be masked.
It is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised image don't contain a prediction for the masked pixel because the unnoised image cannot be masked.
</Tip>
1. Convert input (classes of latent pixels) to embeddings and apply positional embeddings. 1. Convert input (classes of latent pixels) to embeddings and apply positional embeddings.
2. Apply the Transformer blocks in the standard way. 2. Apply the Transformer blocks in the standard way.

View File

@@ -23,3 +23,9 @@ The abstract from the paper is:
## UNet2DConditionOutput ## UNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput [[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput
## FlaxUNet2DConditionModel
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionModel
## FlaxUNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# WanAnimateTransformer3DModel
A Diffusion Transformer model for 3D video-like data was introduced in [Wan Animate](https://github.com/Wan-Video/Wan2.2) by the Alibaba Wan Team.
The model can be loaded with the following code snippet.
```python
import torch
from diffusers import WanAnimateTransformer3DModel
transformer = WanAnimateTransformer3DModel.from_pretrained("Wan-AI/Wan2.2-Animate-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## WanAnimateTransformer3DModel
[[autodoc]] WanAnimateTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,39 +0,0 @@
# Guiders
Guiders are components in Modular Diffusers that control how the diffusion process is guided during generation. They implement various guidance techniques to improve generation quality and control.
## BaseGuidance
[[autodoc]] diffusers.guiders.guider_utils.BaseGuidance
## ClassifierFreeGuidance
[[autodoc]] diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance
## ClassifierFreeZeroStarGuidance
[[autodoc]] diffusers.guiders.classifier_free_zero_star_guidance.ClassifierFreeZeroStarGuidance
## SkipLayerGuidance
[[autodoc]] diffusers.guiders.skip_layer_guidance.SkipLayerGuidance
## SmoothedEnergyGuidance
[[autodoc]] diffusers.guiders.smoothed_energy_guidance.SmoothedEnergyGuidance
## PerturbedAttentionGuidance
[[autodoc]] diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance
## AdaptiveProjectedGuidance
[[autodoc]] diffusers.guiders.adaptive_projected_guidance.AdaptiveProjectedGuidance
## AutoGuidance
[[autodoc]] diffusers.guiders.auto_guidance.AutoGuidance
## TangentialClassifierFreeGuidance
[[autodoc]] diffusers.guiders.tangential_classifier_free_guidance.TangentialClassifierFreeGuidance
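A minimal instantiation sketch; the exact constructor arguments may differ between guider classes, so treat the parameter below as illustrative:

```python
from diffusers.guiders import ClassifierFreeGuidance

# A guider is configured on a ModularPipeline, which calls it at each
# denoising step to combine conditional and unconditional predictions.
guider = ClassifierFreeGuidance(guidance_scale=5.0)
```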

View File

@@ -1,5 +0,0 @@
# Pipeline
## ModularPipeline
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipeline

View File

@@ -1,17 +0,0 @@
# Pipeline blocks
## ModularPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks
## SequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks
## LoopSequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.LoopSequentialPipelineBlocks
## AutoPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks

View File

@@ -1,17 +0,0 @@
# Components and configs
## ComponentSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ComponentSpec
## ConfigSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec
## ComponentsManager
[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager
## InsertableDict
[[autodoc]] diffusers.modular_pipelines.modular_pipeline_utils.InsertableDict

View File

@@ -1,9 +0,0 @@
# Pipeline states
## PipelineState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineState
## BlockState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.BlockState

View File

@@ -39,8 +39,11 @@ For instance, retrieving an image by indexing into it returns the tuple `(output
outputs[:1] outputs[:1]
``` ```
> [!TIP] <Tip>
> To check a specific pipeline or model output, refer to its corresponding API documentation.
To check a specific pipeline or model output, refer to its corresponding API documentation.
</Tip>
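As a concrete sketch, the same image can be reached through attribute access or tuple-style indexing:

```python
from diffusers import DDPMPipeline

pipe = DDPMPipeline.from_pretrained("google/ddpm-cat-256")
output = pipe(num_inference_steps=25)

image_a = output.images[0]  # attribute access on the BaseOutput subclass
image_b = output[0][0]      # indexing returns the `images` field first, then the image
assert image_a is image_b
```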
## BaseOutput ## BaseOutput
@@ -51,6 +54,10 @@ outputs[:1]
[[autodoc]] pipelines.ImagePipelineOutput [[autodoc]] pipelines.ImagePipelineOutput
## FlaxImagePipelineOutput
[[autodoc]] pipelines.pipeline_flax_utils.FlaxImagePipelineOutput
## AudioPipelineOutput ## AudioPipelineOutput
[[autodoc]] pipelines.AudioPipelineOutput [[autodoc]] pipelines.AudioPipelineOutput

View File

@@ -1,24 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# Parallelism
Parallelism strategies help speed up diffusion transformers by distributing computation across multiple devices, allowing for faster inference and training. Refer to the [Distributed inference](../training/distributed_inference) guide to learn more.
## ParallelConfig
[[autodoc]] ParallelConfig
## ContextParallelConfig
[[autodoc]] ContextParallelConfig
[[autodoc]] hooks.apply_context_parallel

View File

@@ -17,8 +17,11 @@ The abstract from the paper is:
*Significant advancements have been made in the field of video generation, with the open-source community contributing a wealth of research papers and tools for training high-quality models. However, despite these efforts, the available information and resources remain insufficient for achieving commercial-level performance. In this report, we open the black box and introduce Allegro, an advanced video generation model that excels in both quality and temporal consistency. We also highlight the current limitations in the field and present a comprehensive methodology for training high-performance, commercial-level video generation models, addressing key aspects such as data, model architecture, training pipeline, and evaluation. Our user study shows that Allegro surpasses existing open-source models and most commercial models, ranking just behind Hailuo and Kling. Code: https://github.com/rhymes-ai/Allegro , Model: https://huggingface.co/rhymes-ai/Allegro , Gallery: https://rhymes.ai/allegro_gallery .* *Significant advancements have been made in the field of video generation, with the open-source community contributing a wealth of research papers and tools for training high-quality models. However, despite these efforts, the available information and resources remain insufficient for achieving commercial-level performance. In this report, we open the black box and introduce Allegro, an advanced video generation model that excels in both quality and temporal consistency. We also highlight the current limitations in the field and present a comprehensive methodology for training high-performance, commercial-level video generation models, addressing key aspects such as data, model architecture, training pipeline, and evaluation. Our user study shows that Allegro surpasses existing open-source models and most commercial models, ranking just behind Hailuo and Kling. Code: https://github.com/rhymes-ai/Allegro , Model: https://huggingface.co/rhymes-ai/Allegro , Gallery: https://rhymes.ai/allegro_gallery .*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## Quantization ## Quantization

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License. specific language governing permissions and limitations under the License.
--> -->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# aMUSEd # aMUSEd
aMUSEd was introduced in [aMUSEd: An Open MUSE Reproduction](https://huggingface.co/papers/2401.01808) by Suraj Patil, William Berman, Robin Rombach, and Patrick von Platen. aMUSEd was introduced in [aMUSEd: An Open MUSE Reproduction](https://huggingface.co/papers/2401.01808) by Suraj Patil, William Berman, Robin Rombach, and Patrick von Platen.

View File

@@ -102,8 +102,11 @@ Here are some sample outputs:
</tr> </tr>
</table> </table>
> [!TIP] <Tip>
> AnimateDiff tends to work better with finetuned Stable Diffusion models. If you plan on using a scheduler that can clip samples, make sure to disable it by setting `clip_sample=False` in the scheduler as this can also have an adverse effect on generated samples. Additionally, the AnimateDiff checkpoints can be sensitive to the beta schedule of the scheduler. We recommend setting this to `linear`.
AnimateDiff tends to work better with finetuned Stable Diffusion models. If you plan on using a scheduler that can clip samples, make sure to disable it by setting `clip_sample=False` in the scheduler as this can also have an adverse effect on generated samples. Additionally, the AnimateDiff checkpoints can be sensitive to the beta schedule of the scheduler. We recommend setting this to `linear`.
</Tip>
### AnimateDiffControlNetPipeline ### AnimateDiffControlNetPipeline
@@ -796,11 +799,17 @@ frames = output.frames[0]
export_to_gif(frames, "animation.gif") export_to_gif(frames, "animation.gif")
``` ```
> [!WARNING] <Tip warning={true}>
> FreeInit is not really free - the improved quality comes at the cost of extra computation. It requires sampling a few extra times depending on the `num_iters` parameter that is set when enabling it. Setting the `use_fast_sampling` parameter to `True` can improve the overall performance (at the cost of lower quality compared to when `use_fast_sampling=False` but still better results than vanilla video generation models).
> [!TIP] FreeInit is not really free - the improved quality comes at the cost of extra computation. It requires sampling a few extra times depending on the `num_iters` parameter that is set when enabling it. Setting the `use_fast_sampling` parameter to `True` can improve the overall performance (at the cost of lower quality compared to when `use_fast_sampling=False` but still better results than vanilla video generation models).
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
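FreeInit is toggled on the pipeline itself; a minimal sketch with illustrative parameter values:

```python
# Assumes `pipe` is an AnimateDiffPipeline that has already been set up
pipe.enable_free_init(num_iters=3, use_fast_sampling=True)

output = pipe(prompt="a panda surfing", num_frames=16)
frames = output.frames[0]

pipe.disable_free_init()  # turn it off when no longer needed
```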
<table> <table>
<tr> <tr>

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License. specific language governing permissions and limitations under the License.
--> -->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# Attend-and-Excite # Attend-and-Excite
Attend-and-Excite for Stable Diffusion was proposed in [Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models](https://attendandexcite.github.io/Attend-and-Excite/) and provides textual attention control over image generation. Attend-and-Excite for Stable Diffusion was proposed in [Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models](https://attendandexcite.github.io/Attend-and-Excite/) and provides textual attention control over image generation.
@@ -23,8 +20,11 @@ The abstract from the paper is:
You can find additional information about Attend-and-Excite on the [project page](https://attendandexcite.github.io/Attend-and-Excite/), the [original codebase](https://github.com/AttendAndExcite/Attend-and-Excite), or try it out in a [demo](https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite). You can find additional information about Attend-and-Excite on the [project page](https://attendandexcite.github.io/Attend-and-Excite/), the [original codebase](https://github.com/AttendAndExcite/Attend-and-Excite), or try it out in a [demo](https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionAttendAndExcitePipeline ## StableDiffusionAttendAndExcitePipeline

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License. specific language governing permissions and limitations under the License.
--> -->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# AudioLDM # AudioLDM
AudioLDM was proposed in [AudioLDM: Text-to-Audio Generation with Latent Diffusion Models](https://huggingface.co/papers/2301.12503) by Haohe Liu et al. Inspired by [Stable Diffusion](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/overview), AudioLDM AudioLDM was proposed in [AudioLDM: Text-to-Audio Generation with Latent Diffusion Models](https://huggingface.co/papers/2301.12503) by Haohe Liu et al. Inspired by [Stable Diffusion](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/overview), AudioLDM
@@ -38,8 +35,11 @@ During inference:
* The _quality_ of the predicted audio sample can be controlled by the `num_inference_steps` argument; higher steps give higher quality audio at the expense of slower inference.
* The _length_ of the predicted audio sample can be controlled by varying the `audio_length_in_s` argument.
> [!TIP]
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## AudioLDMPipeline
[[autodoc]] AudioLDMPipeline
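For a concrete sense of the two arguments described above, here is a minimal sketch, assuming the `cvssp/audioldm-s-full-v2` checkpoint and the pipeline's 16 kHz output rate:

```python
import torch
from scipy.io import wavfile
from diffusers import AudioLDMPipeline

pipe = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2", torch_dtype=torch.float16).to("cuda")

prompt = "Techno music with a strong, upbeat tempo and high melodic riffs"
# more steps -> higher quality but slower; audio_length_in_s sets the clip duration
audio = pipe(prompt, num_inference_steps=10, audio_length_in_s=5.0).audios[0]

# the generated waveform is a NumPy array sampled at 16 kHz
wavfile.write("techno.wav", rate=16000, data=audio)
```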

View File

@@ -58,8 +58,11 @@ See table below for details on the three checkpoints:
The following example demonstrates how to construct good music and speech generation using the aforementioned tips: [example](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2#diffusers.AudioLDM2Pipeline.__call__.example).
> [!TIP]
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## AudioLDM2Pipeline
[[autodoc]] AudioLDM2Pipeline
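As an illustration of those tips, a minimal sketch assuming the `cvssp/audioldm2` checkpoint; with `num_waveforms_per_prompt` greater than 1, the pipeline scores the candidate waveforms against the prompt and returns the best match first:

```python
import torch
from scipy.io import wavfile
from diffusers import AudioLDM2Pipeline

pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2", torch_dtype=torch.float16).to("cuda")

prompt = "The sound of a hammer hitting a wooden surface."
negative_prompt = "Low quality."

audio = pipe(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=200,
    audio_length_in_s=10.0,
    num_waveforms_per_prompt=3,  # candidates are ranked; the first is the best match
).audios[0]

wavfile.write("hammer.wav", rate=16000, data=audio)
```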

View File

@@ -16,8 +16,11 @@ AuraFlow is inspired by [Stable Diffusion 3](../pipelines/stable_diffusion/stabl
It was developed by the Fal team and more details about it can be found in [this blog post](https://blog.fal.ai/auraflow/).
> [!TIP]
> AuraFlow can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details.
<Tip>
AuraFlow can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details.
</Tip>
## Quantization

View File

@@ -10,9 +10,6 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
> [!WARNING]
> This pipeline is deprecated but it can still be used. However, we won't test the pipeline anymore and won't accept any changes to it. If you run into any issues, reinstall the last Diffusers version that supported this model.
# BLIP-Diffusion
BLIP-Diffusion was proposed in [BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing](https://huggingface.co/papers/2305.14720). It enables zero-shot subject-driven generation and control-guided zero-shot generation.
@@ -26,8 +23,11 @@ The original codebase can be found at [salesforce/LAVIS](https://github.com/sale
`BlipDiffusionPipeline` and `BlipDiffusionControlNetPipeline` were contributed by [`ayushtues`](https://github.com/ayushtues/).
> [!TIP]
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## BlipDiffusionPipeline

View File

@@ -1,44 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Bria 3.2
Bria 3.2 is the next-generation commercial-ready text-to-image model. With just 4 billion parameters, it provides exceptional aesthetics and text rendering, and has been evaluated to perform on par with leading open-source models while outperforming other licensed models.
In addition to being built entirely on licensed data, 3.2 provides several advantages for enterprise and commercial use:
- Efficient Compute: the model is 3x smaller than equivalent models on the market (4B parameters vs. 12B parameters for other open-source models)
- Architecture Consistency: Same architecture as 3.1—ideal for users looking to upgrade without disruption.
- Fine-tuning Speedup: 2x faster fine-tuning on L40S and A100.
Original model checkpoints for Bria 3.2 can be found [here](https://huggingface.co/briaai/BRIA-3.2).
Github repo for Bria 3.2 can be found [here](https://github.com/Bria-AI/BRIA-3.2).
If you want to learn more about the Bria platform and get free trial access, please visit [bria.ai](https://bria.ai).
## Usage
_As the model is gated, before using it with diffusers you first need to go to the [Bria 3.2 Hugging Face page](https://huggingface.co/briaai/BRIA-3.2), fill in the form, and accept the gate. Once you are in, you need to log in so that your system knows you've accepted the gate._
Use the command below to log in:
```bash
hf auth login
```
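Once authenticated, a minimal text-to-image sketch might look like the following; the scheduler settings shown here are illustrative assumptions rather than tuned defaults:

```python
import torch
from diffusers import BriaPipeline

# assumes access to the gated briaai/BRIA-3.2 repository has already been granted
pipe = BriaPipeline.from_pretrained("briaai/BRIA-3.2", torch_dtype=torch.bfloat16)
pipe.to("cuda")

prompt = "A portrait of a calico cat wearing a tiny knitted sweater, studio lighting"
image = pipe(prompt, num_inference_steps=30, guidance_scale=5.0).images[0]
image.save("bria_3_2.png")
```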
## BriaPipeline
[[autodoc]] BriaPipeline
- all
- __call__

View File

@@ -1,45 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Bria Fibo
Text-to-image models have mastered imagination - but not control. FIBO changes that.
FIBO is trained on structured JSON captions up to 1,000+ words and designed to understand and control different visual parameters such as lighting, composition, color, and camera settings, enabling precise and reproducible outputs.
With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence, and professional control.
FIBO is trained exclusively on structured prompts and will not work with freeform text prompts.
You can use the [FIBO-VLM-prompt-to-JSON](https://huggingface.co/briaai/FIBO-VLM-prompt-to-JSON) model or the [FIBO-gemini-prompt-to-JSON](https://huggingface.co/briaai/FIBO-gemini-prompt-to-JSON) model to convert your freeform text prompt to a structured JSON prompt.
Using freeform text prompts directly with FIBO is not recommended, as it will not produce the best results.
You can learn more about FIBO on the [Bria Fibo Hugging Face page](https://huggingface.co/briaai/FIBO).
## Usage
_As the model is gated, before using it with diffusers you first need to go to the [Bria Fibo Hugging Face page](https://huggingface.co/briaai/FIBO), fill in the form, and accept the gate. Once you are in, you need to log in so that your system knows you've accepted the gate._
Use the command below to log in:
```bash
hf auth login
```
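After accepting the gate and logging in, generation follows the usual diffusers pattern, except that the prompt is a serialized JSON caption. The sketch below is illustrative only: it assumes `BriaPipeline` loads the `briaai/FIBO` checkpoint and accepts the JSON string as its prompt, and the caption fields are made up for the example.

```python
import json
import torch
from diffusers import BriaPipeline

# assumes access to the gated briaai/FIBO repository has already been granted
pipe = BriaPipeline.from_pretrained("briaai/FIBO", torch_dtype=torch.bfloat16)
pipe.to("cuda")

# a structured caption, e.g. produced by one of the prompt-to-JSON models above (hypothetical fields)
structured_prompt = {
    "subject": "an old lighthouse on a rocky coast",
    "lighting": "golden hour, soft rim light",
    "composition": "wide shot, rule of thirds",
    "camera": {"lens": "35mm", "aperture": "f/8"},
}

image = pipe(json.dumps(structured_prompt), num_inference_steps=30, guidance_scale=5.0).images[0]
image.save("fibo.png")
```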
## BriaPipeline
[[autodoc]] BriaPipeline
- all
- __call__

View File

@@ -19,42 +19,17 @@ specific language governing permissions and limitations under the License.
Chroma is a text to image generation model based on Flux.
Original model checkpoints for Chroma can be found here:
* High-resolution finetune: [lodestones/Chroma1-HD](https://huggingface.co/lodestones/Chroma1-HD)
* Base model: [lodestones/Chroma1-Base](https://huggingface.co/lodestones/Chroma1-Base)
* Original repo with progress checkpoints: [lodestones/Chroma](https://huggingface.co/lodestones/Chroma) (loading this repo with `from_pretrained` will load a Diffusers-compatible version of the `unlocked-v37` checkpoint)
> [!TIP]
> Chroma can use all the same optimizations as Flux.
## Inference
```python
import torch
from diffusers import ChromaPipeline
pipe = ChromaPipeline.from_pretrained("lodestones/Chroma1-HD", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()
prompt = [
    "A high-fashion close-up portrait of a blonde woman in clear sunglasses. The image uses a bold teal and red color split for dramatic lighting. The background is a simple teal-green. The photo is sharp and well-composed, and is designed for viewing with anaglyph 3D glasses for optimal effect. It looks professionally done."
]
negative_prompt = ["low quality, ugly, unfinished, out of focus, deformed, disfigure, blurry, smudged, restricted palette, flat colors"]
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    generator=torch.Generator("cpu").manual_seed(433),
    num_inference_steps=40,
    guidance_scale=3.0,
    num_images_per_prompt=1,
).images[0]
image.save("chroma.png")
```
## Loading from a single file
To use updated model checkpoints that are not in the Diffusers format, you can use the `ChromaTransformer2DModel` class to load the model from a single file in the original format. This is also useful when trying to load finetunes or quantized versions of the models that have been published by the community.
Original model checkpoints for Chroma can be found [here](https://huggingface.co/lodestones/Chroma).
<Tip>
Chroma can use all the same optimizations as Flux.
</Tip>
## Inference (Single File)
The `ChromaTransformer2DModel` supports loading checkpoints in the original format. This is also useful when trying to load finetunes or quantized versions of the models that have been published by the community.
The following example demonstrates how to run Chroma from a single file.
@@ -63,29 +38,30 @@ Then run the following example
```python
import torch
from diffusers import ChromaTransformer2DModel, ChromaPipeline
model_id = "lodestones/Chroma1-HD"
dtype = torch.bfloat16
transformer = ChromaTransformer2DModel.from_single_file("https://huggingface.co/lodestones/Chroma1-HD/blob/main/Chroma1-HD.safetensors", torch_dtype=dtype)
pipe = ChromaPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=dtype)
pipe.enable_model_cpu_offload()
prompt = [
    "A high-fashion close-up portrait of a blonde woman in clear sunglasses. The image uses a bold teal and red color split for dramatic lighting. The background is a simple teal-green. The photo is sharp and well-composed, and is designed for viewing with anaglyph 3D glasses for optimal effect. It looks professionally done."
]
negative_prompt = ["low quality, ugly, unfinished, out of focus, deformed, disfigure, blurry, smudged, restricted palette, flat colors"]
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    generator=torch.Generator("cpu").manual_seed(433),
    num_inference_steps=40,
    guidance_scale=3.0,
).images[0]
image.save("chroma-single-file.png")
```
```python
import torch
from diffusers import ChromaTransformer2DModel, ChromaPipeline
from transformers import T5EncoderModel
bfl_repo = "black-forest-labs/FLUX.1-dev"
dtype = torch.bfloat16
transformer = ChromaTransformer2DModel.from_single_file("https://huggingface.co/lodestones/Chroma/blob/main/chroma-unlocked-v35.safetensors", torch_dtype=dtype)
text_encoder = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
tokenizer = T5Tokenizer.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype)
pipe = ChromaPipeline.from_pretrained(bfl_repo, transformer=transformer, text_encoder=text_encoder, tokenizer=tokenizer, torch_dtype=dtype)
pipe.enable_model_cpu_offload()
prompt = "A cat holding a sign that says hello world"
image = pipe(
    prompt,
    guidance_scale=4.0,
    output_type="pil",
    num_inference_steps=26,
    generator=torch.Generator("cpu").manual_seed(0)
).images[0]
image.save("image.png")
```
## ChromaPipeline
@@ -93,9 +69,3 @@ image.save("chroma-single-file.png")
[[autodoc]] ChromaPipeline
- all
- __call__
## ChromaImg2ImgPipeline
[[autodoc]] ChromaImg2ImgPipeline
- all
- __call__

View File

@@ -1,156 +0,0 @@
<!-- Copyright 2025 The ChronoEdit Team and HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->
<div style="float: right;">
<div class="flex flex-wrap space-x-1">
<a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
</a>
</div>
</div>
# ChronoEdit
[ChronoEdit: Towards Temporal Reasoning for Image Editing and World Simulation](https://huggingface.co/papers/2510.04290) from NVIDIA and University of Toronto, by Jay Zhangjie Wu, Xuanchi Ren, Tianchang Shen, Tianshi Cao, Kai He, Yifan Lu, Ruiyuan Gao, Enze Xie, Shiyi Lan, Jose M. Alvarez, Jun Gao, Sanja Fidler, Zian Wang, Huan Ling.
> **TL;DR:** ChronoEdit reframes image editing as a video generation task, using input and edited images as start/end frames to leverage pretrained video models with temporal consistency. A temporal reasoning stage introduces reasoning tokens to ensure physically plausible edits and visualize the editing trajectory.
*Recent advances in large generative models have greatly enhanced both image editing and in-context image generation, yet a critical gap remains in ensuring physical consistency, where edited objects must remain coherent. This capability is especially vital for world simulation related tasks. In this paper, we present ChronoEdit, a framework that reframes image editing as a video generation problem. First, ChronoEdit treats the input and edited images as the first and last frames of a video, allowing it to leverage large pretrained video generative models that capture not only object appearance but also the implicit physics of motion and interaction through learned temporal consistency. Second, ChronoEdit introduces a temporal reasoning stage that explicitly performs editing at inference time. Under this setting, target frame is jointly denoised with reasoning tokens to imagine a plausible editing trajectory that constrains the solution space to physically viable transformations. The reasoning tokens are then dropped after a few steps to avoid the high computational cost of rendering a full video. To validate ChronoEdit, we introduce PBench-Edit, a new benchmark of image-prompt pairs for contexts that require physical consistency, and demonstrate that ChronoEdit surpasses state-of-the-art baselines in both visual fidelity and physical plausibility. Project page for code and models: [this https URL](https://research.nvidia.com/labs/toronto-ai/chronoedit).*
The ChronoEdit pipeline is developed by the ChronoEdit Team. The original code is available on [GitHub](https://github.com/nv-tlabs/ChronoEdit), and pretrained models can be found in the [nvidia/ChronoEdit](https://huggingface.co/collections/nvidia/chronoedit) collection on Hugging Face.
### Image Editing
```py
import torch
import numpy as np
from diffusers import AutoencoderKLWan, ChronoEditTransformer3DModel, ChronoEditPipeline
from diffusers.utils import export_to_video, load_image
from transformers import CLIPVisionModel
from PIL import Image
model_id = "nvidia/ChronoEdit-14B-Diffusers"
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
pipe.to("cuda")
image = load_image(
"https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png"
)
max_area = 720 * 1280
aspect_ratio = image.height / image.width
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
print("width", width, "height", height)
image = image.resize((width, height))
prompt = (
"The user wants to transform the image by adding a small, cute mouse sitting inside the floral teacup, enjoying a spa bath. The mouse should appear relaxed and cheerful, with a tiny white bath towel draped over its head like a turban. It should be positioned comfortably in the cups liquid, with gentle steam rising around it to blend with the cozy atmosphere. "
"The mouses pose should be natural—perhaps sitting upright with paws resting lightly on the rim or submerged in the tea. The teacups floral design, gold trim, and warm lighting must remain unchanged to preserve the original aesthetic. The steam should softly swirl around the mouse, enhancing the spa-like, whimsical mood."
)
output = pipe(
image=image,
prompt=prompt,
height=height,
width=width,
num_frames=5,
num_inference_steps=50,
guidance_scale=5.0,
enable_temporal_reasoning=False,
num_temporal_reasoning_steps=0,
).frames[0]
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
Optionally, enable **temporal reasoning** for improved physical consistency:
```py
output = pipe(
image=image,
prompt=prompt,
height=height,
width=width,
num_frames=29,
num_inference_steps=50,
guidance_scale=5.0,
enable_temporal_reasoning=True,
num_temporal_reasoning_steps=50,
).frames[0]
export_to_video(output, "output.mp4", fps=16)
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
### Inference with 8-Step Distillation LoRA
```py
import torch
import numpy as np
from diffusers import AutoencoderKLWan, ChronoEditTransformer3DModel, ChronoEditPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video, load_image
from huggingface_hub import hf_hub_download
from transformers import CLIPVisionModel
from PIL import Image
model_id = "nvidia/ChronoEdit-14B-Diffusers"
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
lora_path = hf_hub_download(repo_id=model_id, filename="lora/chronoedit_distill_lora.safetensors")
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=1.0)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=2.0)
pipe.to("cuda")
image = load_image(
"https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png"
)
max_area = 720 * 1280
aspect_ratio = image.height / image.width
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
print("width", width, "height", height)
image = image.resize((width, height))
prompt = (
"The user wants to transform the image by adding a small, cute mouse sitting inside the floral teacup, enjoying a spa bath. The mouse should appear relaxed and cheerful, with a tiny white bath towel draped over its head like a turban. It should be positioned comfortably in the cups liquid, with gentle steam rising around it to blend with the cozy atmosphere. "
"The mouses pose should be natural—perhaps sitting upright with paws resting lightly on the rim or submerged in the tea. The teacups floral design, gold trim, and warm lighting must remain unchanged to preserve the original aesthetic. The steam should softly swirl around the mouse, enhancing the spa-like, whimsical mood."
)
output = pipe(
image=image,
prompt=prompt,
height=height,
width=width,
num_frames=5,
num_inference_steps=8,
guidance_scale=1.0,
enable_temporal_reasoning=False,
num_temporal_reasoning_steps=0,
).frames[0]
export_to_video(output, "output.mp4", fps=16)
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
## ChronoEditPipeline
[[autodoc]] ChronoEditPipeline
- all
- __call__
## ChronoEditPipelineOutput
[[autodoc]] pipelines.chronoedit.pipeline_output.ChronoEditPipelineOutput

View File

@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
pipeline_quant_config = PipelineQuantizationConfig(
    quant_backend="torchao",
    quant_kwargs={"quant_type": "int8wo"},
    components_to_quantize="transformer"
    components_to_quantize=["transformer"]
)
# fp8 layerwise weight-casting

View File

@@ -21,8 +21,11 @@ The abstract from the paper is:
*Recent advancements in text-to-image generative systems have been largely driven by diffusion models. However, single-stage text-to-image diffusion models still face challenges, in terms of computational efficiency and the refinement of image details. To tackle the issue, we propose CogView3, an innovative cascaded framework that enhances the performance of text-to-image diffusion. CogView3 is the first model implementing relay diffusion in the realm of text-to-image generation, executing the task by first creating low-resolution images and subsequently applying relay-based super-resolution. This methodology not only results in competitive text-to-image outputs but also greatly reduces both training and inference costs. Our experimental results demonstrate that CogView3 outperforms SDXL, the current state-of-the-art open-source text-to-image diffusion model, by 77.0% in human evaluations, all while requiring only about 1/2 of the inference time. The distilled variant of CogView3 achieves comparable performance while only utilizing 1/10 of the inference time by SDXL.*
> [!TIP]
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).

Some files were not shown because too many files have changed in this diff.