Mirror of https://github.com/huggingface/diffusers.git
Synced 2025-12-07 04:54:47 +08:00

Compare commits: chroma-img ... modular-cu (108 commits)
Commits (SHA1):

532b395718, 5c43924ac2, a633289e10, 06fd427797, 48a551251d, 6398fbc391, 3c8b67b371, 9feb946432,
c90352754a, 7a935a0bbe, 941b7fc084, 76a62ac9cc, 1c6ab9e900, 265840a098, 9f4d997d8f, b41abb2230,
f33b89bafb, 48a6d29550, 2d3d376bc0, db715e2c8c, 754fe85cac, cc1f9a2ce3, 737d7fc3b0, be23f7df00,
86becea77f, 7e3bf4aff6, de043c6044, 4c20624cc6, 0454fbb30b, cbc8ced20f, 01240fecb0, ce338d4e4a,
bc55b631fd, 15d50f16f2, 2c30287958, 425a715e35, 2527917528, e6639fef70, 8c938fb410, f864a9a352,
d6fa3298fa, 6f1d6694df, 0e95aa853e, 5ef74fd5f6, 64a9210315, d31b8cea3e, 62e847db5f, 470458623e,
a79c3af6bb, 3f3f0c16a6, f3e1310469, 87f83d3dd9, f064b3bf73, 3b079ec3fa, bc34fa8386, 05e7a854d0,
76ec3d1fee, cdaf84a708, e8e44a510c, 21543de571, d7dd924ece, 00f95b9755, eea76892e8, 27bf7fcd0e,
a185e1ab91, d93381cd41, 3649d7b903, 10c36e0b78, 8846635873, dd285099eb, 80f27d7e8d, d3e27e05f0,
5df02fc171, 7392c8ff5a, 474a248f10, 7bc0a07b19, 92542719ed, 6760300202, 798265f2b6, cd813499be,
fbddf02807, f20b83a04f, ee40088fe5, 7fc53b5d66, 0874dd04dc, 6184d8a433, 5a6e386464, 42077e6c73,
3d8d8485fc, 195926bbdc, 85a916bb8b, 3287ce2890, 0c11c8c1ac, fc51583c8a, fb57c76aa1, 7251bb4fd0,
3fba74e153, a4df8dbc40, 48eae6f420, 66394bf6c7, 62cce3045d, 05e867784d, d72184eba3, 5ce4814af1,
1bc6f3dc0f, 79bd7ecc78, 9b834f8710, 81426b0f19
.github/workflows/benchmark.yml (vendored, 41 changed lines)

@@ -11,17 +11,18 @@ env:
   HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
+  BASE_PATH: benchmark_outputs
 
 jobs:
-  torch_pipelines_cuda_benchmark_tests:
+  torch_models_cuda_benchmark_tests:
     env:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_BENCHMARK }}
-    name: Torch Core Pipelines CUDA Benchmarking Tests
+    name: Torch Core Models CUDA Benchmarking Tests
     strategy:
       fail-fast: false
       max-parallel: 1
     runs-on:
-      group: aws-g6-4xlarge-plus
+      group: aws-g6e-4xlarge
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host --gpus 0
@@ -35,27 +36,47 @@ jobs:
           nvidia-smi
       - name: Install dependencies
         run: |
+          apt update
+          apt install -y libpq-dev postgresql-client
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          python -m uv pip install pandas peft
-          python -m uv pip uninstall transformers && python -m uv pip install transformers==4.48.0
+          python -m uv pip install -r benchmarks/requirements.txt
       - name: Environment
         run: |
           python utils/print_env.py
       - name: Diffusers Benchmarking
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
-          BASE_PATH: benchmark_outputs
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
         run: |
-          cd benchmarks && mkdir ${BASE_PATH} && python run_all.py && python push_results.py
+          export TOTAL_GPU_MEMORY=$(python -c "import torch; print(torch.cuda.get_device_properties(0).total_memory / (1024**3))")
+          cd benchmarks && python run_all.py
 
+      - name: Push results to the Hub
+        env:
+          HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
+        run: |
+          cd benchmarks && python push_results.py
+          mkdir $BASE_PATH && cp *.csv $BASE_PATH
+
       - name: Test suite reports artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
           name: benchmark_test_reports
-          path: benchmarks/benchmark_outputs
+          path: benchmarks/${{ env.BASE_PATH }}
 
+      # TODO: enable this once the connection problem has been resolved.
+      - name: Update benchmarking results to DB
+        env:
+          PGDATABASE: metrics
+          PGHOST: ${{ secrets.DIFFUSERS_BENCHMARKS_PGHOST }}
+          PGUSER: transformers_benchmarks
+          PGPASSWORD: ${{ secrets.DIFFUSERS_BENCHMARKS_PGPASSWORD }}
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+        run: |
+          git config --global --add safe.directory /__w/diffusers/diffusers
+          commit_id=$GITHUB_SHA
+          commit_msg=$(git show -s --format=%s "$commit_id" | cut -c1-70)
+          cd benchmarks && python populate_into_db.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
 
       - name: Report success status
         if: ${{ success() }}
.github/workflows/build_docker_images.yml (vendored, 4 changed lines)

@@ -75,10 +75,6 @@ jobs:
           - diffusers-pytorch-cuda
           - diffusers-pytorch-xformers-cuda
           - diffusers-pytorch-minimum-cuda
-          - diffusers-flax-cpu
-          - diffusers-flax-tpu
-          - diffusers-onnxruntime-cpu
-          - diffusers-onnxruntime-cuda
           - diffusers-doc-builder
 
     steps:
.github/workflows/nightly_tests.yml (vendored, 106 changed lines)

@@ -248,7 +248,7 @@ jobs:
           BIG_GPU_MEMORY: 40
         run: |
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -m "big_gpu_with_torch_cuda" \
+            -m "big_accelerator" \
             --make-reports=tests_big_gpu_torch_cuda \
             --report-log=tests_big_gpu_torch_cuda.log \
             tests/
@@ -321,55 +321,6 @@ jobs:
           name: torch_minimum_version_cuda_test_reports
           path: reports
 
-  run_nightly_onnx_tests:
-    name: Nightly ONNXRuntime CUDA tests on Ubuntu
-    runs-on:
-      group: aws-g4dn-2xlarge
-    container:
-      image: diffusers/diffusers-onnxruntime-cuda
-      options: --gpus 0 --shm-size "16gb" --ipc host
-
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: NVIDIA-SMI
-        run: nvidia-smi
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          python -m uv pip install pytest-reportlog
-      - name: Environment
-        run: python utils/print_env.py
-
-      - name: Run Nightly ONNXRuntime CUDA tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Onnx" \
-            --make-reports=tests_onnx_cuda \
-            --report-log=tests_onnx_cuda.log \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_onnx_cuda_stats.txt
-          cat reports/tests_onnx_cuda_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: tests_onnx_cuda_reports
-          path: reports
-
   run_nightly_quantization_tests:
     name: Torch quantization nightly tests
     strategy:
@@ -485,57 +436,6 @@ jobs:
           name: torch_cuda_pipeline_level_quant_reports
           path: reports
 
-  run_flax_tpu_tests:
-    name: Nightly Flax TPU Tests
-    runs-on:
-      group: gcp-ct5lp-hightpu-8t
-    if: github.event_name == 'schedule'
-
-    container:
-      image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          python -m uv pip install pytest-reportlog
-
-      - name: Environment
-        run: python utils/print_env.py
-
-      - name: Run nightly Flax TPU tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 0 \
-            -s -v -k "Flax" \
-            --make-reports=tests_flax_tpu \
-            --report-log=tests_flax_tpu.log \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_flax_tpu_stats.txt
-          cat reports/tests_flax_tpu_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: flax_tpu_test_reports
-          path: reports
-
   generate_consolidated_report:
     name: Generate Consolidated Test Report
     needs: [
@@ -545,9 +445,9 @@ jobs:
       run_big_gpu_torch_tests,
       run_nightly_quantization_tests,
       run_nightly_pipeline_level_quantization_tests,
-      run_nightly_onnx_tests,
+      # run_nightly_onnx_tests,
       torch_minimum_version_cuda_tests,
-      run_flax_tpu_tests
+      # run_flax_tpu_tests
     ]
     if: always()
     runs-on:
.github/workflows/pr_tests.yml (vendored, 14 changed lines)

@@ -87,11 +87,6 @@ jobs:
             runner: aws-general-8-plus
             image: diffusers/diffusers-pytorch-cpu
             report: torch_cpu_models_schedulers
-          - name: Fast Flax CPU tests
-            framework: flax
-            runner: aws-general-8-plus
-            image: diffusers/diffusers-flax-cpu
-            report: flax_cpu
           - name: PyTorch Example CPU tests
             framework: pytorch_examples
             runner: aws-general-8-plus
@@ -147,15 +142,6 @@ jobs:
             --make-reports=tests_${{ matrix.config.report }} \
             tests/models tests/schedulers tests/others
 
-      - name: Run fast Flax TPU tests
-        if: ${{ matrix.config.framework == 'flax' }}
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Flax" \
-            --make-reports=tests_${{ matrix.config.report }} \
-            tests
-
       - name: Run example PyTorch CPU tests
         if: ${{ matrix.config.framework == 'pytorch_examples' }}
         run: |
.github/workflows/pr_tests_gpu.yml (vendored, 2 changed lines)

@@ -188,7 +188,7 @@ jobs:
        shell: bash
     strategy:
       fail-fast: false
-      max-parallel: 2
+      max-parallel: 4
       matrix:
         module: [models, schedulers, lora, others]
     steps:
.github/workflows/push_tests.yml (vendored, 96 changed lines)

@@ -159,102 +159,6 @@ jobs:
           name: torch_cuda_test_reports_${{ matrix.module }}
           path: reports
 
-  flax_tpu_tests:
-    name: Flax TPU Tests
-    runs-on:
-      group: gcp-ct5lp-hightpu-8t
-    container:
-      image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run Flax TPU tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 0 \
-            -s -v -k "Flax" \
-            --make-reports=tests_flax_tpu \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_flax_tpu_stats.txt
-          cat reports/tests_flax_tpu_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: flax_tpu_test_reports
-          path: reports
-
-  onnx_cuda_tests:
-    name: ONNX CUDA Tests
-    runs-on:
-      group: aws-g4dn-2xlarge
-    container:
-      image: diffusers/diffusers-onnxruntime-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run ONNXRuntime CUDA tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Onnx" \
-            --make-reports=tests_onnx_cuda \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_onnx_cuda_stats.txt
-          cat reports/tests_onnx_cuda_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: onnx_cuda_test_reports
-          path: reports
-
   run_torch_compile_tests:
     name: PyTorch Compile CUDA tests
.github/workflows/push_tests_fast.yml (vendored, 28 changed lines)

@@ -33,16 +33,6 @@ jobs:
             runner: aws-general-8-plus
             image: diffusers/diffusers-pytorch-cpu
             report: torch_cpu
-          - name: Fast Flax CPU tests on Ubuntu
-            framework: flax
-            runner: aws-general-8-plus
-            image: diffusers/diffusers-flax-cpu
-            report: flax_cpu
-          - name: Fast ONNXRuntime CPU tests on Ubuntu
-            framework: onnxruntime
-            runner: aws-general-8-plus
-            image: diffusers/diffusers-onnxruntime-cpu
-            report: onnx_cpu
           - name: PyTorch Example CPU tests on Ubuntu
             framework: pytorch_examples
             runner: aws-general-8-plus
@@ -87,24 +77,6 @@ jobs:
             --make-reports=tests_${{ matrix.config.report }} \
             tests/
 
-      - name: Run fast Flax TPU tests
-        if: ${{ matrix.config.framework == 'flax' }}
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Flax" \
-            --make-reports=tests_${{ matrix.config.report }} \
-            tests/
-
-      - name: Run fast ONNXRuntime CPU tests
-        if: ${{ matrix.config.framework == 'onnxruntime' }}
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Onnx" \
-            --make-reports=tests_${{ matrix.config.report }} \
-            tests/
-
       - name: Run example PyTorch CPU tests
         if: ${{ matrix.config.framework == 'pytorch_examples' }}
         run: |
.github/workflows/push_tests_mps.yml (vendored, 7 changed lines)

@@ -1,12 +1,7 @@
 name: Fast mps tests on main
 
 on:
-  push:
-    branches:
-      - main
-    paths:
-      - "src/diffusers/**.py"
-      - "tests/**.py"
+  workflow_dispatch:
 
 env:
   DIFFUSERS_IS_CI: yes
.github/workflows/release_tests_fast.yml (vendored, 95 changed lines)

@@ -213,101 +213,6 @@ jobs:
         with:
           name: torch_minimum_version_cuda_test_reports
           path: reports
 
-  flax_tpu_tests:
-    name: Flax TPU Tests
-    runs-on: docker-tpu
-    container:
-      image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --privileged
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run slow Flax TPU tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 0 \
-            -s -v -k "Flax" \
-            --make-reports=tests_flax_tpu \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_flax_tpu_stats.txt
-          cat reports/tests_flax_tpu_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: flax_tpu_test_reports
-          path: reports
-
-  onnx_cuda_tests:
-    name: ONNX CUDA Tests
-    runs-on:
-      group: aws-g4dn-2xlarge
-    container:
-      image: diffusers/diffusers-onnxruntime-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run slow ONNXRuntime CUDA tests
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "Onnx" \
-            --make-reports=tests_onnx_cuda \
-            tests/
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_onnx_cuda_stats.txt
-          cat reports/tests_onnx_cuda_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: onnx_cuda_test_reports
-          path: reports
-
   run_torch_compile_tests:
     name: PyTorch Compile CUDA tests
@@ -1,4 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -1,4 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
benchmarks/README.md (new file, 69 lines)

# Diffusers Benchmarks

Welcome to Diffusers Benchmarks. These benchmarks are used to obtain latency and memory information for the most popular models across different scenarios, such as:

* Base case, i.e., using `torch.bfloat16` and `torch.nn.functional.scaled_dot_product_attention`.
* Base + `torch.compile()`
* NF4 quantization
* Layerwise upcasting

Instead of full diffusion pipelines, only the forward pass of the respective model classes (such as `FluxTransformer2DModel`) is tested with the real checkpoints (such as `"black-forest-labs/FLUX.1-dev"`).

The entrypoint for running all the currently available benchmarks is `run_all.py`. However, you can also run the individual benchmarks, e.g., `python benchmarking_flux.py`. Each run produces a CSV file containing various information about the benchmarks.

The benchmarks are run on a weekly basis and the CI is defined in [benchmark.yml](../.github/workflows/benchmark.yml).

## Running the benchmarks manually

First set up `torch` and install `diffusers` from the root of the repository:

```sh
pip install -e ".[quality,test]"
```

Then make sure the other dependencies are installed:

```sh
cd benchmarks/
pip install -r requirements.txt
```

We need to be authenticated to access some of the checkpoints used during benchmarking:

```sh
huggingface-cli login
```

We use an L40 GPU with 128GB RAM to run the benchmark CI. As such, the benchmarks are configured to run on NVIDIA GPUs. So, make sure you have access to a similar machine (or modify the benchmarking scripts accordingly).

Then you can either launch the entire benchmarking suite by running:

```sh
python run_all.py
```

Or, you can run the individual benchmarks.

## Customizing the benchmarks

We define "scenarios" to cover the most common ways in which these models are used. You can define a new scenario by modifying an existing benchmark file:

```py
BenchmarkScenario(
    name=f"{CKPT_ID}-bnb-8bit",
    model_cls=FluxTransformer2DModel,
    model_init_kwargs={
        "pretrained_model_name_or_path": CKPT_ID,
        "torch_dtype": torch.bfloat16,
        "subfolder": "transformer",
        "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
    },
    get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
    model_init_fn=model_init_fn,
)
```

You can also configure a new model-level benchmark and add it to the existing suite. To do so, defining a valid benchmarking file like `benchmarking_flux.py` should be enough (a sketch of such a file follows this README).

Happy benchmarking 🧨
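To make that last point concrete, here is a minimal sketch of what such a new benchmarking file could look like, modeled on `benchmarking_flux.py` from this changeset. The file name, checkpoint ID, and input shapes below are hypothetical placeholders, not part of the PR:

```py
# benchmarking_my_model.py -- hypothetical example, not part of this changeset.
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import UNet2DConditionModel
from diffusers.utils.testing_utils import torch_device


CKPT_ID = "some-org/some-checkpoint"  # placeholder checkpoint ID
RESULT_FILENAME = "my_model.csv"


def get_input_dict(**device_dtype_kwargs):
    # Illustrative shapes; they must match the model's forward signature.
    return {
        "sample": torch.randn(1, 4, 64, 64, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, 77, 768, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=UNet2DConditionModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "unet",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
        )
    ]
    # Method name matches the spelling in benchmarking_utils.py below.
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
```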
@@ -1,346 +0,0 @@
import os
import sys

import torch

from diffusers import (
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    AutoPipelineForText2Image,
    ControlNetModel,
    LCMScheduler,
    StableDiffusionAdapterPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLAdapterPipeline,
    StableDiffusionXLControlNetPipeline,
    T2IAdapter,
    WuerstchenCombinedPipeline,
)
from diffusers.utils import load_image


sys.path.append(".")

from utils import (  # noqa: E402
    BASE_PATH,
    PROMPT,
    BenchmarkInfo,
    benchmark_fn,
    bytes_to_giga_bytes,
    flush,
    generate_csv_dict,
    write_to_csv,
)


RESOLUTION_MAPPING = {
    "Lykon/DreamShaper": (512, 512),
    "lllyasviel/sd-controlnet-canny": (512, 512),
    "diffusers/controlnet-canny-sdxl-1.0": (1024, 1024),
    "TencentARC/t2iadapter_canny_sd14v1": (512, 512),
    "TencentARC/t2i-adapter-canny-sdxl-1.0": (1024, 1024),
    "stabilityai/stable-diffusion-2-1": (768, 768),
    "stabilityai/stable-diffusion-xl-base-1.0": (1024, 1024),
    "stabilityai/stable-diffusion-xl-refiner-1.0": (1024, 1024),
    "stabilityai/sdxl-turbo": (512, 512),
}


class BaseBenchmak:
    pipeline_class = None

    def __init__(self, args):
        super().__init__()

    def run_inference(self, args):
        raise NotImplementedError

    def benchmark(self, args):
        raise NotImplementedError

    def get_result_filepath(self, args):
        pipeline_class_name = str(self.pipe.__class__.__name__)
        name = (
            args.ckpt.replace("/", "_")
            + "_"
            + pipeline_class_name
            + f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
        )
        filepath = os.path.join(BASE_PATH, name)
        return filepath


class TextToImageBenchmark(BaseBenchmak):
    pipeline_class = AutoPipelineForText2Image

    def __init__(self, args):
        pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
        pipe = pipe.to("cuda")

        if args.run_compile:
            if not isinstance(pipe, WuerstchenCombinedPipeline):
                pipe.unet.to(memory_format=torch.channels_last)
                print("Run torch compile")
                pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

                if hasattr(pipe, "movq") and getattr(pipe, "movq", None) is not None:
                    pipe.movq.to(memory_format=torch.channels_last)
                    pipe.movq = torch.compile(pipe.movq, mode="reduce-overhead", fullgraph=True)
            else:
                print("Run torch compile")
                pipe.decoder = torch.compile(pipe.decoder, mode="reduce-overhead", fullgraph=True)
                pipe.vqgan = torch.compile(pipe.vqgan, mode="reduce-overhead", fullgraph=True)

        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )

    def benchmark(self, args):
        flush()

        print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")

        time = benchmark_fn(self.run_inference, self.pipe, args)  # in seconds.
        memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
        benchmark_info = BenchmarkInfo(time=time, memory=memory)

        pipeline_class_name = str(self.pipe.__class__.__name__)
        flush()
        csv_dict = generate_csv_dict(
            pipeline_cls=pipeline_class_name, ckpt=args.ckpt, args=args, benchmark_info=benchmark_info
        )
        filepath = self.get_result_filepath(args)
        write_to_csv(filepath, csv_dict)
        print(f"Logs written to: {filepath}")
        flush()


class TurboTextToImageBenchmark(TextToImageBenchmark):
    def __init__(self, args):
        super().__init__(args)

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
            guidance_scale=0.0,
        )


class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
    lora_id = "latent-consistency/lcm-lora-sdxl"

    def __init__(self, args):
        super().__init__(args)
        self.pipe.load_lora_weights(self.lora_id)
        self.pipe.fuse_lora()
        self.pipe.unload_lora_weights()
        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)

    def get_result_filepath(self, args):
        pipeline_class_name = str(self.pipe.__class__.__name__)
        name = (
            self.lora_id.replace("/", "_")
            + "_"
            + pipeline_class_name
            + f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
        )
        filepath = os.path.join(BASE_PATH, name)
        return filepath

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
            guidance_scale=1.0,
        )

    def benchmark(self, args):
        flush()

        print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")

        time = benchmark_fn(self.run_inference, self.pipe, args)  # in seconds.
        memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
        benchmark_info = BenchmarkInfo(time=time, memory=memory)

        pipeline_class_name = str(self.pipe.__class__.__name__)
        flush()
        csv_dict = generate_csv_dict(
            pipeline_cls=pipeline_class_name, ckpt=self.lora_id, args=args, benchmark_info=benchmark_info
        )
        filepath = self.get_result_filepath(args)
        write_to_csv(filepath, csv_dict)
        print(f"Logs written to: {filepath}")
        flush()


class ImageToImageBenchmark(TextToImageBenchmark):
    pipeline_class = AutoPipelineForImage2Image
    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/1665_Girl_with_a_Pearl_Earring.jpg"
    image = load_image(url).convert("RGB")

    def __init__(self, args):
        super().__init__(args)
        self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            image=self.image,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )


class TurboImageToImageBenchmark(ImageToImageBenchmark):
    def __init__(self, args):
        super().__init__(args)

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            image=self.image,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
            guidance_scale=0.0,
            strength=0.5,
        )


class InpaintingBenchmark(ImageToImageBenchmark):
    pipeline_class = AutoPipelineForInpainting
    mask_url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/overture-creations-5sI6fQgYIuo_mask.png"
    mask = load_image(mask_url).convert("RGB")

    def __init__(self, args):
        super().__init__(args)
        self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])
        self.mask = self.mask.resize(RESOLUTION_MAPPING[args.ckpt])

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            image=self.image,
            mask_image=self.mask,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )


class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
    image = load_image(url)

    def __init__(self, args):
        pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
        pipe.load_ip_adapter(
            args.ip_adapter_id[0],
            subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
            weight_name=args.ip_adapter_id[1],
        )

        if args.run_compile:
            pipe.unet.to(memory_format=torch.channels_last)
            print("Run torch compile")
            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            ip_adapter_image=self.image,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )


class ControlNetBenchmark(TextToImageBenchmark):
    pipeline_class = StableDiffusionControlNetPipeline
    aux_network_class = ControlNetModel
    root_ckpt = "Lykon/DreamShaper"

    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_image_condition.png"
    image = load_image(url).convert("RGB")

    def __init__(self, args):
        aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
        pipe = self.pipeline_class.from_pretrained(self.root_ckpt, controlnet=aux_network, torch_dtype=torch.float16)
        pipe = pipe.to("cuda")

        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe

        if args.run_compile:
            pipe.unet.to(memory_format=torch.channels_last)
            pipe.controlnet.to(memory_format=torch.channels_last)

            print("Run torch compile")
            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
            pipe.controlnet = torch.compile(pipe.controlnet, mode="reduce-overhead", fullgraph=True)

        self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            image=self.image,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )


class ControlNetSDXLBenchmark(ControlNetBenchmark):
    pipeline_class = StableDiffusionXLControlNetPipeline
    root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"

    def __init__(self, args):
        super().__init__(args)


class T2IAdapterBenchmark(ControlNetBenchmark):
    pipeline_class = StableDiffusionAdapterPipeline
    aux_network_class = T2IAdapter
    root_ckpt = "Lykon/DreamShaper"

    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter.png"
    image = load_image(url).convert("L")

    def __init__(self, args):
        aux_network = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
        pipe = self.pipeline_class.from_pretrained(self.root_ckpt, adapter=aux_network, torch_dtype=torch.float16)
        pipe = pipe.to("cuda")

        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe

        if args.run_compile:
            pipe.unet.to(memory_format=torch.channels_last)
            pipe.adapter.to(memory_format=torch.channels_last)

            print("Run torch compile")
            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
            pipe.adapter = torch.compile(pipe.adapter, mode="reduce-overhead", fullgraph=True)

        self.image = self.image.resize(RESOLUTION_MAPPING[args.ckpt])


class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):
    pipeline_class = StableDiffusionXLAdapterPipeline
    root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"

    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter_sdxl.png"
    image = load_image(url)

    def __init__(self, args):
        super().__init__(args)
@@ -1,26 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import ControlNetBenchmark, ControlNetSDXLBenchmark  # noqa: E402


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="lllyasviel/sd-controlnet-canny",
        choices=["lllyasviel/sd-controlnet-canny", "diffusers/controlnet-canny-sdxl-1.0"],
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_pipe = (
        ControlNetBenchmark(args) if args.ckpt == "lllyasviel/sd-controlnet-canny" else ControlNetSDXLBenchmark(args)
    )
    benchmark_pipe.benchmark(args)
@@ -1,33 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import IPAdapterTextToImageBenchmark  # noqa: E402


IP_ADAPTER_CKPTS = {
    # because original SD v1.5 has been taken down.
    "Lykon/DreamShaper": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
    "stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="rstabilityai/stable-diffusion-xl-base-1.0",
        choices=list(IP_ADAPTER_CKPTS.keys()),
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
    benchmark_pipe = IPAdapterTextToImageBenchmark(args)
    args.ckpt = f"{args.ckpt} (IP-Adapter)"
    benchmark_pipe.benchmark(args)
@@ -1,29 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import ImageToImageBenchmark, TurboImageToImageBenchmark  # noqa: E402


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="Lykon/DreamShaper",
        choices=[
            "Lykon/DreamShaper",
            "stabilityai/stable-diffusion-2-1",
            "stabilityai/stable-diffusion-xl-refiner-1.0",
            "stabilityai/sdxl-turbo",
        ],
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_pipe = ImageToImageBenchmark(args) if "turbo" not in args.ckpt else TurboImageToImageBenchmark(args)
    benchmark_pipe.benchmark(args)
@@ -1,28 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import InpaintingBenchmark  # noqa: E402


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="Lykon/DreamShaper",
        choices=[
            "Lykon/DreamShaper",
            "stabilityai/stable-diffusion-2-1",
            "stabilityai/stable-diffusion-xl-base-1.0",
        ],
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_pipe = InpaintingBenchmark(args)
    benchmark_pipe.benchmark(args)
@@ -1,28 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import T2IAdapterBenchmark, T2IAdapterSDXLBenchmark  # noqa: E402


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="TencentARC/t2iadapter_canny_sd14v1",
        choices=["TencentARC/t2iadapter_canny_sd14v1", "TencentARC/t2i-adapter-canny-sdxl-1.0"],
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_pipe = (
        T2IAdapterBenchmark(args)
        if args.ckpt == "TencentARC/t2iadapter_canny_sd14v1"
        else T2IAdapterSDXLBenchmark(args)
    )
    benchmark_pipe.benchmark(args)
@@ -1,23 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import LCMLoRATextToImageBenchmark  # noqa: E402


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="stabilityai/stable-diffusion-xl-base-1.0",
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=4)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_pipe = LCMLoRATextToImageBenchmark(args)
    benchmark_pipe.benchmark(args)
@@ -1,40 +0,0 @@
import argparse
import sys


sys.path.append(".")
from base_classes import TextToImageBenchmark, TurboTextToImageBenchmark  # noqa: E402


ALL_T2I_CKPTS = [
    "Lykon/DreamShaper",
    "segmind/SSD-1B",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "kandinsky-community/kandinsky-2-2-decoder",
    "warp-ai/wuerstchen",
    "stabilityai/sdxl-turbo",
]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="Lykon/DreamShaper",
        choices=ALL_T2I_CKPTS,
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    benchmark_cls = None
    if "turbo" in args.ckpt:
        benchmark_cls = TurboTextToImageBenchmark
    else:
        benchmark_cls = TextToImageBenchmark

    benchmark_pipe = benchmark_cls(args)
    benchmark_pipe.benchmark(args)
benchmarks/benchmarking_flux.py (new file, 98 lines)

@@ -0,0 +1,98 @@
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import BitsAndBytesConfig, FluxTransformer2DModel
from diffusers.utils.testing_utils import torch_device


CKPT_ID = "black-forest-labs/FLUX.1-dev"
RESULT_FILENAME = "flux.csv"


def get_input_dict(**device_dtype_kwargs):
    # resolution: 1024x1024
    # maximum sequence length 512
    hidden_states = torch.randn(1, 4096, 64, **device_dtype_kwargs)
    encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
    pooled_prompt_embeds = torch.randn(1, 768, **device_dtype_kwargs)
    image_ids = torch.ones(512, 3, **device_dtype_kwargs)
    text_ids = torch.ones(4096, 3, **device_dtype_kwargs)
    timestep = torch.tensor([1.0], **device_dtype_kwargs)
    guidance = torch.tensor([1.0], **device_dtype_kwargs)

    return {
        "hidden_states": hidden_states,
        "encoder_hidden_states": encoder_hidden_states,
        "img_ids": image_ids,
        "txt_ids": text_ids,
        "pooled_projections": pooled_prompt_embeds,
        "timestep": timestep,
        "guidance": guidance,
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
            compile_kwargs={"fullgraph": True},
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-bnb-nf4",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
                "quantization_config": BitsAndBytesConfig(
                    load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4"
                ),
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(
                model_init_fn,
                group_offload_kwargs={
                    "onload_device": torch_device,
                    "offload_device": torch.device("cpu"),
                    "offload_type": "leaf_level",
                    "use_stream": True,
                    "non_blocking": True,
                },
            ),
        ),
    ]

    runner = BenchmarkMixin()
    runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
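A quick sanity check on the `hidden_states` shape above, assuming Flux's standard conventions (8x VAE downsampling, 16 latent channels, 2x2 patch packing); the helper below is an illustrative sketch, not part of the file:

# A 1024x1024 image becomes a 128x128 latent after the 8x VAE downsample;
# packing 2x2 latent patches into tokens gives (128/2)^2 = 4096 tokens of
# 16 * 2 * 2 = 64 channels each, i.e. the (1, 4096, 64) tensor above.
def flux_packed_shape(height, width, vae_scale=8, latent_channels=16, patch=2):
    latent_h, latent_w = height // vae_scale, width // vae_scale
    num_tokens = (latent_h // patch) * (latent_w // patch)
    token_dim = latent_channels * patch * patch
    return num_tokens, token_dim


assert flux_packed_shape(1024, 1024) == (4096, 64)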
benchmarks/benchmarking_ltx.py (new file, 80 lines)

@@ -0,0 +1,80 @@
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import LTXVideoTransformer3DModel
from diffusers.utils.testing_utils import torch_device


CKPT_ID = "Lightricks/LTX-Video-0.9.7-dev"
RESULT_FILENAME = "ltx.csv"


def get_input_dict(**device_dtype_kwargs):
    # 512x704 (161 frames)
    # `max_sequence_length`: 256
    hidden_states = torch.randn(1, 7392, 128, **device_dtype_kwargs)
    encoder_hidden_states = torch.randn(1, 256, 4096, **device_dtype_kwargs)
    encoder_attention_mask = torch.ones(1, 256, **device_dtype_kwargs)
    timestep = torch.tensor([1.0], **device_dtype_kwargs)
    video_coords = torch.randn(1, 3, 7392, **device_dtype_kwargs)

    return {
        "hidden_states": hidden_states,
        "encoder_hidden_states": encoder_hidden_states,
        "encoder_attention_mask": encoder_attention_mask,
        "timestep": timestep,
        "video_coords": video_coords,
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
            compile_kwargs={"fullgraph": True},
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(
                model_init_fn,
                group_offload_kwargs={
                    "onload_device": torch_device,
                    "offload_device": torch.device("cpu"),
                    "offload_type": "leaf_level",
                    "use_stream": True,
                    "non_blocking": True,
                },
            ),
        ),
    ]

    runner = BenchmarkMixin()
    runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
benchmarks/benchmarking_sdxl.py (new file, 82 lines)

@@ -0,0 +1,82 @@
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import UNet2DConditionModel
from diffusers.utils.testing_utils import torch_device


CKPT_ID = "stabilityai/stable-diffusion-xl-base-1.0"
RESULT_FILENAME = "sdxl.csv"


def get_input_dict(**device_dtype_kwargs):
    # height: 1024
    # width: 1024
    # max_sequence_length: 77
    hidden_states = torch.randn(1, 4, 128, 128, **device_dtype_kwargs)
    encoder_hidden_states = torch.randn(1, 77, 2048, **device_dtype_kwargs)
    timestep = torch.tensor([1.0], **device_dtype_kwargs)
    added_cond_kwargs = {
        "text_embeds": torch.randn(1, 1280, **device_dtype_kwargs),
        "time_ids": torch.ones(1, 6, **device_dtype_kwargs),
    }

    return {
        "sample": hidden_states,
        "encoder_hidden_states": encoder_hidden_states,
        "timestep": timestep,
        "added_cond_kwargs": added_cond_kwargs,
    }


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=UNet2DConditionModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "unet",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
            compile_kwargs={"fullgraph": True},
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=UNet2DConditionModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "unet",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=UNet2DConditionModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "unet",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(
                model_init_fn,
                group_offload_kwargs={
                    "onload_device": torch_device,
                    "offload_device": torch.device("cpu"),
                    "offload_type": "leaf_level",
                    "use_stream": True,
                    "non_blocking": True,
                },
            ),
        ),
    ]

    runner = BenchmarkMixin()
    runner.run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
benchmarks/benchmarking_utils.py (new file, 244 lines)

@@ -0,0 +1,244 @@
import gc
import inspect
import logging
import os
import queue
import threading
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional, Union

import pandas as pd
import torch
import torch.utils.benchmark as benchmark

from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.testing_utils import require_torch_gpu, torch_device


logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)

NUM_WARMUP_ROUNDS = 5


def benchmark_fn(f, *args, **kwargs):
    t0 = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"args": args, "kwargs": kwargs, "f": f},
        num_threads=1,
    )
    return float(f"{(t0.blocked_autorange().mean):.3f}")


def flush():
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_max_memory_allocated()
    torch.cuda.reset_peak_memory_stats()


# Adapted from https://github.com/lucasb-eyer/cnn_vit_benchmarks/blob/15b665ff758e8062131353076153905cae00a71f/main.py
def calculate_flops(model, input_dict):
    try:
        from torchprofile import profile_macs
    except ModuleNotFoundError:
        raise

    # This is a hacky way to convert the kwargs to args as `profile_macs` cries about kwargs.
    sig = inspect.signature(model.forward)
    param_names = [
        p.name
        for p in sig.parameters.values()
        if p.kind
        in (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        and p.name != "self"
    ]
    bound = sig.bind_partial(**input_dict)
    bound.apply_defaults()
    args = tuple(bound.arguments[name] for name in param_names)

    model.eval()
    with torch.no_grad():
        macs = profile_macs(model, args)
    flops = 2 * macs  # 1 MAC operation = 2 FLOPs (1 multiplication + 1 addition)
    return flops


def calculate_params(model):
    return sum(p.numel() for p in model.parameters())


# Users can define their own in case this doesn't suffice. For most cases,
# it should be sufficient.
def model_init_fn(model_cls, group_offload_kwargs=None, layerwise_upcasting=False, **init_kwargs):
    model = model_cls.from_pretrained(**init_kwargs).eval()
    if group_offload_kwargs and isinstance(group_offload_kwargs, dict):
        model.enable_group_offload(**group_offload_kwargs)
    else:
        model.to(torch_device)
    if layerwise_upcasting:
        model.enable_layerwise_casting(
            storage_dtype=torch.float8_e4m3fn, compute_dtype=init_kwargs.get("torch_dtype", torch.bfloat16)
        )
    return model


@dataclass
class BenchmarkScenario:
    name: str
    model_cls: ModelMixin
    model_init_kwargs: Dict[str, Any]
    model_init_fn: Callable
    get_model_input_dict: Callable
    compile_kwargs: Optional[Dict[str, Any]] = None


@require_torch_gpu
class BenchmarkMixin:
    def pre_benchmark(self):
        flush()
        torch.compiler.reset()

    def post_benchmark(self, model):
        model.cpu()
        flush()
        torch.compiler.reset()

    @torch.no_grad()
    def run_benchmark(self, scenario: BenchmarkScenario):
        # 0) Basic stats
        logger.info(f"Running scenario: {scenario.name}.")
        try:
            model = model_init_fn(scenario.model_cls, **scenario.model_init_kwargs)
            num_params = round(calculate_params(model) / 1e9, 2)
            try:
                flops = round(calculate_flops(model, input_dict=scenario.get_model_input_dict()) / 1e9, 2)
            except Exception as e:
                logger.info(f"Problem in calculating FLOPs:\n{e}")
                flops = None
            model.cpu()
            del model
        except Exception as e:
            logger.info(f"Error while initializing the model and calculating FLOPs:\n{e}")
            return {}
        self.pre_benchmark()

        # 1) plain stats
        results = {}
        plain = None
        try:
            plain = self._run_phase(
                model_cls=scenario.model_cls,
                init_fn=scenario.model_init_fn,
                init_kwargs=scenario.model_init_kwargs,
                get_input_fn=scenario.get_model_input_dict,
                compile_kwargs=None,
            )
        except Exception as e:
            logger.info(f"Benchmark could not be run with the following error:\n{e}")
            return results

        # 2) compiled stats (if any)
        compiled = {"time": None, "memory": None}
        if scenario.compile_kwargs:
            try:
                compiled = self._run_phase(
                    model_cls=scenario.model_cls,
                    init_fn=scenario.model_init_fn,
                    init_kwargs=scenario.model_init_kwargs,
                    get_input_fn=scenario.get_model_input_dict,
                    compile_kwargs=scenario.compile_kwargs,
                )
            except Exception as e:
                logger.info(f"Compilation benchmark could not be run with the following error\n: {e}")
                if plain is None:
                    return results

        # 3) merge
        result = {
            "scenario": scenario.name,
            "model_cls": scenario.model_cls.__name__,
            "num_params_B": num_params,
            "flops_G": flops,
            "time_plain_s": plain["time"],
            "mem_plain_GB": plain["memory"],
            "time_compile_s": compiled["time"],
            "mem_compile_GB": compiled["memory"],
        }
        if scenario.compile_kwargs:
            result["fullgraph"] = scenario.compile_kwargs.get("fullgraph", False)
            result["mode"] = scenario.compile_kwargs.get("mode", "default")
        else:
            result["fullgraph"], result["mode"] = None, None
        return result

    def run_bencmarks_and_collate(self, scenarios: Union[BenchmarkScenario, list[BenchmarkScenario]], filename: str):
        if not isinstance(scenarios, list):
            scenarios = [scenarios]
        record_queue = queue.Queue()
        stop_signal = object()

        def _writer_thread():
            while True:
                item = record_queue.get()
                if item is stop_signal:
                    break
                df_row = pd.DataFrame([item])
                write_header = not os.path.exists(filename)
                df_row.to_csv(filename, mode="a", header=write_header, index=False)
                record_queue.task_done()

            record_queue.task_done()

        writer = threading.Thread(target=_writer_thread, daemon=True)
        writer.start()

        for s in scenarios:
            try:
                record = self.run_benchmark(s)
                if record:
                    record_queue.put(record)
                else:
                    logger.info(f"Record empty from scenario: {s.name}.")
            except Exception as e:
                logger.info(f"Running scenario ({s.name}) led to error:\n{e}")
        record_queue.put(stop_signal)
        logger.info(f"Results serialized to {filename=}.")
|
||||
|
||||
def _run_phase(
|
||||
self,
|
||||
*,
|
||||
model_cls: ModelMixin,
|
||||
init_fn: Callable,
|
||||
init_kwargs: Dict[str, Any],
|
||||
get_input_fn: Callable,
|
||||
compile_kwargs: Optional[Dict[str, Any]],
|
||||
) -> Dict[str, float]:
|
||||
# setup
|
||||
self.pre_benchmark()
|
||||
|
||||
# init & (optional) compile
|
||||
model = init_fn(model_cls, **init_kwargs)
|
||||
if compile_kwargs:
|
||||
model.compile(**compile_kwargs)
|
||||
|
||||
# build inputs
|
||||
inp = get_input_fn()
|
||||
|
||||
# measure
|
||||
run_ctx = torch._inductor.utils.fresh_inductor_cache() if compile_kwargs else nullcontext()
|
||||
with run_ctx:
|
||||
for _ in range(NUM_WARMUP_ROUNDS):
|
||||
_ = model(**inp)
|
||||
time_s = benchmark_fn(lambda m, d: m(**d), model, inp)
|
||||
mem_gb = torch.cuda.max_memory_allocated() / (1024**3)
|
||||
mem_gb = round(mem_gb, 2)
|
||||
|
||||
# teardown
|
||||
self.post_benchmark(model)
|
||||
del model
|
||||
return {"time": time_s, "memory": mem_gb}
|
||||
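As a quick illustration of what `benchmark_fn` above actually measures (the mean of `Timer.blocked_autorange()` in seconds, rounded to three decimals), here is a minimal sketch; it is not part of this diff, assumes a CUDA machine, and uses a purely hypothetical matmul workload:

import torch

from benchmarking_utils import benchmark_fn  # assumes running from the benchmarks/ directory

# Hypothetical workload, just to exercise the timer.
x = torch.randn(1024, 1024, device="cuda")
w = torch.randn(1024, 1024, device="cuda")

# benchmark_fn times `f(*args, **kwargs)` and returns the mean runtime in seconds.
elapsed_s = benchmark_fn(lambda a, b: a @ b, x, w)
print(f"mean matmul time: {elapsed_s}s")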
benchmarks/benchmarking_wan.py (new file, 74 lines)
@@ -0,0 +1,74 @@
from functools import partial

import torch
from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn

from diffusers import WanTransformer3DModel
from diffusers.utils.testing_utils import torch_device


CKPT_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
RESULT_FILENAME = "wan.csv"


def get_input_dict(**device_dtype_kwargs):
    # height: 480
    # width: 832
    # num_frames: 81
    # max_sequence_length: 512
    hidden_states = torch.randn(1, 16, 21, 60, 104, **device_dtype_kwargs)
    encoder_hidden_states = torch.randn(1, 512, 4096, **device_dtype_kwargs)
    timestep = torch.tensor([1.0], **device_dtype_kwargs)

    return {"hidden_states": hidden_states, "encoder_hidden_states": encoder_hidden_states, "timestep": timestep}


if __name__ == "__main__":
    scenarios = [
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=WanTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=model_init_fn,
            compile_kwargs={"fullgraph": True},
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=WanTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
        ),
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=WanTransformer3DModel,
            model_init_kwargs={
                "pretrained_model_name_or_path": CKPT_ID,
                "torch_dtype": torch.bfloat16,
                "subfolder": "transformer",
            },
            get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
            model_init_fn=partial(
                model_init_fn,
                group_offload_kwargs={
                    "onload_device": torch_device,
                    "offload_device": torch.device("cpu"),
                    "offload_type": "leaf_level",
                    "use_stream": True,
                    "non_blocking": True,
                },
            ),
        ),
    ]

    runner = BenchmarkMixin()
    runner.run_benchmarks_and_collate(scenarios, filename=RESULT_FILENAME)
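Running this script on a CUDA machine (e.g. `python benchmarks/benchmarking_wan.py`) appends one row per scenario to `wan.csv`, following the record layout built in `BenchmarkMixin.run_benchmark` above. A minimal sketch for inspecting the output:

import pandas as pd

# Column names follow the result dict assembled in BenchmarkMixin.run_benchmark.
df = pd.read_csv("wan.csv")
print(df[["scenario", "time_plain_s", "mem_plain_GB", "time_compile_s", "mem_compile_GB"]])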
benchmarks/populate_into_db.py (new file, 166 lines)
@@ -0,0 +1,166 @@
import argparse
import os
import sys

import gpustat
import pandas as pd
import psycopg2
import psycopg2.extras
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json


register_adapter(dict, Json)

FINAL_CSV_FILENAME = "collated_results.csv"
# https://github.com/huggingface/transformers/blob/593e29c5e2a9b17baec010e8dc7c1431fed6e841/benchmark/init_db.sql#L27
BENCHMARKS_TABLE_NAME = "benchmarks"
MEASUREMENTS_TABLE_NAME = "model_measurements"


def _init_benchmark(conn, branch, commit_id, commit_msg):
    gpu_stats = gpustat.GPUStatCollection.new_query()
    metadata = {"gpu_name": gpu_stats[0]["name"]}
    repository = "huggingface/diffusers"
    with conn.cursor() as cur:
        cur.execute(
            f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
            (repository, branch, commit_id, commit_msg, metadata),
        )
        benchmark_id = cur.fetchone()[0]
        print(f"Initialised benchmark #{benchmark_id}")
        return benchmark_id


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "branch",
        type=str,
        help="The branch name on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_id",
        type=str,
        help="The commit hash on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_msg",
        type=str,
        help="The commit message associated with the commit, truncated to 70 characters.",
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    try:
        conn = psycopg2.connect(
            host=os.getenv("PGHOST"),
            database=os.getenv("PGDATABASE"),
            user=os.getenv("PGUSER"),
            password=os.getenv("PGPASSWORD"),
        )
        print("DB connection established successfully.")
    except Exception as e:
        print(f"Problem during DB init: {e}")
        sys.exit(1)

    try:
        benchmark_id = _init_benchmark(
            conn=conn,
            branch=args.branch,
            commit_id=args.commit_id,
            commit_msg=args.commit_msg,
        )
    except Exception as e:
        print(f"Problem during initializing benchmark: {e}")
        sys.exit(1)

    cur = conn.cursor()

    df = pd.read_csv(FINAL_CSV_FILENAME)

    # Helper to cast values (or None) given a dtype
    def _cast_value(val, dtype: str):
        if pd.isna(val):
            return None

        if dtype == "text":
            return str(val).strip()

        if dtype == "float":
            try:
                return float(val)
            except ValueError:
                return None

        if dtype == "bool":
            s = str(val).strip().lower()
            if s in ("true", "t", "yes", "1"):
                return True
            if s in ("false", "f", "no", "0"):
                return False
            if val in (1, 1.0):
                return True
            if val in (0, 0.0):
                return False
            return None

        return val

    try:
        rows_to_insert = []
        for _, row in df.iterrows():
            scenario = _cast_value(row.get("scenario"), "text")
            model_cls = _cast_value(row.get("model_cls"), "text")
            num_params_B = _cast_value(row.get("num_params_B"), "float")
            flops_G = _cast_value(row.get("flops_G"), "float")
            time_plain_s = _cast_value(row.get("time_plain_s"), "float")
            mem_plain_GB = _cast_value(row.get("mem_plain_GB"), "float")
            time_compile_s = _cast_value(row.get("time_compile_s"), "float")
            mem_compile_GB = _cast_value(row.get("mem_compile_GB"), "float")
            fullgraph = _cast_value(row.get("fullgraph"), "bool")
            mode = _cast_value(row.get("mode"), "text")

            # If "github_sha" column exists in the CSV, cast it; else default to None
            if "github_sha" in df.columns:
                github_sha = _cast_value(row.get("github_sha"), "text")
            else:
                github_sha = None

            measurements = {
                "scenario": scenario,
                "model_cls": model_cls,
                "num_params_B": num_params_B,
                "flops_G": flops_G,
                "time_plain_s": time_plain_s,
                "mem_plain_GB": mem_plain_GB,
                "time_compile_s": time_compile_s,
                "mem_compile_GB": mem_compile_GB,
                "fullgraph": fullgraph,
                "mode": mode,
                "github_sha": github_sha,
            }
            rows_to_insert.append((benchmark_id, measurements))

        # Batch-insert all rows
        insert_sql = f"""
            INSERT INTO {MEASUREMENTS_TABLE_NAME} (
                benchmark_id,
                measurements
            )
            VALUES (%s, %s);
        """

        psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
        conn.commit()

        cur.close()
        conn.close()
    except Exception as e:
        print(f"Exception: {e}")
        sys.exit(1)
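Once the rows are in, a small read-back sketch (not part of this diff) can sanity-check the ingestion. It assumes the same `PG*` environment variables as above, and that psycopg2 decodes the JSON `measurements` column into a dict (its default behavior for json/jsonb columns):

import os

import psycopg2

conn = psycopg2.connect(
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE"),
    user=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
)
with conn.cursor() as cur:
    # "model_measurements" is MEASUREMENTS_TABLE_NAME above.
    cur.execute("SELECT benchmark_id, measurements FROM model_measurements ORDER BY benchmark_id DESC LIMIT 5")
    for benchmark_id, measurements in cur.fetchall():
        print(benchmark_id, measurements["scenario"], measurements["time_plain_s"])
conn.close()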
benchmarks/push_results.py
@@ -1,19 +1,19 @@
 import glob
 import sys
+import os

 import pandas as pd
 from huggingface_hub import hf_hub_download, upload_file
 from huggingface_hub.utils import EntryNotFoundError


 sys.path.append(".")
-from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv  # noqa: E402
+REPO_ID = "diffusers/benchmarks"


 def has_previous_benchmark() -> str:
+    from run_all import FINAL_CSV_FILENAME
+
     csv_path = None
     try:
-        csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
+        csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILENAME)
     except EntryNotFoundError:
         csv_path = None
     return csv_path
@@ -26,46 +26,50 @@ def filter_float(value):

 def push_to_hf_dataset():
-    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
-    collate_csv(all_csvs, FINAL_CSV_FILE)
+    from run_all import FINAL_CSV_FILENAME, GITHUB_SHA

     # If there's an existing benchmark file, we should report the changes.
     csv_path = has_previous_benchmark()
     if csv_path is not None:
-        current_results = pd.read_csv(FINAL_CSV_FILE)
+        current_results = pd.read_csv(FINAL_CSV_FILENAME)
         previous_results = pd.read_csv(csv_path)

         numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
         numeric_columns = [
             c for c in numeric_columns if c not in ["batch_size", "num_inference_steps", "actual_gpu_memory (gbs)"]
         ]

         for column in numeric_columns:
-            previous_results[column] = previous_results[column].map(lambda x: filter_float(x))
-
-            # Calculate the percentage change
-            current_results[column] = current_results[column].astype(float)
-            previous_results[column] = previous_results[column].astype(float)
-            percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100
-
-            # Format the values with '+' or '-' sign and append to original values
-            current_results[column] = current_results[column].map(str) + percent_change.map(
-                lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
-            )
-            # There might be newly added rows. So, filter out the NaNs.
-            current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))
-
-        # Overwrite the current result file.
-        current_results.to_csv(FINAL_CSV_FILE, index=False)
+            # get previous values as floats, aligned to current index
+            prev_vals = previous_results[column].map(filter_float).reindex(current_results.index)
+
+            # get current values as floats
+            curr_vals = current_results[column].astype(float)
+
+            # stringify the current values
+            curr_str = curr_vals.map(str)
+
+            # build an appendage only when prev exists and differs
+            append_str = prev_vals.where(prev_vals.notnull() & (prev_vals != curr_vals), other=pd.NA).map(
+                lambda x: f" ({x})" if pd.notnull(x) else ""
+            )
+
+            # combine
+            current_results[column] = curr_str + append_str
+
+        os.remove(FINAL_CSV_FILENAME)
+        current_results.to_csv(FINAL_CSV_FILENAME, index=False)

     commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
     upload_file(
         repo_id=REPO_ID,
-        path_in_repo=FINAL_CSV_FILE,
-        path_or_fileobj=FINAL_CSV_FILE,
+        path_in_repo=FINAL_CSV_FILENAME,
+        path_or_fileobj=FINAL_CSV_FILENAME,
         repo_type="dataset",
         commit_message=commit_message,
     )
+    upload_file(
+        repo_id="diffusers/benchmark-analyzer",
+        path_in_repo=FINAL_CSV_FILENAME,
+        path_or_fileobj=FINAL_CSV_FILENAME,
+        repo_type="space",
+        commit_message=commit_message,
+    )


 if __name__ == "__main__":
benchmarks/requirements.txt (new file, 6 lines)
@@ -0,0 +1,6 @@
pandas
psutil
gpustat
torchprofile
bitsandbytes
psycopg2==2.9.9
benchmarks/run_all.py
@@ -1,101 +1,84 @@
 import glob
+import logging
+import os
 import subprocess
-import sys
-from typing import List

+import pandas as pd

-sys.path.append(".")
-from benchmark_text_to_image import ALL_T2I_CKPTS  # noqa: E402

+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+logger = logging.getLogger(__name__)

-PATTERN = "benchmark_*.py"
+PATTERN = "benchmarking_*.py"
+FINAL_CSV_FILENAME = "collated_results.csv"
+GITHUB_SHA = os.getenv("GITHUB_SHA", None)


 class SubprocessCallException(Exception):
     pass


 # Taken from `test_examples_utils.py`
-def run_command(command: List[str], return_stdout=False):
-    """
-    Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
-    if an error occurred while running `command`
-    """
+def run_command(command: list[str], return_stdout=False):
     try:
         output = subprocess.check_output(command, stderr=subprocess.STDOUT)
-        if return_stdout:
-            if hasattr(output, "decode"):
-                output = output.decode("utf-8")
-            return output
+        if return_stdout and hasattr(output, "decode"):
+            return output.decode("utf-8")
     except subprocess.CalledProcessError as e:
-        raise SubprocessCallException(
-            f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
-        ) from e
+        raise SubprocessCallException(f"Command `{' '.join(command)}` failed with:\n{e.output.decode()}") from e


-def main():
-    python_files = glob.glob(PATTERN)
-
-    for file in python_files:
-        print(f"****** Running file: {file} ******")
-
-        # Run with canonical settings.
-        if file != "benchmark_text_to_image.py" and file != "benchmark_ip_adapters.py":
-            command = f"python {file}"
-            run_command(command.split())
-
-            command += " --run_compile"
-            run_command(command.split())
-
-    # Run variants.
-    for file in python_files:
-        # See: https://github.com/pytorch/pytorch/issues/129637
-        if file == "benchmark_ip_adapters.py":
-            continue
-
-        if file == "benchmark_text_to_image.py":
-            for ckpt in ALL_T2I_CKPTS:
-                command = f"python {file} --ckpt {ckpt}"
-
-                if "turbo" in ckpt:
-                    command += " --num_inference_steps 1"
-
-                run_command(command.split())
-
-                command += " --run_compile"
-                run_command(command.split())
-
-        elif file == "benchmark_sd_img.py":
-            for ckpt in ["stabilityai/stable-diffusion-xl-refiner-1.0", "stabilityai/sdxl-turbo"]:
-                command = f"python {file} --ckpt {ckpt}"
-
-                if ckpt == "stabilityai/sdxl-turbo":
-                    command += " --num_inference_steps 2"
-
-                run_command(command.split())
-                command += " --run_compile"
-                run_command(command.split())
-
-        elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
-            sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
-            command = f"python {file} --ckpt {sdxl_ckpt}"
-            run_command(command.split())
-
-            command += " --run_compile"
-            run_command(command.split())
-
-        elif file in ["benchmark_controlnet.py", "benchmark_t2i_adapter.py"]:
-            sdxl_ckpt = (
-                "diffusers/controlnet-canny-sdxl-1.0"
-                if "controlnet" in file
-                else "TencentARC/t2i-adapter-canny-sdxl-1.0"
-            )
-            command = f"python {file} --ckpt {sdxl_ckpt}"
-            run_command(command.split())
-
-            command += " --run_compile"
-            run_command(command.split())
+def merge_csvs(final_csv: str = "collated_results.csv"):
+    all_csvs = glob.glob("*.csv")
+    all_csvs = [f for f in all_csvs if f != final_csv]
+    if not all_csvs:
+        logger.info("No result CSVs found to merge.")
+        return
+
+    df_list = []
+    for f in all_csvs:
+        try:
+            d = pd.read_csv(f)
+        except pd.errors.EmptyDataError:
+            # If a file existed but was zero-bytes or corrupted, skip it
+            continue
+        df_list.append(d)
+
+    if not df_list:
+        logger.info("All result CSVs were empty or invalid; nothing to merge.")
+        return
+
+    final_df = pd.concat(df_list, ignore_index=True)
+    if GITHUB_SHA is not None:
+        final_df["github_sha"] = GITHUB_SHA
+    final_df.to_csv(final_csv, index=False)
+    logger.info(f"Merged {len(all_csvs)} partial CSVs → {final_csv}.")
+
+
+def run_scripts():
+    python_files = sorted(glob.glob(PATTERN))
+    python_files = [f for f in python_files if f != "benchmarking_utils.py"]
+
+    for file in python_files:
+        script_name = file.split(".py")[0].split("_")[-1]  # example: benchmarking_foo.py -> foo
+        logger.info(f"\n****** Running file: {file} ******")
+
+        partial_csv = f"{script_name}.csv"
+        if os.path.exists(partial_csv):
+            logger.info(f"Found {partial_csv}. Removing for safer numbers and duplication.")
+            os.remove(partial_csv)
+
+        command = ["python", file]
+        try:
+            run_command(command)
+            logger.info(f"→ {file} finished normally.")
+        except SubprocessCallException as e:
+            logger.info(f"Error running {file}:\n{e}")
+        finally:
+            logger.info(f"→ Merging partial CSVs after {file} …")
+            merge_csvs(final_csv=FINAL_CSV_FILENAME)
+
+    logger.info(f"\nAll scripts attempted. Final collated CSV: {FINAL_CSV_FILENAME}")


 if __name__ == "__main__":
-    main()
+    run_scripts()
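Taken together, the new flow is: `run_scripts()` executes every `benchmarking_*.py` script (each writes a partial `<name>.csv`), `merge_csvs()` folds the partials into `collated_results.csv`, and `push_results.py` / `populate_into_db.py` then upload and ingest that file. A hypothetical driver, assuming it runs from the `benchmarks/` directory:

from run_all import FINAL_CSV_FILENAME, merge_csvs, run_scripts

run_scripts()  # runs each benchmarking_*.py, merging partial CSVs as it goes
merge_csvs(final_csv=FINAL_CSV_FILENAME)  # the final merge is idempotent
print(f"Ready to upload/ingest: {FINAL_CSV_FILENAME}")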
benchmarks/utils.py (deleted file, 98 lines)
@@ -1,98 +0,0 @@
import argparse
import csv
import gc
import os
from dataclasses import dataclass
from typing import Dict, List, Union

import torch
import torch.utils.benchmark as benchmark


GITHUB_SHA = os.getenv("GITHUB_SHA", None)
BENCHMARK_FIELDS = [
    "pipeline_cls",
    "ckpt_id",
    "batch_size",
    "num_inference_steps",
    "model_cpu_offload",
    "run_compile",
    "time (secs)",
    "memory (gbs)",
    "actual_gpu_memory (gbs)",
    "github_sha",
]

PROMPT = "ghibli style, a fantasy landscape with castles"
BASE_PATH = os.getenv("BASE_PATH", ".")
TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))

REPO_ID = "diffusers/benchmarks"
FINAL_CSV_FILE = "collated_results.csv"


@dataclass
class BenchmarkInfo:
    time: float
    memory: float


def flush():
    """Wipes off memory."""
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_max_memory_allocated()
    torch.cuda.reset_peak_memory_stats()


def bytes_to_giga_bytes(bytes):
    return f"{(bytes / 1024 / 1024 / 1024):.3f}"


def benchmark_fn(f, *args, **kwargs):
    t0 = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"args": args, "kwargs": kwargs, "f": f},
        num_threads=torch.get_num_threads(),
    )
    return f"{(t0.blocked_autorange().mean):.3f}"


def generate_csv_dict(
    pipeline_cls: str, ckpt: str, args: argparse.Namespace, benchmark_info: BenchmarkInfo
) -> Dict[str, Union[str, bool, float]]:
    """Packs benchmarking data into a dictionary for latter serialization."""
    data_dict = {
        "pipeline_cls": pipeline_cls,
        "ckpt_id": ckpt,
        "batch_size": args.batch_size,
        "num_inference_steps": args.num_inference_steps,
        "model_cpu_offload": args.model_cpu_offload,
        "run_compile": args.run_compile,
        "time (secs)": benchmark_info.time,
        "memory (gbs)": benchmark_info.memory,
        "actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
        "github_sha": GITHUB_SHA,
    }
    return data_dict


def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
    """Serializes a dictionary into a CSV file."""
    with open(file_name, mode="w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
        writer.writeheader()
        writer.writerow(data_dict)


def collate_csv(input_files: List[str], output_file: str):
    """Collates multiple identically structured CSVs into a single CSV file."""
    with open(output_file, mode="w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=BENCHMARK_FIELDS)
        writer.writeheader()

        for file in input_files:
            with open(file, mode="r") as infile:
                reader = csv.DictReader(infile)
                for row in reader:
                    writer.writerow(row)
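Note that the deleted helpers largely survive in the new layout: `flush()` and `benchmark_fn()` reappear in `benchmarking_utils.py` above (with `benchmark_fn` now pinned to a single thread and returning a float rather than a string), while the CSV plumbing (`write_to_csv`, `collate_csv`) is superseded by the per-scenario writer thread in `BenchmarkMixin` and by `merge_csvs()` in `run_all.py`.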
@@ -47,6 +47,10 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     tensorboard \
     transformers \
     matplotlib \
-    setuptools==69.5.1
+    setuptools==69.5.1 \
+    bitsandbytes \
+    torchao \
+    gguf \
+    optimum-quanto

 CMD ["/bin/bash"]
@@ -1,4 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.

 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

(The same 2024 → 2025 copyright-year bump recurs across dozens of other documentation pages in this diff.)
@@ -64,6 +64,8 @@
     title: Overview
   - local: using-diffusers/create_a_server
     title: Create a server
+  - local: using-diffusers/batched_inference
+    title: Batch inference
   - local: training/distributed_inference
     title: Distributed inference
   - local: using-diffusers/scheduler_features
@@ -91,6 +93,26 @@
   - local: hybrid_inference/api_reference
     title: API Reference
   title: Hybrid Inference
+- sections:
+  - local: modular_diffusers/overview
+    title: Overview
+  - local: modular_diffusers/modular_pipeline
+    title: Modular Pipeline
+  - local: modular_diffusers/components_manager
+    title: Components Manager
+  - local: modular_diffusers/modular_diffusers_states
+    title: Modular Diffusers States
+  - local: modular_diffusers/pipeline_block
+    title: Pipeline Block
+  - local: modular_diffusers/sequential_pipeline_blocks
+    title: Sequential Pipeline Blocks
+  - local: modular_diffusers/loop_sequential_pipeline_blocks
+    title: Loop Sequential Pipeline Blocks
+  - local: modular_diffusers/auto_pipeline_blocks
+    title: Auto Pipeline Blocks
+  - local: modular_diffusers/end_to_end_guide
+    title: End-to-End Example
+  title: Modular Diffusers
 - sections:
   - local: using-diffusers/consisid
     title: ConsisID
@@ -180,6 +202,10 @@
     title: Caching
   - local: optimization/memory
     title: Reduce memory usage
+  - local: optimization/speed-memory-optims
+    title: Compile and offloading quantized models
+  - local: optimization/pruna
+    title: Pruna
   - local: optimization/xformers
     title: xFormers
   - local: optimization/tome
@@ -28,3 +28,9 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
 [[autodoc]] FasterCacheConfig

 [[autodoc]] apply_faster_cache
+
+### FirstBlockCacheConfig
+
+[[autodoc]] FirstBlockCacheConfig
+
+[[autodoc]] apply_first_block_cache
@@ -37,6 +37,10 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse

 </Tip>

+## LoraBaseMixin
+
+[[autodoc]] loaders.lora_base.LoraBaseMixin
+
 ## StableDiffusionLoraLoaderMixin

 [[autodoc]] loaders.lora_pipeline.StableDiffusionLoraLoaderMixin
@@ -96,10 +100,6 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse

 [[autodoc]] loaders.lora_pipeline.HiDreamImageLoraLoaderMixin

-## LoraBaseMixin
-
-[[autodoc]] loaders.lora_base.LoraBaseMixin
-
 ## WanLoraLoaderMixin

 [[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin
@@ -1,4 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team and The InstantX Team. All rights reserved.
+<!--Copyright 2025 The HuggingFace Team and The InstantX Team. All rights reserved.

 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

@@ -1,4 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team and Tencent Hunyuan Team. All rights reserved.
+<!--Copyright 2025 The HuggingFace Team and Tencent Hunyuan Team. All rights reserved.

 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
Some files were not shown because too many files have changed in this diff.