Mirror of https://github.com/huggingface/diffusers.git, synced 2026-02-10 12:55:19 +08:00

Compare commits (23 commits): fix-model-…...max-parall… (branch names truncated)
| Author | SHA1 | Date |
|---|---|---|
|  | ca6d41de0d |  |
|  | 61e962d7d0 |  |
|  | 7492690505 |  |
|  | decd6758f3 |  |
|  | 0d23645bd1 |  |
|  | 7fa3e5b0f6 |  |
|  | 49b959b540 |  |
|  | 58237364b1 |  |
|  | 3e35628873 |  |
|  | 6a479588db |  |
|  | fa489eaed6 |  |
|  | 0d7c479023 |  |
|  | ce97d7e19b |  |
|  | 44ba90caff |  |
|  | 3c85a57297 |  |
|  | 03ca11318e |  |
|  | 3ffa7b46e5 |  |
|  | c1b2a89e34 |  |
|  | 435d37ce5a |  |
|  | 5915c2985d |  |
|  | 21a7ff12a7 |  |
|  | 8909ab4b19 |  |
|  | c1edb03c37 |  |
.github/workflows/nightly_tests.yml (vendored): 50 changes
```diff
@@ -19,7 +19,7 @@ env:
 jobs:
   setup_torch_cuda_pipeline_matrix:
     name: Setup Torch Pipelines Matrix
-    runs-on: ubuntu-latest
+    runs-on: diffusers/diffusers-pytorch-cpu
     outputs:
       pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
     steps:
@@ -67,19 +67,19 @@ jobs:
           fetch-depth: 2
       - name: NVIDIA-SMI
         run: nvidia-smi
 
       - name: Install dependencies
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
           python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install pytest-reportlog
 
       - name: Environment
         run: |
           python utils/print_env.py
 
       - name: Nightly PyTorch CUDA checkpoint (pipelines) tests
         env:
           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -88,9 +88,9 @@ jobs:
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "not Flax and not Onnx" \
             --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
             --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
             tests/pipelines/${{ matrix.module }}
 
       - name: Failure short reports
         if: ${{ failure() }}
         run: |
@@ -103,7 +103,7 @@ jobs:
         with:
           name: pipeline_${{ matrix.module }}_test_reports
           path: reports
 
       - name: Generate Report and Notify Channel
         if: always()
         run: |
@@ -112,7 +112,7 @@ jobs:
 
   run_nightly_tests_for_other_torch_modules:
     name: Torch Non-Pipelines CUDA Nightly Tests
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
@@ -139,7 +139,7 @@ jobs:
         run: python utils/print_env.py
 
       - name: Run nightly PyTorch CUDA tests for non-pipeline modules
         if: ${{ matrix.module != 'examples'}}
         env:
           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -148,7 +148,7 @@ jobs:
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "not Flax and not Onnx" \
             --make-reports=tests_torch_${{ matrix.module }}_cuda \
             --report-log=tests_torch_${{ matrix.module }}_cuda.log \
             tests/${{ matrix.module }}
 
       - name: Run nightly example tests with Torch
@@ -161,13 +161,13 @@ jobs:
           python -m uv pip install peft@git+https://github.com/huggingface/peft.git
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v --make-reports=examples_torch_cuda \
             --report-log=examples_torch_cuda.log \
             examples/
 
       - name: Failure short reports
         if: ${{ failure() }}
         run: |
           cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
           cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt
 
       - name: Test suite reports artifacts
@@ -185,7 +185,7 @@ jobs:
 
   run_lora_nightly_tests:
     name: Nightly LoRA Tests with PEFT and TORCH
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
@@ -218,13 +218,13 @@ jobs:
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "not Flax and not Onnx" \
             --make-reports=tests_torch_lora_cuda \
             --report-log=tests_torch_lora_cuda.log \
             tests/lora
 
       - name: Failure short reports
         if: ${{ failure() }}
         run: |
           cat reports/tests_torch_lora_cuda_stats.txt
           cat reports/tests_torch_lora_cuda_failures_short.txt
 
       - name: Test suite reports artifacts
@@ -239,12 +239,12 @@ jobs:
         run: |
           pip install slack_sdk tabulate
           python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_flax_tpu_tests:
     name: Nightly Flax TPU Tests
     runs-on: docker-tpu
     if: github.event_name == 'schedule'
 
     container:
       image: diffusers/diffusers-flax-tpu
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
@@ -274,7 +274,7 @@ jobs:
           python -m pytest -n 0 \
             -s -v -k "Flax" \
             --make-reports=tests_flax_tpu \
             --report-log=tests_flax_tpu.log \
             tests/
 
       - name: Failure short reports
@@ -298,11 +298,11 @@ jobs:
 
   run_nightly_onnx_tests:
     name: Nightly ONNXRuntime CUDA tests on Ubuntu
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-onnxruntime-cuda
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
 
     steps:
       - name: Checkout diffusers
         uses: actions/checkout@v3
@@ -321,7 +321,7 @@ jobs:
 
       - name: Environment
         run: python utils/print_env.py
 
       - name: Run nightly ONNXRuntime CUDA tests
         env:
           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
@@ -329,7 +329,7 @@ jobs:
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "Onnx" \
             --make-reports=tests_onnx_cuda \
             --report-log=tests_onnx_cuda.log \
             tests/
 
       - name: Failure short reports
@@ -344,7 +344,7 @@ jobs:
         with:
           name: ${{ matrix.config.report }}_test_reports
           path: reports
 
       - name: Generate Report and Notify Channel
         if: always()
         run: |
```
.github/workflows/pr_test_fetcher.yml (vendored): 6 changes
```diff
@@ -15,7 +15,7 @@ concurrency:
 jobs:
   setup_pr_tests:
     name: Setup PR Tests
-    runs-on: docker-cpu
+    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
     container:
       image: diffusers/diffusers-pytorch-cpu
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
@@ -73,7 +73,7 @@ jobs:
       max-parallel: 2
       matrix:
         modules: ${{ fromJson(needs.setup_pr_tests.outputs.matrix) }}
-    runs-on: docker-cpu
+    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
     container:
       image: diffusers/diffusers-pytorch-cpu
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
@@ -123,7 +123,7 @@ jobs:
         config:
           - name: Hub tests for models, schedulers, and pipelines
             framework: hub_tests_pytorch
-            runner: docker-cpu
+            runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
             image: diffusers/diffusers-pytorch-cpu
             report: torch_hub
 
```
.github/workflows/push_tests.yml (vendored): 44 changes
```diff
@@ -21,7 +21,9 @@ env:
 jobs:
   setup_torch_cuda_pipeline_matrix:
     name: Setup Torch Pipelines CUDA Slow Tests Matrix
-    runs-on: ubuntu-latest
+    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
+    container:
+      image: diffusers/diffusers-pytorch-cpu
     outputs:
       pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
     steps:
@@ -29,14 +31,13 @@ jobs:
         uses: actions/checkout@v3
         with:
           fetch-depth: 2
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.8"
       - name: Install dependencies
         run: |
-          pip install -e .
-          pip install huggingface_hub
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+      - name: Environment
+        run: |
+          python utils/print_env.py
       - name: Fetch Pipeline Matrix
         id: fetch_pipeline_matrix
         run: |
@@ -55,12 +56,13 @@ jobs:
     needs: setup_torch_cuda_pipeline_matrix
     strategy:
       fail-fast: false
+      max-parallel: 8
       matrix:
         module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 --privileged
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
     steps:
       - name: Checkout diffusers
         uses: actions/checkout@v3
@@ -114,10 +116,10 @@ jobs:
 
   torch_cuda_tests:
     name: Torch CUDA Tests
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
     defaults:
       run:
         shell: bash
@@ -166,10 +168,10 @@ jobs:
 
   peft_cuda_tests:
     name: PEFT CUDA Tests
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
     defaults:
       run:
         shell: bash
@@ -219,7 +221,7 @@ jobs:
     runs-on: docker-tpu
     container:
       image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --privileged
     defaults:
       run:
         shell: bash
@@ -263,10 +265,10 @@ jobs:
 
   onnx_cuda_tests:
     name: ONNX CUDA Tests
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
     container:
       image: diffusers/diffusers-onnxruntime-cuda
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
     defaults:
       run:
         shell: bash
@@ -311,11 +313,11 @@ jobs:
   run_torch_compile_tests:
     name: PyTorch Compile CUDA tests
 
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
 
     container:
       image: diffusers/diffusers-pytorch-compile-cuda
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
 
     steps:
       - name: Checkout diffusers
@@ -352,11 +354,11 @@ jobs:
   run_xformers_tests:
     name: PyTorch xformers CUDA tests
 
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
 
     container:
       image: diffusers/diffusers-pytorch-xformers-cuda
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
 
     steps:
       - name: Checkout diffusers
@@ -393,11 +395,11 @@ jobs:
   run_examples_tests:
     name: Examples PyTorch CUDA tests on Ubuntu
 
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
 
     container:
       image: diffusers/diffusers-pytorch-cuda
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
 
     steps:
       - name: Checkout diffusers
```
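Both the nightly and push workflows gate their pipeline jobs on a `Fetch Pipeline Matrix` step whose output feeds `strategy.matrix.module`. The sketch below is a rough, hypothetical illustration of such a step (the script name, module list, and discovery logic are assumptions, not the repository's actual `utils` script): it emits a JSON list to `$GITHUB_OUTPUT` so downstream jobs can read it as `steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix`.

```python
# Hypothetical sketch of a "Fetch Pipeline Matrix" step: print a JSON list of
# test modules and append it to GITHUB_OUTPUT so a later job can consume it via
# ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}.
import json
import os


def main() -> None:
    # In the real workflow the module list is discovered from tests/pipelines/;
    # these names are placeholders.
    modules = ["stable_diffusion", "stable_diffusion_xl", "controlnet"]
    matrix = json.dumps(modules)

    output_file = os.environ.get("GITHUB_OUTPUT")
    if output_file:  # only set when running inside GitHub Actions
        with open(output_file, "a") as f:
            f.write(f"pipeline_test_matrix={matrix}\n")
    print(matrix)


if __name__ == "__main__":
    main()
```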
docs/source/en/_toctree.yml

```diff
@@ -81,16 +81,14 @@
       title: ControlNet
     - local: using-diffusers/t2i_adapter
       title: T2I-Adapter
+    - local: using-diffusers/inference_with_lcm
+      title: Latent Consistency Model
     - local: using-diffusers/textual_inversion_inference
       title: Textual inversion
     - local: using-diffusers/shap-e
       title: Shap-E
     - local: using-diffusers/diffedit
       title: DiffEdit
-    - local: using-diffusers/inference_with_lcm_lora
-      title: Latent Consistency Model-LoRA
-    - local: using-diffusers/inference_with_lcm
-      title: Latent Consistency Model
     - local: using-diffusers/inference_with_tcd_lora
       title: Trajectory Consistency Distillation-LoRA
     - local: using-diffusers/svd
@@ -141,8 +139,6 @@
 - sections:
   - local: optimization/fp16
     title: Speed up inference
-  - local: using-diffusers/distilled_sd
-    title: Distilled Stable Diffusion inference
   - local: optimization/memory
     title: Reduce memory usage
   - local: optimization/torch2.0
```
docs/source/en/api/attnprocessor.md

```diff
@@ -55,3 +55,6 @@ An attention processor is a class for applying different types of attention mechanisms.
 
 ## XFormersAttnProcessor
 [[autodoc]] models.attention_processor.XFormersAttnProcessor
+
+## AttnProcessorNPU
+[[autodoc]] models.attention_processor.AttnProcessorNPU
```
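The hunk above only adds an autodoc stub for `AttnProcessorNPU`. For context, processor classes from `diffusers.models.attention_processor` are swapped onto a model with `set_attn_processor`; the sketch below is illustrative rather than part of this change (the checkpoint and processor choice are assumptions), using `AttnProcessor2_0` as a stand-in, with `AttnProcessorNPU` being the analogous choice on Ascend NPU hardware.

```python
# Illustrative sketch (not part of the diff): swap the attention processor on a
# pipeline's UNet. AttnProcessor2_0 (PyTorch 2.0 scaled dot product attention)
# is used as a stand-in here.
import torch
from diffusers import StableDiffusionPipeline
from diffusers.models.attention_processor import AttnProcessor2_0

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Replace the processor on every attention module in the UNet.
pipe.unet.set_attn_processor(AttnProcessor2_0())

image = pipe("a photo of an astronaut riding a horse on mars").images[0]
```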
docs/source/en/optimization/fp16.md

````diff
@@ -12,27 +12,23 @@ specific language governing permissions and limitations under the License.
 
 # Speed up inference
 
-There are several ways to optimize 🤗 Diffusers for inference speed. As a general rule of thumb, we recommend using either [xFormers](xformers) or `torch.nn.functional.scaled_dot_product_attention` in PyTorch 2.0 for their memory-efficient attention.
+There are several ways to optimize Diffusers for inference speed, such as reducing the computational burden by lowering the data precision or using a lightweight distilled model. There are also memory-efficient attention implementations, [xFormers](xformers) and [scaled dot product attention](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html) in PyTorch 2.0, that reduce memory usage, which also indirectly speeds up inference. Different speed optimizations can be stacked together to get the fastest inference times.
 
-<Tip>
-
-In many cases, optimizing for speed or memory leads to improved performance in the other, so you should try to optimize for both whenever you can. This guide focuses on inference speed, but you can learn more about preserving memory in the [Reduce memory usage](memory) guide.
-
-</Tip>
-
-The results below are obtained from generating a single 512x512 image from the prompt `a photo of an astronaut riding a horse on mars` with 50 DDIM steps on a Nvidia Titan RTX, demonstrating the speed-up you can expect.
-
-|                            | latency | speed-up |
-| ---------------- | ------- | ------- |
-| original                   | 9.50s   | x1       |
-| fp16                       | 3.61s   | x2.63    |
-| channels last              | 3.30s   | x2.88    |
-| traced UNet                | 3.21s   | x2.96    |
-| memory efficient attention | 2.63s   | x3.61    |
-
-## Use TensorFloat-32
-
-On Ampere and later CUDA devices, matrix multiplications and convolutions can use the [TensorFloat-32 (TF32)](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) mode for faster, but slightly less accurate computations. By default, PyTorch enables TF32 mode for convolutions but not matrix multiplications. Unless your network requires full float32 precision, we recommend enabling TF32 for matrix multiplications. It can significantly speeds up computations with typically negligible loss in numerical accuracy.
+> [!TIP]
+> Optimizing for inference speed or reduced memory usage can lead to improved performance in the other category, so you should try to optimize for both whenever you can. This guide focuses on inference speed, but you can learn more about lowering memory usage in the [Reduce memory usage](memory) guide.
+
+The inference times below are obtained from generating a single 512x512 image from the prompt "a photo of an astronaut riding a horse on mars" with 50 DDIM steps on an NVIDIA A100.
+
+| setup    | latency | speed-up |
+|----------|---------|----------|
+| baseline | 5.27s   | x1       |
+| tf32     | 4.14s   | x1.27    |
+| fp16     | 3.51s   | x1.50    |
+| combined | 3.41s   | x1.54    |
+
+## TensorFloat-32
+
+On Ampere and later CUDA devices, matrix multiplications and convolutions can use the [TensorFloat-32 (tf32)](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) mode for faster, but slightly less accurate computations. By default, PyTorch enables tf32 mode for convolutions but not matrix multiplications. Unless your network requires full float32 precision, we recommend enabling tf32 for matrix multiplications. It can significantly speed up computations with typically negligible loss in numerical accuracy.
 
 ```python
 import torch
@@ -40,11 +36,11 @@ import torch
 torch.backends.cuda.matmul.allow_tf32 = True
 ```
 
-You can learn more about TF32 in the [Mixed precision training](https://huggingface.co/docs/transformers/en/perf_train_gpu_one#tf32) guide.
+Learn more about tf32 in the [Mixed precision training](https://huggingface.co/docs/transformers/en/perf_train_gpu_one#tf32) guide.
 
 ## Half-precision weights
 
-To save GPU memory and get more speed, try loading and running the model weights directly in half-precision or float16:
+To save GPU memory and get more speed, set `torch_dtype=torch.float16` to load and run the model weights directly with half-precision weights.
 
 ```Python
 import torch
@@ -56,19 +52,76 @@ pipe = DiffusionPipeline.from_pretrained(
     use_safetensors=True,
 )
 pipe = pipe.to("cuda")
 
-prompt = "a photo of an astronaut riding a horse on mars"
-image = pipe(prompt).images[0]
 ```
 
-<Tip warning={true}>
-
-Don't use [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) in any of the pipelines as it can lead to black images and is always slower than pure float16 precision.
-
-</Tip>
+> [!WARNING]
+> Don't use [torch.autocast](https://pytorch.org/docs/stable/amp.html#torch.autocast) in any of the pipelines as it can lead to black images and is always slower than pure float16 precision.
 
 ## Distilled model
 
-You could also use a distilled Stable Diffusion model and autoencoder to speed up inference. During distillation, many of the UNet's residual and attention blocks are shed to reduce the model size. The distilled model is faster and uses less memory while generating images of comparable quality to the full Stable Diffusion model.
+You could also use a distilled Stable Diffusion model and autoencoder to speed up inference. During distillation, many of the UNet's residual and attention blocks are shed to reduce the model size by 51% and improve latency on CPU/GPU by 43%. The distilled model is faster and uses less memory while generating images of comparable quality to the full Stable Diffusion model.
 
-Learn more about in the [Distilled Stable Diffusion inference](../using-diffusers/distilled_sd) guide!
+> [!TIP]
+> Read the [Open-sourcing Knowledge Distillation Code and Weights of SD-Small and SD-Tiny](https://huggingface.co/blog/sd_distillation) blog post to learn more about how knowledge distillation training works to produce a faster, smaller, and cheaper generative model.
+
+The inference times below are obtained from generating 4 images from the prompt "a photo of an astronaut riding a horse on mars" with 25 PNDM steps on a NVIDIA A100. Each generation is repeated 3 times with the distilled Stable Diffusion v1.4 model by [Nota AI](https://hf.co/nota-ai).
+
+| setup                        | latency | speed-up |
+|------------------------------|---------|----------|
+| baseline                     | 6.37s   | x1       |
+| distilled                    | 4.18s   | x1.52    |
+| distilled + tiny autoencoder | 3.83s   | x1.66    |
+
+Let's load the distilled Stable Diffusion model and compare it against the original Stable Diffusion model.
+
+```py
+from diffusers import StableDiffusionPipeline
+import torch
+
+distilled = StableDiffusionPipeline.from_pretrained(
+    "nota-ai/bk-sdm-small", torch_dtype=torch.float16, use_safetensors=True,
+).to("cuda")
+prompt = "a golden vase with different flowers"
+generator = torch.manual_seed(2023)
+image = distilled("a golden vase with different flowers", num_inference_steps=25, generator=generator).images[0]
+image
+```
+
+<div class="flex gap-4">
+  <div>
+    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/original_sd.png"/>
+    <figcaption class="mt-2 text-center text-sm text-gray-500">original Stable Diffusion</figcaption>
+  </div>
+  <div>
+    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/distilled_sd.png"/>
+    <figcaption class="mt-2 text-center text-sm text-gray-500">distilled Stable Diffusion</figcaption>
+  </div>
+</div>
+
+### Tiny AutoEncoder
+
+To speed inference up even more, replace the autoencoder with a [distilled version](https://huggingface.co/sayakpaul/taesdxl-diffusers) of it.
+
+```py
+import torch
+from diffusers import AutoencoderTiny, StableDiffusionPipeline
+
+distilled = StableDiffusionPipeline.from_pretrained(
+    "nota-ai/bk-sdm-small", torch_dtype=torch.float16, use_safetensors=True,
+).to("cuda")
+distilled.vae = AutoencoderTiny.from_pretrained(
+    "sayakpaul/taesd-diffusers", torch_dtype=torch.float16, use_safetensors=True,
+).to("cuda")
+
+prompt = "a golden vase with different flowers"
+generator = torch.manual_seed(2023)
+image = distilled("a golden vase with different flowers", num_inference_steps=25, generator=generator).images[0]
+image
+```
+
+<div class="flex justify-center">
+  <div>
+    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/distilled_sd_vae.png" />
+    <figcaption class="mt-2 text-center text-sm text-gray-500">distilled Stable Diffusion + Tiny AutoEncoder</figcaption>
+  </div>
+</div>
````
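The updated guide benchmarks tf32, fp16, and a "combined" setup. A minimal sketch of stacking the two settings from the guide could look like the following; the checkpoint ID and prompt here are illustrative, and measured latencies depend on the GPU.

```python
# Minimal sketch of the "combined" row: tf32 matmuls plus fp16 weights.
import torch
from diffusers import DiffusionPipeline

torch.backends.cuda.matmul.allow_tf32 = True  # TensorFloat-32 for matrix multiplications

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,  # half-precision weights
    use_safetensors=True,
).to("cuda")

image = pipe(
    "a photo of an astronaut riding a horse on mars", num_inference_steps=50
).images[0]
```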
docs/source/en/using-diffusers/distilled_sd.md (deleted)

````diff
@@ -1,133 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
--->
-
-# Distilled Stable Diffusion inference
-
-[[open-in-colab]]
-
-Stable Diffusion inference can be a computationally intensive process because it must iteratively denoise the latents to generate an image. To reduce the computational burden, you can use a *distilled* version of the Stable Diffusion model from [Nota AI](https://huggingface.co/nota-ai). The distilled version of their Stable Diffusion model eliminates some of the residual and attention blocks from the UNet, reducing the model size by 51% and improving latency on CPU/GPU by 43%.
-
-<Tip>
-
-Read this [blog post](https://huggingface.co/blog/sd_distillation) to learn more about how knowledge distillation training works to produce a faster, smaller, and cheaper generative model.
-
-</Tip>
-
-Let's load the distilled Stable Diffusion model and compare it against the original Stable Diffusion model:
-
-```py
-from diffusers import StableDiffusionPipeline
-import torch
-
-distilled = StableDiffusionPipeline.from_pretrained(
-    "nota-ai/bk-sdm-small", torch_dtype=torch.float16, use_safetensors=True,
-).to("cuda")
-
-original = StableDiffusionPipeline.from_pretrained(
-    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16, use_safetensors=True,
-).to("cuda")
-```
-
-Given a prompt, get the inference time for the original model:
-
-```py
-import time
-
-seed = 2023
-generator = torch.manual_seed(seed)
-
-NUM_ITERS_TO_RUN = 3
-NUM_INFERENCE_STEPS = 25
-NUM_IMAGES_PER_PROMPT = 4
-
-prompt = "a golden vase with different flowers"
-
-start = time.time_ns()
-for _ in range(NUM_ITERS_TO_RUN):
-    images = original(
-        prompt,
-        num_inference_steps=NUM_INFERENCE_STEPS,
-        generator=generator,
-        num_images_per_prompt=NUM_IMAGES_PER_PROMPT
-    ).images
-end = time.time_ns()
-original_sd = f"{(end - start) / 1e6:.1f}"
-
-print(f"Execution time -- {original_sd} ms\n")
-"Execution time -- 45781.5 ms"
-```
-
-Time the distilled model inference:
-
-```py
-start = time.time_ns()
-for _ in range(NUM_ITERS_TO_RUN):
-    images = distilled(
-        prompt,
-        num_inference_steps=NUM_INFERENCE_STEPS,
-        generator=generator,
-        num_images_per_prompt=NUM_IMAGES_PER_PROMPT
-    ).images
-end = time.time_ns()
-
-distilled_sd = f"{(end - start) / 1e6:.1f}"
-print(f"Execution time -- {distilled_sd} ms\n")
-"Execution time -- 29884.2 ms"
-```
-
-<div class="flex gap-4">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/original_sd.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">original Stable Diffusion (45781.5 ms)</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/distilled_sd.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">distilled Stable Diffusion (29884.2 ms)</figcaption>
-  </div>
-</div>
-
-## Tiny AutoEncoder
-
-To speed inference up even more, use a tiny distilled version of the [Stable Diffusion VAE](https://huggingface.co/sayakpaul/taesdxl-diffusers) to denoise the latents into images. Replace the VAE in the distilled Stable Diffusion model with the tiny VAE:
-
-```py
-from diffusers import AutoencoderTiny
-
-distilled.vae = AutoencoderTiny.from_pretrained(
-    "sayakpaul/taesd-diffusers", torch_dtype=torch.float16, use_safetensors=True,
-).to("cuda")
-```
-
-Time the distilled model and distilled VAE inference:
-
-```py
-start = time.time_ns()
-for _ in range(NUM_ITERS_TO_RUN):
-    images = distilled(
-        prompt,
-        num_inference_steps=NUM_INFERENCE_STEPS,
-        generator=generator,
-        num_images_per_prompt=NUM_IMAGES_PER_PROMPT
-    ).images
-end = time.time_ns()
-
-distilled_tiny_sd = f"{(end - start) / 1e6:.1f}"
-print(f"Execution time -- {distilled_tiny_sd} ms\n")
-"Execution time -- 27165.7 ms"
-```
-
-<div class="flex justify-center">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/distilled_sd_vae.png" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">distilled Stable Diffusion + Tiny AutoEncoder (27165.7 ms)</figcaption>
-  </div>
-</div>
````
@@ -10,29 +10,30 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
|
|||||||
specific language governing permissions and limitations under the License.
|
specific language governing permissions and limitations under the License.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
[[open-in-colab]]
|
|
||||||
|
|
||||||
# Latent Consistency Model
|
# Latent Consistency Model
|
||||||
|
|
||||||
Latent Consistency Models (LCM) enable quality image generation in typically 2-4 steps making it possible to use diffusion models in almost real-time settings.
|
[[open-in-colab]]
|
||||||
|
|
||||||
From the [official website](https://latent-consistency-models.github.io/):
|
[Latent Consistency Models (LCMs)](https://hf.co/papers/2310.04378) enable fast high-quality image generation by directly predicting the reverse diffusion process in the latent rather than pixel space. In other words, LCMs try to predict the noiseless image from the noisy image in contrast to typical diffusion models that iteratively remove noise from the noisy image. By avoiding the iterative sampling process, LCMs are able to generate high-quality images in 2-4 steps instead of 20-30 steps.
|
||||||
|
|
||||||
> LCMs can be distilled from any pre-trained Stable Diffusion (SD) in only 4,000 training steps (~32 A100 GPU Hours) for generating high quality 768 x 768 resolution images in 2~4 steps or even one step, significantly accelerating text-to-image generation. We employ LCM to distill the Dreamshaper-V7 version of SD in just 4,000 training iterations.
|
LCMs are distilled from pretrained models which requires ~32 hours of A100 compute. To speed this up, [LCM-LoRAs](https://hf.co/papers/2311.05556) train a [LoRA adapter](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora) which have much fewer parameters to train compared to the full model. The LCM-LoRA can be plugged into a diffusion model once it has been trained.
|
||||||
|
|
||||||
For a more technical overview of LCMs, refer to [the paper](https://huggingface.co/papers/2310.04378).
|
This guide will show you how to use LCMs and LCM-LoRAs for fast inference on tasks and how to use them with other adapters like ControlNet or T2I-Adapter.
|
||||||
|
|
||||||
LCM distilled models are available for [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5), [stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0), and the [SSD-1B](https://huggingface.co/segmind/SSD-1B) model. All the checkpoints can be found in this [collection](https://huggingface.co/collections/latent-consistency/latent-consistency-models-weights-654ce61a95edd6dffccef6a8).
|
> [!TIP]
|
||||||
|
> LCMs and LCM-LoRAs are available for Stable Diffusion v1.5, Stable Diffusion XL, and the SSD-1B model. You can find their checkpoints on the [Latent Consistency](https://hf.co/collections/latent-consistency/latent-consistency-models-weights-654ce61a95edd6dffccef6a8) Collections.
|
||||||
This guide shows how to perform inference with LCMs for
|
|
||||||
- text-to-image
|
|
||||||
- image-to-image
|
|
||||||
- combined with style LoRAs
|
|
||||||
- ControlNet/T2I-Adapter
|
|
||||||
|
|
||||||
## Text-to-image
|
## Text-to-image
|
||||||
|
|
||||||
You'll use the [`StableDiffusionXLPipeline`] pipeline with the [`LCMScheduler`] and then load the LCM-LoRA. Together with the LCM-LoRA and the scheduler, the pipeline enables a fast inference workflow, overcoming the slow iterative nature of diffusion models.
|
<hfoptions id="lcm-text2img">
|
||||||
|
<hfoption id="LCM">
|
||||||
|
|
||||||
|
To use LCMs, you need to load the LCM checkpoint for your supported model into [`UNet2DConditionModel`] and replace the scheduler with the [`LCMScheduler`]. Then you can use the pipeline as usual, and pass a text prompt to generate an image in just 4 steps.
|
||||||
|
|
||||||
|
A couple of notes to keep in mind when using LCMs are:
|
||||||
|
|
||||||
|
* Typically, batch size is doubled inside the pipeline for classifier-free guidance. But LCM applies guidance with guidance embeddings and doesn't need to double the batch size, which leads to faster inference. The downside is that negative prompts don't work with LCM because they don't have any effect on the denoising process.
|
||||||
|
* The ideal range for `guidance_scale` is [3., 13.] because that is what the UNet was trained with. However, disabling `guidance_scale` with a value of 1.0 is also effective in most cases.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, LCMScheduler
|
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, LCMScheduler
|
||||||
@@ -49,31 +50,69 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
|
|||||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||||
|
|
||||||
prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
|
prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
|
||||||
|
|
||||||
generator = torch.manual_seed(0)
|
generator = torch.manual_seed(0)
|
||||||
image = pipe(
|
image = pipe(
|
||||||
prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=8.0
|
prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=8.0
|
||||||
).images[0]
|
).images[0]
|
||||||
|
image
|
||||||
```
|
```
|
||||||
|
|
||||||

|
<div class="flex justify-center">
|
||||||
|
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdxl_t2i.png"/>
|
||||||
|
</div>
|
||||||
|
|
||||||
Notice that we use only 4 steps for generation which is way less than what's typically used for standard SDXL.
|
</hfoption>
|
||||||
|
<hfoption id="LCM-LoRA">
|
||||||
|
|
||||||
Some details to keep in mind:
|
To use LCM-LoRAs, you need to replace the scheduler with the [`LCMScheduler`] and load the LCM-LoRA weights with the [`~loaders.LoraLoaderMixin.load_lora_weights`] method. Then you can use the pipeline as usual, and pass a text prompt to generate an image in just 4 steps.
|
||||||
|
|
||||||
* To perform classifier-free guidance, batch size is usually doubled inside the pipeline. LCM, however, applies guidance using guidance embeddings, so the batch size does not have to be doubled in this case. This leads to a faster inference time, with the drawback that negative prompts don't have any effect on the denoising process.
|
A couple of notes to keep in mind when using LCM-LoRAs are:
|
||||||
* The UNet was trained using the [3., 13.] guidance scale range. So, that is the ideal range for `guidance_scale`. However, disabling `guidance_scale` using a value of 1.0 is also effective in most cases.
|
|
||||||
|
|
||||||
|
* Typically, batch size is doubled inside the pipeline for classifier-free guidance. But LCM applies guidance with guidance embeddings and doesn't need to double the batch size, which leads to faster inference. The downside is that negative prompts don't work with LCM because they don't have any effect on the denoising process.
|
||||||
|
* You could use guidance with LCM-LoRAs, but it is very sensitive to high `guidance_scale` values and can lead to artifacts in the generated image. The best values we've found are between [1.0, 2.0].
|
||||||
|
* Replace [stabilityai/stable-diffusion-xl-base-1.0](https://hf.co/stabilityai/stable-diffusion-xl-base-1.0) with any finetuned model. For example, try using the [animagine-xl](https://huggingface.co/Linaqruf/animagine-xl) checkpoint to generate anime images with SDXL.
|
||||||
|
|
||||||
|
```py
|
||||||
|
import torch
|
||||||
|
from diffusers import DiffusionPipeline, LCMScheduler
|
||||||
|
|
||||||
|
pipe = DiffusionPipeline.from_pretrained(
|
||||||
|
"stabilityai/stable-diffusion-xl-base-1.0",
|
||||||
|
variant="fp16",
|
||||||
|
torch_dtype=torch.float16
|
||||||
|
).to("cuda")
|
||||||
|
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||||
|
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
||||||
|
|
||||||
|
prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
|
||||||
|
generator = torch.manual_seed(42)
|
||||||
|
image = pipe(
|
||||||
|
prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=1.0
|
||||||
|
).images[0]
|
||||||
|
image
|
||||||
|
```
|
||||||
|
|
||||||
|
<div class="flex justify-center">
|
||||||
|
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2i.png"/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</hfoption>
|
||||||
|
</hfoptions>
|
||||||
|
|
||||||
## Image-to-image
|
## Image-to-image
|
||||||
|
|
||||||
LCMs can be applied to image-to-image tasks too. For this example, we'll use the [LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) model, but the same steps can be applied to other LCM models as well.
|
<hfoptions id="lcm-img2img">
|
||||||
|
<hfoption id="LCM">
|
||||||
|
|
||||||
|
To use LCMs for image-to-image, you need to load the LCM checkpoint for your supported model into [`UNet2DConditionModel`] and replace the scheduler with the [`LCMScheduler`]. Then you can use the pipeline as usual, and pass a text prompt and initial image to generate an image in just 4 steps.
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Experiment with different values for `num_inference_steps`, `strength`, and `guidance_scale` to get the best results.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import torch
|
import torch
|
||||||
from diffusers import AutoPipelineForImage2Image, UNet2DConditionModel, LCMScheduler
|
from diffusers import AutoPipelineForImage2Image, UNet2DConditionModel, LCMScheduler
|
||||||
from diffusers.utils import make_image_grid, load_image
|
from diffusers.utils import load_image
|
||||||
|
|
||||||
unet = UNet2DConditionModel.from_pretrained(
|
unet = UNet2DConditionModel.from_pretrained(
|
||||||
"SimianLuo/LCM_Dreamshaper_v7",
|
"SimianLuo/LCM_Dreamshaper_v7",
|
||||||
@@ -89,12 +128,8 @@ pipe = AutoPipelineForImage2Image.from_pretrained(
|
|||||||
).to("cuda")
|
).to("cuda")
|
||||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||||
|
|
||||||
# prepare image
|
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png")
|
||||||
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"
|
|
||||||
init_image = load_image(url)
|
|
||||||
prompt = "Astronauts in a jungle, cold color palette, muted colors, detailed, 8k"
|
prompt = "Astronauts in a jungle, cold color palette, muted colors, detailed, 8k"
|
||||||
|
|
||||||
# pass prompt and image to pipeline
|
|
||||||
generator = torch.manual_seed(0)
|
generator = torch.manual_seed(0)
|
||||||
image = pipe(
|
image = pipe(
|
||||||
prompt,
|
prompt,
|
||||||
@@ -104,22 +139,130 @@ image = pipe(
|
|||||||
strength=0.5,
|
strength=0.5,
|
||||||
generator=generator
|
generator=generator
|
||||||
).images[0]
|
).images[0]
|
||||||
make_image_grid([init_image, image], rows=1, cols=2)
|
image
|
||||||
```
|
```
|
||||||
|
|
||||||

|
<div class="flex gap-4">
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-img2img.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</hfoption>
|
||||||
|
<hfoption id="LCM-LoRA">
|
||||||
|
|
||||||
<Tip>
|
To use LCM-LoRAs for image-to-image, you need to replace the scheduler with the [`LCMScheduler`] and load the LCM-LoRA weights with the [`~loaders.LoraLoaderMixin.load_lora_weights`] method. Then you can use the pipeline as usual, and pass a text prompt and initial image to generate an image in just 4 steps.
|
||||||
|
|
||||||
You can get different results based on your prompt and the image you provide. To get the best results, we recommend trying different values for `num_inference_steps`, `strength`, and `guidance_scale` parameters and choose the best one.
|
> [!TIP]
|
||||||
|
> Experiment with different values for `num_inference_steps`, `strength`, and `guidance_scale` to get the best results.
|
||||||
|
|
||||||
</Tip>
|
```py
|
||||||
|
import torch
|
||||||
|
from diffusers import AutoPipelineForImage2Image, LCMScheduler
|
||||||
|
from diffusers.utils import make_image_grid, load_image
|
||||||
|
|
||||||
|
pipe = AutoPipelineForImage2Image.from_pretrained(
|
||||||
|
"Lykon/dreamshaper-7",
|
||||||
|
torch_dtype=torch.float16,
|
||||||
|
variant="fp16",
|
||||||
|
).to("cuda")
|
||||||
|
|
||||||
## Combine with style LoRAs
|
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||||
|
|
||||||
LCMs can be used with other styled LoRAs to generate styled-images in very few steps (4-8). In the following example, we'll use the [papercut LoRA](TheLastBen/Papercut_SDXL).
|
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
|
||||||
|
|
||||||
|
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png")
|
||||||
|
prompt = "Astronauts in a jungle, cold color palette, muted colors, detailed, 8k"
|
||||||
|
|
||||||
|
generator = torch.manual_seed(0)
|
||||||
|
image = pipe(
|
||||||
|
prompt,
|
||||||
|
image=init_image,
|
||||||
|
num_inference_steps=4,
|
||||||
|
guidance_scale=1,
|
||||||
|
strength=0.6,
|
||||||
|
generator=generator
|
||||||
|
).images[0]
|
||||||
|
image
|
||||||
|
```
|
||||||
|
|
||||||
|
<div class="flex gap-4">
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-img2img.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</hfoption>
|
||||||
|
</hfoptions>
|
||||||
|
|
||||||
|
## Inpainting
|
||||||
|
|
||||||
|
To use LCM-LoRAs for inpainting, you need to replace the scheduler with the [`LCMScheduler`] and load the LCM-LoRA weights with the [`~loaders.LoraLoaderMixin.load_lora_weights`] method. Then you can use the pipeline as usual, and pass a text prompt, initial image, and mask image to generate an image in just 4 steps.
|
||||||
|
|
||||||
|
```py
|
||||||
|
import torch
|
||||||
|
from diffusers import AutoPipelineForInpainting, LCMScheduler
|
||||||
|
from diffusers.utils import load_image, make_image_grid
|
||||||
|
|
||||||
|
pipe = AutoPipelineForInpainting.from_pretrained(
|
||||||
|
"runwayml/stable-diffusion-inpainting",
|
||||||
|
torch_dtype=torch.float16,
|
||||||
|
variant="fp16",
|
||||||
|
).to("cuda")
|
||||||
|
|
||||||
|
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||||
|
|
||||||
|
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
|
||||||
|
|
||||||
|
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png")
|
||||||
|
mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png")
|
||||||
|
|
||||||
|
prompt = "concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"
|
||||||
|
generator = torch.manual_seed(0)
|
||||||
|
image = pipe(
|
||||||
|
prompt=prompt,
|
||||||
|
image=init_image,
|
||||||
|
mask_image=mask_image,
|
||||||
|
generator=generator,
|
||||||
|
num_inference_steps=4,
|
||||||
|
guidance_scale=4,
|
||||||
|
).images[0]
|
||||||
|
image
|
||||||
|
```
|
||||||
|
|
||||||
|
<div class="flex gap-4">
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-inpaint.png"/>
|
||||||
|
<figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Adapters
|
||||||
|
|
||||||
|
LCMs are compatible with adapters like LoRA, ControlNet, T2I-Adapter, and AnimateDiff. You can bring the speed of LCMs to these adapters to generate images in a certain style or condition the model on another input like a canny image.
|
||||||
|
|
||||||
|
### LoRA
|
||||||
|
|
||||||
|
[LoRA](../using-diffusers/loading_adapters#lora) adapters can be rapidly finetuned to learn a new style from just a few images and plugged into a pretrained model to generate images in that style.
|
||||||
|
|
||||||
|
<hfoptions id="lcm-lora">
|
||||||
|
<hfoption id="LCM">
|
||||||
|
|
||||||
|
Load the LCM checkpoint for your supported model into [`UNet2DConditionModel`] and replace the scheduler with the [`LCMScheduler`]. Then you can use the [`~loaders.LoraLoaderMixin.load_lora_weights`] method to load the LoRA weights into the LCM and generate a styled image in a few steps.
|
||||||
|
|
||||||

```python
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, LCMScheduler
@@ -134,11 +277,9 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", unet=unet, torch_dtype=torch.float16, variant="fp16",
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights("TheLastBen/Papercut_SDXL", weight_name="papercut.safetensors", adapter_name="papercut")

prompt = "papercut, a cute fox"

generator = torch.manual_seed(0)
image = pipe(
    prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=8.0
@@ -146,15 +287,58 @@ image = pipe(
image
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdx_lora_mix.png"/>
</div>

</hfoption>
<hfoption id="LCM-LoRA">

Replace the scheduler with the [`LCMScheduler`]. Then you can use the [`~loaders.LoraLoaderMixin.load_lora_weights`] method to load the LCM-LoRA weights and the style LoRA you want to use. Combine both LoRA adapters with the [`~loaders.UNet2DConditionLoadersMixin.set_adapters`] method and generate a styled image in a few steps.

```py
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16
).to("cuda")

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl", adapter_name="lcm")
pipe.load_lora_weights("TheLastBen/Papercut_SDXL", weight_name="papercut.safetensors", adapter_name="papercut")

pipe.set_adapters(["lcm", "papercut"], adapter_weights=[1.0, 0.8])

prompt = "papercut, a cute fox"
generator = torch.manual_seed(0)
image = pipe(prompt, num_inference_steps=4, guidance_scale=1, generator=generator).images[0]
image
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdx_lora_mix.png"/>
</div>

</hfoption>
</hfoptions>

### ControlNet

[ControlNet](./controlnet) adapters can be trained on a variety of inputs like canny edge, pose estimation, or depth. The ControlNet can be inserted into the pipeline to provide additional conditioning and control to the model for more accurate generation.

You can find additional ControlNet models trained on other inputs in [lllyasviel's](https://hf.co/lllyasviel) repository.

<hfoptions id="lcm-controlnet">
<hfoption id="LCM">

Load a ControlNet model trained on canny images and pass it to the [`ControlNetModel`]. Then you can load an LCM model into [`StableDiffusionControlNetPipeline`] and replace the scheduler with the [`LCMScheduler`]. Now pass the canny image to the pipeline and generate an image.

> [!TIP]
> Experiment with different values for `num_inference_steps`, `controlnet_conditioning_scale`, `cross_attention_kwargs`, and `guidance_scale` to get the best results.

```python
import torch
@@ -186,8 +370,6 @@ pipe = StableDiffusionControlNetPipeline.from_pretrained(
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

generator = torch.manual_seed(0)
@@ -200,16 +382,84 @@ image = pipe(
make_image_grid([canny_image, image], rows=1, cols=2)
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdv1-5_controlnet.png"/>
</div>

</hfoption>
<hfoption id="LCM-LoRA">

Load a ControlNet model trained on canny images and pass it to the [`ControlNetModel`]. Then you can load a Stable Diffusion v1.5 model into [`StableDiffusionControlNetPipeline`] and replace the scheduler with the [`LCMScheduler`]. Use the [`~loaders.LoraLoaderMixin.load_lora_weights`] method to load the LCM-LoRA weights, and pass the canny image to the pipeline and generate an image.

> [!TIP]
> Experiment with different values for `num_inference_steps`, `controlnet_conditioning_scale`, `cross_attention_kwargs`, and `guidance_scale` to get the best results.

```py
import torch
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
from diffusers.utils import load_image

image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
).resize((512, 512))

image = np.array(image)

low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
    variant="fp16"
).to("cuda")

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

generator = torch.manual_seed(0)
image = pipe(
    "the mona lisa",
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=1.5,
    controlnet_conditioning_scale=0.8,
    cross_attention_kwargs={"scale": 1},
    generator=generator,
).images[0]
image
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_controlnet.png"/>
</div>

</hfoption>
</hfoptions>

### T2I-Adapter

[T2I-Adapter](./t2i_adapter) is an even more lightweight adapter than ControlNet that provides an additional input to condition a pretrained model with. It is faster than ControlNet but the results may be slightly worse.

You can find additional T2I-Adapter checkpoints trained on other inputs in [TencentArc's](https://hf.co/TencentARC) repository.

<hfoptions id="lcm-t2i">
<hfoption id="LCM">

Load a T2IAdapter trained on canny images and pass it to the [`StableDiffusionXLAdapterPipeline`]. Then load an LCM checkpoint into [`UNet2DConditionModel`] and replace the scheduler with the [`LCMScheduler`]. Now pass the canny image to the pipeline and generate an image.

```python
import torch
@@ -220,10 +470,9 @@ from PIL import Image
from diffusers import StableDiffusionXLAdapterPipeline, UNet2DConditionModel, T2IAdapter, LCMScheduler
from diffusers.utils import load_image, make_image_grid

# detect the canny map in low resolution to avoid high-frequency details
image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
).resize((384, 384))

image = np.array(image)
@@ -236,7 +485,6 @@ image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image).resize((1024, 1216))

adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda")

unet = UNet2DConditionModel.from_pretrained(
@@ -254,7 +502,7 @@ pipe = StableDiffusionXLAdapterPipeline.from_pretrained(

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

prompt = "the mona lisa, 4k picture, high quality"
negative_prompt = "extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"

generator = torch.manual_seed(0)
@@ -268,7 +516,116 @@ image = pipe(
    adapter_conditioning_factor=1,
    generator=generator,
).images[0]
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-t2i.png"/>
</div>

</hfoption>
<hfoption id="LCM-LoRA">

Load a T2IAdapter trained on canny images and pass it to the [`StableDiffusionXLAdapterPipeline`]. Replace the scheduler with the [`LCMScheduler`], and use the [`~loaders.LoraLoaderMixin.load_lora_weights`] method to load the LCM-LoRA weights. Pass the canny image to the pipeline and generate an image.

```py
import torch
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionXLAdapterPipeline, UNet2DConditionModel, T2IAdapter, LCMScheduler
from diffusers.utils import load_image, make_image_grid

# detect the canny map in low resolution to avoid high-frequency details
image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
).resize((384, 384))

image = np.array(image)

low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image).resize((1024, 1024))

adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda")

pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    adapter=adapter,
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

prompt = "the mona lisa, 4k picture, high quality"
negative_prompt = "extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"

generator = torch.manual_seed(0)
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=1.5,
    adapter_conditioning_scale=0.8,
    adapter_conditioning_factor=1,
    generator=generator,
).images[0]
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-t2i.png"/>
</div>

</hfoption>
</hfoptions>

### AnimateDiff

[AnimateDiff](../api/pipelines/animatediff) is an adapter that adds motion to an image. It can be used with most Stable Diffusion models, effectively turning them into "video generation" models. Generating good results with a video model usually requires generating multiple frames (16-24), which can be very slow with a regular Stable Diffusion model. LCM-LoRA can speed up this process by only taking 4-8 steps for each frame.

Load an [`AnimateDiffPipeline`] and pass a [`MotionAdapter`] to it. Then replace the scheduler with the [`LCMScheduler`], and combine both LoRA adapters with the [`~loaders.UNet2DConditionLoadersMixin.set_adapters`] method. Now you can pass a prompt to the pipeline and generate an animated image.

```py
import torch
from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler, LCMScheduler
from diffusers.utils import export_to_gif

adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5")
pipe = AnimateDiffPipeline.from_pretrained(
    "frankjoshua/toonyou_beta6",
    motion_adapter=adapter,
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")
pipe.load_lora_weights("guoyww/animatediff-motion-lora-zoom-in", weight_name="diffusion_pytorch_model.safetensors", adapter_name="motion-lora")

pipe.set_adapters(["lcm", "motion-lora"], adapter_weights=[0.55, 1.2])

prompt = "best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress"
generator = torch.manual_seed(0)
frames = pipe(
    prompt=prompt,
    num_inference_steps=5,
    guidance_scale=1.25,
    cross_attention_kwargs={"scale": 1},
    num_frames=24,
    generator=generator
).frames[0]
export_to_gif(frames, "animation.gif")
```

<div class="flex justify-center">
    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-animatediff.gif"/>
</div>
@@ -1,422 +0,0 @@

<!--Copyright 2024 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

[[open-in-colab]]

# Performing inference with LCM-LoRA

Latent Consistency Models (LCM) enable quality image generation in typically 2-4 steps, making it possible to use diffusion models in almost real-time settings.

From the [official website](https://latent-consistency-models.github.io/):

> LCMs can be distilled from any pre-trained Stable Diffusion (SD) in only 4,000 training steps (~32 A100 GPU Hours) for generating high quality 768 x 768 resolution images in 2~4 steps or even one step, significantly accelerating text-to-image generation. We employ LCM to distill the Dreamshaper-V7 version of SD in just 4,000 training iterations.

For a more technical overview of LCMs, refer to [the paper](https://huggingface.co/papers/2310.04378).

However, each model needs to be distilled separately for latent consistency distillation. The core idea with LCM-LoRA is to train just a few adapter layers, the adapter being LoRA in this case. This way, we don't have to train the full model and can keep the number of trainable parameters manageable. The resulting LoRAs can then be applied to any fine-tuned version of the model without distilling them separately. Additionally, the LoRAs can be applied to image-to-image, ControlNet/T2I-Adapter, inpainting, AnimateDiff etc. The LCM-LoRA can also be combined with other LoRAs to generate styled images in very few steps (4-8).

LCM-LoRAs are available for [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5), [stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0), and the [SSD-1B](https://huggingface.co/segmind/SSD-1B) model. All the checkpoints can be found in this [collection](https://huggingface.co/collections/latent-consistency/latent-consistency-models-loras-654cdd24e111e16f0865fba6).

For more details about LCM-LoRA, refer to [the technical report](https://huggingface.co/papers/2311.05556).

This guide shows how to perform inference with LCM-LoRAs for
- text-to-image
- image-to-image
- combined with styled LoRAs
- ControlNet/T2I-Adapter
- inpainting
- AnimateDiff

Before going through this guide, we'll take a look at the general workflow for performing inference with LCM-LoRAs. LCM-LoRAs are similar to other Stable Diffusion LoRAs, so they can be used with any [`DiffusionPipeline`] that supports LoRAs.

- Load the task-specific pipeline and model.
- Set the scheduler to [`LCMScheduler`].
- Load the LCM-LoRA weights for the model.
- Set the `guidance_scale` between `[1.0, 2.0]` and the `num_inference_steps` between `[4, 8]`.
- Perform inference with the pipeline with the usual parameters.

Let's look at how we can perform inference with LCM-LoRAs for different tasks.

First, make sure you have [peft](https://github.com/huggingface/peft) installed, for better LoRA support.

```bash
pip install -U peft
```
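
Putting the workflow above together, here is a minimal sketch using the SDXL base model and its LCM-LoRA from the collection linked earlier; the prompt is only a placeholder, and the task-specific sections below apply exactly the same pattern with different pipelines and inputs.

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

# 1. Load the task-specific pipeline and model.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", torch_dtype=torch.float16
).to("cuda")

# 2. Set the scheduler to LCMScheduler.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# 3. Load the LCM-LoRA weights for the model.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

# 4. Keep guidance_scale in [1.0, 2.0] and num_inference_steps in [4, 8].
image = pipe("a photo of a corgi wearing a party hat", num_inference_steps=4, guidance_scale=1.0).images[0]
```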
## Text-to-image

You'll use the [`StableDiffusionXLPipeline`] with the [`LCMScheduler`] and then load the LCM-LoRA. Together with the LCM-LoRA and the scheduler, the pipeline enables a fast inference workflow, overcoming the slow iterative nature of diffusion models.

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"

generator = torch.manual_seed(42)
image = pipe(
    prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=1.0
).images[0]
```


Notice that we use only 4 steps for generation, which is far fewer than what's typically used for standard SDXL.

<Tip>

You may have noticed that we set `guidance_scale=1.0`, which disables classifier-free guidance. This is because the LCM-LoRA is trained with guidance, so the batch size does not have to be doubled in this case. This leads to a faster inference time, with the drawback that negative prompts don't have any effect on the denoising process.

You can also use guidance with LCM-LoRA, but due to the nature of training the model is very sensitive to the `guidance_scale` values; high values can lead to artifacts in the generated images. In our experiments, we found that the best values are in the range of [1.0, 2.0].

</Tip>

### Inference with a fine-tuned model

As mentioned above, the LCM-LoRA can be applied to any fine-tuned version of the model without having to distill it separately. Let's look at how we can perform inference with a fine-tuned model. In this example, we'll use the [animagine-xl](https://huggingface.co/Linaqruf/animagine-xl) model, which is a fine-tuned version of the SDXL model for generating anime.

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "Linaqruf/animagine-xl",
    variant="fp16",
    torch_dtype=torch.float16
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

prompt = "face focus, cute, masterpiece, best quality, 1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, night, turtleneck"

generator = torch.manual_seed(0)
image = pipe(
    prompt=prompt, num_inference_steps=4, generator=generator, guidance_scale=1.0
).images[0]
```

## Image-to-image

LCM-LoRA can be applied to image-to-image tasks too. Let's look at how we can perform image-to-image generation with LCMs. For this example we'll use the [dreamshaper-7](https://huggingface.co/Lykon/dreamshaper-7) model and the LCM-LoRA for `stable-diffusion-v1-5`.

```python
import torch
from diffusers import AutoPipelineForImage2Image, LCMScheduler
from diffusers.utils import make_image_grid, load_image

pipe = AutoPipelineForImage2Image.from_pretrained(
    "Lykon/dreamshaper-7",
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

# prepare image
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"
init_image = load_image(url)
prompt = "Astronauts in a jungle, cold color palette, muted colors, detailed, 8k"

# pass prompt and image to pipeline
generator = torch.manual_seed(0)
image = pipe(
    prompt,
    image=init_image,
    num_inference_steps=4,
    guidance_scale=1,
    strength=0.6,
    generator=generator
).images[0]
make_image_grid([init_image, image], rows=1, cols=2)
```


<Tip>

You can get different results based on your prompt and the image you provide. To get the best results, we recommend trying different values for the `num_inference_steps`, `strength`, and `guidance_scale` parameters and choosing the best one.

</Tip>
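
One simple way to explore those parameters is a small sweep, reusing `pipe`, `prompt`, and `init_image` from the example above; the `strength` values here are only a starting point, not tuned recommendations.

```python
# Compare how much of the original image is preserved at different strengths,
# keeping the seed fixed so only `strength` changes between runs.
images = []
for strength in [0.4, 0.5, 0.6, 0.7]:
    generator = torch.manual_seed(0)
    images.append(
        pipe(
            prompt,
            image=init_image,
            num_inference_steps=4,
            guidance_scale=1,
            strength=strength,
            generator=generator,
        ).images[0]
    )
make_image_grid([init_image, *images], rows=1, cols=5)
```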
## Combine with styled LoRAs

LCM-LoRA can be combined with other LoRAs to generate styled images in very few steps (4-8). In the following example, we'll use the LCM-LoRA with the [papercut LoRA](TheLastBen/Papercut_SDXL). To learn more about how to combine LoRAs, refer to [this guide](https://huggingface.co/docs/diffusers/tutorials/using_peft_for_inference#combine-multiple-adapters).

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LoRAs
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl", adapter_name="lcm")
pipe.load_lora_weights("TheLastBen/Papercut_SDXL", weight_name="papercut.safetensors", adapter_name="papercut")

# Combine LoRAs
pipe.set_adapters(["lcm", "papercut"], adapter_weights=[1.0, 0.8])

prompt = "papercut, a cute fox"
generator = torch.manual_seed(0)
image = pipe(prompt, num_inference_steps=4, guidance_scale=1, generator=generator).images[0]
image
```

## ControlNet/T2I-Adapter

Let's look at how we can perform inference with ControlNet/T2I-Adapter and LCM-LoRA.

### ControlNet

For this example, we'll use the SD-v1-5 model and the LCM-LoRA for SD-v1-5 with canny ControlNet.

```python
import torch
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
from diffusers.utils import load_image, make_image_grid

image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
).resize((512, 512))

image = np.array(image)

low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
    variant="fp16"
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

generator = torch.manual_seed(0)
image = pipe(
    "the mona lisa",
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=1.5,
    controlnet_conditioning_scale=0.8,
    cross_attention_kwargs={"scale": 1},
    generator=generator,
).images[0]
make_image_grid([canny_image, image], rows=1, cols=2)
```


<Tip>

The inference parameters in this example might not work for all examples, so we recommend trying different values for the `num_inference_steps`, `guidance_scale`, `controlnet_conditioning_scale`, and `cross_attention_kwargs` parameters and choosing the best one.

</Tip>

### T2I-Adapter

This example shows how to use the LCM-LoRA with the [Canny T2I-Adapter](TencentARC/t2i-adapter-canny-sdxl-1.0) and SDXL.

```python
import torch
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionXLAdapterPipeline, T2IAdapter, LCMScheduler
from diffusers.utils import load_image, make_image_grid

# Prepare image
# Detect the canny map in low resolution to avoid high-frequency details
image = load_image(
    "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_canny.jpg"
).resize((384, 384))

image = np.array(image)

low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image).resize((1024, 1024))

# load adapter
adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda")

pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    adapter=adapter,
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

prompt = "Mystical fairy in real, magic, 4k picture, high quality"
negative_prompt = "extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"

generator = torch.manual_seed(0)
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=1.5,
    adapter_conditioning_scale=0.8,
    adapter_conditioning_factor=1,
    generator=generator,
).images[0]
make_image_grid([canny_image, image], rows=1, cols=2)
```

## Inpainting

LCM-LoRA can be used for inpainting as well.

```python
import torch
from diffusers import AutoPipelineForInpainting, LCMScheduler
from diffusers.utils import load_image, make_image_grid

pipe = AutoPipelineForInpainting.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

# load base and mask image
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png")
mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png")

prompt = "concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"
generator = torch.manual_seed(0)
image = pipe(
    prompt=prompt,
    image=init_image,
    mask_image=mask_image,
    generator=generator,
    num_inference_steps=4,
    guidance_scale=4,
).images[0]
make_image_grid([init_image, mask_image, image], rows=1, cols=3)
```

## AnimateDiff

[`AnimateDiff`] allows you to animate images using Stable Diffusion models. To get good results, we need to generate multiple frames (16-24), and doing this with standard SD models can be very slow. LCM-LoRA can be used to speed up the process significantly, as you just need to do 4-8 steps for each frame. Let's look at how we can perform animation with LCM-LoRA and AnimateDiff.

```python
import torch
from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler, LCMScheduler
from diffusers.utils import export_to_gif

adapter = MotionAdapter.from_pretrained("diffusers/animatediff-motion-adapter-v1-5")
pipe = AnimateDiffPipeline.from_pretrained(
    "frankjoshua/toonyou_beta6",
    motion_adapter=adapter,
).to("cuda")

# set scheduler
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# load LCM-LoRA
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")
pipe.load_lora_weights("guoyww/animatediff-motion-lora-zoom-in", weight_name="diffusion_pytorch_model.safetensors", adapter_name="motion-lora")

pipe.set_adapters(["lcm", "motion-lora"], adapter_weights=[0.55, 1.2])

prompt = "best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress"
generator = torch.manual_seed(0)
frames = pipe(
    prompt=prompt,
    num_inference_steps=5,
    guidance_scale=1.25,
    cross_attention_kwargs={"scale": 1},
    num_frames=24,
    generator=generator
).frames[0]
export_to_gif(frames, "animation.gif")
```

@@ -1304,7 +1304,11 @@ class DemoFusionSDXLPipeline(
             if isinstance(component, torch.nn.Module):
                 if hasattr(component, "_hf_hook"):
                     is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                    is_sequential_cpu_offload = (
+                        isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                        or hasattr(component._hf_hook, "hooks")
+                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
+                    )
             logger.info(
                 "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
             )
@@ -32,7 +32,7 @@ import torch.utils.checkpoint
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
-from accelerate.utils import ProjectConfiguration, set_seed
+from accelerate.utils import DistributedType, ProjectConfiguration, set_seed
 from datasets import load_dataset
 from huggingface_hub import create_repo, upload_folder
 from packaging import version
@@ -53,7 +53,7 @@ from diffusers import (
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version, is_wandb_available, make_image_grid
 from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
-from diffusers.utils.import_utils import is_xformers_available
+from diffusers.utils.import_utils import is_torch_npu_available, is_xformers_available
 from diffusers.utils.torch_utils import is_compiled_module


@@ -64,6 +64,8 @@ if is_wandb_available():
 check_min_version("0.28.0.dev0")

 logger = get_logger(__name__)
+if is_torch_npu_available():
+    torch.npu.config.allow_internal_format = False


 def log_validation(vae, unet, controlnet, args, accelerator, weight_dtype, step, is_final_validation=False):
@@ -471,6 +473,9 @@ def parse_args(input_args=None):
     parser.add_argument(
         "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
     )
+    parser.add_argument(
+        "--enable_npu_flash_attention", action="store_true", help="Whether or not to use npu flash attention."
+    )
     parser.add_argument(
         "--set_grads_to_none",
         action="store_true",
@@ -936,6 +941,13 @@ def main(args):
     text_encoder_two.requires_grad_(False)
     controlnet.train()

+    if args.enable_npu_flash_attention:
+        if is_torch_npu_available():
+            logger.info("npu flash attention enabled.")
+            unet.enable_npu_flash_attention()
+        else:
+            raise ValueError("npu flash attention requires torch_npu extensions and is supported only on npu devices.")
+
     if args.enable_xformers_memory_efficient_attention:
         if is_xformers_available():
             import xformers
@@ -1235,7 +1247,8 @@ def main(args):
                 progress_bar.update(1)
                 global_step += 1

-                if accelerator.is_main_process:
+                # DeepSpeed requires saving weights on every device; saving weights only on the main process would cause issues.
+                if accelerator.distributed_type == DistributedType.DEEPSPEED or accelerator.is_main_process:
                     if global_step % args.checkpointing_steps == 0:
                         # _before_ saving state, check if this save would set us over the `checkpoints_total_limit`
                         if args.checkpoints_total_limit is not None:
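
The checkpointing change in the hunk above replaces the plain `is_main_process` guard because DeepSpeed shards model and optimizer state across ranks, so every process has to take part in saving. Stripped down to its essentials, the pattern looks roughly like the following sketch; the `save_checkpoint` helper and its arguments are hypothetical, not code from the training script itself.

```python
from accelerate import Accelerator
from accelerate.utils import DistributedType

accelerator = Accelerator()


def save_checkpoint(global_step, checkpointing_steps, output_dir):
    # Under DeepSpeed every rank must call save_state(); on other backends
    # only the main process writes the checkpoint.
    if accelerator.distributed_type == DistributedType.DEEPSPEED or accelerator.is_main_process:
        if global_step % checkpointing_steps == 0:
            accelerator.save_state(f"{output_dir}/checkpoint-{global_step}")
```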
@@ -32,7 +32,7 @@ import torch.utils.checkpoint
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
-from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
+from accelerate.utils import DistributedDataParallelKwargs, DistributedType, ProjectConfiguration, set_seed
 from datasets import load_dataset
 from huggingface_hub import create_repo, upload_folder
 from packaging import version
@@ -60,7 +60,7 @@ from diffusers.utils import (
     is_wandb_available,
 )
 from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
-from diffusers.utils.import_utils import is_xformers_available
+from diffusers.utils.import_utils import is_torch_npu_available, is_xformers_available
 from diffusers.utils.torch_utils import is_compiled_module


@@ -68,6 +68,8 @@ from diffusers.utils.torch_utils import is_compiled_module
 check_min_version("0.28.0.dev0")

 logger = get_logger(__name__)
+if is_torch_npu_available():
+    torch.npu.config.allow_internal_format = False


 def save_model_card(
@@ -419,6 +421,9 @@ def parse_args(input_args=None):
     parser.add_argument(
         "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
     )
+    parser.add_argument(
+        "--enable_npu_flash_attention", action="store_true", help="Whether or not to use npu flash attention."
+    )
     parser.add_argument("--noise_offset", type=float, default=0, help="The scale of noise offset.")
     parser.add_argument(
         "--rank",
@@ -623,6 +628,13 @@ def main(args):
     text_encoder_one.to(accelerator.device, dtype=weight_dtype)
     text_encoder_two.to(accelerator.device, dtype=weight_dtype)

+    if args.enable_npu_flash_attention:
+        if is_torch_npu_available():
+            logger.info("npu flash attention enabled.")
+            unet.enable_npu_flash_attention()
+        else:
+            raise ValueError("npu flash attention requires torch_npu extensions and is supported only on npu devices.")
+
     if args.enable_xformers_memory_efficient_attention:
         if is_xformers_available():
             import xformers
@@ -1149,7 +1161,8 @@ def main(args):
                     accelerator.log({"train_loss": train_loss}, step=global_step)
                     train_loss = 0.0

-                if accelerator.is_main_process:
+                # DeepSpeed requires saving weights on every device; saving weights only on the main process would cause issues.
+                if accelerator.distributed_type == DistributedType.DEEPSPEED or accelerator.is_main_process:
                     if global_step % args.checkpointing_steps == 0:
                         # _before_ saving state, check if this save would set us over the `checkpoints_total_limit`
                         if args.checkpoints_total_limit is not None:
@@ -310,9 +310,9 @@ class ConfigMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -341,7 +341,7 @@ class ConfigMixin:
         """
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
         local_files_only = kwargs.pop("local_files_only", False)
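
For callers, the practical effect of this deprecation is that `resume_download` can simply be dropped from `from_pretrained`-style calls; passing it still works but is ignored. A sketch of the before/after call sites follows — the scheduler class and checkpoint here are only illustrative examples, not part of the change itself.

```python
from diffusers import DDIMScheduler

# Before: explicitly requesting resumable downloads.
scheduler = DDIMScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler", resume_download=True
)

# After: the argument is deprecated and ignored; interrupted downloads
# are resumed automatically whenever possible.
scheduler = DDIMScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler")
```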
@@ -50,9 +50,9 @@ class FromOriginalVAEMixin:
             cache_dir (`Union[str, os.PathLike]`, *optional*):
                 Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                 is not used.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -99,7 +99,7 @@ class FromOriginalVAEMixin:

         original_config_file = kwargs.pop("original_config_file", None)
         config_file = kwargs.pop("config_file", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)

@@ -50,9 +50,9 @@ class FromOriginalControlNetMixin:
             cache_dir (`Union[str, os.PathLike]`, *optional*):
                 Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                 is not used.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -89,7 +89,7 @@ class FromOriginalControlNetMixin:
         """
         original_config_file = kwargs.pop("original_config_file", None)
         config_file = kwargs.pop("config_file", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)

@@ -90,9 +90,9 @@ class IPAdapterMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-                incompletely downloaded files are deleted.
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -135,7 +135,7 @@ class IPAdapterMixin:
         # Load the main state dict first.
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -176,9 +176,9 @@ class LoraLoaderMixin:
|
|||||||
force_download (`bool`, *optional*, defaults to `False`):
|
force_download (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
||||||
cached versions if they exist.
|
cached versions if they exist.
|
||||||
resume_download (`bool`, *optional*, defaults to `False`):
|
resume_download:
|
||||||
Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
|
Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
|
||||||
incompletely downloaded files are deleted.
|
of Diffusers.
|
||||||
proxies (`Dict[str, str]`, *optional*):
|
proxies (`Dict[str, str]`, *optional*):
|
||||||
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
|
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
|
||||||
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
|
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
|
||||||
@@ -208,7 +208,7 @@ class LoraLoaderMixin:
         # UNet and text encoder or both.
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -369,7 +369,11 @@ class LoraLoaderMixin:
                 if not is_model_cpu_offload:
                     is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                 if not is_sequential_cpu_offload:
-                    is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)
+                    is_sequential_cpu_offload = (
+                        isinstance(component._hf_hook, AlignDevicesHook)
+                        or hasattr(component._hf_hook, "hooks")
+                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
+                    )
 
             logger.info(
                 "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
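The widened check treats a component as sequentially offloaded not only when its accelerate hook is an `AlignDevicesHook` directly, but also when it is a composite hook whose first sub-hook is one. A standalone sketch of the same test (the helper name is hypothetical, not part of the library):

```python
from accelerate.hooks import AlignDevicesHook


def _is_sequentially_offloaded(module) -> bool:
    # Same condition as the diff above: either the hook itself is an AlignDevicesHook,
    # or it is a composite hook exposing a `hooks` list whose first entry is one.
    hook = getattr(module, "_hf_hook", None)
    if hook is None:
        return False
    return isinstance(hook, AlignDevicesHook) or (
        hasattr(hook, "hooks") and isinstance(hook.hooks[0], AlignDevicesHook)
    )
```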
@@ -177,9 +177,9 @@ class FromSingleFileMixin:
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -244,7 +244,7 @@ class FromSingleFileMixin:
         ```
         """
         original_config_file = kwargs.pop("original_config_file", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
@@ -305,7 +305,7 @@ def fetch_ldm_config_and_checkpoint(
     pretrained_model_link_or_path,
     class_name,
     original_config_file=None,
-    resume_download=False,
+    resume_download=None,
     force_download=False,
     proxies=None,
     token=None,
@@ -38,7 +38,7 @@ TEXT_INVERSION_NAME_SAFE = "learned_embeds.safetensors"
 def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs):
     cache_dir = kwargs.pop("cache_dir", None)
     force_download = kwargs.pop("force_download", False)
-    resume_download = kwargs.pop("resume_download", False)
+    resume_download = kwargs.pop("resume_download", None)
     proxies = kwargs.pop("proxies", None)
     local_files_only = kwargs.pop("local_files_only", None)
     token = kwargs.pop("token", None)
@@ -308,9 +308,9 @@ class TextualInversionLoaderMixin:
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -423,7 +423,11 @@ class TextualInversionLoaderMixin:
             if isinstance(component, nn.Module):
                 if hasattr(component, "_hf_hook"):
                     is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                    is_sequential_cpu_offload = (
+                        isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                        or hasattr(component._hf_hook, "hooks")
+                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
+                    )
                     logger.info(
                         "Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again."
                     )
@@ -103,9 +103,9 @@ class UNet2DConditionLoadersMixin:
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -149,7 +149,7 @@ class UNet2DConditionLoadersMixin:
 
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -359,7 +359,11 @@ class UNet2DConditionLoadersMixin:
             for _, component in _pipeline.components.items():
                 if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
                     is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                    is_sequential_cpu_offload = (
+                        isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                        or hasattr(component._hf_hook, "hooks")
+                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
+                    )
 
                     logger.info(
                         "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
@@ -1086,9 +1090,9 @@ class FromOriginalUNetMixin:
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1110,7 +1114,7 @@ class FromOriginalUNetMixin:
             raise ValueError("FromOriginalUNetMixin is currently only compatible with StableCascadeUNet")
 
         config = kwargs.pop("config", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
@@ -18,8 +18,12 @@ import torch.nn.functional as F
 from torch import nn
 
 from ..utils import deprecate
+from ..utils.import_utils import is_torch_npu_available
 
 
+if is_torch_npu_available():
+    import torch_npu
+
 ACTIVATION_FUNCTIONS = {
     "swish": nn.SiLU(),
     "silu": nn.SiLU(),
@@ -98,9 +102,13 @@ class GEGLU(nn.Module):
         if len(args) > 0 or kwargs.get("scale", None) is not None:
             deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
             deprecate("scale", "1.0.0", deprecation_message)
-        hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1)
-        return hidden_states * self.gelu(gate)
+        hidden_states = self.proj(hidden_states)
+        if is_torch_npu_available():
+            # using torch_npu.npu_geglu can run faster and save memory on NPU.
+            return torch_npu.npu_geglu(hidden_states, dim=-1, approximate=1)[0]
+        else:
+            hidden_states, gate = hidden_states.chunk(2, dim=-1)
+            return hidden_states * self.gelu(gate)
 
 
 class ApproximateGELU(nn.Module):
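The GEGLU change projects first and only then dispatches: on Ascend NPUs the fused `torch_npu.npu_geglu` kernel performs the chunk-and-gate step, otherwise the module falls back to the original chunk plus GELU product. A minimal sketch of the same fallback pattern, assuming `torch_npu` and the `npu_geglu` call exactly as used in the diff above:

```python
import torch
import torch.nn as nn

try:
    import torch_npu  # only present on Ascend NPU installs
    _HAS_NPU = True
except ImportError:
    _HAS_NPU = False


class TinyGEGLU(nn.Module):
    """Minimal GEGLU: project to twice the output width, then gate half of it with GELU."""

    def __init__(self, dim_in: int, dim_out: int):
        super().__init__()
        self.proj = nn.Linear(dim_in, dim_out * 2)
        self.gelu = nn.GELU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.proj(x)
        if _HAS_NPU:
            # fused kernel; signature taken from the diff above
            return torch_npu.npu_geglu(x, dim=-1, approximate=1)[0]
        hidden, gate = x.chunk(2, dim=-1)
        return hidden * self.gelu(gate)
```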
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import inspect
+import math
 from importlib import import_module
 from typing import Callable, List, Optional, Union
 
@@ -21,13 +22,15 @@ from torch import nn
 
 from ..image_processor import IPAdapterMaskProcessor
 from ..utils import deprecate, logging
-from ..utils.import_utils import is_xformers_available
+from ..utils.import_utils import is_torch_npu_available, is_xformers_available
 from ..utils.torch_utils import maybe_allow_in_graph
 from .lora import LoRALinearLayer
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+if is_torch_npu_available():
+    import torch_npu
 
 if is_xformers_available():
     import xformers
@@ -209,6 +212,23 @@ class Attention(nn.Module):
         )
         self.set_processor(processor)
 
+    def set_use_npu_flash_attention(self, use_npu_flash_attention: bool) -> None:
+        r"""
+        Set whether to use npu flash attention from `torch_npu` or not.
+
+        """
+        if use_npu_flash_attention:
+            processor = AttnProcessorNPU()
+        else:
+            # set attention processor
+            # We use the AttnProcessor2_0 by default when torch 2.x is used which uses
+            # torch.nn.functional.scaled_dot_product_attention for native Flash/memory_efficient_attention
+            # but only if it has the default `scale` argument. TODO remove scale_qk check when we move to torch 2.1
+            processor = (
+                AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor()
+            )
+        self.set_processor(processor)
+
     def set_use_memory_efficient_attention_xformers(
         self, use_memory_efficient_attention_xformers: bool, attention_op: Optional[Callable] = None
     ) -> None:
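The new toggle mirrors `set_use_memory_efficient_attention_xformers`: it only swaps the processor object on the block. A hedged usage sketch (the constructor arguments are illustrative, and `AttnProcessorNPU()` raises `ImportError` without a `torch_npu` install):

```python
from diffusers.models.attention_processor import Attention, AttnProcessorNPU

# Illustrative constructor arguments; the toggle itself only replaces the processor.
attn = Attention(query_dim=64, heads=8, dim_head=8)

attn.set_use_npu_flash_attention(True)   # installs AttnProcessorNPU (requires torch_npu)
assert isinstance(attn.processor, AttnProcessorNPU)

attn.set_use_npu_flash_attention(False)  # restores the default SDPA/legacy processor
```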
@@ -1207,6 +1227,116 @@ class XFormersAttnProcessor:
         return hidden_states
 
 
+class AttnProcessorNPU:
+
+    r"""
+    Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If
+    fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is
+    not significant.
+
+    """
+
+    def __init__(self):
+        if not is_torch_npu_available():
+            raise ImportError("AttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.")
+
+    def __call__(
+        self,
+        attn: Attention,
+        hidden_states: torch.FloatTensor,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        temb: Optional[torch.FloatTensor] = None,
+        *args,
+        **kwargs,
+    ) -> torch.FloatTensor:
+        if len(args) > 0 or kwargs.get("scale", None) is not None:
+            deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
+            deprecate("scale", "1.0.0", deprecation_message)
+
+        residual = hidden_states
+        if attn.spatial_norm is not None:
+            hidden_states = attn.spatial_norm(hidden_states, temb)
+
+        input_ndim = hidden_states.ndim
+
+        if input_ndim == 4:
+            batch_size, channel, height, width = hidden_states.shape
+            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
+
+        batch_size, sequence_length, _ = (
+            hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
+        )
+
+        if attention_mask is not None:
+            attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
+            # scaled_dot_product_attention expects attention_mask shape to be
+            # (batch, heads, source_length, target_length)
+            attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
+
+        if attn.group_norm is not None:
+            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
+
+        query = attn.to_q(hidden_states)
+
+        if encoder_hidden_states is None:
+            encoder_hidden_states = hidden_states
+        elif attn.norm_cross:
+            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
+
+        key = attn.to_k(encoder_hidden_states)
+        value = attn.to_v(encoder_hidden_states)
+
+        inner_dim = key.shape[-1]
+        head_dim = inner_dim // attn.heads
+
+        query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+
+        key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+        value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+
+        # the output of sdp = (batch, num_heads, seq_len, head_dim)
+        if query.dtype in (torch.float16, torch.bfloat16):
+            hidden_states = torch_npu.npu_fusion_attention(
+                query,
+                key,
+                value,
+                attn.heads,
+                input_layout="BNSD",
+                pse=None,
+                atten_mask=attention_mask,
+                scale=1.0 / math.sqrt(query.shape[-1]),
+                pre_tockens=65536,
+                next_tockens=65536,
+                keep_prob=1.0,
+                sync=False,
+                inner_precise=0,
+            )[0]
+        else:
+            # TODO: add support for attn.scale when we move to Torch 2.1
+            hidden_states = F.scaled_dot_product_attention(
+                query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+            )
+
+        hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
+        hidden_states = hidden_states.to(query.dtype)
+
+        # linear proj
+        hidden_states = attn.to_out[0](hidden_states)
+        # dropout
+        hidden_states = attn.to_out[1](hidden_states)
+
+        if input_ndim == 4:
+            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
+
+        if attn.residual_connection:
+            hidden_states = hidden_states + residual
+
+        hidden_states = hidden_states / attn.rescale_output_factor
+
+        return hidden_states
+
+
 class AttnProcessor2_0:
     r"""
     Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
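Note the dtype gate inside the processor: only fp16/bf16 activations take the fused `npu_fusion_attention` path, while fp32 silently falls back to `F.scaled_dot_product_attention`. A hedged sketch of wiring it into a UNet (the checkpoint id is a placeholder, and an Ascend `torch_npu` install is assumed):

```python
import torch
from diffusers import StableDiffusionPipeline
from diffusers.models.attention_processor import AttnProcessorNPU

# Placeholder checkpoint id; loading in fp16 matters, otherwise the processor
# falls back to F.scaled_dot_product_attention and the NPU kernel is never used.
pipe = StableDiffusionPipeline.from_pretrained("some/checkpoint", torch_dtype=torch.float16)
pipe.to("npu")  # assumes the torch_npu device backend is installed

pipe.unet.set_attn_processor(AttnProcessorNPU())
```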
@@ -245,9 +245,9 @@ class FlaxModelMixin(PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -296,7 +296,7 @@ class FlaxModelMixin(PushToHubMixin):
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
         from_pt = kwargs.pop("from_pt", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", False)
         token = kwargs.pop("token", None)
@@ -272,6 +272,36 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         if self._supports_gradient_checkpointing:
             self.apply(partial(self._set_gradient_checkpointing, value=False))
 
+    def set_use_npu_flash_attention(self, valid: bool) -> None:
+        r"""
+        Set the switch for the npu flash attention.
+        """
+
+        def fn_recursive_set_npu_flash_attention(module: torch.nn.Module):
+            if hasattr(module, "set_use_npu_flash_attention"):
+                module.set_use_npu_flash_attention(valid)
+
+            for child in module.children():
+                fn_recursive_set_npu_flash_attention(child)
+
+        for module in self.children():
+            if isinstance(module, torch.nn.Module):
+                fn_recursive_set_npu_flash_attention(module)
+
+    def enable_npu_flash_attention(self) -> None:
+        r"""
+        Enable npu flash attention from torch_npu
+
+        """
+        self.set_use_npu_flash_attention(True)
+
+    def disable_npu_flash_attention(self) -> None:
+        r"""
+        disable npu flash attention from torch_npu
+
+        """
+        self.set_use_npu_flash_attention(False)
+
     def set_use_memory_efficient_attention_xformers(
         self, valid: bool, attention_op: Optional[Callable] = None
     ) -> None:
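Because these helpers live on `ModelMixin`, every model class inherits them and the toggle walks all children, flipping each `Attention` block it finds. A hedged usage sketch (the model id is a placeholder):

```python
from diffusers import UNet2DConditionModel

# Placeholder id; any ModelMixin subclass exposes the same pair of methods.
unet = UNet2DConditionModel.from_pretrained("some/checkpoint", subfolder="unet")

unet.enable_npu_flash_attention()   # recursively calls set_use_npu_flash_attention(True)
# ... run inference on the NPU ...
unet.disable_npu_flash_attention()  # restores the default attention processors
```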
@@ -446,9 +476,9 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -530,7 +560,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False)
         force_download = kwargs.pop("force_download", False)
         from_flax = kwargs.pop("from_flax", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         output_loading_info = kwargs.pop("output_loading_info", False)
         local_files_only = kwargs.pop("local_files_only", None)
@@ -234,9 +234,9 @@ class AutoPipelineForText2Image(ConfigMixin):
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -311,7 +311,7 @@ class AutoPipelineForText2Image(ConfigMixin):
         """
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
         local_files_only = kwargs.pop("local_files_only", False)
@@ -507,9 +507,9 @@ class AutoPipelineForImage2Image(ConfigMixin):
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -584,7 +584,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
         """
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
         local_files_only = kwargs.pop("local_files_only", False)
@@ -783,9 +783,9 @@ class AutoPipelineForInpainting(ConfigMixin):
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -860,7 +860,7 @@ class AutoPipelineForInpainting(ConfigMixin):
         """
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         token = kwargs.pop("token", None)
         local_files_only = kwargs.pop("local_files_only", False)
@@ -227,6 +227,9 @@ class DiTPipeline(DiffusionPipeline):
         if output_type == "pil":
             samples = self.numpy_to_pil(samples)
 
+        # Offload all models
+        self.maybe_free_model_hooks()
+
         if not return_dict:
             return (samples,)
 
@@ -254,9 +254,9 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -316,7 +316,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
         ```
         """
         cache_dir = kwargs.pop("cache_dir", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", False)
         token = kwargs.pop("token", None)
@@ -435,7 +435,7 @@ def _load_empty_model(
         return_unused_kwargs=True,
         return_commit_hash=True,
         force_download=kwargs.pop("force_download", False),
-        resume_download=kwargs.pop("resume_download", False),
+        resume_download=kwargs.pop("resume_download", None),
         proxies=kwargs.pop("proxies", None),
         local_files_only=kwargs.pop("local_files_only", False),
         token=kwargs.pop("token", None),
@@ -454,7 +454,7 @@ def _load_empty_model(
         cached_folder,
         subfolder=name,
         force_download=kwargs.pop("force_download", False),
-        resume_download=kwargs.pop("resume_download", False),
+        resume_download=kwargs.pop("resume_download", None),
         proxies=kwargs.pop("proxies", None),
         local_files_only=kwargs.pop("local_files_only", False),
         token=kwargs.pop("token", None),
@@ -376,7 +376,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
             if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
                 return False
 
-            return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
+            return hasattr(module, "_hf_hook") and (
+                isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
+                or hasattr(module._hf_hook, "hooks")
+                and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
+            )
 
         def module_is_offloaded(module):
             if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
@@ -529,9 +533,9 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
     cache_dir (`Union[str, os.PathLike]`, *optional*):
         Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
         is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -621,7 +625,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
         ```
         """
         cache_dir = kwargs.pop("cache_dir", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
@@ -1005,8 +1009,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
         """
         for _, model in self.components.items():
             if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"):
-                is_sequential_cpu_offload = isinstance(getattr(model, "_hf_hook"), accelerate.hooks.AlignDevicesHook)
-                accelerate.hooks.remove_hook_from_module(model, recurse=is_sequential_cpu_offload)
+                accelerate.hooks.remove_hook_from_module(model, recurse=True)
         self._all_hooks = []
 
     def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
@@ -1213,9 +1216,9 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1268,7 +1271,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
 
         """
         cache_dir = kwargs.pop("cache_dir", None)
-        resume_download = kwargs.pop("resume_download", False)
+        resume_download = kwargs.pop("resume_download", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
@@ -557,7 +557,7 @@ def convert_ldm_unet_checkpoint(
                 paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
             )
 
-            output_block_list = {k: sorted(v) for k, v in output_block_list.items()}
+            output_block_list = {k: sorted(v) for k, v in sorted(output_block_list.items())}
             if ["conv.bias", "conv.weight"] in output_block_list.values():
                 index = list(output_block_list.values()).index(["conv.bias", "conv.weight"])
                 new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[
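Sorting the items matters because the next lines do a positional `.index()` lookup over `output_block_list.values()`, which otherwise depends on the order in which blocks were first encountered. A quick illustration with toy keys and values (not taken from a real checkpoint):

```python
# Toy illustration: the same mapping built in two different insertion orders.
a = {2: ["conv.bias", "conv.weight"], 0: ["norm.weight"]}
b = {0: ["norm.weight"], 2: ["conv.bias", "conv.weight"]}

# Positional lookup over .values() depends on insertion order...
print(list(a.values()).index(["conv.bias", "conv.weight"]))  # 0
print(list(b.values()).index(["conv.bias", "conv.weight"]))  # 1

# ...so normalizing with sorted(items) gives a stable, key-ordered view.
a_sorted = {k: sorted(v) for k, v in sorted(a.items())}
b_sorted = {k: sorted(v) for k, v in sorted(b.items())}
assert list(a_sorted.values()) == list(b_sorted.values())
```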
@@ -172,6 +172,7 @@ class StableDiffusionInstructPix2PixPipeline(
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
+        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -296,6 +297,8 @@ class StableDiffusionInstructPix2PixPipeline(
             negative_prompt,
             prompt_embeds,
             negative_prompt_embeds,
+            ip_adapter_image,
+            ip_adapter_image_embeds,
             callback_on_step_end_tensor_inputs,
         )
         self._guidance_scale = guidance_scale
@@ -303,14 +306,6 @@ class StableDiffusionInstructPix2PixPipeline(
 
         device = self._execution_device
 
-        if ip_adapter_image is not None:
-            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
-            image_embeds, negative_image_embeds = self.encode_image(
-                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
-            )
-            if self.do_classifier_free_guidance:
-                image_embeds = torch.cat([image_embeds, negative_image_embeds, negative_image_embeds])
-
         if image is None:
             raise ValueError("`image` input cannot be undefined.")
 
@@ -335,6 +330,14 @@ class StableDiffusionInstructPix2PixPipeline(
             negative_prompt_embeds=negative_prompt_embeds,
         )
 
+        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
+            image_embeds = self.prepare_ip_adapter_image_embeds(
+                ip_adapter_image,
+                ip_adapter_image_embeds,
+                device,
+                batch_size * num_images_per_prompt,
+                self.do_classifier_free_guidance,
+            )
         # 3. Preprocess image
         image = self.image_processor.preprocess(image)
 
@@ -635,6 +638,65 @@ class StableDiffusionInstructPix2PixPipeline(
 
         return image_embeds, uncond_image_embeds
 
+    def prepare_ip_adapter_image_embeds(
+        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
+    ):
+        if ip_adapter_image_embeds is None:
+            if not isinstance(ip_adapter_image, list):
+                ip_adapter_image = [ip_adapter_image]
+
+            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
+                raise ValueError(
+                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
+                )
+
+            image_embeds = []
+            for single_ip_adapter_image, image_proj_layer in zip(
+                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
+            ):
+                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
+                single_image_embeds, single_negative_image_embeds = self.encode_image(
+                    single_ip_adapter_image, device, 1, output_hidden_state
+                )
+                single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
+                single_negative_image_embeds = torch.stack(
+                    [single_negative_image_embeds] * num_images_per_prompt, dim=0
+                )
+
+                if do_classifier_free_guidance:
+                    single_image_embeds = torch.cat(
+                        [single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
+                    )
+                    single_image_embeds = single_image_embeds.to(device)
+
+                image_embeds.append(single_image_embeds)
+        else:
+            repeat_dims = [1]
+            image_embeds = []
+            for single_image_embeds in ip_adapter_image_embeds:
+                if do_classifier_free_guidance:
+                    (
+                        single_image_embeds,
+                        single_negative_image_embeds,
+                        single_negative_image_embeds,
+                    ) = single_image_embeds.chunk(3)
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
+                    )
+                    single_negative_image_embeds = single_negative_image_embeds.repeat(
+                        num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
+                    )
+                    single_image_embeds = torch.cat(
+                        [single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
+                    )
+                else:
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
+                    )
+                image_embeds.append(single_image_embeds)
+
+        return image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
     def run_safety_checker(self, image, device, dtype):
         if self.safety_checker is None:
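With the new argument, the IP-Adapter image embeddings can be computed once and reused across calls instead of re-encoding the image every time. A hedged sketch, assuming placeholder checkpoint and adapter ids and that `style_image` and `input_image` are existing PIL images with an image encoder available:

```python
from diffusers import StableDiffusionInstructPix2PixPipeline

# Placeholder ids; any checkpoint with a loaded IP-Adapter follows the same pattern.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("some/instruct-pix2pix-checkpoint")
pipe.load_ip_adapter("some/ip-adapter-repo", subfolder="models", weight_name="ip_adapter.safetensors")

# Precompute the embeddings once; with CFG enabled, the negative embeds are folded in.
embeds = pipe.prepare_ip_adapter_image_embeds(
    ip_adapter_image=style_image,
    ip_adapter_image_embeds=None,
    device=pipe.device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,
)

# Reuse the cached embeddings on subsequent calls.
out = pipe(prompt="make it snowy", image=input_image, ip_adapter_image_embeds=embeds)
```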
@@ -687,6 +749,8 @@ class StableDiffusionInstructPix2PixPipeline(
         negative_prompt=None,
         prompt_embeds=None,
         negative_prompt_embeds=None,
+        ip_adapter_image=None,
+        ip_adapter_image_embeds=None,
         callback_on_step_end_tensor_inputs=None,
     ):
         if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
@@ -728,6 +792,21 @@ class StableDiffusionInstructPix2PixPipeline(
                 f" {negative_prompt_embeds.shape}."
             )
 
+        if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
+            raise ValueError(
+                "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
+            )
+
+        if ip_adapter_image_embeds is not None:
+            if not isinstance(ip_adapter_image_embeds, list):
+                raise ValueError(
+                    f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
+                )
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
+                raise ValueError(
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                )
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
         shape = (
@@ -436,7 +436,6 @@ class StableDiffusionXLInstructPix2PixPipeline(
             extra_step_kwargs["generator"] = generator
         return extra_step_kwargs
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_instruct_pix2pix.StableDiffusionInstructPix2PixPipeline.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -112,9 +112,9 @@ class SchedulerMixin(PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -102,9 +102,9 @@ class FlaxSchedulerMixin(PushToHubMixin):
     force_download (`bool`, *optional*, defaults to `False`):
         Whether or not to force the (re-)download of the model weights and configuration files, overriding the
         cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to delete incompletely received files. Will attempt to resume the download if such a
-        file exists.
+    resume_download:
+        Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+        of Diffusers.
     proxies (`Dict[str, str]`, *optional*):
         A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
         'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -201,7 +201,7 @@ def get_cached_module_file(
     module_file: str,
     cache_dir: Optional[Union[str, os.PathLike]] = None,
     force_download: bool = False,
-    resume_download: bool = False,
+    resume_download: Optional[bool] = None,
     proxies: Optional[Dict[str, str]] = None,
     token: Optional[Union[bool, str]] = None,
    revision: Optional[str] = None,
@@ -228,9 +228,9 @@ def get_cached_module_file(
|
|||||||
cache should not be used.
|
cache should not be used.
|
||||||
force_download (`bool`, *optional*, defaults to `False`):
|
force_download (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to force to (re-)download the configuration files and override the cached versions if they
|
Whether or not to force to (re-)download the configuration files and override the cached versions if they
|
||||||
exist.
|
exist. resume_download:
|
||||||
resume_download (`bool`, *optional*, defaults to `False`):
|
Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
|
||||||
Whether or not to delete incompletely received file. Attempts to resume the download if such a file exists.
|
of Diffusers.
|
||||||
proxies (`Dict[str, str]`, *optional*):
|
proxies (`Dict[str, str]`, *optional*):
|
||||||
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
||||||
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
||||||
@@ -380,7 +380,7 @@ def get_class_from_dynamic_module(
|
|||||||
class_name: Optional[str] = None,
|
class_name: Optional[str] = None,
|
||||||
cache_dir: Optional[Union[str, os.PathLike]] = None,
|
cache_dir: Optional[Union[str, os.PathLike]] = None,
|
||||||
force_download: bool = False,
|
force_download: bool = False,
|
||||||
resume_download: bool = False,
|
resume_download: Optional[bool] = None,
|
||||||
proxies: Optional[Dict[str, str]] = None,
|
proxies: Optional[Dict[str, str]] = None,
|
||||||
token: Optional[Union[bool, str]] = None,
|
token: Optional[Union[bool, str]] = None,
|
||||||
revision: Optional[str] = None,
|
revision: Optional[str] = None,
|
||||||
@@ -417,8 +417,9 @@ def get_class_from_dynamic_module(
|
|||||||
force_download (`bool`, *optional*, defaults to `False`):
|
force_download (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to force to (re-)download the configuration files and override the cached versions if they
|
Whether or not to force to (re-)download the configuration files and override the cached versions if they
|
||||||
exist.
|
exist.
|
||||||
resume_download (`bool`, *optional*, defaults to `False`):
|
resume_download:
|
||||||
Whether or not to delete incompletely received file. Attempts to resume the download if such a file exists.
|
Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1 of
|
||||||
|
Diffusers.
|
||||||
proxies (`Dict[str, str]`, *optional*):
|
proxies (`Dict[str, str]`, *optional*):
|
||||||
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
||||||
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
||||||
|
|||||||
@@ -283,7 +283,7 @@ def _get_model_file(
|
|||||||
cache_dir: Optional[str] = None,
|
cache_dir: Optional[str] = None,
|
||||||
force_download: bool = False,
|
force_download: bool = False,
|
||||||
proxies: Optional[Dict] = None,
|
proxies: Optional[Dict] = None,
|
||||||
resume_download: bool = False,
|
resume_download: Optional[bool] = None,
|
||||||
local_files_only: bool = False,
|
local_files_only: bool = False,
|
||||||
token: Optional[str] = None,
|
token: Optional[str] = None,
|
||||||
user_agent: Optional[Union[Dict, str]] = None,
|
user_agent: Optional[Union[Dict, str]] = None,
|
||||||
@@ -30,9 +30,14 @@ from huggingface_hub.utils import is_jinja_available
 from requests.exceptions import HTTPError

 from diffusers.models import UNet2DConditionModel
-from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor
+from diffusers.models.attention_processor import (
+    AttnProcessor,
+    AttnProcessor2_0,
+    AttnProcessorNPU,
+    XFormersAttnProcessor,
+)
 from diffusers.training_utils import EMAModel
-from diffusers.utils import is_xformers_available, logging
+from diffusers.utils import is_torch_npu_available, is_xformers_available, logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
     get_python_version,
@@ -300,6 +305,53 @@ class ModelTesterMixin:

         assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'"

+    @unittest.skipIf(
+        torch_device != "npu" or not is_torch_npu_available(),
+        reason="torch npu flash attention is only available with NPU and `torch_npu` installed",
+    )
+    def test_set_torch_npu_flash_attn_processor_determinism(self):
+        torch.use_deterministic_algorithms(False)
+        if self.forward_requires_fresh_args:
+            model = self.model_class(**self.init_dict)
+        else:
+            init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+            model = self.model_class(**init_dict)
+        model.to(torch_device)
+
+        if not hasattr(model, "set_attn_processor"):
+            # If not has `set_attn_processor`, skip test
+            return
+
+        model.set_default_attn_processor()
+        assert all(type(proc) == AttnProcessorNPU for proc in model.attn_processors.values())
+        with torch.no_grad():
+            if self.forward_requires_fresh_args:
+                output = model(**self.inputs_dict(0))[0]
+            else:
+                output = model(**inputs_dict)[0]
+
+        model.enable_npu_flash_attention()
+        assert all(type(proc) == AttnProcessorNPU for proc in model.attn_processors.values())
+        with torch.no_grad():
+            if self.forward_requires_fresh_args:
+                output_2 = model(**self.inputs_dict(0))[0]
+            else:
+                output_2 = model(**inputs_dict)[0]
+
+        model.set_attn_processor(AttnProcessorNPU())
+        assert all(type(proc) == AttnProcessorNPU for proc in model.attn_processors.values())
+        with torch.no_grad():
+            if self.forward_requires_fresh_args:
+                output_3 = model(**self.inputs_dict(0))[0]
+            else:
+                output_3 = model(**inputs_dict)[0]
+
+        torch.use_deterministic_algorithms(True)
+
+        assert torch.allclose(output, output_2, atol=self.base_precision)
+        assert torch.allclose(output, output_3, atol=self.base_precision)
+        assert torch.allclose(output_2, output_3, atol=self.base_precision)
+
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
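The new test above exercises the NPU flash-attention path end to end. A rough usage sketch, assuming an Ascend NPU host with `torch_npu` installed (the checkpoint id is illustrative, not prescribed by the diff):

from diffusers import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessorNPU

# example checkpoint; any UNet2DConditionModel works
unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet").to("npu")

# either install the NPU attention processor explicitly ...
unet.set_attn_processor(AttnProcessorNPU())
# ... or use the convenience toggle the test calls
unet.enable_npu_flash_attention()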
@@ -691,6 +743,9 @@ class ModelTesterMixin:
     def test_cpu_offload(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
+        if model._no_split_modules is None:
+            return
+
         model = model.to(torch_device)

         torch.manual_seed(0)
@@ -718,6 +773,9 @@ class ModelTesterMixin:
     def test_disk_offload_without_safetensors(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
+        if model._no_split_modules is None:
+            return
+
         model = model.to(torch_device)

         torch.manual_seed(0)
@@ -728,12 +786,12 @@ class ModelTesterMixin:
             model.cpu().save_pretrained(tmp_dir, safe_serialization=False)

             with self.assertRaises(ValueError):
-                max_size = int(self.model_split_percents[1] * model_size)
+                max_size = int(self.model_split_percents[0] * model_size)
                 max_memory = {0: max_size, "cpu": max_size}
                 # This errors out because it's missing an offload folder
                 new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)

-            max_size = int(self.model_split_percents[1] * model_size)
+            max_size = int(self.model_split_percents[0] * model_size)
             max_memory = {0: max_size, "cpu": max_size}
             new_model = self.model_class.from_pretrained(
                 tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
@@ -749,6 +807,9 @@ class ModelTesterMixin:
     def test_disk_offload_with_safetensors(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
+        if model._no_split_modules is None:
+            return
+
         model = model.to(torch_device)

         torch.manual_seed(0)
@@ -758,7 +819,7 @@ class ModelTesterMixin:
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir)

-            max_size = int(self.model_split_percents[1] * model_size)
+            max_size = int(self.model_split_percents[0] * model_size)
             max_memory = {0: max_size, "cpu": max_size}
             new_model = self.model_class.from_pretrained(
                 tmp_dir, device_map="auto", offload_folder=tmp_dir, max_memory=max_memory
@@ -774,6 +835,9 @@ class ModelTesterMixin:
     def test_model_parallelism(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
+        if model._no_split_modules is None:
+            return
+
         model = model.to(torch_device)

         torch.manual_seed(0)
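The `if model._no_split_modules is None: return` guard added above skips the offload and parallelism tests for models that cannot be sharded; for models that can, the tests mirror the user-facing loading pattern. A hedged sketch of that pattern (memory limits and paths are placeholders):

from diffusers import UNet2DConditionModel

# requires `accelerate`; weights that do not fit in `max_memory` spill to `offload_folder`
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",       # example checkpoint
    subfolder="unet",
    device_map="auto",
    max_memory={0: "2GiB", "cpu": "4GiB"},  # illustrative limits
    offload_folder="./offload",
)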
@@ -66,16 +66,17 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         unet = UNet2DConditionModel(
-            block_out_channels=(32, 64),
-            layers_per_block=2,
+            block_out_channels=(8, 16),
+            layers_per_block=1,
+            norm_num_groups=8,
             sample_size=32,
             in_channels=4,
             out_channels=4,
             down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
             up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
-            cross_attention_dim=(32, 64),
+            cross_attention_dim=(8, 16),
             class_embed_type="simple_projection",
-            projection_class_embeddings_input_dim=32,
+            projection_class_embeddings_input_dim=8,
             class_embeddings_concat=True,
         )
         scheduler = DDIMScheduler(
@@ -87,9 +88,10 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         )
         torch.manual_seed(0)
         vae = AutoencoderKL(
-            block_out_channels=[32, 64],
+            block_out_channels=[8, 16],
             in_channels=1,
             out_channels=1,
+            norm_num_groups=8,
             down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
             up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
             latent_channels=4,
@@ -98,14 +100,14 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = ClapTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
+            hidden_size=8,
             intermediate_size=37,
             layer_norm_eps=1e-05,
-            num_attention_heads=4,
-            num_hidden_layers=5,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = ClapTextModelWithProjection(text_encoder_config)
         tokenizer = RobertaTokenizer.from_pretrained("hf-internal-testing/tiny-random-roberta", model_max_length=77)
@@ -64,9 +64,9 @@ class BlipDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         text_encoder_config = CLIPTextConfig(
             vocab_size=1000,
-            hidden_size=16,
-            intermediate_size=16,
-            projection_dim=16,
+            hidden_size=8,
+            intermediate_size=8,
+            projection_dim=8,
             num_hidden_layers=1,
             num_attention_heads=1,
             max_position_embeddings=77,
@@ -78,17 +78,17 @@ class BlipDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             out_channels=4,
             down_block_types=("DownEncoderBlock2D",),
             up_block_types=("UpDecoderBlock2D",),
-            block_out_channels=(32,),
+            block_out_channels=(8,),
+            norm_num_groups=8,
             layers_per_block=1,
             act_fn="silu",
             latent_channels=4,
-            norm_num_groups=16,
-            sample_size=16,
+            sample_size=8,
         )

         blip_vision_config = {
-            "hidden_size": 16,
-            "intermediate_size": 16,
+            "hidden_size": 8,
+            "intermediate_size": 8,
             "num_hidden_layers": 1,
             "num_attention_heads": 1,
             "image_size": 224,
@@ -98,32 +98,32 @@ class BlipDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

         blip_qformer_config = {
             "vocab_size": 1000,
-            "hidden_size": 16,
+            "hidden_size": 8,
             "num_hidden_layers": 1,
             "num_attention_heads": 1,
-            "intermediate_size": 16,
+            "intermediate_size": 8,
             "max_position_embeddings": 512,
             "cross_attention_frequency": 1,
-            "encoder_hidden_size": 16,
+            "encoder_hidden_size": 8,
         }
         qformer_config = Blip2Config(
             vision_config=blip_vision_config,
             qformer_config=blip_qformer_config,
-            num_query_tokens=16,
+            num_query_tokens=8,
             tokenizer="hf-internal-testing/tiny-random-bert",
         )
         qformer = Blip2QFormerModel(qformer_config)

         unet = UNet2DConditionModel(
-            block_out_channels=(16, 32),
-            norm_num_groups=16,
+            block_out_channels=(8, 16),
+            norm_num_groups=8,
             layers_per_block=1,
             sample_size=16,
             in_channels=4,
             out_channels=4,
             down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
             up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
-            cross_attention_dim=16,
+            cross_attention_dim=8,
         )
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

@@ -189,7 +189,9 @@ class BlipDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

         assert image.shape == (1, 16, 16, 4)

-        expected_slice = np.array([0.7096, 0.5900, 0.6703, 0.4032, 0.7766, 0.3629, 0.5447, 0.4149, 0.8172])
+        expected_slice = np.array(
+            [0.5329548, 0.8372512, 0.33269387, 0.82096875, 0.43657133, 0.3783, 0.5953028, 0.51934963, 0.42142007]
+        )

         assert (
             np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -324,10 +324,6 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=1e-3)

-    # PixArt transformer model does not work with sequential offload so skip it for now
-    def test_sequential_offload_forward_pass_twice(self):
-        pass
-

 @slow
 @require_torch_gpu
@@ -308,10 +308,6 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=1e-3)

-    # PixArt transformer model does not work with sequential offload so skip it for now
-    def test_sequential_offload_forward_pass_twice(self):
-        pass
-

 @slow
 @require_torch_gpu
@@ -1257,8 +1257,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):

     def test_download_from_hub(self):
         ckpt_paths = [
-            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt",
-            "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt",
+            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
+            "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
         ]

         for ckpt_path in ckpt_paths:
@@ -1271,7 +1271,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
         assert image_out.shape == (512, 512, 3)

     def test_download_local(self):
-        ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt")
+        ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors")
         config_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-inference.yaml")

         pipe = StableDiffusionPipeline.from_single_file(
@@ -1285,7 +1285,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
         assert image_out.shape == (512, 512, 3)

     def test_download_ckpt_diff_format_is_same(self):
-        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"

         sf_pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
         sf_pipe.scheduler = DDIMScheduler.from_config(sf_pipe.scheduler.config)
@@ -1310,7 +1310,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
     def test_single_file_component_configs(self):
         pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

-        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
         single_file_pipe = StableDiffusionPipeline.from_single_file(ckpt_path, load_safety_checker=True)

         for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
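The four test updates above only swap the `.ckpt` URLs for `.safetensors` ones; the single-file loading path itself is unchanged. A small sketch of that path, reusing the same URL the tests point at:

from diffusers import StableDiffusionPipeline

ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
image = pipe("a fantasy landscape", num_inference_steps=2).images[0]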
@@ -1360,6 +1360,8 @@ class PipelineTesterMixin:
         reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
     )
     def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
+        import accelerate
+
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         for component in pipe.components.values():
@@ -1373,6 +1375,7 @@ class PipelineTesterMixin:
         output_without_offload = pipe(**inputs)[0]

         pipe.enable_sequential_cpu_offload()
+        assert pipe._execution_device.type == pipe._offload_device.type

         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]
@@ -1380,11 +1383,48 @@ class PipelineTesterMixin:
         max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
         self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")

+        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
+        offloaded_modules = {
+            k: v
+            for k, v in pipe.components.items()
+            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
+        }
+        # 1. all offloaded modules should be saved to cpu and moved to meta device
+        self.assertTrue(
+            all(v.device.type == "meta" for v in offloaded_modules.values()),
+            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
+        )
+        # 2. all offloaded modules should have hook installed
+        self.assertTrue(
+            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
+            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
+        )
+        # 3. all offloaded modules should have correct hooks installed, should be either one of these two
+        # - `AlignDevicesHook`
+        # - a SequentialHook` that contains `AlignDevicesHook`
+        offloaded_modules_with_incorrect_hooks = {}
+        for k, v in offloaded_modules.items():
+            if hasattr(v, "_hf_hook"):
+                if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
+                    # if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
+                    for hook in v._hf_hook.hooks:
+                        if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
+                            offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
+                elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
+                    offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
+
+        self.assertTrue(
+            len(offloaded_modules_with_incorrect_hooks) == 0,
+            f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
+        )
+
     @unittest.skipIf(
         torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
         reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
     )
     def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
+        import accelerate
+
         generator_device = "cpu"
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
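The added assertions pin down what offloading should leave behind: every offloaded `torch.nn.Module` component carries an accelerate hook (`AlignDevicesHook`, possibly wrapped in a `SequentialHook`, for sequential offload; `CpuOffload` for model offload, as in the next hunk). A rough way to inspect this by hand, mirroring the test logic (needs CUDA and a recent `accelerate`; the model id is only an example):

import accelerate
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe.enable_model_cpu_offload()

for name, module in pipe.components.items():
    if isinstance(module, torch.nn.Module) and name not in pipe._exclude_from_cpu_offload:
        hook = getattr(module, "_hf_hook", None)
        # with model offload every component should carry a `CpuOffload` hook
        print(name, type(hook).__name__, isinstance(hook, accelerate.hooks.CpuOffload))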
@@ -1400,19 +1440,39 @@ class PipelineTesterMixin:
         output_without_offload = pipe(**inputs)[0]

         pipe.enable_model_cpu_offload()
+        assert pipe._execution_device.type == pipe._offload_device.type

         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]

         max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
         self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
-        offloaded_modules = [
-            v
-            for k, v in pipe.components.items()
-            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
-        ]
-        (
-            self.assertTrue(all(v.device.type == "cpu" for v in offloaded_modules)),
-            f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}",
+
+        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
+        offloaded_modules = {
+            k: v
+            for k, v in pipe.components.items()
+            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
+        }
+        # 1. check if all offloaded modules are saved to cpu
+        self.assertTrue(
+            all(v.device.type == "cpu" for v in offloaded_modules.values()),
+            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
+        )
+        # 2. check if all offloaded modules have hooks installed
+        self.assertTrue(
+            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
+            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
+        )
+        # 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
+        offloaded_modules_with_incorrect_hooks = {}
+        for k, v in offloaded_modules.items():
+            if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
+                offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
+
+        self.assertTrue(
+            len(offloaded_modules_with_incorrect_hooks) == 0,
+            f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
         )

     @unittest.skipIf(
@@ -1444,16 +1504,24 @@ class PipelineTesterMixin:
         self.assertLess(
             max_diff, expected_max_diff, "running CPU offloading 2nd time should not affect the inference results"
         )

+        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
         offloaded_modules = {
             k: v
             for k, v in pipe.components.items()
             if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
         }
+        # 1. check if all offloaded modules are saved to cpu
         self.assertTrue(
             all(v.device.type == "cpu" for v in offloaded_modules.values()),
             f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
         )
+        # 2. check if all offloaded modules have hooks installed
+        self.assertTrue(
+            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
+            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
+        )
+        # 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
         offloaded_modules_with_incorrect_hooks = {}
         for k, v in offloaded_modules.items():
             if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
@@ -1493,19 +1561,36 @@ class PipelineTesterMixin:
         self.assertLess(
             max_diff, expected_max_diff, "running sequential offloading second time should have the inference results"
         )

+        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
         offloaded_modules = {
             k: v
             for k, v in pipe.components.items()
             if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
         }
+        # 1. check if all offloaded modules are moved to meta device
         self.assertTrue(
             all(v.device.type == "meta" for v in offloaded_modules.values()),
             f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
         )
+        # 2. check if all offloaded modules have hook installed
+        self.assertTrue(
+            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
+            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
+        )
+        # 3. check if all offloaded modules have correct hooks installed, should be either one of these two
+        # - `AlignDevicesHook`
+        # - a SequentialHook` that contains `AlignDevicesHook`
         offloaded_modules_with_incorrect_hooks = {}
         for k, v in offloaded_modules.items():
-            if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
-                offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
+            if hasattr(v, "_hf_hook"):
+                if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
+                    # if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
+                    for hook in v._hf_hook.hooks:
+                        if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
+                            offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
+                elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
+                    offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)

         self.assertTrue(
             len(offloaded_modules_with_incorrect_hooks) == 0,