Mirror of https://github.com/huggingface/diffusers.git
Synced 2025-12-08 05:24:20 +08:00

Compare commits: fast-gpu-m ... rope-init-
4 commits
| Author | SHA1 | Date |
|---|---|---|
|  | 100166ed53 |  |
|  | 16704379a0 |  |
|  | 4bd87a1fe9 |  |
|  | 484443e0b4 |  |
.github/workflows/benchmark.yml (3 changes)

@@ -7,7 +7,6 @@ on:

 env:
   DIFFUSERS_IS_CI: yes
-  HF_HUB_ENABLE_HF_TRANSFER: 1
   HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
@@ -51,7 +50,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: benchmark_test_reports
           path: benchmarks/benchmark_outputs
.github/workflows/build_docker_images.yml (3 changes)

@@ -34,7 +34,7 @@ jobs:
         id: file_changes
         uses: jitterbit/get-changed-files@v1
         with:
-          format: "space-delimited"
+          format: 'space-delimited'
           token: ${{ secrets.GITHUB_TOKEN }}

       - name: Build Changed Docker Images
@@ -67,7 +67,6 @@ jobs:
           - diffusers-pytorch-cuda
           - diffusers-pytorch-compile-cuda
           - diffusers-pytorch-xformers-cuda
-          - diffusers-pytorch-minimum-cuda
           - diffusers-flax-cpu
           - diffusers-flax-tpu
           - diffusers-onnxruntime-cpu
(unnamed file; header not captured)

@@ -25,7 +25,7 @@ jobs:
     env:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_COMMUNITY_MIRROR }}

-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       # Checkout to correct ref
       # If workflow dispatch
.github/workflows/nightly_tests.yml (216 changes)

@@ -43,7 +43,7 @@ jobs:

       - name: Pipeline Tests Artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: test-pipelines.json
           path: reports
@@ -72,14 +72,14 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install pytest-reportlog
       - name: Environment
         run: |
           python utils/print_env.py
       - name: Pipeline CUDA Test
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
         run: |
@@ -95,7 +95,7 @@ jobs:
           cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pipeline_${{ matrix.module }}_test_reports
           path: reports
@@ -130,8 +130,8 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install pytest-reportlog
       - name: Environment
         run: python utils/print_env.py
@@ -139,7 +139,7 @@ jobs:
       - name: Run nightly PyTorch CUDA tests for non-pipeline modules
         if: ${{ matrix.module != 'examples'}}
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
         run: |
@@ -152,7 +152,7 @@ jobs:
       - name: Run nightly example tests with Torch
         if: ${{ matrix.module == 'examples' }}
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
         run: |
@@ -169,7 +169,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: torch_${{ matrix.module }}_cuda_test_reports
           path: reports
@@ -180,128 +180,14 @@ jobs:
           pip install slack_sdk tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

-  run_big_gpu_torch_tests:
-    name: Torch tests on big GPU
-    strategy:
-      fail-fast: false
-      max-parallel: 2
-    runs-on:
-      group: aws-g6e-xlarge-plus
-    container:
-      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus 0
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-      - name: NVIDIA-SMI
-        run: nvidia-smi
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          python -m uv pip install pytest-reportlog
-      - name: Environment
-        run: |
-          python utils/print_env.py
-      - name: Selected Torch CUDA Test on big GPU
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
-          CUBLAS_WORKSPACE_CONFIG: :16:8
-          BIG_GPU_MEMORY: 40
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -m "big_gpu_with_torch_cuda" \
-            --make-reports=tests_big_gpu_torch_cuda \
-            --report-log=tests_big_gpu_torch_cuda.log \
-            tests/
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_big_gpu_torch_cuda_stats.txt
-          cat reports/tests_big_gpu_torch_cuda_failures_short.txt
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: torch_cuda_big_gpu_test_reports
-          path: reports
-      - name: Generate Report and Notify Channel
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
-
-  torch_minimum_version_cuda_tests:
-    name: Torch Minimum Version CUDA Tests
-    runs-on:
-      group: aws-g4dn-2xlarge
-    container:
-      image: diffusers/diffusers-pytorch-minimum-cuda
-      options: --shm-size "16gb" --ipc host --gpus 0
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run PyTorch CUDA tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
-          CUBLAS_WORKSPACE_CONFIG: :16:8
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "not Flax and not Onnx" \
-            --make-reports=tests_torch_minimum_version_cuda \
-            tests/models/test_modeling_common.py \
-            tests/pipelines/test_pipelines_common.py \
-            tests/pipelines/test_pipeline_utils.py \
-            tests/pipelines/test_pipelines.py \
-            tests/pipelines/test_pipelines_auto.py \
-            tests/schedulers/test_schedulers.py \
-            tests/others
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_torch_minimum_version_cuda_stats.txt
-          cat reports/tests_torch_minimum_version_cuda_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: torch_minimum_version_cuda_test_reports
-          path: reports
-
   run_flax_tpu_tests:
     name: Nightly Flax TPU Tests
-    runs-on:
-      group: gcp-ct5lp-hightpu-8t
+    runs-on: docker-tpu
     if: github.event_name == 'schedule'

     container:
       image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
     defaults:
       run:
         shell: bash
@@ -315,7 +201,7 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install pytest-reportlog

       - name: Environment
@@ -323,7 +209,7 @@ jobs:

       - name: Run nightly Flax TPU tests
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           python -m pytest -n 0 \
             -s -v -k "Flax" \
@@ -339,7 +225,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: flax_tpu_test_reports
           path: reports
@@ -371,14 +257,14 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install pytest-reportlog
       - name: Environment
         run: python utils/print_env.py

       - name: Run Nightly ONNXRuntime CUDA tests
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "Onnx" \
@@ -394,9 +280,9 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
-          name: tests_onnx_cuda_reports
+          name: ${{ matrix.config.report }}_test_reports
           path: reports

       - name: Generate Report and Notify Channel
@@ -405,68 +291,6 @@ jobs:
           pip install slack_sdk tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

-  run_nightly_quantization_tests:
-    name: Torch quantization nightly tests
-    strategy:
-      fail-fast: false
-      max-parallel: 2
-      matrix:
-        config:
-          - backend: "bitsandbytes"
-            test_location: "bnb"
-          - backend: "gguf"
-            test_location: "gguf"
-          - backend: "torchao"
-            test_location: "torchao"
-    runs-on:
-      group: aws-g6e-xlarge-plus
-    container:
-      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "20gb" --ipc host --gpus 0
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-      - name: NVIDIA-SMI
-        run: nvidia-smi
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          python -m uv pip install -U ${{ matrix.config.backend }}
-          python -m uv pip install pytest-reportlog
-      - name: Environment
-        run: |
-          python utils/print_env.py
-      - name: ${{ matrix.config.backend }} quantization tests on GPU
-        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
-          CUBLAS_WORKSPACE_CONFIG: :16:8
-          BIG_GPU_MEMORY: 40
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
-            --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
-            tests/quantization/${{ matrix.config.test_location }}
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_${{ matrix.config.backend }}_torch_cuda_stats.txt
-          cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: torch_cuda_${{ matrix.config.backend }}_reports
-          path: reports
-      - name: Generate Report and Notify Channel
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
-
   # M1 runner currently not well supported
   # TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
   # run_nightly_tests_apple_m1:
@@ -516,7 +340,7 @@ jobs:
   #
   # - name: Test suite reports artifacts
   # if: ${{ always() }}
-  # uses: actions/upload-artifact@v4
+  # uses: actions/upload-artifact@v2
   # with:
   # name: torch_mps_test_reports
   # path: reports
@@ -572,7 +396,7 @@ jobs:
   #
   # - name: Test suite reports artifacts
   # if: ${{ always() }}
-  # uses: actions/upload-artifact@v4
+  # uses: actions/upload-artifact@v2
   # with:
   # name: torch_mps_test_reports
   # path: reports
@@ -581,4 +405,4 @@ jobs:
   # if: always()
   # run: |
   # pip install slack_sdk tabulate
   # python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
(unnamed file; header not captured)

@@ -7,7 +7,7 @@ on:

 jobs:
   build:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest

     steps:
       - uses: actions/checkout@v3
.github/workflows/pr_dependency_test.yml (2 changes)

@@ -16,7 +16,7 @@ concurrency:

 jobs:
   check_dependencies:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
(unnamed file; header not captured)

@@ -16,7 +16,7 @@ concurrency:

 jobs:
   check_flax_dependencies:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
.github/workflows/pr_test_fetcher.yml (2 changes)

@@ -171,7 +171,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pr_${{ matrix.config.report }}_test_reports
           path: reports
.github/workflows/pr_test_peft_backend.yml (new file, 132 lines)

@@ -0,0 +1,132 @@
+name: Fast tests for PRs - PEFT backend
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "src/diffusers/**.py"
+      - "tests/**.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  DIFFUSERS_IS_CI: yes
+  OMP_NUM_THREADS: 4
+  MKL_NUM_THREADS: 4
+  PYTEST_TIMEOUT: 60
+
+jobs:
+  check_code_quality:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[quality]
+      - name: Check quality
+        run: make quality
+      - name: Check if failure
+        if: ${{ failure() }}
+        run: |
+          echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
+
+  check_repository_consistency:
+    needs: check_code_quality
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[quality]
+      - name: Check repo consistency
+        run: |
+          python utils/check_copies.py
+          python utils/check_dummies.py
+          make deps_table_check_updated
+      - name: Check if failure
+        if: ${{ failure() }}
+        run: |
+          echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
+
+  run_fast_tests:
+    needs: [check_code_quality, check_repository_consistency]
+    strategy:
+      fail-fast: false
+      matrix:
+        lib-versions: ["main", "latest"]
+
+
+    name: LoRA - ${{ matrix.lib-versions }}
+
+    runs-on:
+      group: aws-general-8-plus
+
+    container:
+      image: diffusers/diffusers-pytorch-cpu
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+
+      - name: Install dependencies
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+          if [ "${{ matrix.lib-versions }}" == "main" ]; then
+            python -m pip install -U peft@git+https://github.com/huggingface/peft.git
+            python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git
+            python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          else
+            python -m uv pip install -U peft transformers accelerate
+          fi
+
+      - name: Environment
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python utils/print_env.py
+
+      - name: Run fast PyTorch LoRA CPU tests with PEFT backend
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+            -s -v \
+            --make-reports=tests_${{ matrix.config.report }} \
+            tests/lora/
+          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+            -s -v \
+            --make-reports=tests_models_lora_${{ matrix.config.report }} \
+            tests/models/ -k "lora"
+
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_${{ matrix.config.report }}_failures_short.txt
+          cat reports/tests_models_lora_${{ matrix.config.report }}_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: pr_${{ matrix.config.report }}_test_reports
+          path: reports
.github/workflows/pr_tests.yml (76 changes)

@@ -22,14 +22,13 @@ concurrency:

 env:
   DIFFUSERS_IS_CI: yes
-  HF_HUB_ENABLE_HF_TRANSFER: 1
   OMP_NUM_THREADS: 4
   MKL_NUM_THREADS: 4
   PYTEST_TIMEOUT: 60

 jobs:
   check_code_quality:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
@@ -49,7 +48,7 @@ jobs:

   check_repository_consistency:
     needs: check_code_quality
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
@@ -169,9 +168,9 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
-          name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
+          name: pr_${{ matrix.config.report }}_test_reports
           path: reports

   run_staging_tests:
@@ -230,72 +229,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pr_${{ matrix.config.report }}_test_reports
           path: reports
-
-  run_lora_tests:
-    needs: [check_code_quality, check_repository_consistency]
-    strategy:
-      fail-fast: false
-
-    name: LoRA tests with PEFT main
-
-    runs-on:
-      group: aws-general-8-plus
-
-    container:
-      image: diffusers/diffusers-pytorch-cpu
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
-
-    defaults:
-      run:
-        shell: bash
-
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          # TODO (sayakpaul, DN6): revisit `--no-deps`
-          python -m pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
-          python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
-          python -m uv pip install -U tokenizers
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
-
-      - name: Environment
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python utils/print_env.py
-
-      - name: Run fast PyTorch LoRA tests with PEFT
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-            -s -v \
-            --make-reports=tests_peft_main \
-            tests/lora/
-          python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-            -s -v \
-            --make-reports=tests_models_lora_peft_main \
-            tests/models/ -k "lora"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_lora_failures_short.txt
-          cat reports/tests_models_lora_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: pr_main_test_reports
-          path: reports
-
(unnamed file; header not captured)

@@ -16,7 +16,7 @@ concurrency:

 jobs:
   check_torch_dependencies:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
.github/workflows/push_tests.yml (49 changes)

@@ -1,7 +1,6 @@
 name: Fast GPU Tests on main

 on:
-  workflow_dispatch:
   push:
     branches:
       - main
@@ -14,7 +13,6 @@ env:
   DIFFUSERS_IS_CI: yes
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
-  HF_HUB_ENABLE_HF_TRANSFER: 1
   PYTEST_TIMEOUT: 600
   PIPELINE_USAGE_CUTOFF: 50000

@@ -47,7 +45,7 @@ jobs:
           echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
       - name: Pipeline Tests Artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: test-pipelines.json
           path: reports
@@ -77,13 +75,13 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
       - name: Environment
         run: |
           python utils/print_env.py
-      - name: PyTorch CUDA checkpoint tests on Ubuntu
+      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
         run: |
@@ -98,7 +96,7 @@ jobs:
           cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pipeline_${{ matrix.module }}_test_reports
           path: reports
@@ -128,8 +126,8 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
@@ -137,35 +135,34 @@ jobs:

       - name: Run PyTorch CUDA tests
         env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
         run: |
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "not Flax and not Onnx" \
-            --make-reports=tests_torch_cuda_${{ matrix.module }} \
+            --make-reports=tests_torch_cuda \
             tests/${{ matrix.module }}

       - name: Failure short reports
         if: ${{ failure() }}
         run: |
-          cat reports/tests_torch_cuda_${{ matrix.module }}_stats.txt
-          cat reports/tests_torch_cuda_${{ matrix.module }}_failures_short.txt
+          cat reports/tests_torch_cuda_stats.txt
+          cat reports/tests_torch_cuda_failures_short.txt

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
-          name: torch_cuda_test_reports_${{ matrix.module }}
+          name: torch_cuda_test_reports
           path: reports

   flax_tpu_tests:
     name: Flax TPU Tests
-    runs-on:
-      group: gcp-ct5lp-hightpu-8t
+    runs-on: docker-tpu
     container:
       image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --privileged
     defaults:
       run:
         shell: bash
@@ -179,13 +176,13 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
           python utils/print_env.py

-      - name: Run Flax TPU tests
+      - name: Run slow Flax TPU tests
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
@@ -202,7 +199,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: flax_tpu_test_reports
           path: reports
@@ -227,13 +224,13 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
           python utils/print_env.py

-      - name: Run ONNXRuntime CUDA tests
+      - name: Run slow ONNXRuntime CUDA tests
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
@@ -250,7 +247,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: onnx_cuda_test_reports
           path: reports
@@ -293,7 +290,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: torch_compile_test_reports
           path: reports
@@ -335,7 +332,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: torch_xformers_test_reports
           path: reports
@@ -386,7 +383,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: examples_test_reports
           path: reports
.github/workflows/push_tests_fast.yml (3 changes)

@@ -18,7 +18,6 @@ env:
   HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
-  HF_HUB_ENABLE_HF_TRANSFER: 1
   PYTEST_TIMEOUT: 600
   RUN_SLOW: no

@@ -120,7 +119,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pr_${{ matrix.config.report }}_test_reports
           path: reports
.github/workflows/push_tests_mps.yml (5 changes)

@@ -13,7 +13,6 @@ env:
   HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
-  HF_HUB_ENABLE_HF_TRANSFER: 1
   PYTEST_TIMEOUT: 600
   RUN_SLOW: no

@@ -46,7 +45,7 @@ jobs:
         shell: arch -arch arm64 bash {0}
         run: |
           ${CONDA_RUN} python -m pip install --upgrade pip uv
-          ${CONDA_RUN} python -m uv pip install -e ".[quality,test]"
+          ${CONDA_RUN} python -m uv pip install -e [quality,test]
           ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio
           ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           ${CONDA_RUN} python -m uv pip install transformers --upgrade
@@ -70,7 +69,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pr_torch_mps_test_reports
           path: reports
.github/workflows/pypi_publish.yaml (6 changes)

@@ -10,7 +10,7 @@ on:

 jobs:
   find-and-checkout-latest-branch:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     outputs:
       latest_branch: ${{ steps.set_latest_branch.outputs.latest_branch }}
     steps:
@@ -36,7 +36,7 @@ jobs:

   release:
     needs: find-and-checkout-latest-branch
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest

     steps:
       - name: Checkout Repo
@@ -68,7 +68,7 @@ jobs:
       - name: Test installing diffusers and importing
         run: |
           pip install diffusers && pip uninstall diffusers -y
-          pip install -i https://test.pypi.org/simple/ diffusers
+          pip install -i https://testpypi.python.org/pypi diffusers
           python -c "from diffusers import __version__; print(__version__)"
           python -c "from diffusers import DiffusionPipeline; pipe = DiffusionPipeline.from_pretrained('fusing/unet-ldm-dummy-update'); pipe()"
           python -c "from diffusers import DiffusionPipeline; pipe = DiffusionPipeline.from_pretrained('hf-internal-testing/tiny-stable-diffusion-pipe', safety_checker=None); pipe('ah suh du')"
.github/workflows/release_tests_fast.yml (89 changes)

@@ -45,7 +45,7 @@ jobs:
           echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
       - name: Pipeline Tests Artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: test-pipelines.json
           path: reports
@@ -75,7 +75,7 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
       - name: Environment
         run: |
           python utils/print_env.py
@@ -96,7 +96,7 @@ jobs:
           cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: pipeline_${{ matrix.module }}_test_reports
           path: reports
@@ -126,8 +126,8 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
           python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
@@ -141,79 +141,22 @@ jobs:
         run: |
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -s -v -k "not Flax and not Onnx" \
-            --make-reports=tests_torch_${{ matrix.module }}_cuda \
+            --make-reports=tests_torch_cuda \
             tests/${{ matrix.module }}

       - name: Failure short reports
         if: ${{ failure() }}
         run: |
-          cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
-          cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt
+          cat reports/tests_torch_cuda_stats.txt
+          cat reports/tests_torch_cuda_failures_short.txt

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
-          name: torch_cuda_${{ matrix.module }}_test_reports
+          name: torch_cuda_test_reports
           path: reports

-  torch_minimum_version_cuda_tests:
-    name: Torch Minimum Version CUDA Tests
-    runs-on:
-      group: aws-g4dn-2xlarge
-    container:
-      image: diffusers/diffusers-pytorch-minimum-cuda
-      options: --shm-size "16gb" --ipc host --gpus 0
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout diffusers
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m uv pip install -e [quality,test]
-          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-
-      - name: Environment
-        run: |
-          python utils/print_env.py
-
-      - name: Run PyTorch CUDA tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
-          CUBLAS_WORKSPACE_CONFIG: :16:8
-        run: |
-          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -s -v -k "not Flax and not Onnx" \
-            --make-reports=tests_torch_minimum_cuda \
-            tests/models/test_modeling_common.py \
-            tests/pipelines/test_pipelines_common.py \
-            tests/pipelines/test_pipeline_utils.py \
-            tests/pipelines/test_pipelines.py \
-            tests/pipelines/test_pipelines_auto.py \
-            tests/schedulers/test_schedulers.py \
-            tests/others
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        run: |
-          cat reports/tests_torch_minimum_version_cuda_stats.txt
-          cat reports/tests_torch_minimum_version_cuda_failures_short.txt
-
-      - name: Test suite reports artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: torch_minimum_version_cuda_test_reports
-          path: reports
-
   flax_tpu_tests:
     name: Flax TPU Tests
     runs-on: docker-tpu
@@ -233,7 +176,7 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
@@ -256,7 +199,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: flax_tpu_test_reports
           path: reports
@@ -281,7 +224,7 @@ jobs:
         run: |
           python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
           python -m uv pip install -e [quality,test]
-          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git

       - name: Environment
         run: |
@@ -304,7 +247,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: onnx_cuda_test_reports
           path: reports
@@ -347,7 +290,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: torch_compile_test_reports
           path: reports
@@ -389,7 +332,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: torch_xformers_test_reports
           path: reports
@@ -440,7 +383,7 @@ jobs:

       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v2
         with:
           name: examples_test_reports
           path: reports
.github/workflows/ssh-runner.yml (7 changes)

@@ -4,13 +4,8 @@ on:
   workflow_dispatch:
     inputs:
       runner_type:
-        description: 'Type of runner to test (aws-g6-4xlarge-plus: a10, aws-g4dn-2xlarge: t4, aws-g6e-xlarge-plus: L40)'
+        description: 'Type of runner to test (a10 or t4)'
-        type: choice
         required: true
-        options:
-          - aws-g6-4xlarge-plus
-          - aws-g4dn-2xlarge
-          - aws-g6e-xlarge-plus
       docker_image:
         description: 'Name of the Docker image'
         required: true
.github/workflows/stale.yml (5 changes)

@@ -8,10 +8,7 @@ jobs:
   close_stale_issues:
     name: Close Stale Issues
     if: github.repository == 'huggingface/diffusers'
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-      pull-requests: write
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
.github/workflows/trufflehog.yml (2 changes)

@@ -5,7 +5,7 @@ name: Secret Leaks

 jobs:
   trufflehog:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
2 .github/workflows/typos.yml vendored
@@ -5,7 +5,7 @@ on:

jobs:
build:
-runs-on: ubuntu-22.04
+runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
@@ -65,7 +65,7 @@ Pipelines are designed to be easy to use (therefore do not follow [*Simple over
The following design principles are followed:
- Pipelines follow the single-file policy. All pipelines can be found in individual directories under src/diffusers/pipelines. One pipeline folder corresponds to one diffusion paper/project/release. Multiple pipeline files can be gathered in one pipeline folder, as it’s done for [`src/diffusers/pipelines/stable-diffusion`](https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/stable_diffusion). If pipelines share similar functionality, one can make use of the [# Copied from mechanism](https://github.com/huggingface/diffusers/blob/125d783076e5bd9785beb05367a2d2566843a271/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py#L251).
- Pipelines all inherit from [`DiffusionPipeline`].
-- Every pipeline consists of different model and scheduler components, that are documented in the [`model_index.json` file](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/model_index.json), are accessible under the same name as attributes of the pipeline and can be shared between pipelines with [`DiffusionPipeline.components`](https://huggingface.co/docs/diffusers/main/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.components) function.
+- Every pipeline consists of different model and scheduler components, that are documented in the [`model_index.json` file](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/model_index.json), are accessible under the same name as attributes of the pipeline and can be shared between pipelines with [`DiffusionPipeline.components`](https://huggingface.co/docs/diffusers/main/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.components) function.
- Every pipeline should be loadable via the [`DiffusionPipeline.from_pretrained`](https://huggingface.co/docs/diffusers/main/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained) function.
- Pipelines should be used **only** for inference.
- Pipelines should be very readable, self-explanatory, and easy to tweak.
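The design principles in the hunk above reference sharing components through `DiffusionPipeline.components`. As a rough illustration of that pattern (the checkpoint ID and dtype below are placeholders for the sketch, not taken from this diff), components loaded once can seed a second pipeline without re-downloading or re-allocating them:

```python
import torch
from diffusers import DiffusionPipeline, StableDiffusionImg2ImgPipeline

# Placeholder checkpoint; any Stable Diffusion v1.x repo behaves the same way.
pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# `pipe.components` is a dict of the UNet, VAE, text encoder, tokenizer,
# scheduler, and friends; unpacking it reuses the same modules elsewhere.
img2img = StableDiffusionImg2ImgPipeline(**pipe.components)
```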
12 README.md
@@ -73,7 +73,7 @@ Generating outputs is super easy with 🤗 Diffusers. To generate an image from
from diffusers import DiffusionPipeline
import torch

-pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16)
+pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipeline.to("cuda")
pipeline("An image of a squirrel in Picasso style").images[0]
```
@@ -112,9 +112,9 @@ Check out the [Quickstart](https://huggingface.co/docs/diffusers/quicktour) to l
| **Documentation** | **What can I learn?** |
|---------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Tutorial](https://huggingface.co/docs/diffusers/tutorials/tutorial_overview) | A basic crash course for learning how to use the library's most important features like using models and schedulers to build your own diffusion system, and training your own diffusion model. |
-| [Loading](https://huggingface.co/docs/diffusers/using-diffusers/loading) | Guides for how to load and configure all the components (pipelines, models, and schedulers) of the library, as well as how to use different schedulers. |
+| [Loading](https://huggingface.co/docs/diffusers/using-diffusers/loading_overview) | Guides for how to load and configure all the components (pipelines, models, and schedulers) of the library, as well as how to use different schedulers. |
-| [Pipelines for inference](https://huggingface.co/docs/diffusers/using-diffusers/overview_techniques) | Guides for how to use pipelines for different inference tasks, batched generation, controlling generated outputs and randomness, and how to contribute a pipeline to the library. |
+| [Pipelines for inference](https://huggingface.co/docs/diffusers/using-diffusers/pipeline_overview) | Guides for how to use pipelines for different inference tasks, batched generation, controlling generated outputs and randomness, and how to contribute a pipeline to the library. |
-| [Optimization](https://huggingface.co/docs/diffusers/optimization/fp16) | Guides for how to optimize your diffusion model to run faster and consume less memory. |
+| [Optimization](https://huggingface.co/docs/diffusers/optimization/opt_overview) | Guides for how to optimize your diffusion model to run faster and consume less memory. |
| [Training](https://huggingface.co/docs/diffusers/training/overview) | Guides for how to train a diffusion model for different tasks with different training techniques. |
## Contribution

@@ -144,7 +144,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
<tr style="border-top: 2px solid black">
<td>Text-to-Image</td>
<td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img">Stable Diffusion Text-to-Image</a></td>
-<td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5"> stable-diffusion-v1-5/stable-diffusion-v1-5 </a></td>
+<td><a href="https://huggingface.co/runwayml/stable-diffusion-v1-5"> runwayml/stable-diffusion-v1-5 </a></td>
</tr>
<tr>
<td>Text-to-Image</td>
@@ -174,7 +174,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
<tr>
<td>Text-guided Image-to-Image</td>
<td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img">Stable Diffusion Image-to-Image</a></td>
-<td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5"> stable-diffusion-v1-5/stable-diffusion-v1-5 </a></td>
+<td><a href="https://huggingface.co/runwayml/stable-diffusion-v1-5"> runwayml/stable-diffusion-v1-5 </a></td>
</tr>
<tr style="border-top: 2px solid black">
<td>Text-guided Image Inpainting</td>
@@ -34,7 +34,7 @@ from utils import ( # noqa: E402


RESOLUTION_MAPPING = {
-"Lykon/DreamShaper": (512, 512),
+"runwayml/stable-diffusion-v1-5": (512, 512),
"lllyasviel/sd-controlnet-canny": (512, 512),
"diffusers/controlnet-canny-sdxl-1.0": (1024, 1024),
"TencentARC/t2iadapter_canny_sd14v1": (512, 512),
@@ -268,7 +268,7 @@ class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
class ControlNetBenchmark(TextToImageBenchmark):
pipeline_class = StableDiffusionControlNetPipeline
aux_network_class = ControlNetModel
-root_ckpt = "Lykon/DreamShaper"
+root_ckpt = "runwayml/stable-diffusion-v1-5"

url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_image_condition.png"
image = load_image(url).convert("RGB")
@@ -311,7 +311,7 @@ class ControlNetSDXLBenchmark(ControlNetBenchmark):
class T2IAdapterBenchmark(ControlNetBenchmark):
pipeline_class = StableDiffusionAdapterPipeline
aux_network_class = T2IAdapter
-root_ckpt = "Lykon/DreamShaper"
+root_ckpt = "CompVis/stable-diffusion-v1-4"

url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter.png"
image = load_image(url).convert("L")
@@ -7,8 +7,7 @@ from base_classes import IPAdapterTextToImageBenchmark # noqa: E402


IP_ADAPTER_CKPTS = {
-# because original SD v1.5 has been taken down.
+"runwayml/stable-diffusion-v1-5": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
-"Lykon/DreamShaper": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
"stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}

@@ -18,7 +17,7 @@ if __name__ == "__main__":
parser.add_argument(
"--ckpt",
type=str,
-default="rstabilityai/stable-diffusion-xl-base-1.0",
+default="runwayml/stable-diffusion-v1-5",
choices=list(IP_ADAPTER_CKPTS.keys()),
)
parser.add_argument("--batch_size", type=int, default=1)
@@ -11,9 +11,9 @@ if __name__ == "__main__":
parser.add_argument(
"--ckpt",
type=str,
-default="Lykon/DreamShaper",
+default="runwayml/stable-diffusion-v1-5",
choices=[
-"Lykon/DreamShaper",
+"runwayml/stable-diffusion-v1-5",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-refiner-1.0",
"stabilityai/sdxl-turbo",

@@ -11,9 +11,9 @@ if __name__ == "__main__":
parser.add_argument(
"--ckpt",
type=str,
-default="Lykon/DreamShaper",
+default="runwayml/stable-diffusion-v1-5",
choices=[
-"Lykon/DreamShaper",
+"runwayml/stable-diffusion-v1-5",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-base-1.0",
],
@@ -7,7 +7,7 @@ from base_classes import TextToImageBenchmark, TurboTextToImageBenchmark # noqa


ALL_T2I_CKPTS = [
-"Lykon/DreamShaper",
+"runwayml/stable-diffusion-v1-5",
"segmind/SSD-1B",
"stabilityai/stable-diffusion-xl-base-1.0",
"kandinsky-community/kandinsky-2-2-decoder",
@@ -21,7 +21,7 @@ if __name__ == "__main__":
parser.add_argument(
"--ckpt",
type=str,
-default="Lykon/DreamShaper",
+default="runwayml/stable-diffusion-v1-5",
choices=ALL_T2I_CKPTS,
)
parser.add_argument("--batch_size", type=int, default=1)
@@ -3,7 +3,7 @@ import sys

import pandas as pd
from huggingface_hub import hf_hub_download, upload_file
-from huggingface_hub.utils import EntryNotFoundError
+from huggingface_hub.utils._errors import EntryNotFoundError


sys.path.append(".")
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers \
+transformers
-hf_transfer

CMD ["/bin/bash"]
@@ -45,7 +45,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers \
+transformers
-hf_transfer

CMD ["/bin/bash"]
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers \
+transformers
-hf_transfer

CMD ["/bin/bash"]
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers \
+transformers
-hf_transfer

CMD ["/bin/bash"]
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers \
+transformers
-hf_transfer

CMD ["/bin/bash"]
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
-transformers matplotlib \
+transformers matplotlib
-hf_transfer

CMD ["/bin/bash"]
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \
tensorboard \
transformers \
-pytorch-lightning \
+pytorch-lightning
-hf_transfer

CMD ["/bin/bash"]
@@ -1,53 +0,0 @@
-FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
-LABEL maintainer="Hugging Face"
-LABEL repository="diffusers"
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0"
-ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0"
-ENV MINIMUM_SUPPORTED_TORCHAUDIO_VERSION="2.1.0"
-
-RUN apt-get -y update \
-&& apt-get install -y software-properties-common \
-&& add-apt-repository ppa:deadsnakes/ppa
-
-RUN apt install -y bash \
-build-essential \
-git \
-git-lfs \
-curl \
-ca-certificates \
-libsndfile1-dev \
-libgl1 \
-python3.10 \
-python3.10-dev \
-python3-pip \
-python3.10-venv && \
-rm -rf /var/lib/apt/lists
-
-# make sure to use venv
-RUN python3.10 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-python3.10 -m uv pip install --no-cache-dir \
-torch==$MINIMUM_SUPPORTED_TORCH_VERSION \
-torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \
-torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION \
-invisible_watermark && \
-python3.10 -m pip install --no-cache-dir \
-accelerate \
-datasets \
-hf-doc-builder \
-huggingface-hub \
-hf_transfer \
-Jinja2 \
-librosa \
-numpy==1.26.4 \
-scipy \
-tensorboard \
-transformers \
-hf_transfer
-
-CMD ["/bin/bash"]
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \
tensorboard \
transformers \
-xformers \
+xformers
-hf_transfer

CMD ["/bin/bash"]
@@ -48,17 +48,15 @@
- local: using-diffusers/inpaint
title: Inpainting
- local: using-diffusers/text-img2vid
-title: Video generation
+title: Text or image-to-video
- local: using-diffusers/depth2img
title: Depth-to-image
title: Generative tasks
- sections:
- local: using-diffusers/overview_techniques
title: Overview
-- local: using-diffusers/create_a_server
-title: Create a server
- local: training/distributed_inference
-title: Distributed inference
+title: Distributed inference with multiple GPUs
- local: using-diffusers/merge_loras
title: Merge LoRAs
- local: using-diffusers/scheduler_features
@@ -77,8 +75,6 @@
title: Outpainting
title: Advanced inference
- sections:
-- local: using-diffusers/cogvideox
-title: CogVideoX
- local: using-diffusers/sdxl
title: Stable Diffusion XL
- local: using-diffusers/sdxl_turbo
@@ -133,8 +129,6 @@
title: T2I-Adapters
- local: training/instructpix2pix
title: InstructPix2Pix
-- local: training/cogvideox
-title: CogVideoX
title: Models
- isExpanded: false
sections:
@@ -152,16 +146,6 @@
title: Reinforcement learning training with DDPO
title: Methods
title: Training
-- sections:
-- local: quantization/overview
-title: Getting Started
-- local: quantization/bitsandbytes
-title: bitsandbytes
-- local: quantization/gguf
-title: gguf
-- local: quantization/torchao
-title: torchao
-title: Quantization Methods
- sections:
- local: optimization/fp16
title: Speed up inference
@@ -177,8 +161,6 @@
title: DeepCache
- local: optimization/tgate
title: TGATE
-- local: optimization/xdit
-title: xDiT
- sections:
- local: using-diffusers/stable_diffusion_jax_how_to
title: JAX/Flax
@@ -194,8 +176,6 @@
title: Metal Performance Shaders (MPS)
- local: optimization/habana
title: Habana Gaudi
-- local: optimization/neuron
-title: AWS Neuron
title: Optimized hardware
title: Accelerate inference and reduce memory
- sections:
@@ -223,8 +203,6 @@
title: Logging
- local: api/outputs
title: Outputs
-- local: api/quantization
-title: Quantization
title: Main Classes
- isExpanded: false
sections:
@@ -238,8 +216,6 @@
title: Textual Inversion
- local: api/loaders/unet
title: UNet
-- local: api/loaders/transformer_sd3
-title: SD3Transformer2D
- local: api/loaders/peft
title: PEFT
title: Loaders
@@ -258,42 +234,28 @@
title: SD3ControlNetModel
- local: api/models/controlnet_sparsectrl
title: SparseControlNetModel
-- local: api/models/controlnet_union
-title: ControlNetUnionModel
title: ControlNets
- sections:
-- local: api/models/allegro_transformer3d
-title: AllegroTransformer3DModel
- local: api/models/aura_flow_transformer2d
title: AuraFlowTransformer2DModel
- local: api/models/cogvideox_transformer3d
title: CogVideoXTransformer3DModel
-- local: api/models/cogview3plus_transformer2d
-title: CogView3PlusTransformer2DModel
- local: api/models/dit_transformer2d
title: DiTTransformer2DModel
- local: api/models/flux_transformer
title: FluxTransformer2DModel
- local: api/models/hunyuan_transformer2d
title: HunyuanDiT2DModel
-- local: api/models/hunyuan_video_transformer_3d
-title: HunyuanVideoTransformer3DModel
- local: api/models/latte_transformer3d
title: LatteTransformer3DModel
- local: api/models/lumina_nextdit2d
title: LuminaNextDiT2DModel
-- local: api/models/ltx_video_transformer3d
-title: LTXVideoTransformer3DModel
-- local: api/models/mochi_transformer3d
-title: MochiTransformer3DModel
- local: api/models/pixart_transformer2d
title: PixArtTransformer2DModel
- local: api/models/prior_transformer
title: PriorTransformer
- local: api/models/sd3_transformer2d
title: SD3Transformer2DModel
-- local: api/models/sana_transformer2d
-title: SanaTransformer2DModel
- local: api/models/stable_audio_transformer
title: StableAudioDiTModel
- local: api/models/transformer2d
@@ -320,20 +282,10 @@
- sections:
- local: api/models/autoencoderkl
title: AutoencoderKL
-- local: api/models/autoencoderkl_allegro
-title: AutoencoderKLAllegro
- local: api/models/autoencoderkl_cogvideox
title: AutoencoderKLCogVideoX
-- local: api/models/autoencoder_kl_hunyuan_video
-title: AutoencoderKLHunyuanVideo
-- local: api/models/autoencoderkl_ltx_video
-title: AutoencoderKLLTXVideo
-- local: api/models/autoencoderkl_mochi
-title: AutoencoderKLMochi
- local: api/models/asymmetricautoencoderkl
title: AsymmetricAutoencoderKL
-- local: api/models/autoencoder_dc
-title: AutoencoderDC
- local: api/models/consistency_decoder_vae
title: ConsistencyDecoderVAE
- local: api/models/autoencoder_oobleck
@@ -348,8 +300,6 @@
sections:
- local: api/pipelines/overview
title: Overview
-- local: api/pipelines/allegro
-title: Allegro
- local: api/pipelines/amused
title: aMUSEd
- local: api/pipelines/animatediff
@@ -368,8 +318,6 @@
title: BLIP-Diffusion
- local: api/pipelines/cogvideox
title: CogVideoX
-- local: api/pipelines/cogview3
-title: CogView3
- local: api/pipelines/consistency_models
title: Consistency Models
- local: api/pipelines/controlnet
@@ -386,8 +334,6 @@
title: ControlNet-XS
- local: api/pipelines/controlnetxs_sdxl
title: ControlNet-XS with Stable Diffusion XL
-- local: api/pipelines/controlnet_union
-title: ControlNetUnion
- local: api/pipelines/dance_diffusion
title: Dance Diffusion
- local: api/pipelines/ddim
@@ -402,12 +348,8 @@
title: DiT
- local: api/pipelines/flux
title: Flux
-- local: api/pipelines/control_flux_inpaint
-title: FluxControlInpaint
- local: api/pipelines/hunyuandit
title: Hunyuan-DiT
-- local: api/pipelines/hunyuan_video
-title: HunyuanVideo
- local: api/pipelines/i2vgenxl
title: I2VGen-XL
- local: api/pipelines/pix2pix
@@ -428,14 +370,10 @@
title: Latte
- local: api/pipelines/ledits_pp
title: LEDITS++
-- local: api/pipelines/ltx_video
-title: LTXVideo
- local: api/pipelines/lumina
title: Lumina-T2X
- local: api/pipelines/marigold
title: Marigold
-- local: api/pipelines/mochi
-title: Mochi
- local: api/pipelines/panorama
title: MultiDiffusion
- local: api/pipelines/musicldm
@@ -450,8 +388,6 @@
title: PixArt-α
- local: api/pipelines/pixart_sigma
title: PixArt-Σ
-- local: api/pipelines/sana
-title: Sana
- local: api/pipelines/self_attention_guidance
title: Self-Attention Guidance
- local: api/pipelines/semantic_stable_diffusion
@@ -15,135 +15,40 @@ specific language governing permissions and limitations under the License.
An attention processor is a class for applying different types of attention mechanisms.

## AttnProcessor

[[autodoc]] models.attention_processor.AttnProcessor

+## AttnProcessor2_0
[[autodoc]] models.attention_processor.AttnProcessor2_0

+## AttnAddedKVProcessor
[[autodoc]] models.attention_processor.AttnAddedKVProcessor

+## AttnAddedKVProcessor2_0
[[autodoc]] models.attention_processor.AttnAddedKVProcessor2_0

-[[autodoc]] models.attention_processor.AttnProcessorNPU

-[[autodoc]] models.attention_processor.FusedAttnProcessor2_0

-## Allegro

-[[autodoc]] models.attention_processor.AllegroAttnProcessor2_0

-## AuraFlow

-[[autodoc]] models.attention_processor.AuraFlowAttnProcessor2_0

-[[autodoc]] models.attention_processor.FusedAuraFlowAttnProcessor2_0

-## CogVideoX

-[[autodoc]] models.attention_processor.CogVideoXAttnProcessor2_0

-[[autodoc]] models.attention_processor.FusedCogVideoXAttnProcessor2_0

## CrossFrameAttnProcessor

[[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor

-## Custom Diffusion
+## CustomDiffusionAttnProcessor

[[autodoc]] models.attention_processor.CustomDiffusionAttnProcessor

+## CustomDiffusionAttnProcessor2_0
[[autodoc]] models.attention_processor.CustomDiffusionAttnProcessor2_0

+## CustomDiffusionXFormersAttnProcessor
[[autodoc]] models.attention_processor.CustomDiffusionXFormersAttnProcessor

-## Flux
+## FusedAttnProcessor2_0
+[[autodoc]] models.attention_processor.FusedAttnProcessor2_0
-[[autodoc]] models.attention_processor.FluxAttnProcessor2_0

-[[autodoc]] models.attention_processor.FusedFluxAttnProcessor2_0

-[[autodoc]] models.attention_processor.FluxSingleAttnProcessor2_0

-## Hunyuan

-[[autodoc]] models.attention_processor.HunyuanAttnProcessor2_0

-[[autodoc]] models.attention_processor.FusedHunyuanAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGHunyuanAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGCFGHunyuanAttnProcessor2_0

-## IdentitySelfAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGIdentitySelfAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGCFGIdentitySelfAttnProcessor2_0

-## IP-Adapter

-[[autodoc]] models.attention_processor.IPAdapterAttnProcessor

-[[autodoc]] models.attention_processor.IPAdapterAttnProcessor2_0

-[[autodoc]] models.attention_processor.SD3IPAdapterJointAttnProcessor2_0

-## JointAttnProcessor2_0

-[[autodoc]] models.attention_processor.JointAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGJointAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGCFGJointAttnProcessor2_0

-[[autodoc]] models.attention_processor.FusedJointAttnProcessor2_0

-## LoRA

-[[autodoc]] models.attention_processor.LoRAAttnProcessor

-[[autodoc]] models.attention_processor.LoRAAttnProcessor2_0

-[[autodoc]] models.attention_processor.LoRAAttnAddedKVProcessor

-[[autodoc]] models.attention_processor.LoRAXFormersAttnProcessor

-## Lumina-T2X

-[[autodoc]] models.attention_processor.LuminaAttnProcessor2_0

-## Mochi

-[[autodoc]] models.attention_processor.MochiAttnProcessor2_0

-[[autodoc]] models.attention_processor.MochiVaeAttnProcessor2_0

-## Sana

-[[autodoc]] models.attention_processor.SanaLinearAttnProcessor2_0

-[[autodoc]] models.attention_processor.SanaMultiscaleAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGCFGSanaLinearAttnProcessor2_0

-[[autodoc]] models.attention_processor.PAGIdentitySanaLinearAttnProcessor2_0

-## Stable Audio

-[[autodoc]] models.attention_processor.StableAudioAttnProcessor2_0

## SlicedAttnProcessor

[[autodoc]] models.attention_processor.SlicedAttnProcessor

+## SlicedAttnAddedKVProcessor
[[autodoc]] models.attention_processor.SlicedAttnAddedKVProcessor

## XFormersAttnProcessor

[[autodoc]] models.attention_processor.XFormersAttnProcessor

-[[autodoc]] models.attention_processor.XFormersAttnAddedKVProcessor
+## AttnProcessorNPU
+[[autodoc]] models.attention_processor.AttnProcessorNPU
-## XLAFlashAttnProcessor2_0

-[[autodoc]] models.attention_processor.XLAFlashAttnProcessor2_0
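For orientation while reading the hunk above: the processors catalogued in this page are attached to a model through `set_attn_processor`. A minimal sketch of that call (the checkpoint below is an illustrative placeholder, not part of this diff):

```python
import torch
from diffusers import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor2_0

# Placeholder checkpoint; any UNet2DConditionModel exposes the same hook.
unet = UNet2DConditionModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    subfolder="unet",
    torch_dtype=torch.float16,
)

# Swap every attention layer's processor for the PyTorch 2.x SDPA variant.
unet.set_attn_processor(AttnProcessor2_0())
```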
@@ -24,12 +24,6 @@ Learn how to load an IP-Adapter checkpoint and image in the IP-Adapter [loading]

[[autodoc]] loaders.ip_adapter.IPAdapterMixin

-## SD3IPAdapterMixin

-[[autodoc]] loaders.ip_adapter.SD3IPAdapterMixin
-- all
-- is_ip_adapter_active

## IPAdapterMaskProcessor

[[autodoc]] image_processor.IPAdapterMaskProcessor
@@ -17,9 +17,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
- [`StableDiffusionLoraLoaderMixin`] provides functions for loading and unloading, fusing and unfusing, enabling and disabling, and more functions for managing LoRA weights. This class can be used with any model.
- [`StableDiffusionXLLoraLoaderMixin`] is a [Stable Diffusion (SDXL)](../../api/pipelines/stable_diffusion/stable_diffusion_xl) version of the [`StableDiffusionLoraLoaderMixin`] class for loading and saving LoRA weights. It can only be used with the SDXL model.
- [`SD3LoraLoaderMixin`] provides similar functions for [Stable Diffusion 3](https://huggingface.co/blog/sd3).
-- [`FluxLoraLoaderMixin`] provides similar functions for [Flux](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux).
-- [`CogVideoXLoraLoaderMixin`] provides similar functions for [CogVideoX](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogvideox).
-- [`Mochi1LoraLoaderMixin`] provides similar functions for [Mochi](https://huggingface.co/docs/diffusers/main/en/api/pipelines/mochi).
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload, LoRAs and more.

@@ -41,18 +38,6 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse

[[autodoc]] loaders.lora_pipeline.SD3LoraLoaderMixin

-## FluxLoraLoaderMixin

-[[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin

-## CogVideoXLoraLoaderMixin

-[[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin

-## Mochi1LoraLoaderMixin

-[[autodoc]] loaders.lora_pipeline.Mochi1LoraLoaderMixin

## AmusedLoraLoaderMixin

[[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin
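As a quick orientation for the loader classes listed above: they are mixed into the pipelines themselves, so LoRA weights are normally loaded with `load_lora_weights` directly on a pipeline. A small sketch, with repository names used purely as placeholders (they are not taken from this diff):

```python
import torch
from diffusers import DiffusionPipeline

# Base checkpoint is illustrative; the LoRA repo below is a placeholder.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# StableDiffusionXLLoraLoaderMixin provides this method on the pipeline.
pipe.load_lora_weights("some-user/some-sdxl-lora")

# The adapter can later be fused or removed through the same mixin:
# pipe.fuse_lora()
# pipe.unload_lora_weights()
```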
@@ -22,6 +22,7 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:

## Supported pipelines

+- [`CogVideoXPipeline`]
- [`StableDiffusionPipeline`]
- [`StableDiffusionImg2ImgPipeline`]
- [`StableDiffusionInpaintPipeline`]
@@ -49,6 +50,7 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
- [`UNet2DConditionModel`]
- [`StableCascadeUNet`]
- [`AutoencoderKL`]
+- [`AutoencoderKLCogVideoX`]
- [`ControlNetModel`]
- [`SD3Transformer2DModel`]
- [`FluxTransformer2DModel`]
@@ -1,29 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
--->
-
-# SD3Transformer2D
-
-This class is useful when *only* loading weights into a [`SD3Transformer2DModel`]. If you need to load weights into the text encoder or a text encoder and SD3Transformer2DModel, check [`SD3LoraLoaderMixin`](lora#diffusers.loaders.SD3LoraLoaderMixin) class instead.
-
-The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs.
-
-<Tip>
-
-To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
-
-</Tip>
-
-## SD3Transformer2DLoadersMixin
-
-[[autodoc]] loaders.transformer_sd3.SD3Transformer2DLoadersMixin
-- all
-- _load_ip_adapter_weights
@@ -1,30 +0,0 @@
-<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License. -->
-
-# AllegroTransformer3DModel
-
-A Diffusion Transformer model for 3D data from [Allegro](https://github.com/rhymes-ai/Allegro) was introduced in [Allegro: Open the Black Box of Commercial-Level Video Generation Model](https://huggingface.co/papers/2410.15458) by RhymesAI.
-
-The model can be loaded with the following code snippet.
-
-```python
-from diffusers import AllegroTransformer3DModel
-
-transformer = AllegroTransformer3DModel.from_pretrained("rhymes-ai/Allegro", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
-```
-
-## AllegroTransformer3DModel
-
-[[autodoc]] AllegroTransformer3DModel
-
-## Transformer2DModelOutput
-
-[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
@@ -1,72 +0,0 @@
-<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License. -->
-
-# AutoencoderDC
-
-The 2D Autoencoder model used in [SANA](https://huggingface.co/papers/2410.10629) and introduced in [DCAE](https://huggingface.co/papers/2410.10733) by authors Junyu Chen\*, Han Cai\*, Junsong Chen, Enze Xie, Shang Yang, Haotian Tang, Muyang Li, Yao Lu, Song Han from MIT HAN Lab.
-
-The abstract from the paper is:
-
-*We present Deep Compression Autoencoder (DC-AE), a new family of autoencoder models for accelerating high-resolution diffusion models. Existing autoencoder models have demonstrated impressive results at a moderate spatial compression ratio (e.g., 8x), but fail to maintain satisfactory reconstruction accuracy for high spatial compression ratios (e.g., 64x). We address this challenge by introducing two key techniques: (1) Residual Autoencoding, where we design our models to learn residuals based on the space-to-channel transformed features to alleviate the optimization difficulty of high spatial-compression autoencoders; (2) Decoupled High-Resolution Adaptation, an efficient decoupled three-phases training strategy for mitigating the generalization penalty of high spatial-compression autoencoders. With these designs, we improve the autoencoder's spatial compression ratio up to 128 while maintaining the reconstruction quality. Applying our DC-AE to latent diffusion models, we achieve significant speedup without accuracy drop. For example, on ImageNet 512x512, our DC-AE provides 19.1x inference speedup and 17.9x training speedup on H100 GPU for UViT-H while achieving a better FID, compared with the widely used SD-VAE-f8 autoencoder. Our code is available at [this https URL](https://github.com/mit-han-lab/efficientvit).*
-
-The following DCAE models are released and supported in Diffusers.
-
-| Diffusers format | Original format |
-|:----------------:|:---------------:|
-| [`mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers) | [`mit-han-lab/dc-ae-f32c32-sana-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-sana-1.0)
-| [`mit-han-lab/dc-ae-f32c32-in-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-in-1.0-diffusers) | [`mit-han-lab/dc-ae-f32c32-in-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-in-1.0)
-| [`mit-han-lab/dc-ae-f32c32-mix-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-mix-1.0-diffusers) | [`mit-han-lab/dc-ae-f32c32-mix-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f32c32-mix-1.0)
-| [`mit-han-lab/dc-ae-f64c128-in-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f64c128-in-1.0-diffusers) | [`mit-han-lab/dc-ae-f64c128-in-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f64c128-in-1.0)
-| [`mit-han-lab/dc-ae-f64c128-mix-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f64c128-mix-1.0-diffusers) | [`mit-han-lab/dc-ae-f64c128-mix-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f64c128-mix-1.0)
-| [`mit-han-lab/dc-ae-f128c512-in-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0-diffusers) | [`mit-han-lab/dc-ae-f128c512-in-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0)
-| [`mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers) | [`mit-han-lab/dc-ae-f128c512-mix-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-mix-1.0)
-
-This model was contributed by [lawrence-cj](https://github.com/lawrence-cj).
-
-Load a model in Diffusers format with [`~ModelMixin.from_pretrained`].
-
-```python
-from diffusers import AutoencoderDC
-
-ae = AutoencoderDC.from_pretrained("mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers", torch_dtype=torch.float32).to("cuda")
-```
-
-## Load a model in Diffusers via `from_single_file`
-
-```python
-from difusers import AutoencoderDC
-
-ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f32c32-sana-1.0/blob/main/model.safetensors"
-model = AutoencoderDC.from_single_file(ckpt_path)
-
-```
-
-The `AutoencoderDC` model has `in` and `mix` single file checkpoint variants that have matching checkpoint keys, but use different scaling factors. It is not possible for Diffusers to automatically infer the correct config file to use with the model based on just the checkpoint and will default to configuring the model using the `mix` variant config file. To override the automatically determined config, please use the `config` argument when using single file loading with `in` variant checkpoints.
-
-```python
-from diffusers import AutoencoderDC
-
-ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors"
-model = AutoencoderDC.from_single_file(ckpt_path, config="mit-han-lab/dc-ae-f128c512-in-1.0-diffusers")
-```
-
-
-## AutoencoderDC
-
-[[autodoc]] AutoencoderDC
-- encode
-- decode
-- all
-
-## DecoderOutput
-
-[[autodoc]] models.autoencoders.vae.DecoderOutput
-
@@ -1,32 +0,0 @@
-<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License. -->
-
-# AutoencoderKLHunyuanVideo
-
-The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanVideo](https://github.com/Tencent/HunyuanVideo/), which was introduced in [HunyuanVideo: A Systematic Framework For Large Video Generative Models](https://huggingface.co/papers/2412.03603) by Tencent.
-
-The model can be loaded with the following code snippet.
-
-```python
-from diffusers import AutoencoderKLHunyuanVideo
-
-vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16)
-```
-
-## AutoencoderKLHunyuanVideo
-
-[[autodoc]] AutoencoderKLHunyuanVideo
-- decode
-- all
-
-## DecoderOutput
-
-[[autodoc]] models.autoencoders.vae.DecoderOutput
@@ -1,37 +0,0 @@
-<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License. -->
-
-# AutoencoderKLAllegro
-
-The 3D variational autoencoder (VAE) model with KL loss used in [Allegro](https://github.com/rhymes-ai/Allegro) was introduced in [Allegro: Open the Black Box of Commercial-Level Video Generation Model](https://huggingface.co/papers/2410.15458) by RhymesAI.
-
-The model can be loaded with the following code snippet.
-
-```python
-from diffusers import AutoencoderKLAllegro
-
-vae = AutoencoderKLCogVideoX.from_pretrained("rhymes-ai/Allegro", subfolder="vae", torch_dtype=torch.float32).to("cuda")
-```
-
-## AutoencoderKLAllegro
-
-[[autodoc]] AutoencoderKLAllegro
-- decode
-- encode
-- all
-
-## AutoencoderKLOutput
-
-[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
-
-## DecoderOutput
-
-[[autodoc]] models.autoencoders.vae.DecoderOutput
@@ -1,37 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# AutoencoderKLLTXVideo
|
|
||||||
|
|
||||||
The 3D variational autoencoder (VAE) model with KL loss used in [LTX](https://huggingface.co/Lightricks/LTX-Video) was introduced by Lightricks.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import AutoencoderKLLTXVideo
|
|
||||||
|
|
||||||
vae = AutoencoderKLLTXVideo.from_pretrained("Lightricks/LTX-Video", subfolder="vae", torch_dtype=torch.float32).to("cuda")
|
|
||||||
```
|
|
||||||
|
|
||||||
## AutoencoderKLLTXVideo
|
|
||||||
|
|
||||||
[[autodoc]] AutoencoderKLLTXVideo
|
|
||||||
- decode
|
|
||||||
- encode
|
|
||||||
- all
|
|
||||||
|
|
||||||
## AutoencoderKLOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
|
|
||||||
|
|
||||||
## DecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.autoencoders.vae.DecoderOutput
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# AutoencoderKLMochi
|
|
||||||
|
|
||||||
The 3D variational autoencoder (VAE) model with KL loss used in [Mochi](https://github.com/genmoai/models) was introduced in [Mochi 1 Preview](https://huggingface.co/genmo/mochi-1-preview) by Genmo.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import AutoencoderKLMochi
|
|
||||||
|
|
||||||
vae = AutoencoderKLMochi.from_pretrained("genmo/mochi-1-preview", subfolder="vae", torch_dtype=torch.float32).to("cuda")
|
|
||||||
```
|
|
||||||
|
|
||||||
## AutoencoderKLMochi
|
|
||||||
|
|
||||||
[[autodoc]] AutoencoderKLMochi
|
|
||||||
- decode
|
|
||||||
- all
|
|
||||||
|
|
||||||
## DecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.autoencoders.vae.DecoderOutput
|
|
||||||
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
|
|||||||
```python
|
```python
|
||||||
from diffusers import CogVideoXTransformer3DModel
|
from diffusers import CogVideoXTransformer3DModel
|
||||||
|
|
||||||
transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-2b", subfolder="transformer", torch_dtype=torch.float16).to("cuda")
|
vae = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-2b", subfolder="transformer", torch_dtype=torch.float16).to("cuda")
|
||||||
```
|
```
|
||||||
|
|
||||||
## CogVideoXTransformer3DModel
|
## CogVideoXTransformer3DModel
|
||||||
|
|||||||
@@ -1,30 +0,0 @@
|
|||||||
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# CogView3PlusTransformer2DModel
|
|
||||||
|
|
||||||
A Diffusion Transformer model for 2D data from [CogView3Plus](https://github.com/THUDM/CogView3) was introduced in [CogView3: Finer and Faster Text-to-Image Generation via Relay Diffusion](https://huggingface.co/papers/2403.05121) by Tsinghua University & ZhipuAI.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import CogView3PlusTransformer2DModel
|
|
||||||
|
|
||||||
transformer = CogView3PlusTransformer2DModel.from_pretrained("THUDM/CogView3Plus-3b", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
|
|
||||||
```
|
|
||||||
|
|
||||||
## CogView3PlusTransformer2DModel
|
|
||||||
|
|
||||||
[[autodoc]] CogView3PlusTransformer2DModel
|
|
||||||
|
|
||||||
## Transformer2DModelOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
|
|
||||||
@@ -29,7 +29,7 @@ from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
|||||||
url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth" # can also be a local path
|
url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth" # can also be a local path
|
||||||
controlnet = ControlNetModel.from_single_file(url)
|
controlnet = ControlNetModel.from_single_file(url)
|
||||||
|
|
||||||
url = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors" # can also be a local path
|
url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors" # can also be a local path
|
||||||
pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet)
|
pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
|
|||||||
|
|
||||||
## ControlNetOutput
|
## ControlNetOutput
|
||||||
|
|
||||||
[[autodoc]] models.controlnets.controlnet.ControlNetOutput
|
[[autodoc]] models.controlnet.ControlNetOutput
|
||||||
|
|
||||||
## FlaxControlNetModel
|
## FlaxControlNetModel
|
||||||
|
|
||||||
@@ -47,4 +47,4 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
|
|||||||
|
|
||||||
## FlaxControlNetOutput
|
## FlaxControlNetOutput
|
||||||
|
|
||||||
[[autodoc]] models.controlnets.controlnet_flax.FlaxControlNetOutput
|
[[autodoc]] models.controlnet_flax.FlaxControlNetOutput
|
||||||
|
|||||||
@@ -38,5 +38,5 @@ pipe = StableDiffusion3ControlNetPipeline.from_pretrained("stabilityai/stable-di
|
|||||||
|
|
||||||
## SD3ControlNetOutput
|
## SD3ControlNetOutput
|
||||||
|
|
||||||
[[autodoc]] models.controlnets.controlnet_sd3.SD3ControlNetOutput
|
[[autodoc]] models.controlnet_sd3.SD3ControlNetOutput
|
||||||
|
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
<!--Copyright 2024 The HuggingFace Team and The InstantX Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License.
|
|
||||||
-->
|
|
||||||
|
|
||||||
# ControlNetUnionModel
|
|
||||||
|
|
||||||
ControlNetUnionModel is an implementation of ControlNet for Stable Diffusion XL.
|
|
||||||
|
|
||||||
The ControlNet model was introduced in [ControlNetPlus](https://github.com/xinsir6/ControlNetPlus) by xinsir6. It supports multiple conditioning inputs without increasing computation.
|
|
||||||
|
|
||||||
*We design a new architecture that can support 10+ control types in condition text-to-image generation and can generate high resolution images visually comparable with midjourney. The network is based on the original ControlNet architecture, we propose two new modules to: 1 Extend the original ControlNet to support different image conditions using the same network parameter. 2 Support multiple conditions input without increasing computation offload, which is especially important for designers who want to edit image in detail, different conditions use the same condition encoder, without adding extra computations or parameters.*
|
|
||||||
|
|
||||||
## Loading
|
|
||||||
|
|
||||||
By default, the [`ControlNetUnionModel`] should be loaded with [`~ModelMixin.from_pretrained`].
|
|
||||||
|
|
||||||
```py
|
|
||||||
from diffusers import StableDiffusionXLControlNetUnionPipeline, ControlNetUnionModel
|
|
||||||
|
|
||||||
controlnet = ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0")
|
|
||||||
pipe = StableDiffusionXLControlNetUnionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet)
|
|
||||||
```
|
|
||||||
|
|
||||||
## ControlNetUnionModel
|
|
||||||
|
|
||||||
[[autodoc]] ControlNetUnionModel
|
|
||||||
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# HunyuanVideoTransformer3DModel
|
|
||||||
|
|
||||||
A Diffusion Transformer model for 3D video-like data was introduced in [HunyuanVideo: A Systematic Framework For Large Video Generative Models](https://huggingface.co/papers/2412.03603) by Tencent.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import HunyuanVideoTransformer3DModel
|
|
||||||
|
|
||||||
transformer = HunyuanVideoTransformer3DModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="transformer", torch_dtype=torch.bfloat16)
|
|
||||||
```
|
|
||||||
|
|
||||||
## HunyuanVideoTransformer3DModel
|
|
||||||
|
|
||||||
[[autodoc]] HunyuanVideoTransformer3DModel
|
|
||||||
|
|
||||||
## Transformer2DModelOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# LTXVideoTransformer3DModel
|
|
||||||
|
|
||||||
A Diffusion Transformer model for 3D data from [LTX](https://huggingface.co/Lightricks/LTX-Video) was introduced by Lightricks.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import LTXVideoTransformer3DModel
|
|
||||||
|
|
||||||
transformer = LTXVideoTransformer3DModel.from_pretrained("Lightricks/LTX-Video", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
|
|
||||||
```
|
|
||||||
|
|
||||||
## LTXVideoTransformer3DModel
|
|
||||||
|
|
||||||
[[autodoc]] LTXVideoTransformer3DModel
|
|
||||||
|
|
||||||
## Transformer2DModelOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# MochiTransformer3DModel
|
|
||||||
|
|
||||||
A Diffusion Transformer model for 3D video-like data was introduced in [Mochi-1 Preview](https://huggingface.co/genmo/mochi-1-preview) by Genmo.
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import MochiTransformer3DModel
|
|
||||||
|
|
||||||
transformer = MochiTransformer3DModel.from_pretrained("genmo/mochi-1-preview", subfolder="transformer", torch_dtype=torch.float16).to("cuda")
|
|
||||||
```
|
|
||||||
|
|
||||||
## MochiTransformer3DModel
|
|
||||||
|
|
||||||
[[autodoc]] MochiTransformer3DModel
|
|
||||||
|
|
||||||
## Transformer2DModelOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# SanaTransformer2DModel
|
|
||||||
|
|
||||||
A Diffusion Transformer model for 2D data, introduced in [SANA: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformers](https://huggingface.co/papers/2410.10629) by NVIDIA and MIT HAN Lab (Enze Xie, Junsong Chen, Junyu Chen, Han Cai, Haotian Tang, Yujun Lin, Zhekai Zhang, Muyang Li, Ligeng Zhu, Yao Lu, Song Han).
|
|
||||||
|
|
||||||
The abstract from the paper is:
|
|
||||||
|
|
||||||
*We introduce Sana, a text-to-image framework that can efficiently generate images up to 4096×4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU. Core designs include: (1) Deep compression autoencoder: unlike traditional AEs, which compress images only 8×, we trained an AE that can compress images 32×, effectively reducing the number of latent tokens. (2) Linear DiT: we replace all vanilla attention in DiT with linear attention, which is more efficient at high resolutions without sacrificing quality. (3) Decoder-only text encoder: we replaced T5 with modern decoder-only small LLM as the text encoder and designed complex human instruction with in-context learning to enhance the image-text alignment. (4) Efficient training and sampling: we propose Flow-DPM-Solver to reduce sampling steps, with efficient caption labeling and selection to accelerate convergence. As a result, Sana-0.6B is very competitive with modern giant diffusion model (e.g. Flux-12B), being 20 times smaller and 100+ times faster in measured throughput. Moreover, Sana-0.6B can be deployed on a 16GB laptop GPU, taking less than 1 second to generate a 1024×1024 resolution image. Sana enables content creation at low cost. Code and model will be publicly released.*
|
|
||||||
|
|
||||||
The model can be loaded with the following code snippet.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
from diffusers import SanaTransformer2DModel
|
|
||||||
|
|
||||||
transformer = SanaTransformer2DModel.from_pretrained("Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
|
|
||||||
```
|
|
||||||
|
|
||||||
## SanaTransformer2DModel
|
|
||||||
|
|
||||||
[[autodoc]] SanaTransformer2DModel
|
|
||||||
|
|
||||||
## Transformer2DModelOutput
|
|
||||||
|
|
||||||
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
|
||||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations under the License. -->
|
|
||||||
|
|
||||||
# Allegro
|
|
||||||
|
|
||||||
[Allegro: Open the Black Box of Commercial-Level Video Generation Model](https://huggingface.co/papers/2410.15458) from RhymesAI, by Yuan Zhou, Qiuyue Wang, Yuxuan Cai, Huan Yang.
|
|
||||||
|
|
||||||
The abstract from the paper is:
|
|
||||||
|
|
||||||
*Significant advancements have been made in the field of video generation, with the open-source community contributing a wealth of research papers and tools for training high-quality models. However, despite these efforts, the available information and resources remain insufficient for achieving commercial-level performance. In this report, we open the black box and introduce Allegro, an advanced video generation model that excels in both quality and temporal consistency. We also highlight the current limitations in the field and present a comprehensive methodology for training high-performance, commercial-level video generation models, addressing key aspects such as data, model architecture, training pipeline, and evaluation. Our user study shows that Allegro surpasses existing open-source models and most commercial models, ranking just behind Hailuo and Kling. Code: https://github.com/rhymes-ai/Allegro , Model: https://huggingface.co/rhymes-ai/Allegro , Gallery: https://rhymes.ai/allegro_gallery .*
|
|
||||||
|
|
||||||
<Tip>
|
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
|
||||||
|
|
||||||
</Tip>
|
|
||||||
|
|
||||||
## Quantization
|
|
||||||
|
|
||||||
Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.
|
|
||||||
|
|
||||||
Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`AllegroPipeline`] for inference with bitsandbytes.
|
|
||||||
|
|
||||||
```py
|
|
||||||
import torch
|
|
||||||
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AllegroTransformer3DModel, AllegroPipeline
|
|
||||||
from diffusers.utils import export_to_video
|
|
||||||
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel
|
|
||||||
|
|
||||||
quant_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
text_encoder_8bit = T5EncoderModel.from_pretrained(
|
|
||||||
"rhymes-ai/Allegro",
|
|
||||||
subfolder="text_encoder",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
transformer_8bit = AllegroTransformer3DModel.from_pretrained(
|
|
||||||
"rhymes-ai/Allegro",
|
|
||||||
subfolder="transformer",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
pipeline = AllegroPipeline.from_pretrained(
|
|
||||||
"rhymes-ai/Allegro",
|
|
||||||
text_encoder=text_encoder_8bit,
|
|
||||||
transformer=transformer_8bit,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
device_map="balanced",
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = (
|
|
||||||
"A seaside harbor with bright sunlight and sparkling seawater, with many boats in the water. From an aerial view, "
|
|
||||||
"the boats vary in size and color, some moving and some stationary. Fishing boats in the water suggest that this "
|
|
||||||
"location might be a popular spot for docking fishing boats."
|
|
||||||
)
|
|
||||||
video = pipeline(prompt, guidance_scale=7.5, max_sequence_length=512).frames[0]
|
|
||||||
export_to_video(video, "harbor.mp4", fps=15)
|
|
||||||
```
|
|
||||||
|
|
||||||
## AllegroPipeline
|
|
||||||
|
|
||||||
[[autodoc]] AllegroPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## AllegroPipelineOutput
|
|
||||||
|
|
||||||
[[autodoc]] pipelines.allegro.pipeline_output.AllegroPipelineOutput
|
|
||||||
@@ -29,7 +29,6 @@ The abstract of the paper is the following:
|
|||||||
| [AnimateDiffSparseControlNetPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py) | *Controlled Video-to-Video Generation with AnimateDiff using SparseCtrl* |
|
| [AnimateDiffSparseControlNetPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py) | *Controlled Video-to-Video Generation with AnimateDiff using SparseCtrl* |
|
||||||
| [AnimateDiffSDXLPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py) | *Video-to-Video Generation with AnimateDiff* |
|
| [AnimateDiffSDXLPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py) | *Video-to-Video Generation with AnimateDiff* |
|
||||||
| [AnimateDiffVideoToVideoPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py) | *Video-to-Video Generation with AnimateDiff* |
|
| [AnimateDiffVideoToVideoPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py) | *Video-to-Video Generation with AnimateDiff* |
|
||||||
| [AnimateDiffVideoToVideoControlNetPipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py) | *Video-to-Video Generation with AnimateDiff using ControlNet* |
|
|
||||||
|
|
||||||
## Available checkpoints
|
## Available checkpoints
|
||||||
|
|
||||||
@@ -519,97 +518,6 @@ Here are some sample outputs:
|
|||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### AnimateDiffVideoToVideoControlNetPipeline
|
|
||||||
|
|
||||||
AnimateDiff can be used together with ControlNets to enhance video-to-video generation by allowing for precise control over the output. ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models](https://huggingface.co/papers/2302.05543) by Lvmin Zhang, Anyi Rao, and Maneesh Agrawala, and allows you to condition Stable Diffusion with an additional control image to ensure that the spatial information is preserved throughout the video.
|
|
||||||
|
|
||||||
This pipeline allows you to condition your generation both on the original video and on a sequence of control images.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
|
|
||||||
from PIL import Image
|
|
||||||
from tqdm.auto import tqdm
|
|
||||||
|
|
||||||
from controlnet_aux.processor import OpenposeDetector
|
|
||||||
from diffusers import AnimateDiffVideoToVideoControlNetPipeline
|
|
||||||
from diffusers.utils import export_to_gif, load_video
|
|
||||||
from diffusers import AutoencoderKL, ControlNetModel, MotionAdapter, LCMScheduler
|
|
||||||
|
|
||||||
# Load the ControlNet
|
|
||||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16)
|
|
||||||
# Load the motion adapter
|
|
||||||
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
|
|
||||||
# Load SD 1.5 based finetuned model
|
|
||||||
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
|
|
||||||
pipe = AnimateDiffVideoToVideoControlNetPipeline.from_pretrained(
|
|
||||||
"SG161222/Realistic_Vision_V5.1_noVAE",
|
|
||||||
motion_adapter=motion_adapter,
|
|
||||||
controlnet=controlnet,
|
|
||||||
vae=vae,
|
|
||||||
).to(device="cuda", dtype=torch.float16)
|
|
||||||
|
|
||||||
# Enable LCM to speed up inference
|
|
||||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
|
|
||||||
pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
|
|
||||||
pipe.set_adapters(["lcm-lora"], [0.8])
|
|
||||||
|
|
||||||
video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/dance.gif")
|
|
||||||
video = [frame.convert("RGB") for frame in video]
|
|
||||||
|
|
||||||
prompt = "astronaut in space, dancing"
|
|
||||||
negative_prompt = "bad quality, worst quality, jpeg artifacts, ugly"
|
|
||||||
|
|
||||||
# Create controlnet preprocessor
|
|
||||||
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
|
|
||||||
|
|
||||||
# Preprocess controlnet images
|
|
||||||
conditioning_frames = []
|
|
||||||
for frame in tqdm(video):
|
|
||||||
conditioning_frames.append(open_pose(frame))
|
|
||||||
|
|
||||||
strength = 0.8
|
|
||||||
with torch.inference_mode():
|
|
||||||
video = pipe(
|
|
||||||
video=video,
|
|
||||||
prompt=prompt,
|
|
||||||
negative_prompt=negative_prompt,
|
|
||||||
num_inference_steps=10,
|
|
||||||
guidance_scale=2.0,
|
|
||||||
controlnet_conditioning_scale=0.75,
|
|
||||||
conditioning_frames=conditioning_frames,
|
|
||||||
strength=strength,
|
|
||||||
generator=torch.Generator().manual_seed(42),
|
|
||||||
).frames[0]
|
|
||||||
|
|
||||||
video = [frame.resize(conditioning_frames[0].size) for frame in video]
|
|
||||||
export_to_gif(video, f"animatediff_vid2vid_controlnet.gif", fps=8)
|
|
||||||
```
|
|
||||||
|
|
||||||
Here are some sample outputs:
|
|
||||||
|
|
||||||
<table align="center">
|
|
||||||
<tr>
|
|
||||||
<th align="center">Source Video</th>
|
|
||||||
<th align="center">Output Video</th>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td align="center">
|
|
||||||
anime girl, dancing
|
|
||||||
<br />
|
|
||||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/dance.gif" alt="anime girl, dancing" />
|
|
||||||
</td>
|
|
||||||
<td align="center">
|
|
||||||
astronaut in space, dancing
|
|
||||||
<br/>
|
|
||||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff_vid2vid_controlnet.gif" alt="astronaut in space, dancing" />
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
**The lights and composition were transferred from the Source Video.**
|
|
||||||
|
|
||||||
## Using Motion LoRAs
|
## Using Motion LoRAs
|
||||||
|
|
||||||
Motion LoRAs are a collection of LoRAs that work with the `guoyww/animatediff-motion-adapter-v1-5-2` checkpoint. These LoRAs are responsible for adding specific types of motion to the animations.
|
Motion LoRAs are a collection of LoRAs that work with the `guoyww/animatediff-motion-adapter-v1-5-2` checkpoint. These LoRAs are responsible for adding specific types of motion to the animations.
|
||||||
@@ -803,7 +711,7 @@ FreeInit is not really free - the improved quality comes at the cost of extra co
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
@@ -914,89 +822,6 @@ export_to_gif(frames, "animatelcm-motion-lora.gif")
|
|||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
## Using FreeNoise
|
|
||||||
|
|
||||||
[FreeNoise: Tuning-Free Longer Video Diffusion via Noise Rescheduling](https://arxiv.org/abs/2310.15169) by Haonan Qiu, Menghan Xia, Yong Zhang, Yingqing He, Xintao Wang, Ying Shan, Ziwei Liu.
|
|
||||||
|
|
||||||
FreeNoise is a sampling mechanism that can generate longer videos with short-video generation models by employing noise-rescheduling, temporal attention over sliding windows, and weighted averaging of latent frames. It can also be used with multiple prompts to generate interpolated videos. More details are available in the paper.
|
|
||||||
|
|
||||||
The currently supported AnimateDiff pipelines that can be used with FreeNoise are:
|
|
||||||
- [`AnimateDiffPipeline`]
|
|
||||||
- [`AnimateDiffControlNetPipeline`]
|
|
||||||
- [`AnimateDiffVideoToVideoPipeline`]
|
|
||||||
- [`AnimateDiffVideoToVideoControlNetPipeline`]
|
|
||||||
|
|
||||||
In order to use FreeNoise, a single line needs to be added to the inference code after loading your pipelines.
|
|
||||||
|
|
||||||
```diff
|
|
||||||
+ pipe.enable_free_noise()
|
|
||||||
```
|
|
||||||
|
|
||||||
After this, you can pass either a single prompt or multiple prompts as a dictionary of integer-string pairs. The integer keys correspond to the frame index at which that prompt's influence is maximum, and each frame index should map to a single string prompt. Prompts for intermediate frame indices that are not passed in the dictionary are created by interpolating between the provided frame prompts; simple linear interpolation is used by default. You can customize this behaviour with a callback passed to the `prompt_interpolation_callback` parameter when enabling FreeNoise.
|
|
||||||
|
|
||||||
Full example:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
|
|
||||||
from diffusers import AutoencoderKL, AnimateDiffPipeline, LCMScheduler, MotionAdapter
|
|
||||||
from diffusers.utils import export_to_video, load_image
|
|
||||||
|
|
||||||
# Load pipeline
|
|
||||||
dtype = torch.float16
|
|
||||||
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", torch_dtype=dtype)
|
|
||||||
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype)
|
|
||||||
|
|
||||||
pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=motion_adapter, vae=vae, torch_dtype=dtype)
|
|
||||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
|
|
||||||
|
|
||||||
pipe.load_lora_weights(
|
|
||||||
"wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm_lora"
|
|
||||||
)
|
|
||||||
pipe.set_adapters(["lcm_lora"], [0.8])
|
|
||||||
|
|
||||||
# Enable FreeNoise for long prompt generation
|
|
||||||
pipe.enable_free_noise(context_length=16, context_stride=4)
|
|
||||||
pipe.to("cuda")
|
|
||||||
|
|
||||||
# Can be a single prompt, or a dictionary with frame timesteps
|
|
||||||
prompt = {
|
|
||||||
0: "A caterpillar on a leaf, high quality, photorealistic",
|
|
||||||
40: "A caterpillar transforming into a cocoon, on a leaf, near flowers, photorealistic",
|
|
||||||
80: "A cocoon on a leaf, flowers in the backgrond, photorealistic",
|
|
||||||
120: "A cocoon maturing and a butterfly being born, flowers and leaves visible in the background, photorealistic",
|
|
||||||
160: "A beautiful butterfly, vibrant colors, sitting on a leaf, flowers in the background, photorealistic",
|
|
||||||
200: "A beautiful butterfly, flying away in a forest, photorealistic",
|
|
||||||
240: "A cyberpunk butterfly, neon lights, glowing",
|
|
||||||
}
|
|
||||||
negative_prompt = "bad quality, worst quality, jpeg artifacts"
|
|
||||||
|
|
||||||
# Run inference
|
|
||||||
output = pipe(
|
|
||||||
prompt=prompt,
|
|
||||||
negative_prompt=negative_prompt,
|
|
||||||
num_frames=256,
|
|
||||||
guidance_scale=2.5,
|
|
||||||
num_inference_steps=10,
|
|
||||||
generator=torch.Generator("cpu").manual_seed(0),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Save video
|
|
||||||
frames = output.frames[0]
|
|
||||||
export_to_video(frames, "output.mp4", fps=16)
|
|
||||||
```
|
|
||||||
|
|
||||||
### FreeNoise memory savings
|
|
||||||
|
|
||||||
Since FreeNoise processes multiple frames together, there are parts of the model where the required memory exceeds what is available on normal consumer GPUs. The main memory bottlenecks we identified are the spatial and temporal attention blocks, upsampling and downsampling blocks, resnet blocks and feed-forward layers. Since most of these blocks operate effectively only on the channel/embedding dimension, chunked inference can be performed across the batch dimensions. The batch dimensions in AnimateDiff are either spatial (`[B x F, H x W, C]`) or temporal (`[B x H x W, F, C]`) in nature (this may seem counter-intuitive, but the batch dimensions here are correct, because spatial blocks process across the `B x F` dimension while temporal blocks process across the `B x H x W` dimension). We introduce a `SplitInferenceModule` that makes it easier to chunk across any dimension and perform inference. This saves a lot of memory but comes at the cost of longer inference times.
|
|
||||||
|
|
||||||
```diff
|
|
||||||
# Load pipeline and adapters
|
|
||||||
# ...
|
|
||||||
+ pipe.enable_free_noise_split_inference()
|
|
||||||
+ pipe.unet.enable_forward_chunking(16)
|
|
||||||
```
|
|
||||||
|
|
||||||
The `pipe.enable_free_noise_split_inference` method accepts two parameters: `spatial_split_size` (defaults to `256`) and `temporal_split_size` (defaults to `16`). These can be configured based on how much VRAM you have available. A lower split size results in lower memory usage but slower inference, whereas a larger split size results in faster inference at the cost of more memory.
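For example, a minimal sketch (reusing the `pipe` object from the FreeNoise example above; the split sizes here are illustrative, not tuned recommendations):

```python
# Enable FreeNoise as before, then opt into split inference with smaller chunks
# to lower peak memory (larger split sizes are faster but use more VRAM).
pipe.enable_free_noise(context_length=16, context_stride=4)
pipe.enable_free_noise_split_inference(spatial_split_size=128, temporal_split_size=8)
pipe.unet.enable_forward_chunking(16)
```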
|
|
||||||
|
|
||||||
## Using `from_single_file` with the MotionAdapter
|
## Using `from_single_file` with the MotionAdapter
|
||||||
|
|
||||||
@@ -1041,12 +866,6 @@ pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapt
|
|||||||
- all
|
- all
|
||||||
- __call__
|
- __call__
|
||||||
|
|
||||||
## AnimateDiffVideoToVideoControlNetPipeline
|
|
||||||
|
|
||||||
[[autodoc]] AnimateDiffVideoToVideoControlNetPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## AnimateDiffPipelineOutput
|
## AnimateDiffPipelineOutput
|
||||||
|
|
||||||
[[autodoc]] pipelines.animatediff.AnimateDiffPipelineOutput
|
[[autodoc]] pipelines.animatediff.AnimateDiffPipelineOutput
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ You can find additional information about Attend-and-Excite on the [project page
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ During inference:
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ The following example demonstrates how to construct good music and speech genera
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
|
|||||||
|
|
||||||
# AuraFlow
|
# AuraFlow
|
||||||
|
|
||||||
AuraFlow is inspired by [Stable Diffusion 3](../pipelines/stable_diffusion/stable_diffusion_3) and is by far the largest text-to-image generation model that comes with an Apache 2.0 license. This model achieves state-of-the-art results on the [GenEval](https://github.com/djghosh13/geneval) benchmark.
|
AuraFlow is inspired by [Stable Diffusion 3](../pipelines/stable_diffusion/stable_diffusion_3.md) and is by far the largest text-to-image generation model that comes with an Apache 2.0 license. This model achieves state-of-the-art results on the [GenEval](https://github.com/djghosh13/geneval) benchmark.
|
||||||
|
|
||||||
It was developed by the Fal team and more details about it can be found in [this blog post](https://blog.fal.ai/auraflow/).
|
It was developed by the Fal team and more details about it can be found in [this blog post](https://blog.fal.ai/auraflow/).
|
||||||
|
|
||||||
@@ -22,73 +22,6 @@ AuraFlow can be quite expensive to run on consumer hardware devices. However, yo
|
|||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
## Quantization
|
|
||||||
|
|
||||||
Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on image quality depending on the model.
|
|
||||||
|
|
||||||
Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`AuraFlowPipeline`] for inference with bitsandbytes.
|
|
||||||
|
|
||||||
```py
|
|
||||||
import torch
|
|
||||||
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AuraFlowTransformer2DModel, AuraFlowPipeline
|
|
||||||
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel
|
|
||||||
|
|
||||||
quant_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
text_encoder_8bit = T5EncoderModel.from_pretrained(
|
|
||||||
"fal/AuraFlow",
|
|
||||||
subfolder="text_encoder",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
transformer_8bit = AuraFlowTransformer2DModel.from_pretrained(
|
|
||||||
"fal/AuraFlow",
|
|
||||||
subfolder="transformer",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
pipeline = AuraFlowPipeline.from_pretrained(
|
|
||||||
"fal/AuraFlow",
|
|
||||||
text_encoder=text_encoder_8bit,
|
|
||||||
transformer=transformer_8bit,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
device_map="balanced",
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = "a tiny astronaut hatching from an egg on the moon"
|
|
||||||
image = pipeline(prompt).images[0]
|
|
||||||
image.save("auraflow.png")
|
|
||||||
```
|
|
||||||
|
|
||||||
Loading [GGUF checkpoints](https://huggingface.co/docs/diffusers/quantization/gguf) is also supported:
|
|
||||||
|
|
||||||
```py
|
|
||||||
import torch
|
|
||||||
from diffusers import (
|
|
||||||
AuraFlowPipeline,
|
|
||||||
GGUFQuantizationConfig,
|
|
||||||
AuraFlowTransformer2DModel,
|
|
||||||
)
|
|
||||||
|
|
||||||
transformer = AuraFlowTransformer2DModel.from_single_file(
|
|
||||||
"https://huggingface.co/city96/AuraFlow-v0.3-gguf/blob/main/aura_flow_0.3-Q2_K.gguf",
|
|
||||||
quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
|
|
||||||
torch_dtype=torch.bfloat16,
|
|
||||||
)
|
|
||||||
|
|
||||||
pipeline = AuraFlowPipeline.from_pretrained(
|
|
||||||
"fal/AuraFlow-v0.3",
|
|
||||||
transformer=transformer,
|
|
||||||
torch_dtype=torch.bfloat16,
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = "a cute pony in a field of flowers"
|
|
||||||
image = pipeline(prompt).images[0]
|
|
||||||
image.save("auraflow.png")
|
|
||||||
```
|
|
||||||
|
|
||||||
## AuraFlowPipeline
|
## AuraFlowPipeline
|
||||||
|
|
||||||
[[autodoc]] AuraFlowPipeline
|
[[autodoc]] AuraFlowPipeline
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ The original codebase can be found at [salesforce/LAVIS](https://github.com/sale
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
|
|||||||
@@ -23,38 +23,15 @@ The abstract from the paper is:
|
|||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
|
||||||
|
|
||||||
</Tip>
|
</Tip>
|
||||||
|
|
||||||
This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).
|
This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).
|
||||||
|
|
||||||
There are three official CogVideoX checkpoints for text-to-video and video-to-video.
|
There are two models available that can be used with the CogVideoX pipeline:
|
||||||
|
- [`THUDM/CogVideoX-2b`](https://huggingface.co/THUDM/CogVideoX-2b)
|
||||||
| checkpoints | recommended inference dtype |
|
- [`THUDM/CogVideoX-5b`](https://huggingface.co/THUDM/CogVideoX-5b)
|
||||||
|:---:|:---:|
|
|
||||||
| [`THUDM/CogVideoX-2b`](https://huggingface.co/THUDM/CogVideoX-2b) | torch.float16 |
|
|
||||||
| [`THUDM/CogVideoX-5b`](https://huggingface.co/THUDM/CogVideoX-5b) | torch.bfloat16 |
|
|
||||||
| [`THUDM/CogVideoX1.5-5b`](https://huggingface.co/THUDM/CogVideoX1.5-5b) | torch.bfloat16 |
|
|
||||||
|
|
||||||
There are two official CogVideoX checkpoints available for image-to-video.
|
|
||||||
|
|
||||||
| checkpoints | recommended inference dtype |
|
|
||||||
|:---:|:---:|
|
|
||||||
| [`THUDM/CogVideoX-5b-I2V`](https://huggingface.co/THUDM/CogVideoX-5b-I2V) | torch.bfloat16 |
|
|
||||||
| [`THUDM/CogVideoX-1.5-5b-I2V`](https://huggingface.co/THUDM/CogVideoX-1.5-5b-I2V) | torch.bfloat16 |
|
|
||||||
|
|
||||||
For the CogVideoX 1.5 series:
|
|
||||||
- Text-to-video (T2V) works best at a resolution of 1360x768 because it was trained with that specific resolution.
|
|
||||||
- Image-to-video (I2V) works for multiple resolutions. The width can vary from 768 to 1360, but the height must be 768. The height/width must be divisible by 16.
|
|
||||||
- Both T2V and I2V models support generation with 81 and 161 frames and work best at these values. Exporting videos at 16 FPS is recommended; a short image-to-video sketch follows this list.
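The snippet below is a minimal sketch of an image-to-video call that respects these constraints. The checkpoint name is taken from the table above and the input image path is a placeholder.

```python
import torch
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-1.5-5b-I2V", torch_dtype=torch.bfloat16
).to("cuda")

image = load_image("path/to/conditioning_image.png")  # placeholder path
video = pipe(
    image=image,
    prompt="An astronaut slowly waving at the camera on the moon",
    height=768,             # height must be 768 for CogVideoX 1.5 I2V
    width=1360,             # width can range from 768 to 1360 and must be divisible by 16
    num_frames=81,          # 81 or 161 frames work best
    num_inference_steps=50,
    guidance_scale=6,
).frames[0]
export_to_video(video, "output.mp4", fps=16)  # export at the recommended 16 FPS
```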
|
|
||||||
|
|
||||||
There are two official CogVideoX checkpoints that support pose controllable generation (by the [Alibaba-PAI](https://huggingface.co/alibaba-pai) team).
|
|
||||||
|
|
||||||
| checkpoints | recommended inference dtype |
|
|
||||||
|:---:|:---:|
|
|
||||||
| [`alibaba-pai/CogVideoX-Fun-V1.1-2b-Pose`](https://huggingface.co/alibaba-pai/CogVideoX-Fun-V1.1-2b-Pose) | torch.bfloat16 |
|
|
||||||
| [`alibaba-pai/CogVideoX-Fun-V1.1-5b-Pose`](https://huggingface.co/alibaba-pai/CogVideoX-Fun-V1.1-5b-Pose) | torch.bfloat16 |
|
|
||||||
|
|
||||||
## Inference
|
## Inference
|
||||||
|
|
||||||
@@ -64,15 +41,10 @@ First, load the pipeline:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
import torch
|
import torch
|
||||||
from diffusers import CogVideoXPipeline, CogVideoXImageToVideoPipeline
|
from diffusers import CogVideoXPipeline
|
||||||
from diffusers.utils import export_to_video, load_image
|
from diffusers.utils import export_to_video
|
||||||
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b").to("cuda") # or "THUDM/CogVideoX-2b"
|
|
||||||
```
|
|
||||||
|
|
||||||
If you are using the image-to-video pipeline, load it as follows:
|
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b").to("cuda")
|
||||||
|
|
||||||
```python
|
|
||||||
pipe = CogVideoXImageToVideoPipeline.from_pretrained("THUDM/CogVideoX-5b-I2V").to("cuda")
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Then change the memory layout of the pipeline's `transformer` component to `torch.channels_last`:
|
Then change the memory layout of the pipeline's `transformer` component to `torch.channels_last`:
|
||||||
@@ -81,7 +53,7 @@ Then change the memory layout of the pipelines `transformer` component to `torch
|
|||||||
pipe.transformer.to(memory_format=torch.channels_last)
|
pipe.transformer.to(memory_format=torch.channels_last)
|
||||||
```
|
```
|
||||||
|
|
||||||
Compile the components and run inference:
|
Finally, compile the components and run inference:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
||||||
@@ -91,7 +63,7 @@ prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wood
|
|||||||
video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
|
video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
|
||||||
```
|
```
|
||||||
|
|
||||||
The [T2V benchmark](https://gist.github.com/a-r-r-o-w/5183d75e452a368fd17448fcc810bd3f) results on an 80GB A100 machine are:
|
The [benchmark](https://gist.github.com/a-r-r-o-w/5183d75e452a368fd17448fcc810bd3f) results on an 80GB A100 machine are:
|
||||||
|
|
||||||
```
|
```
|
||||||
Without torch.compile(): Average inference time: 96.89 seconds.
|
Without torch.compile(): Average inference time: 96.89 seconds.
|
||||||
@@ -105,78 +77,16 @@ CogVideoX-2b requires about 19 GB of GPU memory to decode 49 frames (6 seconds o
|
|||||||
- `pipe.enable_model_cpu_offload()`:
|
- `pipe.enable_model_cpu_offload()`:
|
||||||
- Without enabling cpu offloading, memory usage is `33 GB`
|
- Without enabling cpu offloading, memory usage is `33 GB`
|
||||||
- With enabling cpu offloading, memory usage is `19 GB`
|
- With enabling cpu offloading, memory usage is `19 GB`
|
||||||
- `pipe.enable_sequential_cpu_offload()`:
|
|
||||||
- Similar to `enable_model_cpu_offload` but can significantly reduce memory usage at the cost of slow inference
|
|
||||||
- When enabled, memory usage is under `4 GB`
|
|
||||||
- `pipe.vae.enable_tiling()`:
|
- `pipe.vae.enable_tiling()`:
|
||||||
- With enabling cpu offloading and tiling, memory usage is `11 GB`
|
- With enabling cpu offloading and tiling, memory usage is `11 GB`
|
||||||
- `pipe.vae.enable_slicing()`
|
- `pipe.vae.enable_slicing()`
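The options above can be combined. Below is a minimal sketch of a memory-friendly run (the prompt is illustrative, and the reported savings will vary with your setup):

```python
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)

# Offload submodules to the CPU while they are idle and decode latents in tiles/slices
# to keep peak VRAM low at the cost of some speed.
pipe.enable_model_cpu_offload()
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

prompt = "A panda playing a guitar in a bamboo forest"
video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
export_to_video(video, "panda.mp4", fps=8)
```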
|
||||||
|
|
||||||
## Quantization
|
|
||||||
|
|
||||||
Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.
|
|
||||||
|
|
||||||
Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`CogVideoXPipeline`] for inference with bitsandbytes.
|
|
||||||
|
|
||||||
```py
|
|
||||||
import torch
|
|
||||||
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, CogVideoXTransformer3DModel, CogVideoXPipeline
|
|
||||||
from diffusers.utils import export_to_video
|
|
||||||
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel
|
|
||||||
|
|
||||||
quant_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
text_encoder_8bit = T5EncoderModel.from_pretrained(
|
|
||||||
"THUDM/CogVideoX-2b",
|
|
||||||
subfolder="text_encoder",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
|
|
||||||
transformer_8bit = CogVideoXTransformer3DModel.from_pretrained(
|
|
||||||
"THUDM/CogVideoX-2b",
|
|
||||||
subfolder="transformer",
|
|
||||||
quantization_config=quant_config,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
)
|
|
||||||
|
|
||||||
pipeline = CogVideoXPipeline.from_pretrained(
|
|
||||||
"THUDM/CogVideoX-2b",
|
|
||||||
text_encoder=text_encoder_8bit,
|
|
||||||
transformer=transformer_8bit,
|
|
||||||
torch_dtype=torch.float16,
|
|
||||||
device_map="balanced",
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = "A detailed wooden toy ship with intricately carved masts and sails is seen gliding smoothly over a plush, blue carpet that mimics the waves of the sea. The ship's hull is painted a rich brown, with tiny windows. The carpet, soft and textured, provides a perfect backdrop, resembling an oceanic expanse. Surrounding the ship are various other toys and children's items, hinting at a playful environment. The scene captures the innocence and imagination of childhood, with the toy ship's journey symbolizing endless adventures in a whimsical, indoor setting."
|
|
||||||
video = pipeline(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
|
|
||||||
export_to_video(video, "ship.mp4", fps=8)
|
|
||||||
```
|
|
||||||
|
|
||||||
## CogVideoXPipeline
|
## CogVideoXPipeline
|
||||||
|
|
||||||
[[autodoc]] CogVideoXPipeline
|
[[autodoc]] CogVideoXPipeline
|
||||||
- all
|
- all
|
||||||
- __call__
|
- __call__
|
||||||
|
|
||||||
## CogVideoXImageToVideoPipeline
|
|
||||||
|
|
||||||
[[autodoc]] CogVideoXImageToVideoPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## CogVideoXVideoToVideoPipeline
|
|
||||||
|
|
||||||
[[autodoc]] CogVideoXVideoToVideoPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## CogVideoXFunControlPipeline
|
|
||||||
|
|
||||||
[[autodoc]] CogVideoXFunControlPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## CogVideoXPipelineOutput
|
## CogVideoXPipelineOutput
|
||||||
|
|
||||||
[[autodoc]] pipelines.cogvideo.pipeline_output.CogVideoXPipelineOutput
|
[[autodoc]] pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipelineOutput
|
||||||
@@ -1,40 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-->

# CogView3Plus

[CogView3: Finer and Faster Text-to-Image Generation via Relay Diffusion](https://huggingface.co/papers/2403.05121) from Tsinghua University & ZhipuAI, by Wendi Zheng, Jiayan Teng, Zhuoyi Yang, Weihan Wang, Jidong Chen, Xiaotao Gu, Yuxiao Dong, Ming Ding, Jie Tang.

The abstract from the paper is:

*Recent advancements in text-to-image generative systems have been largely driven by diffusion models. However, single-stage text-to-image diffusion models still face challenges, in terms of computational efficiency and the refinement of image details. To tackle the issue, we propose CogView3, an innovative cascaded framework that enhances the performance of text-to-image diffusion. CogView3 is the first model implementing relay diffusion in the realm of text-to-image generation, executing the task by first creating low-resolution images and subsequently applying relay-based super-resolution. This methodology not only results in competitive text-to-image outputs but also greatly reduces both training and inference costs. Our experimental results demonstrate that CogView3 outperforms SDXL, the current state-of-the-art open-source text-to-image diffusion model, by 77.0% in human evaluations, all while requiring only about 1/2 of the inference time. The distilled variant of CogView3 achieves comparable performance while only utilizing 1/10 of the inference time by SDXL.*

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).
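
As a minimal text-to-image sketch (the `THUDM/CogView3-Plus-3B` checkpoint id, the prompt, and the generation settings below are assumptions for illustration rather than values taken from this page):

```python
import torch
from diffusers import CogView3PlusPipeline

# Assumed checkpoint id; replace with the CogView3-Plus weights you want to use.
pipe = CogView3PlusPipeline.from_pretrained("THUDM/CogView3-Plus-3B", torch_dtype=torch.bfloat16)
pipe.to("cuda")

prompt = "A vibrant cherry red sports car parked by a misty mountain lake at sunrise"
image = pipe(
    prompt=prompt,
    guidance_scale=7.0,          # illustrative settings
    num_inference_steps=50,
    width=1024,
    height=1024,
).images[0]
image.save("cogview3.png")
```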

## CogView3PlusPipeline

[[autodoc]] CogView3PlusPipeline
- all
- __call__

## CogView3PipelineOutput

[[autodoc]] pipelines.cogview3.pipeline_output.CogView3PipelineOutput
@@ -1,89 +0,0 @@
<!--Copyright 2024 The HuggingFace Team, The Black Forest Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

# FluxControlInpaint

FluxControlInpaintPipeline is an implementation of Inpainting for Flux.1 Depth/Canny models. It is a pipeline that allows you to inpaint images using the Flux.1 Depth/Canny models. The pipeline takes an image and a mask as input and returns the inpainted image.

FLUX.1 Depth and Canny [dev] is a 12 billion parameter rectified flow transformer capable of generating an image based on a text description while following the structure of a given input image. **This is not a ControlNet model**.

| Control type | Developer | Link |
| -------- | ---------- | ---- |
| Depth | [Black Forest Labs](https://huggingface.co/black-forest-labs) | [Link](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev) |
| Canny | [Black Forest Labs](https://huggingface.co/black-forest-labs) | [Link](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev) |

<Tip>

Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).

</Tip>

```python
import torch
from diffusers import FluxControlInpaintPipeline
from diffusers.models.transformers import FluxTransformer2DModel
from transformers import T5EncoderModel
from diffusers.utils import load_image, make_image_grid
from image_gen_aux import DepthPreprocessor # https://github.com/huggingface/image_gen_aux
from PIL import Image
import numpy as np

pipe = FluxControlInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Depth-dev",
    torch_dtype=torch.bfloat16,
)
# use following lines if you have GPU constraints
# ---------------------------------------------------------------
transformer = FluxTransformer2DModel.from_pretrained(
    "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="transformer", torch_dtype=torch.bfloat16
)
text_encoder_2 = T5EncoderModel.from_pretrained(
    "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
)
pipe.transformer = transformer
pipe.text_encoder_2 = text_encoder_2
pipe.enable_model_cpu_offload()
# ---------------------------------------------------------------
pipe.to("cuda")

prompt = "a blue robot singing opera with human-like expressions"
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

head_mask = np.zeros_like(image)
head_mask[65:580, 300:642] = 255
mask_image = Image.fromarray(head_mask)

processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(image)[0].convert("RGB")

output = pipe(
    prompt=prompt,
    image=image,
    control_image=control_image,
    mask_image=mask_image,
    num_inference_steps=30,
    strength=0.9,
    guidance_scale=10.0,
    generator=torch.Generator().manual_seed(42),
).images[0]
make_image_grid([image, control_image, mask_image, output.resize(image.size)], rows=1, cols=4).save("output.png")
```

## FluxControlInpaintPipeline
[[autodoc]] FluxControlInpaintPipeline
- all
- __call__


## FluxPipelineOutput
[[autodoc]] pipelines.flux.pipeline_output.FluxPipelineOutput
@@ -26,7 +26,7 @@ The original codebase can be found at [lllyasviel/ControlNet](https://github.com

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -1,4 +1,4 @@
<!--Copyright 2024 The HuggingFace Team, The InstantX Team, and the XLabs Team. All rights reserved.
<!--Copyright 2024 The HuggingFace Team and The InstantX Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
@@ -31,18 +31,10 @@ This controlnet code is implemented by [The InstantX Team](https://huggingface.c
| Depth | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/Shakker-Labs/FLUX.1-dev-ControlNet-Depth) |
| Union | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Union) |

XLabs ControlNets are also supported, which were contributed by the [XLabs team](https://huggingface.co/XLabs-AI).

| ControlNet type | Developer | Link |
| -------- | ---------- | ---- |
| Canny | [The XLabs Team](https://huggingface.co/XLabs-AI) | [Link](https://huggingface.co/XLabs-AI/flux-controlnet-canny-diffusers) |
| Depth | [The XLabs Team](https://huggingface.co/XLabs-AI) | [Link](https://huggingface.co/XLabs-AI/flux-controlnet-depth-diffusers) |
| HED | [The XLabs Team](https://huggingface.co/XLabs-AI) | [Link](https://huggingface.co/XLabs-AI/flux-controlnet-hed-diffusers) |


<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -26,7 +26,7 @@ This code is implemented by Tencent Hunyuan Team. You can find pre-trained check

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -28,7 +28,6 @@ This controlnet code is mainly implemented by [The InstantX Team](https://huggin
| ControlNet type | Developer | Link |
| -------- | ---------- | ---- |
| Canny | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Canny) |
| Depth | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Depth) |
| Pose | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Pose) |
| Tile | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Tile) |
| Inpainting | [The AlimamaCreative Team](https://huggingface.co/alimama-creative) | [link](https://huggingface.co/alimama-creative/SD3-Controlnet-Inpainting) |
@@ -36,7 +35,7 @@ This controlnet code is mainly implemented by [The InstantX Team](https://huggin

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -32,7 +32,7 @@ If you don't see a checkpoint you're interested in, you can train your own SDXL

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -1,35 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

# ControlNetUnion

ControlNetUnionModel is an implementation of ControlNet for Stable Diffusion XL.

The ControlNet model was introduced in [ControlNetPlus](https://github.com/xinsir6/ControlNetPlus) by xinsir6. It supports multiple conditioning inputs without increasing computation.

*We design a new architecture that can support 10+ control types in condition text-to-image generation and can generate high resolution images visually comparable with midjourney. The network is based on the original ControlNet architecture, we propose two new modules to: 1 Extend the original ControlNet to support different image conditions using the same network parameter. 2 Support multiple conditions input without increasing computation offload, which is especially important for designers who want to edit image in detail, different conditions use the same condition encoder, without adding extra computations or parameters.*
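
As a rough usage sketch of a single union model driving an SDXL pipeline (the `xinsir/controlnet-union-sdxl-1.0` checkpoint id, the `control_mode` index convention, and the conditioning image below are assumptions for illustration, not values documented on this page):

```python
import torch
from diffusers import AutoencoderKL, ControlNetUnionModel, StableDiffusionXLControlNetUnionPipeline
from diffusers.utils import load_image

# One ControlNetUnion checkpoint serves several condition types (assumed checkpoint id).
controlnet = ControlNetUnionModel.from_pretrained(
    "xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16
)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = StableDiffusionXLControlNetUnionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()

# Placeholder conditioning image; for real use, pass a preprocessed map (depth, canny, ...).
# The integer in `control_mode` selects which condition type the union model applies
# (index mapping assumed from the ControlNetPlus repository).
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

image = pipe(
    prompt="a futuristic robot, studio lighting, high detail",
    control_image=[control_image],
    control_mode=[1],
    height=1024,
    width=1024,
).images[0]
image.save("controlnet_union.png")
```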

## StableDiffusionXLControlNetUnionPipeline
[[autodoc]] StableDiffusionXLControlNetUnionPipeline
- all
- __call__

## StableDiffusionXLControlNetUnionImg2ImgPipeline
[[autodoc]] StableDiffusionXLControlNetUnionImg2ImgPipeline
- all
- __call__

## StableDiffusionXLControlNetUnionInpaintPipeline
[[autodoc]] StableDiffusionXLControlNetUnionInpaintPipeline
- all
- __call__
@@ -26,7 +26,7 @@ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -32,7 +32,7 @@ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -19,7 +19,7 @@ Dance Diffusion is the first in a suite of generative audio tools for producers

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -22,7 +22,7 @@ The original codebase can be found at [hohonathanho/diffusion](https://github.co

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -22,7 +22,7 @@ The original codebase can be found at [facebookresearch/dit](https://github.com/

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -22,20 +22,12 @@ Flux can be quite expensive to run on consumer hardware devices. However, you ca

</Tip>

Flux comes in the following variants:
Flux comes in two variants:

| model type | model id |
* Timestep-distilled (`black-forest-labs/FLUX.1-schnell`)
|:----------:|:--------:|
* Guidance-distilled (`black-forest-labs/FLUX.1-dev`)
| Timestep-distilled | [`black-forest-labs/FLUX.1-schnell`](https://huggingface.co/black-forest-labs/FLUX.1-schnell) |
| Guidance-distilled | [`black-forest-labs/FLUX.1-dev`](https://huggingface.co/black-forest-labs/FLUX.1-dev) |
| Fill Inpainting/Outpainting (Guidance-distilled) | [`black-forest-labs/FLUX.1-Fill-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev) |
| Canny Control (Guidance-distilled) | [`black-forest-labs/FLUX.1-Canny-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev) |
| Depth Control (Guidance-distilled) | [`black-forest-labs/FLUX.1-Depth-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev) |
| Canny Control (LoRA) | [`black-forest-labs/FLUX.1-Canny-dev-lora`](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora) |
| Depth Control (LoRA) | [`black-forest-labs/FLUX.1-Depth-dev-lora`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev-lora) |
| Redux (Adapter) | [`black-forest-labs/FLUX.1-Redux-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev) |

All checkpoints have different usage which we detail below.
Both checkpoints have slightly difference usage which we detail below.

### Timestep-distilled

@@ -85,232 +77,7 @@ out = pipe(
out.save("image.png")
```

### Fill Inpainting/Outpainting

* Flux Fill pipeline does not require `strength` as an input like regular inpainting pipelines.
* It supports both inpainting and outpainting.

```python
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image

image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/cup.png")
mask = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/cup_mask.png")

repo_id = "black-forest-labs/FLUX.1-Fill-dev"
pipe = FluxFillPipeline.from_pretrained(repo_id, torch_dtype=torch.bfloat16).to("cuda")

image = pipe(
    prompt="a white paper cup",
    image=image,
    mask_image=mask,
    height=1632,
    width=1232,
    max_sequence_length=512,
    generator=torch.Generator("cpu").manual_seed(0)
).images[0]
image.save("output.png")
```

### Canny Control

**Note:** `black-forest-labs/Flux.1-Canny-dev` is _not_ a [`ControlNetModel`] model. ControlNet models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Canny Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.

```python
# !pip install -U controlnet-aux
import torch
from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16).to("cuda")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

processor = CannyDetector()
control_image = processor(control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)

image = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=50,
    guidance_scale=30.0,
).images[0]
image.save("output.png")
```

Canny Control is also possible with a LoRA variant of this condition. The usage is as follows:

```python
# !pip install -U controlnet-aux
import torch
from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

processor = CannyDetector()
control_image = processor(control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)

image = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=50,
    guidance_scale=30.0,
).images[0]
image.save("output.png")
```

### Depth Control

**Note:** `black-forest-labs/Flux.1-Depth-dev` is _not_ a ControlNet model. [`ControlNetModel`] models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Depth Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.

```python
# !pip install git+https://github.com/huggingface/image_gen_aux
import torch
from diffusers import FluxControlPipeline, FluxTransformer2DModel
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor

pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16).to("cuda")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")

image = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=30,
    guidance_scale=10.0,
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("output.png")
```

Depth Control is also possible with a LoRA variant of this condition. The usage is as follows:

```python
# !pip install git+https://github.com/huggingface/image_gen_aux
import torch
from diffusers import FluxControlPipeline, FluxTransformer2DModel
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor

pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")

image = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=30,
    guidance_scale=10.0,
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("output.png")
```

### Redux

* Flux Redux pipeline is an adapter for FLUX.1 base models. It can be used with both flux-dev and flux-schnell, for image-to-image generation.
* You can first use the `FluxPriorReduxPipeline` to get the `prompt_embeds` and `pooled_prompt_embeds`, and then feed them into the `FluxPipeline` for image-to-image generation.
* When using `FluxPriorReduxPipeline` with a base pipeline, you can set `text_encoder=None` and `text_encoder_2=None` in the base pipeline, in order to save VRAM.

```python
import torch
from diffusers import FluxPriorReduxPipeline, FluxPipeline
from diffusers.utils import load_image
device = "cuda"
dtype = torch.bfloat16


repo_redux = "black-forest-labs/FLUX.1-Redux-dev"
repo_base = "black-forest-labs/FLUX.1-dev"
pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained(repo_redux, torch_dtype=dtype).to(device)
pipe = FluxPipeline.from_pretrained(
    repo_base,
    text_encoder=None,
    text_encoder_2=None,
    torch_dtype=torch.bfloat16
).to(device)

image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img5.png")
pipe_prior_output = pipe_prior_redux(image)
images = pipe(
    guidance_scale=2.5,
    num_inference_steps=50,
    generator=torch.Generator("cpu").manual_seed(0),
    **pipe_prior_output,
).images
images[0].save("flux-redux.png")
```

## Combining Flux Turbo LoRAs with Flux Control, Fill, and Redux

We can combine Flux Turbo LoRAs with Flux Control and other pipelines like Fill and Redux to enable few-step inference. The example below shows how to do that for Flux Control LoRA for depth and turbo LoRA from [`ByteDance/Hyper-SD`](https://hf.co/ByteDance/Hyper-SD).

```py
from diffusers import FluxControlPipeline
from image_gen_aux import DepthPreprocessor
from diffusers.utils import load_image
from huggingface_hub import hf_hub_download
import torch

control_pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
control_pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora", adapter_name="depth")
control_pipe.load_lora_weights(
    hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd"
)
control_pipe.set_adapters(["depth", "hyper-sd"], adapter_weights=[0.85, 0.125])
control_pipe.enable_model_cpu_offload()

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")

image = control_pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=8,
    guidance_scale=10.0,
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("output.png")
```

## Note about `unload_lora_weights()` when using Flux LoRAs

When unloading the Control LoRA weights, call `pipe.unload_lora_weights(reset_to_overwritten_params=True)` to reset the `pipe.transformer` completely back to its original form. The resultant pipeline can then be used with methods like [`DiffusionPipeline.from_pipe`]. More details about this argument are available in [this PR](https://github.com/huggingface/diffusers/pull/10397).
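
A minimal sketch of that reset, assuming `pipe` is a [`FluxControlPipeline`] set up as in the Control LoRA examples above:

```python
# Assumes `pipe` is a FluxControlPipeline with a Control LoRA available, as shown earlier.
pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")
# ... run depth-controlled inference ...
pipe.unload_lora_weights(reset_to_overwritten_params=True)
# `pipe.transformer` is now restored to its original configuration, so the pipeline
# can safely be reused, for example with DiffusionPipeline.from_pipe().
```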

## Running FP16 inference

Flux can generate high-quality images with FP16 (i.e. to accelerate inference on Turing/Volta GPUs) but produces different outputs compared to FP32/BF16. The issue is that some activations in the text encoders have to be clipped when running in FP16, which affects the overall image. Forcing text encoders to run with FP32 inference thus removes this output difference. See [here](https://github.com/huggingface/diffusers/pull/9097#issuecomment-2272292516) for details.

FP16 inference code:
@@ -338,46 +105,6 @@ out = pipe(
out.save("image.png")
```

## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on output quality depending on the model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`FluxPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, FluxTransformer2DModel, FluxPipeline
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="text_encoder_2",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    text_encoder_2=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "a tiny astronaut hatching from an egg on the moon"
image = pipeline(prompt, guidance_scale=3.5, height=768, width=1360, num_inference_steps=50).images[0]
image.save("flux.png")
```

## Single File Loading for the `FluxTransformer2DModel`

The `FluxTransformer2DModel` supports loading checkpoints in the original format shipped by Black Forest Labs. This is also useful when trying to load finetunes or quantized versions of the models that have been published by the community.
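
As a minimal sketch of that workflow (the single-file URL below is an assumption for illustration; substitute the original-format checkpoint you actually want to load):

```python
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel

# Assumed single-file checkpoint location for illustration purposes only.
ckpt_url = "https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors"

transformer = FluxTransformer2DModel.from_single_file(ckpt_url, torch_dtype=torch.bfloat16)
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
```
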
@@ -436,52 +163,3 @@ image.save("flux-fp8-dev.png")
|
|||||||
[[autodoc]] FluxPipeline
|
[[autodoc]] FluxPipeline
|
||||||
- all
|
- all
|
||||||
- __call__
|
- __call__
|
||||||
|
|
||||||
## FluxImg2ImgPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxImg2ImgPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxInpaintPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxInpaintPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
|
|
||||||
## FluxControlNetInpaintPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxControlNetInpaintPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxControlNetImg2ImgPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxControlNetImg2ImgPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxControlPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxControlPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxControlImg2ImgPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxControlImg2ImgPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxPriorReduxPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxPriorReduxPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|
||||||
## FluxFillPipeline
|
|
||||||
|
|
||||||
[[autodoc]] FluxFillPipeline
|
|
||||||
- all
|
|
||||||
- __call__
|
|
||||||
|
|||||||
@@ -1,74 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->

# HunyuanVideo

[HunyuanVideo](https://www.arxiv.org/abs/2412.03603) by Tencent.

*Recent advancements in video generation have significantly impacted daily life for both individuals and industries. However, the leading video generation models remain closed-source, resulting in a notable performance gap between industry capabilities and those available to the public. In this report, we introduce HunyuanVideo, an innovative open-source video foundation model that demonstrates performance in video generation comparable to, or even surpassing, that of leading closed-source models. HunyuanVideo encompasses a comprehensive framework that integrates several key elements, including data curation, advanced architectural design, progressive model scaling and training, and an efficient infrastructure tailored for large-scale model training and inference. As a result, we successfully trained a video generative model with over 13 billion parameters, making it the largest among all open-source models. We conducted extensive experiments and implemented a series of targeted designs to ensure high visual quality, motion dynamics, text-video alignment, and advanced filming techniques. According to evaluations by professionals, HunyuanVideo outperforms previous state-of-the-art models, including Runway Gen-3, Luma 1.6, and three top-performing Chinese video generative models. By releasing the code for the foundation model and its applications, we aim to bridge the gap between closed-source and open-source communities. This initiative will empower individuals within the community to experiment with their ideas, fostering a more dynamic and vibrant video generation ecosystem. The code is publicly available at [this https URL](https://github.com/tencent/HunyuanVideo).*

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

Recommendations for inference (see the sketch after this list):
- Both text encoders should be in `torch.float16`.
- Transformer should be in `torch.bfloat16`.
- VAE should be in `torch.float16`.
- `num_frames` should be of the form `4 * k + 1`, for example `49` or `129`.
- For smaller resolution videos, try lower values of `shift` (between `2.0` to `5.0`) in the [Scheduler](https://huggingface.co/docs/diffusers/main/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler.shift). For larger resolution images, try higher values (between `7.0` and `12.0`). The default value is `7.0` for HunyuanVideo.
- For more information about supported resolutions and other details, please refer to the original repository [here](https://github.com/Tencent/HunyuanVideo/).
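
As a minimal loading sketch that applies these recommendations (the `hunyuanvideo-community/HunyuanVideo` repo id matches the quantization example below; the prompt, frame count, and the optional VAE tiling call are assumptions for illustration):

```python
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video

repo_id = "hunyuanvideo-community/HunyuanVideo"

# Transformer in bfloat16, as recommended above.
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    repo_id, subfolder="transformer", torch_dtype=torch.bfloat16
)
# Loading the rest of the pipeline in float16 keeps the text encoders and VAE in float16.
pipe = HunyuanVideoPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch.float16)
pipe.vae.enable_tiling()  # optional: reduces VAE memory use at larger resolutions (assumed helpful here)
pipe.enable_model_cpu_offload()

# num_frames = 61 satisfies the 4 * k + 1 constraint (k = 15).
video = pipe(
    prompt="A cat walks on the grass, realistic style.",
    num_frames=61,
    num_inference_steps=30,
).frames[0]
export_to_video(video, "cat.mp4", fps=15)
```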

## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`HunyuanVideoPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, HunyuanVideoTransformer3DModel, HunyuanVideoPipeline
from diffusers.utils import export_to_video

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = HunyuanVideoTransformer3DModel.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)

pipeline = HunyuanVideoPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo",
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "A cat walks on the grass, realistic style."
video = pipeline(prompt=prompt, num_frames=61, num_inference_steps=30).frames[0]
export_to_video(video, "cat.mp4", fps=15)
```

## HunyuanVideoPipeline

[[autodoc]] HunyuanVideoPipeline
- all
- __call__

## HunyuanVideoPipelineOutput

[[autodoc]] pipelines.hunyuan_video.pipeline_output.HunyuanVideoPipelineOutput
@@ -30,7 +30,7 @@ HunyuanDiT has the following components:

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -22,7 +22,7 @@ The original codebase can be found [here](https://github.com/ali-vilab/i2vgen-xl

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage).
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage).

</Tip>

@@ -25,7 +25,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community)

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -32,7 +32,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community)

<Tip>

Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -25,7 +25,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community)

<Tip>

Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -105,11 +105,3 @@ image.save("kolors_ipa_sample.png")

- all
- __call__

## KolorsImg2ImgPipeline

[[autodoc]] KolorsImg2ImgPipeline

- all
- __call__

@@ -22,7 +22,7 @@ The original codebase can be found at [CompVis/latent-diffusion](https://github.

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -28,7 +28,7 @@ This pipeline was contributed by [maxin-cn](https://github.com/maxin-cn). The or

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -70,47 +70,6 @@ Without torch.compile(): Average inference time: 16.246 seconds.
With torch.compile(): Average inference time: 14.573 seconds.
```

## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LattePipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, LatteTransformer3DModel, LattePipeline
from diffusers.utils import export_to_gif
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "maxin-cn/Latte-1",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = LatteTransformer3DModel.from_pretrained(
    "maxin-cn/Latte-1",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = LattePipeline.from_pretrained(
    "maxin-cn/Latte-1",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "A small cactus with a happy face in the Sahara desert."
video = pipeline(prompt).frames[0]
export_to_gif(video, "latte.gif")
```

## LattePipeline

[[autodoc]] LattePipeline
@@ -1,197 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->

# LTX Video

[LTX Video](https://huggingface.co/Lightricks/LTX-Video) is the first DiT-based video generation model capable of generating high-quality videos in real-time. It produces 24 FPS videos at a 768x512 resolution faster than they can be watched. Trained on a large-scale dataset of diverse videos, the model generates high-resolution videos with realistic and varied content. We provide a model for both text-to-video as well as image + text-to-video use cases.

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

Available models:

| Model name | Recommended dtype |
|:-------------:|:-----------------:|
| [`LTX Video 0.9.0`](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.safetensors) | `torch.bfloat16` |
| [`LTX Video 0.9.1`](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors) | `torch.bfloat16` |

Note: The recommended dtype is for the transformer component. The VAE and text encoders can be either `torch.float32`, `torch.bfloat16` or `torch.float16` but the recommended dtype is `torch.bfloat16` as used in the original repository.
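For reference, a minimal loading sketch that follows this recommendation, with every component in `torch.bfloat16` (the repository id comes from the table above; adapt it to the checkpoint you actually use):

```py
import torch
from diffusers import LTXPipeline

# Minimal sketch: load all components in the recommended bfloat16 precision.
pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to("cuda")
```
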
## Loading Single Files

Loading the original LTX Video checkpoints is also possible with [`~ModelMixin.from_single_file`]. We recommend using `from_single_file` for the Lightricks series of models, as they plan to release multiple models in the future in the single file format.

```python
import torch
from diffusers import AutoencoderKLLTXVideo, LTXImageToVideoPipeline, LTXVideoTransformer3DModel

# `single_file_url` could also be https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.1.safetensors
single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.safetensors"
transformer = LTXVideoTransformer3DModel.from_single_file(
    single_file_url, torch_dtype=torch.bfloat16
)
vae = AutoencoderKLLTXVideo.from_single_file(single_file_url, torch_dtype=torch.bfloat16)
pipe = LTXImageToVideoPipeline.from_pretrained(
    "Lightricks/LTX-Video", transformer=transformer, vae=vae, torch_dtype=torch.bfloat16
)

# ... inference code ...
```

Alternatively, the pipeline can be used to load the weights with [`~FromSingleFileMixin.from_single_file`].

```python
import torch
from diffusers import LTXImageToVideoPipeline
from transformers import T5EncoderModel, T5Tokenizer

single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.safetensors"
text_encoder = T5EncoderModel.from_pretrained(
    "Lightricks/LTX-Video", subfolder="text_encoder", torch_dtype=torch.bfloat16
)
tokenizer = T5Tokenizer.from_pretrained(
    "Lightricks/LTX-Video", subfolder="tokenizer", torch_dtype=torch.bfloat16
)
pipe = LTXImageToVideoPipeline.from_single_file(
    single_file_url, text_encoder=text_encoder, tokenizer=tokenizer, torch_dtype=torch.bfloat16
)
```

Loading [LTX GGUF checkpoints](https://huggingface.co/city96/LTX-Video-gguf) is also supported:

```py
import torch
from diffusers.utils import export_to_video
from diffusers import LTXPipeline, LTXVideoTransformer3DModel, GGUFQuantizationConfig

ckpt_path = (
    "https://huggingface.co/city96/LTX-Video-gguf/blob/main/ltx-video-2b-v0.9-Q3_K_S.gguf"
)
transformer = LTXVideoTransformer3DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
pipe = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()

prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=704,
    height=480,
    num_frames=161,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output_gguf_ltx.mp4", fps=24)
```

Make sure to read the [documentation on GGUF](../../quantization/gguf) to learn more about our GGUF support.

<!-- TODO(aryan): Update this when official weights are supported -->

Loading and running inference with [LTX Video 0.9.1](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors) weights.

```python
import torch
from diffusers import LTXPipeline
from diffusers.utils import export_to_video

pipe = LTXPipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.1-diffusers", torch_dtype=torch.bfloat16)
pipe.to("cuda")

prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=768,
    height=512,
    num_frames=161,
    decode_timestep=0.03,
    decode_noise_scale=0.025,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)
```

Refer to [this section](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogvideox#memory-optimization) to learn more about optimizing memory consumption.
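As a minimal sketch of the most common memory savers (assuming the `pipe` object from the example above; treat this as a starting point rather than the section's official recipe):

```py
# Offload submodules to CPU until they are needed, and decode latents in tiles
# to lower peak VRAM usage during the VAE step.
pipe.enable_model_cpu_offload()
pipe.vae.enable_tiling()
```
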
## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LTXPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, LTXVideoTransformer3DModel, LTXPipeline
from diffusers.utils import export_to_video
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "Lightricks/LTX-Video",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = LTXVideoTransformer3DModel.from_pretrained(
    "Lightricks/LTX-Video",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "A detailed wooden toy ship with intricately carved masts and sails is seen gliding smoothly over a plush, blue carpet that mimics the waves of the sea. The ship's hull is painted a rich brown, with tiny windows. The carpet, soft and textured, provides a perfect backdrop, resembling an oceanic expanse. Surrounding the ship are various other toys and children's items, hinting at a playful environment. The scene captures the innocence and imagination of childhood, with the toy ship's journey symbolizing endless adventures in a whimsical, indoor setting."
video = pipeline(prompt=prompt, num_frames=161, num_inference_steps=50).frames[0]
export_to_video(video, "ship.mp4", fps=24)
```

## LTXPipeline

[[autodoc]] LTXPipeline
  - all
  - __call__

## LTXImageToVideoPipeline

[[autodoc]] LTXImageToVideoPipeline
  - all
  - __call__

## LTXPipelineOutput

[[autodoc]] pipelines.ltx.pipeline_output.LTXPipelineOutput
@@ -47,7 +47,7 @@ This pipeline was contributed by [PommesPeter](https://github.com/PommesPeter).
<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -82,46 +82,6 @@ pipeline.vae.decode = torch.compile(pipeline.vae.decode, mode="max-autotune", fu
image = pipeline(prompt="Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures").images[0]
```

## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaText2ImgPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaText2ImgPipeline
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = Transformer2DModel.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = LuminaText2ImgPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "a tiny astronaut hatching from an egg on the moon"
image = pipeline(prompt).images[0]
image.save("lumina.png")
```

## LuminaText2ImgPipeline

[[autodoc]] LuminaText2ImgPipeline
@@ -43,7 +43,7 @@ The original checkpoints can be found under the [PRS-ETH](https://huggingface.co
<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage).
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage).

</Tip>
@@ -1,275 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-->

# Mochi 1 Preview

> [!TIP]
> Only a research preview of the model weights is available at the moment.

[Mochi 1](https://huggingface.co/genmo/mochi-1-preview) is a video generation model by Genmo with a strong focus on prompt adherence and motion quality. The model features a 10B parameter Asymmetric Diffusion Transformer (AsymmDiT) architecture, and uses non-square QKV and output projection layers to reduce inference memory requirements. A single T5-XXL model is used to encode prompts.

*Mochi 1 preview is an open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence in preliminary evaluation. This model dramatically closes the gap between closed and open video generation systems. The model is released under a permissive Apache 2.0 license.*

> [!TIP]
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`MochiPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, MochiTransformer3DModel, MochiPipeline
from diffusers.utils import export_to_video
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "genmo/mochi-1-preview",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = MochiTransformer3DModel.from_pretrained(
    "genmo/mochi-1-preview",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = MochiPipeline.from_pretrained(
    "genmo/mochi-1-preview",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

video = pipeline(
    "Close-up of a cats eye, with the galaxy reflected in the cats eye. Ultra high resolution 4k.",
    num_inference_steps=28,
    guidance_scale=3.5
).frames[0]
export_to_video(video, "cat.mp4")
```

## Generating videos with Mochi-1 Preview

The following example will download the full precision `mochi-1-preview` weights and produce the highest quality results but will require at least 42GB VRAM to run.

```python
import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video

pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview")

# Enable memory savings
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

prompt = "Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k."

with torch.autocast("cuda", torch.bfloat16, cache_enabled=False):
    frames = pipe(prompt, num_frames=85).frames[0]

export_to_video(frames, "mochi.mp4", fps=30)
```

## Using a lower precision variant to save memory

The following example will use the `bfloat16` variant of the model and requires 22GB VRAM to run. There is a slight drop in the quality of the generated video as a result.

```python
import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video

pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", variant="bf16", torch_dtype=torch.bfloat16)

# Enable memory savings
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

prompt = "Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k."
frames = pipe(prompt, num_frames=85).frames[0]

export_to_video(frames, "mochi.mp4", fps=30)
```

## Reproducing the results from the Genmo Mochi repo

The [Genmo Mochi implementation](https://github.com/genmoai/mochi/tree/main) uses different precision values for each stage in the inference process. The text encoder and VAE use `torch.float32`, while the DiT uses `torch.bfloat16` with the [attention kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html#torch.nn.attention.sdpa_kernel) set to `EFFICIENT_ATTENTION`. Diffusers pipelines currently do not support setting different `dtypes` for different stages of the pipeline. In order to run inference in the same way as the original implementation, please refer to the following example.

<Tip>
The original Mochi implementation zeros out empty prompts. However, enabling this option and placing the entire pipeline under autocast can lead to numerical overflows with the T5 text encoder.

When enabling `force_zeros_for_empty_prompt`, it is recommended to run the text encoding step outside the autocast context in full precision.
</Tip>

<Tip>
Decoding the latents in full precision is very memory intensive. You will need at least 70GB VRAM to generate the 163 frames in this example. To reduce memory, either reduce the number of frames or run the decoding step in `torch.bfloat16`.
</Tip>

```python
import torch
from torch.nn.attention import SDPBackend, sdpa_kernel

from diffusers import MochiPipeline
from diffusers.utils import export_to_video
from diffusers.video_processor import VideoProcessor

pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", force_zeros_for_empty_prompt=True)
pipe.enable_vae_tiling()
pipe.enable_model_cpu_offload()

prompt = "An aerial shot of a parade of elephants walking across the African savannah. The camera showcases the herd and the surrounding landscape."

with torch.no_grad():
    prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask = (
        pipe.encode_prompt(prompt=prompt)
    )

with torch.autocast("cuda", torch.bfloat16):
    with sdpa_kernel(SDPBackend.EFFICIENT_ATTENTION):
        frames = pipe(
            prompt_embeds=prompt_embeds,
            prompt_attention_mask=prompt_attention_mask,
            negative_prompt_embeds=negative_prompt_embeds,
            negative_prompt_attention_mask=negative_prompt_attention_mask,
            guidance_scale=4.5,
            num_inference_steps=64,
            height=480,
            width=848,
            num_frames=163,
            generator=torch.Generator("cuda").manual_seed(0),
            output_type="latent",
            return_dict=False,
        )[0]

video_processor = VideoProcessor(vae_scale_factor=8)
has_latents_mean = hasattr(pipe.vae.config, "latents_mean") and pipe.vae.config.latents_mean is not None
has_latents_std = hasattr(pipe.vae.config, "latents_std") and pipe.vae.config.latents_std is not None
if has_latents_mean and has_latents_std:
    latents_mean = (
        torch.tensor(pipe.vae.config.latents_mean).view(1, 12, 1, 1, 1).to(frames.device, frames.dtype)
    )
    latents_std = (
        torch.tensor(pipe.vae.config.latents_std).view(1, 12, 1, 1, 1).to(frames.device, frames.dtype)
    )
    frames = frames * latents_std / pipe.vae.config.scaling_factor + latents_mean
else:
    frames = frames / pipe.vae.config.scaling_factor

with torch.no_grad():
    video = pipe.vae.decode(frames.to(pipe.vae.dtype), return_dict=False)[0]

video = video_processor.postprocess_video(video)[0]
export_to_video(video, "mochi.mp4", fps=30)
```

## Running inference with multiple GPUs

It is possible to split the large Mochi transformer across multiple GPUs using the `device_map` and `max_memory` options in `from_pretrained`. In the following example we split the model across two GPUs, each with 24GB of VRAM.

```python
import torch
from diffusers import MochiPipeline, MochiTransformer3DModel
from diffusers.utils import export_to_video

model_id = "genmo/mochi-1-preview"
transformer = MochiTransformer3DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    device_map="auto",
    max_memory={0: "24GB", 1: "24GB"}
)

pipe = MochiPipeline.from_pretrained(model_id, transformer=transformer)
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

with torch.autocast(device_type="cuda", dtype=torch.bfloat16, cache_enabled=False):
    frames = pipe(
        prompt="Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k.",
        negative_prompt="",
        height=480,
        width=848,
        num_frames=85,
        num_inference_steps=50,
        guidance_scale=4.5,
        num_videos_per_prompt=1,
        generator=torch.Generator(device="cuda").manual_seed(0),
        max_sequence_length=256,
        output_type="pil",
    ).frames[0]

export_to_video(frames, "output.mp4", fps=30)
```

## Using single file loading with the Mochi Transformer

You can use `from_single_file` to load the Mochi transformer in its original format.

<Tip>
Diffusers currently doesn't support using the FP8 scaled versions of the Mochi single file checkpoints.
</Tip>

```python
import torch
from diffusers import MochiPipeline, MochiTransformer3DModel
from diffusers.utils import export_to_video

model_id = "genmo/mochi-1-preview"

ckpt_path = "https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/split_files/diffusion_models/mochi_preview_bf16.safetensors"

transformer = MochiTransformer3DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)

pipe = MochiPipeline.from_pretrained(model_id, transformer=transformer)
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

with torch.autocast(device_type="cuda", dtype=torch.bfloat16, cache_enabled=False):
    frames = pipe(
        prompt="Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k.",
        negative_prompt="",
        height=480,
        width=848,
        num_frames=85,
        num_inference_steps=50,
        guidance_scale=4.5,
        num_videos_per_prompt=1,
        generator=torch.Generator(device="cuda").manual_seed(0),
        max_sequence_length=256,
        output_type="pil",
    ).frames[0]

export_to_video(frames, "output.mp4", fps=30)
```

## MochiPipeline

[[autodoc]] MochiPipeline
  - all
  - __call__

## MochiPipelineOutput

[[autodoc]] pipelines.mochi.pipeline_output.MochiPipelineOutput
@@ -42,7 +42,7 @@ During inference:
<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -48,26 +48,13 @@ Since RegEx is supported as a way for matching layer identifiers, it is crucial
  - all
  - __call__

## StableDiffusionPAGInpaintPipeline
[[autodoc]] StableDiffusionPAGInpaintPipeline
  - all
  - __call__

## StableDiffusionPAGPipeline
[[autodoc]] StableDiffusionPAGPipeline
  - all
  - __call__

## StableDiffusionPAGImg2ImgPipeline
[[autodoc]] StableDiffusionPAGImg2ImgPipeline
  - all
  - __call__

## StableDiffusionControlNetPAGPipeline
[[autodoc]] StableDiffusionControlNetPAGPipeline

## StableDiffusionControlNetPAGInpaintPipeline
[[autodoc]] StableDiffusionControlNetPAGInpaintPipeline
  - all
  - __call__

@@ -101,10 +88,6 @@ Since RegEx is supported as a way for matching layer identifiers, it is crucial
  - all
  - __call__

## StableDiffusion3PAGImg2ImgPipeline
[[autodoc]] StableDiffusion3PAGImg2ImgPipeline
  - all
  - __call__

## PixArtSigmaPAGPipeline
[[autodoc]] PixArtSigmaPAGPipeline
@@ -26,7 +26,7 @@ Paint by Example is supported by the official [Fantasy-Studio/Paint-by-Example](
<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -37,7 +37,7 @@ But with circular padding, the right and the left parts are matching (`circular_

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -22,7 +22,7 @@ You can find additional information about InstructPix2Pix on the [project page](

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

@@ -31,7 +31,7 @@ Some notes about this pipeline:

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>
@@ -1,107 +0,0 @@
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->

# SanaPipeline

[SANA: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformers](https://huggingface.co/papers/2410.10629) from NVIDIA and MIT HAN Lab, by Enze Xie, Junsong Chen, Junyu Chen, Han Cai, Haotian Tang, Yujun Lin, Zhekai Zhang, Muyang Li, Ligeng Zhu, Yao Lu, Song Han.

The abstract from the paper is:

*We introduce Sana, a text-to-image framework that can efficiently generate images up to 4096×4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU. Core designs include: (1) Deep compression autoencoder: unlike traditional AEs, which compress images only 8×, we trained an AE that can compress images 32×, effectively reducing the number of latent tokens. (2) Linear DiT: we replace all vanilla attention in DiT with linear attention, which is more efficient at high resolutions without sacrificing quality. (3) Decoder-only text encoder: we replaced T5 with modern decoder-only small LLM as the text encoder and designed complex human instruction with in-context learning to enhance the image-text alignment. (4) Efficient training and sampling: we propose Flow-DPM-Solver to reduce sampling steps, with efficient caption labeling and selection to accelerate convergence. As a result, Sana-0.6B is very competitive with modern giant diffusion model (e.g. Flux-12B), being 20 times smaller and 100+ times faster in measured throughput. Moreover, Sana-0.6B can be deployed on a 16GB laptop GPU, taking less than 1 second to generate a 1024×1024 resolution image. Sana enables content creation at low cost. Code and model will be publicly released.*

<Tip>

Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.

</Tip>

This pipeline was contributed by [lawrence-cj](https://github.com/lawrence-cj) and [chenjy2003](https://github.com/chenjy2003). The original codebase can be found [here](https://github.com/NVlabs/Sana). The original weights can be found under [hf.co/Efficient-Large-Model](https://huggingface.co/Efficient-Large-Model).

Available models:

| Model | Recommended dtype |
|:-----:|:-----------------:|
| [`Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers) | `torch.bfloat16` |
| [`Efficient-Large-Model/Sana_1600M_1024px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_diffusers) | `torch.float16` |
| [`Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers) | `torch.float16` |
| [`Efficient-Large-Model/Sana_1600M_512px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_diffusers) | `torch.float16` |
| [`Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers) | `torch.float16` |
| [`Efficient-Large-Model/Sana_600M_1024px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_600M_1024px_diffusers) | `torch.float16` |
| [`Efficient-Large-Model/Sana_600M_512px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_600M_512px_diffusers) | `torch.float16` |

Refer to [this](https://huggingface.co/collections/Efficient-Large-Model/sana-673efba2a57ed99843f11f9e) collection for more information.

Note: The recommended dtype mentioned is for the transformer weights. The text encoder and VAE weights must stay in `torch.bfloat16` or `torch.float32` for the model to work correctly. Please refer to the inference example below to see how to load the model with the recommended dtype.

<Tip>

Make sure to pass the `variant` argument for downloaded checkpoints to use lower disk space. Set it to `"fp16"` for models with recommended dtype as `torch.float16`, and `"bf16"` for models with recommended dtype as `torch.bfloat16`. By default, `torch.float32` weights are downloaded, which use twice the amount of disk storage. Additionally, `torch.float32` weights can be downcasted on-the-fly by specifying the `torch_dtype` argument. Read about it in the [docs](https://huggingface.co/docs/diffusers/v0.31.0/en/api/pipelines/overview#diffusers.DiffusionPipeline.from_pretrained).

</Tip>
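
Putting the two notes above together, a minimal loading sketch might look as follows (repository id and `variant` are taken from the table above; the explicit casts reflect the dtype note and should be adapted to the checkpoint you use):

```py
import torch
from diffusers import SanaPipeline

pipe = SanaPipeline.from_pretrained(
    "Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers",
    variant="bf16",
    torch_dtype=torch.bfloat16,
)
# Keep the text encoder and VAE in bfloat16, as recommended in the note above.
pipe.text_encoder.to(torch.bfloat16)
pipe.vae.to(torch.bfloat16)
pipe.to("cuda")
```
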
## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`SanaPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, SanaTransformer2DModel, SanaPipeline
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, AutoModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = AutoModel.from_pretrained(
    "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = SanaTransformer2DModel.from_pretrained(
    "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = SanaPipeline.from_pretrained(
    "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "a tiny astronaut hatching from an egg on the moon"
image = pipeline(prompt).images[0]
image.save("sana.png")
```

## SanaPipeline

[[autodoc]] SanaPipeline
  - all
  - __call__

## SanaPAGPipeline

[[autodoc]] SanaPAGPipeline
  - all
  - __call__

## SanaPipelineOutput

[[autodoc]] pipelines.sana.pipeline_output.SanaPipelineOutput
Some files were not shown because too many files have changed in this diff.