Compare commits

..

3 Commits

Author SHA1 Message Date
Sayak Paul
12f65435cb Merge branch 'main' into complete-sentences-scripts 2025-08-07 09:58:56 +05:30
Sayak Paul
3e615b3f5b Merge branch 'main' into complete-sentences-scripts 2025-08-01 08:14:31 +05:30
sayakpaul
65c2da5f42 complete the licensing statement. 2025-07-28 11:33:35 +05:30
1339 changed files with 25011 additions and 106198 deletions

View File

@@ -7,7 +7,7 @@ on:
env: env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
@@ -38,8 +38,9 @@ jobs:
run: | run: |
apt update apt update
apt install -y libpq-dev postgresql-client apt install -y libpq-dev postgresql-client
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install -r benchmarks/requirements.txt python -m uv pip install -e [quality,test]
python -m uv pip install -r benchmarks/requirements.txt
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py

View File

@@ -42,39 +42,18 @@ jobs:
CHANGED_FILES: ${{ steps.file_changes.outputs.all }} CHANGED_FILES: ${{ steps.file_changes.outputs.all }}
run: | run: |
echo "$CHANGED_FILES" echo "$CHANGED_FILES"
ALLOWED_IMAGES=( for FILE in $CHANGED_FILES; do
diffusers-pytorch-cpu
diffusers-pytorch-cuda
diffusers-pytorch-xformers-cuda
diffusers-pytorch-minimum-cuda
diffusers-doc-builder
)
declare -A IMAGES_TO_BUILD=()
for FILE in $CHANGED_FILES; do
# skip anything that isn't still on disk # skip anything that isn't still on disk
if [[ ! -e "$FILE" ]]; then if [[ ! -f "$FILE" ]]; then
echo "Skipping removed file $FILE" echo "Skipping removed file $FILE"
continue continue
fi
if [[ "$FILE" == docker/*Dockerfile ]]; then
DOCKER_PATH="${FILE%/Dockerfile}"
DOCKER_TAG=$(basename "$DOCKER_PATH")
echo "Building Docker image for $DOCKER_TAG"
docker build -t "$DOCKER_TAG" "$DOCKER_PATH"
fi fi
for IMAGE in "${ALLOWED_IMAGES[@]}"; do
if [[ "$FILE" == docker/${IMAGE}/* ]]; then
IMAGES_TO_BUILD["$IMAGE"]=1
fi
done
done
if [[ ${#IMAGES_TO_BUILD[@]} -eq 0 ]]; then
echo "No relevant Docker changes detected."
exit 0
fi
for IMAGE in "${!IMAGES_TO_BUILD[@]}"; do
DOCKER_PATH="docker/${IMAGE}"
echo "Building Docker image for $IMAGE"
docker build -t "$IMAGE" "$DOCKER_PATH"
done done
if: steps.file_changes.outputs.all != '' if: steps.file_changes.outputs.all != ''
@@ -93,6 +72,7 @@ jobs:
image-name: image-name:
- diffusers-pytorch-cpu - diffusers-pytorch-cpu
- diffusers-pytorch-cuda - diffusers-pytorch-cuda
- diffusers-pytorch-cuda
- diffusers-pytorch-xformers-cuda - diffusers-pytorch-xformers-cuda
- diffusers-pytorch-minimum-cuda - diffusers-pytorch-minimum-cuda
- diffusers-doc-builder - diffusers-doc-builder

View File

@@ -12,33 +12,7 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
check-links:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install doc-builder
run: |
uv pip install --system git+https://github.com/huggingface/doc-builder.git@main
- name: Check documentation links
run: |
uv run doc-builder check-links docs/source/en
build: build:
needs: check-links
uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
with: with:
commit_sha: ${{ github.event.pull_request.head.sha }} commit_sha: ${{ github.event.pull_request.head.sha }}

View File

@@ -74,7 +74,7 @@ jobs:
python-version: "3.10" python-version: "3.10"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install --upgrade pip python -m pip install --upgrade pip
pip install --upgrade huggingface_hub pip install --upgrade huggingface_hub
# Check secret is set # Check secret is set

View File

@@ -7,7 +7,7 @@ on:
env: env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
@@ -71,11 +71,10 @@ jobs:
run: nvidia-smi run: nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install -e [quality,test]
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 python -m uv pip install pytest-reportlog
uv pip install pytest-reportlog
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -85,8 +84,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \ --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
@@ -125,12 +124,11 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 python -m uv pip install pytest-reportlog
uv pip install pytest-reportlog
- name: Environment - name: Environment
run: python utils/print_env.py run: python utils/print_env.py
@@ -141,8 +139,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_${{ matrix.module }}_cuda \ --make-reports=tests_torch_${{ matrix.module }}_cuda \
--report-log=tests_torch_${{ matrix.module }}_cuda.log \ --report-log=tests_torch_${{ matrix.module }}_cuda.log \
tests/${{ matrix.module }} tests/${{ matrix.module }}
@@ -154,8 +152,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
--make-reports=examples_torch_cuda \ -s -v --make-reports=examples_torch_cuda \
--report-log=examples_torch_cuda.log \ --report-log=examples_torch_cuda.log \
examples/ examples/
@@ -193,9 +191,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m uv pip install -e [quality,test,training]
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -204,7 +201,7 @@ jobs:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
RUN_COMPILE: yes RUN_COMPILE: yes
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_compile_cuda_failures_short.txt run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -235,12 +232,11 @@ jobs:
run: nvidia-smi run: nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 python -m uv pip install pytest-reportlog
uv pip install pytest-reportlog
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -251,7 +247,7 @@ jobs:
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
BIG_GPU_MEMORY: 40 BIG_GPU_MEMORY: 40
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-m "big_accelerator" \ -m "big_accelerator" \
--make-reports=tests_big_gpu_torch_cuda \ --make-reports=tests_big_gpu_torch_cuda \
--report-log=tests_big_gpu_torch_cuda.log \ --report-log=tests_big_gpu_torch_cuda.log \
@@ -286,11 +282,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
@@ -302,8 +297,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_minimum_version_cuda \ --make-reports=tests_torch_minimum_version_cuda \
tests/models/test_modeling_common.py \ tests/models/test_modeling_common.py \
tests/pipelines/test_pipelines_common.py \ tests/pipelines/test_pipelines_common.py \
@@ -345,9 +340,6 @@ jobs:
- backend: "optimum_quanto" - backend: "optimum_quanto"
test_location: "quanto" test_location: "quanto"
additional_deps: [] additional_deps: []
- backend: "nvidia_modelopt"
test_location: "modelopt"
additional_deps: []
runs-on: runs-on:
group: aws-g6e-xlarge-plus group: aws-g6e-xlarge-plus
container: container:
@@ -362,14 +354,13 @@ jobs:
run: nvidia-smi run: nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install -U ${{ matrix.config.backend }} python -m uv pip install -e [quality,test]
python -m uv pip install -U ${{ matrix.config.backend }}
if [ "${{ join(matrix.config.additional_deps, ' ') }}" != "" ]; then if [ "${{ join(matrix.config.additional_deps, ' ') }}" != "" ]; then
uv pip install ${{ join(matrix.config.additional_deps, ' ') }} python -m uv pip install ${{ join(matrix.config.additional_deps, ' ') }}
fi fi
uv pip install pytest-reportlog python -m uv pip install pytest-reportlog
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -380,7 +371,7 @@ jobs:
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
BIG_GPU_MEMORY: 40 BIG_GPU_MEMORY: 40
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
--make-reports=tests_${{ matrix.config.backend }}_torch_cuda \ --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
--report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \ --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
tests/quantization/${{ matrix.config.test_location }} tests/quantization/${{ matrix.config.test_location }}
@@ -415,11 +406,10 @@ jobs:
run: nvidia-smi run: nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install -U bitsandbytes optimum_quanto python -m uv pip install -e [quality,test]
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m uv pip install -U bitsandbytes optimum_quanto
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 python -m uv pip install pytest-reportlog
uv pip install pytest-reportlog
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -430,7 +420,7 @@ jobs:
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
BIG_GPU_MEMORY: 40 BIG_GPU_MEMORY: 40
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
--make-reports=tests_pipeline_level_quant_torch_cuda \ --make-reports=tests_pipeline_level_quant_torch_cuda \
--report-log=tests_pipeline_level_quant_torch_cuda.log \ --report-log=tests_pipeline_level_quant_torch_cuda.log \
tests/quantization/test_pipeline_level_quantization.py tests/quantization/test_pipeline_level_quantization.py
@@ -530,11 +520,11 @@ jobs:
# - name: Install dependencies # - name: Install dependencies
# shell: arch -arch arm64 bash {0} # shell: arch -arch arm64 bash {0}
# run: | # run: |
# ${CONDA_RUN} pip install --upgrade pip uv # ${CONDA_RUN} python -m pip install --upgrade pip uv
# ${CONDA_RUN} uv pip install -e ".[quality]" # ${CONDA_RUN} python -m uv pip install -e [quality,test]
# ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu # ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
# ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate # ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
# ${CONDA_RUN} uv pip install pytest-reportlog # ${CONDA_RUN} python -m uv pip install pytest-reportlog
# - name: Environment # - name: Environment
# shell: arch -arch arm64 bash {0} # shell: arch -arch arm64 bash {0}
# run: | # run: |
@@ -545,7 +535,7 @@ jobs:
# HF_HOME: /System/Volumes/Data/mnt/cache # HF_HOME: /System/Volumes/Data/mnt/cache
# HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} # HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
# run: | # run: |
# ${CONDA_RUN} pytest -n 1 --make-reports=tests_torch_mps \ # ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
# --report-log=tests_torch_mps.log \ # --report-log=tests_torch_mps.log \
# tests/ # tests/
# - name: Failure short reports # - name: Failure short reports
@@ -586,11 +576,11 @@ jobs:
# - name: Install dependencies # - name: Install dependencies
# shell: arch -arch arm64 bash {0} # shell: arch -arch arm64 bash {0}
# run: | # run: |
# ${CONDA_RUN} pip install --upgrade pip uv # ${CONDA_RUN} python -m pip install --upgrade pip uv
# ${CONDA_RUN} uv pip install -e ".[quality]" # ${CONDA_RUN} python -m uv pip install -e [quality,test]
# ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu # ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
# ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate # ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
# ${CONDA_RUN} uv pip install pytest-reportlog # ${CONDA_RUN} python -m uv pip install pytest-reportlog
# - name: Environment # - name: Environment
# shell: arch -arch arm64 bash {0} # shell: arch -arch arm64 bash {0}
# run: | # run: |
@@ -601,7 +591,7 @@ jobs:
# HF_HOME: /System/Volumes/Data/mnt/cache # HF_HOME: /System/Volumes/Data/mnt/cache
# HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} # HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
# run: | # run: |
# ${CONDA_RUN} pytest -n 1 --make-reports=tests_torch_mps \ # ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
# --report-log=tests_torch_mps.log \ # --report-log=tests_torch_mps.log \
# tests/ # tests/
# - name: Failure short reports # - name: Failure short reports

View File

@@ -25,8 +25,11 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install -e . python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pip install pytest python -m pip install --upgrade pip uv
python -m uv pip install -e .
python -m uv pip install pytest
- name: Check for soft dependencies - name: Check for soft dependencies
run: | run: |
pytest tests/others/test_dependencies.py python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest tests/others/test_dependencies.py

View File

@@ -0,0 +1,38 @@
name: Run Flax dependency tests
on:
pull_request:
branches:
- main
paths:
- "src/diffusers/**.py"
push:
branches:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check_flax_dependencies:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pip install --upgrade pip uv
python -m uv pip install -e .
python -m uv pip install "jax[cpu]>=0.2.16,!=0.3.2"
python -m uv pip install "flax>=0.4.1"
python -m uv pip install "jaxlib>=0.1.65"
python -m uv pip install pytest
- name: Check for soft dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest tests/others/test_dependencies.py

View File

@@ -1,139 +0,0 @@
name: Fast PR tests for Modular
on:
pull_request:
branches: [main]
paths:
- "src/diffusers/modular_pipelines/**.py"
- "src/diffusers/models/modeling_utils.py"
- "src/diffusers/models/model_loading_utils.py"
- "src/diffusers/pipelines/pipeline_utils.py"
- "src/diffusers/pipeline_loading_utils.py"
- "src/diffusers/loaders/lora_base.py"
- "src/diffusers/loaders/lora_pipeline.py"
- "src/diffusers/loaders/peft.py"
- "tests/modular_pipelines/**.py"
- ".github/**.yml"
- "utils/**.py"
- "setup.py"
push:
branches:
- ci-*
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
DIFFUSERS_IS_CI: yes
HF_XET_HIGH_PERFORMANCE: 1
OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60
jobs:
check_code_quality:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
pip install --upgrade pip
pip install .[quality]
- name: Check quality
run: make quality
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
check_repository_consistency:
needs: check_code_quality
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
pip install --upgrade pip
pip install .[quality]
- name: Check repo consistency
run: |
python utils/check_copies.py
python utils/check_dummies.py
python utils/check_support_list.py
make deps_table_check_updated
- name: Check if failure
if: ${{ failure() }}
run: |
echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
run_fast_tests:
needs: [check_code_quality, check_repository_consistency]
strategy:
fail-fast: false
matrix:
config:
- name: Fast PyTorch Modular Pipeline CPU tests
framework: pytorch_pipelines
runner: aws-highmemory-32-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu_modular_pipelines
name: ${{ matrix.config.name }}
runs-on:
group: ${{ matrix.config.runner }}
container:
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
uv pip install -e ".[quality]"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment
run: |
python utils/print_env.py
- name: Run fast PyTorch Pipeline CPU tests
if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
run: |
pytest -n 8 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/modular_pipelines
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
path: reports

View File

@@ -33,7 +33,8 @@ jobs:
fetch-depth: 0 fetch-depth: 0
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -89,16 +90,19 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install accelerate python -m pip install -e [quality,test]
python -m pip install accelerate
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run all selected tests on CPU - name: Run all selected tests on CPU
run: | run: |
pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }} python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }}
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
@@ -144,16 +148,19 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install -e [quality] python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run Hub tests for models, schedulers, and pipelines on a staging env - name: Run Hub tests for models, schedulers, and pipelines on a staging env
if: ${{ matrix.config.framework == 'hub_tests_pytorch' }} if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
run: | run: |
HUGGINGFACE_CO_STAGING=true pytest \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
HUGGINGFACE_CO_STAGING=true python -m pytest \
-m "is_staging_test" \ -m "is_staging_test" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests tests

View File

@@ -22,7 +22,7 @@ concurrency:
env: env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
OMP_NUM_THREADS: 4 OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4 MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60 PYTEST_TIMEOUT: 60
@@ -38,7 +38,7 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install --upgrade pip python -m pip install --upgrade pip
pip install .[quality] pip install .[quality]
- name: Check quality - name: Check quality
run: make quality run: make quality
@@ -58,7 +58,7 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install --upgrade pip python -m pip install --upgrade pip
pip install .[quality] pip install .[quality]
- name: Check repo consistency - name: Check repo consistency
run: | run: |
@@ -114,36 +114,40 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m uv pip install -e [quality,test]
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run fast PyTorch Pipeline CPU tests - name: Run fast PyTorch Pipeline CPU tests
if: ${{ matrix.config.framework == 'pytorch_pipelines' }} if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
run: | run: |
pytest -n 8 --max-worker-restart=0 --dist=loadfile \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-k "not Flax and not Onnx" \ python -m pytest -n 8 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests/pipelines tests/pipelines
- name: Run fast PyTorch Model Scheduler CPU tests - name: Run fast PyTorch Model Scheduler CPU tests
if: ${{ matrix.config.framework == 'pytorch_models' }} if: ${{ matrix.config.framework == 'pytorch_models' }}
run: | run: |
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-k "not Flax and not Onnx and not Dependency" \ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx and not Dependency" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests/models tests/schedulers tests/others tests/models tests/schedulers tests/others
- name: Run example PyTorch CPU tests - name: Run example PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch_examples' }} if: ${{ matrix.config.framework == 'pytorch_examples' }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m uv pip install peft timm
python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
examples examples
@@ -191,16 +195,19 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run Hub tests for models, schedulers, and pipelines on a staging env - name: Run Hub tests for models, schedulers, and pipelines on a staging env
if: ${{ matrix.config.framework == 'hub_tests_pytorch' }} if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
run: | run: |
HUGGINGFACE_CO_STAGING=true pytest \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
HUGGINGFACE_CO_STAGING=true python -m pytest \
-m "is_staging_test" \ -m "is_staging_test" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests tests
@@ -242,26 +249,28 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
# TODO (sayakpaul, DN6): revisit `--no-deps` # TODO (sayakpaul, DN6): revisit `--no-deps`
uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps python -m pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
uv pip install -U tokenizers python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps python -m uv pip install -U tokenizers
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run fast PyTorch LoRA tests with PEFT - name: Run fast PyTorch LoRA tests with PEFT
run: | run: |
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
\ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v \
--make-reports=tests_peft_main \ --make-reports=tests_peft_main \
tests/lora/ tests/lora/
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
\ -s -v \
--make-reports=tests_models_lora_peft_main \ --make-reports=tests_models_lora_peft_main \
tests/models/ -k "lora" tests/models/ -k "lora"

View File

@@ -1,4 +1,4 @@
name: Fast GPU Tests on PR name: Fast GPU Tests on PR
on: on:
pull_request: pull_request:
@@ -24,7 +24,7 @@ env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
PIPELINE_USAGE_CUTOFF: 1000000000 # set high cutoff so that only always-test pipelines run PIPELINE_USAGE_CUTOFF: 1000000000 # set high cutoff so that only always-test pipelines run
@@ -39,7 +39,7 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install --upgrade pip python -m pip install --upgrade pip
pip install .[quality] pip install .[quality]
- name: Check quality - name: Check quality
run: make quality run: make quality
@@ -59,7 +59,7 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install --upgrade pip python -m pip install --upgrade pip
pip install .[quality] pip install .[quality]
- name: Check repo consistency - name: Check repo consistency
run: | run: |
@@ -71,7 +71,7 @@ jobs:
if: ${{ failure() }} if: ${{ failure() }}
run: | run: |
echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
setup_torch_cuda_pipeline_matrix: setup_torch_cuda_pipeline_matrix:
needs: [check_code_quality, check_repository_consistency] needs: [check_code_quality, check_repository_consistency]
name: Setup Torch Pipelines CUDA Slow Tests Matrix name: Setup Torch Pipelines CUDA Slow Tests Matrix
@@ -88,7 +88,8 @@ jobs:
fetch-depth: 2 fetch-depth: 2
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -129,10 +130,10 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install -e [quality,test]
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
- name: Environment - name: Environment
run: | run: |
@@ -150,18 +151,18 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
if [ "${{ matrix.module }}" = "ip_adapters" ]; then if [ "${{ matrix.module }}" = "ip_adapters" ]; then
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
else else
pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }}) pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx and $pattern" \ -s -v -k "not Flax and not Onnx and $pattern" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
fi fi
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
@@ -199,11 +200,11 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
- name: Environment - name: Environment
run: | run: |
@@ -224,11 +225,11 @@ jobs:
run: | run: |
pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }}) pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
if [ -z "$pattern" ]; then if [ -z "$pattern" ]; then
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \ python -m pytest -n 1 -sv --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \
--make-reports=tests_torch_cuda_${{ matrix.module }} --make-reports=tests_torch_cuda_${{ matrix.module }}
else else
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \ python -m pytest -n 1 -sv --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \
--make-reports=tests_torch_cuda_${{ matrix.module }} --make-reports=tests_torch_cuda_${{ matrix.module }}
fi fi
- name: Failure short reports - name: Failure short reports
@@ -264,20 +265,22 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
uv pip install -e ".[quality,training]" python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run example tests on GPU - name: Run example tests on GPU
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/ python -m uv pip install timm
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -25,8 +25,12 @@ jobs:
python-version: "3.8" python-version: "3.8"
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install -e . python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pip install torch torchvision torchaudio pytest python -m pip install --upgrade pip uv
python -m uv pip install -e .
python -m uv pip install torch torchvision torchaudio
python -m uv pip install pytest
- name: Check for soft dependencies - name: Check for soft dependencies
run: | run: |
pytest tests/others/test_dependencies.py python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest tests/others/test_dependencies.py

View File

@@ -14,7 +14,7 @@ env:
DIFFUSERS_IS_CI: yes DIFFUSERS_IS_CI: yes
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
PIPELINE_USAGE_CUTOFF: 50000 PIPELINE_USAGE_CUTOFF: 50000
@@ -34,7 +34,8 @@ jobs:
fetch-depth: 2 fetch-depth: 2
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -74,10 +75,9 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install -e [quality,test]
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -87,8 +87,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
- name: Failure short reports - name: Failure short reports
@@ -126,11 +126,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
@@ -142,8 +141,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_cuda_${{ matrix.module }} \ --make-reports=tests_torch_cuda_${{ matrix.module }} \
tests/${{ matrix.module }} tests/${{ matrix.module }}
@@ -181,9 +180,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
#uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git python -m uv pip install -e [quality,test,training]
uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -192,7 +190,7 @@ jobs:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
RUN_COMPILE: yes RUN_COMPILE: yes
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_compile_cuda_failures_short.txt run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -225,7 +223,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -233,7 +232,7 @@ jobs:
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_xformers_cuda_failures_short.txt run: cat reports/tests_torch_xformers_cuda_failures_short.txt
@@ -265,18 +264,21 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run example tests on GPU - name: Run example tests on GPU
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/ python -m uv pip install timm
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -18,7 +18,7 @@ env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
RUN_SLOW: no RUN_SLOW: no
@@ -60,25 +60,29 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run fast PyTorch CPU tests - name: Run fast PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch' }} if: ${{ matrix.config.framework == 'pytorch' }}
run: | run: |
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-k "not Flax and not Onnx" \ python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
tests/ tests/
- name: Run example PyTorch CPU tests - name: Run example PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch_examples' }} if: ${{ matrix.config.framework == 'pytorch_examples' }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 4 --max-worker-restart=0 --dist=loadfile \ python -m uv pip install peft timm
python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
--make-reports=tests_${{ matrix.config.report }} \ --make-reports=tests_${{ matrix.config.report }} \
examples examples

View File

@@ -8,7 +8,7 @@ env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8 OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8 MKL_NUM_THREADS: 8
HF_XET_HIGH_PERFORMANCE: 1 HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600 PYTEST_TIMEOUT: 600
RUN_SLOW: no RUN_SLOW: no
@@ -57,7 +57,7 @@ jobs:
HF_HOME: /System/Volumes/Data/mnt/cache HF_HOME: /System/Volumes/Data/mnt/cache
HF_TOKEN: ${{ secrets.HF_TOKEN }} HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: | run: |
${CONDA_RUN} python -m pytest -n 0 --make-reports=tests_torch_mps tests/ ${CONDA_RUN} python -m pytest -n 0 -s -v --make-reports=tests_torch_mps tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -32,7 +32,8 @@ jobs:
fetch-depth: 2 fetch-depth: 2
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -72,8 +73,9 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install -e [quality,test]
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -83,8 +85,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
tests/pipelines/${{ matrix.module }} tests/pipelines/${{ matrix.module }}
- name: Failure short reports - name: Failure short reports
@@ -122,9 +124,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
@@ -136,8 +139,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_${{ matrix.module }}_cuda \ --make-reports=tests_torch_${{ matrix.module }}_cuda \
tests/${{ matrix.module }} tests/${{ matrix.module }}
@@ -172,9 +175,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install -e [quality,test]
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install peft@git+https://github.com/huggingface/peft.git
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
- name: Environment - name: Environment
run: | run: |
@@ -186,8 +190,8 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8 CUBLAS_WORKSPACE_CONFIG: :16:8
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile \ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-k "not Flax and not Onnx" \ -s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_minimum_cuda \ --make-reports=tests_torch_minimum_cuda \
tests/models/test_modeling_common.py \ tests/models/test_modeling_common.py \
tests/pipelines/test_pipelines_common.py \ tests/pipelines/test_pipelines_common.py \
@@ -231,7 +235,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -240,7 +245,7 @@ jobs:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
RUN_COMPILE: yes RUN_COMPILE: yes
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_compile_cuda_failures_short.txt run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -273,7 +278,8 @@ jobs:
nvidia-smi nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -281,7 +287,7 @@ jobs:
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
run: cat reports/tests_torch_xformers_cuda_failures_short.txt run: cat reports/tests_torch_xformers_cuda_failures_short.txt
@@ -315,18 +321,21 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test,training]
- name: Environment - name: Environment
run: | run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python utils/print_env.py python utils/print_env.py
- name: Run example tests on GPU - name: Run example tests on GPU
env: env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: | run: |
uv pip install ".[training]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/ python -m uv pip install timm
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}

View File

@@ -63,8 +63,9 @@ jobs:
- name: Install pytest - name: Install pytest
run: | run: |
uv pip install -e ".[quality]" python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
uv pip install peft python -m uv pip install -e [quality,test]
python -m uv pip install peft
- name: Run tests - name: Run tests
env: env:

3
.gitignore vendored
View File

@@ -125,9 +125,6 @@ dmypy.json
.vs .vs
.vscode .vscode
# Cursor
.cursor
# Pycharm # Pycharm
.idea .idea

View File

@@ -37,7 +37,7 @@ limitations under the License.
## Installation ## Installation
We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation. We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/) and [Flax](https://flax.readthedocs.io/en/latest/#installation), please refer to their official documentation.
### PyTorch ### PyTorch
@@ -53,6 +53,14 @@ With `conda` (maintained by the community):
conda install -c conda-forge diffusers conda install -c conda-forge diffusers
``` ```
### Flax
With `pip` (official package):
```bash
pip install --upgrade diffusers[flax]
```
### Apple Silicon (M1/M2) support ### Apple Silicon (M1/M2) support
Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide. Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide.
@@ -171,7 +179,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
<tr style="border-top: 2px solid black"> <tr style="border-top: 2px solid black">
<td>Text-guided Image Inpainting</td> <td>Text-guided Image Inpainting</td>
<td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint">Stable Diffusion Inpainting</a></td> <td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint">Stable Diffusion Inpainting</a></td>
<td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting"> stable-diffusion-v1-5/stable-diffusion-inpainting </a></td> <td><a href="https://huggingface.co/runwayml/stable-diffusion-inpainting"> runwayml/stable-diffusion-inpainting </a></td>
</tr> </tr>
<tr style="border-top: 2px solid black"> <tr style="border-top: 2px solid black">
<td>Image Variation</td> <td>Image Variation</td>

View File

@@ -1,45 +1,56 @@
FROM python:3.10-slim FROM ubuntu:20.04
ENV PYTHONDONTWRITEBYTECODE=1
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update && apt-get install -y bash \ RUN apt-get -y update \
build-essential \ && apt-get install -y software-properties-common \
git \ && add-apt-repository ppa:deadsnakes/ppa
git-lfs \
curl \
ca-certificates \
libglib2.0-0 \
libsndfile1-dev \
libgl1 \
zip \
wget
ENV UV_PYTHON=/usr/local/bin/python RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
python3.10 \
python3-pip \
libgl1 \
zip \
wget \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN pip install uv RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
RUN uv pip install --no-cache-dir \ python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio \ torchaudio \
--extra-index-url https://download.pytorch.org/whl/cpu invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m uv pip install --no-cache-dir \
accelerate \
# Extra dependencies datasets \
RUN uv pip install --no-cache-dir \ hf-doc-builder \
accelerate \ huggingface-hub \
numpy==1.26.4 \ Jinja2 \
hf_xet \ librosa \
setuptools==69.5.1 \ numpy==1.26.4 \
bitsandbytes \ scipy \
torchao \ tensorboard \
gguf \ transformers \
optimum-quanto matplotlib \
setuptools==69.5.1 \
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean bitsandbytes \
torchao \
gguf \
optimum-quanto
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -0,0 +1,49 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"jax[cpu]>=0.2.16,!=0.3.2" \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -0,0 +1,51 @@
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa
RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
libgl1 \
python3.10 \
python3-pip \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m pip install --no-cache-dir \
"jax[tpu]>=0.2.16,!=0.3.2" \
-f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
python3 -m uv pip install --upgrade --no-cache-dir \
clu \
"flax>=0.4.1" \
"jaxlib>=0.1.65" && \
python3 -m uv pip install --no-cache-dir \
accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
Jinja2 \
librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"]

View File

@@ -44,6 +44,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \ scipy \
tensorboard \ tensorboard \
transformers \ transformers \
hf_xet hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -38,12 +38,13 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \ datasets \
hf-doc-builder \ hf-doc-builder \
huggingface-hub \ huggingface-hub \
hf_xet \ hf_transfer \
Jinja2 \ Jinja2 \
librosa \ librosa \
numpy==1.26.4 \ numpy==1.26.4 \
scipy \ scipy \
tensorboard \ tensorboard \
transformers transformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -1,38 +1,50 @@
FROM python:3.10-slim FROM ubuntu:20.04
ENV PYTHONDONTWRITEBYTECODE=1
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update && apt-get install -y bash \ RUN apt-get -y update \
build-essential \ && apt-get install -y software-properties-common \
git \ && add-apt-repository ppa:deadsnakes/ppa
git-lfs \
curl \
ca-certificates \
libglib2.0-0 \
libsndfile1-dev \
libgl1
ENV UV_PYTHON=/usr/local/bin/python RUN apt install -y bash \
build-essential \
git \
git-lfs \
curl \
ca-certificates \
libsndfile1-dev \
python3.10 \
python3.10-dev \
python3-pip \
libgl1 \
python3.10-venv && \
rm -rf /var/lib/apt/lists
# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN pip install uv RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
RUN uv pip install --no-cache-dir \ python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio \ torchaudio \
--extra-index-url https://download.pytorch.org/whl/cpu invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m uv pip install --no-cache-dir \
accelerate \
# Extra dependencies datasets \
RUN uv pip install --no-cache-dir \ hf-doc-builder \
accelerate \ huggingface-hub \
numpy==1.26.4 \ Jinja2 \
hf_xet librosa \
numpy==1.26.4 \
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean scipy \
tensorboard \
transformers matplotlib \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,13 +2,11 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
@@ -16,34 +14,38 @@ RUN apt install -y bash \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \
libsndfile1-dev \ libsndfile1-dev \
libgl1 \ libgl1 \
python3 \ python3.10 \
python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3.10 -m uv pip install --no-cache-dir \
torch \ torch \
torchvision \ torchvision \
torchaudio torchaudio \
invisible_watermark && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m pip install --no-cache-dir \
# Extra dependencies
RUN uv pip install --no-cache-dir \
accelerate \ accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \ numpy==1.26.4 \
pytorch-lightning \ scipy \
hf_xet tensorboard \
transformers \
pytorch-lightning \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,7 +2,6 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.10
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0" ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0"
ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0" ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0"
@@ -10,8 +9,7 @@ ENV MINIMUM_SUPPORTED_TORCHAUDIO_VERSION="2.1.0"
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
@@ -19,34 +17,37 @@ RUN apt install -y bash \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \
libsndfile1-dev \ libsndfile1-dev \
libgl1 \ libgl1 \
python3 \ python3.10 \
python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3.10 -m uv pip install --no-cache-dir \
torch==$MINIMUM_SUPPORTED_TORCH_VERSION \ torch==$MINIMUM_SUPPORTED_TORCH_VERSION \
torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \ torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \
torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION \
invisible_watermark && \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" python3.10 -m pip install --no-cache-dir \
# Extra dependencies
RUN uv pip install --no-cache-dir \
accelerate \ accelerate \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \ numpy==1.26.4 \
pytorch-lightning \ scipy \
hf_xet tensorboard \
transformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -2,49 +2,50 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
LABEL repository="diffusers" LABEL repository="diffusers"
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \ RUN apt-get -y update \
&& apt-get install -y software-properties-common \ && apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa && \ && add-apt-repository ppa:deadsnakes/ppa
apt-get update
RUN apt install -y bash \ RUN apt install -y bash \
build-essential \ build-essential \
git \ git \
git-lfs \ git-lfs \
curl \ curl \
ca-certificates \ ca-certificates \
libglib2.0-0 \ libsndfile1-dev \
libsndfile1-dev \ libgl1 \
libgl1 \ python3.10 \
python3 \ python3.10-dev \
python3-pip \ python3-pip \
&& apt-get clean \ python3.10-venv && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists
RUN curl -LsSf https://astral.sh/uv/install.sh | sh # make sure to use venv
ENV PATH="/root/.local/bin:$PATH" RUN python3.10 -m venv /opt/venv
ENV VIRTUAL_ENV="/opt/venv" ENV PATH="/opt/venv/bin:$PATH"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN uv pip install --no-cache-dir \ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
torch \ python3.10 -m pip install --no-cache-dir \
torchvision \ torch \
torchaudio torchvision \
torchaudio \
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]" invisible_watermark && \
python3.10 -m uv pip install --no-cache-dir \
# Extra dependencies accelerate \
RUN uv pip install --no-cache-dir \ datasets \
accelerate \ hf-doc-builder \
numpy==1.26.4 \ huggingface-hub \
pytorch-lightning \ hf_transfer \
hf_xet \ Jinja2 \
xformers librosa \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
xformers \
hf_transfer
CMD ["/bin/bash"] CMD ["/bin/bash"]

View File

@@ -1,35 +1,38 @@
- sections: - title: Get started
sections:
- local: index - local: index
title: Diffusers title: Diffusers
- local: installation - local: installation
title: Installation title: Installation
- local: quicktour - local: quicktour
title: Quickstart title: Quicktour
- local: stable_diffusion - local: stable_diffusion
title: Basic performance title: Effective and efficient diffusion
title: Get started
- isExpanded: false - title: DiffusionPipeline
isExpanded: false
sections: sections:
- local: using-diffusers/loading - local: using-diffusers/loading
title: DiffusionPipeline title: Load pipelines
- local: tutorials/autopipeline - local: tutorials/autopipeline
title: AutoPipeline title: AutoPipeline
- local: using-diffusers/custom_pipeline_overview - local: using-diffusers/custom_pipeline_overview
title: Community pipelines and components title: Load community pipelines and components
- local: using-diffusers/callback - local: using-diffusers/callback
title: Pipeline callbacks title: Pipeline callbacks
- local: using-diffusers/reusing_seeds - local: using-diffusers/reusing_seeds
title: Reproducibility title: Reproducible pipelines
- local: using-diffusers/schedulers - local: using-diffusers/schedulers
title: Schedulers title: Load schedulers and models
- local: using-diffusers/automodel - local: using-diffusers/scheduler_features
title: AutoModel title: Scheduler features
- local: using-diffusers/other-formats - local: using-diffusers/other-formats
title: Model formats title: Model files and layouts
- local: using-diffusers/push_to_hub - local: using-diffusers/push_to_hub
title: Sharing pipelines and models title: Push files to the Hub
title: Pipelines
- isExpanded: false - title: Adapters
isExpanded: false
sections: sections:
- local: tutorials/using_peft_for_inference - local: tutorials/using_peft_for_inference
title: LoRA title: LoRA
@@ -43,31 +46,40 @@
title: DreamBooth title: DreamBooth
- local: using-diffusers/textual_inversion_inference - local: using-diffusers/textual_inversion_inference
title: Textual inversion title: Textual inversion
title: Adapters
- isExpanded: false - title: Inference
isExpanded: false
sections: sections:
- local: using-diffusers/weighted_prompts - local: using-diffusers/weighted_prompts
title: Prompting title: Prompt techniques
- local: using-diffusers/create_a_server - local: using-diffusers/create_a_server
title: Create a server title: Create a server
- local: using-diffusers/batched_inference - local: using-diffusers/batched_inference
title: Batch inference title: Batch inference
- local: training/distributed_inference - local: training/distributed_inference
title: Distributed inference title: Distributed inference
title: Inference - local: using-diffusers/scheduler_features
- isExpanded: false title: Scheduler features
- local: using-diffusers/callback
title: Pipeline callbacks
- local: using-diffusers/reusing_seeds
title: Reproducible pipelines
- local: using-diffusers/image_quality
title: Controlling image quality
- title: Inference optimization
isExpanded: false
sections: sections:
- local: optimization/fp16 - local: optimization/fp16
title: Accelerate inference title: Accelerate inference
- local: optimization/cache - local: optimization/cache
title: Caching title: Caching
- local: optimization/attention_backends
title: Attention backends
- local: optimization/memory - local: optimization/memory
title: Reduce memory usage title: Reduce memory usage
- local: optimization/speed-memory-optims - local: optimization/speed-memory-optims
title: Compiling and offloading quantized models title: Compile and offloading quantized models
- sections: - title: Community optimizations
sections:
- local: optimization/pruna - local: optimization/pruna
title: Pruna title: Pruna
- local: optimization/xformers - local: optimization/xformers
@@ -76,19 +88,15 @@
title: Token merging title: Token merging
- local: optimization/deepcache - local: optimization/deepcache
title: DeepCache title: DeepCache
- local: optimization/cache_dit
title: CacheDiT
- local: optimization/tgate - local: optimization/tgate
title: TGATE title: TGATE
- local: optimization/xdit - local: optimization/xdit
title: xDiT title: xDiT
- local: optimization/para_attn - local: optimization/para_attn
title: ParaAttention title: ParaAttention
- local: using-diffusers/image_quality
title: FreeU - title: Hybrid Inference
title: Community optimizations isExpanded: false
title: Inference optimization
- isExpanded: false
sections: sections:
- local: hybrid_inference/overview - local: hybrid_inference/overview
title: Overview title: Overview
@@ -98,33 +106,31 @@
title: VAE Encode title: VAE Encode
- local: hybrid_inference/api_reference - local: hybrid_inference/api_reference
title: API Reference title: API Reference
title: Hybrid Inference
- isExpanded: false - title: Modular Diffusers
isExpanded: false
sections: sections:
- local: modular_diffusers/overview - local: modular_diffusers/overview
title: Overview title: Overview
- local: modular_diffusers/quickstart
title: Quickstart
- local: modular_diffusers/modular_diffusers_states
title: States
- local: modular_diffusers/pipeline_block
title: ModularPipelineBlocks
- local: modular_diffusers/sequential_pipeline_blocks
title: SequentialPipelineBlocks
- local: modular_diffusers/loop_sequential_pipeline_blocks
title: LoopSequentialPipelineBlocks
- local: modular_diffusers/auto_pipeline_blocks
title: AutoPipelineBlocks
- local: modular_diffusers/modular_pipeline - local: modular_diffusers/modular_pipeline
title: ModularPipeline title: Modular Pipeline
- local: modular_diffusers/components_manager - local: modular_diffusers/components_manager
title: ComponentsManager title: Components Manager
- local: modular_diffusers/guiders - local: modular_diffusers/modular_diffusers_states
title: Guiders title: Modular Diffusers States
- local: modular_diffusers/custom_blocks - local: modular_diffusers/pipeline_block
title: Building Custom Blocks title: Pipeline Block
title: Modular Diffusers - local: modular_diffusers/sequential_pipeline_blocks
- isExpanded: false title: Sequential Pipeline Blocks
- local: modular_diffusers/loop_sequential_pipeline_blocks
title: Loop Sequential Pipeline Blocks
- local: modular_diffusers/auto_pipeline_blocks
title: Auto Pipeline Blocks
- local: modular_diffusers/end_to_end_guide
title: End-to-End Example
- title: Training
isExpanded: false
sections: sections:
- local: training/overview - local: training/overview
title: Overview title: Overview
@@ -134,7 +140,8 @@
title: Adapt a model to a new task title: Adapt a model to a new task
- local: tutorials/basic_training - local: tutorials/basic_training
title: Train a diffusion model title: Train a diffusion model
- sections: - title: Models
sections:
- local: training/unconditional_training - local: training/unconditional_training
title: Unconditional image generation title: Unconditional image generation
- local: training/text2image - local: training/text2image
@@ -153,8 +160,8 @@
title: InstructPix2Pix title: InstructPix2Pix
- local: training/cogvideox - local: training/cogvideox
title: CogVideoX title: CogVideoX
title: Models - title: Methods
- sections: sections:
- local: training/text_inversion - local: training/text_inversion
title: Textual Inversion title: Textual Inversion
- local: training/dreambooth - local: training/dreambooth
@@ -167,9 +174,9 @@
title: Latent Consistency Distillation title: Latent Consistency Distillation
- local: training/ddpo - local: training/ddpo
title: Reinforcement learning training with DDPO title: Reinforcement learning training with DDPO
title: Methods
title: Training - title: Quantization
- isExpanded: false isExpanded: false
sections: sections:
- local: quantization/overview - local: quantization/overview
title: Getting started title: Getting started
@@ -181,11 +188,12 @@
title: torchao title: torchao
- local: quantization/quanto - local: quantization/quanto
title: quanto title: quanto
- local: quantization/modelopt
title: NVIDIA ModelOpt - title: Model accelerators and hardware
title: Quantization isExpanded: false
- isExpanded: false
sections: sections:
- local: using-diffusers/stable_diffusion_jax_how_to
title: JAX/Flax
- local: optimization/onnx - local: optimization/onnx
title: ONNX title: ONNX
- local: optimization/open_vino - local: optimization/open_vino
@@ -198,8 +206,9 @@
title: Intel Gaudi title: Intel Gaudi
- local: optimization/neuron - local: optimization/neuron
title: AWS Neuron title: AWS Neuron
title: Model accelerators and hardware
- isExpanded: false - title: Specific pipeline examples
isExpanded: false
sections: sections:
- local: using-diffusers/consisid - local: using-diffusers/consisid
title: ConsisID title: ConsisID
@@ -225,10 +234,12 @@
title: Stable Video Diffusion title: Stable Video Diffusion
- local: using-diffusers/marigold_usage - local: using-diffusers/marigold_usage
title: Marigold Computer Vision title: Marigold Computer Vision
title: Specific pipeline examples
- isExpanded: false - title: Resources
isExpanded: false
sections: sections:
- sections: - title: Task recipes
sections:
- local: using-diffusers/unconditional_image_generation - local: using-diffusers/unconditional_image_generation
title: Unconditional image generation title: Unconditional image generation
- local: using-diffusers/conditional_image_generation - local: using-diffusers/conditional_image_generation
@@ -243,7 +254,6 @@
title: Video generation title: Video generation
- local: using-diffusers/depth2img - local: using-diffusers/depth2img
title: Depth-to-image title: Depth-to-image
title: Task recipes
- local: using-diffusers/write_own_pipeline - local: using-diffusers/write_own_pipeline
title: Understanding pipelines, models and schedulers title: Understanding pipelines, models and schedulers
- local: community_projects - local: community_projects
@@ -258,10 +268,12 @@
title: Diffusers' Ethical Guidelines title: Diffusers' Ethical Guidelines
- local: conceptual/evaluation - local: conceptual/evaluation
title: Evaluating Diffusion Models title: Evaluating Diffusion Models
title: Resources
- isExpanded: false - title: API
isExpanded: false
sections: sections:
- sections: - title: Main Classes
sections:
- local: api/configuration - local: api/configuration
title: Configuration title: Configuration
- local: api/logging - local: api/logging
@@ -270,22 +282,8 @@
title: Outputs title: Outputs
- local: api/quantization - local: api/quantization
title: Quantization title: Quantization
- local: api/parallel - title: Loaders
title: Parallel inference sections:
title: Main Classes
- sections:
- local: api/modular_diffusers/pipeline
title: Pipeline
- local: api/modular_diffusers/pipeline_blocks
title: Blocks
- local: api/modular_diffusers/pipeline_states
title: States
- local: api/modular_diffusers/pipeline_components
title: Components and configs
- local: api/modular_diffusers/guiders
title: Guiders
title: Modular
- sections:
- local: api/loaders/ip_adapter - local: api/loaders/ip_adapter
title: IP-Adapter title: IP-Adapter
- local: api/loaders/lora - local: api/loaders/lora
@@ -300,13 +298,14 @@
title: SD3Transformer2D title: SD3Transformer2D
- local: api/loaders/peft - local: api/loaders/peft
title: PEFT title: PEFT
title: Loaders - title: Models
- sections: sections:
- local: api/models/overview - local: api/models/overview
title: Overview title: Overview
- local: api/models/auto_model - local: api/models/auto_model
title: AutoModel title: AutoModel
- sections: - title: ControlNets
sections:
- local: api/models/controlnet - local: api/models/controlnet
title: ControlNetModel title: ControlNetModel
- local: api/models/controlnet_union - local: api/models/controlnet_union
@@ -321,20 +320,14 @@
title: SD3ControlNetModel title: SD3ControlNetModel
- local: api/models/controlnet_sparsectrl - local: api/models/controlnet_sparsectrl
title: SparseControlNetModel title: SparseControlNetModel
title: ControlNets - title: Transformers
- sections: sections:
- local: api/models/allegro_transformer3d - local: api/models/allegro_transformer3d
title: AllegroTransformer3DModel title: AllegroTransformer3DModel
- local: api/models/aura_flow_transformer2d - local: api/models/aura_flow_transformer2d
title: AuraFlowTransformer2DModel title: AuraFlowTransformer2DModel
- local: api/models/transformer_bria_fibo
title: BriaFiboTransformer2DModel
- local: api/models/bria_transformer
title: BriaTransformer2DModel
- local: api/models/chroma_transformer - local: api/models/chroma_transformer
title: ChromaTransformer2DModel title: ChromaTransformer2DModel
- local: api/models/chronoedit_transformer_3d
title: ChronoEditTransformer3DModel
- local: api/models/cogvideox_transformer3d - local: api/models/cogvideox_transformer3d
title: CogVideoXTransformer3DModel title: CogVideoXTransformer3DModel
- local: api/models/cogview3plus_transformer2d - local: api/models/cogview3plus_transformer2d
@@ -349,18 +342,12 @@
title: DiTTransformer2DModel title: DiTTransformer2DModel
- local: api/models/easyanimate_transformer3d - local: api/models/easyanimate_transformer3d
title: EasyAnimateTransformer3DModel title: EasyAnimateTransformer3DModel
- local: api/models/flux2_transformer
title: Flux2Transformer2DModel
- local: api/models/flux_transformer - local: api/models/flux_transformer
title: FluxTransformer2DModel title: FluxTransformer2DModel
- local: api/models/hidream_image_transformer - local: api/models/hidream_image_transformer
title: HiDreamImageTransformer2DModel title: HiDreamImageTransformer2DModel
- local: api/models/hunyuan_transformer2d - local: api/models/hunyuan_transformer2d
title: HunyuanDiT2DModel title: HunyuanDiT2DModel
- local: api/models/hunyuanimage_transformer_2d
title: HunyuanImageTransformer2DModel
- local: api/models/hunyuan_video15_transformer_3d
title: HunyuanVideo15Transformer3DModel
- local: api/models/hunyuan_video_transformer_3d - local: api/models/hunyuan_video_transformer_3d
title: HunyuanVideoTransformer3DModel title: HunyuanVideoTransformer3DModel
- local: api/models/latte_transformer3d - local: api/models/latte_transformer3d
@@ -375,8 +362,6 @@
title: MochiTransformer3DModel title: MochiTransformer3DModel
- local: api/models/omnigen_transformer - local: api/models/omnigen_transformer
title: OmniGenTransformer2DModel title: OmniGenTransformer2DModel
- local: api/models/ovisimage_transformer2d
title: OvisImageTransformer2DModel
- local: api/models/pixart_transformer2d - local: api/models/pixart_transformer2d
title: PixArtTransformer2DModel title: PixArtTransformer2DModel
- local: api/models/prior_transformer - local: api/models/prior_transformer
@@ -385,8 +370,6 @@
title: QwenImageTransformer2DModel title: QwenImageTransformer2DModel
- local: api/models/sana_transformer2d - local: api/models/sana_transformer2d
title: SanaTransformer2DModel title: SanaTransformer2DModel
- local: api/models/sana_video_transformer3d
title: SanaVideoTransformer3DModel
- local: api/models/sd3_transformer2d - local: api/models/sd3_transformer2d
title: SD3Transformer2DModel title: SD3Transformer2DModel
- local: api/models/skyreels_v2_transformer_3d - local: api/models/skyreels_v2_transformer_3d
@@ -397,14 +380,10 @@
title: Transformer2DModel title: Transformer2DModel
- local: api/models/transformer_temporal - local: api/models/transformer_temporal
title: TransformerTemporalModel title: TransformerTemporalModel
- local: api/models/wan_animate_transformer_3d
title: WanAnimateTransformer3DModel
- local: api/models/wan_transformer_3d - local: api/models/wan_transformer_3d
title: WanTransformer3DModel title: WanTransformer3DModel
- local: api/models/z_image_transformer2d - title: UNets
title: ZImageTransformer2DModel sections:
title: Transformers
- sections:
- local: api/models/stable_cascade_unet - local: api/models/stable_cascade_unet
title: StableCascadeUNet title: StableCascadeUNet
- local: api/models/unet - local: api/models/unet
@@ -419,8 +398,8 @@
title: UNetMotionModel title: UNetMotionModel
- local: api/models/uvit2d - local: api/models/uvit2d
title: UViT2DModel title: UViT2DModel
title: UNets - title: VAEs
- sections: sections:
- local: api/models/asymmetricautoencoderkl - local: api/models/asymmetricautoencoderkl
title: AsymmetricAutoencoderKL title: AsymmetricAutoencoderKL
- local: api/models/autoencoder_dc - local: api/models/autoencoder_dc
@@ -433,14 +412,8 @@
title: AutoencoderKLCogVideoX title: AutoencoderKLCogVideoX
- local: api/models/autoencoderkl_cosmos - local: api/models/autoencoderkl_cosmos
title: AutoencoderKLCosmos title: AutoencoderKLCosmos
- local: api/models/autoencoder_kl_hunyuanimage
title: AutoencoderKLHunyuanImage
- local: api/models/autoencoder_kl_hunyuanimage_refiner
title: AutoencoderKLHunyuanImageRefiner
- local: api/models/autoencoder_kl_hunyuan_video - local: api/models/autoencoder_kl_hunyuan_video
title: AutoencoderKLHunyuanVideo title: AutoencoderKLHunyuanVideo
- local: api/models/autoencoder_kl_hunyuan_video15
title: AutoencoderKLHunyuanVideo15
- local: api/models/autoencoderkl_ltx_video - local: api/models/autoencoderkl_ltx_video
title: AutoencoderKLLTXVideo title: AutoencoderKLLTXVideo
- local: api/models/autoencoderkl_magvit - local: api/models/autoencoderkl_magvit
@@ -459,240 +432,208 @@
title: Tiny AutoEncoder title: Tiny AutoEncoder
- local: api/models/vq - local: api/models/vq
title: VQModel title: VQModel
title: VAEs - title: Pipelines
title: Models sections:
- sections:
- local: api/pipelines/overview - local: api/pipelines/overview
title: Overview title: Overview
- local: api/pipelines/allegro
title: Allegro
- local: api/pipelines/amused
title: aMUSEd
- local: api/pipelines/animatediff
title: AnimateDiff
- local: api/pipelines/attend_and_excite
title: Attend-and-Excite
- local: api/pipelines/audioldm
title: AudioLDM
- local: api/pipelines/audioldm2
title: AudioLDM 2
- local: api/pipelines/aura_flow
title: AuraFlow
- local: api/pipelines/auto_pipeline - local: api/pipelines/auto_pipeline
title: AutoPipeline title: AutoPipeline
- sections: - local: api/pipelines/blip_diffusion
- local: api/pipelines/audioldm title: BLIP-Diffusion
title: AudioLDM - local: api/pipelines/chroma
- local: api/pipelines/audioldm2 title: Chroma
title: AudioLDM 2 - local: api/pipelines/cogvideox
- local: api/pipelines/dance_diffusion title: CogVideoX
title: Dance Diffusion - local: api/pipelines/cogview3
- local: api/pipelines/musicldm title: CogView3
title: MusicLDM - local: api/pipelines/cogview4
- local: api/pipelines/stable_audio title: CogView4
title: Stable Audio - local: api/pipelines/consisid
title: Audio title: ConsisID
- sections: - local: api/pipelines/consistency_models
- local: api/pipelines/amused title: Consistency Models
title: aMUSEd - local: api/pipelines/controlnet
- local: api/pipelines/animatediff title: ControlNet
title: AnimateDiff - local: api/pipelines/controlnet_flux
- local: api/pipelines/attend_and_excite title: ControlNet with Flux.1
title: Attend-and-Excite - local: api/pipelines/controlnet_hunyuandit
- local: api/pipelines/aura_flow title: ControlNet with Hunyuan-DiT
title: AuraFlow - local: api/pipelines/controlnet_sd3
- local: api/pipelines/blip_diffusion title: ControlNet with Stable Diffusion 3
title: BLIP-Diffusion - local: api/pipelines/controlnet_sdxl
- local: api/pipelines/bria_3_2 title: ControlNet with Stable Diffusion XL
title: Bria 3.2 - local: api/pipelines/controlnet_sana
- local: api/pipelines/bria_fibo title: ControlNet-Sana
title: Bria Fibo - local: api/pipelines/controlnetxs
- local: api/pipelines/chroma title: ControlNet-XS
title: Chroma - local: api/pipelines/controlnetxs_sdxl
- local: api/pipelines/cogview3 title: ControlNet-XS with Stable Diffusion XL
title: CogView3 - local: api/pipelines/controlnet_union
- local: api/pipelines/cogview4 title: ControlNetUnion
title: CogView4 - local: api/pipelines/cosmos
- local: api/pipelines/consistency_models title: Cosmos
title: Consistency Models - local: api/pipelines/dance_diffusion
- local: api/pipelines/controlnet title: Dance Diffusion
title: ControlNet - local: api/pipelines/ddim
- local: api/pipelines/controlnet_flux title: DDIM
title: ControlNet with Flux.1 - local: api/pipelines/ddpm
- local: api/pipelines/controlnet_hunyuandit title: DDPM
title: ControlNet with Hunyuan-DiT - local: api/pipelines/deepfloyd_if
- local: api/pipelines/controlnet_sd3 title: DeepFloyd IF
title: ControlNet with Stable Diffusion 3 - local: api/pipelines/diffedit
- local: api/pipelines/controlnet_sdxl title: DiffEdit
title: ControlNet with Stable Diffusion XL - local: api/pipelines/dit
- local: api/pipelines/controlnet_sana title: DiT
title: ControlNet-Sana - local: api/pipelines/easyanimate
- local: api/pipelines/controlnetxs title: EasyAnimate
title: ControlNet-XS - local: api/pipelines/flux
- local: api/pipelines/controlnetxs_sdxl title: Flux
title: ControlNet-XS with Stable Diffusion XL - local: api/pipelines/control_flux_inpaint
- local: api/pipelines/controlnet_union title: FluxControlInpaint
title: ControlNetUnion - local: api/pipelines/framepack
- local: api/pipelines/cosmos title: Framepack
title: Cosmos - local: api/pipelines/hidream
- local: api/pipelines/ddim title: HiDream-I1
title: DDIM - local: api/pipelines/hunyuandit
- local: api/pipelines/ddpm title: Hunyuan-DiT
title: DDPM - local: api/pipelines/hunyuan_video
- local: api/pipelines/deepfloyd_if title: HunyuanVideo
title: DeepFloyd IF - local: api/pipelines/i2vgenxl
- local: api/pipelines/diffedit title: I2VGen-XL
title: DiffEdit - local: api/pipelines/pix2pix
- local: api/pipelines/dit title: InstructPix2Pix
title: DiT - local: api/pipelines/kandinsky
- local: api/pipelines/easyanimate title: Kandinsky 2.1
title: EasyAnimate - local: api/pipelines/kandinsky_v22
- local: api/pipelines/flux title: Kandinsky 2.2
title: Flux - local: api/pipelines/kandinsky3
- local: api/pipelines/flux2 title: Kandinsky 3
title: Flux2 - local: api/pipelines/kolors
- local: api/pipelines/control_flux_inpaint title: Kolors
title: FluxControlInpaint - local: api/pipelines/latent_consistency_models
- local: api/pipelines/hidream title: Latent Consistency Models
title: HiDream-I1 - local: api/pipelines/latent_diffusion
- local: api/pipelines/hunyuandit title: Latent Diffusion
title: Hunyuan-DiT - local: api/pipelines/latte
- local: api/pipelines/hunyuanimage21 title: Latte
title: HunyuanImage2.1 - local: api/pipelines/ledits_pp
- local: api/pipelines/pix2pix title: LEDITS++
title: InstructPix2Pix - local: api/pipelines/ltx_video
- local: api/pipelines/kandinsky title: LTXVideo
title: Kandinsky 2.1 - local: api/pipelines/lumina2
- local: api/pipelines/kandinsky_v22 title: Lumina 2.0
title: Kandinsky 2.2 - local: api/pipelines/lumina
- local: api/pipelines/kandinsky3 title: Lumina-T2X
title: Kandinsky 3 - local: api/pipelines/marigold
- local: api/pipelines/kandinsky5_image title: Marigold
title: Kandinsky 5.0 Image - local: api/pipelines/mochi
- local: api/pipelines/kolors title: Mochi
title: Kolors - local: api/pipelines/panorama
- local: api/pipelines/latent_consistency_models title: MultiDiffusion
title: Latent Consistency Models - local: api/pipelines/musicldm
- local: api/pipelines/latent_diffusion title: MusicLDM
title: Latent Diffusion - local: api/pipelines/omnigen
- local: api/pipelines/ledits_pp title: OmniGen
title: LEDITS++ - local: api/pipelines/pag
- local: api/pipelines/lumina2 title: PAG
title: Lumina 2.0 - local: api/pipelines/paint_by_example
- local: api/pipelines/lumina title: Paint by Example
title: Lumina-T2X - local: api/pipelines/pia
- local: api/pipelines/marigold title: Personalized Image Animator (PIA)
title: Marigold - local: api/pipelines/pixart
- local: api/pipelines/panorama title: PixArt-α
title: MultiDiffusion - local: api/pipelines/pixart_sigma
- local: api/pipelines/omnigen title: PixArt-Σ
title: OmniGen - local: api/pipelines/qwenimage
- local: api/pipelines/ovis_image title: QwenImage
title: Ovis-Image - local: api/pipelines/sana
- local: api/pipelines/pag title: Sana
title: PAG - local: api/pipelines/sana_sprint
- local: api/pipelines/paint_by_example title: Sana Sprint
title: Paint by Example - local: api/pipelines/self_attention_guidance
- local: api/pipelines/pixart title: Self-Attention Guidance
title: PixArt-α - local: api/pipelines/semantic_stable_diffusion
- local: api/pipelines/pixart_sigma title: Semantic Guidance
title: PixArt-Σ - local: api/pipelines/shap_e
- local: api/pipelines/prx title: Shap-E
title: PRX - local: api/pipelines/skyreels_v2
- local: api/pipelines/qwenimage title: SkyReels-V2
title: QwenImage - local: api/pipelines/stable_audio
- local: api/pipelines/sana title: Stable Audio
title: Sana - local: api/pipelines/stable_cascade
- local: api/pipelines/sana_sprint title: Stable Cascade
title: Sana Sprint - title: Stable Diffusion
- local: api/pipelines/sana_video sections:
title: Sana Video - local: api/pipelines/stable_diffusion/overview
- local: api/pipelines/self_attention_guidance title: Overview
title: Self-Attention Guidance - local: api/pipelines/stable_diffusion/depth2img
- local: api/pipelines/semantic_stable_diffusion title: Depth-to-image
title: Semantic Guidance - local: api/pipelines/stable_diffusion/gligen
- local: api/pipelines/shap_e title: GLIGEN (Grounded Language-to-Image Generation)
title: Shap-E - local: api/pipelines/stable_diffusion/image_variation
- local: api/pipelines/stable_cascade title: Image variation
title: Stable Cascade - local: api/pipelines/stable_diffusion/img2img
- sections: title: Image-to-image
- local: api/pipelines/stable_diffusion/overview
title: Overview
- local: api/pipelines/stable_diffusion/depth2img
title: Depth-to-image
- local: api/pipelines/stable_diffusion/gligen
title: GLIGEN (Grounded Language-to-Image Generation)
- local: api/pipelines/stable_diffusion/image_variation
title: Image variation
- local: api/pipelines/stable_diffusion/img2img
title: Image-to-image
- local: api/pipelines/stable_diffusion/inpaint
title: Inpainting
- local: api/pipelines/stable_diffusion/k_diffusion
title: K-Diffusion
- local: api/pipelines/stable_diffusion/latent_upscale
title: Latent upscaler
- local: api/pipelines/stable_diffusion/ldm3d_diffusion
title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D
Upscaler
- local: api/pipelines/stable_diffusion/stable_diffusion_safe
title: Safe Stable Diffusion
- local: api/pipelines/stable_diffusion/sdxl_turbo
title: SDXL Turbo
- local: api/pipelines/stable_diffusion/stable_diffusion_2
title: Stable Diffusion 2
- local: api/pipelines/stable_diffusion/stable_diffusion_3
title: Stable Diffusion 3
- local: api/pipelines/stable_diffusion/stable_diffusion_xl
title: Stable Diffusion XL
- local: api/pipelines/stable_diffusion/upscale
title: Super-resolution
- local: api/pipelines/stable_diffusion/adapter
title: T2I-Adapter
- local: api/pipelines/stable_diffusion/text2img
title: Text-to-image
title: Stable Diffusion
- local: api/pipelines/stable_unclip
title: Stable unCLIP
- local: api/pipelines/unclip
title: unCLIP
- local: api/pipelines/unidiffuser
title: UniDiffuser
- local: api/pipelines/value_guided_sampling
title: Value-guided sampling
- local: api/pipelines/visualcloze
title: VisualCloze
- local: api/pipelines/wuerstchen
title: Wuerstchen
- local: api/pipelines/z_image
title: Z-Image
title: Image
- sections:
- local: api/pipelines/allegro
title: Allegro
- local: api/pipelines/chronoedit
title: ChronoEdit
- local: api/pipelines/cogvideox
title: CogVideoX
- local: api/pipelines/consisid
title: ConsisID
- local: api/pipelines/framepack
title: Framepack
- local: api/pipelines/hunyuan_video
title: HunyuanVideo
- local: api/pipelines/hunyuan_video15
title: HunyuanVideo1.5
- local: api/pipelines/i2vgenxl
title: I2VGen-XL
- local: api/pipelines/kandinsky5_video
title: Kandinsky 5.0 Video
- local: api/pipelines/latte
title: Latte
- local: api/pipelines/ltx_video
title: LTXVideo
- local: api/pipelines/mochi
title: Mochi
- local: api/pipelines/pia
title: Personalized Image Animator (PIA)
- local: api/pipelines/skyreels_v2
title: SkyReels-V2
- local: api/pipelines/stable_diffusion/svd - local: api/pipelines/stable_diffusion/svd
title: Stable Video Diffusion title: Image-to-video
- local: api/pipelines/text_to_video - local: api/pipelines/stable_diffusion/inpaint
title: Text-to-video title: Inpainting
- local: api/pipelines/text_to_video_zero - local: api/pipelines/stable_diffusion/k_diffusion
title: Text2Video-Zero title: K-Diffusion
- local: api/pipelines/wan - local: api/pipelines/stable_diffusion/latent_upscale
title: Wan title: Latent upscaler
title: Video - local: api/pipelines/stable_diffusion/ldm3d_diffusion
title: Pipelines title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D Upscaler
- sections: - local: api/pipelines/stable_diffusion/stable_diffusion_safe
title: Safe Stable Diffusion
- local: api/pipelines/stable_diffusion/sdxl_turbo
title: SDXL Turbo
- local: api/pipelines/stable_diffusion/stable_diffusion_2
title: Stable Diffusion 2
- local: api/pipelines/stable_diffusion/stable_diffusion_3
title: Stable Diffusion 3
- local: api/pipelines/stable_diffusion/stable_diffusion_xl
title: Stable Diffusion XL
- local: api/pipelines/stable_diffusion/upscale
title: Super-resolution
- local: api/pipelines/stable_diffusion/adapter
title: T2I-Adapter
- local: api/pipelines/stable_diffusion/text2img
title: Text-to-image
- local: api/pipelines/stable_unclip
title: Stable unCLIP
- local: api/pipelines/text_to_video
title: Text-to-video
- local: api/pipelines/text_to_video_zero
title: Text2Video-Zero
- local: api/pipelines/unclip
title: unCLIP
- local: api/pipelines/unidiffuser
title: UniDiffuser
- local: api/pipelines/value_guided_sampling
title: Value-guided sampling
- local: api/pipelines/visualcloze
title: VisualCloze
- local: api/pipelines/wan
title: Wan
- local: api/pipelines/wuerstchen
title: Wuerstchen
- title: Schedulers
sections:
- local: api/schedulers/overview - local: api/schedulers/overview
title: Overview title: Overview
- local: api/schedulers/cm_stochastic_iterative - local: api/schedulers/cm_stochastic_iterative
@@ -761,8 +702,8 @@
title: UniPCMultistepScheduler title: UniPCMultistepScheduler
- local: api/schedulers/vq_diffusion - local: api/schedulers/vq_diffusion
title: VQDiffusionScheduler title: VQDiffusionScheduler
title: Schedulers - title: Internal classes
- sections: sections:
- local: api/internal_classes_overview - local: api/internal_classes_overview
title: Overview title: Overview
- local: api/attnprocessor - local: api/attnprocessor
@@ -779,5 +720,3 @@
title: VAE Image Processor title: VAE Image Processor
- local: api/video_processor - local: api/video_processor
title: Video Processor title: Video Processor
title: Internal classes
title: API

View File

@@ -34,9 +34,3 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
[[autodoc]] FirstBlockCacheConfig [[autodoc]] FirstBlockCacheConfig
[[autodoc]] apply_first_block_cache [[autodoc]] apply_first_block_cache
### TaylorSeerCacheConfig
[[autodoc]] TaylorSeerCacheConfig
[[autodoc]] apply_taylorseer_cache

View File

@@ -14,8 +14,11 @@ specific language governing permissions and limitations under the License.
Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which stores all the parameters that are passed to their respective `__init__` methods in a JSON-configuration file. Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which stores all the parameters that are passed to their respective `__init__` methods in a JSON-configuration file.
> [!TIP] <Tip>
> To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf auth login`.
To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf auth login`.
</Tip>
## ConfigMixin ## ConfigMixin

View File

@@ -20,12 +20,6 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or Nu
[[autodoc]] image_processor.VaeImageProcessor [[autodoc]] image_processor.VaeImageProcessor
## InpaintProcessor
The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.
[[autodoc]] image_processor.InpaintProcessor
## VaeImageProcessorLDM3D ## VaeImageProcessorLDM3D
The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs. The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.

View File

@@ -14,8 +14,11 @@ specific language governing permissions and limitations under the License.
[IP-Adapter](https://hf.co/papers/2308.06721) is a lightweight adapter that enables prompting a diffusion model with an image. This method decouples the cross-attention layers of the image and text features. The image features are generated from an image encoder. [IP-Adapter](https://hf.co/papers/2308.06721) is a lightweight adapter that enables prompting a diffusion model with an image. This method decouples the cross-attention layers of the image and text features. The image features are generated from an image encoder.
> [!TIP] <Tip>
> Learn how to load and use an IP-Adapter checkpoint and image in the [IP-Adapter](../../using-diffusers/ip_adapter) guide.
Learn how to load an IP-Adapter checkpoint and image in the IP-Adapter [loading](../../using-diffusers/loading_adapters#ip-adapter) guide, and you can see how to use it in the [usage](../../using-diffusers/ip_adapter) guide.
</Tip>
## IPAdapterMixin ## IPAdapterMixin

View File

@@ -30,13 +30,14 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
- [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4). - [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4).
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`]. - [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
- [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream) - [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream)
- [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen). - [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen)
- [`ZImageLoraLoaderMixin`] provides similar functions for [Z-Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/z_image).
- [`Flux2LoraLoaderMixin`] provides similar functions for [Flux2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux2).
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload, LoRAs and more. - [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload, LoRAs and more.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
</Tip>
## LoraBaseMixin ## LoraBaseMixin
@@ -58,10 +59,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
[[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin
## Flux2LoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.Flux2LoraLoaderMixin
## CogVideoXLoraLoaderMixin ## CogVideoXLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin
@@ -113,13 +110,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
[[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin [[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin
## ZImageLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.ZImageLoraLoaderMixin
## KandinskyLoraLoaderMixin
[[autodoc]] loaders.lora_pipeline.KandinskyLoraLoaderMixin
## LoraBaseMixin ## LoraBaseMixin
[[autodoc]] loaders.lora_base.LoraBaseMixin [[autodoc]] loaders.lora_base.LoraBaseMixin

View File

@@ -12,10 +12,13 @@ specific language governing permissions and limitations under the License.
# PEFT # PEFT
Diffusers supports loading adapters such as [LoRA](../../tutorials/using_peft_for_inference) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter. Diffusers supports loading adapters such as [LoRA](../../using-diffusers/loading_adapters) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter.
> [!TIP] <Tip>
> Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
</Tip>
## PeftAdapterMixin ## PeftAdapterMixin

View File

@@ -16,8 +16,11 @@ Textual Inversion is a training method for personalizing models by learning new
[`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings. [`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings.
> [!TIP] <Tip>
> To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/textual_inversion_inference) loading guide.
To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/loading_adapters#textual-inversion) loading guide.
</Tip>
## TextualInversionLoaderMixin ## TextualInversionLoaderMixin

View File

@@ -16,8 +16,11 @@ This class is useful when *only* loading weights into a [`SD3Transformer2DModel`
The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs. The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
</Tip>
## SD3Transformer2DLoadersMixin ## SD3Transformer2DLoadersMixin

View File

@@ -16,8 +16,11 @@ Some training methods - like LoRA and Custom Diffusion - typically target the UN
The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters. The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters.
> [!TIP] <Tip>
> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) guide.
To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
</Tip>
## UNet2DConditionLoadersMixin ## UNet2DConditionLoadersMixin

View File

@@ -39,7 +39,7 @@ mask_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images
original_image = load_image(img_url).resize((512, 512)) original_image = load_image(img_url).resize((512, 512))
mask_image = load_image(mask_url).resize((512, 512)) mask_image = load_image(mask_url).resize((512, 512))
pipe = StableDiffusionInpaintPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-inpainting") pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
pipe.vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") pipe.vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5")
pipe.to("cuda") pipe.to("cuda")

View File

@@ -12,7 +12,15 @@ specific language governing permissions and limitations under the License.
# AutoModel # AutoModel
[`AutoModel`] automatically retrieves the correct model class from the checkpoint `config.json` file. The `AutoModel` is designed to make it easy to load a checkpoint without needing to know the specific model class. `AutoModel` automatically retrieves the correct model class from the checkpoint `config.json` file.
```python
from diffusers import AutoModel, AutoPipelineForText2Image
unet = AutoModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
pipe = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", unet=unet)
```
## AutoModel ## AutoModel

View File

@@ -1,36 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLHunyuanVideo15
The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanVideo1.5](https://github.com/Tencent/HunyuanVideo1-1.5) by Tencent.
The model can be loaded with the following code snippet.
```python
from diffusers import AutoencoderKLHunyuanVideo15
vae = AutoencoderKLHunyuanVideo15.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v", subfolder="vae", torch_dtype=torch.float32)
# make sure to enable tiling to avoid OOM
vae.enable_tiling()
```
## AutoencoderKLHunyuanVideo15
[[autodoc]] AutoencoderKLHunyuanVideo15
- decode
- encode
- all
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLHunyuanImage
The 2D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
The model can be loaded with the following code snippet.
```python
from diffusers import AutoencoderKLHunyuanImage
vae = AutoencoderKLHunyuanImage.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
```
## AutoencoderKLHunyuanImage
[[autodoc]] AutoencoderKLHunyuanImage
- decode
- all
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# AutoencoderKLHunyuanImageRefiner
The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) for its refiner pipeline.
The model can be loaded with the following code snippet.
```python
from diffusers import AutoencoderKLHunyuanImageRefiner
vae = AutoencoderKLHunyuanImageRefiner.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
```
## AutoencoderKLHunyuanImageRefiner
[[autodoc]] AutoencoderKLHunyuanImageRefiner
- decode
- all
## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput

View File

@@ -44,3 +44,15 @@ model = AutoencoderKL.from_single_file(url)
## DecoderOutput ## DecoderOutput
[[autodoc]] models.autoencoders.vae.DecoderOutput [[autodoc]] models.autoencoders.vae.DecoderOutput
## FlaxAutoencoderKL
[[autodoc]] FlaxAutoencoderKL
## FlaxAutoencoderKLOutput
[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput
## FlaxDecoderOutput
[[autodoc]] models.vae_flax.FlaxDecoderOutput

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# BriaTransformer2DModel
A modified flux Transformer model from [Bria](https://huggingface.co/briaai/BRIA-3.2)
## BriaTransformer2DModel
[[autodoc]] BriaTransformer2DModel

View File

@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
# ChromaTransformer2DModel # ChromaTransformer2DModel
A modified flux Transformer model from [Chroma](https://huggingface.co/lodestones/Chroma1-HD) A modified flux Transformer model from [Chroma](https://huggingface.co/lodestones/Chroma)
## ChromaTransformer2DModel ## ChromaTransformer2DModel

View File

@@ -1,32 +0,0 @@
<!-- Copyright 2025 The ChronoEdit Team and HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# ChronoEditTransformer3DModel
A Diffusion Transformer model for 3D video-like data from [ChronoEdit: Towards Temporal Reasoning for Image Editing and World Simulation](https://huggingface.co/papers/2510.04290) from NVIDIA and University of Toronto, by Jay Zhangjie Wu, Xuanchi Ren, Tianchang Shen, Tianshi Cao, Kai He, Yifan Lu, Ruiyuan Gao, Enze Xie, Shiyi Lan, Jose M. Alvarez, Jun Gao, Sanja Fidler, Zian Wang, Huan Ling.
> **TL;DR:** ChronoEdit reframes image editing as a video generation task, using input and edited images as start/end frames to leverage pretrained video models with temporal consistency. A temporal reasoning stage introduces reasoning tokens to ensure physically plausible edits and visualize the editing trajectory.
The model can be loaded with the following code snippet.
```python
from diffusers import ChronoEditTransformer3DModel
transformer = ChronoEditTransformer3DModel.from_pretrained("nvidia/ChronoEdit-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## ChronoEditTransformer3DModel
[[autodoc]] ChronoEditTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -16,8 +16,11 @@ Consistency decoder can be used to decode the latents from the denoising UNet in
The original codebase can be found at [openai/consistencydecoder](https://github.com/openai/consistencydecoder). The original codebase can be found at [openai/consistencydecoder](https://github.com/openai/consistencydecoder).
> [!WARNING] <Tip warning={true}>
> Inference is only supported for 2 iterations as of now.
Inference is only supported for 2 iterations as of now.
</Tip>
The pipeline could not have been contributed without the help of [madebyollin](https://github.com/madebyollin) and [mrsteyk](https://github.com/mrsteyk) from [this issue](https://github.com/openai/consistencydecoder/issues/1). The pipeline could not have been contributed without the help of [madebyollin](https://github.com/madebyollin) and [mrsteyk](https://github.com/mrsteyk) from [this issue](https://github.com/openai/consistencydecoder/issues/1).

View File

@@ -40,3 +40,11 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
## ControlNetOutput ## ControlNetOutput
[[autodoc]] models.controlnets.controlnet.ControlNetOutput [[autodoc]] models.controlnets.controlnet.ControlNetOutput
## FlaxControlNetModel
[[autodoc]] FlaxControlNetModel
## FlaxControlNetOutput
[[autodoc]] models.controlnets.controlnet_flax.FlaxControlNetOutput

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Flux2Transformer2DModel
A Transformer model for image-like data from [Flux2](https://hf.co/black-forest-labs/FLUX.2-dev).
## Flux2Transformer2DModel
[[autodoc]] Flux2Transformer2DModel

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# HunyuanVideo15Transformer3DModel
A Diffusion Transformer model for 3D video-like data used in [HunyuanVideo1.5](https://github.com/Tencent/HunyuanVideo1-1.5).
The model can be loaded with the following code snippet.
```python
from diffusers import HunyuanVideo15Transformer3DModel
transformer = HunyuanVideo15Transformer3DModel.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## HunyuanVideo15Transformer3DModel
[[autodoc]] HunyuanVideo15Transformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# HunyuanImageTransformer2DModel
A Diffusion Transformer model for [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
The model can be loaded with the following code snippet.
```python
from diffusers import HunyuanImageTransformer2DModel
transformer = HunyuanImageTransformer2DModel.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## HunyuanImageTransformer2DModel
[[autodoc]] HunyuanImageTransformer2DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -19,6 +19,10 @@ All models are built from the base [`ModelMixin`] class which is a [`torch.nn.Mo
## ModelMixin ## ModelMixin
[[autodoc]] ModelMixin [[autodoc]] ModelMixin
## FlaxModelMixin
[[autodoc]] FlaxModelMixin
## PushToHubMixin ## PushToHubMixin
[[autodoc]] utils.PushToHubMixin [[autodoc]] utils.PushToHubMixin

View File

@@ -1,24 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# OvisImageTransformer2DModel
The model can be loaded with the following code snippet.
```python
from diffusers import OvisImageTransformer2DModel
transformer = OvisImageTransformer2DModel.from_pretrained("AIDC-AI/Ovis-Image-7B", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## OvisImageTransformer2DModel
[[autodoc]] OvisImageTransformer2DModel

View File

@@ -1,36 +0,0 @@
<!-- Copyright 2025 The SANA-Video Authors and HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# SanaVideoTransformer3DModel
A Diffusion Transformer model for 3D data (video) from [SANA-Video: Efficient Video Generation with Block Linear Diffusion Transformer](https://huggingface.co/papers/2509.24695) from NVIDIA and MIT HAN Lab, by Junsong Chen, Yuyang Zhao, Jincheng Yu, Ruihang Chu, Junyu Chen, Shuai Yang, Xianbang Wang, Yicheng Pan, Daquan Zhou, Huan Ling, Haozhe Liu, Hongwei Yi, Hao Zhang, Muyang Li, Yukang Chen, Han Cai, Sanja Fidler, Ping Luo, Song Han, Enze Xie.
The abstract from the paper is:
*We introduce SANA-Video, a small diffusion model that can efficiently generate videos up to 720x1280 resolution and minute-length duration. SANA-Video synthesizes high-resolution, high-quality and long videos with strong text-video alignment at a remarkably fast speed, deployable on RTX 5090 GPU. Two core designs ensure our efficient, effective and long video generation: (1) Linear DiT: We leverage linear attention as the core operation, which is more efficient than vanilla attention given the large number of tokens processed in video generation. (2) Constant-Memory KV cache for Block Linear Attention: we design block-wise autoregressive approach for long video generation by employing a constant-memory state, derived from the cumulative properties of linear attention. This KV cache provides the Linear DiT with global context at a fixed memory cost, eliminating the need for a traditional KV cache and enabling efficient, minute-long video generation. In addition, we explore effective data filters and model training strategies, narrowing the training cost to 12 days on 64 H100 GPUs, which is only 1% of the cost of MovieGen. Given its low cost, SANA-Video achieves competitive performance compared to modern state-of-the-art small diffusion models (e.g., Wan 2.1-1.3B and SkyReel-V2-1.3B) while being 16x faster in measured latency. Moreover, SANA-Video can be deployed on RTX 5090 GPUs with NVFP4 precision, accelerating the inference speed of generating a 5-second 720p video from 71s to 29s (2.4x speedup). In summary, SANA-Video enables low-cost, high-quality video generation.*
The model can be loaded with the following code snippet.
```python
from diffusers import SanaVideoTransformer3DModel
import torch
transformer = SanaVideoTransformer3DModel.from_pretrained("Efficient-Large-Model/SANA-Video_2B_480p_diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## SanaVideoTransformer3DModel
[[autodoc]] SanaVideoTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -22,8 +22,11 @@ When the input is **continuous**:
When the input is **discrete**: When the input is **discrete**:
> [!TIP] <Tip>
> It is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised image don't contain a prediction for the masked pixel because the unnoised image cannot be masked.
It is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised image don't contain a prediction for the masked pixel because the unnoised image cannot be masked.
</Tip>
1. Convert input (classes of latent pixels) to embeddings and apply positional embeddings. 1. Convert input (classes of latent pixels) to embeddings and apply positional embeddings.
2. Apply the Transformer blocks in the standard way. 2. Apply the Transformer blocks in the standard way.

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# BriaFiboTransformer2DModel
A modified Flux Transformer model from [Bria](https://huggingface.co/briaai/FIBO).
## BriaFiboTransformer2DModel
[[autodoc]] BriaFiboTransformer2DModel

View File

@@ -23,3 +23,9 @@ The abstract from the paper is:
## UNet2DConditionOutput ## UNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput [[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput
## FlaxUNet2DConditionModel
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionModel
## FlaxUNet2DConditionOutput
[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput

View File

@@ -1,30 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# WanAnimateTransformer3DModel
A Diffusion Transformer model for 3D video-like data, introduced in [Wan Animate](https://github.com/Wan-Video/Wan2.2) by the Alibaba Wan Team.
The model can be loaded with the following code snippet.
```python
from diffusers import WanAnimateTransformer3DModel
transformer = WanAnimateTransformer3DModel.from_pretrained("Wan-AI/Wan2.2-Animate-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
```
## WanAnimateTransformer3DModel
[[autodoc]] WanAnimateTransformer3DModel
## Transformer2DModelOutput
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

View File

@@ -1,19 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# ZImageTransformer2DModel
A Transformer model for image-like data from [Z-Image](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo).
## ZImageTransformer2DModel
[[autodoc]] ZImageTransformer2DModel

View File

@@ -1,39 +0,0 @@
# Guiders
Guiders are components in Modular Diffusers that control how the diffusion process is guided during generation. They implement various guidance techniques to improve generation quality and control.
## BaseGuidance
[[autodoc]] diffusers.guiders.guider_utils.BaseGuidance
## ClassifierFreeGuidance
[[autodoc]] diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance
## ClassifierFreeZeroStarGuidance
[[autodoc]] diffusers.guiders.classifier_free_zero_star_guidance.ClassifierFreeZeroStarGuidance
## SkipLayerGuidance
[[autodoc]] diffusers.guiders.skip_layer_guidance.SkipLayerGuidance
## SmoothedEnergyGuidance
[[autodoc]] diffusers.guiders.smoothed_energy_guidance.SmoothedEnergyGuidance
## PerturbedAttentionGuidance
[[autodoc]] diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance
## AdaptiveProjectedGuidance
[[autodoc]] diffusers.guiders.adaptive_projected_guidance.AdaptiveProjectedGuidance
## AutoGuidance
[[autodoc]] diffusers.guiders.auto_guidance.AutoGuidance
## TangentialClassifierFreeGuidance
[[autodoc]] diffusers.guiders.tangential_classifier_free_guidance.TangentialClassifierFreeGuidance

View File

@@ -1,5 +0,0 @@
# Pipeline
## ModularPipeline
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipeline

View File

@@ -1,17 +0,0 @@
# Pipeline blocks
## ModularPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks
## SequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks
## LoopSequentialPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.LoopSequentialPipelineBlocks
## AutoPipelineBlocks
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks

View File

@@ -1,17 +0,0 @@
# Components and configs
## ComponentSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ComponentSpec
## ConfigSpec
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec
## ComponentsManager
[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager
## InsertableDict
[[autodoc]] diffusers.modular_pipelines.modular_pipeline_utils.InsertableDict

View File

@@ -1,9 +0,0 @@
# Pipeline states
## PipelineState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineState
## BlockState
[[autodoc]] diffusers.modular_pipelines.modular_pipeline.BlockState

View File

@@ -39,8 +39,11 @@ For instance, retrieving an image by indexing into it returns the tuple `(output
outputs[:1] outputs[:1]
``` ```
> [!TIP] <Tip>
> To check a specific pipeline or model output, refer to its corresponding API documentation.
To check a specific pipeline or model output, refer to its corresponding API documentation.
</Tip>
## BaseOutput ## BaseOutput
@@ -51,6 +54,10 @@ outputs[:1]
[[autodoc]] pipelines.ImagePipelineOutput [[autodoc]] pipelines.ImagePipelineOutput
## FlaxImagePipelineOutput
[[autodoc]] pipelines.pipeline_flax_utils.FlaxImagePipelineOutput
## AudioPipelineOutput ## AudioPipelineOutput
[[autodoc]] pipelines.AudioPipelineOutput [[autodoc]] pipelines.AudioPipelineOutput

View File

@@ -1,24 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->
# Parallelism
Parallelism strategies help speed up diffusion transformers by distributing computations across multiple devices, allowing for faster inference/training times. Refer to the [Distributed inference](../training/distributed_inference) guide to learn more.
## ParallelConfig
[[autodoc]] ParallelConfig
## ContextParallelConfig
[[autodoc]] ContextParallelConfig
[[autodoc]] hooks.apply_context_parallel

View File

@@ -17,8 +17,11 @@ The abstract from the paper is:
*Significant advancements have been made in the field of video generation, with the open-source community contributing a wealth of research papers and tools for training high-quality models. However, despite these efforts, the available information and resources remain insufficient for achieving commercial-level performance. In this report, we open the black box and introduce Allegro, an advanced video generation model that excels in both quality and temporal consistency. We also highlight the current limitations in the field and present a comprehensive methodology for training high-performance, commercial-level video generation models, addressing key aspects such as data, model architecture, training pipeline, and evaluation. Our user study shows that Allegro surpasses existing open-source models and most commercial models, ranking just behind Hailuo and Kling. Code: https://github.com/rhymes-ai/Allegro , Model: https://huggingface.co/rhymes-ai/Allegro , Gallery: https://rhymes.ai/allegro_gallery .* *Significant advancements have been made in the field of video generation, with the open-source community contributing a wealth of research papers and tools for training high-quality models. However, despite these efforts, the available information and resources remain insufficient for achieving commercial-level performance. In this report, we open the black box and introduce Allegro, an advanced video generation model that excels in both quality and temporal consistency. We also highlight the current limitations in the field and present a comprehensive methodology for training high-performance, commercial-level video generation models, addressing key aspects such as data, model architecture, training pipeline, and evaluation. Our user study shows that Allegro surpasses existing open-source models and most commercial models, ranking just behind Hailuo and Kling. 
Code: https://github.com/rhymes-ai/Allegro , Model: https://huggingface.co/rhymes-ai/Allegro , Gallery: https://rhymes.ai/allegro_gallery .*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## Quantization ## Quantization

View File

@@ -102,8 +102,11 @@ Here are some sample outputs:
</tr> </tr>
</table> </table>
> [!TIP] <Tip>
> AnimateDiff tends to work better with finetuned Stable Diffusion models. If you plan on using a scheduler that can clip samples, make sure to disable it by setting `clip_sample=False` in the scheduler as this can also have an adverse effect on generated samples. Additionally, the AnimateDiff checkpoints can be sensitive to the beta schedule of the scheduler. We recommend setting this to `linear`.
AnimateDiff tends to work better with finetuned Stable Diffusion models. If you plan on using a scheduler that can clip samples, make sure to disable it by setting `clip_sample=False` in the scheduler as this can also have an adverse effect on generated samples. Additionally, the AnimateDiff checkpoints can be sensitive to the beta schedule of the scheduler. We recommend setting this to `linear`.
</Tip>
### AnimateDiffControlNetPipeline ### AnimateDiffControlNetPipeline
@@ -796,11 +799,17 @@ frames = output.frames[0]
export_to_gif(frames, "animation.gif") export_to_gif(frames, "animation.gif")
``` ```
> [!WARNING] <Tip warning={true}>
> FreeInit is not really free - the improved quality comes at the cost of extra computation. It requires sampling a few extra times depending on the `num_iters` parameter that is set when enabling it. Setting the `use_fast_sampling` parameter to `True` can improve the overall performance (at the cost of lower quality compared to when `use_fast_sampling=False` but still better results than vanilla video generation models).
> [!TIP] FreeInit is not really free - the improved quality comes at the cost of extra computation. It requires sampling a few extra times depending on the `num_iters` parameter that is set when enabling it. Setting the `use_fast_sampling` parameter to `True` can improve the overall performance (at the cost of lower quality compared to when `use_fast_sampling=False` but still better results than vanilla video generation models).
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
<table> <table>
<tr> <tr>

View File

@@ -23,8 +23,11 @@ The abstract from the paper is:
You can find additional information about Attend-and-Excite on the [project page](https://attendandexcite.github.io/Attend-and-Excite/), the [original codebase](https://github.com/AttendAndExcite/Attend-and-Excite), or try it out in a [demo](https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite). You can find additional information about Attend-and-Excite on the [project page](https://attendandexcite.github.io/Attend-and-Excite/), the [original codebase](https://github.com/AttendAndExcite/Attend-and-Excite), or try it out in a [demo](https://huggingface.co/spaces/AttendAndExcite/Attend-and-Excite).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionAttendAndExcitePipeline ## StableDiffusionAttendAndExcitePipeline

View File

@@ -38,8 +38,11 @@ During inference:
* The _quality_ of the predicted audio sample can be controlled by the `num_inference_steps` argument; higher steps give higher quality audio at the expense of slower inference. * The _quality_ of the predicted audio sample can be controlled by the `num_inference_steps` argument; higher steps give higher quality audio at the expense of slower inference.
* The _length_ of the predicted audio sample can be controlled by varying the `audio_length_in_s` argument. * The _length_ of the predicted audio sample can be controlled by varying the `audio_length_in_s` argument.
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## AudioLDMPipeline ## AudioLDMPipeline
[[autodoc]] AudioLDMPipeline [[autodoc]] AudioLDMPipeline

View File

@@ -58,8 +58,11 @@ See table below for details on the three checkpoints:
The following example demonstrates how to construct good music and speech generation using the aforementioned tips: [example](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2#diffusers.AudioLDM2Pipeline.__call__.example). The following example demonstrates how to construct good music and speech generation using the aforementioned tips: [example](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2#diffusers.AudioLDM2Pipeline.__call__.example).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## AudioLDM2Pipeline ## AudioLDM2Pipeline
[[autodoc]] AudioLDM2Pipeline [[autodoc]] AudioLDM2Pipeline

View File

@@ -16,8 +16,11 @@ AuraFlow is inspired by [Stable Diffusion 3](../pipelines/stable_diffusion/stabl
It was developed by the Fal team and more details about it can be found in [this blog post](https://blog.fal.ai/auraflow/). It was developed by the Fal team and more details about it can be found in [this blog post](https://blog.fal.ai/auraflow/).
> [!TIP] <Tip>
> AuraFlow can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details.
AuraFlow can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details.
</Tip>
## Quantization ## Quantization

View File

@@ -26,8 +26,11 @@ The original codebase can be found at [salesforce/LAVIS](https://github.com/sale
`BlipDiffusionPipeline` and `BlipDiffusionControlNetPipeline` were contributed by [`ayushtues`](https://github.com/ayushtues/). `BlipDiffusionPipeline` and `BlipDiffusionControlNetPipeline` were contributed by [`ayushtues`](https://github.com/ayushtues/).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## BlipDiffusionPipeline ## BlipDiffusionPipeline

View File

@@ -1,44 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Bria 3.2
Bria 3.2 is the next-generation commercial-ready text-to-image model. With just 4 billion parameters, it provides exceptional aesthetics and text rendering, and has been evaluated to deliver results on par with leading open-source models while outperforming other licensed models.
In addition to being built entirely on licensed data, 3.2 provides several advantages for enterprise and commercial use:
- Efficient Compute - the model is 3x smaller than the equivalent models in the market (4B parameters vs. 12B parameters for other open-source models)
- Architecture Consistency: Same architecture as 3.1—ideal for users looking to upgrade without disruption.
- Fine-tuning Speedup: 2x faster fine-tuning on L40S and A100.
Original model checkpoints for Bria 3.2 can be found [here](https://huggingface.co/briaai/BRIA-3.2).
Github repo for Bria 3.2 can be found [here](https://github.com/Bria-AI/BRIA-3.2).
If you want to learn more about the Bria platform, and get free trial access, please visit [bria.ai](https://bria.ai).
## Usage
_As the model is gated, before using it with diffusers you first need to go to the [Bria 3.2 Hugging Face page](https://huggingface.co/briaai/BRIA-3.2), fill in the form and accept the gate. Once you are in, you need to login so that your system knows you've accepted the gate._
Use the command below to log in:
```bash
hf auth login
```
## BriaPipeline
[[autodoc]] BriaPipeline
- all
- __call__

View File

@@ -1,45 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Bria Fibo
Text-to-image models have mastered imagination - but not control. FIBO changes that.
FIBO is trained on structured JSON captions up to 1,000+ words and designed to understand and control different visual parameters such as lighting, composition, color, and camera settings, enabling precise and reproducible outputs.
With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence, and professional control.
FIBO is trained exclusively on a structured prompt and will not work with freeform text prompts.
You can use the [FIBO-VLM-prompt-to-JSON](https://huggingface.co/briaai/FIBO-VLM-prompt-to-JSON) model or the [FIBO-gemini-prompt-to-JSON](https://huggingface.co/briaai/FIBO-gemini-prompt-to-JSON) model to convert your freeform text prompt to a structured JSON prompt.
> [!NOTE]
> Avoid using freeform text prompts directly with FIBO because it does not produce the best results.
Refer to the Bria Fibo Hugging Face [page](https://huggingface.co/briaai/FIBO) to learn more.
## Usage
_As the model is gated, before using it with diffusers you first need to go to the [Bria Fibo Hugging Face page](https://huggingface.co/briaai/FIBO), fill in the form and accept the gate. Once you are in, you need to log in so that your system knows you've accepted the gate._
Use the command below to log in:
```bash
hf auth login
```
## BriaFiboPipeline
[[autodoc]] BriaFiboPipeline
- all
- __call__

View File

@@ -19,21 +19,23 @@ specific language governing permissions and limitations under the License.
Chroma is a text to image generation model based on Flux. Chroma is a text to image generation model based on Flux.
Original model checkpoints for Chroma can be found here: Original model checkpoints for Chroma can be found [here](https://huggingface.co/lodestones/Chroma).
* High-resolution finetune: [lodestones/Chroma1-HD](https://huggingface.co/lodestones/Chroma1-HD)
* Base model: [lodestones/Chroma1-Base](https://huggingface.co/lodestones/Chroma1-Base)
* Original repo with progress checkpoints: [lodestones/Chroma](https://huggingface.co/lodestones/Chroma) (loading this repo with `from_pretrained` will load a Diffusers-compatible version of the `unlocked-v37` checkpoint)
> [!TIP] <Tip>
> Chroma can use all the same optimizations as Flux.
Chroma can use all the same optimizations as Flux.
</Tip>
## Inference ## Inference
The Diffusers version of Chroma is based on the [`unlocked-v37`](https://huggingface.co/lodestones/Chroma/blob/main/chroma-unlocked-v37.safetensors) version of the original model, which is available in the [Chroma repository](https://huggingface.co/lodestones/Chroma).
```python ```python
import torch import torch
from diffusers import ChromaPipeline from diffusers import ChromaPipeline
pipe = ChromaPipeline.from_pretrained("lodestones/Chroma1-HD", torch_dtype=torch.bfloat16) pipe = ChromaPipeline.from_pretrained("lodestones/Chroma", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload() pipe.enable_model_cpu_offload()
prompt = [ prompt = [
@@ -64,10 +66,10 @@ Then run the following example
import torch import torch
from diffusers import ChromaTransformer2DModel, ChromaPipeline from diffusers import ChromaTransformer2DModel, ChromaPipeline
model_id = "lodestones/Chroma1-HD" model_id = "lodestones/Chroma"
dtype = torch.bfloat16 dtype = torch.bfloat16
transformer = ChromaTransformer2DModel.from_single_file("https://huggingface.co/lodestones/Chroma1-HD/blob/main/Chroma1-HD.safetensors", torch_dtype=dtype) transformer = ChromaTransformer2DModel.from_single_file("https://huggingface.co/lodestones/Chroma/blob/main/chroma-unlocked-v37.safetensors", torch_dtype=dtype)
pipe = ChromaPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=dtype) pipe = ChromaPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=dtype)
pipe.enable_model_cpu_offload() pipe.enable_model_cpu_offload()

View File

@@ -1,156 +0,0 @@
<!-- Copyright 2025 The ChronoEdit Team and HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->
<div style="float: right;">
<div class="flex flex-wrap space-x-1">
<a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
</a>
</div>
</div>
# ChronoEdit
[ChronoEdit: Towards Temporal Reasoning for Image Editing and World Simulation](https://huggingface.co/papers/2510.04290) from NVIDIA and University of Toronto, by Jay Zhangjie Wu, Xuanchi Ren, Tianchang Shen, Tianshi Cao, Kai He, Yifan Lu, Ruiyuan Gao, Enze Xie, Shiyi Lan, Jose M. Alvarez, Jun Gao, Sanja Fidler, Zian Wang, Huan Ling.
> **TL;DR:** ChronoEdit reframes image editing as a video generation task, using input and edited images as start/end frames to leverage pretrained video models with temporal consistency. A temporal reasoning stage introduces reasoning tokens to ensure physically plausible edits and visualize the editing trajectory.
*Recent advances in large generative models have greatly enhanced both image editing and in-context image generation, yet a critical gap remains in ensuring physical consistency, where edited objects must remain coherent. This capability is especially vital for world simulation related tasks. In this paper, we present ChronoEdit, a framework that reframes image editing as a video generation problem. First, ChronoEdit treats the input and edited images as the first and last frames of a video, allowing it to leverage large pretrained video generative models that capture not only object appearance but also the implicit physics of motion and interaction through learned temporal consistency. Second, ChronoEdit introduces a temporal reasoning stage that explicitly performs editing at inference time. Under this setting, target frame is jointly denoised with reasoning tokens to imagine a plausible editing trajectory that constrains the solution space to physically viable transformations. The reasoning tokens are then dropped after a few steps to avoid the high computational cost of rendering a full video. To validate ChronoEdit, we introduce PBench-Edit, a new benchmark of image-prompt pairs for contexts that require physical consistency, and demonstrate that ChronoEdit surpasses state-of-the-art baselines in both visual fidelity and physical plausibility. Project page for code and models: [this https URL](https://research.nvidia.com/labs/toronto-ai/chronoedit).*
The ChronoEdit pipeline is developed by the ChronoEdit Team. The original code is available on [GitHub](https://github.com/nv-tlabs/ChronoEdit), and pretrained models can be found in the [nvidia/ChronoEdit](https://huggingface.co/collections/nvidia/chronoedit) collection on Hugging Face.
### Image Editing
```py
# Example: edit a single image with ChronoEditPipeline, with temporal reasoning disabled.
import torch
import numpy as np
from diffusers import AutoencoderKLWan, ChronoEditTransformer3DModel, ChronoEditPipeline
from diffusers.utils import export_to_video, load_image
from transformers import CLIPVisionModel
from PIL import Image
model_id = "nvidia/ChronoEdit-14B-Diffusers"
# The image encoder and VAE are loaded in float32; the transformer is loaded in bfloat16.
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
# Pass the separately loaded components so the pipeline uses them as-is.
pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
pipe.to("cuda")
image = load_image(
"https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png"
)
# Choose a target size that keeps the input aspect ratio, covers roughly `max_area`
# pixels, and has both sides rounded down to a multiple of `mod_value`.
max_area = 720 * 1280
aspect_ratio = image.height / image.width
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
print("width", width, "height", height)
# PIL's resize takes (width, height), in that order.
image = image.resize((width, height))
# The two adjacent string literals are concatenated into one prompt.
prompt = (
"The user wants to transform the image by adding a small, cute mouse sitting inside the floral teacup, enjoying a spa bath. The mouse should appear relaxed and cheerful, with a tiny white bath towel draped over its head like a turban. It should be positioned comfortably in the cups liquid, with gentle steam rising around it to blend with the cozy atmosphere. "
"The mouses pose should be natural—perhaps sitting upright with paws resting lightly on the rim or submerged in the tea. The teacups floral design, gold trim, and warm lighting must remain unchanged to preserve the original aesthetic. The steam should softly swirl around the mouse, enhancing the spa-like, whimsical mood."
)
# Temporal reasoning is switched off here: 5 frames, 0 reasoning steps.
output = pipe(
image=image,
prompt=prompt,
height=height,
width=width,
num_frames=5,
num_inference_steps=50,
guidance_scale=5.0,
enable_temporal_reasoning=False,
num_temporal_reasoning_steps=0,
).frames[0]
# Save the last frame as the edited image.
# NOTE(review): the scaling by 255 presumes frames are float arrays in [0, 1] — confirm.
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
Optionally, enable **temporal reasoning** for improved physical consistency:
```py
# Continuation of the image-editing example: reuses `pipe`, `image`, `prompt`,
# `height`, `width`, `export_to_video`, and `Image` defined there.
# Compared with that call, this one enables temporal reasoning, uses 29 frames,
# and sets 50 temporal reasoning steps.
output = pipe(
image=image,
prompt=prompt,
height=height,
width=width,
num_frames=29,
num_inference_steps=50,
guidance_scale=5.0,
enable_temporal_reasoning=True,
num_temporal_reasoning_steps=50,
).frames[0]
# Export all returned frames as a video, and save the last frame as the edited image.
export_to_video(output, "output.mp4", fps=16)
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
### Inference with 8-Step Distillation LoRA
```py
# Example: 8-step inference with the ChronoEdit distillation LoRA.
import torch
import numpy as np
from diffusers import (
    AutoencoderKLWan,
    ChronoEditPipeline,
    ChronoEditTransformer3DModel,
    UniPCMultistepScheduler,  # used below to rebuild the scheduler; was missing from the imports
)
from diffusers.utils import export_to_video, load_image
from huggingface_hub import hf_hub_download  # used below to fetch the LoRA file; was missing from the imports
from transformers import CLIPVisionModel
from PIL import Image

model_id = "nvidia/ChronoEdit-14B-Diffusers"
# The image encoder and VAE are loaded in float32; the transformer is loaded in bfloat16.
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)

# Download the distillation LoRA from the model repo, load it, and fuse it at full scale.
lora_path = hf_hub_download(repo_id=model_id, filename="lora/chronoedit_distill_lora.safetensors")
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=1.0)
# Rebuild the scheduler from the existing config, overriding flow_shift to 2.0.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=2.0)
pipe.to("cuda")

image = load_image(
    "https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png"
)
# Choose a target size that keeps the input aspect ratio, covers roughly `max_area`
# pixels, and has both sides rounded down to a multiple of `mod_value`.
max_area = 720 * 1280
aspect_ratio = image.height / image.width
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
print("width", width, "height", height)
# PIL's resize takes (width, height), in that order.
image = image.resize((width, height))

# The two adjacent string literals are concatenated into one prompt.
prompt = (
    "The user wants to transform the image by adding a small, cute mouse sitting inside the floral teacup, enjoying a spa bath. The mouse should appear relaxed and cheerful, with a tiny white bath towel draped over its head like a turban. It should be positioned comfortably in the cups liquid, with gentle steam rising around it to blend with the cozy atmosphere. "
    "The mouses pose should be natural—perhaps sitting upright with paws resting lightly on the rim or submerged in the tea. The teacups floral design, gold trim, and warm lighting must remain unchanged to preserve the original aesthetic. The steam should softly swirl around the mouse, enhancing the spa-like, whimsical mood."
)

# Distilled settings: 8 inference steps, guidance_scale=1.0, temporal reasoning off.
output = pipe(
    image=image,
    prompt=prompt,
    height=height,
    width=width,
    num_frames=5,
    num_inference_steps=8,
    guidance_scale=1.0,
    enable_temporal_reasoning=False,
    num_temporal_reasoning_steps=0,
).frames[0]
# Export all returned frames as a video, and save the last frame as the edited image.
export_to_video(output, "output.mp4", fps=16)
Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
```
## ChronoEditPipeline
[[autodoc]] ChronoEditPipeline
- all
- __call__
## ChronoEditPipelineOutput
[[autodoc]] pipelines.chronoedit.pipeline_output.ChronoEditPipelineOutput

View File

@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
pipeline_quant_config = PipelineQuantizationConfig( pipeline_quant_config = PipelineQuantizationConfig(
quant_backend="torchao", quant_backend="torchao",
quant_kwargs={"quant_type": "int8wo"}, quant_kwargs={"quant_type": "int8wo"},
components_to_quantize="transformer" components_to_quantize=["transformer"]
) )
# fp8 layerwise weight-casting # fp8 layerwise weight-casting

View File

@@ -21,8 +21,11 @@ The abstract from the paper is:
*Recent advancements in text-to-image generative systems have been largely driven by diffusion models. However, single-stage text-to-image diffusion models still face challenges, in terms of computational efficiency and the refinement of image details. To tackle the issue, we propose CogView3, an innovative cascaded framework that enhances the performance of text-to-image diffusion. CogView3 is the first model implementing relay diffusion in the realm of text-to-image generation, executing the task by first creating low-resolution images and subsequently applying relay-based super-resolution. This methodology not only results in competitive text-to-image outputs but also greatly reduces both training and inference costs. Our experimental results demonstrate that CogView3 outperforms SDXL, the current state-of-the-art open-source text-to-image diffusion model, by 77.0% in human evaluations, all while requiring only about 1/2 of the inference time. The distilled variant of CogView3 achieves comparable performance while only utilizing 1/10 of the inference time by SDXL.* *Recent advancements in text-to-image generative systems have been largely driven by diffusion models. However, single-stage text-to-image diffusion models still face challenges, in terms of computational efficiency and the refinement of image details. To tackle the issue, we propose CogView3, an innovative cascaded framework that enhances the performance of text-to-image diffusion. CogView3 is the first model implementing relay diffusion in the realm of text-to-image generation, executing the task by first creating low-resolution images and subsequently applying relay-based super-resolution. This methodology not only results in competitive text-to-image outputs but also greatly reduces both training and inference costs. 
Our experimental results demonstrate that CogView3 outperforms SDXL, the current state-of-the-art open-source text-to-image diffusion model, by 77.0% in human evaluations, all while requiring only about 1/2 of the inference time. The distilled variant of CogView3 achieves comparable performance while only utilizing 1/10 of the inference time by SDXL.*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM). This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).

View File

@@ -15,8 +15,11 @@
# CogView4 # CogView4
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM). This pipeline was contributed by [zRzRzRzRzRzRzR](https://github.com/zRzRzRzRzRzRzR). The original codebase can be found [here](https://huggingface.co/THUDM). The original weights can be found under [hf.co/THUDM](https://huggingface.co/THUDM).

View File

@@ -25,8 +25,11 @@ The abstract from the paper is:
*Identity-preserving text-to-video (IPT2V) generation aims to create high-fidelity videos with consistent human identity. It is an important task in video generation but remains an open problem for generative models. This paper pushes the technical frontier of IPT2V in two directions that have not been resolved in the literature: (1) A tuning-free pipeline without tedious case-by-case finetuning, and (2) A frequency-aware heuristic identity-preserving Diffusion Transformer (DiT)-based control scheme. To achieve these goals, we propose **ConsisID**, a tuning-free DiT-based controllable IPT2V model to keep human-**id**entity **consis**tent in the generated video. Inspired by prior findings in frequency analysis of vision/diffusion transformers, it employs identity-control signals in the frequency domain, where facial features can be decomposed into low-frequency global features (e.g., profile, proportions) and high-frequency intrinsic features (e.g., identity markers that remain unaffected by pose changes). First, from a low-frequency perspective, we introduce a global facial extractor, which encodes the reference image and facial key points into a latent space, generating features enriched with low-frequency information. These features are then integrated into the shallow layers of the network to alleviate training challenges associated with DiT. Second, from a high-frequency perspective, we design a local facial extractor to capture high-frequency details and inject them into the transformer blocks, enhancing the model's ability to preserve fine-grained features. To leverage the frequency information for identity preservation, we propose a hierarchical training strategy, transforming a vanilla pre-trained video generation model into an IPT2V model. Extensive experiments demonstrate that our frequency-aware heuristic scheme provides an optimal control solution for DiT-based models. 
Thanks to this scheme, our **ConsisID** achieves excellent results in generating high-quality, identity-preserving videos, making strides towards more effective IPT2V. The model weight of ConsID is publicly available at https://github.com/PKU-YuanGroup/ConsisID.* *Identity-preserving text-to-video (IPT2V) generation aims to create high-fidelity videos with consistent human identity. It is an important task in video generation but remains an open problem for generative models. This paper pushes the technical frontier of IPT2V in two directions that have not been resolved in the literature: (1) A tuning-free pipeline without tedious case-by-case finetuning, and (2) A frequency-aware heuristic identity-preserving Diffusion Transformer (DiT)-based control scheme. To achieve these goals, we propose **ConsisID**, a tuning-free DiT-based controllable IPT2V model to keep human-**id**entity **consis**tent in the generated video. Inspired by prior findings in frequency analysis of vision/diffusion transformers, it employs identity-control signals in the frequency domain, where facial features can be decomposed into low-frequency global features (e.g., profile, proportions) and high-frequency intrinsic features (e.g., identity markers that remain unaffected by pose changes). First, from a low-frequency perspective, we introduce a global facial extractor, which encodes the reference image and facial key points into a latent space, generating features enriched with low-frequency information. These features are then integrated into the shallow layers of the network to alleviate training challenges associated with DiT. Second, from a high-frequency perspective, we design a local facial extractor to capture high-frequency details and inject them into the transformer blocks, enhancing the model's ability to preserve fine-grained features. 
To leverage the frequency information for identity preservation, we propose a hierarchical training strategy, transforming a vanilla pre-trained video generation model into an IPT2V model. Extensive experiments demonstrate that our frequency-aware heuristic scheme provides an optimal control solution for DiT-based models. Thanks to this scheme, our **ConsisID** achieves excellent results in generating high-quality, identity-preserving videos, making strides towards more effective IPT2V. The model weight of ConsID is publicly available at https://github.com/PKU-YuanGroup/ConsisID.*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers.md) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
This pipeline was contributed by [SHYuanBest](https://github.com/SHYuanBest). The original codebase can be found [here](https://github.com/PKU-YuanGroup/ConsisID). The original weights can be found under [hf.co/BestWishYsh](https://huggingface.co/BestWishYsh). This pipeline was contributed by [SHYuanBest](https://github.com/SHYuanBest). The original codebase can be found [here](https://github.com/PKU-YuanGroup/ConsisID). The original weights can be found under [hf.co/BestWishYsh](https://huggingface.co/BestWishYsh).

View File

@@ -26,8 +26,11 @@ FLUX.1 Depth and Canny [dev] is a 12 billion parameter rectified flow transforme
| Canny | [Black Forest Labs](https://huggingface.co/black-forest-labs) | [Link](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev) | | Canny | [Black Forest Labs](https://huggingface.co/black-forest-labs) | [Link](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev) |
> [!TIP] <Tip>
> Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).
Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).
</Tip>
```python ```python
import torch import torch

View File

@@ -28,8 +28,11 @@ This model was contributed by [takuma104](https://huggingface.co/takuma104). ❤
The original codebase can be found at [lllyasviel/ControlNet](https://github.com/lllyasviel/ControlNet), and you can find official ControlNet checkpoints on [lllyasviel's](https://huggingface.co/lllyasviel) Hub profile. The original codebase can be found at [lllyasviel/ControlNet](https://github.com/lllyasviel/ControlNet), and you can find official ControlNet checkpoints on [lllyasviel's](https://huggingface.co/lllyasviel) Hub profile.
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionControlNetPipeline ## StableDiffusionControlNetPipeline
[[autodoc]] StableDiffusionControlNetPipeline [[autodoc]] StableDiffusionControlNetPipeline
@@ -69,3 +72,11 @@ The original codebase can be found at [lllyasviel/ControlNet](https://github.com
## StableDiffusionPipelineOutput ## StableDiffusionPipelineOutput
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
## FlaxStableDiffusionControlNetPipeline
[[autodoc]] FlaxStableDiffusionControlNetPipeline
- all
- __call__
## FlaxStableDiffusionControlNetPipelineOutput
[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput

View File

@@ -44,8 +44,11 @@ XLabs ControlNets are also supported, which was contributed by the [XLabs team](
| HED | [The XLabs Team](https://huggingface.co/XLabs-AI) | [Link](https://huggingface.co/XLabs-AI/flux-controlnet-hed-diffusers) | | HED | [The XLabs Team](https://huggingface.co/XLabs-AI) | [Link](https://huggingface.co/XLabs-AI/flux-controlnet-hed-diffusers) |
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## FluxControlNetPipeline ## FluxControlNetPipeline
[[autodoc]] FluxControlNetPipeline [[autodoc]] FluxControlNetPipeline

View File

@@ -24,8 +24,11 @@ The abstract from the paper is:
This code is implemented by Tencent Hunyuan Team. You can find pre-trained checkpoints for Hunyuan-DiT ControlNets on [Tencent Hunyuan](https://huggingface.co/Tencent-Hunyuan). This code is implemented by Tencent Hunyuan Team. You can find pre-trained checkpoints for Hunyuan-DiT ControlNets on [Tencent Hunyuan](https://huggingface.co/Tencent-Hunyuan).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## HunyuanDiTControlNetPipeline ## HunyuanDiTControlNetPipeline
[[autodoc]] HunyuanDiTControlNetPipeline [[autodoc]] HunyuanDiTControlNetPipeline

View File

@@ -38,8 +38,11 @@ This controlnet code is mainly implemented by [The InstantX Team](https://huggin
| Inpainting | [The AlimamaCreative Team](https://huggingface.co/alimama-creative) | [link](https://huggingface.co/alimama-creative/SD3-Controlnet-Inpainting) | | Inpainting | [The AlimamaCreative Team](https://huggingface.co/alimama-creative) | [link](https://huggingface.co/alimama-creative/SD3-Controlnet-Inpainting) |
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusion3ControlNetPipeline ## StableDiffusion3ControlNetPipeline
[[autodoc]] StableDiffusion3ControlNetPipeline [[autodoc]] StableDiffusion3ControlNetPipeline

View File

@@ -26,13 +26,19 @@ The abstract from the paper is:
You can find additional smaller Stable Diffusion XL (SDXL) ControlNet checkpoints from the 🤗 [Diffusers](https://huggingface.co/diffusers) Hub organization, and browse [community-trained](https://huggingface.co/models?other=stable-diffusion-xl&other=controlnet) checkpoints on the Hub. You can find additional smaller Stable Diffusion XL (SDXL) ControlNet checkpoints from the 🤗 [Diffusers](https://huggingface.co/diffusers) Hub organization, and browse [community-trained](https://huggingface.co/models?other=stable-diffusion-xl&other=controlnet) checkpoints on the Hub.
> [!WARNING] <Tip warning={true}>
> 🧪 Many of the SDXL ControlNet checkpoints are experimental, and there is a lot of room for improvement. Feel free to open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) and leave us feedback on how we can improve!
🧪 Many of the SDXL ControlNet checkpoints are experimental, and there is a lot of room for improvement. Feel free to open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) and leave us feedback on how we can improve!
</Tip>
If you don't see a checkpoint you're interested in, you can train your own SDXL ControlNet with our [training script](../../../../../examples/controlnet/README_sdxl). If you don't see a checkpoint you're interested in, you can train your own SDXL ControlNet with our [training script](../../../../../examples/controlnet/README_sdxl).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionXLControlNetPipeline ## StableDiffusionXLControlNetPipeline
[[autodoc]] StableDiffusionXLControlNetPipeline [[autodoc]] StableDiffusionXLControlNetPipeline

View File

@@ -31,8 +31,11 @@ Here's the overview from the [project page](https://vislearn.github.io/ControlNe
This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionControlNetXSPipeline ## StableDiffusionControlNetXSPipeline
[[autodoc]] StableDiffusionControlNetXSPipeline [[autodoc]] StableDiffusionControlNetXSPipeline

View File

@@ -27,11 +27,17 @@ Here's the overview from the [project page](https://vislearn.github.io/ControlNe
This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️
> [!WARNING] <Tip warning={true}>
> 🧪 Many of the SDXL ControlNet checkpoints are experimental, and there is a lot of room for improvement. Feel free to open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) and leave us feedback on how we can improve!
> [!TIP] 🧪 Many of the SDXL ControlNet checkpoints are experimental, and there is a lot of room for improvement. Feel free to open an [Issue](https://github.com/huggingface/diffusers/issues/new/choose) and leave us feedback on how we can improve!
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
<Tip>
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## StableDiffusionXLControlNetXSPipeline ## StableDiffusionXLControlNetXSPipeline
[[autodoc]] StableDiffusionXLControlNetXSPipeline [[autodoc]] StableDiffusionXLControlNetXSPipeline

View File

@@ -18,8 +18,11 @@
*Physical AI needs to be trained digitally first. It needs a digital twin of itself, the policy model, and a digital twin of the world, the world model. In this paper, we present the Cosmos World Foundation Model Platform to help developers build customized world models for their Physical AI setups. We position a world foundation model as a general-purpose world model that can be fine-tuned into customized world models for downstream applications. Our platform covers a video curation pipeline, pre-trained world foundation models, examples of post-training of pre-trained world foundation models, and video tokenizers. To help Physical AI builders solve the most critical problems of our society, we make our platform open-source and our models open-weight with permissive licenses available via https://github.com/NVIDIA/Cosmos.* *Physical AI needs to be trained digitally first. It needs a digital twin of itself, the policy model, and a digital twin of the world, the world model. In this paper, we present the Cosmos World Foundation Model Platform to help developers build customized world models for their Physical AI setups. We position a world foundation model as a general-purpose world model that can be fine-tuned into customized world models for downstream applications. Our platform covers a video curation pipeline, pre-trained world foundation models, examples of post-training of pre-trained world foundation models, and video tokenizers. To help Physical AI builders solve the most critical problems of our society, we make our platform open-source and our models open-weight with permissive licenses available via https://github.com/NVIDIA/Cosmos.*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## Loading original format checkpoints ## Loading original format checkpoints

View File

@@ -20,8 +20,11 @@ specific language governing permissions and limitations under the License.
Dance Diffusion is the first in a suite of generative audio tools for producers and musicians released by [Harmonai](https://github.com/Harmonai-org). Dance Diffusion is the first in a suite of generative audio tools for producers and musicians released by [Harmonai](https://github.com/Harmonai-org).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## DanceDiffusionPipeline ## DanceDiffusionPipeline
[[autodoc]] DanceDiffusionPipeline [[autodoc]] DanceDiffusionPipeline

View File

@@ -20,8 +20,11 @@ The abstract from the paper is:
The original codebase can be found at [hojonathanho/diffusion](https://github.com/hojonathanho/diffusion). The original codebase can be found at [hojonathanho/diffusion](https://github.com/hojonathanho/diffusion).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
# DDPMPipeline # DDPMPipeline
[[autodoc]] DDPMPipeline [[autodoc]] DDPMPipeline

View File

@@ -20,8 +20,11 @@ The abstract from the paper is:
The original codebase can be found at [facebookresearch/dit](https://github.com/facebookresearch/dit). The original codebase can be found at [facebookresearch/dit](https://github.com/facebookresearch/dit).
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## DiTPipeline ## DiTPipeline
[[autodoc]] DiTPipeline [[autodoc]] DiTPipeline

View File

@@ -21,10 +21,11 @@ Flux is a series of text-to-image generation models based on diffusion transform
Original model checkpoints for Flux can be found [here](https://huggingface.co/black-forest-labs). Original inference code can be found [here](https://github.com/black-forest-labs/flux). Original model checkpoints for Flux can be found [here](https://huggingface.co/black-forest-labs). Original inference code can be found [here](https://github.com/black-forest-labs/flux).
> [!TIP] <Tip>
> Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).
> Flux can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more. For an exhaustive list of resources, check out [this gist](https://gist.github.com/sayakpaul/b664605caf0aa3bf8585ab109dd5ac9c).
> [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
</Tip>
Flux comes in the following variants: Flux comes in the following variants:
@@ -313,67 +314,6 @@ if integrity_checker.test_image(image_):
raise ValueError("Your image has been flagged. Choose another prompt/image or try again.") raise ValueError("Your image has been flagged. Choose another prompt/image or try again.")
``` ```
### Kontext Inpainting
`FluxKontextInpaintPipeline` enables image modification within a fixed mask region. It currently supports both text-based conditioning and image-reference conditioning.
<hfoptions id="kontext-inpaint">
<hfoption id="text-only">
```python
import torch
from diffusers import FluxKontextInpaintPipeline
from diffusers.utils import load_image
prompt = "Change the yellow dinosaur to green one"
img_url = (
"https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/dinosaur_input.jpeg?raw=true"
)
mask_url = (
"https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/dinosaur_mask.png?raw=true"
)
source = load_image(img_url)
mask = load_image(mask_url)
pipe = FluxKontextInpaintPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
)
pipe.to("cuda")
image = pipe(prompt=prompt, image=source, mask_image=mask, strength=1.0).images[0]
image.save("kontext_inpainting_normal.png")
```
</hfoption>
<hfoption id="image conditioning">
```python
import torch
from diffusers import FluxKontextInpaintPipeline
from diffusers.utils import load_image
pipe = FluxKontextInpaintPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
)
pipe.to("cuda")
prompt = "Replace this ball"
img_url = "https://images.pexels.com/photos/39362/the-ball-stadion-football-the-pitch-39362.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500"
mask_url = "https://github.com/ZenAI-Vietnam/Flux-Kontext-pipelines/blob/main/assets/ball_mask.png?raw=true"
image_reference_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTah3x6OL_ECMBaZ5ZlJJhNsyC-OSMLWAI-xw&s"
source = load_image(img_url)
mask = load_image(mask_url)
image_reference = load_image(image_reference_url)
mask = pipe.mask_processor.blur(mask, blur_factor=12)
image = pipe(
prompt=prompt, image=source, mask_image=mask, image_reference=image_reference, strength=1.0
).images[0]
image.save("kontext_inpainting_ref.png")
```
</hfoption>
</hfoptions>
## Combining Flux Turbo LoRAs with Flux Control, Fill, and Redux ## Combining Flux Turbo LoRAs with Flux Control, Fill, and Redux
We can combine Flux Turbo LoRAs with Flux Control and other pipelines like Fill and Redux to enable few-steps' inference. The example below shows how to do that for Flux Control LoRA for depth and turbo LoRA from [`ByteDance/Hyper-SD`](https://hf.co/ByteDance/Hyper-SD). We can combine Flux Turbo LoRAs with Flux Control and other pipelines like Fill and Redux to enable few-steps' inference. The example below shows how to do that for Flux Control LoRA for depth and turbo LoRA from [`ByteDance/Hyper-SD`](https://hf.co/ByteDance/Hyper-SD).
@@ -417,8 +357,11 @@ When unloading the Control LoRA weights, call `pipe.unload_lora_weights(reset_to
## IP-Adapter ## IP-Adapter
> [!TIP] <Tip>
> Check out [IP-Adapter](../../using-diffusers/ip_adapter) to learn more about how IP-Adapters work.
Check out [IP-Adapter](../../using-diffusers/ip_adapter) to learn more about how IP-Adapters work.
</Tip>
An IP-Adapter lets you prompt Flux with images, in addition to the text prompt. This is especially useful when describing complex concepts that are difficult to articulate through text alone and you have reference images. An IP-Adapter lets you prompt Flux with images, in addition to the text prompt. This is especially useful when describing complex concepts that are difficult to articulate through text alone and you have reference images.
@@ -598,8 +541,9 @@ image.save("flux.png")
The `FluxTransformer2DModel` supports loading checkpoints in the original format shipped by Black Forest Labs. This is also useful when trying to load finetunes or quantized versions of the models that have been published by the community. The `FluxTransformer2DModel` supports loading checkpoints in the original format shipped by Black Forest Labs. This is also useful when trying to load finetunes or quantized versions of the models that have been published by the community.
> [!TIP] <Tip>
> `FP8` inference can be brittle depending on the GPU type, CUDA version, and `torch` version that you are using. It is recommended that you use the `optimum-quanto` library in order to run FP8 inference on your machine. `FP8` inference can be brittle depending on the GPU type, CUDA version, and `torch` version that you are using. It is recommended that you use the `optimum-quanto` library in order to run FP8 inference on your machine.
</Tip>
The following example demonstrates how to run Flux with less than 16GB of VRAM. The following example demonstrates how to run Flux with less than 16GB of VRAM.
@@ -700,15 +644,3 @@ image.save("flux-fp8-dev.png")
[[autodoc]] FluxFillPipeline [[autodoc]] FluxFillPipeline
- all - all
- __call__ - __call__
## FluxKontextPipeline
[[autodoc]] FluxKontextPipeline
- all
- __call__
## FluxKontextInpaintPipeline
[[autodoc]] FluxKontextInpaintPipeline
- all
- __call__

View File

@@ -1,39 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Flux2
<div class="flex flex-wrap space-x-1">
<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
<img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white%22">
</div>
Flux.2 is the recent series of image generation models from Black Forest Labs, preceded by the [Flux.1](./flux.md) series. It is an entirely new model with a new architecture and pre-training done from scratch!
Original model checkpoints for Flux.2 can be found [here](https://huggingface.co/black-forest-labs). Original inference code can be found [here](https://github.com/black-forest-labs/flux2).
> [!TIP]
> Flux2 can be quite expensive to run on consumer hardware devices. However, you can perform a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux can benefit from quantization for memory efficiency with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more.
>
> [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
## Caption upsampling
Flux.2 can potentially generate better outputs with better prompts. We can "upsample"
an input prompt by setting the `caption_upsample_temperature` argument in the pipeline call arguments.
The [official implementation](https://github.com/black-forest-labs/flux2/blob/5a5d316b1b42f6b59a8c9194b77c8256be848432/src/flux2/text_encoder.py#L140) recommends this value to be 0.15.
## Flux2Pipeline
[[autodoc]] Flux2Pipeline
- all
- __call__

View File

@@ -22,8 +22,11 @@
*We present a neural network structure, FramePack, to train next-frame (or next-frame-section) prediction models for video generation. The FramePack compresses input frames to make the transformer context length a fixed number regardless of the video length. As a result, we are able to process a large number of frames using video diffusion with computation bottleneck similar to image diffusion. This also makes the training video batch sizes significantly higher (batch sizes become comparable to image diffusion training). We also propose an anti-drifting sampling method that generates frames in inverted temporal order with early-established endpoints to avoid exposure bias (error accumulation over iterations). Finally, we show that existing video diffusion models can be finetuned with FramePack, and their visual quality may be improved because the next-frame prediction supports more balanced diffusion schedulers with less extreme flow shift timesteps.* *We present a neural network structure, FramePack, to train next-frame (or next-frame-section) prediction models for video generation. The FramePack compresses input frames to make the transformer context length a fixed number regardless of the video length. As a result, we are able to process a large number of frames using video diffusion with computation bottleneck similar to image diffusion. This also makes the training video batch sizes significantly higher (batch sizes become comparable to image diffusion training). We also propose an anti-drifting sampling method that generates frames in inverted temporal order with early-established endpoints to avoid exposure bias (error accumulation over iterations). Finally, we show that existing video diffusion models can be finetuned with FramePack, and their visual quality may be improved because the next-frame prediction supports more balanced diffusion schedulers with less extreme flow shift timesteps.*
> [!TIP] <Tip>
> Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## Available models ## Available models

View File

@@ -16,12 +16,15 @@
[HiDream-I1](https://huggingface.co/HiDream-ai) by HiDream.ai [HiDream-I1](https://huggingface.co/HiDream-ai) by HiDream.ai
> [!TIP] <Tip>
> [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
</Tip>
## Available models ## Available models
The following models are available for the [`HiDreamImagePipeline`] pipeline: The following models are available for the [`HiDreamImagePipeline`](text-to-image) pipeline:
| Model name | Description | | Model name | Description |
|:---|:---| |:---|:---|

View File

@@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
"bnb_4bit_quant_type": "nf4", "bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16 "bnb_4bit_compute_dtype": torch.bfloat16
}, },
components_to_quantize="transformer" components_to_quantize=["transformer"]
) )
pipeline = HunyuanVideoPipeline.from_pretrained( pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
"bnb_4bit_quant_type": "nf4", "bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16 "bnb_4bit_compute_dtype": torch.bfloat16
}, },
components_to_quantize="transformer" components_to_quantize=["transformer"]
) )
pipeline = HunyuanVideoPipeline.from_pretrained( pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15)
"bnb_4bit_quant_type": "nf4", "bnb_4bit_quant_type": "nf4",
"bnb_4bit_compute_dtype": torch.bfloat16 "bnb_4bit_compute_dtype": torch.bfloat16
}, },
components_to_quantize="transformer" components_to_quantize=["transformer"]
) )
pipeline = HunyuanVideoPipeline.from_pretrained( pipeline = HunyuanVideoPipeline.from_pretrained(

View File

@@ -1,120 +0,0 @@
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. -->
# HunyuanVideo-1.5
HunyuanVideo-1.5 is a lightweight yet powerful video generation model that achieves state-of-the-art visual quality and motion coherence with only 8.3 billion parameters, enabling efficient inference on consumer-grade GPUs. This achievement is built upon several key components, including meticulous data curation, an advanced DiT architecture with selective and sliding tile attention (SSTA), enhanced bilingual understanding through glyph-aware text encoding, progressive pre-training and post-training, and an efficient video super-resolution network. Leveraging these designs, we developed a unified framework capable of high-quality text-to-video and image-to-video generation across multiple durations and resolutions. Extensive experiments demonstrate that this compact and proficient model establishes a new state-of-the-art among open-source models.
You can find all the original HunyuanVideo checkpoints under the [Tencent](https://huggingface.co/tencent) organization.
> [!TIP]
> Click on the HunyuanVideo models in the right sidebar for more examples of video generation tasks.
>
> The examples below use a checkpoint from [hunyuanvideo-community](https://huggingface.co/hunyuanvideo-community) because the weights are stored in a layout compatible with Diffusers.
The example below demonstrates how to generate a video optimized for memory or inference speed.
<hfoptions id="usage">
<hfoption id="memory">
Refer to the [Reduce memory usage](../../optimization/memory) guide for more details about the various memory saving techniques.
```py
import torch
from diffusers import AutoModel, HunyuanVideo15Pipeline
from diffusers.utils import export_to_video
pipeline = HunyuanVideo15Pipeline.from_pretrained(
"HunyuanVideo-1.5-Diffusers-480p_t2v",
torch_dtype=torch.bfloat16,
)
# model-offloading and tiling
pipeline.enable_model_cpu_offload()
pipeline.vae.enable_tiling()
prompt = "A fluffy teddy bear sits on a bed of soft pillows surrounded by children's toys."
video = pipeline(prompt=prompt, num_frames=61, num_inference_steps=30).frames[0]
export_to_video(video, "output.mp4", fps=15)
```
</hfoption>
</hfoptions>
## Notes
- HunyuanVideo-1.5 uses attention masks with variable-length sequences. For best performance, we recommend using an attention backend that handles padding efficiently.
- **H100/H800:** `_flash_3_hub` or `_flash_3_varlen_hub`
- **A100/A800/RTX 4090:** `flash_hub` or `flash_varlen_hub`
- **Other GPUs:** `sage_hub`
Refer to the [Attention backends](../../optimization/attention_backends) guide for more details about using a different backend.
```py
pipe.transformer.set_attention_backend("flash_hub") # or your preferred backend
```
- [`HunyuanVideo15Pipeline`] uses a guider and does not take a `guidance_scale` parameter at runtime.
You can check the default guider configuration using `pipe.guider`:
```py
>>> pipe.guider
ClassifierFreeGuidance {
"_class_name": "ClassifierFreeGuidance",
"_diffusers_version": "0.36.0.dev0",
"enabled": true,
"guidance_rescale": 0.0,
"guidance_scale": 6.0,
"start": 0.0,
"stop": 1.0,
"use_original_formulation": false
}
State:
step: None
num_inference_steps: None
timestep: None
count_prepared: 0
enabled: True
num_conditions: 2
```
To update the guider configuration, you can run `pipe.guider = pipe.guider.new(...)`:
```py
pipe.guider = pipe.guider.new(guidance_scale=5.0)
```
Read more on Guider [here](../../modular_diffusers/guiders).
## HunyuanVideo15Pipeline
[[autodoc]] HunyuanVideo15Pipeline
- all
- __call__
## HunyuanVideo15ImageToVideoPipeline
[[autodoc]] HunyuanVideo15ImageToVideoPipeline
- all
- __call__
## HunyuanVideo15PipelineOutput
[[autodoc]] pipelines.hunyuan_video1_5.pipeline_output.HunyuanVideo15PipelineOutput

Some files were not shown because too many files have changed in this diff Show More