add rest of the lora loader mixins to the docs.

2026-03-05 00:00:50 +08:00 · 2024-12-16 08:32:25 +05:30
2124 changed files with 52066 additions and 325096 deletions
--- a/.github/ISSUE_TEMPLATE/remote-vae-pilot-feedback.yml
+++ b/.github/ISSUE_TEMPLATE/remote-vae-pilot-feedback.yml
@@ -1,38 +0,0 @@
 name: "\U0001F31F Remote VAE"
 description: Feedback for remote VAE pilot
 labels: [ "Remote VAE" ]
 body:
  - type: textarea
    id: positive
    validations:
      required: true
    attributes:
      label: Did you like the remote VAE solution?
      description: |
        If you liked it, we would appreciate it if you could elaborate what you liked.
  - type: textarea
    id: feedback
    validations:
      required: true
    attributes:
      label: What can be improved about the current solution?
      description: |
        Let us know the things you would like to see improved. Note that we will work optimizing the solution once the pilot is over and we have usage.
  - type: textarea
    id: others
    validations:
      required: true
    attributes:
      label: What other VAEs you would like to see if the pilot goes well?
      description: |
        Provide a list of the VAEs you would like to see in the future if the pilot goes well.
  - type: textarea
    id: additional-info
    attributes:
      label: Notify the members of the team
      description: |
        Tag the following folks when submitting this feedback: @hlky @sayakpaul
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -7,28 +7,27 @@ on:
 env:
  DIFFUSERS_IS_CI: yes
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  HF_HOME: /mnt/cache
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  BASE_PATH: benchmark_outputs
 jobs:
-  torch_models_cuda_benchmark_tests:
+  torch_pipelines_cuda_benchmark_tests:
    env:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_BENCHMARK }}
-    name: Torch Core Models CUDA Benchmarking Tests
+    name: Torch Core Pipelines CUDA Benchmarking Tests
    strategy:
      fail-fast: false
      max-parallel: 1
    runs-on:
-      group: aws-g6e-4xlarge
+      group: aws-g6-4xlarge-plus
    container:
-      image: diffusers/diffusers-pytorch-cuda
+      image: diffusers/diffusers-pytorch-compile-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
@@ -36,46 +35,26 @@ jobs:
          nvidia-smi
      - name: Install dependencies
        run: |
-          apt update
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          apt install -y libpq-dev postgresql-client
+          python -m uv pip install -e [quality,test]
-          uv pip install -e ".[quality]"
+          python -m uv pip install pandas peft
          uv pip install -r benchmarks/requirements.txt
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Diffusers Benchmarking
        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+            HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
            BASE_PATH: benchmark_outputs
        run: |
-          cd benchmarks && python run_all.py
+          export TOTAL_GPU_MEMORY=$(python -c "import torch; print(torch.cuda.get_device_properties(0).total_memory / (1024**3))")
-
+          cd benchmarks && mkdir ${BASE_PATH} && python run_all.py && python push_results.py
      - name: Push results to the Hub
        env: 
          HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
        run: |
          cd benchmarks && python push_results.py
          mkdir $BASE_PATH && cp *.csv $BASE_PATH
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: benchmark_test_reports
-          path: benchmarks/${{ env.BASE_PATH }}
+          path: benchmarks/benchmark_outputs
      # TODO: enable this once the connection problem has been resolved.
      - name: Update benchmarking results to DB
        env:
          PGDATABASE: metrics
          PGHOST: ${{ secrets.DIFFUSERS_BENCHMARKS_PGHOST }}
          PGUSER: transformers_benchmarks
          PGPASSWORD: ${{ secrets.DIFFUSERS_BENCHMARKS_PGPASSWORD }}
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          git config --global --add safe.directory /__w/diffusers/diffusers
          commit_id=$GITHUB_SHA
          commit_msg=$(git show -s --format=%s "$commit_id" | cut -c1-70)
          cd benchmarks && python populate_into_db.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
      - name: Report success status
        if: ${{ success() }}
--- a/.github/workflows/build_docker_images.yml
+++ b/.github/workflows/build_docker_images.yml
@@ -28,53 +28,25 @@ jobs:
        uses: docker/setup-buildx-action@v1
      - name: Check out code
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
      - name: Find Changed Dockerfiles
        id: file_changes
        uses: jitterbit/get-changed-files@v1
        with:
-          format: "space-delimited"
+          format: 'space-delimited'
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Build Changed Docker Images
        env: 
          CHANGED_FILES: ${{ steps.file_changes.outputs.all }}
        run: |
-          echo "$CHANGED_FILES"
+          CHANGED_FILES="${{ steps.file_changes.outputs.all }}"
          ALLOWED_IMAGES=(
            diffusers-pytorch-cpu
            diffusers-pytorch-cuda
            diffusers-pytorch-xformers-cuda
            diffusers-pytorch-minimum-cuda
            diffusers-doc-builder
          )
          declare -A IMAGES_TO_BUILD=()
          for FILE in $CHANGED_FILES; do
-            # skip anything that isn't still on disk
+            if [[ "$FILE" == docker/*Dockerfile ]]; then
-            if [[ ! -e "$FILE" ]]; then
+              DOCKER_PATH="${FILE%/Dockerfile}"
-              echo "Skipping removed file $FILE"
+              DOCKER_TAG=$(basename "$DOCKER_PATH")
-              continue
+              echo "Building Docker image for $DOCKER_TAG"
              docker build -t "$DOCKER_TAG" "$DOCKER_PATH"
            fi
            for IMAGE in "${ALLOWED_IMAGES[@]}"; do
              if [[ "$FILE" == docker/${IMAGE}/* ]]; then
                IMAGES_TO_BUILD["$IMAGE"]=1
              fi
            done
          done
          if [[ ${#IMAGES_TO_BUILD[@]} -eq 0 ]]; then
            echo "No relevant Docker changes detected."
            exit 0
          fi
          for IMAGE in "${!IMAGES_TO_BUILD[@]}"; do
            DOCKER_PATH="docker/${IMAGE}"
            echo "Building Docker image for $IMAGE"
            docker build -t "$IMAGE" "$DOCKER_PATH"
          done
        if: steps.file_changes.outputs.all != ''
@@ -93,13 +65,17 @@ jobs:
        image-name:
          - diffusers-pytorch-cpu
          - diffusers-pytorch-cuda
          - diffusers-pytorch-compile-cuda
          - diffusers-pytorch-xformers-cuda
-          - diffusers-pytorch-minimum-cuda
+          - diffusers-flax-cpu
          - diffusers-flax-tpu
          - diffusers-onnxruntime-cpu
          - diffusers-onnxruntime-cuda
          - diffusers-doc-builder
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      - name: Login to Docker Hub
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -12,33 +12,7 @@ concurrency:
  cancel-in-progress: true
 jobs:
  check-links:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      - name: Install doc-builder
        run: |
          uv pip install --system git+https://github.com/huggingface/doc-builder.git@main
      - name: Check documentation links
        run: |
          uv run doc-builder check-links docs/source/en
  build:
    needs: check-links
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -1,22 +0,0 @@
 ---
 name: CodeQL Security Analysis For Github Actions
 on:
  push:
    branches: ["main"]
  workflow_dispatch:
  # pull_request:
 jobs:
  codeql:
    name: CodeQL Analysis
    uses: huggingface/security-workflows/.github/workflows/codeql-reusable.yml@v1
    permissions:
      security-events: write
      packages: read
      actions: read
      contents: read
    with:
      languages: '["actions","python"]'
      queries: 'security-extended,security-and-quality'
      runner: 'ubuntu-latest' #optional if need custom runner
--- a/.github/workflows/mirror_community_pipeline.yml
+++ b/.github/workflows/mirror_community_pipeline.yml
@@ -24,6 +24,7 @@ jobs:
  mirror_community_pipeline:
    env:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_COMMUNITY_MIRROR }}
    runs-on: ubuntu-22.04
    steps:
      # Checkout to correct ref
@@ -38,58 +39,54 @@ jobs:
      #     If ref is 'refs/heads/main' => set 'main'
      #     Else it must be a tag => set {tag}
      - name: Set checkout_ref and path_in_repo
        env:
          EVENT_NAME: ${{ github.event_name }}
          EVENT_INPUT_REF: ${{ github.event.inputs.ref }}
          GITHUB_REF: ${{ github.ref }}
        run: |
-          if [ "$EVENT_NAME" == "workflow_dispatch" ]; then
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
-            if [ -z "$EVENT_INPUT_REF" ]; then
+            if [ -z "${{ github.event.inputs.ref }}" ]; then
              echo "Error: Missing ref input"
              exit 1
-            elif [ "$EVENT_INPUT_REF" == "main" ]; then
+            elif [ "${{ github.event.inputs.ref }}" == "main" ]; then
              echo "CHECKOUT_REF=refs/heads/main" >> $GITHUB_ENV
              echo "PATH_IN_REPO=main" >> $GITHUB_ENV
            else
-              echo "CHECKOUT_REF=refs/tags/$EVENT_INPUT_REF" >> $GITHUB_ENV
+              echo "CHECKOUT_REF=refs/tags/${{ github.event.inputs.ref }}" >> $GITHUB_ENV
-              echo "PATH_IN_REPO=$EVENT_INPUT_REF" >> $GITHUB_ENV
+              echo "PATH_IN_REPO=${{ github.event.inputs.ref }}" >> $GITHUB_ENV
            fi
-          elif [ "$GITHUB_REF" == "refs/heads/main" ]; then
+          elif [ "${{ github.ref }}" == "refs/heads/main" ]; then
-            echo "CHECKOUT_REF=$GITHUB_REF" >> $GITHUB_ENV
+            echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
            echo "PATH_IN_REPO=main" >> $GITHUB_ENV
          else
            # e.g. refs/tags/v0.28.1 -> v0.28.1
-            echo "CHECKOUT_REF=$GITHUB_REF" >> $GITHUB_ENV
+            echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
-            echo "PATH_IN_REPO=$(echo $GITHUB_REF | sed 's/^refs\/tags\///')" >> $GITHUB_ENV
+            echo "PATH_IN_REPO=$(echo ${{ github.ref }} | sed 's/^refs\/tags\///')" >> $GITHUB_ENV
          fi
      - name: Print env vars
        run: |
          echo "CHECKOUT_REF: ${{ env.CHECKOUT_REF }}"
          echo "PATH_IN_REPO: ${{ env.PATH_IN_REPO }}"
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
        with:
          ref: ${{ env.CHECKOUT_REF }}
      # Setup + install dependencies
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
-          pip install --upgrade pip
+          python -m pip install --upgrade pip
          pip install --upgrade huggingface_hub
      # Check secret is set
      - name: whoami
-        run: hf auth whoami
+        run: huggingface-cli whoami
        env:
            HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}
      # Push to HF! (under subfolder based on checkout ref)
      # https://huggingface.co/datasets/diffusers/community-pipelines-mirror
      - name: Mirror community pipeline to HF
-        run: hf upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
+        run: huggingface-cli upload diffusers/community-pipelines-mirror ./examples/community ${PATH_IN_REPO} --repo-type dataset
        env:
            PATH_IN_REPO: ${{ env.PATH_IN_REPO }}
            HF_TOKEN: ${{ secrets.HF_TOKEN_MIRROR_COMMUNITY_PIPELINES }}
@@ -102,4 +99,4 @@ jobs:
      - name: Report failure status
        if: ${{ failure() }}
        run: |
-          pip install requests && python utils/notify_community_pipelines_mirror.py --status=failure
+          pip install requests && python utils/notify_community_pipelines_mirror.py --status=failure
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -7,15 +7,14 @@ on:
 env:
  DIFFUSERS_IS_CI: yes
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 600
  RUN_SLOW: yes
  RUN_NIGHTLY: yes
-  PIPELINE_USAGE_CUTOFF: 0
+  PIPELINE_USAGE_CUTOFF: 5000
  SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
  CONSOLIDATED_REPORT_PATH: consolidated_test_report.md
 jobs:
  setup_torch_cuda_pipeline_matrix:
@@ -28,7 +27,7 @@ jobs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Install dependencies
@@ -44,7 +43,7 @@ jobs:
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports
@@ -61,21 +60,20 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install -e [quality,test]
-          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
+          python -m uv pip install pytest-reportlog
          uv pip install pytest-reportlog
      - name: Environment
        run: |
          python utils/print_env.py
@@ -85,8 +83,8 @@ jobs:
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-             -k "not Flax and not Onnx" \
+            -s -v -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
            tests/pipelines/${{ matrix.module }}
@@ -97,10 +95,15 @@ jobs:
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
  run_nightly_tests_for_other_torch_modules:
    name: Nightly Torch CUDA Tests
@@ -108,7 +111,7 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    defaults:
      run:
        shell: bash
@@ -119,18 +122,17 @@ jobs:
        module: [models, schedulers, lora, others, single_file, examples]
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        uv pip install peft@git+https://github.com/huggingface/peft.git
+        python -m uv pip install -e [quality,test]
-        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+        python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
+        python -m uv pip install pytest-reportlog
        uv pip install pytest-reportlog
    - name: Environment
      run: python utils/print_env.py
@@ -141,8 +143,8 @@ jobs:
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -k "not Flax and not Onnx" \
+          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_torch_${{ matrix.module }}_cuda \
          --report-log=tests_torch_${{ matrix.module }}_cuda.log \
          tests/${{ matrix.module }}
@@ -154,8 +156,8 @@ jobs:
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          --make-reports=examples_torch_cuda \
+          -s -v --make-reports=examples_torch_cuda \
          --report-log=examples_torch_cuda.log \
          examples/
@@ -167,54 +169,16 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_${{ matrix.module }}_cuda_test_reports
        path: reports
-  run_torch_compile_tests:
+    - name: Generate Report and Notify Channel
-    name: PyTorch Compile CUDA tests
+      if: always()
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v6
      with:
        fetch-depth: 2
    - name: NVIDIA-SMI
      run: |
-        nvidia-smi
+        pip install slack_sdk tabulate
-    - name: Install dependencies
+        python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
      run: |
        uv pip install -e ".[quality,training]"
        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run torch compile tests on GPU
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        RUN_COMPILE: yes
      run: |
        pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_compile_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      with:
        name: torch_compile_test_reports
        path: reports
  run_big_gpu_torch_tests:
    name: Torch tests on big GPU
@@ -225,22 +189,21 @@ jobs:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip install peft@git+https://github.com/huggingface/peft.git
+          python -m uv pip install -e [quality,test]
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
+          python -m uv pip install pytest-reportlog
          uv pip install pytest-reportlog
      - name: Environment
        run: |
          python utils/print_env.py
@@ -251,8 +214,8 @@ jobs:
          CUBLAS_WORKSPACE_CONFIG: :16:8
          BIG_GPU_MEMORY: 40
        run: |
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -m "big_accelerator" \
+            -m "big_gpu_with_torch_cuda" \
            --make-reports=tests_big_gpu_torch_cuda \
            --report-log=tests_big_gpu_torch_cuda.log \
            tests/
@@ -263,113 +226,155 @@ jobs:
          cat reports/tests_big_gpu_torch_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_big_gpu_test_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
-  torch_minimum_version_cuda_tests:
+  run_flax_tpu_tests:
-    name: Torch Minimum Version CUDA Tests
+    name: Nightly Flax TPU Tests
    runs-on:
-      group: aws-g4dn-2xlarge
+      group: gcp-ct5lp-hightpu-8t
    if: github.event_name == 'schedule'
    container:
-      image: diffusers/diffusers-pytorch-minimum-cuda
+      image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    defaults:
      run:
        shell: bash
    steps:
-      - name: Checkout diffusers
+    - name: Checkout diffusers
-        uses: actions/checkout@v6
+      uses: actions/checkout@v3
-        with:
+      with:
-          fetch-depth: 2
+        fetch-depth: 2
-      - name: Install dependencies
+    - name: Install dependencies
-        run: |
+      run: |
-          uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip install peft@git+https://github.com/huggingface/peft.git
+        python -m uv pip install -e [quality,test]
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+        python -m uv pip install pytest-reportlog
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
-      - name: Environment
+    - name: Environment
-        run: |
+      run: python utils/print_env.py
          python utils/print_env.py
-      - name: Run PyTorch CUDA tests
+    - name: Run nightly Flax TPU tests
-        env:
+      env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+      run: |
-          CUBLAS_WORKSPACE_CONFIG: :16:8
+        python -m pytest -n 0 \
-        run: |
+          -s -v -k "Flax" \
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          --make-reports=tests_flax_tpu \
-            -k "not Flax and not Onnx" \
+          --report-log=tests_flax_tpu.log \
-            --make-reports=tests_torch_minimum_version_cuda \
+          tests/
            tests/models/test_modeling_common.py \
            tests/pipelines/test_pipelines_common.py \
            tests/pipelines/test_pipeline_utils.py \
            tests/pipelines/test_pipelines.py \
            tests/pipelines/test_pipelines_auto.py \
            tests/schedulers/test_schedulers.py \
            tests/others
-      - name: Failure short reports
+    - name: Failure short reports
-        if: ${{ failure() }}
+      if: ${{ failure() }}
-        run: |
+      run: |
-          cat reports/tests_torch_minimum_version_cuda_stats.txt
+        cat reports/tests_flax_tpu_stats.txt
-          cat reports/tests_torch_minimum_version_cuda_failures_short.txt
+        cat reports/tests_flax_tpu_failures_short.txt
-      - name: Test suite reports artifacts
+    - name: Test suite reports artifacts
-        if: ${{ always() }}
+      if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
-        with:
+      with:
-          name: torch_minimum_version_cuda_test_reports
+        name: flax_tpu_test_reports
-          path: reports
+        path: reports
    - name: Generate Report and Notify Channel
      if: always()
      run: |
        pip install slack_sdk tabulate
        python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
  run_nightly_onnx_tests:
    name: Nightly ONNXRuntime CUDA tests on Ubuntu
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-onnxruntime-cuda
      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: NVIDIA-SMI
      run: nvidia-smi
    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
        python -m uv pip install pytest-reportlog
    - name: Environment
      run: python utils/print_env.py
    - name: Run Nightly ONNXRuntime CUDA tests
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
          --make-reports=tests_onnx_cuda \
          --report-log=tests_onnx_cuda.log \
          tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_onnx_cuda_stats.txt
        cat reports/tests_onnx_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: tests_onnx_cuda_reports
        path: reports
    - name: Generate Report and Notify Channel
      if: always()
      run: |
        pip install slack_sdk tabulate
        python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
  run_nightly_quantization_tests:
    name: Torch quantization nightly tests
    strategy:
      fail-fast: false
      max-parallel: 2
-      matrix:
+      matrix: 
        config:
          - backend: "bitsandbytes"
            test_location: "bnb"
            additional_deps: ["peft"]
          - backend: "gguf"
            test_location: "gguf"
            additional_deps: ["peft", "kernels"]
          - backend: "torchao"
            test_location: "torchao"
            additional_deps: []
          - backend: "optimum_quanto"
            test_location: "quanto"
            additional_deps: []
          - backend: "nvidia_modelopt"
            test_location: "modelopt"
            additional_deps: []
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "20gb" --ipc host --gpus all
+      options: --shm-size "20gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip install -U ${{ matrix.config.backend }}
+          python -m uv pip install -e [quality,test]
-          if [ "${{ join(matrix.config.additional_deps, ' ') }}" != "" ]; then
+          python -m uv pip install -U ${{ matrix.config.backend }}
-              uv pip install ${{ join(matrix.config.additional_deps, ' ') }}
+          python -m uv pip install pytest-reportlog
          fi
          uv pip install pytest-reportlog
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
      - name: Environment
        run: |
          python utils/print_env.py
@@ -380,7 +385,7 @@ jobs:
          CUBLAS_WORKSPACE_CONFIG: :16:8
          BIG_GPU_MEMORY: 40
        run: |
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
            --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
            tests/quantization/${{ matrix.config.test_location }}
@@ -391,119 +396,15 @@ jobs:
          cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_${{ matrix.config.backend }}_reports
          path: reports
-          
+      - name: Generate Report and Notify Channel
-  run_nightly_pipeline_level_quantization_tests:
+        if: always()
    name: Torch quantization nightly tests
    strategy:
      fail-fast: false
      max-parallel: 2
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "20gb" --ipc host --gpus all
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install -U bitsandbytes optimum_quanto
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 
          uv pip install pytest-reportlog
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Pipeline-level quantization tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
          BIG_GPU_MEMORY: 40
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            --make-reports=tests_pipeline_level_quant_torch_cuda \
            --report-log=tests_pipeline_level_quant_torch_cuda.log \
            tests/quantization/test_pipeline_level_quantization.py
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_level_quant_torch_cuda_stats.txt
          cat reports/tests_pipeline_level_quant_torch_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: torch_cuda_pipeline_level_quant_reports
          path: reports
  generate_consolidated_report:
    name: Generate Consolidated Test Report
    needs: [
      run_nightly_tests_for_torch_pipelines,
      run_nightly_tests_for_other_torch_modules,
      run_torch_compile_tests,
      run_big_gpu_torch_tests,
      run_nightly_quantization_tests,
      run_nightly_pipeline_level_quantization_tests,
      # run_nightly_onnx_tests,
      torch_minimum_version_cuda_tests,
      # run_flax_tpu_tests
    ]
    if: always()
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2
      - name: Create reports directory
        run: mkdir -p combined_reports
      - name: Download all test reports
        uses: actions/download-artifact@v7
        with:
          path: artifacts
      - name: Prepare reports
        run: |
          # Move all report files to a single directory for processing
          find artifacts -name "*.txt" -exec cp {} combined_reports/ \;
      - name: Install dependencies
        run: |
          pip install -e .[test]
          pip install slack_sdk tabulate
-
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
      - name: Generate consolidated report
        run: |
          python utils/consolidated_test_report.py \
            --reports_dir combined_reports \
            --output_file $CONSOLIDATED_REPORT_PATH \
            --slack_channel_name diffusers-ci-nightly
      - name: Show consolidated report
        run: |
          cat $CONSOLIDATED_REPORT_PATH >> $GITHUB_STEP_SUMMARY
      - name: Upload consolidated report
        uses: actions/upload-artifact@v6
        with:
          name: consolidated_test_report
          path: ${{ env.CONSOLIDATED_REPORT_PATH }}
 # M1 runner currently not well supported
 # TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
@@ -514,7 +415,7 @@ jobs:
 #
 #    steps:
 #      - name: Checkout diffusers
-#        uses: actions/checkout@v6
+#        uses: actions/checkout@v3
 #        with:
 #          fetch-depth: 2
 #
@@ -530,11 +431,11 @@ jobs:
 #      - name: Install dependencies
 #        shell: arch -arch arm64 bash {0}
 #        run: |
-#          ${CONDA_RUN} pip install --upgrade pip uv
+#          ${CONDA_RUN} python -m pip install --upgrade pip uv
-#          ${CONDA_RUN} uv pip install -e ".[quality]"
+#          ${CONDA_RUN} python -m uv pip install -e [quality,test]
-#          ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+#          ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
-#          ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
+#          ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
-#          ${CONDA_RUN} uv pip install pytest-reportlog
+#          ${CONDA_RUN} python -m uv pip install pytest-reportlog
 #      - name: Environment
 #        shell: arch -arch arm64 bash {0}
 #        run: |
@@ -543,9 +444,9 @@ jobs:
 #        shell: arch -arch arm64 bash {0}
 #        env:
 #          HF_HOME: /System/Volumes/Data/mnt/cache
-#          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+#          HF_TOKEN: ${{ secrets.HF_TOKEN }}
 #        run: |
-#          ${CONDA_RUN} pytest -n 1  --make-reports=tests_torch_mps \
+#          ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
 #            --report-log=tests_torch_mps.log \
 #            tests/
 #      - name: Failure short reports
@@ -554,7 +455,7 @@ jobs:
 #
 #      - name: Test suite reports artifacts
 #        if: ${{ always() }}
-#        uses: actions/upload-artifact@v6
+#        uses: actions/upload-artifact@v4
 #        with:
 #          name: torch_mps_test_reports
 #          path: reports
@@ -570,7 +471,7 @@ jobs:
 #
 #    steps:
 #      - name: Checkout diffusers
-#        uses: actions/checkout@v6
+#        uses: actions/checkout@v3
 #        with:
 #          fetch-depth: 2
 #
@@ -586,11 +487,11 @@ jobs:
 #      - name: Install dependencies
 #        shell: arch -arch arm64 bash {0}
 #        run: |
-#          ${CONDA_RUN} pip install --upgrade pip uv
+#          ${CONDA_RUN} python -m pip install --upgrade pip uv
-#          ${CONDA_RUN} uv pip install -e ".[quality]"
+#          ${CONDA_RUN} python -m uv pip install -e [quality,test]
-#          ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+#          ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
-#          ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
+#          ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate
-#          ${CONDA_RUN} uv pip install pytest-reportlog
+#          ${CONDA_RUN} python -m uv pip install pytest-reportlog
 #      - name: Environment
 #        shell: arch -arch arm64 bash {0}
 #        run: |
@@ -599,9 +500,9 @@ jobs:
 #        shell: arch -arch arm64 bash {0}
 #        env:
 #          HF_HOME: /System/Volumes/Data/mnt/cache
-#          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+#          HF_TOKEN: ${{ secrets.HF_TOKEN }}
 #        run: |
-#          ${CONDA_RUN} pytest -n 1  --make-reports=tests_torch_mps \
+#          ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \
 #            --report-log=tests_torch_mps.log \
 #            tests/
 #      - name: Failure short reports
@@ -610,7 +511,7 @@ jobs:
 #
 #      - name: Test suite reports artifacts
 #        if: ${{ always() }}
-#        uses: actions/upload-artifact@v6
+#        uses: actions/upload-artifact@v4
 #        with:
 #          name: torch_mps_test_reports
 #          path: reports
--- a/.github/workflows/notify_slack_about_release.yml
+++ b/.github/workflows/notify_slack_about_release.yml
@@ -10,10 +10,10 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v3
    - name: Setup Python
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@v4
      with:
        python-version: '3.8'
--- a/.github/workflows/pr_dependency_test.yml
+++ b/.github/workflows/pr_dependency_test.yml
@@ -18,15 +18,18 @@ jobs:
  check_dependencies:
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
-          pip install -e .
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          pip install pytest
+          python -m pip install --upgrade pip uv
          python -m uv pip install -e .
          python -m uv pip install pytest
      - name: Check for soft dependencies
        run: |
-            pytest tests/others/test_dependencies.py
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          pytest tests/others/test_dependencies.py
--- a/.github/workflows/pr_flax_dependency_test.yml
+++ b/.github/workflows/pr_flax_dependency_test.yml
@@ -0,0 +1,38 @@
 name: Run Flax dependency tests
 on:
  pull_request:
    branches:
      - main
    paths:
      - "src/diffusers/**.py"
  push:
    branches:
      - main
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  check_flax_dependencies:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m pip install --upgrade pip uv
          python -m uv pip install -e .
          python -m uv pip install "jax[cpu]>=0.2.16,!=0.3.2"
          python -m uv pip install "flax>=0.4.1"
          python -m uv pip install "jaxlib>=0.1.65"
          python -m uv pip install pytest
      - name: Check for soft dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          pytest tests/others/test_dependencies.py
--- a/.github/workflows/pr_modular_tests.yml
+++ b/.github/workflows/pr_modular_tests.yml
@@ -1,127 +0,0 @@
 name: Fast PR tests for Modular
 on:
  pull_request:
    branches: [main]
    paths:
      - "src/diffusers/modular_pipelines/**.py"
      - "src/diffusers/models/modeling_utils.py"
      - "src/diffusers/models/model_loading_utils.py"
      - "src/diffusers/pipelines/pipeline_utils.py"
      - "src/diffusers/pipeline_loading_utils.py"
      - "src/diffusers/loaders/lora_base.py"
      - "src/diffusers/loaders/lora_pipeline.py"
      - "src/diffusers/loaders/peft.py"
      - "tests/modular_pipelines/**.py"
      - ".github/**.yml"
      - "utils/**.py"
      - "setup.py"
  push:
    branches:
      - ci-*
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 env:
  DIFFUSERS_IS_CI: yes
  HF_XET_HIGH_PERFORMANCE: 1
  OMP_NUM_THREADS: 4
  MKL_NUM_THREADS: 4
  PYTEST_TIMEOUT: 60
 jobs:
  check_code_quality:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install .[quality]
      - name: Check quality
        run: make quality
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
  check_repository_consistency:
    needs: check_code_quality
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install .[quality]
      - name: Check repo consistency
        run: |
          python utils/check_copies.py
          python utils/check_dummies.py
          python utils/check_support_list.py
          make deps_table_check_updated
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
  run_fast_tests:
    needs: [check_code_quality, check_repository_consistency]
    name: Fast PyTorch Modular Pipeline CPU tests
    runs-on:
      group: aws-highmemory-32-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
    defaults:
      run:
        shell: bash
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v6
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
        uv pip install -e ".[quality]"
        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run fast PyTorch Pipeline CPU tests
      run: |
        pytest -n 8 --max-worker-restart=0 --dist=loadfile \
          -k "not Flax and not Onnx" \
          --make-reports=tests_torch_cpu_modular_pipelines \
          tests/modular_pipelines
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_cpu_modular_pipelines_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      with:
        name: pr_pytorch_pipelines_torch_cpu_modular_pipelines_test_reports
        path: reports
--- a/.github/workflows/pr_style_bot.yml
+++ b/.github/workflows/pr_style_bot.yml
@@ -1,17 +0,0 @@
 name: PR Style Bot
 on:
  issue_comment:
    types: [created]
 permissions:
  contents: write
  pull-requests: write
 jobs:
  style:
    uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
    with:
      python_quality_dependencies: "[quality]"
    secrets:
      bot_token: ${{ secrets.HF_STYLE_BOT_ACTION }}
--- a/.github/workflows/pr_test_fetcher.yml
+++ b/.github/workflows/pr_test_fetcher.yml
@@ -28,12 +28,13 @@ jobs:
      test_map: ${{ steps.set_matrix.outputs.test_map }}
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 0
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
    - name: Environment
      run: |
        python utils/print_env.py
@@ -42,7 +43,7 @@ jobs:
      run: |
        python utils/tests_fetcher.py | tee test_preparation.txt
    - name: Report fetched tests
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v3
      with:
        name: test_fetched
        path: test_preparation.txt
@@ -83,22 +84,25 @@ jobs:
        shell: bash
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        uv pip install accelerate
+        python -m pip install -e [quality,test]
        python -m pip install accelerate
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run all selected tests on CPU
      run: |
-        pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }}
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.modules }}_tests_cpu ${{ fromJson(needs.setup_pr_tests.outputs.test_map)[matrix.modules] }}
    - name: Failure short reports
      if: ${{ failure() }}
@@ -109,7 +113,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v3
      with:
          name: ${{ matrix.modules }}_test_reports
          path: reports
@@ -138,22 +142,25 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        pip install -e [quality]
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m pip install -e [quality,test]
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run Hub tests for models, schedulers, and pipelines on a staging env
      if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
      run: |
-        HUGGINGFACE_CO_STAGING=true pytest \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        HUGGINGFACE_CO_STAGING=true python -m pytest \
          -m "is_staging_test" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests
@@ -164,7 +171,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_${{ matrix.config.report }}_test_reports
        path: reports
--- a/.github/workflows/pr_tests.yml
+++ b/.github/workflows/pr_tests.yml
@@ -2,7 +2,8 @@ name: Fast tests for PRs
 on:
  pull_request:
-    branches: [main]
+    branches:
      - main
    paths:
      - "src/diffusers/**.py"
      - "benchmarks/**.py"
@@ -11,7 +12,6 @@ on:
      - "tests/**.py"
      - ".github/**.yml"
      - "utils/**.py"
      - "setup.py"
  push:
    branches:
      - ci-*
@@ -22,7 +22,7 @@ concurrency:
 env:
  DIFFUSERS_IS_CI: yes
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  OMP_NUM_THREADS: 4
  MKL_NUM_THREADS: 4
  PYTEST_TIMEOUT: 60
@@ -31,14 +31,14 @@ jobs:
  check_code_quality:
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
-          pip install --upgrade pip
+          python -m pip install --upgrade pip
          pip install .[quality]
      - name: Check quality
        run: make quality
@@ -51,20 +51,19 @@ jobs:
    needs: check_code_quality
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
-          pip install --upgrade pip
+          python -m pip install --upgrade pip
          pip install .[quality]
      - name: Check repo consistency
        run: |
          python utils/check_copies.py
          python utils/check_dummies.py
          python utils/check_support_list.py
          make deps_table_check_updated
      - name: Check if failure
        if: ${{ failure() }}
@@ -87,6 +86,11 @@ jobs:
            runner: aws-general-8-plus
            image: diffusers/diffusers-pytorch-cpu
            report: torch_cpu_models_schedulers
          - name: Fast Flax CPU tests
            framework: flax
            runner: aws-general-8-plus
            image: diffusers/diffusers-flax-cpu
            report: flax_cpu
          - name: PyTorch Example CPU tests
            framework: pytorch_examples
            runner: aws-general-8-plus
@@ -108,42 +112,54 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+        python -m uv pip install -e [quality,test]
-        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
+        python -m uv pip install accelerate
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run fast PyTorch Pipeline CPU tests
      if: ${{ matrix.config.framework == 'pytorch_pipelines' }}
      run: |
-        pytest -n 8 --max-worker-restart=0 --dist=loadfile \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          -k "not Flax and not Onnx" \
+        python -m pytest -n 8 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests/pipelines
    - name: Run fast PyTorch Model Scheduler CPU tests
      if: ${{ matrix.config.framework == 'pytorch_models' }}
      run: |
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          -k "not Flax and not Onnx and not Dependency" \
+        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "not Flax and not Onnx and not Dependency" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests/models tests/schedulers tests/others
    - name: Run fast Flax TPU tests
      if: ${{ matrix.config.framework == 'flax' }}
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Flax" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests
    - name: Run example PyTorch CPU tests
      if: ${{ matrix.config.framework == 'pytorch_examples' }}
      run: |
-        uv pip install ".[training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m uv pip install peft timm
        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          --make-reports=tests_${{ matrix.config.report }} \
          examples
@@ -153,7 +169,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports
        path: reports
@@ -185,22 +201,25 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run Hub tests for models, schedulers, and pipelines on a staging env
      if: ${{ matrix.config.framework == 'hub_tests_pytorch' }}
      run: |
-        HUGGINGFACE_CO_STAGING=true pytest \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        HUGGINGFACE_CO_STAGING=true python -m pytest \
          -m "is_staging_test" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests
@@ -211,7 +230,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_${{ matrix.config.report }}_test_reports
        path: reports
@@ -236,44 +255,45 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        # TODO (sayakpaul, DN6): revisit `--no-deps`
-        uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
+        python -m pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
-        uv pip install -U tokenizers
+        python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
-        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
+        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run fast PyTorch LoRA tests with PEFT
      run: |
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          \
+        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v \
          --make-reports=tests_peft_main \
          tests/lora/
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
-          \
+          -s -v \
          --make-reports=tests_models_lora_peft_main \
          tests/models/ -k "lora"
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
-        cat reports/tests_peft_main_failures_short.txt
+        cat reports/tests_lora_failures_short.txt
-        cat reports/tests_models_lora_peft_main_failures_short.txt
+        cat reports/tests_models_lora_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_main_test_reports
        path: reports
--- a/.github/workflows/pr_tests_gpu.yml
+++ b/.github/workflows/pr_tests_gpu.yml
@@ -1,294 +0,0 @@
 name: Fast GPU Tests on PR
 on:
  pull_request:
    branches: main
    paths:
      - "src/diffusers/models/modeling_utils.py"
      - "src/diffusers/models/model_loading_utils.py"
      - "src/diffusers/pipelines/pipeline_utils.py"
      - "src/diffusers/pipeline_loading_utils.py"
      - "src/diffusers/loaders/lora_base.py"
      - "src/diffusers/loaders/lora_pipeline.py"
      - "src/diffusers/loaders/peft.py"
      - "tests/pipelines/test_pipelines_common.py"
      - "tests/models/test_modeling_common.py"
      - "examples/**/*.py"
  workflow_dispatch:
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 env:
  DIFFUSERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  HF_XET_HIGH_PERFORMANCE: 1
  PYTEST_TIMEOUT: 600
  PIPELINE_USAGE_CUTOFF: 1000000000 # set high cutoff so that only always-test pipelines run
 jobs:
  check_code_quality:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install .[quality]
      - name: Check quality
        run: make quality
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
  check_repository_consistency:
    needs: check_code_quality
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install .[quality]
      - name: Check repo consistency
        run: |
          python utils/check_copies.py
          python utils/check_dummies.py
          python utils/check_support_list.py
          make deps_table_check_updated
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
  setup_torch_cuda_pipeline_matrix:
    needs: [check_code_quality, check_repository_consistency]
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Fetch Pipeline Matrix
        id: fetch_pipeline_matrix
        run: |
          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
          echo $matrix
          echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: test-pipelines.json
          path: reports
  torch_pipelines_cuda_tests:
    name: Torch Pipelines CUDA Tests
    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      fail-fast: false
      max-parallel: 8
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Extract tests
        id: extract_tests
        run: |
          pattern=$(python utils/extract_tests_from_mixin.py --type pipeline)
          echo "$pattern" > /tmp/test_pattern.txt
          echo "pattern_file=/tmp/test_pattern.txt" >> $GITHUB_OUTPUT
      - name: PyTorch CUDA checkpoint tests on Ubuntu
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
          if [ "${{ matrix.module }}" = "ip_adapters" ]; then
              pytest -n 1 --max-worker-restart=0 --dist=loadfile \
              -k "not Flax and not Onnx" \
              --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
              tests/pipelines/${{ matrix.module }}
          else
              pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
              pytest -n 1 --max-worker-restart=0 --dist=loadfile \
              -k "not Flax and not Onnx and $pattern" \
              --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
              tests/pipelines/${{ matrix.module }}
          fi
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
  torch_cuda_tests:
    name: Torch CUDA Tests
    needs: [check_code_quality, check_repository_consistency]
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: false
      max-parallel: 4
      matrix:
        module: [models, schedulers, lora, others]
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v6
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
        uv pip install -e ".[quality]"
        uv pip install peft@git+https://github.com/huggingface/peft.git
        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Extract tests
      id: extract_tests
      run: |
        pattern=$(python utils/extract_tests_from_mixin.py --type ${{ matrix.module }})
        echo "$pattern" > /tmp/test_pattern.txt
        echo "pattern_file=/tmp/test_pattern.txt" >> $GITHUB_OUTPUT
    - name: Run PyTorch CUDA tests
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
      run: |
        pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
        if [ -z "$pattern" ]; then
          pytest -n 1  --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \
          --make-reports=tests_torch_cuda_${{ matrix.module }}
        else
          pytest -n 1  --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \
          --make-reports=tests_torch_cuda_${{ matrix.module }}
        fi
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_torch_cuda_${{ matrix.module }}_stats.txt
        cat reports/tests_torch_cuda_${{ matrix.module }}_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      with:
        name: torch_cuda_test_reports_${{ matrix.module }}
        path: reports
  run_examples_tests:
    name: Examples PyTorch CUDA tests on Ubuntu
    needs: [check_code_quality, check_repository_consistency]
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v6
      with:
        fetch-depth: 2
    - name: NVIDIA-SMI
      run: |
        nvidia-smi
    - name: Install dependencies
      run: |
        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
        uv pip install -e ".[quality,training]"
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
      run: |
        uv pip install ".[training]"
        pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/examples_torch_cuda_stats.txt
        cat reports/examples_torch_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      with:
        name: examples_test_reports
        path: reports
--- a/.github/workflows/pr_torch_dependency_test.yml
+++ b/.github/workflows/pr_torch_dependency_test.yml
@@ -18,15 +18,19 @@ jobs:
  check_torch_dependencies:
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install dependencies
        run: |
-          pip install -e .
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          pip install torch torchvision torchaudio pytest
+          python -m pip install --upgrade pip uv
          python -m uv pip install -e .
          python -m uv pip install torch torchvision torchaudio
          python -m uv pip install pytest
      - name: Check for soft dependencies
        run: |
-            pytest tests/others/test_dependencies.py
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          pytest tests/others/test_dependencies.py
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -14,7 +14,7 @@ env:
  DIFFUSERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  PYTEST_TIMEOUT: 600
  PIPELINE_USAGE_CUTOFF: 50000
@@ -29,12 +29,13 @@ jobs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
      - name: Environment
        run: |
          python utils/print_env.py
@@ -46,7 +47,7 @@ jobs:
          echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports
@@ -63,10 +64,10 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
@@ -74,21 +75,20 @@ jobs:
          nvidia-smi
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install -e [quality,test]
-          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
      - name: Environment
        run: |
          python utils/print_env.py
      - name: PyTorch CUDA checkpoint tests on Ubuntu
        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -k "not Flax and not Onnx" \
+            -s -v -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            tests/pipelines/${{ matrix.module }}
      - name: Failure short reports
@@ -98,7 +98,7 @@ jobs:
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
@@ -109,7 +109,7 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    defaults:
      run:
        shell: bash
@@ -120,17 +120,16 @@ jobs:
        module: [models, schedulers, lora, others, single_file]
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        uv pip install peft@git+https://github.com/huggingface/peft.git
+        python -m uv pip install -e [quality,test]
-        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+        python -m uv pip install peft@git+https://github.com/huggingface/peft.git
-        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
    - name: Environment
      run: |
@@ -138,12 +137,12 @@ jobs:
    - name: Run PyTorch CUDA tests
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -k "not Flax and not Onnx" \
+          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_torch_cuda_${{ matrix.module }} \
          tests/${{ matrix.module }}
@@ -155,11 +154,106 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_cuda_test_reports_${{ matrix.module }}
        path: reports
  flax_tpu_tests:
    name: Flax TPU Tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    container:
      image: diffusers/diffusers-flax-tpu
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    defaults:
      run:
        shell: bash
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run Flax TPU tests
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        python -m pytest -n 0 \
          -s -v -k "Flax" \
          --make-reports=tests_flax_tpu \
          tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_flax_tpu_stats.txt
        cat reports/tests_flax_tpu_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: flax_tpu_test_reports
        path: reports
  onnx_cuda_tests:
    name: ONNX CUDA Tests
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-onnxruntime-cuda
      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
    defaults:
      run:
        shell: bash
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run ONNXRuntime CUDA tests
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
          --make-reports=tests_onnx_cuda \
          tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_onnx_cuda_stats.txt
        cat reports/tests_onnx_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: onnx_cuda_test_reports
        path: reports
  run_torch_compile_tests:
    name: PyTorch Compile CUDA tests
@@ -167,12 +261,12 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
-      image: diffusers/diffusers-pytorch-cuda
+      image: diffusers/diffusers-pytorch-compile-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -181,25 +275,24 @@ jobs:
        nvidia-smi
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
+        python -m uv pip install -e [quality,test,training]
        uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        RUN_COMPILE: yes
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_compile_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_compile_test_reports
        path: reports
@@ -212,11 +305,11 @@ jobs:
    container:
      image: diffusers/diffusers-pytorch-xformers-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -225,22 +318,23 @@ jobs:
        nvidia-smi
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test,training]
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_xformers_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_xformers_test_reports
        path: reports
@@ -253,30 +347,35 @@ jobs:
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: NVIDIA-SMI
      run: |
        nvidia-smi
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test,training]
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
-        uv pip install ".[training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/
+        python -m uv pip install timm
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
    - name: Failure short reports
      if: ${{ failure() }}
@@ -286,7 +385,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: examples_test_reports
        path: reports
--- a/.github/workflows/push_tests_fast.yml
+++ b/.github/workflows/push_tests_fast.yml
@@ -18,7 +18,7 @@ env:
  HF_HOME: /mnt/cache
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  PYTEST_TIMEOUT: 600
  RUN_SLOW: no
@@ -33,6 +33,16 @@ jobs:
            runner: aws-general-8-plus
            image: diffusers/diffusers-pytorch-cpu
            report: torch_cpu
          - name: Fast Flax CPU tests on Ubuntu
            framework: flax
            runner: aws-general-8-plus
            image: diffusers/diffusers-flax-cpu
            report: flax_cpu
          - name: Fast ONNXRuntime CPU tests on Ubuntu
            framework: onnxruntime
            runner: aws-general-8-plus
            image: diffusers/diffusers-onnxruntime-cpu
            report: onnx_cpu
          - name: PyTorch Example CPU tests on Ubuntu
            framework: pytorch_examples
            runner: aws-general-8-plus
@@ -54,31 +64,53 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run fast PyTorch CPU tests
      if: ${{ matrix.config.framework == 'pytorch' }}
      run: |
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          -k "not Flax and not Onnx" \
+        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests/
    - name: Run fast Flax TPU tests
      if: ${{ matrix.config.framework == 'flax' }}
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Flax" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests/
    - name: Run fast ONNXRuntime CPU tests
      if: ${{ matrix.config.framework == 'onnxruntime' }}
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
          --make-reports=tests_${{ matrix.config.report }} \
          tests/
    - name: Run example PyTorch CPU tests
      if: ${{ matrix.config.framework == 'pytorch_examples' }}
      run: |
-        uv pip install ".[training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        pytest -n 4 --max-worker-restart=0 --dist=loadfile \
+        python -m uv pip install peft timm
        python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile \
          --make-reports=tests_${{ matrix.config.report }} \
          examples
@@ -88,7 +120,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_${{ matrix.config.report }}_test_reports
        path: reports
--- a/.github/workflows/push_tests_mps.yml
+++ b/.github/workflows/push_tests_mps.yml
@@ -1,14 +1,19 @@
 name: Fast mps tests on main
 on:
-  workflow_dispatch:
+  push:
    branches:
      - main
    paths:
      - "src/diffusers/**.py"
      - "tests/**.py"
 env:
  DIFFUSERS_IS_CI: yes
  HF_HOME: /mnt/cache
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
-  HF_XET_HIGH_PERFORMANCE: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 1
  PYTEST_TIMEOUT: 600
  RUN_SLOW: no
@@ -23,7 +28,7 @@ jobs:
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -41,7 +46,7 @@ jobs:
      shell: arch -arch arm64 bash {0}
      run: |
        ${CONDA_RUN} python -m pip install --upgrade pip uv
-        ${CONDA_RUN} python -m uv pip install -e ".[quality,test]"
+        ${CONDA_RUN} python -m uv pip install -e [quality,test]
        ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio
        ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
        ${CONDA_RUN} python -m uv pip install transformers --upgrade
@@ -57,7 +62,7 @@ jobs:
        HF_HOME: /System/Volumes/Data/mnt/cache
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
-        ${CONDA_RUN} python -m pytest -n 0 --make-reports=tests_torch_mps tests/
+        ${CONDA_RUN} python -m pytest -n 0 -s -v --make-reports=tests_torch_mps tests/
    - name: Failure short reports
      if: ${{ failure() }}
@@ -65,7 +70,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: pr_torch_mps_test_reports
        path: reports
--- a/.github/workflows/pypi_publish.yaml
+++ b/.github/workflows/pypi_publish.yaml
@@ -15,10 +15,10 @@ jobs:
      latest_branch: ${{ steps.set_latest_branch.outputs.latest_branch }}
    steps:
      - name: Checkout Repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
@@ -40,12 +40,12 @@ jobs:
    steps:
      - name: Checkout Repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          ref: ${{ needs.find-and-checkout-latest-branch.outputs.latest_branch }}
      - name: Setup Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
@@ -68,7 +68,7 @@ jobs:
      - name: Test installing diffusers and importing
        run: |
          pip install diffusers && pip uninstall diffusers -y
-          pip install -i https://test.pypi.org/simple/ diffusers
+          pip install -i https://testpypi.python.org/pypi diffusers
          python -c "from diffusers import __version__; print(__version__)"
          python -c "from diffusers import DiffusionPipeline; pipe = DiffusionPipeline.from_pretrained('fusing/unet-ldm-dummy-update'); pipe()"
          python -c "from diffusers import DiffusionPipeline; pipe = DiffusionPipeline.from_pretrained('hf-internal-testing/tiny-stable-diffusion-pipe', safety_checker=None); pipe('ah suh du')"
--- a/.github/workflows/release_tests_fast.yml
+++ b/.github/workflows/release_tests_fast.yml
@@ -27,12 +27,13 @@ jobs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
      - name: Environment
        run: |
          python utils/print_env.py
@@ -44,7 +45,7 @@ jobs:
          echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports
@@ -61,10 +62,10 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
@@ -72,19 +73,20 @@ jobs:
          nvidia-smi
      - name: Install dependencies
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install -e [quality,test]
          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
        env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-            -k "not Flax and not Onnx" \
+            -s -v -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            tests/pipelines/${{ matrix.module }}
      - name: Failure short reports
@@ -94,7 +96,7 @@ jobs:
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
@@ -105,7 +107,7 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host --gpus 0
    defaults:
      run:
        shell: bash
@@ -116,15 +118,16 @@ jobs:
        module: [models, schedulers, lora, others, single_file]
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        uv pip install peft@git+https://github.com/huggingface/peft.git
+        python -m uv pip install -e [quality,test]
-        uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+        python -m uv pip install peft@git+https://github.com/huggingface/peft.git
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
    - name: Environment
      run: |
@@ -132,12 +135,12 @@ jobs:
    - name: Run PyTorch CUDA tests
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -k "not Flax and not Onnx" \
+          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_torch_${{ matrix.module }}_cuda \
          tests/${{ matrix.module }}
@@ -149,66 +152,105 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_cuda_${{ matrix.module }}_test_reports
        path: reports
-  torch_minimum_version_cuda_tests:
+  flax_tpu_tests:
-    name: Torch Minimum Version CUDA Tests
+    name: Flax TPU Tests
-    runs-on:
+    runs-on: docker-tpu
      group: aws-g4dn-2xlarge
    container:
-      image: diffusers/diffusers-pytorch-minimum-cuda
+      image: diffusers/diffusers-flax-tpu
-      options: --shm-size "16gb" --ipc host --gpus all
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --privileged
    defaults:
      run:
        shell: bash
    steps:
-      - name: Checkout diffusers
+    - name: Checkout diffusers
-        uses: actions/checkout@v6
+      uses: actions/checkout@v3
-        with:
+      with:
-          fetch-depth: 2
+        fetch-depth: 2
-      - name: Install dependencies
+    - name: Install dependencies
-        run: |
+      run: |
-          uv pip install -e ".[quality]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip install peft@git+https://github.com/huggingface/peft.git
+        python -m uv pip install -e [quality,test]
-          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
-      - name: Environment
+    - name: Environment
-        run: |
+      run: |
-          python utils/print_env.py
+        python utils/print_env.py
-      - name: Run PyTorch CUDA tests
+    - name: Run slow Flax TPU tests
-        env:
+      env:
-          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+      run: |
-          CUBLAS_WORKSPACE_CONFIG: :16:8
+        python -m pytest -n 0 \
-        run: |
+          -s -v -k "Flax" \
-          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+          --make-reports=tests_flax_tpu \
-            -k "not Flax and not Onnx" \
+          tests/
            --make-reports=tests_torch_minimum_cuda \
            tests/models/test_modeling_common.py \
            tests/pipelines/test_pipelines_common.py \
            tests/pipelines/test_pipeline_utils.py \
            tests/pipelines/test_pipelines.py \
            tests/pipelines/test_pipelines_auto.py \
            tests/schedulers/test_schedulers.py \
            tests/others
-      - name: Failure short reports
+    - name: Failure short reports
-        if: ${{ failure() }}
+      if: ${{ failure() }}
-        run: |
+      run: |
-          cat reports/tests_torch_minimum_version_cuda_stats.txt
+        cat reports/tests_flax_tpu_stats.txt
-          cat reports/tests_torch_minimum_version_cuda_failures_short.txt
+        cat reports/tests_flax_tpu_failures_short.txt
-      - name: Test suite reports artifacts
+    - name: Test suite reports artifacts
-        if: ${{ always() }}
+      if: ${{ always() }}
-        uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
-        with:
+      with:
-          name: torch_minimum_version_cuda_test_reports
+        name: flax_tpu_test_reports
-          path: reports
+        path: reports
  # ONNX Runtime job: runs inside the diffusers/diffusers-onnxruntime-cuda image
  # on the aws-g4dn-2xlarge runner group and executes only the tests selected by
  # the pytest keyword expression "Onnx".
  onnx_cuda_tests:
    name: ONNX CUDA Tests
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-onnxruntime-cuda
      # Bind-mounts the host directory /mnt/cache/.cache/huggingface at /mnt/cache/ in the container.
      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
    defaults:
      run:
        shell: bash
    steps:
    # fetch-depth: 2 limits the clone to the two most recent commits.
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2
    # Creates /opt/venv, runs the editable install with the `quality` and `test`
    # extras, then replaces accelerate with a build from its GitHub repository.
    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test]
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
    # Prints environment details via the repository's utils/print_env.py script.
    - name: Environment
      run: |
        python utils/print_env.py
    # One xdist worker, no worker restarts; report files are written with the
    # `tests_onnx_cuda` prefix that the next step reads back.
    - name: Run slow ONNXRuntime CUDA tests
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
          --make-reports=tests_onnx_cuda \
          tests/
    # Only runs when an earlier step failed; dumps the summary and short failure log.
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_onnx_cuda_stats.txt
        cat reports/tests_onnx_cuda_failures_short.txt
    # always() uploads the reports directory whether the tests passed or failed.
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: onnx_cuda_test_reports
        path: reports
  run_torch_compile_tests:
    name: PyTorch Compile CUDA tests
@@ -217,12 +259,12 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
-      image: diffusers/diffusers-pytorch-cuda
+      image: diffusers/diffusers-pytorch-compile-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -231,23 +273,24 @@ jobs:
        nvidia-smi
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test,training]
    - name: Environment
      run: |
        python utils/print_env.py
-    - name: Run torch compile tests on GPU
+    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        RUN_COMPILE: yes
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_compile_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_compile_test_reports
        path: reports
@@ -260,11 +303,11 @@ jobs:
    container:
      image: diffusers/diffusers-pytorch-xformers-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -273,22 +316,23 @@ jobs:
        nvidia-smi
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test,training]
    - name: Environment
      run: |
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
+        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_xformers_cuda_failures_short.txt
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: torch_xformers_test_reports
        path: reports
@@ -301,11 +345,11 @@ jobs:
    container:
      image: diffusers/diffusers-pytorch-cuda
-      options: --gpus all --shm-size "16gb" --ipc host
+      options: --gpus 0 --shm-size "16gb" --ipc host
    steps:
    - name: Checkout diffusers
-      uses: actions/checkout@v6
+      uses: actions/checkout@v3
      with:
        fetch-depth: 2
@@ -315,18 +359,21 @@ jobs:
    - name: Install dependencies
      run: |
-        uv pip install -e ".[quality,training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python -m uv pip install -e [quality,test,training]
    - name: Environment
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        python utils/print_env.py
    - name: Run example tests on GPU
      env:
-        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
-        uv pip install ".[training]"
+        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-        pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/
+        python -m uv pip install timm
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
    - name: Failure short reports
      if: ${{ failure() }}
@@ -336,7 +383,7 @@ jobs:
    - name: Test suite reports artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: examples_test_reports
        path: reports
--- a/.github/workflows/run_tests_from_a_pr.yml
+++ b/.github/workflows/run_tests_from_a_pr.yml
@@ -7,8 +7,8 @@ on:
        default: 'diffusers/diffusers-pytorch-cuda'
        description: 'Name of the Docker image'
        required: true
-      pr_number:
+      branch:
-        description: 'PR number to test on'
+        description: 'PR Branch to test on'
        required: true
      test:
        description: 'Tests to run (e.g.: `tests/models`).'
@@ -30,7 +30,7 @@ jobs:
      group: aws-g4dn-2xlarge
    container:
      image: ${{ github.event.inputs.docker_image }}
-      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Validate test files input
@@ -43,8 +43,8 @@ jobs:
            exit 1
          fi
-          if [[ ! "$PY_TEST" =~ ^tests/(models|pipelines|lora) ]]; then
+          if [[ ! "$PY_TEST" =~ ^tests/(models|pipelines) ]]; then
-            echo "Error: The input string must contain either 'models', 'pipelines', or 'lora' after 'tests/'."
+            echo "Error: The input string must contain either 'models' or 'pipelines' after 'tests/'."
            exit 1
          fi
@@ -53,18 +53,19 @@ jobs:
            exit 1
          fi
          echo "$PY_TEST"
        shell: bash -e {0}
      - name: Checkout PR branch
-        uses: actions/checkout@v6
+        uses: actions/checkout@v4
        with:
-          ref: refs/pull/${{ inputs.pr_number }}/head
+          ref: ${{ github.event.inputs.branch }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
      - name: Install pytest
        run: |
-          uv pip install -e ".[quality]"
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          uv pip install peft
+          python -m uv pip install -e [quality,test]
          python -m uv pip install peft
      - name: Run tests
        env:
--- a/.github/workflows/ssh-pr-runner.yml
+++ b/.github/workflows/ssh-pr-runner.yml
@@ -27,7 +27,7 @@ jobs:
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@@ -31,11 +31,11 @@ jobs:
      group: "${{ github.event.inputs.runner_type }}"
    container:
      image: ${{ github.event.inputs.docker_image }}
-      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus all --privileged
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
    steps:
      - name: Checkout diffusers
-        uses: actions/checkout@v6
+        uses: actions/checkout@v3
        with:
          fetch-depth: 2
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -15,10 +15,10 @@ jobs:
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v2
    - name: Setup Python
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@v1
      with:
        python-version: 3.8
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@@ -8,11 +8,8 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
    - name: Checkout code
-      uses: actions/checkout@v6
+      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    - name: Secret Scanning
      uses: trufflesecurity/trufflehog@main
      with:
        extra_args: --results=verified,unknown
--- a/.github/workflows/typos.yml
+++ b/.github/workflows/typos.yml
@@ -8,7 +8,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: typos-action
        uses: crate-ci/typos@v1.12.4
--- a/.github/workflows/update_metadata.yml
+++ b/.github/workflows/update_metadata.yml
@@ -15,7 +15,7 @@ jobs:
        shell: bash -l {0}
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v3
      - name: Setup environment
        run: |
--- a/.gitignore
+++ b/.gitignore
@@ -125,9 +125,6 @@ dmypy.json
 .vs
 .vscode
 # Cursor
 .cursor
 # Pycharm
 .idea
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/PHILOSOPHY.md
+++ b/PHILOSOPHY.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ limitations under the License.
 ## Installation
-We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation.
+We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/) and [Flax](https://flax.readthedocs.io/en/latest/#installation), please refer to their official documentation.
 ### PyTorch
@@ -53,6 +53,14 @@ With `conda` (maintained by the community):
 conda install -c conda-forge diffusers
 ```
 ### Flax
 With `pip` (official package):
 ```bash
 pip install --upgrade diffusers[flax]
 ```
 ### Apple Silicon (M1/M2) support
 Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide.
@@ -171,7 +179,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
  <tr style="border-top: 2px solid black">
    <td>Text-guided Image Inpainting</td>
    <td><a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint">Stable Diffusion Inpainting</a></td>
-      <td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting"> stable-diffusion-v1-5/stable-diffusion-inpainting </a></td>
+      <td><a href="https://huggingface.co/runwayml/stable-diffusion-inpainting"> runwayml/stable-diffusion-inpainting </a></td>
  </tr>
  <tr style="border-top: 2px solid black">
    <td>Image Variation</td>
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,69 +0,0 @@
 # Diffusers Benchmarks
 Welcome to Diffusers Benchmarks. These benchmarks are used to obtain latency and memory information of the most popular models across different scenarios such as:
 * Base case i.e., when using `torch.bfloat16` and `torch.nn.functional.scaled_dot_product_attention`.
 * Base + `torch.compile()`
 * NF4 quantization
 * Layerwise upcasting
 Instead of full diffusion pipelines, only the forward pass of the respective model classes (such as `FluxTransformer2DModel`) is tested with the real checkpoints (such as `"black-forest-labs/FLUX.1-dev"`). 
 The entrypoint to running all the currently available benchmarks is in `run_all.py`. However, one can run the individual benchmarks, too, e.g., `python benchmarking_flux.py`. It should produce a CSV file containing various information about the benchmarks run.
 The benchmarks are run on a weekly basis and the CI is defined in [benchmark.yml](../.github/workflows/benchmark.yml).
 ## Running the benchmarks manually
 First set up `torch` and install `diffusers` from the root of the directory:
 ```sh
 pip install -e ".[quality,test]"
 ```
 Then make sure the other dependencies are installed:
 ```sh
 cd benchmarks/
 pip install -r requirements.txt
 ```
 We need to be authenticated to access some of the checkpoints used during benchmarking:
 ```sh
 hf auth login
 ```
 We use an L40 GPU with 128GB RAM to run the benchmark CI. As such, the benchmarks are configured to run on NVIDIA GPUs. So, make sure you have access to a similar machine (or modify the benchmarking scripts accordingly).
 Then you can either launch the entire benchmarking suite by running:
 ```sh
 python run_all.py
 ```
 Or, you can run the individual benchmarks.
 ## Customizing the benchmarks
 We define "scenarios" to cover the most common ways in which these models are used. You can
 define a new scenario by modifying an existing benchmark file:
 ```py
 BenchmarkScenario(
    name=f"{CKPT_ID}-bnb-8bit",
    model_cls=FluxTransformer2DModel,
    model_init_kwargs={
        "pretrained_model_name_or_path": CKPT_ID,
        "torch_dtype": torch.bfloat16,
        "subfolder": "transformer",
        "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
    },
    get_model_input_dict=partial(get_input_dict, device=torch_device, dtype=torch.bfloat16),
    model_init_fn=model_init_fn,
 )
 ```
 You can also configure a new model-level benchmark and add it to the existing suite. To do so, just defining a valid benchmarking file like `benchmarking_flux.py` should be enough.
 Happy benchmarking 🧨
--- a/benchmarks/base_classes.py
+++ b/benchmarks/base_classes.py
@@ -0,0 +1,346 @@
 import os
 import sys
 import torch
 from diffusers import (
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    AutoPipelineForText2Image,
    ControlNetModel,
    LCMScheduler,
    StableDiffusionAdapterPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLAdapterPipeline,
    StableDiffusionXLControlNetPipeline,
    T2IAdapter,
    WuerstchenCombinedPipeline,
 )
 from diffusers.utils import load_image
 sys.path.append(".")
 from utils import (  # noqa: E402
    BASE_PATH,
    PROMPT,
    BenchmarkInfo,
    benchmark_fn,
    bytes_to_giga_bytes,
    flush,
    generate_csv_dict,
    write_to_csv,
 )
 # Size tuple per checkpoint id. The benchmark classes below look this up with
 # `args.ckpt` and pass the result straight to `image.resize(...)` so that the
 # conditioning image matches the resolution chosen for that checkpoint.
 RESOLUTION_MAPPING = {
    "Lykon/DreamShaper": (512, 512),
    "lllyasviel/sd-controlnet-canny": (512, 512),
    "diffusers/controlnet-canny-sdxl-1.0": (1024, 1024),
    "TencentARC/t2iadapter_canny_sd14v1": (512, 512),
    "TencentARC/t2i-adapter-canny-sdxl-1.0": (1024, 1024),
    "stabilityai/stable-diffusion-2-1": (768, 768),
    "stabilityai/stable-diffusion-xl-base-1.0": (1024, 1024),
    "stabilityai/stable-diffusion-xl-refiner-1.0": (1024, 1024),
    "stabilityai/sdxl-turbo": (512, 512),
 }
 class BaseBenchmak:
    """Shared scaffolding for the pipeline benchmarks in this module.
    Subclasses set ``self.pipe`` and override ``run_inference`` and
    ``benchmark``; only the naming of the result CSV is implemented here.
    """
    pipeline_class = None
    def __init__(self, args):
        super().__init__()
    def run_inference(self, args):
        raise NotImplementedError
    def benchmark(self, args):
        raise NotImplementedError
    def get_result_filepath(self, args):
        """Build the CSV path under ``BASE_PATH`` from the checkpoint id, pipeline class and run settings."""
        pipe_name = str(self.pipe.__class__.__name__)
        # "/" in a hub id would otherwise be read as a directory separator.
        ckpt_slug = args.ckpt.replace("/", "_")
        run_tag = f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
        return os.path.join(BASE_PATH, "_".join((ckpt_slug, pipe_name)) + run_tag)
 class TextToImageBenchmark(BaseBenchmak):
    """Benchmark for text-to-image pipelines resolved through ``AutoPipelineForText2Image``."""
    pipeline_class = AutoPipelineForText2Image
    def __init__(self, args):
        """Load ``args.ckpt`` in fp16 on CUDA; when ``args.run_compile`` is set, wrap its modules in ``torch.compile``."""
        pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
        if args.run_compile:
            compile_opts = {"mode": "reduce-overhead", "fullgraph": True}
            if isinstance(pipe, WuerstchenCombinedPipeline):
                # For Wuerstchen the decoder and VQGAN are compiled rather than `unet`/`movq`.
                print("Run torch compile")
                pipe.decoder = torch.compile(pipe.decoder, **compile_opts)
                pipe.vqgan = torch.compile(pipe.vqgan, **compile_opts)
            else:
                pipe.unet.to(memory_format=torch.channels_last)
                print("Run torch compile")
                pipe.unet = torch.compile(pipe.unet, **compile_opts)
                # `movq` is optional: compile it only when the pipeline has one.
                if getattr(pipe, "movq", None) is not None:
                    pipe.movq.to(memory_format=torch.channels_last)
                    pipe.movq = torch.compile(pipe.movq, **compile_opts)
        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe
    def run_inference(self, pipe, args):
        """Run one pipeline call with the shared prompt and the requested step count and batch size."""
        call_kwargs = {
            "prompt": PROMPT,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
        }
        _ = pipe(**call_kwargs)
    def benchmark(self, args):
        """Time ``run_inference``, read the CUDA max-allocated memory, and write the result to a CSV file."""
        flush()
        print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
        elapsed = benchmark_fn(self.run_inference, self.pipe, args)  # in seconds.
        peak_mem = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
        info = BenchmarkInfo(time=elapsed, memory=peak_mem)
        pipe_name = str(self.pipe.__class__.__name__)
        flush()
        row = generate_csv_dict(pipeline_cls=pipe_name, ckpt=args.ckpt, args=args, benchmark_info=info)
        out_path = self.get_result_filepath(args)
        write_to_csv(out_path, row)
        print(f"Logs written to: {out_path}")
        flush()
 class TurboTextToImageBenchmark(TextToImageBenchmark):
    """Text-to-image benchmark variant that calls the pipeline with ``guidance_scale=0.0``."""
    def __init__(self, args):
        super().__init__(args)
    def run_inference(self, pipe, args):
        """Same call as the parent benchmark, plus ``guidance_scale=0.0``."""
        call_kwargs = {
            "prompt": PROMPT,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
            "guidance_scale": 0.0,
        }
        _ = pipe(**call_kwargs)
 class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
    """Text-to-image benchmark with the LCM LoRA fused in and ``LCMScheduler`` swapped in.
    Results are reported under ``lora_id`` rather than the base checkpoint id.
    """
    lora_id = "latent-consistency/lcm-lora-sdxl"
    def __init__(self, args):
        super().__init__(args)
        pipe = self.pipe
        # Load, fuse, then unload: the order of these three calls is kept as-is.
        pipe.load_lora_weights(self.lora_id)
        pipe.fuse_lora()
        pipe.unload_lora_weights()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    def get_result_filepath(self, args):
        """Same naming scheme as the base class, keyed on ``lora_id`` instead of ``args.ckpt``."""
        pipe_name = str(self.pipe.__class__.__name__)
        lora_slug = self.lora_id.replace("/", "_")
        run_tag = f"-bs@{args.batch_size}-steps@{args.num_inference_steps}-mco@{args.model_cpu_offload}-compile@{args.run_compile}.csv"
        return os.path.join(BASE_PATH, "_".join((lora_slug, pipe_name)) + run_tag)
    def run_inference(self, pipe, args):
        """Run one pipeline call with ``guidance_scale=1.0``."""
        call_kwargs = {
            "prompt": PROMPT,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
            "guidance_scale": 1.0,
        }
        _ = pipe(**call_kwargs)
    def benchmark(self, args):
        """Time ``run_inference`` and write a CSV row whose ``ckpt`` field is ``lora_id``."""
        flush()
        print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
        elapsed = benchmark_fn(self.run_inference, self.pipe, args)  # in seconds.
        peak_mem = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
        info = BenchmarkInfo(time=elapsed, memory=peak_mem)
        pipe_name = str(self.pipe.__class__.__name__)
        flush()
        row = generate_csv_dict(pipeline_cls=pipe_name, ckpt=self.lora_id, args=args, benchmark_info=info)
        out_path = self.get_result_filepath(args)
        write_to_csv(out_path, row)
        print(f"Logs written to: {out_path}")
        flush()
 class ImageToImageBenchmark(TextToImageBenchmark):
    """Image-to-image benchmark; the source image is fetched when the class body is executed."""
    pipeline_class = AutoPipelineForImage2Image
    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/1665_Girl_with_a_Pearl_Earring.jpg"
    image = load_image(url).convert("RGB")
    def __init__(self, args):
        super().__init__(args)
        # Shadow the class-level image with an instance copy resized for this checkpoint.
        target_size = RESOLUTION_MAPPING[args.ckpt]
        self.image = self.image.resize(target_size)
    def run_inference(self, pipe, args):
        """Run one pipeline call with the shared prompt and the resized input image."""
        call_kwargs = {
            "prompt": PROMPT,
            "image": self.image,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
        }
        _ = pipe(**call_kwargs)
 class TurboImageToImageBenchmark(ImageToImageBenchmark):
    """Image-to-image benchmark variant that calls the pipeline with ``guidance_scale=0.0`` and ``strength=0.5``."""
    def __init__(self, args):
        super().__init__(args)
    def run_inference(self, pipe, args):
        """Same call as the parent benchmark, plus the guidance and strength settings."""
        call_kwargs = {
            "prompt": PROMPT,
            "image": self.image,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
            "guidance_scale": 0.0,
            "strength": 0.5,
        }
        _ = pipe(**call_kwargs)
 class InpaintingBenchmark(ImageToImageBenchmark):
    """Inpainting benchmark: the image-to-image setup plus a mask image.
    The mask is fetched when the class body is executed and resized per
    checkpoint in ``__init__``.
    """
    pipeline_class = AutoPipelineForInpainting
    mask_url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/overture-creations-5sI6fQgYIuo_mask.png"
    mask = load_image(mask_url).convert("RGB")
    def __init__(self, args):
        # ImageToImageBenchmark.__init__ already resizes `self.image` to
        # RESOLUTION_MAPPING[args.ckpt], so only the mask is resized here.
        super().__init__(args)
        self.mask = self.mask.resize(RESOLUTION_MAPPING[args.ckpt])
    def run_inference(self, pipe, args):
        """Run one pipeline call with the shared prompt, the input image and its mask."""
        _ = pipe(
            prompt=PROMPT,
            image=self.image,
            mask_image=self.mask,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )
 class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
    """Text-to-image benchmark with an IP-Adapter loaded and an adapter image passed on every call."""
    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
    image = load_image(url)
    def __init__(self, args):
        """Load the pipeline plus the adapter named by ``args.ip_adapter_id`` (repo id, weight file name)."""
        pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
        adapter_repo = args.ip_adapter_id[0]
        weight_file = args.ip_adapter_id[1]
        # The subfolder is chosen from the weight file name: "sdxl" in it selects "sdxl_models".
        adapter_subfolder = "sdxl_models" if "sdxl" in weight_file else "models"
        pipe.load_ip_adapter(adapter_repo, subfolder=adapter_subfolder, weight_name=weight_file)
        if args.run_compile:
            pipe.unet.to(memory_format=torch.channels_last)
            print("Run torch compile")
            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe
    def run_inference(self, pipe, args):
        """Run one pipeline call with the shared prompt and the class-level adapter image."""
        call_kwargs = {
            "prompt": PROMPT,
            "ip_adapter_image": self.image,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
        }
        _ = pipe(**call_kwargs)
 class ControlNetBenchmark(TextToImageBenchmark):
    """ControlNet benchmark: ``args.ckpt`` names the ControlNet, ``root_ckpt`` the base pipeline."""
    pipeline_class = StableDiffusionControlNetPipeline
    aux_network_class = ControlNetModel
    root_ckpt = "Lykon/DreamShaper"
    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_image_condition.png"
    image = load_image(url).convert("RGB")
    def __init__(self, args):
        """Build the base pipeline around the auxiliary network and optionally compile both."""
        controlnet = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
        pipe = self.pipeline_class.from_pretrained(
            self.root_ckpt, controlnet=controlnet, torch_dtype=torch.float16
        ).to("cuda")
        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe
        if args.run_compile:
            compiled_parts = ("unet", "controlnet")
            # Switch both modules to channels_last first, then compile them in the same order.
            for part in compiled_parts:
                getattr(pipe, part).to(memory_format=torch.channels_last)
            print("Run torch compile")
            for part in compiled_parts:
                setattr(pipe, part, torch.compile(getattr(pipe, part), mode="reduce-overhead", fullgraph=True))
        target_size = RESOLUTION_MAPPING[args.ckpt]
        self.image = self.image.resize(target_size)
    def run_inference(self, pipe, args):
        """Run one pipeline call with the shared prompt and the resized conditioning image."""
        call_kwargs = {
            "prompt": PROMPT,
            "image": self.image,
            "num_inference_steps": args.num_inference_steps,
            "num_images_per_prompt": args.batch_size,
        }
        _ = pipe(**call_kwargs)
 class ControlNetSDXLBenchmark(ControlNetBenchmark):
    """ControlNet benchmark using the SDXL ControlNet pipeline class and the SDXL base checkpoint.
    Everything else (loading, compilation, inference call) is inherited from ``ControlNetBenchmark``.
    """
    pipeline_class = StableDiffusionXLControlNetPipeline
    root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
    def __init__(self, args):
        super().__init__(args)
 class T2IAdapterBenchmark(ControlNetBenchmark):
    """T2I-Adapter benchmark: ``args.ckpt`` names the adapter, ``root_ckpt`` the base pipeline.
    ``run_inference`` is inherited from ``ControlNetBenchmark``.
    """
    pipeline_class = StableDiffusionAdapterPipeline
    aux_network_class = T2IAdapter
    root_ckpt = "Lykon/DreamShaper"
    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter.png"
    image = load_image(url).convert("L")
    def __init__(self, args):
        """Build the base pipeline around the adapter and optionally compile both."""
        adapter = self.aux_network_class.from_pretrained(args.ckpt, torch_dtype=torch.float16)
        pipe = self.pipeline_class.from_pretrained(
            self.root_ckpt, adapter=adapter, torch_dtype=torch.float16
        ).to("cuda")
        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe
        if args.run_compile:
            compiled_parts = ("unet", "adapter")
            # Switch both modules to channels_last first, then compile them in the same order.
            for part in compiled_parts:
                getattr(pipe, part).to(memory_format=torch.channels_last)
            print("Run torch compile")
            for part in compiled_parts:
                setattr(pipe, part, torch.compile(getattr(pipe, part), mode="reduce-overhead", fullgraph=True))
        target_size = RESOLUTION_MAPPING[args.ckpt]
        self.image = self.image.resize(target_size)
 class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):
    """T2I-Adapter benchmark using the SDXL adapter pipeline class, the SDXL base checkpoint and its own conditioning image.
    Loading, compilation and the inference call are inherited from the parent classes.
    """
    pipeline_class = StableDiffusionXLAdapterPipeline
    root_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
    url = "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/benchmarking/canny_for_adapter_sdxl.png"
    # Unlike the parent class, the image is used as loaded (no `.convert(...)` call).
    image = load_image(url)
    def __init__(self, args):
        super().__init__(args)
--- a/benchmarks/benchmark_controlnet.py
+++ b/benchmarks/benchmark_controlnet.py
@@ -0,0 +1,26 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import ControlNetBenchmark, ControlNetSDXLBenchmark  # noqa: E402
 if __name__ == "__main__":
    # The two ControlNet checkpoints accepted by --ckpt; the first one is the default.
    default_ckpt = "lllyasviel/sd-controlnet-canny"
    sdxl_ckpt = "diffusers/controlnet-canny-sdxl-1.0"
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", type=str, default=default_ckpt, choices=[default_ckpt, sdxl_ckpt])
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Any checkpoint other than the default is handled by the SDXL benchmark class.
    if args.ckpt == default_ckpt:
        benchmark_pipe = ControlNetBenchmark(args)
    else:
        benchmark_pipe = ControlNetSDXLBenchmark(args)
    benchmark_pipe.benchmark(args)
--- a/benchmarks/benchmark_ip_adapters.py
+++ b/benchmarks/benchmark_ip_adapters.py
@@ -0,0 +1,33 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import IPAdapterTextToImageBenchmark  # noqa: E402
 # Base checkpoint -> 2-tuple stored on `args.ip_adapter_id`
 # (presumably IP-Adapter repo id and weight file name; confirm against the consumer).
 IP_ADAPTER_CKPTS = {
    # because original SD v1.5 has been taken down.
    "Lykon/DreamShaper": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
    "stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
 }
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        # The default has to be a key of IP_ADAPTER_CKPTS: argparse does not validate
        # defaults against `choices`, so a misspelled default only surfaces later as a
        # KeyError in the lookup below.
        default="stabilityai/stable-diffusion-xl-base-1.0",
        choices=list(IP_ADAPTER_CKPTS.keys()),
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Attach the adapter identifier before the benchmark object is constructed.
    args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
    benchmark_pipe = IPAdapterTextToImageBenchmark(args)
    # Relabel the checkpoint only after construction, right before `benchmark(args)` runs.
    args.ckpt = f"{args.ckpt} (IP-Adapter)"
    benchmark_pipe.benchmark(args)
--- a/benchmarks/benchmark_sd_img.py
+++ b/benchmarks/benchmark_sd_img.py
@@ -0,0 +1,29 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import ImageToImageBenchmark, TurboImageToImageBenchmark  # noqa: E402
 if __name__ == "__main__":
    # Checkpoints accepted by --ckpt; the first entry is the default.
    img2img_ckpts = [
        "Lykon/DreamShaper",
        "stabilityai/stable-diffusion-2-1",
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        "stabilityai/sdxl-turbo",
    ]
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", type=str, default=img2img_ckpts[0], choices=img2img_ckpts)
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Checkpoints with "turbo" in their name use the dedicated turbo benchmark class.
    if "turbo" in args.ckpt:
        benchmark_pipe = TurboImageToImageBenchmark(args)
    else:
        benchmark_pipe = ImageToImageBenchmark(args)
    benchmark_pipe.benchmark(args)
--- a/benchmarks/benchmark_sd_inpainting.py
+++ b/benchmarks/benchmark_sd_inpainting.py
@@ -0,0 +1,28 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import InpaintingBenchmark  # noqa: E402
 if __name__ == "__main__":
    # Checkpoints accepted by --ckpt; the first entry is the default.
    inpainting_ckpts = [
        "Lykon/DreamShaper",
        "stabilityai/stable-diffusion-2-1",
        "stabilityai/stable-diffusion-xl-base-1.0",
    ]
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", type=str, default=inpainting_ckpts[0], choices=inpainting_ckpts)
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Build the benchmark from the parsed options and run it immediately.
    InpaintingBenchmark(args).benchmark(args)
--- a/benchmarks/benchmark_t2i_adapter.py
+++ b/benchmarks/benchmark_t2i_adapter.py
@@ -0,0 +1,28 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import T2IAdapterBenchmark, T2IAdapterSDXLBenchmark  # noqa: E402
 if __name__ == "__main__":
    # The two adapter checkpoints accepted by --ckpt; the first one is the default.
    default_ckpt = "TencentARC/t2iadapter_canny_sd14v1"
    sdxl_ckpt = "TencentARC/t2i-adapter-canny-sdxl-1.0"
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", type=str, default=default_ckpt, choices=[default_ckpt, sdxl_ckpt])
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Any checkpoint other than the default is handled by the SDXL benchmark class.
    if args.ckpt == default_ckpt:
        benchmark_pipe = T2IAdapterBenchmark(args)
    else:
        benchmark_pipe = T2IAdapterSDXLBenchmark(args)
    benchmark_pipe.benchmark(args)
--- a/benchmarks/benchmark_t2i_lcm_lora.py
+++ b/benchmarks/benchmark_t2i_lcm_lora.py
@@ -0,0 +1,23 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import LCMLoRATextToImageBenchmark  # noqa: E402
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # No `choices` restriction here: any checkpoint id is accepted.
    parser.add_argument("--ckpt", type=str, default="stabilityai/stable-diffusion-xl-base-1.0")
    parser.add_argument("--batch_size", type=int, default=1)
    # This script defaults to 4 inference steps.
    parser.add_argument("--num_inference_steps", type=int, default=4)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Build the benchmark from the parsed options and run it immediately.
    LCMLoRATextToImageBenchmark(args).benchmark(args)
--- a/benchmarks/benchmark_text_to_image.py
+++ b/benchmarks/benchmark_text_to_image.py
@@ -0,0 +1,40 @@
 import argparse
 import sys
 sys.path.append(".")
 from base_classes import TextToImageBenchmark, TurboTextToImageBenchmark  # noqa: E402
 # Every checkpoint the text-to-image benchmark accepts through --ckpt.
 ALL_T2I_CKPTS = [
    "Lykon/DreamShaper",
    "segmind/SSD-1B",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "kandinsky-community/kandinsky-2-2-decoder",
    "warp-ai/wuerstchen",
    "stabilityai/sdxl-turbo",
 ]
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt", type=str, default="Lykon/DreamShaper", choices=ALL_T2I_CKPTS)
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()
    # Checkpoints with "turbo" in their name use the dedicated turbo benchmark class.
    benchmark_cls = TurboTextToImageBenchmark if "turbo" in args.ckpt else TextToImageBenchmark
    benchmark_pipe = benchmark_cls(args)
    benchmark_pipe.benchmark(args)
--- a/benchmarks/benchmarking_flux.py
+++ b/benchmarks/benchmarking_flux.py
@@ -1,98 +0,0 @@
 from functools import partial
 import torch
 from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
 from diffusers import BitsAndBytesConfig, FluxTransformer2DModel
 from diffusers.utils.testing_utils import torch_device
 CKPT_ID = "black-forest-labs/FLUX.1-dev"
 RESULT_FILENAME = "flux.csv"
 def get_input_dict(**device_dtype_kwargs):
    """Build dummy forward-pass inputs for the Flux transformer benchmark.
    Args:
        **device_dtype_kwargs: Forwarded to every tensor constructor (the callers in this
            file pass `device` and `dtype`).
    Returns:
        Dict of keyword arguments for the model call.
    """
    # resolution: 1024x1024
    # maximum sequence length 512
    image_seq_len, text_seq_len = 4096, 512
    hidden_states = torch.randn(1, image_seq_len, 64, **device_dtype_kwargs)
    encoder_hidden_states = torch.randn(1, text_seq_len, 4096, **device_dtype_kwargs)
    pooled_prompt_embeds = torch.randn(1, 768, **device_dtype_kwargs)
    # One id row per token: image ids follow the image sequence length and text ids the
    # text sequence length (the two sizes were previously swapped).
    image_ids = torch.ones(image_seq_len, 3, **device_dtype_kwargs)
    text_ids = torch.ones(text_seq_len, 3, **device_dtype_kwargs)
    timestep = torch.tensor([1.0], **device_dtype_kwargs)
    guidance = torch.tensor([1.0], **device_dtype_kwargs)
    return {
        "hidden_states": hidden_states,
        "encoder_hidden_states": encoder_hidden_states,
        "img_ids": image_ids,
        "txt_ids": text_ids,
        "pooled_projections": pooled_prompt_embeds,
        "timestep": timestep,
        "guidance": guidance,
    }
 if __name__ == "__main__":
    # Every scenario uses the same bf16 dummy inputs on the test device.
    make_inputs = partial(get_input_dict, device=torch_device, dtype=torch.bfloat16)
    def transformer_init_kwargs(**extra):
        # Returns a fresh kwargs dict per scenario so scenarios never share one.
        return {
            "pretrained_model_name_or_path": CKPT_ID,
            "torch_dtype": torch.bfloat16,
            "subfolder": "transformer",
            **extra,
        }
    # Group-offloading settings used by the last scenario.
    leaf_offload_kwargs = {
        "onload_device": torch_device,
        "offload_device": torch.device("cpu"),
        "offload_type": "leaf_level",
        "use_stream": True,
        "non_blocking": True,
    }
    scenarios = [
        # Plain bf16; the only scenario that also sets compile_kwargs.
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=model_init_fn,
            get_model_input_dict=make_inputs,
            compile_kwargs={"fullgraph": True},
        ),
        # bitsandbytes 4-bit (nf4) quantization with bf16 compute.
        BenchmarkScenario(
            name=f"{CKPT_ID}-bnb-nf4",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs=transformer_init_kwargs(
                quantization_config=BitsAndBytesConfig(
                    load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4"
                )
            ),
            model_init_fn=model_init_fn,
            get_model_input_dict=make_inputs,
        ),
        # Layerwise casting enabled through the shared init function.
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
            get_model_input_dict=make_inputs,
        ),
        # Leaf-level group offloading.
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=FluxTransformer2DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, group_offload_kwargs=leaf_offload_kwargs),
            get_model_input_dict=make_inputs,
        ),
    ]
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
--- a/benchmarks/benchmarking_ltx.py
+++ b/benchmarks/benchmarking_ltx.py
@@ -1,80 +0,0 @@
 from functools import partial
 import torch
 from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
 from diffusers import LTXVideoTransformer3DModel
 from diffusers.utils.testing_utils import torch_device
 CKPT_ID = "Lightricks/LTX-Video-0.9.7-dev"
 RESULT_FILENAME = "ltx.csv"
 def get_input_dict(**device_dtype_kwargs):
    """Build dummy forward-pass inputs for the LTX video transformer benchmark.
    Args:
        **device_dtype_kwargs: Forwarded to every tensor constructor (the callers in this
            file pass `device` and `dtype`).
    Returns:
        Dict of keyword arguments for the model call.
    """
    # 512x704 (161 frames)
    # `max_sequence_length`: 256
    num_video_tokens, num_text_tokens = 7392, 256
    # Tensors are created in the same order as the dict keys.
    return {
        "hidden_states": torch.randn(1, num_video_tokens, 128, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, num_text_tokens, 4096, **device_dtype_kwargs),
        "encoder_attention_mask": torch.ones(1, num_text_tokens, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
        "video_coords": torch.randn(1, 3, num_video_tokens, **device_dtype_kwargs),
    }
 if __name__ == "__main__":
    # Every scenario uses the same bf16 dummy inputs on the test device.
    make_inputs = partial(get_input_dict, device=torch_device, dtype=torch.bfloat16)
    def transformer_init_kwargs():
        # Returns a fresh kwargs dict per scenario so scenarios never share one.
        return {
            "pretrained_model_name_or_path": CKPT_ID,
            "torch_dtype": torch.bfloat16,
            "subfolder": "transformer",
        }
    # Group-offloading settings used by the last scenario.
    leaf_offload_kwargs = {
        "onload_device": torch_device,
        "offload_device": torch.device("cpu"),
        "offload_type": "leaf_level",
        "use_stream": True,
        "non_blocking": True,
    }
    scenarios = [
        # Plain bf16; the only scenario that also sets compile_kwargs.
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=model_init_fn,
            get_model_input_dict=make_inputs,
            compile_kwargs={"fullgraph": True},
        ),
        # Layerwise casting enabled through the shared init function.
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
            get_model_input_dict=make_inputs,
        ),
        # Leaf-level group offloading.
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=LTXVideoTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, group_offload_kwargs=leaf_offload_kwargs),
            get_model_input_dict=make_inputs,
        ),
    ]
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
--- a/benchmarks/benchmarking_sdxl.py
+++ b/benchmarks/benchmarking_sdxl.py
@@ -1,82 +0,0 @@
 from functools import partial
 import torch
 from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
 from diffusers import UNet2DConditionModel
 from diffusers.utils.testing_utils import torch_device
 CKPT_ID = "stabilityai/stable-diffusion-xl-base-1.0"
 RESULT_FILENAME = "sdxl.csv"
 def get_input_dict(**device_dtype_kwargs):
    """Build dummy forward-pass inputs for the SDXL UNet benchmark.
    Args:
        **device_dtype_kwargs: Forwarded to every tensor constructor (the callers in this
            file pass `device` and `dtype`).
    Returns:
        Dict of keyword arguments for the model call, including the nested
        `added_cond_kwargs` dict.
    """
    # height: 1024
    # width: 1024
    # max_sequence_length: 77
    # Tensors are created in the same order as the dict keys.
    return {
        "sample": torch.randn(1, 4, 128, 128, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, 77, 2048, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
        "added_cond_kwargs": {
            "text_embeds": torch.randn(1, 1280, **device_dtype_kwargs),
            "time_ids": torch.ones(1, 6, **device_dtype_kwargs),
        },
    }
 if __name__ == "__main__":
    # Every scenario uses the same bf16 dummy inputs on the test device.
    make_inputs = partial(get_input_dict, device=torch_device, dtype=torch.bfloat16)
    def unet_init_kwargs():
        # Returns a fresh kwargs dict per scenario so scenarios never share one.
        return {
            "pretrained_model_name_or_path": CKPT_ID,
            "torch_dtype": torch.bfloat16,
            "subfolder": "unet",
        }
    # Group-offloading settings used by the last scenario.
    leaf_offload_kwargs = {
        "onload_device": torch_device,
        "offload_device": torch.device("cpu"),
        "offload_type": "leaf_level",
        "use_stream": True,
        "non_blocking": True,
    }
    scenarios = [
        # Plain bf16; the only scenario that also sets compile_kwargs.
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=UNet2DConditionModel,
            model_init_kwargs=unet_init_kwargs(),
            model_init_fn=model_init_fn,
            get_model_input_dict=make_inputs,
            compile_kwargs={"fullgraph": True},
        ),
        # Layerwise casting enabled through the shared init function.
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=UNet2DConditionModel,
            model_init_kwargs=unet_init_kwargs(),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
            get_model_input_dict=make_inputs,
        ),
        # Leaf-level group offloading.
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=UNet2DConditionModel,
            model_init_kwargs=unet_init_kwargs(),
            model_init_fn=partial(model_init_fn, group_offload_kwargs=leaf_offload_kwargs),
            get_model_input_dict=make_inputs,
        ),
    ]
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
--- a/benchmarks/benchmarking_utils.py
+++ b/benchmarks/benchmarking_utils.py
@@ -1,244 +0,0 @@
 import gc
 import inspect
 import logging
 import os
 import queue
 import threading
 from contextlib import nullcontext
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Optional, Union
 import pandas as pd
 import torch
 import torch.utils.benchmark as benchmark
 from diffusers.models.modeling_utils import ModelMixin
 from diffusers.utils.testing_utils import require_torch_gpu, torch_device
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
 logger = logging.getLogger(__name__)
 NUM_WARMUP_ROUNDS = 5
 def benchmark_fn(f, *args, **kwargs):
    """Time `f(*args, **kwargs)` with `torch.utils.benchmark.Timer` on a single thread.
    Returns:
        The mean of the `blocked_autorange()` measurement, rounded to three decimals.
    """
    timer = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"f": f, "args": args, "kwargs": kwargs},
        num_threads=1,
    )
    measurement = timer.blocked_autorange()
    # Round by formatting to three decimals, then convert back to float.
    return float(f"{measurement.mean:.3f}")
 def flush():
    """Collect Python garbage, empty the CUDA caching allocator and reset peak-memory stats."""
    gc.collect()
    torch.cuda.empty_cache()
    # `torch.cuda.reset_max_memory_allocated()` is deprecated and simply forwards to
    # `reset_peak_memory_stats()`, so the single call below covers both.
    torch.cuda.reset_peak_memory_stats()
 # Adapted from https://github.com/lucasb-eyer/cnn_vit_benchmarks/blob/15b665ff758e8062131353076153905cae00a71f/main.py
 def calculate_flops(model, input_dict):
    """Estimate the FLOPs of one forward pass as twice the MAC count from `torchprofile`.
    Args:
        model: Module whose `forward` signature is inspected and which is profiled.
        input_dict: Keyword inputs for `model.forward`.
    Returns:
        `2 * macs`, where `macs` is what `profile_macs` reports.
    Raises:
        ModuleNotFoundError: If `torchprofile` is not installed.
    """
    # A missing `torchprofile` propagates to the caller as ModuleNotFoundError.
    from torchprofile import profile_macs
    # `profile_macs` does not take kwargs, so the inputs are laid out positionally in the
    # order `forward` declares them, with defaults filling in anything not supplied.
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    forward_sig = inspect.signature(model.forward)
    ordered_names = []
    for param in forward_sig.parameters.values():
        if param.kind in positional_kinds and param.name != "self":
            ordered_names.append(param.name)
    bound_args = forward_sig.bind_partial(**input_dict)
    bound_args.apply_defaults()
    positional_inputs = tuple(bound_args.arguments[name] for name in ordered_names)
    model.eval()
    with torch.no_grad():
        macs = profile_macs(model, positional_inputs)
    # 1 MAC operation = 2 FLOPs (1 multiplication + 1 addition)
    return 2 * macs
 def calculate_params(model):
    """Return the total number of elements across all parameters of `model`."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total
 # Users can define their own in case this doesn't suffice. For most cases,
 # it should be sufficient.
 def model_init_fn(model_cls, group_offload_kwargs=None, layerwise_upcasting=False, **init_kwargs):
    model = model_cls.from_pretrained(**init_kwargs).eval()
    if group_offload_kwargs and isinstance(group_offload_kwargs, dict):
        model.enable_group_offload(**group_offload_kwargs)
    else:
        model.to(torch_device)
    if layerwise_upcasting:
        model.enable_layerwise_casting(
            storage_dtype=torch.float8_e4m3fn, compute_dtype=init_kwargs.get("torch_dtype", torch.bfloat16)
        )
    return model
@dataclass
 class BenchmarkScenario:
    """Describes one benchmark run: which model to build, how to build it, and its inputs."""
    # Label used in log messages and written to the "scenario" result column.
    name: str
    # Model class; passed as the first argument to the init function, and its `__name__`
    # is written to the "model_cls" result column.
    model_cls: ModelMixin
    # Keyword arguments expanded into the init-function call.
    model_init_kwargs: Dict[str, Any]
    # Callable invoked as `model_init_fn(model_cls, **model_init_kwargs)`; returns the model.
    model_init_fn: Callable
    # Zero-argument callable returning the kwargs dict the model is called with.
    get_model_input_dict: Callable
    # When truthy, an extra phase is run with `model.compile(**compile_kwargs)`.
    compile_kwargs: Optional[Dict[str, Any]] = None
@require_torch_gpu
 class BenchmarkMixin:
    def pre_benchmark(self):
        flush()
        torch.compiler.reset()
    def post_benchmark(self, model):
        model.cpu()
        flush()
        torch.compiler.reset()
    @torch.no_grad()
    def run_benchmark(self, scenario: BenchmarkScenario):
        # 0) Basic stats
        logger.info(f"Running scenario: {scenario.name}.")
        try:
            model = model_init_fn(scenario.model_cls, **scenario.model_init_kwargs)
            num_params = round(calculate_params(model) / 1e9, 2)
            try:
                flops = round(calculate_flops(model, input_dict=scenario.get_model_input_dict()) / 1e9, 2)
            except Exception as e:
                logger.info(f"Problem in calculating FLOPs:\n{e}")
                flops = None
            model.cpu()
            del model
        except Exception as e:
            logger.info(f"Error while initializing the model and calculating FLOPs:\n{e}")
            return {}
        self.pre_benchmark()
        # 1) plain stats
        results = {}
        plain = None
        try:
            plain = self._run_phase(
                model_cls=scenario.model_cls,
                init_fn=scenario.model_init_fn,
                init_kwargs=scenario.model_init_kwargs,
                get_input_fn=scenario.get_model_input_dict,
                compile_kwargs=None,
            )
        except Exception as e:
            logger.info(f"Benchmark could not be run with the following error:\n{e}")
            return results
        # 2) compiled stats (if any)
        compiled = {"time": None, "memory": None}
        if scenario.compile_kwargs:
            try:
                compiled = self._run_phase(
                    model_cls=scenario.model_cls,
                    init_fn=scenario.model_init_fn,
                    init_kwargs=scenario.model_init_kwargs,
                    get_input_fn=scenario.get_model_input_dict,
                    compile_kwargs=scenario.compile_kwargs,
                )
            except Exception as e:
                logger.info(f"Compilation benchmark could not be run with the following error\n: {e}")
                if plain is None:
                    return results
        # 3) merge
        result = {
            "scenario": scenario.name,
            "model_cls": scenario.model_cls.__name__,
            "num_params_B": num_params,
            "flops_G": flops,
            "time_plain_s": plain["time"],
            "mem_plain_GB": plain["memory"],
            "time_compile_s": compiled["time"],
            "mem_compile_GB": compiled["memory"],
        }
        if scenario.compile_kwargs:
            result["fullgraph"] = scenario.compile_kwargs.get("fullgraph", False)
            result["mode"] = scenario.compile_kwargs.get("mode", "default")
        else:
            result["fullgraph"], result["mode"] = None, None
        return result
    def run_bencmarks_and_collate(self, scenarios: Union[BenchmarkScenario, list[BenchmarkScenario]], filename: str):
        if not isinstance(scenarios, list):
            scenarios = [scenarios]
        record_queue = queue.Queue()
        stop_signal = object()
        def _writer_thread():
            while True:
                item = record_queue.get()
                if item is stop_signal:
                    break
                df_row = pd.DataFrame([item])
                write_header = not os.path.exists(filename)
                df_row.to_csv(filename, mode="a", header=write_header, index=False)
                record_queue.task_done()
            record_queue.task_done()
        writer = threading.Thread(target=_writer_thread, daemon=True)
        writer.start()
        for s in scenarios:
            try:
                record = self.run_benchmark(s)
                if record:
                    record_queue.put(record)
                else:
                    logger.info(f"Record empty from scenario: {s.name}.")
            except Exception as e:
                logger.info(f"Running scenario ({s.name}) led to error:\n{e}")
        record_queue.put(stop_signal)
        logger.info(f"Results serialized to {filename=}.")
    def _run_phase(
        self,
        *,
        model_cls: ModelMixin,
        init_fn: Callable,
        init_kwargs: Dict[str, Any],
        get_input_fn: Callable,
        compile_kwargs: Optional[Dict[str, Any]],
    ) -> Dict[str, float]:
        # setup
        self.pre_benchmark()
        # init & (optional) compile
        model = init_fn(model_cls, **init_kwargs)
        if compile_kwargs:
            model.compile(**compile_kwargs)
        # build inputs
        inp = get_input_fn()
        # measure
        run_ctx = torch._inductor.utils.fresh_inductor_cache() if compile_kwargs else nullcontext()
        with run_ctx:
            for _ in range(NUM_WARMUP_ROUNDS):
                _ = model(**inp)
            time_s = benchmark_fn(lambda m, d: m(**d), model, inp)
        mem_gb = torch.cuda.max_memory_allocated() / (1024**3)
        mem_gb = round(mem_gb, 2)
        # teardown
        self.post_benchmark(model)
        del model
        return {"time": time_s, "memory": mem_gb}
--- a/benchmarks/benchmarking_wan.py
+++ b/benchmarks/benchmarking_wan.py
@@ -1,74 +0,0 @@
 from functools import partial
 import torch
 from benchmarking_utils import BenchmarkMixin, BenchmarkScenario, model_init_fn
 from diffusers import WanTransformer3DModel
 from diffusers.utils.testing_utils import torch_device
 CKPT_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
 RESULT_FILENAME = "wan.csv"
 def get_input_dict(**device_dtype_kwargs):
    """Build dummy forward-pass inputs for the Wan transformer benchmark.
    Args:
        **device_dtype_kwargs: Forwarded to every tensor constructor (the callers in this
            file pass `device` and `dtype`).
    Returns:
        Dict of keyword arguments for the model call.
    """
    # height: 480
    # width: 832
    # num_frames: 81
    # max_sequence_length: 512
    # Tensors are created in the same order as the dict keys.
    return {
        "hidden_states": torch.randn(1, 16, 21, 60, 104, **device_dtype_kwargs),
        "encoder_hidden_states": torch.randn(1, 512, 4096, **device_dtype_kwargs),
        "timestep": torch.tensor([1.0], **device_dtype_kwargs),
    }
 if __name__ == "__main__":
    # Every scenario uses the same bf16 dummy inputs on the test device.
    make_inputs = partial(get_input_dict, device=torch_device, dtype=torch.bfloat16)
    def transformer_init_kwargs():
        # Returns a fresh kwargs dict per scenario so scenarios never share one.
        return {
            "pretrained_model_name_or_path": CKPT_ID,
            "torch_dtype": torch.bfloat16,
            "subfolder": "transformer",
        }
    # Group-offloading settings used by the last scenario.
    leaf_offload_kwargs = {
        "onload_device": torch_device,
        "offload_device": torch.device("cpu"),
        "offload_type": "leaf_level",
        "use_stream": True,
        "non_blocking": True,
    }
    scenarios = [
        # Plain bf16; the only scenario that also sets compile_kwargs.
        BenchmarkScenario(
            name=f"{CKPT_ID}-bf16",
            model_cls=WanTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=model_init_fn,
            get_model_input_dict=make_inputs,
            compile_kwargs={"fullgraph": True},
        ),
        # Layerwise casting enabled through the shared init function.
        BenchmarkScenario(
            name=f"{CKPT_ID}-layerwise-upcasting",
            model_cls=WanTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, layerwise_upcasting=True),
            get_model_input_dict=make_inputs,
        ),
        # Leaf-level group offloading.
        BenchmarkScenario(
            name=f"{CKPT_ID}-group-offload-leaf",
            model_cls=WanTransformer3DModel,
            model_init_kwargs=transformer_init_kwargs(),
            model_init_fn=partial(model_init_fn, group_offload_kwargs=leaf_offload_kwargs),
            get_model_input_dict=make_inputs,
        ),
    ]
    BenchmarkMixin().run_bencmarks_and_collate(scenarios, filename=RESULT_FILENAME)
--- a/benchmarks/populate_into_db.py
+++ b/benchmarks/populate_into_db.py
@@ -1,166 +0,0 @@
 import argparse
 import os
 import sys
 import gpustat
 import pandas as pd
 import psycopg2
 import psycopg2.extras
 from psycopg2.extensions import register_adapter
 from psycopg2.extras import Json
 register_adapter(dict, Json)
 FINAL_CSV_FILENAME = "collated_results.csv"
 # https://github.com/huggingface/transformers/blob/593e29c5e2a9b17baec010e8dc7c1431fed6e841/benchmark/init_db.sql#L27
 BENCHMARKS_TABLE_NAME = "benchmarks"
 MEASUREMENTS_TABLE_NAME = "model_measurements"
 def _init_benchmark(conn, branch, commit_id, commit_msg):
    """Insert a row describing this run into the benchmarks table and return its id.
    Args:
        conn: Open database connection; a cursor is opened on it for the insert.
        branch: Branch name stored with the row.
        commit_id: Commit hash stored with the row.
        commit_msg: Commit message stored with the row.
    Returns:
        The `benchmark_id` returned by the INSERT statement.
    """
    # The name of the first GPU reported by gpustat is the only metadata recorded.
    first_gpu = gpustat.GPUStatCollection.new_query()[0]
    metadata = {"gpu_name": first_gpu["name"]}
    repository = "huggingface/diffusers"
    # Only the table name (a module constant) is interpolated; values go through placeholders.
    insert_sql = f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id"
    row_values = (repository, branch, commit_id, commit_msg, metadata)
    with conn.cursor() as cur:
        cur.execute(insert_sql, row_values)
        benchmark_id = cur.fetchone()[0]
        print(f"Initialised benchmark #{benchmark_id}")
        return benchmark_id
 def parse_args():
    """Parse the three required positional arguments: `branch`, `commit_id` and `commit_msg`.
    Returns:
        The `argparse.Namespace` holding the three string values.
    """
    # (name, help text) for each positional argument, in command-line order.
    positional_arguments = (
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    )
    parser = argparse.ArgumentParser()
    for arg_name, help_text in positional_arguments:
        parser.add_argument(arg_name, type=str, help=help_text)
    return parser.parse_args()
 if __name__ == "__main__":
    args = parse_args()
    # Connection parameters come from the PG* environment variables.
    try:
        conn = psycopg2.connect(
            host=os.getenv("PGHOST"),
            database=os.getenv("PGDATABASE"),
            user=os.getenv("PGUSER"),
            password=os.getenv("PGPASSWORD"),
        )
        print("DB connection established successfully.")
    except Exception as e:
        print(f"Problem during DB init: {e}")
        sys.exit(1)
    try:
        benchmark_id = _init_benchmark(
            conn=conn,
            branch=args.branch,
            commit_id=args.commit_id,
            commit_msg=args.commit_msg,
        )
    except Exception as e:
        print(f"Problem during initializing benchmark: {e}")
        sys.exit(1)
    cur = conn.cursor()
    df = pd.read_csv(FINAL_CSV_FILENAME)
    # Helper to cast values (or None) given a dtype
    def _cast_value(val, dtype: str):
        # Missing values become None regardless of the requested dtype.
        if pd.isna(val):
            return None
        if dtype == "text":
            return str(val).strip()
        elif dtype == "float":
            try:
                return float(val)
            except ValueError:
                return None
        elif dtype == "bool":
            # Try the textual spellings first, then fall back to numeric 1/0.
            normalized = str(val).strip().lower()
            if normalized in ("true", "t", "yes", "1"):
                return True
            if normalized in ("false", "f", "no", "0"):
                return False
            if val in (1, 1.0):
                return True
            if val in (0, 0.0):
                return False
            return None
        # Unknown dtype: hand the value back untouched.
        return val
    # CSV column -> cast type, in the key order used for the stored measurements.
    column_dtypes = (
        ("scenario", "text"),
        ("model_cls", "text"),
        ("num_params_B", "float"),
        ("flops_G", "float"),
        ("time_plain_s", "float"),
        ("mem_plain_GB", "float"),
        ("time_compile_s", "float"),
        ("mem_compile_GB", "float"),
        ("fullgraph", "bool"),
        ("mode", "text"),
    )
    try:
        rows_to_insert = []
        for _, row in df.iterrows():
            measurements = {column: _cast_value(row.get(column), dtype) for column, dtype in column_dtypes}
            # If "github_sha" column exists in the CSV, cast it; else default to None
            if "github_sha" in df.columns:
                measurements["github_sha"] = _cast_value(row.get("github_sha"), "text")
            else:
                measurements["github_sha"] = None
            rows_to_insert.append((benchmark_id, measurements))
        # Batch-insert all rows
        insert_sql = f"""
        INSERT INTO {MEASUREMENTS_TABLE_NAME} (
            benchmark_id,
            measurements
        )
        VALUES (%s, %s);
        """
        psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
        conn.commit()
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Exception: {e}")
        sys.exit(1)
--- a/benchmarks/push_results.py
+++ b/benchmarks/push_results.py
@@ -1,19 +1,19 @@
-import os
+import glob
 import sys
 import pandas as pd
 from huggingface_hub import hf_hub_download, upload_file
 from huggingface_hub.utils import EntryNotFoundError
-REPO_ID = "diffusers/benchmarks"
+sys.path.append(".")
 from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv  # noqa: E402
 def has_previous_benchmark() -> str:
    from run_all import FINAL_CSV_FILENAME
    csv_path = None
    try:
-        csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILENAME)
+        csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
    except EntryNotFoundError:
        csv_path = None
    return csv_path
@@ -26,50 +26,46 @@ def filter_float(value):
 def push_to_hf_dataset():
-    from run_all import FINAL_CSV_FILENAME, GITHUB_SHA
+    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
    collate_csv(all_csvs, FINAL_CSV_FILE)
    # If there's an existing benchmark file, we should report the changes.
    csv_path = has_previous_benchmark()
    if csv_path is not None:
-        current_results = pd.read_csv(FINAL_CSV_FILENAME)
+        current_results = pd.read_csv(FINAL_CSV_FILE)
        previous_results = pd.read_csv(csv_path)
        numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
        numeric_columns = [
            c for c in numeric_columns if c not in ["batch_size", "num_inference_steps", "actual_gpu_memory (gbs)"]
        ]
        for column in numeric_columns:
-            # get previous values as floats, aligned to current index
+            previous_results[column] = previous_results[column].map(lambda x: filter_float(x))
            prev_vals = previous_results[column].map(filter_float).reindex(current_results.index)
-            # get current values as floats
+            # Calculate the percentage change
-            curr_vals = current_results[column].astype(float)
+            current_results[column] = current_results[column].astype(float)
            previous_results[column] = previous_results[column].astype(float)
            percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100
-            # stringify the current values
+            # Format the values with '+' or '-' sign and append to original values
-            curr_str = curr_vals.map(str)
+            current_results[column] = current_results[column].map(str) + percent_change.map(
-
+                lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
            # build an appendage only when prev exists and differs
            append_str = prev_vals.where(prev_vals.notnull() & (prev_vals != curr_vals), other=pd.NA).map(
                lambda x: f" ({x})" if pd.notnull(x) else ""
            )
            # There might be newly added rows. So, filter out the NaNs.
            current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))
-            # combine
+        # Overwrite the current result file.
-            current_results[column] = curr_str + append_str
+        current_results.to_csv(FINAL_CSV_FILE, index=False)
        os.remove(FINAL_CSV_FILENAME)
        current_results.to_csv(FINAL_CSV_FILENAME, index=False)
    commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
    upload_file(
        repo_id=REPO_ID,
-        path_in_repo=FINAL_CSV_FILENAME,
+        path_in_repo=FINAL_CSV_FILE,
-        path_or_fileobj=FINAL_CSV_FILENAME,
+        path_or_fileobj=FINAL_CSV_FILE,
        repo_type="dataset",
        commit_message=commit_message,
    )
    upload_file(
        repo_id="diffusers/benchmark-analyzer",
        path_in_repo=FINAL_CSV_FILENAME,
        path_or_fileobj=FINAL_CSV_FILENAME,
        repo_type="space",
        commit_message=commit_message,
    )
 if __name__ == "__main__":
--- a/benchmarks/requirements.txt
+++ b/benchmarks/requirements.txt
@@ -1,6 +0,0 @@
 pandas 
 psutil
 gpustat
 torchprofile
 bitsandbytes
 psycopg2==2.9.9
--- a/benchmarks/run_all.py
+++ b/benchmarks/run_all.py
@@ -1,84 +1,101 @@
 import glob
 import logging
 import os
 import subprocess
-
+import sys
-import pandas as pd
+from typing import List
-logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+sys.path.append(".")
-logger = logging.getLogger(__name__)
+from benchmark_text_to_image import ALL_T2I_CKPTS  # noqa: E402
-PATTERN = "benchmarking_*.py"
+
-FINAL_CSV_FILENAME = "collated_results.csv"
+PATTERN = "benchmark_*.py"
 GITHUB_SHA = os.getenv("GITHUB_SHA", None)
 class SubprocessCallException(Exception):
    pass
-def run_command(command: list[str], return_stdout=False):
+# Taken from `test_examples_utils.py`
 def run_command(command: List[str], return_stdout=False):
    """
    Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
    if an error occurred while running `command`
    """
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
-        if return_stdout and hasattr(output, "decode"):
+        if return_stdout:
-            return output.decode("utf-8")
+            if hasattr(output, "decode"):
                output = output.decode("utf-8")
            return output
    except subprocess.CalledProcessError as e:
-        raise SubprocessCallException(f"Command `{' '.join(command)}` failed with:\n{e.output.decode()}") from e
+        raise SubprocessCallException(
            f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
        ) from e
-def merge_csvs(final_csv: str = "collated_results.csv"):
+def main():
-    all_csvs = glob.glob("*.csv")
+    python_files = glob.glob(PATTERN)
    all_csvs = [f for f in all_csvs if f != final_csv]
    if not all_csvs:
        logger.info("No result CSVs found to merge.")
        return
    df_list = []
    for f in all_csvs:
        try:
            d = pd.read_csv(f)
        except pd.errors.EmptyDataError:
            # If a file existed but was zero‐bytes or corrupted, skip it
            continue
        df_list.append(d)
    if not df_list:
        logger.info("All result CSVs were empty or invalid; nothing to merge.")
        return
    final_df = pd.concat(df_list, ignore_index=True)
    if GITHUB_SHA is not None:
        final_df["github_sha"] = GITHUB_SHA
    final_df.to_csv(final_csv, index=False)
    logger.info(f"Merged {len(all_csvs)} partial CSVs → {final_csv}.")
 def run_scripts():
    python_files = sorted(glob.glob(PATTERN))
    python_files = [f for f in python_files if f != "benchmarking_utils.py"]
    for file in python_files:
-        script_name = file.split(".py")[0].split("_")[-1]  # example: benchmarking_foo.py -> foo
+        print(f"****** Running file: {file} ******")
        logger.info(f"\n****** Running file: {file} ******")
-        partial_csv = f"{script_name}.csv"
+        # Run with canonical settings.
-        if os.path.exists(partial_csv):
+        if file != "benchmark_text_to_image.py" and file != "benchmark_ip_adapters.py":
-            logger.info(f"Found {partial_csv}. Removing for safer numbers and duplication.")
+            command = f"python {file}"
-            os.remove(partial_csv)
+            run_command(command.split())
-        command = ["python", file]
+            command += " --run_compile"
-        try:
+            run_command(command.split())
            run_command(command)
            logger.info(f"→ {file} finished normally.")
        except SubprocessCallException as e:
            logger.info(f"Error running {file}:\n{e}")
        finally:
            logger.info(f"→ Merging partial CSVs after {file} …")
            merge_csvs(final_csv=FINAL_CSV_FILENAME)
-    logger.info(f"\nAll scripts attempted. Final collated CSV: {FINAL_CSV_FILENAME}")
+    # Run variants.
    for file in python_files:
        # See: https://github.com/pytorch/pytorch/issues/129637
        if file == "benchmark_ip_adapters.py":
            continue
        if file == "benchmark_text_to_image.py":
            for ckpt in ALL_T2I_CKPTS:
                command = f"python {file} --ckpt {ckpt}"
                if "turbo" in ckpt:
                    command += " --num_inference_steps 1"
                run_command(command.split())
                command += " --run_compile"
                run_command(command.split())
        elif file == "benchmark_sd_img.py":
            for ckpt in ["stabilityai/stable-diffusion-xl-refiner-1.0", "stabilityai/sdxl-turbo"]:
                command = f"python {file} --ckpt {ckpt}"
                if ckpt == "stabilityai/sdxl-turbo":
                    command += " --num_inference_steps 2"
                run_command(command.split())
                command += " --run_compile"
                run_command(command.split())
        elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
            sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
            command = f"python {file} --ckpt {sdxl_ckpt}"
            run_command(command.split())
            command += " --run_compile"
            run_command(command.split())
        elif file in ["benchmark_controlnet.py", "benchmark_t2i_adapter.py"]:
            sdxl_ckpt = (
                "diffusers/controlnet-canny-sdxl-1.0"
                if "controlnet" in file
                else "TencentARC/t2i-adapter-canny-sdxl-1.0"
            )
            command = f"python {file} --ckpt {sdxl_ckpt}"
            run_command(command.split())
            command += " --run_compile"
            run_command(command.split())
 if __name__ == "__main__":
-    run_scripts()
+    main()
--- a/benchmarks/utils.py
+++ b/benchmarks/utils.py
@@ -0,0 +1,98 @@
 import argparse
 import csv
 import gc
 import os
 from dataclasses import dataclass
 from typing import Dict, List, Union
 import torch
 import torch.utils.benchmark as benchmark
 # Commit SHA read from the GITHUB_SHA environment variable; None when it is unset.
 GITHUB_SHA = os.getenv("GITHUB_SHA", None)
 # Column order shared by every CSV written or collated by this module.
 BENCHMARK_FIELDS = [
    "pipeline_cls",
    "ckpt_id",
    "batch_size",
    "num_inference_steps",
    "model_cpu_offload",
    "run_compile",
    "time (secs)",
    "memory (gbs)",
    "actual_gpu_memory (gbs)",
    "github_sha",
 ]
 PROMPT = "ghibli style, a fantasy landscape with castles"
 BASE_PATH = os.getenv("BASE_PATH", ".")
 # Total GPU memory in GiB. `os.getenv(key, default)` evaluates its default eagerly, so the
 # CUDA device is only queried here when the TOTAL_GPU_MEMORY override is actually absent.
 _total_gpu_memory_override = os.getenv("TOTAL_GPU_MEMORY")
 TOTAL_GPU_MEMORY = float(
    _total_gpu_memory_override
    if _total_gpu_memory_override is not None
    else torch.cuda.get_device_properties(0).total_memory / (1024**3)
 )
 REPO_ID = "diffusers/benchmarks"
 FINAL_CSV_FILE = "collated_results.csv"
@dataclass
 class BenchmarkInfo:
    """Container for the time and memory measurements of a benchmark."""

    # Written to the "time (secs)" CSV column by `generate_csv_dict`.
    time: float
    # Written to the "memory (gbs)" CSV column by `generate_csv_dict`.
    memory: float
 def flush():
    """Wipes off memory.

    Runs the Python garbage collector, empties the CUDA caching allocator and
    resets the CUDA peak-memory statistics.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # `torch.cuda.reset_max_memory_allocated()` is deprecated and simply forwards to
    # `reset_peak_memory_stats()`, so one call is enough to reset every peak counter.
    torch.cuda.reset_peak_memory_stats()
 def bytes_to_giga_bytes(bytes):
    """Convert a byte count to GiB and return it as a string with three decimals."""
    gibibytes = bytes / (1024**3)
    return format(gibibytes, ".3f")
 def benchmark_fn(f, *args, **kwargs):
    """Time `f(*args, **kwargs)` with `torch.utils.benchmark.Timer`.

    Returns the mean of `Timer.blocked_autorange()` formatted as a string with
    three decimals.
    """
    timer_globals = {"args": args, "kwargs": kwargs, "f": f}
    timer = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals=timer_globals,
        num_threads=torch.get_num_threads(),
    )
    measurement = timer.blocked_autorange()
    return format(measurement.mean, ".3f")
 def generate_csv_dict(
    pipeline_cls: str, ckpt: str, args: argparse.Namespace, benchmark_info: BenchmarkInfo
 ) -> Dict[str, Union[str, bool, float]]:
    """Build one CSV row, keyed by column name, for later serialization.

    Args:
        pipeline_cls: Value stored in the "pipeline_cls" column.
        ckpt: Value stored in the "ckpt_id" column.
        args: Parsed CLI arguments; `batch_size`, `num_inference_steps`,
            `model_cpu_offload` and `run_compile` are read from it.
        benchmark_info: Supplies the "time (secs)" and "memory (gbs)" values.

    Returns:
        A dictionary holding the row values, including the module-level
        `TOTAL_GPU_MEMORY` (three decimals) and `GITHUB_SHA`.
    """
    total_gpu_memory = format(TOTAL_GPU_MEMORY, ".3f")
    return {
        "pipeline_cls": pipeline_cls,
        "ckpt_id": ckpt,
        "batch_size": args.batch_size,
        "num_inference_steps": args.num_inference_steps,
        "model_cpu_offload": args.model_cpu_offload,
        "run_compile": args.run_compile,
        "time (secs)": benchmark_info.time,
        "memory (gbs)": benchmark_info.memory,
        "actual_gpu_memory (gbs)": total_gpu_memory,
        "github_sha": GITHUB_SHA,
    }
 def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
    """Write a header line and one data row to `file_name`, replacing any existing file."""
    with open(file_name, mode="w", newline="") as out_handle:
        row_writer = csv.DictWriter(out_handle, fieldnames=BENCHMARK_FIELDS)
        row_writer.writeheader()
        row_writer.writerows([data_dict])
 def collate_csv(input_files: List[str], output_file: str):
    """Collates multiple identically structured CSVs into a single CSV file.

    Args:
        input_files: Paths of the CSV files to merge, processed in the given order.
        output_file: Path of the merged CSV; it is overwritten if it already exists.

    The merged file gets a single header built from `BENCHMARK_FIELDS`, followed by
    the data rows of every input file.
    """
    with open(output_file, mode="w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=BENCHMARK_FIELDS)
        writer.writeheader()
        for file in input_files:
            # The csv module expects files opened with newline="" so that newlines
            # embedded in quoted fields reach the reader untouched.
            with open(file, mode="r", newline="") as infile:
                writer.writerows(csv.DictReader(infile))
--- a/docker/diffusers-doc-builder/Dockerfile
+++ b/docker/diffusers-doc-builder/Dockerfile
@@ -1,45 +1,52 @@
-FROM python:3.10-slim
+FROM ubuntu:20.04
 ENV PYTHONDONTWRITEBYTECODE=1
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get -y update && apt-get install -y bash \
+RUN apt-get -y update \
-    build-essential \
+    && apt-get install -y software-properties-common \
-    git \
+    && add-apt-repository ppa:deadsnakes/ppa
    git-lfs \
    curl \
    ca-certificates \
    libglib2.0-0 \
    libsndfile1-dev \
    libgl1 \
    zip \
    wget
-ENV UV_PYTHON=/usr/local/bin/python
+RUN apt install -y bash \
                   build-essential \
                   git \
                   git-lfs \
                   curl \
                   ca-certificates \
                   libsndfile1-dev \
                   python3.10 \
                   python3-pip \
                   libgl1 \
                   zip \
                   wget \
                   python3.10-venv && \
    rm -rf /var/lib/apt/lists
 # make sure to use venv
 RUN python3.10 -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN pip install uv
+RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-RUN uv pip install --no-cache-dir \
+    python3.10 -m uv pip install --no-cache-dir \
-    torch \
+        torch \
-    torchvision \
+        torchvision \
-    torchaudio \
+        torchaudio \
-    --extra-index-url https://download.pytorch.org/whl/cpu
+        invisible_watermark \
-
+        --extra-index-url https://download.pytorch.org/whl/cpu && \
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
+    python3.10 -m uv pip install --no-cache-dir \
-
+        accelerate \
-# Extra dependencies
+        datasets \
-RUN uv pip install --no-cache-dir \
+        hf-doc-builder \
-    accelerate \
+        huggingface-hub \
-    numpy==1.26.4 \
+        Jinja2 \
-    hf_xet \
+        librosa \
-    setuptools==69.5.1 \
+        numpy==1.26.4 \
-    bitsandbytes \
+        scipy \
-    torchao \
+        tensorboard \
-    gguf \
+        transformers \
-    optimum-quanto
+        matplotlib \
-
+        setuptools==69.5.1
 RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
 CMD ["/bin/bash"]
--- a/docker/diffusers-flax-cpu/Dockerfile
+++ b/docker/diffusers-flax-cpu/Dockerfile
@@ -0,0 +1,49 @@
 FROM ubuntu:20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa
 RUN apt install -y bash \
        build-essential \
        git \
        git-lfs \
        curl \
        ca-certificates \
        libsndfile1-dev \
        libgl1 \
        python3.10 \
        python3-pip \
        python3.10-venv && \
    rm -rf /var/lib/apt/lists
 # make sure to use venv
 RUN python3.10 -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
 RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3 -m uv pip install --upgrade --no-cache-dir \
        clu \
        "jax[cpu]>=0.2.16,!=0.3.2" \
        "flax>=0.4.1" \
        "jaxlib>=0.1.65" && \
    python3 -m uv pip install --no-cache-dir \
        accelerate \
        datasets \
        hf-doc-builder \
        huggingface-hub \
        Jinja2 \
        librosa \
        numpy==1.26.4 \
        scipy \
        tensorboard \
        transformers \
        hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-flax-tpu/Dockerfile
+++ b/docker/diffusers-flax-tpu/Dockerfile
@@ -0,0 +1,51 @@
 FROM ubuntu:20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa
 RUN apt install -y bash \
                   build-essential \
                   git \
                   git-lfs \
                   curl \
                   ca-certificates \
                   libsndfile1-dev \
                   libgl1 \
                   python3.10 \
                   python3-pip \
                   python3.10-venv && \
    rm -rf /var/lib/apt/lists
 # make sure to use venv
 RUN python3.10 -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
 RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3 -m pip install --no-cache-dir \
        "jax[tpu]>=0.2.16,!=0.3.2" \
        -f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
    python3 -m uv pip install --upgrade --no-cache-dir \
        clu \
        "flax>=0.4.1" \
        "jaxlib>=0.1.65" && \
    python3 -m uv pip install --no-cache-dir \
        accelerate \
        datasets \
        hf-doc-builder \
        huggingface-hub \
        Jinja2 \
        librosa \
        numpy==1.26.4 \
        scipy \
        tensorboard \
        transformers \
        hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-onnxruntime-cpu/Dockerfile
+++ b/docker/diffusers-onnxruntime-cpu/Dockerfile
@@ -28,9 +28,9 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3 -m uv pip install --no-cache-dir \
-        torch \
+        torch==2.1.2 \
-        torchvision \
+        torchvision==0.16.2 \
-        torchaudio\
+        torchaudio==2.1.2 \
        onnxruntime \
        --extra-index-url https://download.pytorch.org/whl/cpu && \
    python3 -m uv pip install --no-cache-dir \
@@ -44,6 +44,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
        scipy \
        tensorboard \
        transformers \
-        hf_xet
+        hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-onnxruntime-cuda/Dockerfile
+++ b/docker/diffusers-onnxruntime-cuda/Dockerfile
@@ -38,12 +38,13 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
        datasets \
        hf-doc-builder \
        huggingface-hub \
-        hf_xet \
+        hf_transfer \
        Jinja2 \
        librosa \
        numpy==1.26.4 \
        scipy \
        tensorboard \
-        transformers
+        transformers \
        hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-pytorch-compile-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile
@@ -0,0 +1,50 @@
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa
 RUN apt install -y bash \
    build-essential \
    git \
    git-lfs \
    curl \
    ca-certificates \
    libsndfile1-dev \
    libgl1 \
    python3.10 \
    python3.10-dev \
    python3-pip \
    python3.10-venv && \
    rm -rf /var/lib/apt/lists
 # make sure to use venv
 RUN python3.10 -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 # Upgrade pip and pin uv, install the torch stack through uv, then install the
 # remaining Python packages with pip. Each package is listed exactly once.
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3.10 -m uv pip install --no-cache-dir \
    torch \
    torchvision \
    torchaudio \
    invisible_watermark && \
    python3.10 -m pip install --no-cache-dir \
    accelerate \
    datasets \
    hf-doc-builder \
    huggingface-hub \
    hf_transfer \
    Jinja2 \
    librosa \
    numpy==1.26.4 \
    scipy \
    tensorboard \
    transformers
 CMD ["/bin/bash"]
--- a/docker/diffusers-pytorch-cpu/Dockerfile
+++ b/docker/diffusers-pytorch-cpu/Dockerfile
@@ -1,38 +1,50 @@
-FROM python:3.10-slim
+FROM ubuntu:20.04
 ENV PYTHONDONTWRITEBYTECODE=1
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get -y update && apt-get install -y bash \
+RUN apt-get -y update \
-    build-essential \
+    && apt-get install -y software-properties-common \
-    git \
+    && add-apt-repository ppa:deadsnakes/ppa
    git-lfs \
    curl \
    ca-certificates \
    libglib2.0-0 \
    libsndfile1-dev \
    libgl1
-ENV UV_PYTHON=/usr/local/bin/python
+RUN apt install -y bash \
                   build-essential \
                   git \
                   git-lfs \
                   curl \
                   ca-certificates \
                   libsndfile1-dev \
                   python3.10 \
                   python3.10-dev \
                   python3-pip \
                   libgl1 \
                   python3.10-venv && \
    rm -rf /var/lib/apt/lists
 # make sure to use venv
 RUN python3.10 -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN pip install uv
+RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-RUN uv pip install --no-cache-dir \
+    python3.10 -m uv pip install --no-cache-dir \
-    torch \
+        torch \
-    torchvision \
+        torchvision \
-    torchaudio \
+        torchaudio \
-    --extra-index-url https://download.pytorch.org/whl/cpu
+        invisible_watermark \
-
+        --extra-index-url https://download.pytorch.org/whl/cpu && \
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
+    python3.10 -m uv pip install --no-cache-dir \
-
+        accelerate \
-# Extra dependencies
+        datasets \
-RUN uv pip install --no-cache-dir \
+        hf-doc-builder \
-    accelerate \
+        huggingface-hub \
-    numpy==1.26.4 \
+        Jinja2 \
-    hf_xet
+        librosa \
-
+        numpy==1.26.4 \
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+        scipy \
        tensorboard \
        transformers matplotlib  \
        hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-pytorch-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-cuda/Dockerfile
@@ -2,13 +2,11 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ARG PYTHON_VERSION=3.12
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa && \
+    && add-apt-repository ppa:deadsnakes/ppa
    apt-get update
 RUN apt install -y bash \
    build-essential \
@@ -16,34 +14,38 @@ RUN apt install -y bash \
    git-lfs \
    curl \
    ca-certificates \
    libglib2.0-0 \
    libsndfile1-dev \
    libgl1 \
-    python3 \
+    python3.10 \
    python3.10-dev \
    python3-pip \
-    && apt-get clean \
+    python3.10-venv && \
-    && rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+# make sure to use venv
-ENV PATH="/root/.local/bin:$PATH"
+RUN python3.10 -m venv /opt/venv
-ENV VIRTUAL_ENV="/opt/venv"
+ENV PATH="/opt/venv/bin:$PATH"
 ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
 RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN uv pip install --no-cache-dir \
+RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3.10 -m uv pip install --no-cache-dir \
    torch \
    torchvision \
-    torchaudio
+    torchaudio \
-
+    invisible_watermark && \
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
+    python3.10 -m pip install --no-cache-dir \
 # Extra dependencies
 RUN uv pip install --no-cache-dir \
    accelerate \
    datasets \
    hf-doc-builder \
    huggingface-hub \
    hf_transfer \
    Jinja2 \
    librosa \
    numpy==1.26.4 \
-    pytorch-lightning \
+    scipy \
-    hf_xet
+    tensorboard \
    transformers \
    pytorch-lightning  \
    hf_transfer
 CMD ["/bin/bash"]
--- a/docker/diffusers-pytorch-minimum-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-minimum-cuda/Dockerfile
@@ -1,52 +0,0 @@
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ARG PYTHON_VERSION=3.10
 ENV DEBIAN_FRONTEND=noninteractive
 ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0"
 ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0"
 ENV MINIMUM_SUPPORTED_TORCHAUDIO_VERSION="2.1.0"
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update
 RUN apt install -y bash \
    build-essential \
    git \
    git-lfs \
    curl \
    ca-certificates \
    libglib2.0-0 \
    libsndfile1-dev \
    libgl1 \
    python3 \
    python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 ENV PATH="/root/.local/bin:$PATH"
 ENV VIRTUAL_ENV="/opt/venv"
 ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
 RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN uv pip install --no-cache-dir \
    torch==$MINIMUM_SUPPORTED_TORCH_VERSION \
    torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \
    torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION
 RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
 # Extra dependencies
 RUN uv pip install --no-cache-dir \
    accelerate \
    numpy==1.26.4 \
    pytorch-lightning \
    hf_xet
 CMD ["/bin/bash"]
--- a/docker/diffusers-pytorch-xformers-cuda/Dockerfile
+++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
@@ -2,49 +2,50 @@ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
 LABEL maintainer="Hugging Face"
 LABEL repository="diffusers"
 ARG PYTHON_VERSION=3.12
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update \
    && apt-get install -y software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa && \
+    && add-apt-repository ppa:deadsnakes/ppa
    apt-get update
 RUN apt install -y bash \
-    build-essential \
+                   build-essential \
-    git \
+                   git \
-    git-lfs \
+                   git-lfs \
-    curl \
+                   curl \
-    ca-certificates \
+                   ca-certificates \
-    libglib2.0-0 \
+                   libsndfile1-dev \
-    libsndfile1-dev \
+                   libgl1 \
-    libgl1 \
+                   python3.10 \
-    python3 \
+                   python3.10-dev \
-    python3-pip \
+                   python3-pip \
-    && apt-get clean \
+                   python3.10-venv && \
-    && rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+# make sure to use venv
-ENV PATH="/root/.local/bin:$PATH"
+RUN python3.10 -m venv /opt/venv
-ENV VIRTUAL_ENV="/opt/venv"
+ENV PATH="/opt/venv/bin:$PATH"
 ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
 RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN uv pip install --no-cache-dir \
+RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-    torch \
+    python3.10 -m pip install --no-cache-dir \
-    torchvision \
+        torch \
-    torchaudio
+        torchvision \
-
+        torchaudio \
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/diffusers.git@main#egg=diffusers[test]"
+        invisible_watermark && \
-
+    python3.10 -m uv pip install --no-cache-dir \
-# Extra dependencies
+        accelerate \
-RUN uv pip install --no-cache-dir \
+        datasets \
-    accelerate \
+        hf-doc-builder \
-    numpy==1.26.4 \
+        huggingface-hub \
-    pytorch-lightning \
+        hf_transfer \
-    hf_xet \
+        Jinja2 \
-    xformers
+        librosa \
        numpy==1.26.4 \
        scipy \
        tensorboard \
        transformers \
        xformers  \
        hf_transfer
 CMD ["/bin/bash"]
--- a/docs/TRANSLATING.md
+++ b/docs/TRANSLATING.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
--- a/docs/source/en/advanced_inference/outpaint.md
+++ b/docs/source/en/advanced_inference/outpaint.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/activations.md
+++ b/docs/source/en/api/activations.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -25,16 +25,3 @@ Customized activation functions for supporting various models in 🤗 Diffusers.
 ## ApproximateGELU
 [[autodoc]] models.activations.ApproximateGELU
 ## SwiGLU
 [[autodoc]] models.activations.SwiGLU
 ## FP32SiLU
 [[autodoc]] models.activations.FP32SiLU
 ## LinearActivation
 [[autodoc]] models.activations.LinearActivation
--- a/docs/source/en/api/attnprocessor.md
+++ b/docs/source/en/api/attnprocessor.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -15,152 +15,40 @@ specific language governing permissions and limitations under the License.
 An attention processor is a class for applying different types of attention mechanisms.
 ## AttnProcessor
 [[autodoc]] models.attention_processor.AttnProcessor
 ## AttnProcessor2_0
 [[autodoc]] models.attention_processor.AttnProcessor2_0
 ## AttnAddedKVProcessor
 [[autodoc]] models.attention_processor.AttnAddedKVProcessor
 ## AttnAddedKVProcessor2_0
 [[autodoc]] models.attention_processor.AttnAddedKVProcessor2_0
 [[autodoc]] models.attention_processor.AttnProcessorNPU
 [[autodoc]] models.attention_processor.FusedAttnProcessor2_0
 ## Allegro
 [[autodoc]] models.attention_processor.AllegroAttnProcessor2_0
 ## AuraFlow
 [[autodoc]] models.attention_processor.AuraFlowAttnProcessor2_0
 [[autodoc]] models.attention_processor.FusedAuraFlowAttnProcessor2_0
 ## CogVideoX
 [[autodoc]] models.attention_processor.CogVideoXAttnProcessor2_0
 [[autodoc]] models.attention_processor.FusedCogVideoXAttnProcessor2_0
 ## CrossFrameAttnProcessor
 [[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
-## Custom Diffusion
+## CustomDiffusionAttnProcessor
 [[autodoc]] models.attention_processor.CustomDiffusionAttnProcessor
 ## CustomDiffusionAttnProcessor2_0
 [[autodoc]] models.attention_processor.CustomDiffusionAttnProcessor2_0
 ## CustomDiffusionXFormersAttnProcessor
 [[autodoc]] models.attention_processor.CustomDiffusionXFormersAttnProcessor
-## Flux
+## FusedAttnProcessor2_0
-
+[[autodoc]] models.attention_processor.FusedAttnProcessor2_0
 [[autodoc]] models.attention_processor.FluxAttnProcessor2_0
 [[autodoc]] models.attention_processor.FusedFluxAttnProcessor2_0
 [[autodoc]] models.attention_processor.FluxSingleAttnProcessor2_0
 ## Hunyuan
 [[autodoc]] models.attention_processor.HunyuanAttnProcessor2_0
 [[autodoc]] models.attention_processor.FusedHunyuanAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGHunyuanAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGCFGHunyuanAttnProcessor2_0
 ## IdentitySelfAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGIdentitySelfAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGCFGIdentitySelfAttnProcessor2_0
 ## IP-Adapter
 [[autodoc]] models.attention_processor.IPAdapterAttnProcessor
 [[autodoc]] models.attention_processor.IPAdapterAttnProcessor2_0
 [[autodoc]] models.attention_processor.SD3IPAdapterJointAttnProcessor2_0
 ## JointAttnProcessor2_0
 [[autodoc]] models.attention_processor.JointAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGJointAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGCFGJointAttnProcessor2_0
 [[autodoc]] models.attention_processor.FusedJointAttnProcessor2_0
 ## LoRA
 [[autodoc]] models.attention_processor.LoRAAttnProcessor
 [[autodoc]] models.attention_processor.LoRAAttnProcessor2_0
 [[autodoc]] models.attention_processor.LoRAAttnAddedKVProcessor
 [[autodoc]] models.attention_processor.LoRAXFormersAttnProcessor
 ## Lumina-T2X
 [[autodoc]] models.attention_processor.LuminaAttnProcessor2_0
 ## Mochi
 [[autodoc]] models.attention_processor.MochiAttnProcessor2_0
 [[autodoc]] models.attention_processor.MochiVaeAttnProcessor2_0
 ## Sana
 [[autodoc]] models.attention_processor.SanaLinearAttnProcessor2_0
 [[autodoc]] models.attention_processor.SanaMultiscaleAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGCFGSanaLinearAttnProcessor2_0
 [[autodoc]] models.attention_processor.PAGIdentitySanaLinearAttnProcessor2_0
 ## Stable Audio
 [[autodoc]] models.attention_processor.StableAudioAttnProcessor2_0
 ## SlicedAttnProcessor
 [[autodoc]] models.attention_processor.SlicedAttnProcessor
 ## SlicedAttnAddedKVProcessor
 [[autodoc]] models.attention_processor.SlicedAttnAddedKVProcessor
 ## XFormersAttnProcessor
 [[autodoc]] models.attention_processor.XFormersAttnProcessor
-[[autodoc]] models.attention_processor.XFormersAttnAddedKVProcessor
+## AttnProcessorNPU
-
+[[autodoc]] models.attention_processor.AttnProcessorNPU
 ## XLAFlashAttnProcessor2_0
 [[autodoc]] models.attention_processor.XLAFlashAttnProcessor2_0
 ## XFormersJointAttnProcessor
 [[autodoc]] models.attention_processor.XFormersJointAttnProcessor
 ## IPAdapterXFormersAttnProcessor
 [[autodoc]] models.attention_processor.IPAdapterXFormersAttnProcessor
 ## FluxIPAdapterJointAttnProcessor2_0
 [[autodoc]] models.attention_processor.FluxIPAdapterJointAttnProcessor2_0
 ## XLAFluxFlashAttnProcessor2_0
 [[autodoc]] models.attention_processor.XLAFluxFlashAttnProcessor2_0
--- a/docs/source/en/api/cache.md
+++ b/docs/source/en/api/cache.md
@@ -1,42 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # Caching methods
 Cache methods speed up diffusion transformers by storing and reusing intermediate outputs of specific layers, such as attention and feedforward layers, instead of recalculating them at each inference step.
 ## CacheMixin
 [[autodoc]] CacheMixin
 ## PyramidAttentionBroadcastConfig
 [[autodoc]] PyramidAttentionBroadcastConfig
 [[autodoc]] apply_pyramid_attention_broadcast
 ## FasterCacheConfig
 [[autodoc]] FasterCacheConfig
 [[autodoc]] apply_faster_cache
 ### FirstBlockCacheConfig
 [[autodoc]] FirstBlockCacheConfig
 [[autodoc]] apply_first_block_cache
 ### TaylorSeerCacheConfig
 [[autodoc]] TaylorSeerCacheConfig
 [[autodoc]] apply_taylorseer_cache
--- a/docs/source/en/api/configuration.md
+++ b/docs/source/en/api/configuration.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -14,8 +14,11 @@ specific language governing permissions and limitations under the License.
 Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which stores all the parameters that are passed to their respective `__init__` methods in a JSON-configuration file.
-> [!TIP]
+<Tip>
-> To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf auth login`.
+
 To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `huggingface-cli login`.
 </Tip>
 ## ConfigMixin
--- a/docs/source/en/api/image_processor.md
+++ b/docs/source/en/api/image_processor.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -20,12 +20,6 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or Nu
 [[autodoc]] image_processor.VaeImageProcessor
 ## InpaintProcessor
 The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.
 [[autodoc]] image_processor.InpaintProcessor
 ## VaeImageProcessorLDM3D
 The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.
--- a/docs/source/en/api/internal_classes_overview.md
+++ b/docs/source/en/api/internal_classes_overview.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/loaders/ip_adapter.md
+++ b/docs/source/en/api/loaders/ip_adapter.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -14,19 +14,16 @@ specific language governing permissions and limitations under the License.
 [IP-Adapter](https://hf.co/papers/2308.06721) is a lightweight adapter that enables prompting a diffusion model with an image. This method decouples the cross-attention layers of the image and text features. The image features are generated from an image encoder.
-> [!TIP]
+<Tip>
-> Learn how to load and use an IP-Adapter checkpoint and image in the [IP-Adapter](../../using-diffusers/ip_adapter) guide,.
+
 Learn how to load an IP-Adapter checkpoint and image in the IP-Adapter [loading](../../using-diffusers/loading_adapters#ip-adapter) guide, and you can see how to use it in the [usage](../../using-diffusers/ip_adapter) guide.
 </Tip>
 ## IPAdapterMixin
 [[autodoc]] loaders.ip_adapter.IPAdapterMixin
 ## SD3IPAdapterMixin
 [[autodoc]] loaders.ip_adapter.SD3IPAdapterMixin
    - all
    - is_ip_adapter_active
 ## IPAdapterMaskProcessor
 [[autodoc]] image_processor.IPAdapterMaskProcessor
--- a/docs/source/en/api/loaders/lora.md
+++ b/docs/source/en/api/loaders/lora.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -20,27 +20,14 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
 - [`FluxLoraLoaderMixin`] provides similar functions for [Flux](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux).
 - [`CogVideoXLoraLoaderMixin`] provides similar functions for [CogVideoX](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogvideox).
 - [`Mochi1LoraLoaderMixin`] provides similar functions for [Mochi](https://huggingface.co/docs/diffusers/main/en/api/pipelines/mochi).
 - [`AuraFlowLoraLoaderMixin`] provides similar functions for [AuraFlow](https://huggingface.co/fal/AuraFlow).
 - [`LTXVideoLoraLoaderMixin`] provides similar functions for [LTX-Video](https://huggingface.co/docs/diffusers/main/en/api/pipelines/ltx_video).
 - [`SanaLoraLoaderMixin`] provides similar functions for [Sana](https://huggingface.co/docs/diffusers/main/en/api/pipelines/sana).
 - [`HunyuanVideoLoraLoaderMixin`] provides similar functions for [HunyuanVideo](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hunyuan_video).
 - [`Lumina2LoraLoaderMixin`] provides similar functions for [Lumina2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/lumina2).
 - [`WanLoraLoaderMixin`] provides similar functions for [Wan](https://huggingface.co/docs/diffusers/main/en/api/pipelines/wan).
 - [`SkyReelsV2LoraLoaderMixin`] provides similar functions for [SkyReels-V2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/skyreels_v2).
 - [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4).
 - [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
 - [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream).
 - [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen).
 - [`ZImageLoraLoaderMixin`] provides similar functions for [Z-Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/zimage).
 - [`Flux2LoraLoaderMixin`] provides similar functions for [Flux2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux2).
 - [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, and unload LoRAs, and more.
-> [!TIP]
+<Tip>
 > To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
-## LoraBaseMixin
+To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
-[[autodoc]] loaders.lora_base.LoraBaseMixin
+</Tip>
 ## StableDiffusionLoraLoaderMixin
@@ -58,10 +45,6 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
 [[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin
 ## Flux2LoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.Flux2LoraLoaderMixin
 ## CogVideoXLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin
@@ -69,57 +52,11 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
 ## Mochi1LoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.Mochi1LoraLoaderMixin
 ## AuraFlowLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.AuraFlowLoraLoaderMixin
 ## LTXVideoLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.LTXVideoLoraLoaderMixin
 ## SanaLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.SanaLoraLoaderMixin
 ## HunyuanVideoLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.HunyuanVideoLoraLoaderMixin
 ## Lumina2LoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.Lumina2LoraLoaderMixin
 ## CogView4LoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.CogView4LoraLoaderMixin
 ## WanLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.WanLoraLoaderMixin
 ## SkyReelsV2LoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.SkyReelsV2LoraLoaderMixin
 ## AmusedLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin
 ## HiDreamImageLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.HiDreamImageLoraLoaderMixin
 ## QwenImageLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin
 ## ZImageLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.ZImageLoraLoaderMixin
 ## KandinskyLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.KandinskyLoraLoaderMixin
 ## LoraBaseMixin
 [[autodoc]] loaders.lora_base.LoraBaseMixin
--- a/docs/source/en/api/loaders/peft.md
+++ b/docs/source/en/api/loaders/peft.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -12,10 +12,13 @@ specific language governing permissions and limitations under the License.
 # PEFT
-Diffusers supports loading adapters such as [LoRA](../../tutorials/using_peft_for_inference) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter.
+Diffusers supports loading adapters such as [LoRA](../../using-diffusers/loading_adapters) with the [PEFT](https://huggingface.co/docs/peft/index) library with the [`~loaders.peft.PeftAdapterMixin`] class. This allows modeling classes in Diffusers like [`UNet2DConditionModel`], [`SD3Transformer2DModel`] to operate with an adapter.
-> [!TIP]
+<Tip>
-> Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
+
 Refer to the [Inference with PEFT](../../tutorials/using_peft_for_inference.md) tutorial for an overview of how to use PEFT in Diffusers for inference.
 </Tip>
 ## PeftAdapterMixin
--- a/docs/source/en/api/loaders/single_file.md
+++ b/docs/source/en/api/loaders/single_file.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/loaders/textual_inversion.md
+++ b/docs/source/en/api/loaders/textual_inversion.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -16,8 +16,11 @@ Textual Inversion is a training method for personalizing models by learning new
 [`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings.
-> [!TIP]
+<Tip>
-> To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/textual_inversion_inference) loading guide.
+
 To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/loading_adapters#textual-inversion) loading guide.
 </Tip>
 ## TextualInversionLoaderMixin
--- a/docs/source/en/api/loaders/transformer_sd3.md
+++ b/docs/source/en/api/loaders/transformer_sd3.md
@@ -1,26 +0,0 @@
 <!--Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License.
 -->
 # SD3Transformer2D
 This class is useful when *only* loading weights into a [`SD3Transformer2DModel`]. If you need to load weights into the text encoder, or into both the text encoder and the [`SD3Transformer2DModel`], check the [`SD3LoraLoaderMixin`](lora#diffusers.loaders.SD3LoraLoaderMixin) class instead.
 The [`SD3Transformer2DLoadersMixin`] class currently only loads IP-Adapter weights, but will be used in the future to save weights and load LoRAs.
 > [!TIP]
 > To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) loading guide.
 ## SD3Transformer2DLoadersMixin
 [[autodoc]] loaders.transformer_sd3.SD3Transformer2DLoadersMixin
    - all
    - _load_ip_adapter_weights
--- a/docs/source/en/api/loaders/unet.md
+++ b/docs/source/en/api/loaders/unet.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -16,8 +16,11 @@ Some training methods - like LoRA and Custom Diffusion - typically target the UN
 The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters.
-> [!TIP]
+<Tip>
-> To learn more about how to load LoRA weights, see the [LoRA](../../tutorials/using_peft_for_inference) guide.
+
 To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
 </Tip>
 ## UNet2DConditionLoadersMixin
--- a/docs/source/en/api/logging.md
+++ b/docs/source/en/api/logging.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/allegro_transformer3d.md
+++ b/docs/source/en/api/models/allegro_transformer3d.md
@@ -1,4 +1,4 @@
-<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import AllegroTransformer3DModel
-transformer = AllegroTransformer3DModel.from_pretrained("rhymes-ai/Allegro", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
+vae = AllegroTransformer3DModel.from_pretrained("rhymes-ai/Allegro", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
 ```
 ## AllegroTransformer3DModel
--- a/docs/source/en/api/models/asymmetricautoencoderkl.md
+++ b/docs/source/en/api/models/asymmetricautoencoderkl.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
 # AsymmetricAutoencoderKL
-Improved larger variational autoencoder (VAE) model with KL loss for inpainting task: [Designing a Better Asymmetric VQGAN for StableDiffusion](https://huggingface.co/papers/2306.04632) by Zixin Zhu, Xuelu Feng, Dongdong Chen, Jianmin Bao, Le Wang, Yinpeng Chen, Lu Yuan, Gang Hua.
+Improved larger variational autoencoder (VAE) model with KL loss for inpainting task: [Designing a Better Asymmetric VQGAN for StableDiffusion](https://arxiv.org/abs/2306.04632) by Zixin Zhu, Xuelu Feng, Dongdong Chen, Jianmin Bao, Le Wang, Yinpeng Chen, Lu Yuan, Gang Hua.
 The abstract from the paper is:
@@ -39,7 +39,7 @@ mask_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images
 original_image = load_image(img_url).resize((512, 512))
 mask_image = load_image(mask_url).resize((512, 512))
-pipe = StableDiffusionInpaintPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-inpainting")
+pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
 pipe.vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5")
 pipe.to("cuda")
--- a/docs/source/en/api/models/aura_flow_transformer2d.md
+++ b/docs/source/en/api/models/aura_flow_transformer2d.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/auto_model.md
+++ b/docs/source/en/api/models/auto_model.md
@@ -1,21 +0,0 @@
 <!--Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License.
 -->
 # AutoModel
 [`AutoModel`] automatically retrieves the correct model class from the checkpoint `config.json` file.
 ## AutoModel
 [[autodoc]] AutoModel
 	- all
 	- from_pretrained
--- a/docs/source/en/api/models/autoencoder_dc.md
+++ b/docs/source/en/api/models/autoencoder_dc.md
@@ -1,4 +1,4 @@
-<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -29,8 +29,6 @@ The following DCAE models are released and supported in Diffusers.
 | [`mit-han-lab/dc-ae-f128c512-in-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0-diffusers) | [`mit-han-lab/dc-ae-f128c512-in-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0)
 | [`mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers) | [`mit-han-lab/dc-ae-f128c512-mix-1.0`](https://huggingface.co/mit-han-lab/dc-ae-f128c512-mix-1.0)
 This model was contributed by [lawrence-cj](https://github.com/lawrence-cj).
 Load a model in Diffusers format with [`~ModelMixin.from_pretrained`].
 ```python
--- a/docs/source/en/api/models/autoencoder_kl_hunyuan_video.md
+++ b/docs/source/en/api/models/autoencoder_kl_hunyuan_video.md
@@ -1,32 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLHunyuanVideo
 The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanVideo](https://github.com/Tencent/HunyuanVideo/), which was introduced in [HunyuanVideo: A Systematic Framework For Large Video Generative Models](https://huggingface.co/papers/2412.03603) by Tencent.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLHunyuanVideo
 vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16)
 ```
 ## AutoencoderKLHunyuanVideo
 [[autodoc]] AutoencoderKLHunyuanVideo
  - decode
  - all
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoder_kl_hunyuan_video15.md
+++ b/docs/source/en/api/models/autoencoder_kl_hunyuan_video15.md
@@ -1,36 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLHunyuanVideo15
 The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanVideo1.5](https://github.com/Tencent/HunyuanVideo1-1.5) by Tencent.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLHunyuanVideo15
 vae = AutoencoderKLHunyuanVideo15.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v", subfolder="vae", torch_dtype=torch.float32)
 # make sure to enable tiling to avoid OOM
 vae.enable_tiling()
 ```
 ## AutoencoderKLHunyuanVideo15
 [[autodoc]] AutoencoderKLHunyuanVideo15
  - decode
  - encode
  - all
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoder_kl_hunyuanimage.md
+++ b/docs/source/en/api/models/autoencoder_kl_hunyuanimage.md
@@ -1,32 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLHunyuanImage
 The 2D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLHunyuanImage
 vae = AutoencoderKLHunyuanImage.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
 ```
 ## AutoencoderKLHunyuanImage
 [[autodoc]] AutoencoderKLHunyuanImage
  - decode
  - all
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoder_kl_hunyuanimage_refiner.md
+++ b/docs/source/en/api/models/autoencoder_kl_hunyuanimage_refiner.md
@@ -1,32 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLHunyuanImageRefiner
 The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) for its refiner pipeline.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLHunyuanImageRefiner
 vae = AutoencoderKLHunyuanImageRefiner.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers", subfolder="vae", torch_dtype=torch.bfloat16)
 ```
 ## AutoencoderKLHunyuanImageRefiner
 [[autodoc]] AutoencoderKLHunyuanImageRefiner
  - decode
  - all
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoder_kl_wan.md
+++ b/docs/source/en/api/models/autoencoder_kl_wan.md
@@ -1,32 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLWan
 The 3D variational autoencoder (VAE) model with KL loss used in [Wan 2.1](https://github.com/Wan-Video/Wan2.1) by the Alibaba Wan Team.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLWan
 vae = AutoencoderKLWan.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers", subfolder="vae", torch_dtype=torch.float32)
 ```
 ## AutoencoderKLWan
 [[autodoc]] AutoencoderKLWan
  - decode
  - all
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoder_oobleck.md
+++ b/docs/source/en/api/models/autoencoder_oobleck.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/autoencoder_tiny.md
+++ b/docs/source/en/api/models/autoencoder_tiny.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/autoencoderkl.md
+++ b/docs/source/en/api/models/autoencoderkl.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
 # AutoencoderKL
-The variational autoencoder (VAE) model with KL loss was introduced in [Auto-Encoding Variational Bayes](https://huggingface.co/papers/1312.6114v11) by Diederik P. Kingma and Max Welling. The model is used in 🤗 Diffusers to encode images into latents and to decode latent representations into images.
+The variational autoencoder (VAE) model with KL loss was introduced in [Auto-Encoding Variational Bayes](https://arxiv.org/abs/1312.6114v11) by Diederik P. Kingma and Max Welling. The model is used in 🤗 Diffusers to encode images into latents and to decode latent representations into images.
 The abstract from the paper is:
@@ -44,3 +44,15 @@ model = AutoencoderKL.from_single_file(url)
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
 ## FlaxAutoencoderKL
 [[autodoc]] FlaxAutoencoderKL
 ## FlaxAutoencoderKLOutput
 [[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput
 ## FlaxDecoderOutput
 [[autodoc]] models.vae_flax.FlaxDecoderOutput
--- a/docs/source/en/api/models/autoencoderkl_allegro.md
+++ b/docs/source/en/api/models/autoencoderkl_allegro.md
@@ -1,4 +1,4 @@
-<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLAllegro
-vae = AutoencoderKLAllegro.from_pretrained("rhymes-ai/Allegro", subfolder="vae", torch_dtype=torch.float32).to("cuda")
+vae = AutoencoderKLAllegro.from_pretrained("rhymes-ai/Allegro", subfolder="vae", torch_dtype=torch.float32).to("cuda")
 ```
 ## AutoencoderKLAllegro
--- a/docs/source/en/api/models/autoencoderkl_audio_ltx_2.md
+++ b/docs/source/en/api/models/autoencoderkl_audio_ltx_2.md
@@ -1,29 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLLTX2Audio
 The 3D variational autoencoder (VAE) model with KL loss used in [LTX-2](https://huggingface.co/Lightricks/LTX-2) was introduced by Lightricks. This is for encoding and decoding audio latent representations.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLLTX2Audio
 vae = AutoencoderKLLTX2Audio.from_pretrained("Lightricks/LTX-2", subfolder="vae", torch_dtype=torch.float32).to("cuda")
 ```
 ## AutoencoderKLLTX2Audio
 [[autodoc]] AutoencoderKLLTX2Audio
    - encode
    - decode
    - all
--- a/docs/source/en/api/models/autoencoderkl_cogvideox.md
+++ b/docs/source/en/api/models/autoencoderkl_cogvideox.md
@@ -1,4 +1,4 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/autoencoderkl_cosmos.md
+++ b/docs/source/en/api/models/autoencoderkl_cosmos.md
@@ -1,40 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLCosmos
 The autoencoder model used in [Cosmos Tokenizers](https://github.com/NVIDIA/Cosmos-Tokenizer) by NVIDIA.
 Supported models:
 - [nvidia/Cosmos-1.0-Tokenizer-CV8x8x8](https://huggingface.co/nvidia/Cosmos-1.0-Tokenizer-CV8x8x8)
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLCosmos
 vae = AutoencoderKLCosmos.from_pretrained("nvidia/Cosmos-1.0-Tokenizer-CV8x8x8", subfolder="vae")
 ```
 ## AutoencoderKLCosmos
 [[autodoc]] AutoencoderKLCosmos
    - decode
    - encode
    - all
 ## AutoencoderKLOutput
 [[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoderkl_ltx_2.md
+++ b/docs/source/en/api/models/autoencoderkl_ltx_2.md
@@ -1,29 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLLTX2Video
 The 3D variational autoencoder (VAE) model with KL loss used in [LTX-2](https://huggingface.co/Lightricks/LTX-2) was introduced by Lightricks.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLLTX2Video
 vae = AutoencoderKLLTX2Video.from_pretrained("Lightricks/LTX-2", subfolder="vae", torch_dtype=torch.float32).to("cuda")
 ```
 ## AutoencoderKLLTX2Video
 [[autodoc]] AutoencoderKLLTX2Video
    - decode
    - encode
    - all
--- a/docs/source/en/api/models/autoencoderkl_ltx_video.md
+++ b/docs/source/en/api/models/autoencoderkl_ltx_video.md
@@ -1,4 +1,4 @@
-<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLLTXVideo
-vae = AutoencoderKLLTXVideo.from_pretrained("Lightricks/LTX-Video", subfolder="vae", torch_dtype=torch.float32).to("cuda")
+vae = AutoencoderKLLTXVideo.from_pretrained("Lightricks/LTX-Video", subfolder="vae", torch_dtype=torch.float32).to("cuda")
 ```
 ## AutoencoderKLLTXVideo
--- a/docs/source/en/api/models/autoencoderkl_magvit.md
+++ b/docs/source/en/api/models/autoencoderkl_magvit.md
@@ -1,37 +0,0 @@
 <!--Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLMagvit
 The 3D variational autoencoder (VAE) model with KL loss used in [EasyAnimate](https://github.com/aigc-apps/EasyAnimate) was introduced by Alibaba PAI.
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLMagvit
 vae = AutoencoderKLMagvit.from_pretrained("alibaba-pai/EasyAnimateV5.1-12b-zh", subfolder="vae", torch_dtype=torch.float16).to("cuda")
 ```
 ## AutoencoderKLMagvit
 [[autodoc]] AutoencoderKLMagvit
    - decode
    - encode
    - all
 ## AutoencoderKLOutput
 [[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/docs/source/en/api/models/autoencoderkl_mochi.md
+++ b/docs/source/en/api/models/autoencoderkl_mochi.md
@@ -1,4 +1,4 @@
-<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
--- a/docs/source/en/api/models/autoencoderkl_qwenimage.md
+++ b/docs/source/en/api/models/autoencoderkl_qwenimage.md
@@ -1,35 +0,0 @@
 <!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License. -->
 # AutoencoderKLQwenImage
 The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLQwenImage
 vae = AutoencoderKLQwenImage.from_pretrained("Qwen/QwenImage-20B", subfolder="vae")
 ```
 ## AutoencoderKLQwenImage
 [[autodoc]] AutoencoderKLQwenImage
    - decode
    - encode
    - all
 ## AutoencoderKLOutput
 [[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
 ## DecoderOutput
 [[autodoc]] models.autoencoders.vae.DecoderOutput
--- a/Show More
+++ b/Show More