Mirror of https://github.com/huggingface/diffusers.git

[CI] Some improvements to Nightly reports summaries (#11166)

* update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update
213  .github/workflows/nightly_tests.yml  (vendored)
@@ -13,8 +13,9 @@ env:
  PYTEST_TIMEOUT: 600
  RUN_SLOW: yes
  RUN_NIGHTLY: yes
  PIPELINE_USAGE_CUTOFF: 5000
  PIPELINE_USAGE_CUTOFF: 0
  SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
  CONSOLIDATED_REPORT_PATH: consolidated_test_report.md

jobs:
  setup_torch_cuda_pipeline_matrix:
@@ -99,11 +100,6 @@ jobs:
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_nightly_tests_for_other_torch_modules:
    name: Nightly Torch CUDA Tests
@@ -142,7 +138,6 @@ jobs:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
          RUN_COMPILE: yes
        run: |
          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -s -v -k "not Flax and not Onnx" \
@@ -175,12 +170,6 @@ jobs:
          name: torch_${{ matrix.module }}_cuda_test_reports
          path: reports

      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_torch_compile_tests:
    name: PyTorch Compile CUDA tests

@@ -224,12 +213,6 @@ jobs:
          name: torch_compile_test_reports
          path: reports

      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_big_gpu_torch_tests:
    name: Torch tests on big GPU
    strategy:
@@ -280,12 +263,7 @@ jobs:
        with:
          name: torch_cuda_big_gpu_test_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  torch_minimum_version_cuda_tests:
    name: Torch Minimum Version CUDA Tests
    runs-on:
@@ -342,63 +320,6 @@ jobs:
        with:
          name: torch_minimum_version_cuda_test_reports
          path: reports

  run_flax_tpu_tests:
    name: Nightly Flax TPU Tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    if: github.event_name == 'schedule'

    container:
      image: diffusers/diffusers-flax-tpu
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: Install dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          python -m uv pip install pytest-reportlog

      - name: Environment
        run: python utils/print_env.py

      - name: Run nightly Flax TPU tests
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        run: |
          python -m pytest -n 0 \
            -s -v -k "Flax" \
            --make-reports=tests_flax_tpu \
            --report-log=tests_flax_tpu.log \
            tests/

      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_flax_tpu_stats.txt
          cat reports/tests_flax_tpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: flax_tpu_test_reports
          path: reports

      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_nightly_onnx_tests:
    name: Nightly ONNXRuntime CUDA tests on Ubuntu
@@ -449,18 +370,12 @@ jobs:
          name: tests_onnx_cuda_reports
          path: reports

      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_nightly_quantization_tests:
    name: Torch quantization nightly tests
    strategy:
      fail-fast: false
      max-parallel: 2
      matrix:
        config:
          - backend: "bitsandbytes"
            test_location: "bnb"
@@ -520,12 +435,7 @@ jobs:
        with:
          name: torch_cuda_${{ matrix.config.backend }}_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()
        run: |
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  run_nightly_pipeline_level_quantization_tests:
    name: Torch quantization nightly tests
    strategy:
@@ -574,12 +484,117 @@ jobs:
        with:
          name: torch_cuda_pipeline_level_quant_reports
          path: reports
      - name: Generate Report and Notify Channel
        if: always()

  run_flax_tpu_tests:
    name: Nightly Flax TPU Tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    if: github.event_name == 'schedule'

    container:
      image: diffusers/diffusers-flax-tpu
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: Install dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          python -m uv pip install pytest-reportlog

      - name: Environment
        run: python utils/print_env.py

      - name: Run nightly Flax TPU tests
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        run: |
          python -m pytest -n 0 \
            -s -v -k "Flax" \
            --make-reports=tests_flax_tpu \
            --report-log=tests_flax_tpu.log \
            tests/

      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_flax_tpu_stats.txt
          cat reports/tests_flax_tpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: flax_tpu_test_reports
          path: reports

  generate_consolidated_report:
    name: Generate Consolidated Test Report
    needs: [
      run_nightly_tests_for_torch_pipelines,
      run_nightly_tests_for_other_torch_modules,
      run_torch_compile_tests,
      run_big_gpu_torch_tests,
      run_nightly_quantization_tests,
      run_nightly_pipeline_level_quantization_tests,
      run_nightly_onnx_tests,
      torch_minimum_version_cuda_tests,
      run_flax_tpu_tests
    ]
    if: always()
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: Create reports directory
        run: mkdir -p combined_reports

      - name: Download all test reports
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: Prepare reports
        run: |
          # Move all report files to a single directory for processing
          find artifacts -name "*.txt" -exec cp {} combined_reports/ \;

      - name: Install dependencies
        run: |
          pip install -e .[test]
          pip install slack_sdk tabulate
          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

      - name: Generate consolidated report
        run: |
          python utils/consolidated_test_report.py \
            --reports_dir combined_reports \
            --output_file $CONSOLIDATED_REPORT_PATH \
            --slack_channel_name diffusers-ci-nightly

      - name: Show consolidated report
        run: |
          cat $CONSOLIDATED_REPORT_PATH >> $GITHUB_STEP_SUMMARY

      - name: Upload consolidated report
        uses: actions/upload-artifact@v4
        with:
          name: consolidated_test_report
          path: ${{ env.CONSOLIDATED_REPORT_PATH }}

# M1 runner currently not well supported
# TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
# run_nightly_tests_apple_m1:
789  utils/consolidated_test_report.py  (Normal file)
@@ -0,0 +1,789 @@
#!/usr/bin/env python
import argparse
import glob
import os
import re
from datetime import date, datetime

from slack_sdk import WebClient
from tabulate import tabulate


MAX_LEN_MESSAGE = 3001  # slack endpoint has a limit of 3001 characters

parser = argparse.ArgumentParser()
parser.add_argument("--slack_channel_name", default="diffusers-ci-nightly")
parser.add_argument(
    "--reports_dir",
    default="reports",
    help="Directory containing test reports (will search recursively in all subdirectories)",
)
parser.add_argument("--output_file", default=None, help="Path to save the consolidated report (markdown format)")

def parse_stats_file(file_path):
    """Parse a stats file to extract test statistics."""
    try:
        with open(file_path, "r") as f:
            content = f.read()

        # Extract the numbers using regex
        tests_pattern = r"collected (\d+) items"
        passed_pattern = r"(\d+) passed"
        failed_pattern = r"(\d+) failed"
        skipped_pattern = r"(\d+) skipped"
        xpassed_pattern = r"(\d+) xpassed"

        tests_match = re.search(tests_pattern, content)
        passed_match = re.search(passed_pattern, content)
        failed_match = re.search(failed_pattern, content)
        skipped_match = re.search(skipped_pattern, content)
        xpassed_match = re.search(xpassed_pattern, content)

        passed = int(passed_match.group(1)) if passed_match else 0
        failed = int(failed_match.group(1)) if failed_match else 0
        skipped = int(skipped_match.group(1)) if skipped_match else 0
        xpassed = int(xpassed_match.group(1)) if xpassed_match else 0

        # If tests_match exists, use it, otherwise calculate from passed/failed/skipped
        if tests_match:
            tests = int(tests_match.group(1))
        else:
            tests = passed + failed + skipped + xpassed

        # Extract timing information if available
        timing_pattern = r"slowest \d+ test durations[\s\S]*?\n([\s\S]*?)={70}"
        timing_match = re.search(timing_pattern, content, re.MULTILINE)
        slowest_tests = []

        if timing_match:
            timing_text = timing_match.group(1).strip()
            test_timing_lines = timing_text.split("\n")
            for line in test_timing_lines:
                if line.strip():
                    # Format is typically: 10.37s call tests/path/to/test.py::TestClass::test_method
                    parts = line.strip().split()
                    if len(parts) >= 3:
                        time_str = parts[0]
                        test_path = " ".join(parts[2:])

                        # Skip entries with "< 0.05 secs were omitted" or similar
                        if "secs were omitted" in test_path:
                            continue

                        try:
                            time_seconds = float(time_str.rstrip("s"))
                            slowest_tests.append({"test": test_path, "duration": time_seconds})
                        except ValueError:
                            pass

        return {
            "tests": tests,
            "passed": passed,
            "failed": failed,
            "skipped": skipped,
            "slowest_tests": slowest_tests,
        }
    except Exception as e:
        print(f"Error parsing {file_path}: {e}")
        return {"tests": 0, "passed": 0, "failed": 0, "skipped": 0, "slowest_tests": []}

def parse_durations_file(file_path):
    """Parse a durations file to extract test timing information."""
    slowest_tests = []
    try:
        durations_file = file_path.replace("_stats.txt", "_durations.txt")
        if os.path.exists(durations_file):
            with open(durations_file, "r") as f:
                content = f.read()

            # Skip the header line
            for line in content.split("\n")[1:]:
                if line.strip():
                    # Format is typically: 10.37s call tests/path/to/test.py::TestClass::test_method
                    parts = line.strip().split()
                    if len(parts) >= 3:
                        time_str = parts[0]
                        test_path = " ".join(parts[2:])

                        # Skip entries with "< 0.05 secs were omitted" or similar
                        if "secs were omitted" in test_path:
                            continue

                        try:
                            time_seconds = float(time_str.rstrip("s"))
                            slowest_tests.append({"test": test_path, "duration": time_seconds})
                        except ValueError:
                            # If time_str is not a valid float, it might be a different format
                            # For example, some pytest formats show "< 0.05s" or similar
                            if test_path.startswith("<") and "secs were omitted" in test_path:
                                # Extract the time value from test_path if it's in the format "< 0.05 secs were omitted"
                                try:
                                    # This handles entries where the time is in the test_path itself
                                    dur_match = re.search(r"(\d+(?:\.\d+)?)", test_path)
                                    if dur_match:
                                        time_seconds = float(dur_match.group(1))
                                        slowest_tests.append({"test": test_path, "duration": time_seconds})
                                except ValueError:
                                    pass
    except Exception as e:
        print(f"Error parsing durations file {file_path.replace('_stats.txt', '_durations.txt')}: {e}")

    return slowest_tests

def parse_failures_file(file_path):
    """Parse a failures file to extract failed test details."""
    failures = []
    try:
        with open(file_path, "r") as f:
            content = f.read()

        # We don't need the base file name anymore as we're getting test paths from summary

        # Check if it's a short stack format
        if "============================= FAILURES SHORT STACK =============================" in content:
            # First, look for pytest-style failure headers with underscores and clean them up
            test_headers = re.findall(r"_{5,}\s+([^_\n]+?)\s+_{5,}", content)

            for test_name in test_headers:
                test_name = test_name.strip()
                # Make sure it's a valid test name (contains a dot and doesn't look like a number)
                if "." in test_name and not test_name.replace(".", "").isdigit():
                    # For test names missing the full path, check if we can reconstruct it from failures_line.txt
                    # This is a best effort - we won't always have the line file available
                    if not test_name.endswith(".py") and "::" not in test_name and "/" not in test_name:
                        # Try to look for a corresponding line file
                        line_file = file_path.replace("_failures_short.txt", "_failures_line.txt")
                        if os.path.exists(line_file):
                            try:
                                with open(line_file, "r") as lf:
                                    line_content = lf.read()
                                    # Look for test name in line file which might have the full path
                                    path_match = re.search(
                                        r"(tests/[\w/]+\.py::[^:]+::" + test_name.split(".")[-1] + ")",
                                        line_content,
                                    )
                                    if path_match:
                                        test_name = path_match.group(1)
                            except Exception:
                                pass  # If we can't read the line file, just use what we have

                    failures.append(
                        {
                            "test": test_name,
                            "error": "Error occurred",
                            "original_test_name": test_name,  # Keep original for reference
                        }
                    )

            # If we didn't find any pytest-style headers, try other formats
            if not failures:
                # Look for test names at the beginning of the file (in first few lines)
                first_lines = content.split("\n")[:20]  # Look at first 20 lines
                for line in first_lines:
                    # Look for test names in various formats
                    # Format: tests/file.py::TestClass::test_method
                    path_match = re.search(r"(tests/[\w/]+\.py::[\w\.]+::\w+)", line)
                    # Format: TestClass.test_method
                    class_match = re.search(r"([A-Za-z][A-Za-z0-9_]+\.[A-Za-z][A-Za-z0-9_]+)", line)

                    if path_match:
                        test_name = path_match.group(1)
                        failures.append(
                            {"test": test_name, "error": "Error occurred", "original_test_name": test_name}
                        )
                        break  # Found a full path, stop looking
                    elif class_match and "test" in line.lower():
                        test_name = class_match.group(1)
                        # Make sure it's likely a test name (contains test in method name)
                        if "test" in test_name.lower():
                            failures.append(
                                {"test": test_name, "error": "Error occurred", "original_test_name": test_name}
                            )
        else:
            # Standard format - try to extract from standard pytest output
            failure_blocks = re.split(r"={70}", content)

            for block in failure_blocks:
                if not block.strip():
                    continue

                # Look for test paths in the format: path/to/test.py::TestClass::test_method
                path_matches = re.findall(r"([\w/]+\.py::[\w\.]+::\w+)", block)
                if path_matches:
                    for test_name in path_matches:
                        failures.append(
                            {"test": test_name, "error": "Error occurred", "original_test_name": test_name}
                        )
                else:
                    # Try alternative format: TestClass.test_method
                    class_matches = re.findall(r"([A-Za-z][A-Za-z0-9_]+\.[A-Za-z][A-Za-z0-9_]+)", block)
                    for test_name in class_matches:
                        # Filter out things that don't look like test names
                        if (
                            not test_name.startswith(("e.g", "i.e", "etc."))
                            and not test_name.isdigit()
                            and "test" in test_name.lower()
                        ):
                            failures.append(
                                {"test": test_name, "error": "Error occurred", "original_test_name": test_name}
                            )

    except Exception as e:
        print(f"Error parsing failures in {file_path}: {e}")

    return failures

def consolidate_reports(reports_dir):
    """Consolidate test reports from multiple test runs, including from subdirectories."""
    # Get all stats files, including those in subdirectories
    stats_files = glob.glob(f"{reports_dir}/**/*_stats.txt", recursive=True)

    results = {}
    total_stats = {"tests": 0, "passed": 0, "failed": 0, "skipped": 0}

    # Collect all slow tests across all test suites
    all_slow_tests = []

    # Process each stats file and its corresponding failures file
    for stats_file in stats_files:
        # Extract test suite name from filename (e.g., tests_pipeline_allegro_cuda_stats.txt -> pipeline_allegro_cuda)
        base_name = os.path.basename(stats_file).replace("_stats.txt", "")

        # Include parent directory in suite name if it's in a subdirectory
        rel_path = os.path.relpath(os.path.dirname(stats_file), reports_dir)
        if rel_path and rel_path != ".":
            # Remove 'test_reports' suffix from directory name if present
            dir_name = os.path.basename(rel_path)
            if dir_name.endswith("_test_reports"):
                dir_name = dir_name[:-13]  # Remove '_test_reports' suffix
            base_name = f"{dir_name}/{base_name}"

        # Parse stats
        stats = parse_stats_file(stats_file)

        # If no slowest tests found in stats file, try the durations file directly
        if not stats.get("slowest_tests"):
            stats["slowest_tests"] = parse_durations_file(stats_file)

        # Update total stats
        for key in ["tests", "passed", "failed", "skipped"]:
            total_stats[key] += stats[key]

        # Collect slowest tests with their suite name
        for slow_test in stats.get("slowest_tests", []):
            all_slow_tests.append({"test": slow_test["test"], "duration": slow_test["duration"], "suite": base_name})

        # Parse failures if there are any
        failures = []
        if stats["failed"] > 0:
            # First try to get test paths from summary_short.txt which has the best format
            summary_file = stats_file.replace("_stats.txt", "_summary_short.txt")
            if os.path.exists(summary_file):
                try:
                    with open(summary_file, "r") as f:
                        content = f.read()
                        # Look for full lines with test path and error message: "FAILED test_path - error_msg"
                        failed_test_lines = re.findall(
                            r"FAILED\s+(tests/[\w/]+\.py::[A-Za-z0-9_\.]+::[A-Za-z0-9_]+)(?:\s+-\s+(.+))?", content
                        )

                        if failed_test_lines:
                            for match in failed_test_lines:
                                test_path = match[0]
                                error_msg = match[1] if len(match) > 1 and match[1] else "No error message"

                                failures.append({"test": test_path, "error": error_msg})
                except Exception as e:
                    print(f"Error parsing summary file: {e}")

            # If no failures found in summary, try other failure files
            if not failures:
                failure_patterns = ["_failures_short.txt", "_failures.txt", "_failures_line.txt", "_failures_long.txt"]

                for pattern in failure_patterns:
                    failures_file = stats_file.replace("_stats.txt", pattern)
                    if os.path.exists(failures_file):
                        failures = parse_failures_file(failures_file)
                        if failures:
                            break

        # No debug output needed

        # Store results for this test suite
        results[base_name] = {"stats": stats, "failures": failures}

    # Filter out entries with "secs were omitted"
    filtered_slow_tests = [test for test in all_slow_tests if "secs were omitted" not in test["test"]]

    # Sort all slow tests by duration (descending)
    filtered_slow_tests.sort(key=lambda x: x["duration"], reverse=True)

    # Get the number of slowest tests to show from environment variable or default to 10
    num_slowest_tests = int(os.environ.get("SHOW_SLOWEST_TESTS", "10"))
    top_slowest_tests = filtered_slow_tests[:num_slowest_tests] if filtered_slow_tests else []

    # Calculate additional duration statistics
    total_duration = sum(test["duration"] for test in all_slow_tests)

    # Calculate duration per suite
    suite_durations = {}
    for test in all_slow_tests:
        suite_name = test["suite"]
        if suite_name not in suite_durations:
            suite_durations[suite_name] = 0
        suite_durations[suite_name] += test["duration"]

    # Removed duration categories

    return {
        "total_stats": total_stats,
        "test_suites": results,
        "slowest_tests": top_slowest_tests,
        "duration_stats": {"total_duration": total_duration, "suite_durations": suite_durations},
    }

def generate_report(consolidated_data):
    """Generate a comprehensive markdown report from consolidated data."""
    report = []

    # Add report header
    report.append("# Diffusers Nightly Test Report")
    report.append(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Removed comparison section

    # Add summary section
    total = consolidated_data["total_stats"]
    report.append("## Summary")

    # Get duration stats if available
    duration_stats = consolidated_data.get("duration_stats", {})
    total_duration = duration_stats.get("total_duration", 0)

    summary_table = [
        ["Total Tests", total["tests"]],
        ["Passed", total["passed"]],
        ["Failed", total["failed"]],
        ["Skipped", total["skipped"]],
        ["Success Rate", f"{(total['passed'] / total['tests'] * 100):.2f}%" if total["tests"] > 0 else "N/A"],
        ["Total Duration", f"{total_duration:.2f}s" if total_duration else "N/A"],
    ]

    report.append(tabulate(summary_table, tablefmt="pipe"))
    report.append("")

    # Removed duration distribution section

    # Add test suites summary
    report.append("## Test Suites")

    # Include duration in test suites table if available
    suite_durations = consolidated_data.get("duration_stats", {}).get("suite_durations", {})

    if suite_durations:
        suites_table = [["Test Suite", "Tests", "Passed", "Failed", "Skipped", "Success Rate", "Duration (s)"]]
    else:
        suites_table = [["Test Suite", "Tests", "Passed", "Failed", "Skipped", "Success Rate"]]

    # Sort test suites by success rate (ascending - least successful first)
    sorted_suites = sorted(
        consolidated_data["test_suites"].items(),
        key=lambda x: (x[1]["stats"]["passed"] / x[1]["stats"]["tests"] * 100) if x[1]["stats"]["tests"] > 0 else 0,
        reverse=False,
    )

    for suite_name, suite_data in sorted_suites:
        stats = suite_data["stats"]
        success_rate = f"{(stats['passed'] / stats['tests'] * 100):.2f}%" if stats["tests"] > 0 else "N/A"

        if suite_durations:
            duration = suite_durations.get(suite_name, 0)
            suites_table.append(
                [
                    suite_name,
                    stats["tests"],
                    stats["passed"],
                    stats["failed"],
                    stats["skipped"],
                    success_rate,
                    f"{duration:.2f}",
                ]
            )
        else:
            suites_table.append(
                [suite_name, stats["tests"], stats["passed"], stats["failed"], stats["skipped"], success_rate]
            )

    report.append(tabulate(suites_table, headers="firstrow", tablefmt="pipe"))
    report.append("")

    # Add slowest tests section
    slowest_tests = consolidated_data.get("slowest_tests", [])
    if slowest_tests:
        report.append("## Slowest Tests")

        slowest_table = [["Rank", "Test", "Duration (s)", "Test Suite"]]
        for i, test in enumerate(slowest_tests, 1):
            # Skip entries that don't contain actual test names
            if "< 0.05 secs were omitted" in test["test"]:
                continue
            slowest_table.append([i, test["test"], f"{test['duration']:.2f}", test["suite"]])

        report.append(tabulate(slowest_table, headers="firstrow", tablefmt="pipe"))
        report.append("")

    # Add failures section if there are any
    failed_suites = [s for s in sorted_suites if s[1]["stats"]["failed"] > 0]

    if failed_suites:
        report.append("## Failures")

        # Group failures by module for cleaner organization
        failures_by_module = {}

        for suite_name, suite_data in failed_suites:
            # Extract failures data for this suite
            for failure in suite_data.get("failures", []):
                test_name = failure["test"]

                # If test name doesn't look like a full path, try to reconstruct it
                if not ("/" in test_name or "::" in test_name) and "." in test_name:
                    # For simple 'TestClass.test_method' format, try to get full path from suite name
                    # Form: tests_<suite>_cuda -> tests/<suite>/test_<suite>.py::TestClass::test_method
                    if suite_name.startswith("tests_") and "_cuda" in suite_name:
                        # Extract component name from suite
                        component = suite_name.replace("tests_", "").replace("_cuda", "")
                        if "." in test_name:
                            class_name, method_name = test_name.split(".", 1)
                            possible_path = f"tests/{component}/test_{component}.py::{class_name}::{method_name}"
                            # Use this constructed path if it seems reasonable
                            if "test_" in method_name:
                                test_name = possible_path

                # Extract module name from test name
                if "::" in test_name:
                    # For path/file.py::TestClass::test_method format
                    parts = test_name.split("::")
                    module_name = parts[-2] if len(parts) >= 2 else "Other"  # TestClass
                elif "." in test_name:
                    # For TestClass.test_method format
                    parts = test_name.split(".")
                    module_name = parts[0]  # TestClass
                else:
                    module_name = "Other"

                # Skip module names that don't look like class/module names
                if (
                    module_name.startswith(("e.g", "i.e", "etc"))
                    or module_name.replace(".", "").isdigit()
                    or len(module_name) < 3
                ):
                    module_name = "Other"

                # Add to the module group
                if module_name not in failures_by_module:
                    failures_by_module[module_name] = []

                # Prepend the suite name if the test name doesn't already have a full path
                if "/" not in test_name and suite_name not in test_name:
                    full_test_name = f"{suite_name}::{test_name}"
                else:
                    full_test_name = test_name

                # Add this failure to the module group
                failures_by_module[module_name].append(
                    {"test": full_test_name, "original_test": test_name, "error": failure["error"]}
                )

        # Create a list of failing tests for each module
        if failures_by_module:
            for module_name, failures in sorted(failures_by_module.items()):
                report.append(f"### {module_name}")

                # Put all failed tests in a single code block
                report.append("```")
                for failure in failures:
                    # Show test path and error message if available
                    if failure.get("error") and failure["error"] != "No error message":
                        report.append(f"{failure['test']} - {failure['error']}")
                    else:
                        report.append(failure["test"])
                report.append("```")

                report.append("")  # Add space between modules
        else:
            report.append("*No detailed failure information available*")
            report.append("")

    return "\n".join(report)

def create_test_groups_table(test_groups, total_tests, total_success_rate):
    """Create a table-like format for test groups showing total tests and success rate."""
    if not test_groups:
        return None

    # Sort by total test count (descending)
    sorted_groups = sorted(test_groups.items(), key=lambda x: x[1]["total"], reverse=True)

    # Create table lines
    table_lines = ["```"]
    table_lines.append("Test Results Summary")
    table_lines.append("-------------------")
    table_lines.append(f"Total Tests: {total_tests:,}")
    table_lines.append(f"Success Rate: {total_success_rate}")
    table_lines.append("")
    table_lines.append("Category            | Total Tests | Failed | Success Rate")
    table_lines.append("------------------- | ----------- | ------ | ------------")

    # Add rows
    for category, stats in sorted_groups:
        # Pad category name to fixed width (19 chars)
        padded_cat = category[:19].ljust(19)  # Truncate if too long
        # Right-align counts
        padded_total = str(stats["total"]).rjust(11)
        padded_failed = str(stats["failed"]).rjust(6)
        # Calculate and format success rate
        if stats["total"] > 0:
            cat_success_rate = f"{((stats['total'] - stats['failed']) / stats['total'] * 100):.1f}%"
        else:
            cat_success_rate = "N/A"
        padded_rate = cat_success_rate.rjust(12)
        table_lines.append(f"{padded_cat} | {padded_total} | {padded_failed} | {padded_rate}")

    table_lines.append("```")

    total_failures = sum(stats["failed"] for stats in test_groups.values())
    return (
        f"*Test Groups Summary ({total_failures} {'failure' if total_failures == 1 else 'failures'}):*\n"
        + "\n".join(table_lines)
    )

def create_slack_payload(consolidated_data):
    """Create a concise Slack message payload from consolidated data."""
    total = consolidated_data["total_stats"]
    success_rate = f"{(total['passed'] / total['tests'] * 100):.2f}%" if total["tests"] > 0 else "N/A"

    # Determine emoji based on success rate
    if total["failed"] == 0:
        emoji = "✅"
    elif total["failed"] / total["tests"] < 0.1:
        emoji = "⚠️"
    else:
        emoji = "❌"

    # Create a more compact summary section
    summary = f"{emoji} *Diffusers Nightly Tests:* {success_rate} success ({total['passed']}/{total['tests']} tests"
    if total["skipped"] > 0:
        summary += f", {total['skipped']} skipped"
    summary += ")"

    # Create the test suites table in markdown format
    # Build the markdown table with proper alignment
    table_lines = []
    table_lines.append("```")

    # Sort test suites by success rate (ascending - least successful first)
    sorted_suites = sorted(
        consolidated_data["test_suites"].items(),
        key=lambda x: (x[1]["stats"]["passed"] / x[1]["stats"]["tests"] * 100) if x[1]["stats"]["tests"] > 0 else 0,
        reverse=False,
    )

    # Calculate max widths for proper alignment
    max_suite_name_len = max(len(suite_name) for suite_name, _ in sorted_suites) if sorted_suites else 10
    max_suite_name_len = max(max_suite_name_len, len("Test Suite"))  # Ensure header fits

    # Create header with proper spacing (only Tests, Failed, Success Rate)
    header = f"| {'Test Suite'.ljust(max_suite_name_len)} | {'Tests'.rjust(6)} | {'Failed'.rjust(6)} | {'Success Rate'.ljust(12)} |"
    separator = f"|:{'-' * max_suite_name_len}|{'-' * 7}:|{'-' * 7}:|:{'-' * 11}|"

    table_lines.append(header)
    table_lines.append(separator)

    # Add data rows with proper alignment
    for suite_name, suite_data in sorted_suites:
        stats = suite_data["stats"]
        suite_success_rate = f"{(stats['passed'] / stats['tests'] * 100):.2f}%" if stats["tests"] > 0 else "N/A"

        row = f"| {suite_name.ljust(max_suite_name_len)} | {str(stats['tests']).rjust(6)} | {str(stats['failed']).rjust(6)} | {suite_success_rate.ljust(12)} |"

        table_lines.append(row)

    table_lines.append("```")

    # Create the Slack payload with character limit enforcement
    payload = [
        {"type": "section", "text": {"type": "mrkdwn", "text": summary}},
        {"type": "section", "text": {"type": "mrkdwn", "text": "\n".join(table_lines)}},
    ]

    # Add action button
    if os.environ.get("GITHUB_RUN_ID"):
        run_id = os.environ["GITHUB_RUN_ID"]
        payload.append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"*<https://github.com/huggingface/diffusers/actions/runs/{run_id}|View full report on GitHub>*",
                },
            }
        )

    # Add date in more compact form
    payload.append(
        {
            "type": "context",
            "elements": [
                {
                    "type": "plain_text",
                    "text": f"Results for {date.today()}",
                },
            ],
        }
    )

    # Enforce 3001 character limit
    payload_text = str(payload)
    if len(payload_text) > MAX_LEN_MESSAGE:
        # Truncate table if payload is too long
        # Remove rows from the bottom until under limit
        original_table_lines = table_lines[:]
        while len(str(payload)) > MAX_LEN_MESSAGE and len(table_lines) > 3:  # Keep at least header and separator
            # Remove the last data row (but keep ``` at the end)
            table_lines.pop(-2)  # Remove second to last (last is the closing ```)

            # Recreate payload with truncated table
            payload[1] = {"type": "section", "text": {"type": "mrkdwn", "text": "\n".join(table_lines)}}

        # Add note if we had to truncate
        if len(table_lines) < len(original_table_lines):
            truncated_count = len(original_table_lines) - len(table_lines)
            table_lines.insert(-1, f"... {truncated_count} more test suites (truncated due to message limit)")
            payload[1] = {"type": "section", "text": {"type": "mrkdwn", "text": "\n".join(table_lines)}}

    return payload

def create_failed_tests_by_suite_ordered(consolidated_data):
    """Group failed tests by test suite, ordered by success rate (ascending)."""
    # Sort test suites by success rate (ascending - least successful first)
    sorted_suites = sorted(
        consolidated_data["test_suites"].items(),
        key=lambda x: (x[1]["stats"]["passed"] / x[1]["stats"]["tests"] * 100) if x[1]["stats"]["tests"] > 0 else 0,
        reverse=False,
    )

    failed_suite_tests = []

    # Process suites in order of success rate
    for suite_name, suite_data in sorted_suites:
        if suite_data["stats"]["failed"] > 0:
            suite_failures = []

            for failure in suite_data.get("failures", []):
                test_name = failure["test"]

                # Try to reconstruct full path if partial
                if "::" in test_name and "/" in test_name:
                    full_test_name = test_name
                elif "::" in test_name or "." in test_name:
                    if "/" not in test_name and suite_name not in test_name:
                        full_test_name = f"{suite_name}::{test_name}"
                    else:
                        full_test_name = test_name
                else:
                    full_test_name = f"{suite_name}::{test_name}"

                suite_failures.append(full_test_name)

            # Sort and deduplicate tests within the suite
            suite_failures = sorted(set(suite_failures))

            if suite_failures:
                failed_suite_tests.append(
                    {
                        "suite_name": suite_name,
                        "tests": suite_failures,
                        "success_rate": (suite_data["stats"]["passed"] / suite_data["stats"]["tests"] * 100)
                        if suite_data["stats"]["tests"] > 0
                        else 0,
                    }
                )

    return failed_suite_tests

def main(args):
    # Make sure reports directory exists
    if not os.path.isdir(args.reports_dir):
        print(f"Error: Reports directory '{args.reports_dir}' does not exist.")
        return

    # Consolidate reports
    consolidated_data = consolidate_reports(args.reports_dir)

    # Check if we found any test results
    if consolidated_data["total_stats"]["tests"] == 0:
        print(f"Warning: No test results found in '{args.reports_dir}' or its subdirectories.")

    # Generate markdown report
    report = generate_report(consolidated_data)

    # Save report to file if specified
    if args.output_file:
        # Create parent directories if they don't exist
        output_dir = os.path.dirname(args.output_file)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with open(args.output_file, "w") as f:
            f.write(report)

        # Only print the report when saving to file
        print(report)

    # Send to Slack if token is available (optional, can be disabled)
    slack_token = os.environ.get("SLACK_API_TOKEN")
    if slack_token and args.slack_channel_name:
        payload = create_slack_payload(consolidated_data)

        try:
            client = WebClient(token=slack_token)
            # Send main message
            response = client.chat_postMessage(channel=f"#{args.slack_channel_name}", blocks=payload)
            print(f"Report sent to Slack channel: {args.slack_channel_name}")

            # Send failed tests as separate threaded replies grouped by test suite (ordered by success rate)
            total = consolidated_data["total_stats"]
            if total["failed"] > 0:
                failed_suites = create_failed_tests_by_suite_ordered(consolidated_data)
                for suite_info in failed_suites:
                    suite_name = suite_info["suite_name"]
                    suite_tests = suite_info["tests"]
                    success_rate = suite_info["success_rate"]
                    message_text = (
                        f"**{suite_name}** (Success Rate: {success_rate:.2f}%)\n```\n"
                        + "\n".join(suite_tests)
                        + "\n```"
                    )
                    client.chat_postMessage(
                        channel=f"#{args.slack_channel_name}",
                        thread_ts=response["ts"],  # Reply in thread
                        text=message_text,  # Use text instead of blocks for markdown
                    )
                print(f"Failed tests details sent as {len(failed_suites)} thread replies")
        except Exception as e:
            print(f"Error sending report to Slack: {e}")


if __name__ == "__main__":
    args = parser.parse_args()
    main(args)