mirror of https://github.com/huggingface/diffusers.git
Compare commits

6 Commits: edit-pypi-… ... vid-docs-f…
| Author | SHA1 | Date |
|---|---|---|
| | a30871a0c5 | |
| | 9237ea5787 | |
| | f915b558d4 | |
| | e2827f819a | |
| | 3cf7b068c3 | |
| | c7652d3d60 | |
```diff
@@ -31,7 +31,7 @@ Sample output with I2VGenXL:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        library.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/i2vgen-xl-example.gif"
             alt="library"
```
```diff
@@ -70,7 +70,7 @@ Here are some sample outputs:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-default-output.gif"
             alt="cat in a field"
```
```diff
@@ -119,7 +119,7 @@ image = load_image(
     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
 )
 image = image.resize((512, 512))
-prompt = "cat in a hat"
+prompt = "cat in a field"
 negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"
 
 generator = torch.Generator("cpu").manual_seed(0)
```
```diff
@@ -132,7 +132,7 @@ export_to_gif(frames, "pia-freeinit-animation.gif")
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-freeinit-output-cat.gif"
             alt="cat in a field"
```
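Taken together, the three PIA hunks above correct one doc example so the prompt matches the rendered sample ("cat in a field"). For orientation, the fragments assemble roughly as follows; this is a minimal sketch with the pipeline construction elided, `pipe` assumed to be an already-loaded `PIAPipeline` with FreeInit enabled, and the call signature assumed from the surrounding context:

```py
import torch

from diffusers.utils import export_to_gif, load_image

# `pipe` is assumed to be a configured PIAPipeline with FreeInit enabled (setup elided).
image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
)
image = image.resize((512, 512))

prompt = "cat in a field"  # corrected prompt; it previously read "cat in a hat"
negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"

# Fixed seed so the output matches the documented sample GIF.
generator = torch.Generator("cpu").manual_seed(0)

frames = pipe(image=image, prompt=prompt, negative_prompt=negative_prompt, generator=generator).frames[0]
export_to_gif(frames, "pia-freeinit-animation.gif")
```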
````diff
@@ -41,7 +41,7 @@ pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", tor
 pipe = pipe.to("cuda")
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt).frames
+video_frames = pipe(prompt).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
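The hunk above and the four that follow make the same fix: after the output-class changes further down, `.frames` holds one frame list per prompt in the batch, so the examples index `[0]` to get the single generated video before passing it to `export_to_video`. A minimal sketch of the updated pattern (the `torch_dtype` kwarg is assumed; the hunk header above truncates it):

```py
import torch

from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

# The dtype kwarg is assumed here; the original hunk header is truncated.
pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "Spiderman is surfing"
# .frames is batched (one frame list per prompt); [0] selects the only video.
video_frames = pipe(prompt).frames[0]
video_path = export_to_video(video_frames)
```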
````diff
@@ -64,7 +64,7 @@ pipe.enable_model_cpu_offload()
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=64).frames
+video_frames = pipe(prompt, num_frames=64).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
````diff
@@ -83,7 +83,7 @@ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt, num_inference_steps=25).frames
+video_frames = pipe(prompt, num_inference_steps=25).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
````diff
@@ -130,7 +130,7 @@ pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=24).frames
+video_frames = pipe(prompt, num_frames=24).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
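Aside from the `[0]` fix, this hunk's context shows the memory-saving setup that makes the 24-frame run feasible: model CPU offload, UNet forward chunking over the frame axis, and VAE slicing. Collected in one place (a sketch; `pipe` is the same pipeline as in the hunks above):

```py
# Keep submodules on the GPU only while they are executing.
pipe.enable_model_cpu_offload()
# Split the UNet feed-forward across dim=1 (the frame axis) to cut peak memory.
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
# Decode the latent video in slices rather than all frames at once.
pipe.enable_vae_slicing()
```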
````diff
@@ -148,7 +148,7 @@ pipe.enable_vae_slicing()
 
 video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]
 
-video_frames = pipe(prompt, video=video, strength=0.6).frames
+video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
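This last text-to-video hunk is the second stage of a two-pass flow: the low-resolution frames from the first pass are upsampled to 1024x576 and re-denoised with `strength=0.6`, which keeps the result close to the first pass while adding detail. Roughly (a sketch; `pipe` is assumed to be the upscale pipeline the guide loads earlier, and the first-pass frames are assumed to be uint8 NumPy arrays so `Image.fromarray` accepts them):

```py
from PIL import Image

from diffusers.utils import export_to_video

# video_frames: low-resolution frames from the first pass (assumed uint8 arrays).
video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]

# strength < 1.0 re-noises only partially, so the upscale stays faithful to pass one.
video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
video_path = export_to_video(video_frames)
```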
```diff
@@ -11,12 +11,13 @@ from ...utils import BaseOutput
 @dataclass
 class AnimateDiffPipelineOutput(BaseOutput):
     r"""
     Output class for AnimateDiff pipelines.
 
-    Args:
-        frames (`List[List[PIL.Image.Image]]` or `torch.Tensor` or `np.ndarray`):
-            List of PIL Images of length `batch_size` or torch.Tensor or np.ndarray of shape
-            `(batch_size, num_frames, height, width, num_channels)`.
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[List[PIL.Image.Image]], torch.Tensor, np.ndarray]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
```
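With the unified output class, `frames` can be a Torch tensor, a NumPy array, or a nested list of PIL images depending on the pipeline's `output_type`. Downstream code that wants one layout can normalize; a hedged sketch (the helper and the assumption that array/tensor values are floats in [0, 1] are ours, not part of the diff):

```py
from typing import List, Union

import numpy as np
import PIL.Image
import torch


def frames_to_pil(
    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]],
) -> List[List[PIL.Image.Image]]:
    """Coerce any documented `frames` layout to nested PIL lists.

    Assumes tensor/array inputs are floats in [0, 1] with shape
    (batch_size, num_frames, channels, height, width), per the new docstring.
    """
    if isinstance(frames, torch.Tensor):
        frames = frames.detach().cpu().float().numpy()
    if isinstance(frames, np.ndarray):
        # (B, F, C, H, W) -> (B, F, H, W, C), then scale to uint8 for PIL.
        frames = (frames.transpose(0, 1, 3, 4, 2) * 255).round().astype("uint8")
        return [[PIL.Image.fromarray(frame) for frame in video] for video in frames]
    return frames
```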
````diff
@@ -46,6 +46,7 @@ EXAMPLE_DOC_STRING = """
         ```py
         >>> import torch
         >>> from diffusers import I2VGenXLPipeline
+        >>> from diffusers.utils import export_to_gif, load_image
 
         >>> pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
         >>> pipeline.enable_model_cpu_offload()
````
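The added line fixes a broken doc example: the snippet apparently used `load_image` and `export_to_gif` without importing them. Filled out end to end, it runs roughly like this; everything past `enable_model_cpu_offload()` — the conditioning image, prompt, and call arguments — is a placeholder of ours, not from the hunk:

```py
import torch

from diffusers import I2VGenXLPipeline
from diffusers.utils import export_to_gif, load_image

pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
pipeline.enable_model_cpu_offload()

# Placeholder still image to animate -- substitute your own path or URL.
image = load_image("<path-or-url-to-a-conditioning-image>").convert("RGB")
prompt = "a gentle camera pan across the scene"  # illustrative prompt

generator = torch.Generator("cpu").manual_seed(0)
frames = pipeline(prompt=prompt, image=image, generator=generator).frames[0]
export_to_gif(frames, "i2v.gif")
```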
```diff
@@ -95,15 +96,16 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
 @dataclass
 class I2VGenXLPipelineOutput(BaseOutput):
     r"""
     Output class for image-to-video pipeline.
 
-    Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class I2VGenXLPipeline(DiffusionPipeline):
```
```diff
@@ -200,13 +200,13 @@ class PIAPipelineOutput(BaseOutput):
     Output class for PIAPipeline.
 
     Args:
-        frames (`torch.Tensor`, `np.ndarray`, or List[PIL.Image.Image]):
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
         Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
         NumPy array of shape `(batch_size, num_frames, channels, height, width,
         Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
     """
 
-    frames: Union[torch.Tensor, np.ndarray, PIL.Image.Image]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class PIAPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
```
```diff
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import List, Union
 
 import numpy as np
+import PIL
 import torch
 
 from ...utils import (
```
```diff
@@ -12,12 +13,13 @@ from ...utils import (
 @dataclass
 class TextToVideoSDPipelineOutput(BaseOutput):
     """
     Output class for text-to-video pipelines.
 
-    Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
```