Compare commits

...

6 Commits

Author      SHA1        Message                                                          Date
Dhruv Nair  a30871a0c5  update                                                           2024-02-02 05:12:27 +00:00
Dhruv Nair  9237ea5787  update                                                           2024-02-02 05:07:12 +00:00
Dhruv Nair  f915b558d4  Update src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py   2024-02-02 10:27:34 +05:30
                        (Co-authored-by: YiYi Xu <yixu310@gmail.com>)
Dhruv Nair  e2827f819a  update                                                           2024-02-02 04:30:44 +00:00
Dhruv Nair  3cf7b068c3  update                                                           2024-02-01 08:02:27 +00:00
Dhruv Nair  c7652d3d60  update                                                           2024-02-01 07:58:59 +00:00
7 changed files with 39 additions and 34 deletions

View File

@@ -31,7 +31,7 @@ Sample output with I2VGenXL:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        library.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/i2vgen-xl-example.gif"
             alt="library"

View File

@@ -70,7 +70,7 @@ Here are some sample outputs:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-default-output.gif"
             alt="cat in a field"
@@ -119,7 +119,7 @@ image = load_image(
     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
 )
 image = image.resize((512, 512))
-prompt = "cat in a hat"
+prompt = "cat in a field"
 negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"
 generator = torch.Generator("cpu").manual_seed(0)
@@ -132,7 +132,7 @@ export_to_gif(frames, "pia-freeinit-animation.gif")
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-freeinit-output-cat.gif"
             alt="cat in a field"

View File

@@ -41,7 +41,7 @@ pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", tor
 pipe = pipe.to("cuda")
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt).frames
+video_frames = pipe(prompt).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -64,7 +64,7 @@ pipe.enable_model_cpu_offload()
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=64).frames
+video_frames = pipe(prompt, num_frames=64).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -83,7 +83,7 @@ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt, num_inference_steps=25).frames
+video_frames = pipe(prompt, num_inference_steps=25).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -130,7 +130,7 @@ pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=24).frames
+video_frames = pipe(prompt, num_frames=24).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -148,7 +148,7 @@ pipe.enable_vae_slicing()
 
 video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]
 
-video_frames = pipe(prompt, video=video, strength=0.6).frames
+video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
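All five hunks in this file make the same change: the pipeline's `frames` output now carries a leading batch dimension, so the docs index `[0]` to select the first (and only) generated video before handing it to `export_to_video`. A minimal sketch of the convention, following the first hunk:

```py
# Minimal sketch of the batched-frames convention these hunks document.
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "Spiderman is surfing"
# `.frames` is batched; [0] selects the first video's frame sequence.
video_frames = pipe(prompt).frames[0]
video_path = export_to_video(video_frames)
```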

View File

@@ -14,9 +14,10 @@ class AnimateDiffPipelineOutput(BaseOutput):
     Output class for AnimateDiff pipelines.
 
     Args:
-        frames (`List[List[PIL.Image.Image]]` or `torch.Tensor` or `np.ndarray`):
-            List of PIL Images of length `batch_size` or torch.Tensor or np.ndarray of shape
-            `(batch_size, num_frames, height, width, num_channels)`.
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[List[PIL.Image.Image]], torch.Tensor, np.ndarray]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
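Under the updated annotation, the nested-list form of `frames` has length `batch_size`, with each entry holding `num_frames` PIL images. A small helper sketch for consuming that layout; the function itself is ours, not part of the diff:

```py
# Helper sketch for the nested-list layout of AnimateDiffPipelineOutput.frames;
# illustrative only, not part of this diff.
from typing import List

import PIL.Image
from diffusers.utils import export_to_gif

def export_first_video(frames: List[List[PIL.Image.Image]], path: str = "animation.gif") -> str:
    # frames has length batch_size; frames[0] is one video of num_frames PIL images
    return export_to_gif(frames[0], path)
```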

View File

@@ -46,6 +46,7 @@ EXAMPLE_DOC_STRING = """
         ```py
         >>> import torch
         >>> from diffusers import I2VGenXLPipeline
+        >>> from diffusers.utils import export_to_gif, load_image
 
         >>> pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
         >>> pipeline.enable_model_cpu_offload()
@@ -98,12 +99,13 @@ class I2VGenXLPipelineOutput(BaseOutput):
     Output class for image-to-video pipeline.
 
     Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class I2VGenXLPipeline(DiffusionPipeline):
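The first hunk in this file adds the `export_to_gif`/`load_image` import that the rest of the example docstring relies on. For reference, a sketch of how such an example plausibly continues under the new batched output; the image URL and prompt are assumptions, while the imports, checkpoint id, and `.frames[0]` convention appear in this diff:

```py
# Plausible continuation of the EXAMPLE_DOC_STRING above; the image URL and
# prompt are illustrative assumptions.
import torch
from diffusers import I2VGenXLPipeline
from diffusers.utils import export_to_gif, load_image

pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
pipeline.enable_model_cpu_offload()

image = load_image("https://huggingface.co/path/to/input.png")  # assumed URL
prompt = "Papers were floating in the air on a table in the library"  # assumed prompt
generator = torch.Generator("cpu").manual_seed(0)

frames = pipeline(prompt=prompt, image=image, generator=generator).frames[0]
export_to_gif(frames, "i2v.gif")
```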

View File

@@ -200,13 +200,13 @@ class PIAPipelineOutput(BaseOutput):
     Output class for PIAPipeline.
 
     Args:
-        frames (`torch.Tensor`, `np.ndarray`, or List[PIL.Image.Image]):
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
            Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
            NumPy array of shape `(batch_size, num_frames, channels, height, width,
            Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
     """
 
-    frames: Union[torch.Tensor, np.ndarray, PIL.Image.Image]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class PIAPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
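The corrected annotation admits three layouts, so generic postprocessing code has to dispatch on the runtime type. A hedged normalization sketch, assuming float values in [0, 1] and the `(batch_size, num_frames, channels, height, width)` layout the docstring states; the helper is illustrative, not part of this diff:

```py
# Normalizes the three documented `frames` layouts to nested PIL lists.
# Assumes float values in [0, 1] and (batch, frames, channels, height, width).
from typing import List, Union

import numpy as np
import PIL.Image
import torch

def to_pil_videos(
    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]],
) -> List[List[PIL.Image.Image]]:
    if isinstance(frames, list):
        return frames  # already List[List[PIL.Image.Image]]
    if isinstance(frames, torch.Tensor):
        frames = frames.detach().cpu().float().numpy()
    frames = (frames * 255).round().astype("uint8")
    frames = frames.transpose(0, 1, 3, 4, 2)  # move channels last for PIL
    return [[PIL.Image.fromarray(f) for f in video] for video in frames]
```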

View File

@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import List, Union
 
 import numpy as np
+import PIL
 import torch
 
 from ...utils import (
@@ -15,9 +16,10 @@ class TextToVideoSDPipelineOutput(BaseOutput):
     Output class for text-to-video pipelines.
 
     Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]