Compare commits

...

6 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Dhruv Nair | a30871a0c5 | update | 2024-02-02 05:12:27 +00:00 |
| Dhruv Nair | 9237ea5787 | update | 2024-02-02 05:07:12 +00:00 |
| Dhruv Nair | f915b558d4 | Update src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py (Co-authored-by: YiYi Xu <yixu310@gmail.com>) | 2024-02-02 10:27:34 +05:30 |
| Dhruv Nair | e2827f819a | update | 2024-02-02 04:30:44 +00:00 |
| Dhruv Nair | 3cf7b068c3 | update | 2024-02-01 08:02:27 +00:00 |
| Dhruv Nair | c7652d3d60 | update | 2024-02-01 07:58:59 +00:00 |
7 changed files with 39 additions and 34 deletions

View File

@@ -31,7 +31,7 @@ Sample output with I2VGenXL:
 <table>
 <tr>
 <td><center>
-masterpiece, bestquality, sunset.
+library.
 <br>
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/i2vgen-xl-example.gif"
 alt="library"

View File

@@ -70,7 +70,7 @@ Here are some sample outputs:
 <table>
 <tr>
 <td><center>
-masterpiece, bestquality, sunset.
+cat in a field.
 <br>
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-default-output.gif"
 alt="cat in a field"
@@ -119,7 +119,7 @@ image = load_image(
     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
 )
 image = image.resize((512, 512))

-prompt = "cat in a hat"
+prompt = "cat in a field"
 negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"
 generator = torch.Generator("cpu").manual_seed(0)
@@ -132,7 +132,7 @@ export_to_gif(frames, "pia-freeinit-animation.gif")
 <table>
 <tr>
 <td><center>
-masterpiece, bestquality, sunset.
+cat in a field.
 <br>
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-freeinit-output-cat.gif"
 alt="cat in a field"

View File

@@ -41,7 +41,7 @@ pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", tor
 pipe = pipe.to("cuda")

 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt).frames
+video_frames = pipe(prompt).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -64,7 +64,7 @@ pipe.enable_model_cpu_offload()
 pipe.enable_vae_slicing()

 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=64).frames
+video_frames = pipe(prompt, num_frames=64).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -83,7 +83,7 @@ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()

 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt, num_inference_steps=25).frames
+video_frames = pipe(prompt, num_inference_steps=25).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -130,7 +130,7 @@ pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
 pipe.enable_vae_slicing()

 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=24).frames
+video_frames = pipe(prompt, num_frames=24).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
@@ -148,7 +148,7 @@ pipe.enable_vae_slicing()
 video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]

-video_frames = pipe(prompt, video=video, strength=0.6).frames
+video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
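Every hunk in this file makes the same fix: `.frames` is now indexed by batch, so the doc examples take the first element before exporting. A minimal sketch of the new convention, using the model id and prompt from the hunks above (everything else is standard diffusers usage, not part of this diff):

```py
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"
)
pipe = pipe.to("cuda")

# `.frames` is batched: frames[i] is the frame sequence of the i-th generated
# video, which is the shape export_to_video expects.
video_frames = pipe("Spiderman is surfing").frames[0]
video_path = export_to_video(video_frames)
```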

View File

@@ -14,9 +14,10 @@ class AnimateDiffPipelineOutput(BaseOutput):
     Output class for AnimateDiff pipelines.

     Args:
-        frames (`List[List[PIL.Image.Image]]` or `torch.Tensor` or `np.ndarray`):
-            List of PIL Images of length `batch_size` or torch.Tensor or np.ndarray of shape
-            `(batch_size, num_frames, height, width, num_channels)`.
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """

-    frames: Union[List[List[PIL.Image.Image]], torch.Tensor, np.ndarray]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
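The widened `Union` above is the contract the I2VGenXL, PIA, and text-to-video hunks below converge on as well. A small helper to illustrate it; the function itself is hypothetical and only demonstrates that all three layouts index the batch first:

```py
from typing import List, Union

import numpy as np
import PIL.Image
import torch

# Hypothetical helper, not part of this diff.
def first_video(
    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
):
    # Tensor/array layout: (batch_size, num_frames, channels, height, width);
    # nested-list layout: [batch_size][num_frames] PIL images. Either way,
    # frames[0] is the frame sequence of the first video in the batch.
    return frames[0]
```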

View File

@@ -46,6 +46,7 @@ EXAMPLE_DOC_STRING = """
         ```py
         >>> import torch
         >>> from diffusers import I2VGenXLPipeline
+        >>> from diffusers.utils import export_to_gif, load_image

         >>> pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
         >>> pipeline.enable_model_cpu_offload()
@@ -98,12 +99,13 @@ class I2VGenXLPipelineOutput(BaseOutput):
     Output class for image-to-video pipeline.

     Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """

-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]


 class I2VGenXLPipeline(DiffusionPipeline):
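The first hunk adds the two `diffusers.utils` imports the example docstring relies on. Roughly, the completed example reads as below; the input image URL and prompt here are placeholders, not taken from this diff:

```py
import torch
from diffusers import I2VGenXLPipeline
from diffusers.utils import export_to_gif, load_image

pipeline = I2VGenXLPipeline.from_pretrained(
    "ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16"
)
pipeline.enable_model_cpu_offload()

# Placeholder input; the real docstring example loads its own sample image.
image = load_image("https://example.com/input.png").convert("RGB")
frames = pipeline(prompt="library", image=image).frames[0]
export_to_gif(frames, "i2vgen-xl.gif")
```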

View File

@@ -200,13 +200,13 @@ class PIAPipelineOutput(BaseOutput):
Output class for PIAPipeline. Output class for PIAPipeline.
Args: Args:
frames (`torch.Tensor`, `np.ndarray`, or List[PIL.Image.Image]): frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`, Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
NumPy array of shape `(batch_size, num_frames, channels, height, width, NumPy array of shape `(batch_size, num_frames, channels, height, width,
Torch tensor of shape `(batch_size, num_frames, channels, height, width)`. Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
""" """
frames: Union[torch.Tensor, np.ndarray, PIL.Image.Image] frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
class PIAPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin): class PIAPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
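The annotation fix matters in practice: with the default `"pil"` output type the field is a nested list, not a flat list of images. A quick shape check, assuming `pipe` is a loaded PIA pipeline and `image` its conditioning image (both taken as given here, per the PIA example earlier in this diff):

```py
import PIL.Image

# `pipe` and `image` are assumed to exist; see the PIA doc example above.
output = pipe(prompt="cat in a field", image=image)
assert isinstance(output.frames, list)        # one entry per batch item
assert isinstance(output.frames[0], list)     # one entry per frame
assert isinstance(output.frames[0][0], PIL.Image.Image)
```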

View File

@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import List, Union

 import numpy as np
+import PIL
 import torch

 from ...utils import (
@@ -15,9 +16,10 @@ class TextToVideoSDPipelineOutput(BaseOutput):
     Output class for text-to-video pipelines.

     Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """

-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]