mirror of https://github.com/huggingface/diffusers.git
Compare commits

6 Commits: edit-pypi-… ... vid-docs-f…
| Author | SHA1 | Date |
|---|---|---|
| | a30871a0c5 | |
| | 9237ea5787 | |
| | f915b558d4 | |
| | e2827f819a | |
| | 3cf7b068c3 | |
| | c7652d3d60 | |
```diff
@@ -31,7 +31,7 @@ Sample output with I2VGenXL:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        library.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/i2vgen-xl-example.gif"
             alt="library"
```
```diff
@@ -70,7 +70,7 @@ Here are some sample outputs:
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-default-output.gif"
             alt="cat in a field"
```
```diff
@@ -119,7 +119,7 @@ image = load_image(
     "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
 )
 image = image.resize((512, 512))
-prompt = "cat in a hat"
+prompt = "cat in a field"
 negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"
 
 generator = torch.Generator("cpu").manual_seed(0)
```
```diff
@@ -132,7 +132,7 @@ export_to_gif(frames, "pia-freeinit-animation.gif")
 <table>
     <tr>
         <td><center>
-        masterpiece, bestquality, sunset.
+        cat in a field.
         <br>
         <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/pia-freeinit-output-cat.gif"
             alt="cat in a field"
```
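Taken together, the three PIA hunks above correct one doc example so the prompt matches the rendered sample ("cat in a field"). For orientation, the fragments assemble roughly as follows; this is a minimal sketch with the pipeline construction elided, `pipe` assumed to be an already-loaded `PIAPipeline` with FreeInit enabled, and the call signature assumed from the surrounding context:

```py
import torch

from diffusers.utils import export_to_gif, load_image

# `pipe` is assumed to be a configured PIAPipeline with FreeInit enabled (setup elided).
image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
)
image = image.resize((512, 512))

prompt = "cat in a field"  # corrected prompt; it previously read "cat in a hat"
negative_prompt = "wrong white balance, dark, sketches,worst quality,low quality"

# Fixed seed so the output matches the documented sample GIF.
generator = torch.Generator("cpu").manual_seed(0)

frames = pipe(image=image, prompt=prompt, negative_prompt=negative_prompt, generator=generator).frames[0]
export_to_gif(frames, "pia-freeinit-animation.gif")
```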
````diff
@@ -41,7 +41,7 @@ pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", tor
 pipe = pipe.to("cuda")
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt).frames
+video_frames = pipe(prompt).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
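The hunk above and the four that follow make the same fix: after the output-class changes further down, `.frames` holds one frame list per prompt in the batch, so the examples index `[0]` to get the single generated video before passing it to `export_to_video`. A minimal sketch of the updated pattern (the `torch_dtype` kwarg is assumed; the hunk header above truncates it):

```py
import torch

from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

# The dtype kwarg is assumed here; the original hunk header is truncated.
pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "Spiderman is surfing"
# .frames is batched (one frame list per prompt); [0] selects the only video.
video_frames = pipe(prompt).frames[0]
video_path = export_to_video(video_frames)
```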
````diff
@@ -64,7 +64,7 @@ pipe.enable_model_cpu_offload()
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=64).frames
+video_frames = pipe(prompt, num_frames=64).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
````diff
@@ -83,7 +83,7 @@ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()
 
 prompt = "Spiderman is surfing"
-video_frames = pipe(prompt, num_inference_steps=25).frames
+video_frames = pipe(prompt, num_inference_steps=25).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
````diff
@@ -130,7 +130,7 @@ pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
 pipe.enable_vae_slicing()
 
 prompt = "Darth Vader surfing a wave"
-video_frames = pipe(prompt, num_frames=24).frames
+video_frames = pipe(prompt, num_frames=24).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
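Aside from the `[0]` fix, this hunk's context shows the memory-saving setup that makes the 24-frame run feasible: model CPU offload, UNet forward chunking over the frame axis, and VAE slicing. Collected in one place (a sketch; `pipe` is the same pipeline as in the hunks above):

```py
# Keep submodules on the GPU only while they are executing.
pipe.enable_model_cpu_offload()
# Split the UNet feed-forward across dim=1 (the frame axis) to cut peak memory.
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
# Decode the latent video in slices rather than all frames at once.
pipe.enable_vae_slicing()
```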
````diff
@@ -148,7 +148,7 @@ pipe.enable_vae_slicing()
 
 video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]
 
-video_frames = pipe(prompt, video=video, strength=0.6).frames
+video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
 video_path = export_to_video(video_frames)
 video_path
 ```
````
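This last text-to-video hunk is the second stage of a two-pass flow: the low-resolution frames from the first pass are upsampled to 1024x576 and re-denoised with `strength=0.6`, which keeps the result close to the first pass while adding detail. Roughly (a sketch; `pipe` is assumed to be the upscale pipeline the guide loads earlier, and the first-pass frames are assumed to be uint8 NumPy arrays so `Image.fromarray` accepts them):

```py
from PIL import Image

from diffusers.utils import export_to_video

# video_frames: low-resolution frames from the first pass (assumed uint8 arrays).
video = [Image.fromarray(frame).resize((1024, 576)) for frame in video_frames]

# strength < 1.0 re-noises only partially, so the upscale stays faithful to pass one.
video_frames = pipe(prompt, video=video, strength=0.6).frames[0]
video_path = export_to_video(video_frames)
```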
```diff
@@ -11,12 +11,13 @@ from ...utils import BaseOutput
 @dataclass
 class AnimateDiffPipelineOutput(BaseOutput):
     r"""
     Output class for AnimateDiff pipelines.
 
-    Args:
-        frames (`List[List[PIL.Image.Image]]` or `torch.Tensor` or `np.ndarray`):
-            List of PIL Images of length `batch_size` or torch.Tensor or np.ndarray of shape
-            `(batch_size, num_frames, height, width, num_channels)`.
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[List[PIL.Image.Image]], torch.Tensor, np.ndarray]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
```
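With the unified output class, `frames` can be a Torch tensor, a NumPy array, or a nested list of PIL images depending on the pipeline's `output_type`. Downstream code that wants one layout can normalize; a hedged sketch (the helper and the assumption that array/tensor values are floats in [0, 1] are ours, not part of the diff):

```py
from typing import List, Union

import numpy as np
import PIL.Image
import torch


def frames_to_pil(
    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]],
) -> List[List[PIL.Image.Image]]:
    """Coerce any documented `frames` layout to nested PIL lists.

    Assumes tensor/array inputs are floats in [0, 1] with shape
    (batch_size, num_frames, channels, height, width), per the new docstring.
    """
    if isinstance(frames, torch.Tensor):
        frames = frames.detach().cpu().float().numpy()
    if isinstance(frames, np.ndarray):
        # (B, F, C, H, W) -> (B, F, H, W, C), then scale to uint8 for PIL.
        frames = (frames.transpose(0, 1, 3, 4, 2) * 255).round().astype("uint8")
        return [[PIL.Image.fromarray(frame) for frame in video] for video in frames]
    return frames
```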
````diff
@@ -46,6 +46,7 @@ EXAMPLE_DOC_STRING = """
         ```py
         >>> import torch
         >>> from diffusers import I2VGenXLPipeline
+        >>> from diffusers.utils import export_to_gif, load_image
 
         >>> pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
         >>> pipeline.enable_model_cpu_offload()
````
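The added line fixes a broken doc example: the snippet apparently used `load_image` and `export_to_gif` without importing them. Filled out end to end, it runs roughly like this; everything past `enable_model_cpu_offload()` — the conditioning image, prompt, and call arguments — is a placeholder of ours, not from the hunk:

```py
import torch

from diffusers import I2VGenXLPipeline
from diffusers.utils import export_to_gif, load_image

pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
pipeline.enable_model_cpu_offload()

# Placeholder still image to animate -- substitute your own path or URL.
image = load_image("<path-or-url-to-a-conditioning-image>").convert("RGB")
prompt = "a gentle camera pan across the scene"  # illustrative prompt

generator = torch.Generator("cpu").manual_seed(0)
frames = pipeline(prompt=prompt, image=image, generator=generator).frames[0]
export_to_gif(frames, "i2v.gif")
```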
```diff
@@ -95,15 +96,16 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
 @dataclass
 class I2VGenXLPipelineOutput(BaseOutput):
     r"""
     Output class for image-to-video pipeline.
 
-    Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class I2VGenXLPipeline(DiffusionPipeline):
```
```diff
@@ -200,13 +200,13 @@ class PIAPipelineOutput(BaseOutput):
     Output class for PIAPipeline.
 
     Args:
-        frames (`torch.Tensor`, `np.ndarray`, or List[PIL.Image.Image]):
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
         Nested list of length `batch_size` with denoised PIL image sequences of length `num_frames`,
         NumPy array of shape `(batch_size, num_frames, channels, height, width,
         Torch tensor of shape `(batch_size, num_frames, channels, height, width)`.
     """
 
-    frames: Union[torch.Tensor, np.ndarray, PIL.Image.Image]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
 
 
 class PIAPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
```
```diff
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import List, Union
 
 import numpy as np
+import PIL
 import torch
 
 from ...utils import (
```
```diff
@@ -12,12 +13,13 @@ from ...utils import (
 @dataclass
 class TextToVideoSDPipelineOutput(BaseOutput):
     """
     Output class for text-to-video pipelines.
 
-    Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
-            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
-            a `torch` tensor. The length of the list denotes the video length (the number of frames).
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing denoised
+            PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`
     """
 
-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
```