mirror of https://github.com/huggingface/diffusers.git
synced 2026-02-17 00:06:20 +08:00

Compare commits: ci-pin-set...ltx2-add-c (22 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 65597652a7 | |
| | b4e7815306 | |
| | 49ef4c5ba2 | |
| | df2ca6ed22 | |
| | ca931c6416 | |
| | e8c5ee0c6e | |
| | 1c120c6ad9 | |
| | 1cdea99b8b | |
| | 8ba350cb47 | |
| | 83c8ae6b29 | |
| | 98f74b2fe4 | |
| | 33e6ec1f85 | |
| | 2e824f561a | |
| | d39d89f0d6 | |
| | 45051e18f5 | |
| | 70dff16996 | |
| | e0bd6a07f7 | |
| | 5577e08433 | |
| | 5368d73f7e | |
| | ed52c0d7cc | |
| | 02c750b590 | |
| | 2cc7e116ef | |
src/diffusers/__init__.py
@@ -569,6 +569,7 @@ else:
             "LEditsPPPipelineStableDiffusionXL",
             "LongCatImageEditPipeline",
             "LongCatImagePipeline",
+            "LTX2ConditionPipeline",
             "LTX2ImageToVideoPipeline",
             "LTX2LatentUpsamplePipeline",
             "LTX2Pipeline",
@@ -1324,6 +1325,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         LEditsPPPipelineStableDiffusionXL,
         LongCatImageEditPipeline,
         LongCatImagePipeline,
+        LTX2ConditionPipeline,
         LTX2ImageToVideoPipeline,
         LTX2LatentUpsamplePipeline,
         LTX2Pipeline,
src/diffusers/pipelines/__init__.py
@@ -292,7 +292,12 @@ else:
         "LTXLatentUpsamplePipeline",
         "LTXI2VLongMultiPromptPipeline",
     ]
-    _import_structure["ltx2"] = ["LTX2Pipeline", "LTX2ImageToVideoPipeline", "LTX2LatentUpsamplePipeline"]
+    _import_structure["ltx2"] = [
+        "LTX2Pipeline",
+        "LTX2ConditionPipeline",
+        "LTX2ImageToVideoPipeline",
+        "LTX2LatentUpsamplePipeline",
+    ]
     _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
     _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
     _import_structure["lucy"] = ["LucyEditPipeline"]
@@ -745,7 +750,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LTXLatentUpsamplePipeline,
             LTXPipeline,
         )
-        from .ltx2 import LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline, LTX2Pipeline
+        from .ltx2 import LTX2ConditionPipeline, LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline, LTX2Pipeline
        from .lucy import LucyEditPipeline
         from .lumina import LuminaPipeline, LuminaText2ImgPipeline
         from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
src/diffusers/pipelines/ltx2/__init__.py
@@ -25,6 +25,7 @@ else:
     _import_structure["connectors"] = ["LTX2TextConnectors"]
     _import_structure["latent_upsampler"] = ["LTX2LatentUpsamplerModel"]
     _import_structure["pipeline_ltx2"] = ["LTX2Pipeline"]
+    _import_structure["pipeline_ltx2_condition"] = ["LTX2ConditionPipeline"]
     _import_structure["pipeline_ltx2_image2video"] = ["LTX2ImageToVideoPipeline"]
     _import_structure["pipeline_ltx2_latent_upsample"] = ["LTX2LatentUpsamplePipeline"]
     _import_structure["vocoder"] = ["LTX2Vocoder"]
@@ -40,6 +41,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     from .connectors import LTX2TextConnectors
     from .latent_upsampler import LTX2LatentUpsamplerModel
     from .pipeline_ltx2 import LTX2Pipeline
+    from .pipeline_ltx2_condition import LTX2ConditionPipeline
     from .pipeline_ltx2_image2video import LTX2ImageToVideoPipeline
     from .pipeline_ltx2_latent_upsample import LTX2LatentUpsamplePipeline
     from .vocoder import LTX2Vocoder
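These `_import_structure` entries feed diffusers' lazy-import machinery, so registering the new pipeline name in each table is all the wiring the package needs. A condensed sketch of the existing pattern that these hunks extend (not code introduced by these commits):

```python
# Condensed sketch of the lazy-import pattern used by the __init__ files above;
# the real modules build a much larger _import_structure dict the same way.
import sys

from ..utils import _LazyModule  # diffusers' lazy module helper

_import_structure = {
    "ltx2": [
        "LTX2Pipeline",
        "LTX2ConditionPipeline",
        "LTX2ImageToVideoPipeline",
        "LTX2LatentUpsamplePipeline",
    ],
}

# Replacing the module object defers every submodule import until the
# corresponding attribute is first accessed.
sys.modules[__name__] = _LazyModule(
    __name__,
    globals()["__file__"],
    _import_structure,
    module_spec=__spec__,
)
```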
src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py (new file, 1489 lines)
File diff suppressed because it is too large.
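Because the new file's diff is suppressed, the pipeline's exact interface is not visible here. A hypothetical usage sketch, with the checkpoint id and call arguments assumed by analogy with the existing LTX pipelines:

```python
# Hypothetical usage of the new LTX2ConditionPipeline. The repo id and the
# argument names below are assumptions; the 1489-line file is not shown here.
import torch

from diffusers import LTX2ConditionPipeline
from diffusers.utils import export_to_video, load_image

pipe = LTX2ConditionPipeline.from_pretrained(
    "Lightricks/LTX-2",  # placeholder repo id, not confirmed by this diff
    torch_dtype=torch.bfloat16,
).to("cuda")

image = load_image("first_frame.png")  # conditioning frame
video = pipe(
    prompt="a red fox trotting through fresh snow",
    image=image,  # conditioning input; the exact parameter name is assumed
    num_frames=97,
    height=512,
    width=768,
).frames[0]

export_to_video(video, "fox.mp4", fps=24)
```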
src/diffusers/utils/dummy_torch_and_transformers_objects.py
@@ -2117,6 +2117,21 @@ class LongCatImagePipeline(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])
 
 
+class LTX2ConditionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
 class LTX2ImageToVideoPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
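As with every other stub in this file, the dummy keeps `from diffusers import LTX2ConditionPipeline` working in environments without `torch`/`transformers` and defers the failure to first use. A small sketch of the caller-side behavior:

```python
# Sketch: in an environment missing torch/transformers, the import still
# succeeds because the name resolves to the DummyObject stub above.
import diffusers

PipelineCls = diffusers.LTX2ConditionPipeline  # the dummy class, not the real one

try:
    PipelineCls()  # instantiation is what triggers requires_backends
except ImportError as err:
    # e.g. "LTX2ConditionPipeline requires the torch library but it was not
    # found in your environment. ..."
    print(err)
```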
src/diffusers/video_processor.py
@@ -25,9 +25,9 @@ from .image_processor import VaeImageProcessor, is_valid_image, is_valid_image_imagelist
 class VideoProcessor(VaeImageProcessor):
     r"""Simple video processor."""
 
-    def preprocess_video(self, video, height: int | None = None, width: int | None = None) -> torch.Tensor:
+    def preprocess_video(self, video, height: int | None = None, width: int | None = None, **kwargs) -> torch.Tensor:
         r"""
-        Preprocesses input video(s).
+        Preprocesses input video(s). Keyword arguments will be forwarded to `VaeImageProcessor.preprocess`.
 
         Args:
             video (`list[PIL.Image]`, `list[list[PIL.Image]]`, `torch.Tensor`, `np.array`, `list[torch.Tensor]`, `list[np.array]`):
@@ -49,6 +49,10 @@ class VideoProcessor(VaeImageProcessor):
             width (`int`, *optional*, defaults to `None`):
                 The width in preprocessed frames of the video. If `None`, will use `get_default_height_width()` to
                 get the default width.
+
+        Returns:
+            `torch.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`:
+                A 5D tensor holding the batched channels-first video(s).
         """
         if isinstance(video, list) and isinstance(video[0], np.ndarray) and video[0].ndim == 5:
             warnings.warn(
@@ -79,7 +83,7 @@ class VideoProcessor(VaeImageProcessor):
                 "Input is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Image"
             )
 
-        video = torch.stack([self.preprocess(img, height=height, width=width) for img in video], dim=0)
+        video = torch.stack([self.preprocess(img, height=height, width=width, **kwargs) for img in video], dim=0)
 
         # move the number of channels before the number of frames.
         video = video.permute(0, 2, 1, 3, 4)
@@ -87,10 +91,11 @@ class VideoProcessor(VaeImageProcessor):
         return video
 
     def postprocess_video(
-        self, video: torch.Tensor, output_type: str = "np"
+        self, video: torch.Tensor, output_type: str = "np", **kwargs
     ) -> np.ndarray | torch.Tensor | list[PIL.Image.Image]:
         r"""
-        Converts a video tensor to a list of frames for export.
+        Converts a video tensor to a list of frames for export. Keyword arguments will be forwarded to
+        `VaeImageProcessor.postprocess`.
 
         Args:
             video (`torch.Tensor`): The video as a tensor.
@@ -100,7 +105,7 @@ class VideoProcessor(VaeImageProcessor):
         outputs = []
         for batch_idx in range(batch_size):
             batch_vid = video[batch_idx].permute(1, 0, 2, 3)
-            batch_output = self.postprocess(batch_vid, output_type)
+            batch_output = self.postprocess(batch_vid, output_type, **kwargs)
             outputs.append(batch_output)
 
         if output_type == "np":
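The effect of the forwarding is that per-call `VaeImageProcessor` options now reach the video entry points as well, for example `resize_mode` on the way in and `do_denormalize` on the way out. A short sketch (frame counts and sizes are arbitrary):

```python
# Sketch of the new **kwargs forwarding in VideoProcessor.
from PIL import Image

from diffusers.video_processor import VideoProcessor

processor = VideoProcessor(vae_scale_factor=8)

# One 9-frame video as a list of PIL images.
frames = [Image.new("RGB", (720, 480)) for _ in range(9)]

# `resize_mode` is a VaeImageProcessor.preprocess option that previously
# could not be reached from preprocess_video.
video = processor.preprocess_video(frames, height=512, width=768, resize_mode="crop")
print(video.shape)  # (1, 3, 9, 512, 768): batch, channels, frames, height, width

# `do_denormalize` is forwarded to VaeImageProcessor.postprocess; one flag
# per frame of the video.
pil_frames = processor.postprocess_video(
    video, output_type="pil", do_denormalize=[True] * video.shape[2]
)
```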