mirror of
https://github.com/huggingface/diffusers.git
synced 2026-02-01 00:15:00 +08:00
Compare commits
4 Commits
transforme
...
ltx2-add-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2cc7e116ef | ||
|
|
6a1904eb06 | ||
|
|
f5b6b6625a | ||
|
|
1be2f7e8c5 |
@@ -552,6 +552,7 @@ else:
|
||||
"LEditsPPPipelineStableDiffusionXL",
|
||||
"LongCatImageEditPipeline",
|
||||
"LongCatImagePipeline",
|
||||
"LTX2ConditionPipeline",
|
||||
"LTX2ImageToVideoPipeline",
|
||||
"LTX2LatentUpsamplePipeline",
|
||||
"LTX2Pipeline",
|
||||
@@ -1284,6 +1285,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
LEditsPPPipelineStableDiffusionXL,
|
||||
LongCatImageEditPipeline,
|
||||
LongCatImagePipeline,
|
||||
LTX2ConditionPipeline,
|
||||
LTX2ImageToVideoPipeline,
|
||||
LTX2LatentUpsamplePipeline,
|
||||
LTX2Pipeline,
|
||||
|
||||
@@ -35,8 +35,8 @@ from . import BaseDiffusersCLICommand
|
||||
def conversion_command_factory(args: Namespace):
|
||||
if args.use_auth_token:
|
||||
warnings.warn(
|
||||
"The `--use_auth_token` flag is deprecated and will be removed in a future version. Authentication is now"
|
||||
" handled automatically if user is logged in."
|
||||
"The `--use_auth_token` flag is deprecated and will be removed in a future version."
|
||||
" Authentication is now handled automatically if the user is logged in."
|
||||
)
|
||||
return FP16SafetensorsCommand(args.ckpt_id, args.fp16, args.use_safetensors)
|
||||
|
||||
@@ -92,8 +92,8 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
|
||||
pipeline_class = getattr(import_module("diffusers"), pipeline_class_name)
|
||||
self.logger.info(f"Pipeline class imported: {pipeline_class_name}.")
|
||||
|
||||
# Load the appropriate pipeline. We could have use `DiffusionPipeline`
|
||||
# here, but just to avoid any rough edge cases.
|
||||
# Load the appropriate pipeline. We could have used `DiffusionPipeline`
|
||||
# here, but just to avoid potential edge cases.
|
||||
pipeline = pipeline_class.from_pretrained(
|
||||
self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32
|
||||
)
|
||||
|
||||
@@ -291,7 +291,11 @@ else:
|
||||
"LTXLatentUpsamplePipeline",
|
||||
"LTXI2VLongMultiPromptPipeline",
|
||||
]
|
||||
_import_structure["ltx2"] = ["LTX2Pipeline", "LTX2ImageToVideoPipeline", "LTX2LatentUpsamplePipeline"]
|
||||
_import_structure["ltx2"] = [
|
||||
"LTX2Pipeline",
|
||||
"LTX2ConditionPipeline", "LTX2ImageToVideoPipeline",
|
||||
"LTX2LatentUpsamplePipeline",
|
||||
]
|
||||
_import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
|
||||
_import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
|
||||
_import_structure["lucy"] = ["LucyEditPipeline"]
|
||||
@@ -742,7 +746,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
LTXLatentUpsamplePipeline,
|
||||
LTXPipeline,
|
||||
)
|
||||
from .ltx2 import LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline, LTX2Pipeline
|
||||
from .ltx2 import LTX2ConditionPipeline, LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline, LTX2Pipeline
|
||||
from .lucy import LucyEditPipeline
|
||||
from .lumina import LuminaPipeline, LuminaText2ImgPipeline
|
||||
from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
|
||||
|
||||
@@ -407,8 +407,8 @@ class GlmImagePipeline(DiffusionPipeline):
|
||||
|
||||
if len(source_grids) > 0:
|
||||
prior_token_image_embed = self.vision_language_encoder.get_image_features(
|
||||
inputs["pixel_values"], source_grids, return_dict=False
|
||||
)
|
||||
inputs["pixel_values"], source_grids
|
||||
).pooler_output
|
||||
prior_token_image_embed = torch.cat(prior_token_image_embed, dim=0)
|
||||
prior_token_image_ids_d32 = self.vision_language_encoder.get_image_tokens(
|
||||
prior_token_image_embed, source_grids
|
||||
|
||||
@@ -25,6 +25,7 @@ else:
|
||||
_import_structure["connectors"] = ["LTX2TextConnectors"]
|
||||
_import_structure["latent_upsampler"] = ["LTX2LatentUpsamplerModel"]
|
||||
_import_structure["pipeline_ltx2"] = ["LTX2Pipeline"]
|
||||
_import_structure["pipeline_ltx2_condition"] = ["LTX2ConditionPipeline"]
|
||||
_import_structure["pipeline_ltx2_image2video"] = ["LTX2ImageToVideoPipeline"]
|
||||
_import_structure["pipeline_ltx2_latent_upsample"] = ["LTX2LatentUpsamplePipeline"]
|
||||
_import_structure["vocoder"] = ["LTX2Vocoder"]
|
||||
@@ -40,6 +41,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
from .connectors import LTX2TextConnectors
|
||||
from .latent_upsampler import LTX2LatentUpsamplerModel
|
||||
from .pipeline_ltx2 import LTX2Pipeline
|
||||
from .pipeline_ltx2_condition import LTX2ConditionPipeline
|
||||
from .pipeline_ltx2_image2video import LTX2ImageToVideoPipeline
|
||||
from .pipeline_ltx2_latent_upsample import LTX2LatentUpsamplePipeline
|
||||
from .vocoder import LTX2Vocoder
|
||||
|
||||
1332
src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py
Normal file
1332
src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -496,8 +496,13 @@ class WanPipeline(DiffusionPipeline, WanLoraLoaderMixin):
|
||||
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
|
||||
num_frames = max(num_frames, 1)
|
||||
|
||||
h_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[1]
|
||||
w_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[2]
|
||||
patch_size = (
|
||||
self.transformer.config.patch_size
|
||||
if self.transformer is not None
|
||||
else self.transformer_2.config.patch_size
|
||||
)
|
||||
h_multiple_of = self.vae_scale_factor_spatial * patch_size[1]
|
||||
w_multiple_of = self.vae_scale_factor_spatial * patch_size[2]
|
||||
calc_height = height // h_multiple_of * h_multiple_of
|
||||
calc_width = width // w_multiple_of * w_multiple_of
|
||||
if height != calc_height or width != calc_width:
|
||||
|
||||
@@ -637,8 +637,13 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
|
||||
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
|
||||
num_frames = max(num_frames, 1)
|
||||
|
||||
h_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[1]
|
||||
w_multiple_of = self.vae_scale_factor_spatial * self.transformer.config.patch_size[2]
|
||||
patch_size = (
|
||||
self.transformer.config.patch_size
|
||||
if self.transformer is not None
|
||||
else self.transformer_2.config.patch_size
|
||||
)
|
||||
h_multiple_of = self.vae_scale_factor_spatial * patch_size[1]
|
||||
w_multiple_of = self.vae_scale_factor_spatial * patch_size[2]
|
||||
calc_height = height // h_multiple_of * h_multiple_of
|
||||
calc_width = width // w_multiple_of * w_multiple_of
|
||||
if height != calc_height or width != calc_width:
|
||||
|
||||
Reference in New Issue
Block a user