Compare commits

..

2 Commits

Author               SHA1        Message  Date
yiyi@huggingface.co  929414d0ea  style    2026-01-15 04:35:34 +00:00
yiyi@huggingface.co  d2ded13cd6  up       2026-01-15 04:32:52 +00:00
10 changed files with 40 additions and 1488 deletions

View File

@@ -99,9 +99,3 @@ image.save("chroma-single-file.png")
[[autodoc]] ChromaImg2ImgPipeline
- all
- __call__
## ChromaInpaintPipeline
[[autodoc]] ChromaInpaintPipeline
- all
- __call__

View File

@@ -460,7 +460,6 @@ else:
"BriaFiboPipeline",
"BriaPipeline",
"ChromaImg2ImgPipeline",
"ChromaInpaintPipeline",
"ChromaPipeline",
"ChronoEditPipeline",
"CLIPImageProjection",
@@ -1187,7 +1186,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
BriaFiboPipeline,
BriaPipeline,
ChromaImg2ImgPipeline,
ChromaInpaintPipeline,
ChromaPipeline,
ChronoEditPipeline,
CLIPImageProjection,

View File

@@ -1573,6 +1573,8 @@ def _templated_context_parallel_attention(
backward_op,
_parallel_config: Optional["ParallelConfig"] = None,
):
if attn_mask is not None:
raise ValueError("Attention mask is not yet supported for templated attention.")
if is_causal:
raise ValueError("Causal attention is not yet supported for templated attention.")
if enable_gqa:
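This hunk adds up-front validation to the templated context-parallel attention path so unsupported arguments raise immediately instead of producing silently wrong outputs. Below is a minimal, self-contained sketch of the same validation pattern; the wrapper name is a placeholder and the GQA message is assumed, since the hunk is truncated before the `enable_gqa` branch.

```python
# Illustrative sketch of the early-validation pattern added above; the function name
# and the GQA error message are placeholders, not the actual diffusers internals.
from typing import Optional

import torch


def validate_templated_attention_args(
    attn_mask: Optional[torch.Tensor],
    is_causal: bool,
    enable_gqa: bool,
) -> None:
    # Reject unsupported arguments up front so callers get a clear ValueError.
    if attn_mask is not None:
        raise ValueError("Attention mask is not yet supported for templated attention.")
    if is_causal:
        raise ValueError("Causal attention is not yet supported for templated attention.")
    if enable_gqa:
        raise ValueError("GQA is not yet supported for templated attention.")  # assumed wording


# Passing a mask now raises instead of being ignored:
try:
    validate_templated_attention_args(
        attn_mask=torch.ones(1, 1, 8, 8, dtype=torch.bool), is_causal=False, enable_gqa=False
    )
except ValueError as err:
    print(err)  # Attention mask is not yet supported for templated attention.
```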

View File

@@ -761,14 +761,11 @@ class QwenImageTransformer2DModel(
_no_split_modules = ["QwenImageTransformerBlock"]
_skip_layerwise_casting_patterns = ["pos_embed", "norm"]
_repeated_blocks = ["QwenImageTransformerBlock"]
# Make CP plan compatible with https://github.com/huggingface/diffusers/pull/12702
_cp_plan = {
"transformer_blocks.0": {
"": {
"hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False),
"encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False),
},
"transformer_blocks.*": {
"modulate_index": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False),
"encoder_hidden_states_mask": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False),
},
"pos_embed": {
0: ContextParallelInput(split_dim=0, expected_dims=2, split_output=True),
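For readers unfamiliar with the `_cp_plan` notation: `ContextParallelInput(split_dim=1, expected_dims=3, split_output=False)` declares that a 3-D input such as `hidden_states` is sharded along its sequence dimension before the matching module runs, one shard per context-parallel rank. The toy sketch below shows only that splitting semantics with `torch.chunk`; it is not the diffusers implementation, and the world size and shapes are made up.

```python
# Toy illustration of the split semantics only (not the diffusers context-parallel code).
import torch

world_size = 2  # assumed context-parallel mesh size for this sketch
hidden_states = torch.randn(1, 8, 64)          # (batch, seq, channels) -> expected_dims=3
encoder_hidden_states = torch.randn(1, 6, 64)  # text tokens, also split along dim 1


def shard_input(t: torch.Tensor, split_dim: int, rank: int) -> torch.Tensor:
    # split_output=False means the *input* is sharded before the module runs;
    # each rank keeps one contiguous chunk of the split dimension.
    return torch.chunk(t, world_size, dim=split_dim)[rank]


print(shard_input(hidden_states, split_dim=1, rank=0).shape)          # torch.Size([1, 4, 64])
print(shard_input(encoder_hidden_states, split_dim=1, rank=1).shape)  # torch.Size([1, 3, 64])
```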

View File

@@ -23,18 +23,10 @@ logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class MellonParam:
"""
Parameter definition for Mellon nodes.
Parameter definition for Mellon nodes.
Use factory methods for common params (e.g., MellonParam.seed()) or create custom ones with
MellonParam(name="...", label="...", type="...").
Example:
```python
# Custom param
MellonParam(name="my_param", label="My Param", type="float", default=0.5)
# Output in Mellon node definition:
# "my_param": {"label": "My Param", "type": "float", "default": 0.5}
```
Use factory methods for common params (e.g., MellonParam.seed()) or create custom ones with MellonParam(name="...",
label="...", type="...").
"""
name: str
@@ -59,32 +51,14 @@ class MellonParam:
@classmethod
def image(cls) -> "MellonParam":
"""
Image input parameter.
Mellon node definition:
"image": {"label": "Image", "type": "image", "display": "input"}
"""
return cls(name="image", label="Image", type="image", display="input", required_block_params=["image"])
@classmethod
def images(cls) -> "MellonParam":
"""
Images output parameter.
Mellon node definition:
"images": {"label": "Images", "type": "image", "display": "output"}
"""
return cls(name="images", label="Images", type="image", display="output", required_block_params=["images"])
@classmethod
def control_image(cls, display: str = "input") -> "MellonParam":
"""
Control image parameter for ControlNet.
Mellon node definition (display="input"):
"control_image": {"label": "Control Image", "type": "image", "display": "input"}
"""
return cls(
name="control_image",
label="Control Image",
@@ -95,25 +69,10 @@ class MellonParam:
@classmethod
def latents(cls, display: str = "input") -> "MellonParam":
"""
Latents parameter.
Mellon node definition (display="input"):
"latents": {"label": "Latents", "type": "latents", "display": "input"}
Mellon node definition (display="output"):
"latents": {"label": "Latents", "type": "latents", "display": "output"}
"""
return cls(name="latents", label="Latents", type="latents", display=display, required_block_params=["latents"])
@classmethod
def image_latents(cls, display: str = "input") -> "MellonParam":
"""
Image latents parameter for img2img workflows.
Mellon node definition (display="input"):
"image_latents": {"label": "Image Latents", "type": "latents", "display": "input"}
"""
return cls(
name="image_latents",
label="Image Latents",
@@ -124,12 +83,6 @@ class MellonParam:
@classmethod
def first_frame_latents(cls, display: str = "input") -> "MellonParam":
"""
First frame latents for video generation.
Mellon node definition (display="input"):
"first_frame_latents": {"label": "First Frame Latents", "type": "latents", "display": "input"}
"""
return cls(
name="first_frame_latents",
label="First Frame Latents",
@@ -140,16 +93,6 @@ class MellonParam:
@classmethod
def image_latents_with_strength(cls) -> "MellonParam":
"""
Image latents with strength-based onChange behavior. When connected, shows strength slider; when disconnected,
shows height/width.
Mellon node definition:
"image_latents": {
"label": "Image Latents", "type": "latents", "display": "input", "onChange": {"false": ["height",
"width"], "true": ["strength"]}
}
"""
return cls(
name="image_latents",
label="Image Latents",
@@ -162,34 +105,16 @@ class MellonParam:
@classmethod
def latents_preview(cls) -> "MellonParam":
"""
Latents preview output for visualizing latents in the UI.
Mellon node definition:
"latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"}
`Latents Preview` is a special output parameter that is used to preview the latents in the UI.
"""
return cls(name="latents_preview", label="Latents Preview", type="latent", display="output")
@classmethod
def embeddings(cls, display: str = "output") -> "MellonParam":
"""
Text embeddings parameter.
Mellon node definition (display="output"):
"embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"}
Mellon node definition (display="input"):
"embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "input"}
"""
return cls(name="embeddings", label="Text Embeddings", type="embeddings", display=display)
@classmethod
def image_embeds(cls, display: str = "output") -> "MellonParam":
"""
Image embeddings parameter for IP-Adapter workflows.
Mellon node definition (display="output"):
"image_embeds": {"label": "Image Embeddings", "type": "image_embeds", "display": "output"}
"""
return cls(
name="image_embeds",
label="Image Embeddings",
@@ -200,15 +125,6 @@ class MellonParam:
@classmethod
def controlnet_conditioning_scale(cls, default: float = 0.5) -> "MellonParam":
"""
ControlNet conditioning scale slider.
Mellon node definition (default=0.5):
"controlnet_conditioning_scale": {
"label": "Controlnet Conditioning Scale", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0,
"step": 0.01
}
"""
return cls(
name="controlnet_conditioning_scale",
label="Controlnet Conditioning Scale",
@@ -222,15 +138,6 @@ class MellonParam:
@classmethod
def control_guidance_start(cls, default: float = 0.0) -> "MellonParam":
"""
Control guidance start timestep.
Mellon node definition (default=0.0):
"control_guidance_start": {
"label": "Control Guidance Start", "type": "float", "default": 0.0, "min": 0.0, "max": 1.0, "step":
0.01
}
"""
return cls(
name="control_guidance_start",
label="Control Guidance Start",
@@ -244,14 +151,6 @@ class MellonParam:
@classmethod
def control_guidance_end(cls, default: float = 1.0) -> "MellonParam":
"""
Control guidance end timestep.
Mellon node definition (default=1.0):
"control_guidance_end": {
"label": "Control Guidance End", "type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01
}
"""
return cls(
name="control_guidance_end",
label="Control Guidance End",
@@ -265,12 +164,6 @@ class MellonParam:
@classmethod
def prompt(cls, default: str = "") -> "MellonParam":
"""
Text prompt input as textarea.
Mellon node definition (default=""):
"prompt": {"label": "Prompt", "type": "string", "default": "", "display": "textarea"}
"""
return cls(
name="prompt",
label="Prompt",
@@ -282,12 +175,6 @@ class MellonParam:
@classmethod
def negative_prompt(cls, default: str = "") -> "MellonParam":
"""
Negative prompt input as textarea.
Mellon node definition (default=""):
"negative_prompt": {"label": "Negative Prompt", "type": "string", "default": "", "display": "textarea"}
"""
return cls(
name="negative_prompt",
label="Negative Prompt",
@@ -299,12 +186,6 @@ class MellonParam:
@classmethod
def strength(cls, default: float = 0.5) -> "MellonParam":
"""
Denoising strength for img2img.
Mellon node definition (default=0.5):
"strength": {"label": "Strength", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}
"""
return cls(
name="strength",
label="Strength",
@@ -318,15 +199,6 @@ class MellonParam:
@classmethod
def guidance_scale(cls, default: float = 5.0) -> "MellonParam":
"""
CFG guidance scale slider.
Mellon node definition (default=5.0):
"guidance_scale": {
"label": "Guidance Scale", "type": "float", "display": "slider", "default": 5.0, "min": 1.0, "max":
30.0, "step": 0.1
}
"""
return cls(
name="guidance_scale",
label="Guidance Scale",
@@ -340,12 +212,6 @@ class MellonParam:
@classmethod
def height(cls, default: int = 1024) -> "MellonParam":
"""
Image height in pixels.
Mellon node definition (default=1024):
"height": {"label": "Height", "type": "int", "default": 1024, "min": 64, "step": 8}
"""
return cls(
name="height",
label="Height",
@@ -358,26 +224,12 @@ class MellonParam:
@classmethod
def width(cls, default: int = 1024) -> "MellonParam":
"""
Image width in pixels.
Mellon node definition (default=1024):
"width": {"label": "Width", "type": "int", "default": 1024, "min": 64, "step": 8}
"""
return cls(
name="width", label="Width", type="int", default=default, min=64, step=8, required_block_params=["width"]
)
@classmethod
def seed(cls, default: int = 0) -> "MellonParam":
"""
Random seed with randomize button.
Mellon node definition (default=0):
"seed": {
"label": "Seed", "type": "int", "default": 0, "min": 0, "max": 4294967295, "display": "random"
}
"""
return cls(
name="seed",
label="Seed",
@@ -391,14 +243,6 @@ class MellonParam:
@classmethod
def num_inference_steps(cls, default: int = 25) -> "MellonParam":
"""
Number of denoising steps slider.
Mellon node definition (default=25):
"num_inference_steps": {
"label": "Steps", "type": "int", "default": 25, "min": 1, "max": 100, "display": "slider"
}
"""
return cls(
name="num_inference_steps",
label="Steps",
@@ -412,12 +256,6 @@ class MellonParam:
@classmethod
def num_frames(cls, default: int = 81) -> "MellonParam":
"""
Number of video frames slider.
Mellon node definition (default=81):
"num_frames": {"label": "Frames", "type": "int", "default": 81, "min": 1, "max": 480, "display": "slider"}
"""
return cls(
name="num_frames",
label="Frames",
@@ -431,12 +269,6 @@ class MellonParam:
@classmethod
def layers(cls, default: int = 4) -> "MellonParam":
"""
Number of layers slider (for layered diffusion).
Mellon node definition (default=4):
"layers": {"label": "Layers", "type": "int", "default": 4, "min": 1, "max": 10, "display": "slider"}
"""
return cls(
name="layers",
label="Layers",
@@ -450,24 +282,15 @@ class MellonParam:
@classmethod
def videos(cls) -> "MellonParam":
"""
Video output parameter.
Mellon node definition:
"videos": {"label": "Videos", "type": "video", "display": "output"}
"""
return cls(name="videos", label="Videos", type="video", display="output", required_block_params=["videos"])
@classmethod
def vae(cls) -> "MellonParam":
"""
VAE model input.
VAE model info dict.
Mellon node definition:
"vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input"}
Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use
components.get_one(model_id) to retrieve the actual model.
Contains keys like 'model_id', 'repo_id', 'execution_device' etc. Use components.get_one(model_id) to retrieve
the actual model.
"""
return cls(
name="vae", label="VAE", type="diffusers_auto_model", display="input", required_block_params=["vae"]
@@ -476,13 +299,10 @@ class MellonParam:
@classmethod
def image_encoder(cls) -> "MellonParam":
"""
Image encoder model input.
Image Encoder model info dict.
Mellon node definition:
"image_encoder": {"label": "Image Encoder", "type": "diffusers_auto_model", "display": "input"}
Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use
components.get_one(model_id) to retrieve the actual model.
Contains keys like 'model_id', 'repo_id', 'execution_device' etc. Use components.get_one(model_id) to retrieve
the actual model.
"""
return cls(
name="image_encoder",
@@ -495,39 +315,30 @@ class MellonParam:
@classmethod
def unet(cls) -> "MellonParam":
"""
Denoising model (UNet/Transformer) input.
Denoising model (UNet/Transformer) info dict.
Mellon node definition:
"unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"}
Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use
components.get_one(model_id) to retrieve the actual model.
Contains keys like 'model_id', 'repo_id', 'execution_device' etc. Use components.get_one(model_id) to retrieve
the actual model.
"""
return cls(name="unet", label="Denoise Model", type="diffusers_auto_model", display="input")
@classmethod
def scheduler(cls) -> "MellonParam":
"""
Scheduler model input.
Scheduler model info dict.
Mellon node definition:
"scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"}
Note: The value received is a model info dict with keys like 'model_id', 'repo_id'. Use
components.get_one(model_id) to retrieve the actual scheduler.
Contains keys like 'model_id', 'repo_id' etc. Use components.get_one(model_id) to retrieve the actual
scheduler.
"""
return cls(name="scheduler", label="Scheduler", type="diffusers_auto_model", display="input")
@classmethod
def controlnet(cls) -> "MellonParam":
"""
ControlNet model input.
ControlNet model info dict.
Mellon node definition:
"controlnet": {"label": "ControlNet Model", "type": "diffusers_auto_model", "display": "input"}
Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use
components.get_one(model_id) to retrieve the actual model.
Contains keys like 'model_id', 'repo_id', 'execution_device' etc. Use components.get_one(model_id) to retrieve
the actual model.
"""
return cls(
name="controlnet",
@@ -540,17 +351,12 @@ class MellonParam:
@classmethod
def text_encoders(cls) -> "MellonParam":
"""
Text encoders dict input (multiple encoders).
Dict of text encoder model info dicts.
Mellon node definition:
"text_encoders": {"label": "Text Encoders", "type": "diffusers_auto_models", "display": "input"}
Note: The value received is a dict of model info dicts:
{
'text_encoder': {'model_id': ..., 'execution_device': ..., ...}, 'tokenizer': {'model_id': ..., ...},
'repo_id': '...'
}
Use components.get_one(model_id) to retrieve each model.
Structure: {
'text_encoder': {'model_id': ..., 'execution_device': ..., ...}, 'tokenizer': {'model_id': ..., ...},
'repo_id': '...'
} Use components.get_one(model_id) to retrieve each model.
"""
return cls(
name="text_encoders",
@@ -563,20 +369,15 @@ class MellonParam:
@classmethod
def controlnet_bundle(cls, display: str = "input") -> "MellonParam":
"""
ControlNet bundle containing model and processed control inputs. Output from ControlNet node, input to Denoise
node.
ControlNet bundle containing model info and processed control inputs.
Mellon node definition (display="input"):
"controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "input"}
Structure: {
'controlnet': {'model_id': ..., ...}, # controlnet model info dict 'control_image': ..., # processed
control image/embeddings 'controlnet_conditioning_scale': ..., ... # other inputs expected by denoise
blocks
}
Mellon node definition (display="output"):
"controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "output"}
Note: The value is a dict containing:
{
'controlnet': {'model_id': ..., ...}, # controlnet model info 'control_image': ..., # processed control
image/embeddings 'controlnet_conditioning_scale': ..., # and other denoise block inputs
}
Output from Controlnet node, input to Denoise node.
"""
return cls(
name="controlnet_bundle",
@@ -588,25 +389,10 @@ class MellonParam:
@classmethod
def ip_adapter(cls) -> "MellonParam":
"""
IP-Adapter input.
Mellon node definition:
"ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"}
"""
return cls(name="ip_adapter", label="IP Adapter", type="custom_ip_adapter", display="input")
@classmethod
def guider(cls) -> "MellonParam":
"""
Custom guider input. When connected, hides the guidance_scale slider.
Mellon node definition:
"guider": {
"label": "Guider", "type": "custom_guider", "display": "input", "onChange": {false: ["guidance_scale"],
true: []}
}
"""
return cls(
name="guider",
label="Guider",
@@ -617,12 +403,6 @@ class MellonParam:
@classmethod
def doc(cls) -> "MellonParam":
"""
Documentation output for inspecting the underlying modular pipeline.
Mellon node definition:
"doc": {"label": "Doc", "type": "string", "display": "output"}
"""
return cls(name="doc", label="Doc", type="string", display="output")
@@ -635,7 +415,6 @@ DEFAULT_NODE_SPECS = {
MellonParam.height(),
MellonParam.seed(),
MellonParam.num_inference_steps(),
MellonParam.num_frames(),
MellonParam.guidance_scale(),
MellonParam.strength(),
MellonParam.image_latents_with_strength(),
@@ -890,9 +669,6 @@ class MellonPipelineConfig:
@property
def node_params(self) -> Dict[str, Any]:
"""Lazily compute node_params from node_specs."""
if self.node_specs is None:
return self._node_params
params = {}
for node_type, spec in self.node_specs.items():
if spec is None:
@@ -935,8 +711,7 @@ class MellonPipelineConfig:
Note: The mellon_params are already in Mellon format when loading from JSON.
"""
instance = cls.__new__(cls)
instance.node_specs = None
instance._node_params = data.get("node_params", {})
instance.node_params = data.get("node_params", {})
instance.label = data.get("label", "")
instance.default_repo = data.get("default_repo", "")
instance.default_dtype = data.get("default_dtype", "")
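Even after the docstring trim, this file still describes two usage patterns: build common parameters with the factory methods, or construct a custom `MellonParam` directly, and treat model-typed parameters as model-info dicts resolved with `components.get_one(model_id)`. A short sketch follows, assuming `MellonParam` is importable from the module this diff edits (the exact path is not shown here).

```python
# Sketch of the MellonParam usage described in the docstrings above. The import path and
# the hand-written node-definition dict are illustrative; the helper that produces the
# Mellon node definition is not part of this diff.
from diffusers.modular_pipelines.mellon_node_utils import MellonParam  # assumed path

# Factory methods for common parameters:
seed = MellonParam.seed()                  # "seed": int with a randomize button
steps = MellonParam.num_inference_steps()  # "num_inference_steps": int slider, default 25
vae = MellonParam.vae()                    # model info dict input, not the model itself

# Custom parameter, as in the (removed) class docstring example:
my_param = MellonParam(name="my_param", label="My Param", type="float", default=0.5)
# Corresponding Mellon node definition:
# "my_param": {"label": "My Param", "type": "float", "default": 0.5}

# Model-typed params (vae/unet/scheduler/controlnet/text_encoders) receive an info dict at
# runtime, e.g. {"model_id": ..., "repo_id": ..., "execution_device": ...}. Per the docstrings,
# resolve the actual component with components.get_one(model_id), where `components` is the
# surrounding components manager (not shown in this diff).
```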

View File

@@ -155,7 +155,7 @@ else:
"AudioLDM2UNet2DConditionModel",
]
_import_structure["blip_diffusion"] = ["BlipDiffusionPipeline"]
_import_structure["chroma"] = ["ChromaPipeline", "ChromaImg2ImgPipeline", "ChromaInpaintPipeline"]
_import_structure["chroma"] = ["ChromaPipeline", "ChromaImg2ImgPipeline"]
_import_structure["cogvideo"] = [
"CogVideoXPipeline",
"CogVideoXImageToVideoPipeline",
@@ -598,7 +598,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
from .blip_diffusion import BlipDiffusionPipeline
from .bria import BriaPipeline
from .bria_fibo import BriaFiboPipeline
from .chroma import ChromaImg2ImgPipeline, ChromaInpaintPipeline, ChromaPipeline
from .chroma import ChromaImg2ImgPipeline, ChromaPipeline
from .chronoedit import ChronoEditPipeline
from .cogvideo import (
CogVideoXFunControlPipeline,

View File

@@ -24,7 +24,6 @@ except OptionalDependencyNotAvailable:
else:
_import_structure["pipeline_chroma"] = ["ChromaPipeline"]
_import_structure["pipeline_chroma_img2img"] = ["ChromaImg2ImgPipeline"]
_import_structure["pipeline_chroma_inpainting"] = ["ChromaInpaintPipeline"]
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
if not (is_transformers_available() and is_torch_available()):
@@ -34,7 +33,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
else:
from .pipeline_chroma import ChromaPipeline
from .pipeline_chroma_img2img import ChromaImg2ImgPipeline
from .pipeline_chroma_inpainting import ChromaInpaintPipeline
else:
import sys

File diff suppressed because it is too large

View File

@@ -260,10 +260,10 @@ class LongCatImagePipeline(DiffusionPipeline, FromSingleFileMixin):
text = self.text_processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
all_text.append(text)
inputs = self.text_processor(text=all_text, padding=True, return_tensors="pt").to(self.text_encoder.device)
inputs = self.text_processor(text=all_text, padding=True, return_tensors="pt").to(device)
self.text_encoder.to(device)
generated_ids = self.text_encoder.generate(**inputs, max_new_tokens=self.tokenizer_max_length)
generated_ids.to(device)
generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
output_text = self.text_processor.batch_decode(
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
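This fix moves both the tokenized inputs and the text encoder to the same target `device` before `generate()`, so inputs and weights always end up on the intended execution device. Below is a standalone sketch of the same pattern; a tiny public checkpoint is used purely for illustration, and the prompts and generation length are made up.

```python
# Standalone sketch of the device-placement fix above: move both the encoded inputs and the
# model to the same device before generate(). Not the LongCatImagePipeline code itself.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
tokenizer.pad_token = tokenizer.eos_token  # tiny-gpt2 has no pad token
tokenizer.padding_side = "left"            # left-pad for decoder-only generation
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

prompts = ["a photo of a cat", "a watercolor landscape"]
inputs = tokenizer(prompts, padding=True, return_tensors="pt").to(device)  # inputs -> device
model.to(device)                                                           # weights -> device

generated_ids = model.generate(**inputs, max_new_tokens=8, pad_token_id=tokenizer.pad_token_id)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(tokenizer.batch_decode(trimmed, skip_special_tokens=True))
```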

View File

@@ -632,21 +632,6 @@ class ChromaImg2ImgPipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers"])
class ChromaInpaintPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
class ChromaPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
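
The `ChromaInpaintPipeline` dummy is deleted along with the real pipeline; the surviving entries keep the standard dummy-object shape. For context, one complete dummy class looks like the sketch below (the import location is assumed and the class name is a placeholder): constructing or loading it without `torch` and `transformers` installed raises through `requires_backends` instead of failing at import time.

```python
# Complete dummy-object boilerplate for one placeholder class, matching the (truncated)
# classes above. Import location assumed; ExampleDummyPipeline is a placeholder name.
from diffusers.utils import DummyObject, requires_backends


class ExampleDummyPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])
```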