mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-31 07:55:01 +08:00
Compare commits
16 Commits
main
...
more-mello
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3985c43031 | ||
|
|
8c5b119e52 | ||
|
|
46a713a6fa | ||
|
|
d4f2a8979f | ||
|
|
5c7273ff99 | ||
|
|
3fe2711691 | ||
|
|
48160f6f5e | ||
|
|
3393ef0177 | ||
|
|
a71d86b9ae | ||
|
|
26f59f1aa9 | ||
|
|
29c5741c2a | ||
|
|
5ad83903f9 | ||
|
|
ffc5708b78 | ||
|
|
c5c732b87b | ||
|
|
d2bee6a57e | ||
|
|
2890dd8480 |
@@ -114,6 +114,8 @@
|
||||
title: Guiders
|
||||
- local: modular_diffusers/custom_blocks
|
||||
title: Building Custom Blocks
|
||||
- local: modular_diffusers/mellon
|
||||
title: Mellon Guide
|
||||
title: Modular Diffusers
|
||||
- isExpanded: false
|
||||
sections:
|
||||
|
||||
233
docs/source/en/modular_diffusers/mellon.md
Normal file
233
docs/source/en/modular_diffusers/mellon.md
Normal file
@@ -0,0 +1,233 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
# Using Custom Blocks with Mellon
|
||||
|
||||
[Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface (similar to ComfyUI) that integrates with Modular Diffusers. This guide shows how to add Mellon support to your custom blocks so they can be used in the Mellon UI.
|
||||
|
||||
## Overview
|
||||
|
||||
To use a custom block in Mellon, you need a `mellon_pipeline_config.json` file that defines how your block's parameters map to Mellon UI components. Here's how to create one:
|
||||
|
||||
1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). You can specify types via metadata in your block definitions, or pass them when generating the config.
|
||||
2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository.
|
||||
3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs
|
||||
|
||||
## Step 1: Specify Mellon Types for Parameters
|
||||
|
||||
Mellon types determine how each parameter renders in the UI. If you don't specify a type for a parameter, it will default to `"custom"`, which renders as a simple connection dot. You can always adjust this later in the generated config.
|
||||
|
||||
### Supported Mellon Types
|
||||
|
||||
| Type | Input/Output | Description |
|
||||
|------|--------------|-------------|
|
||||
| `image` | Both | Image (PIL Image) |
|
||||
| `video` | Both | Video |
|
||||
| `text` | Both | Text display |
|
||||
| `textbox` | Input | Text input |
|
||||
| `dropdown` | Input | Dropdown selection menu |
|
||||
| `slider` | Input | Slider for numeric values |
|
||||
| `number` | Input | Numeric input |
|
||||
| `checkbox` | Input | Boolean toggle |
|
||||
|
||||
### Method 1: Using `metadata` in Block Definitions
|
||||
|
||||
If you're defining a custom block from scratch, you can add `metadata={"mellon": "<type>"}` directly to your `InputParam` and `OutputParam` definitions:
|
||||
```python
|
||||
class GeminiPromptExpander(ModularPipelineBlocks):
|
||||
|
||||
@property
|
||||
def inputs(self) -> List[InputParam]:
|
||||
return [
|
||||
InputParam(
|
||||
"prompt",
|
||||
type_hint=str,
|
||||
required=True,
|
||||
description="Prompt to use",
|
||||
metadata={"mellon": "textbox"}, # Text input
|
||||
)
|
||||
]
|
||||
|
||||
@property
|
||||
def intermediate_outputs(self) -> List[OutputParam]:
|
||||
return [
|
||||
OutputParam(
|
||||
"prompt",
|
||||
type_hint=str,
|
||||
description="Expanded prompt by the LLM",
|
||||
metadata={"mellon": "text"}, # Text output
|
||||
),
|
||||
OutputParam(
|
||||
"old_prompt",
|
||||
type_hint=str,
|
||||
description="Old prompt provided by the user",
|
||||
# No metadata - we don't want to render this in UI
|
||||
)
|
||||
]
|
||||
```
|
||||
|
||||
### Method 2: Using `input_types` and `output_types` When Generating Config
|
||||
|
||||
If you're working with an existing pipeline or prefer to keep your block definitions clean, you can specify types when generating the config using the `input_types` and `output_types` arguments:
|
||||
```python
|
||||
from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig
|
||||
|
||||
mellon_config = MellonPipelineConfig.from_custom_block(
|
||||
blocks,
|
||||
input_types={"prompt": "textbox"},
|
||||
output_types={"prompt": "text"}
|
||||
)
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> If you specify both `metadata` and `input_types`/`output_types`, the `input_types`/`output_types` arguments take precedence, allowing you to override metadata when needed.
|
||||
|
||||
## Step 2: Generate and Push the Mellon Config
|
||||
|
||||
After adding metadata to your block, generate the default Mellon configuration template and push it to the Hub:
|
||||
|
||||
```python
|
||||
from diffusers import ModularPipelineBlocks
|
||||
from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig
|
||||
|
||||
# load your custom blocks from your local dir
|
||||
blocks = ModularPipelineBlocks.from_pretrained("/path/local/folder", trust_remote_code=True)
|
||||
|
||||
# Generate the default config template
|
||||
mellon_config = MellonPipelineConfig.from_custom_block(blocks)
|
||||
# Push the default template to `repo_id`. Pass the same local folder path so that the config is saved locally first.
|
||||
mellon_config.save(
|
||||
local_dir="/path/local/folder",
|
||||
repo_id= repo_id,
|
||||
push_to_hub=True
|
||||
)
|
||||
```
|
||||
|
||||
This creates a `mellon_pipeline_config.json` file in your repository.
|
||||
|
||||
## Step 3: Review and Adjust the Config (Optional)
|
||||
|
||||
The generated template is a starting point - you may want to adjust it for your needs. Let's walk through the generated config for the Gemini Prompt Expander:
|
||||
|
||||
```json
|
||||
{
|
||||
"label": "Gemini Prompt Expander",
|
||||
"default_repo": "",
|
||||
"default_dtype": "",
|
||||
"node_params": {
|
||||
"custom": {
|
||||
"params": {
|
||||
"prompt": {
|
||||
"label": "Prompt",
|
||||
"type": "string",
|
||||
"display": "textarea",
|
||||
"default": ""
|
||||
},
|
||||
"out_prompt": {
|
||||
"label": "Prompt",
|
||||
"type": "string",
|
||||
"display": "output"
|
||||
},
|
||||
"old_prompt": {
|
||||
"label": "Old Prompt",
|
||||
"type": "custom",
|
||||
"display": "output"
|
||||
},
|
||||
"doc": {
|
||||
"label": "Doc",
|
||||
"type": "string",
|
||||
"display": "output"
|
||||
}
|
||||
},
|
||||
"input_names": ["prompt"],
|
||||
"model_input_names": [],
|
||||
"output_names": ["out_prompt", "old_prompt", "doc"],
|
||||
"block_name": "custom",
|
||||
"node_type": "custom"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Understanding the Structure
|
||||
|
||||
The `params` dict defines how each UI element renders. The `input_names`, `model_input_names`, and `output_names` lists map these UI elements to the underlying [`ModularPipelineBlocks`]'s I/O interface:
|
||||
|
||||
| Mellon Config | ModularPipelineBlocks |
|
||||
|---------------|----------------------|
|
||||
| `input_names` | `inputs` property |
|
||||
| `model_input_names` | `expected_components` property |
|
||||
| `output_names` | `intermediate_outputs` property |
|
||||
|
||||
In this example: `prompt` is the only input, there are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`.
|
||||
|
||||
Now let's look at the `params` dict:
|
||||
|
||||
**`prompt`** is an input parameter. It has `display: "textarea"` which renders as a text input box, `label: "Prompt"` shown in the UI, and `default: ""` so it starts empty. The `type: "string"` field is important in Mellon because it determines which nodes can connect together - only matching types can be linked with "noodles".
|
||||
|
||||
**`out_prompt`** is the expanded prompt output. The `out_` prefix was automatically added because the input and output share the same name (`prompt`), avoiding naming conflicts in the config. It has `display: "output"` which renders as an output socket.
|
||||
|
||||
**`old_prompt`** has `type: "custom"` because we didn't specify metadata. This renders as a simple dot in the UI. Since we don't actually want to expose this in the UI, we can remove it.
|
||||
|
||||
**`doc`** is the documentation output, automatically added to all custom blocks.
|
||||
|
||||
### Making Adjustments
|
||||
|
||||
For the Gemini Prompt Expander, we don't need `old_prompt` in the UI. Remove it from both `params` and `output_names`:
|
||||
|
||||
```json
|
||||
{
|
||||
"label": "Gemini Prompt Expander",
|
||||
"default_repo": "",
|
||||
"default_dtype": "",
|
||||
"node_params": {
|
||||
"custom": {
|
||||
"params": {
|
||||
"prompt": {
|
||||
"label": "Prompt",
|
||||
"type": "string",
|
||||
"display": "textarea",
|
||||
"default": ""
|
||||
},
|
||||
"out_prompt": {
|
||||
"label": "Prompt",
|
||||
"type": "string",
|
||||
"display": "output"
|
||||
},
|
||||
"doc": {
|
||||
"label": "Doc",
|
||||
"type": "string",
|
||||
"display": "output"
|
||||
}
|
||||
},
|
||||
"input_names": ["prompt"],
|
||||
"model_input_names": [],
|
||||
"output_names": ["out_prompt", "doc"],
|
||||
"block_name": "custom",
|
||||
"node_type": "custom"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
See the final config at [YiYiXu/gemini-prompt-expander](https://huggingface.co/YiYiXu/gemini-prompt-expander).
|
||||
|
||||
## Use in Mellon
|
||||
|
||||
1. Start Mellon (see [Mellon installation guide](https://github.com/cubiq/Mellon))
|
||||
|
||||
2. In Mellon:
|
||||
- Drag a **Dynamic Block Node** from the ModularDiffusers section
|
||||
- Enter your `repo_id` (e.g., `YiYiXu/gemini-prompt-expander`)
|
||||
- Click **Load Custom Block**
|
||||
- The node will transform to show your block's inputs and outputs
|
||||
@@ -324,6 +324,7 @@ class ComponentsManager:
|
||||
"has_hook",
|
||||
"execution_device",
|
||||
"ip_adapter",
|
||||
"quantization",
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
@@ -356,7 +357,9 @@ class ComponentsManager:
|
||||
ids_by_name.add(component_id)
|
||||
else:
|
||||
ids_by_name = set(components.keys())
|
||||
if collection:
|
||||
if collection and collection not in self.collections:
|
||||
return set()
|
||||
elif collection and collection in self.collections:
|
||||
ids_by_collection = set()
|
||||
for component_id, component in components.items():
|
||||
if component_id in self.collections[collection]:
|
||||
@@ -760,7 +763,6 @@ class ComponentsManager:
|
||||
self.model_hooks = None
|
||||
self._auto_offload_enabled = False
|
||||
|
||||
# YiYi TODO: (1) add quantization info
|
||||
def get_model_info(
|
||||
self,
|
||||
component_id: str,
|
||||
@@ -836,6 +838,14 @@ class ComponentsManager:
|
||||
if scales:
|
||||
info["ip_adapter"] = summarize_dict_by_value_and_parts(scales)
|
||||
|
||||
# Check for quantization
|
||||
hf_quantizer = getattr(component, "hf_quantizer", None)
|
||||
if hf_quantizer is not None:
|
||||
quant_config = hf_quantizer.quantization_config
|
||||
info["quantization"] = quant_config.to_dict()
|
||||
else:
|
||||
info["quantization"] = None
|
||||
|
||||
# If fields specified, filter info
|
||||
if fields is not None:
|
||||
return {k: v for k, v in info.items() if k in fields}
|
||||
@@ -966,12 +976,14 @@ class ComponentsManager:
|
||||
output += "\nAdditional Component Info:\n" + "=" * 50 + "\n"
|
||||
for name in self.components:
|
||||
info = self.get_model_info(name)
|
||||
if info is not None and (info.get("adapters") is not None or info.get("ip_adapter")):
|
||||
if info is not None and (info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization")):
|
||||
output += f"\n{name}:\n"
|
||||
if info.get("adapters") is not None:
|
||||
output += f" Adapters: {info['adapters']}\n"
|
||||
if info.get("ip_adapter"):
|
||||
output += " IP-Adapter: Enabled\n"
|
||||
if info.get("quantization"):
|
||||
output += f" Quantization: {info['quantization']}\n"
|
||||
|
||||
return output
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -520,6 +520,7 @@ class InputParam:
|
||||
required: bool = False
|
||||
description: str = ""
|
||||
kwargs_type: str = None
|
||||
metadata: Dict[str, Any] = None
|
||||
|
||||
def __repr__(self):
|
||||
return f"<{self.name}: {'required' if self.required else 'optional'}, default={self.default}>"
|
||||
@@ -553,6 +554,7 @@ class OutputParam:
|
||||
type_hint: Any = None
|
||||
description: str = ""
|
||||
kwargs_type: str = None
|
||||
metadata: Dict[str, Any] = None
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
|
||||
@@ -407,8 +407,8 @@ class GlmImagePipeline(DiffusionPipeline):
|
||||
|
||||
if len(source_grids) > 0:
|
||||
prior_token_image_embed = self.vision_language_encoder.get_image_features(
|
||||
inputs["pixel_values"], source_grids
|
||||
).pooler_output
|
||||
inputs["pixel_values"], source_grids, return_dict=False
|
||||
)
|
||||
prior_token_image_embed = torch.cat(prior_token_image_embed, dim=0)
|
||||
prior_token_image_ids_d32 = self.vision_language_encoder.get_image_tokens(
|
||||
prior_token_image_embed, source_grids
|
||||
|
||||
@@ -227,7 +227,7 @@ _cosmos_guardrail_available, _cosmos_guardrail_version = _is_package_available("
|
||||
_sageattention_available, _sageattention_version = _is_package_available("sageattention")
|
||||
_flash_attn_available, _flash_attn_version = _is_package_available("flash_attn")
|
||||
_flash_attn_3_available, _flash_attn_3_version = _is_package_available("flash_attn_3")
|
||||
_aiter_available, _aiter_version = _is_package_available("aiter", get_dist_name=True)
|
||||
_aiter_available, _aiter_version = _is_package_available("aiter")
|
||||
_kornia_available, _kornia_version = _is_package_available("kornia")
|
||||
_nvidia_modelopt_available, _nvidia_modelopt_version = _is_package_available("modelopt", get_dist_name=True)
|
||||
_av_available, _av_version = _is_package_available("av")
|
||||
|
||||
Reference in New Issue
Block a user