Compare commits

..

20 Commits

Author SHA1 Message Date
Dhruv Nair
7516680d8c update 2025-11-10 04:05:18 +01:00
Dhruv Nair
cfb83e54f3 update 2025-11-07 16:02:16 +01:00
Dhruv Nair
2b3558adab update 2025-11-07 15:58:13 +01:00
Dhruv Nair
9b7065d1bb update 2025-11-07 08:49:41 +01:00
Dhruv Nair
218cb72a5d update 2025-11-07 08:42:18 +01:00
Dhruv Nair
ad24f55ea5 Merge branch 'main' into custom-blocks-guide 2025-11-07 07:51:54 +01:00
Dhruv Nair
a65e0a60df update 2025-11-07 07:34:14 +01:00
Dhruv Nair
331a7a1356 Apply suggestion from @stevhliu
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-08 14:26:46 +05:30
Dhruv Nair
28d3856a0e Apply suggestion from @stevhliu
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-08 14:26:40 +05:30
DN6
c918079f8b update 2025-10-08 14:21:37 +05:30
DN6
a4815ab1c8 update 2025-10-08 13:53:59 +05:30
DN6
c194bf11a0 update 2025-10-08 13:53:59 +05:30
Dhruv Nair
df67d521ee Apply suggestion from @stevhliu
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-08 13:51:07 +05:30
Dhruv Nair
ddaf986eb4 Apply suggestion from @stevhliu
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-08 13:49:38 +05:30
Dhruv Nair
e78aa54b82 Update docs/source/en/modular_diffusers/custom_blocks.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-17 08:41:51 +05:30
Dhruv Nair
180c9eaed1 Update docs/source/en/_toctree.yml
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-17 08:41:19 +05:30
Dhruv Nair
830603e323 Update docs/source/en/modular_diffusers/custom_blocks.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-17 08:41:10 +05:30
Dhruv Nair
ed3f88528a Update docs/source/en/modular_diffusers/custom_blocks.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-17 08:41:02 +05:30
DN6
c67dda4bdd update 2025-09-16 18:11:20 +05:30
DN6
e3f111a095 update 2025-09-15 23:16:43 +05:30
10 changed files with 543 additions and 100 deletions

View File

@@ -76,6 +76,7 @@ jobs:
run: | run: |
uv pip install -e ".[quality]" uv pip install -e ".[quality]"
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -127,7 +128,7 @@ jobs:
uv pip install -e ".[quality]" uv pip install -e ".[quality]"
uv pip install peft@git+https://github.com/huggingface/peft.git uv pip install peft@git+https://github.com/huggingface/peft.git
uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -178,6 +179,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
uv pip install -e ".[quality,training]" uv pip install -e ".[quality,training]"
uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
- name: Environment - name: Environment
run: | run: |
python utils/print_env.py python utils/print_env.py
@@ -198,47 +200,6 @@ jobs:
name: torch_compile_test_reports name: torch_compile_test_reports
path: reports path: reports
run_xformers_tests:
name: PyTorch xformers CUDA tests
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus all --shm-size "16gb" --ipc host
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Install dependencies
run: |
uv pip install -e ".[quality,training]"
- name: Environment
run: |
python utils/print_env.py
- name: Run example tests on GPU
env:
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
run: |
pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_torch_xformers_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: torch_xformers_test_reports
path: reports
run_examples_tests: run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu name: Examples PyTorch CUDA tests on Ubuntu

View File

@@ -119,6 +119,8 @@
title: ComponentsManager title: ComponentsManager
- local: modular_diffusers/guiders - local: modular_diffusers/guiders
title: Guiders title: Guiders
- local: modular_diffusers/custom_blocks
title: Building Custom Blocks
title: Modular Diffusers title: Modular Diffusers
- isExpanded: false - isExpanded: false
sections: sections:

View File

@@ -0,0 +1,493 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# Building Custom Blocks
[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block.
<Tip>
You can find examples of different types of custom blocks in the [Modular Diffusers Custom Blocks collection](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks)
</Tip>
## Project Structure
Your custom block project should use the following structure:
```shell
.
├── block.py
└── modular_config.json
```
- `block.py` contains the custom block implementation
- `modular_config.json` contains the metadata needed to load the block
## Example: Florence 2 Inpainting Block
In this example we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting.
The first step is to define the components that the block will use. In this case, we will need to use the `Florence2ForConditionalGeneration` model and its corresponding processor `AutoProcessor`. When defining components, we must specify the name of the component within our pipeline, model class via `type_hint`, and provide a `pretrained_model_name_or_path` for the component if we intend to load the model weights from a specific repository on the Hub.
```py
# Inside block.py
from diffusers.modular_pipelines import (
ModularPipelineBlocks,
ComponentSpec,
)
from transformers import AutoProcessor, Florence2ForConditionalGeneration
class Florence2ImageAnnotatorBlock(ModularPipelineBlocks):
@property
def expected_components(self):
return [
ComponentSpec(
name="image_annotator",
type_hint=Florence2ForConditionalGeneration,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
ComponentSpec(
name="image_annotator_processor",
type_hint=AutoProcessor,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
]
```
Next, we define the inputs and outputs of the block. The inputs include the image to be annotated, the annotation task, and the annotation prompt. The outputs include the generated mask image and annotations.
```py
from typing import List, Union
from PIL import Image, ImageDraw
import torch
import numpy as np
from diffusers.modular_pipelines import (
PipelineState,
ModularPipelineBlocks,
InputParam,
ComponentSpec,
OutputParam,
)
from transformers import AutoProcessor, Florence2ForConditionalGeneration
class Florence2ImageAnnotatorBlock(ModularPipelineBlocks):
@property
def expected_components(self):
return [
ComponentSpec(
name="image_annotator",
type_hint=Florence2ForConditionalGeneration,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
ComponentSpec(
name="image_annotator_processor",
type_hint=AutoProcessor,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
]
@property
def inputs(self) -> List[InputParam]:
return [
InputParam(
"image",
type_hint=Union[Image.Image, List[Image.Image]],
required=True,
description="Image(s) to annotate",
),
InputParam(
"annotation_task",
type_hint=Union[str, List[str]],
required=True,
default="<REFERRING_EXPRESSION_SEGMENTATION>",
description="""Annotation Task to perform on the image.
Supported Tasks:
<OD>
<REFERRING_EXPRESSION_SEGMENTATION>
<CAPTION>
<DETAILED_CAPTION>
<MORE_DETAILED_CAPTION>
<DENSE_REGION_CAPTION>
<CAPTION_TO_PHRASE_GROUNDING>
<OPEN_VOCABULARY_DETECTION>
""",
),
InputParam(
"annotation_prompt",
type_hint=Union[str, List[str]],
required=True,
description="""Annotation Prompt to provide more context to the task.
Can be used to detect or segment out specific elements in the image
""",
),
InputParam(
"annotation_output_type",
type_hint=str,
required=True,
default="mask_image",
description="""Output type from annotation predictions. Availabe options are
mask_image:
-black and white mask image for the given image based on the task type
mask_overlay:
- mask overlayed on the original image
bounding_box:
- bounding boxes drawn on the original image
""",
),
InputParam(
"annotation_overlay",
type_hint=bool,
required=True,
default=False,
description="",
),
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
"mask_image",
type_hint=Image,
description="Inpainting Mask for input Image(s)",
),
OutputParam(
"annotations",
type_hint=dict,
description="Annotations Predictions for input Image(s)",
),
OutputParam(
"image",
type_hint=Image,
description="Annotated input Image(s)",
),
]
```
Now we implement the `__call__` method, which contains the logic for processing the input image and generating the mask.
```py
from typing import List, Union
from PIL import Image, ImageDraw
import torch
import numpy as np
from diffusers.modular_pipelines import (
PipelineState,
ModularPipelineBlocks,
InputParam,
ComponentSpec,
OutputParam,
)
from transformers import AutoProcessor, Florence2ForConditionalGeneration
class Florence2ImageAnnotatorBlock(ModularPipelineBlocks):
@property
def expected_components(self):
return [
ComponentSpec(
name="image_annotator",
type_hint=Florence2ForConditionalGeneration,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
ComponentSpec(
name="image_annotator_processor",
type_hint=AutoProcessor,
pretrained_model_name_or_path="florence-community/Florence-2-base-ft",
),
]
@property
def inputs(self) -> List[InputParam]:
return [
InputParam(
"image",
type_hint=Union[Image.Image, List[Image.Image]],
required=True,
description="Image(s) to annotate",
),
InputParam(
"annotation_task",
type_hint=Union[str, List[str]],
required=True,
default="<REFERRING_EXPRESSION_SEGMENTATION>",
description="""Annotation Task to perform on the image.
Supported Tasks:
<OD>
<REFERRING_EXPRESSION_SEGMENTATION>
<CAPTION>
<DETAILED_CAPTION>
<MORE_DETAILED_CAPTION>
<DENSE_REGION_CAPTION>
<CAPTION_TO_PHRASE_GROUNDING>
<OPEN_VOCABULARY_DETECTION>
""",
),
InputParam(
"annotation_prompt",
type_hint=Union[str, List[str]],
required=True,
description="""Annotation Prompt to provide more context to the task.
Can be used to detect or segment out specific elements in the image
""",
),
InputParam(
"annotation_output_type",
type_hint=str,
required=True,
default="mask_image",
description="""Output type from annotation predictions. Availabe options are
mask_image:
-black and white mask image for the given image based on the task type
mask_overlay:
- mask overlayed on the original image
bounding_box:
- bounding boxes drawn on the original image
""",
),
InputParam(
"annotation_overlay",
type_hint=bool,
required=True,
default=False,
description="",
),
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
"mask_image",
type_hint=Image,
description="Inpainting Mask for input Image(s)",
),
OutputParam(
"annotations",
type_hint=dict,
description="Annotations Predictions for input Image(s)",
),
OutputParam(
"image",
type_hint=Image,
description="Annotated input Image(s)",
),
]
def get_annotations(self, components, images, prompts, task):
task_prompts = [task + prompt for prompt in prompts]
inputs = components.image_annotator_processor(
text=task_prompts, images=images, return_tensors="pt"
).to(components.image_annotator.device, components.image_annotator.dtype)
generated_ids = components.image_annotator.generate(
input_ids=inputs["input_ids"],
pixel_values=inputs["pixel_values"],
max_new_tokens=1024,
early_stopping=False,
do_sample=False,
num_beams=3,
)
annotations = components.image_annotator_processor.batch_decode(
generated_ids, skip_special_tokens=False
)
outputs = []
for image, annotation in zip(images, annotations):
outputs.append(
components.image_annotator_processor.post_process_generation(
annotation, task=task, image_size=(image.width, image.height)
)
)
return outputs
def prepare_mask(self, images, annotations, overlay=False, fill="white"):
masks = []
for image, annotation in zip(images, annotations):
mask_image = image.copy() if overlay else Image.new("L", image.size, 0)
draw = ImageDraw.Draw(mask_image)
for _, _annotation in annotation.items():
if "polygons" in _annotation:
for polygon in _annotation["polygons"]:
polygon = np.array(polygon).reshape(-1, 2)
if len(polygon) < 3:
continue
polygon = polygon.reshape(-1).tolist()
draw.polygon(polygon, fill=fill)
elif "bbox" in _annotation:
bbox = _annotation["bbox"]
draw.rectangle(bbox, fill="white")
masks.append(mask_image)
return masks
def prepare_bounding_boxes(self, images, annotations):
outputs = []
for image, annotation in zip(images, annotations):
image_copy = image.copy()
draw = ImageDraw.Draw(image_copy)
for _, _annotation in annotation.items():
bbox = _annotation["bbox"]
label = _annotation["label"]
draw.rectangle(bbox, outline="red", width=3)
draw.text((bbox[0], bbox[1] - 20), label, fill="red")
outputs.append(image_copy)
return outputs
def prepare_inputs(self, images, prompts):
prompts = prompts or ""
if isinstance(images, Image.Image):
images = [images]
if isinstance(prompts, str):
prompts = [prompts]
if len(images) != len(prompts):
raise ValueError("Number of images and annotation prompts must match.")
return images, prompts
@torch.no_grad()
def __call__(self, components, state: PipelineState) -> PipelineState:
block_state = self.get_block_state(state)
images, annotation_task_prompt = self.prepare_inputs(
block_state.image, block_state.annotation_prompt
)
task = block_state.annotation_task
fill = block_state.fill
annotations = self.get_annotations(
components, images, annotation_task_prompt, task
)
block_state.annotations = annotations
if block_state.annotation_output_type == "mask_image":
block_state.mask_image = self.prepare_mask(images, annotations)
else:
block_state.mask_image = None
if block_state.annotation_output_type == "mask_overlay":
block_state.image = self.prepare_mask(images, annotations, overlay=True, fill=fill)
elif block_state.annotation_output_type == "bounding_box":
block_state.image = self.prepare_bounding_boxes(images, annotations)
self.set_block_state(state, block_state)
return components, state
```
Once we have defined our custom block, we can save it to the Hub, using either the CLI or the [`push_to_hub`] method. This will make it easy to share and reuse our custom block with other pipelines.
<hfoptions id="share">
<hfoption id="hf CLI">
```shell
# In the folder with the `block.py` file, run:
diffusers-cli custom_block
```
Then upload the block to the Hub:
```shell
hf upload <your repo id> . .
```
</hfoption>
<hfoption id="push_to_hub">
```py
from block import Florence2ImageAnnotatorBlock
block = Florence2ImageAnnotatorBlock()
block.push_to_hub("<your repo id>")
```
</hfoption>
</hfoptions>
## Using Custom Blocks
Load the custom block with [`~ModularPipelineBlocks.from_pretrained`] and set `trust_remote_code=True`.
```py
import torch
from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS
from diffusers.utils import load_image
# Fetch the Florence2 image annotator block that will create our mask
image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True)
my_blocks = INPAINT_BLOCKS.copy()
# insert the annotation block before the image encoding step
my_blocks.insert("image_annotator", image_annotator_block, 1)
# Create our initial set of inpainting blocks
blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks)
repo_id = "diffusers/modular-stable-diffusion-xl-base-1.0"
pipe = blocks.init_pipeline(repo_id)
pipe.load_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True)
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true")
image = image.resize((1024, 1024))
prompt = ["A red car"]
annotation_task = "<REFERRING_EXPRESSION_SEGMENTATION>"
annotation_prompt = ["the car"]
output = pipe(
prompt=prompt,
image=image,
annotation_task=annotation_task,
annotation_prompt=annotation_prompt,
annotation_output_type="mask_image",
num_inference_steps=35,
guidance_scale=7.5,
strength=0.95,
output="images"
)
output[0].save("florence-inpainting.png")
```
## Editing Custom Blocks
By default, custom blocks are saved in your cache directory. Use the `local_dir` argument to download and edit a custom block in a specific folder.
```py
import torch
from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS
from diffusers.utils import load_image
# Fetch the Florence2 image annotator block that will create our mask
image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True, local_dir="/my-local-folder")
```
Any changes made to the block files in this folder will be reflected when you load the block again.

View File

@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
# LoopSequentialPipelineBlocks # LoopSequentialPipelineBlocks
[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a loop. Data flows circularly, using `inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop which is iterative by default. [`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop which is iterative by default.
This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`]. This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
@@ -21,6 +21,7 @@ This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBl
[`~modular_pipelines.LoopSequentialPipelineBlocks`], is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, you need the following variables. [`~modular_pipelines.LoopSequentialPipelineBlocks`], is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, you need the following variables.
- `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.ModularPipelineBlocks.inputs`]. - `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.ModularPipelineBlocks.inputs`].
- `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_inputs`].
- `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. It is equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_outputs`]. - `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. It is equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_outputs`].
- `__call__` method defines the loop structure and iteration logic. - `__call__` method defines the loop structure and iteration logic.
@@ -89,4 +90,4 @@ Add more loop blocks to run within each iteration with [`~modular_pipelines.Loop
```py ```py
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock(), "block2": LoopBlock}) loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock(), "block2": LoopBlock})
``` ```

View File

@@ -37,7 +37,17 @@ A [`~modular_pipelines.ModularPipelineBlocks`] requires `inputs`, and `intermedi
] ]
``` ```
- `intermediate_outputs` are new values created by a block and added to the [`~modular_pipelines.PipelineState`]. The `intermediate_outputs` are available as `inputs` for subsequent blocks or available as the final output from running the pipeline. - `intermediate_inputs` are values typically created from a previous block but it can also be directly provided if no preceding block generates them. Unlike `inputs`, `intermediate_inputs` can be modified.
Use `InputParam` to define `intermediate_inputs`.
```py
user_intermediate_inputs = [
InputParam(name="processed_image", type_hint="torch.Tensor", description="image that has been preprocessed and normalized"),
]
```
- `intermediate_outputs` are new values created by a block and added to the [`~modular_pipelines.PipelineState`]. The `intermediate_outputs` are available as `intermediate_inputs` for subsequent blocks or available as the final output from running the pipeline.
Use `OutputParam` to define `intermediate_outputs`. Use `OutputParam` to define `intermediate_outputs`.
@@ -55,8 +65,8 @@ The intermediate inputs and outputs share data to connect blocks. They are acces
The computation a block performs is defined in the `__call__` method and it follows a specific structure. The computation a block performs is defined in the `__call__` method and it follows a specific structure.
1. Retrieve the [`~modular_pipelines.BlockState`] to get a local view of the `inputs` 1. Retrieve the [`~modular_pipelines.BlockState`] to get a local view of the `inputs` and `intermediate_inputs`.
2. Implement the computation logic on the `inputs`. 2. Implement the computation logic on the `inputs` and `intermediate_inputs`.
3. Update [`~modular_pipelines.PipelineState`] to push changes from the local [`~modular_pipelines.BlockState`] back to the global [`~modular_pipelines.PipelineState`]. 3. Update [`~modular_pipelines.PipelineState`] to push changes from the local [`~modular_pipelines.BlockState`] back to the global [`~modular_pipelines.PipelineState`].
4. Return the components and state which becomes available to the next block. 4. Return the components and state which becomes available to the next block.
@@ -66,7 +76,7 @@ def __call__(self, components, state):
block_state = self.get_block_state(state) block_state = self.get_block_state(state)
# Your computation logic here # Your computation logic here
# block_state contains all your inputs # block_state contains all your inputs and intermediate_inputs
# Access them like: block_state.image, block_state.processed_image # Access them like: block_state.image, block_state.processed_image
# Update the pipeline state with your updated block_states # Update the pipeline state with your updated block_states
@@ -102,4 +112,4 @@ def __call__(self, components, state):
unet = components.unet unet = components.unet
vae = components.vae vae = components.vae
scheduler = components.scheduler scheduler = components.scheduler
``` ```

View File

@@ -183,7 +183,7 @@ from diffusers.modular_pipelines import ComponentsManager
components = ComponentManager() components = ComponentManager()
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", components_manager=components, collection="diffdiff") dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", components_manager=components, collection="diffdiff")
dd_pipeline.load_componenets(torch_dtype=torch.float16) dd_pipeline.load_default_componenets(torch_dtype=torch.float16)
dd_pipeline.to("cuda") dd_pipeline.to("cuda")
``` ```

View File

@@ -12,11 +12,11 @@ specific language governing permissions and limitations under the License.
# SequentialPipelineBlocks # SequentialPipelineBlocks
[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline. [`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline.
This guide shows you how to connect two blocks into a [`~modular_pipelines.SequentialPipelineBlocks`]. This guide shows you how to connect two blocks into a [`~modular_pipelines.SequentialPipelineBlocks`].
Create two [`~modular_pipelines.ModularPipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `inputs`. Create two [`~modular_pipelines.ModularPipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`.
<hfoptions id="sequential"> <hfoptions id="sequential">
<hfoption id="InputBlock"> <hfoption id="InputBlock">
@@ -110,4 +110,4 @@ Inspect the sub-blocks in [`~modular_pipelines.SequentialPipelineBlocks`] by cal
```py ```py
print(blocks) print(blocks)
print(blocks.doc) print(blocks.doc)
``` ```

View File

@@ -203,12 +203,10 @@ class ContextParallelSplitHook(ModelHook):
def _prepare_cp_input(self, x: torch.Tensor, cp_input: ContextParallelInput) -> torch.Tensor: def _prepare_cp_input(self, x: torch.Tensor, cp_input: ContextParallelInput) -> torch.Tensor:
if cp_input.expected_dims is not None and x.dim() != cp_input.expected_dims: if cp_input.expected_dims is not None and x.dim() != cp_input.expected_dims:
logger.warning_once( raise ValueError(
f"Expected input tensor to have {cp_input.expected_dims} dimensions, but got {x.dim()} dimensions, split will not be applied." f"Expected input tensor to have {cp_input.expected_dims} dimensions, but got {x.dim()} dimensions."
) )
return x return EquipartitionSharder.shard(x, cp_input.split_dim, self.parallel_config._flattened_mesh)
else:
return EquipartitionSharder.shard(x, cp_input.split_dim, self.parallel_config._flattened_mesh)
class ContextParallelGatherHook(ModelHook): class ContextParallelGatherHook(ModelHook):

View File

@@ -24,7 +24,6 @@ from ...configuration_utils import ConfigMixin, register_to_config
from ...loaders import PeftAdapterMixin from ...loaders import PeftAdapterMixin
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
from ..attention import FeedForward from ..attention import FeedForward
from ..attention_dispatch import dispatch_attention_fn
from ..attention_processor import Attention, AttentionProcessor from ..attention_processor import Attention, AttentionProcessor
from ..cache_utils import CacheMixin from ..cache_utils import CacheMixin
from ..embeddings import ( from ..embeddings import (
@@ -43,9 +42,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
class HunyuanVideoAttnProcessor2_0: class HunyuanVideoAttnProcessor2_0:
_attention_backend = None
_parallel_config = None
def __init__(self): def __init__(self):
if not hasattr(F, "scaled_dot_product_attention"): if not hasattr(F, "scaled_dot_product_attention"):
raise ImportError( raise ImportError(
@@ -68,9 +64,9 @@ class HunyuanVideoAttnProcessor2_0:
key = attn.to_k(hidden_states) key = attn.to_k(hidden_states)
value = attn.to_v(hidden_states) value = attn.to_v(hidden_states)
query = query.unflatten(2, (attn.heads, -1)) query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2)
key = key.unflatten(2, (attn.heads, -1)) key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2)
value = value.unflatten(2, (attn.heads, -1)) value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2)
# 2. QK normalization # 2. QK normalization
if attn.norm_q is not None: if attn.norm_q is not None:
@@ -85,29 +81,21 @@ class HunyuanVideoAttnProcessor2_0:
if attn.add_q_proj is None and encoder_hidden_states is not None: if attn.add_q_proj is None and encoder_hidden_states is not None:
query = torch.cat( query = torch.cat(
[ [
apply_rotary_emb( apply_rotary_emb(query[:, :, : -encoder_hidden_states.shape[1]], image_rotary_emb),
query[:, : -encoder_hidden_states.shape[1]], query[:, :, -encoder_hidden_states.shape[1] :],
image_rotary_emb,
sequence_dim=1,
),
query[:, -encoder_hidden_states.shape[1] :],
], ],
dim=1, dim=2,
) )
key = torch.cat( key = torch.cat(
[ [
apply_rotary_emb( apply_rotary_emb(key[:, :, : -encoder_hidden_states.shape[1]], image_rotary_emb),
key[:, : -encoder_hidden_states.shape[1]], key[:, :, -encoder_hidden_states.shape[1] :],
image_rotary_emb,
sequence_dim=1,
),
key[:, -encoder_hidden_states.shape[1] :],
], ],
dim=1, dim=2,
) )
else: else:
query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) query = apply_rotary_emb(query, image_rotary_emb)
key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) key = apply_rotary_emb(key, image_rotary_emb)
# 4. Encoder condition QKV projection and normalization # 4. Encoder condition QKV projection and normalization
if attn.add_q_proj is not None and encoder_hidden_states is not None: if attn.add_q_proj is not None and encoder_hidden_states is not None:
@@ -115,31 +103,24 @@ class HunyuanVideoAttnProcessor2_0:
encoder_key = attn.add_k_proj(encoder_hidden_states) encoder_key = attn.add_k_proj(encoder_hidden_states)
encoder_value = attn.add_v_proj(encoder_hidden_states) encoder_value = attn.add_v_proj(encoder_hidden_states)
encoder_query = encoder_query.unflatten(2, (attn.heads, -1)) encoder_query = encoder_query.unflatten(2, (attn.heads, -1)).transpose(1, 2)
encoder_key = encoder_key.unflatten(2, (attn.heads, -1)) encoder_key = encoder_key.unflatten(2, (attn.heads, -1)).transpose(1, 2)
encoder_value = encoder_value.unflatten(2, (attn.heads, -1)) encoder_value = encoder_value.unflatten(2, (attn.heads, -1)).transpose(1, 2)
if attn.norm_added_q is not None: if attn.norm_added_q is not None:
encoder_query = attn.norm_added_q(encoder_query) encoder_query = attn.norm_added_q(encoder_query)
if attn.norm_added_k is not None: if attn.norm_added_k is not None:
encoder_key = attn.norm_added_k(encoder_key) encoder_key = attn.norm_added_k(encoder_key)
query = torch.cat([query, encoder_query], dim=1) query = torch.cat([query, encoder_query], dim=2)
key = torch.cat([key, encoder_key], dim=1) key = torch.cat([key, encoder_key], dim=2)
value = torch.cat([value, encoder_value], dim=1) value = torch.cat([value, encoder_value], dim=2)
# 5. Attention # 5. Attention
hidden_states = dispatch_attention_fn( hidden_states = F.scaled_dot_product_attention(
query, query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
key,
value,
attn_mask=attention_mask,
dropout_p=0.0,
is_causal=False,
backend=self._attention_backend,
parallel_config=self._parallel_config,
) )
hidden_states = hidden_states.flatten(2, 3) hidden_states = hidden_states.transpose(1, 2).flatten(2, 3)
hidden_states = hidden_states.to(query.dtype) hidden_states = hidden_states.to(query.dtype)
# 6. Output projection # 6. Output projection

View File

@@ -555,9 +555,6 @@ class WanTransformer3DModel(
"encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False),
}, },
"proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3),
"": {
"timestep": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False),
},
} }
@register_to_config @register_to_config