mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 22:15:00 +08:00
Compare commits
7 Commits
modular-te
...
cache-docs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8470ce3d06 | ||
|
|
73601980c2 | ||
|
|
25795856e0 | ||
|
|
d76b744ac3 | ||
|
|
b26867b628 | ||
|
|
e3f441648c | ||
|
|
c6cfc5ce1d |
@@ -29,7 +29,7 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
|
|||||||
|
|
||||||
[[autodoc]] apply_faster_cache
|
[[autodoc]] apply_faster_cache
|
||||||
|
|
||||||
### FirstBlockCacheConfig
|
## FirstBlockCacheConfig
|
||||||
|
|
||||||
[[autodoc]] FirstBlockCacheConfig
|
[[autodoc]] FirstBlockCacheConfig
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,20 @@ config = FasterCacheConfig(
|
|||||||
pipeline.transformer.enable_cache(config)
|
pipeline.transformer.enable_cache(config)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## FirstBlockCache
|
||||||
|
|
||||||
|
[FirstBlock Cache](https://huggingface.co/docs/diffusers/main/en/api/cache#diffusers.FirstBlockCacheConfig) checks how much the early layers of the denoiser change from one timestep to the next. If the change is small, the model skips the expensive later layers and reuses the previous output.
|
||||||
|
|
||||||
|
```py
|
||||||
|
import torch
|
||||||
|
from diffusers import DiffusionPipeline
|
||||||
|
from diffusers.hooks import apply_first_block_cache, FirstBlockCacheConfig
|
||||||
|
|
||||||
|
pipeline = DiffusionPipeline.from_pretrained(
|
||||||
|
"Qwen/Qwen-Image", torch_dtype=torch.bfloat16
|
||||||
|
)
|
||||||
|
apply_first_block_cache(pipeline.transformer, FirstBlockCacheConfig(threshold=0.2))
|
||||||
|
```
|
||||||
## TaylorSeer Cache
|
## TaylorSeer Cache
|
||||||
|
|
||||||
[TaylorSeer Cache](https://huggingface.co/papers/2403.06923) accelerates diffusion inference by using Taylor series expansions to approximate and cache intermediate activations across denoising steps. The method predicts future outputs based on past computations, reusing them at specified intervals to reduce redundant calculations.
|
[TaylorSeer Cache](https://huggingface.co/papers/2403.06923) accelerates diffusion inference by using Taylor series expansions to approximate and cache intermediate activations across denoising steps. The method predicts future outputs based on past computations, reusing them at specified intervals to reduce redundant calculations.
|
||||||
@@ -87,8 +101,7 @@ from diffusers import FluxPipeline, TaylorSeerCacheConfig
|
|||||||
pipe = FluxPipeline.from_pretrained(
|
pipe = FluxPipeline.from_pretrained(
|
||||||
"black-forest-labs/FLUX.1-dev",
|
"black-forest-labs/FLUX.1-dev",
|
||||||
torch_dtype=torch.bfloat16,
|
torch_dtype=torch.bfloat16,
|
||||||
)
|
).to("cuda")
|
||||||
pipe.to("cuda")
|
|
||||||
|
|
||||||
config = TaylorSeerCacheConfig(
|
config = TaylorSeerCacheConfig(
|
||||||
cache_interval=5,
|
cache_interval=5,
|
||||||
@@ -97,4 +110,4 @@ config = TaylorSeerCacheConfig(
|
|||||||
taylor_factors_dtype=torch.bfloat16,
|
taylor_factors_dtype=torch.bfloat16,
|
||||||
)
|
)
|
||||||
pipe.transformer.enable_cache(config)
|
pipe.transformer.enable_cache(config)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -41,9 +41,11 @@ class CacheMixin:
|
|||||||
Enable caching techniques on the model.
|
Enable caching techniques on the model.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config (`Union[PyramidAttentionBroadcastConfig]`):
|
config (`Union[PyramidAttentionBroadcastConfig, FasterCacheConfig, FirstBlockCacheConfig]`):
|
||||||
The configuration for applying the caching technique. Currently supported caching techniques are:
|
The configuration for applying the caching technique. Currently supported caching techniques are:
|
||||||
- [`~hooks.PyramidAttentionBroadcastConfig`]
|
- [`~hooks.PyramidAttentionBroadcastConfig`]
|
||||||
|
- [`~hooks.FasterCacheConfig`]
|
||||||
|
- [`~hooks.FirstBlockCacheConfig`]
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user