Mirror of https://github.com/huggingface/diffusers.git, synced 2025-12-06 12:34:13 +08:00
Fix typos in strings and comments (#11476)
* Fix typos in strings and comments

  Signed-off-by: co63oc <co63oc@users.noreply.github.com>

* Update src/diffusers/hooks/hooks.py

  Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update src/diffusers/hooks/hooks.py

  Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update layerwise_casting.py

* Apply style fixes

* update

---------

Signed-off-by: co63oc <co63oc@users.noreply.github.com>
Co-authored-by: Aryan <contact.aryanvs@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
@@ -555,7 +555,7 @@ class VideoDataset(Dataset):

         if any(not path.is_file() for path in instance_videos):
             raise ValueError(
-                "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
+                "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
             )

         return instance_prompts, instance_videos

@@ -539,7 +539,7 @@ class VideoDataset(Dataset):

         if any(not path.is_file() for path in instance_videos):
             raise ValueError(
-                "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
+                "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
             )

         return instance_prompts, instance_videos

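The two hunks above fix the same message in two training scripts. For context, here is a minimal sketch of the input layout the error message describes, using only the logic visible in the hunks; the file and directory names are illustrative assumptions, not the scripts' actual defaults:

```python
# Sketch: `--video_column` points at a text file inside `--instance_data_root`
# whose lines are relative paths to video files.
from pathlib import Path

instance_data_root = Path("instance_data")             # hypothetical --instance_data_root
video_column_file = instance_data_root / "videos.txt"  # hypothetical --video_column target

instance_videos = [
    instance_data_root / line.strip()
    for line in video_column_file.read_text().splitlines()
    if line.strip()
]
if any(not path.is_file() for path in instance_videos):
    raise ValueError("found at least one path that is not a valid file")
```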
@@ -73,7 +73,7 @@ accelerate launch train_multi_subject_dreambooth_inpaint.py \

 ## 3. Results

-A [report](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & baises run was performed on a A100 GPU with the following stetting:
+A [report](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress every 50 steps. Note, the reported Weights & Biases run was performed on an A100 GPU with the following setting:

 ```bash
 accelerate launch train_multi_subject_dreambooth_inpaint.py \

@@ -146,7 +146,7 @@ class FasterCacheConfig:
     alpha_low_frequency: float = 1.1
     alpha_high_frequency: float = 1.1

-    # n as described in CFG-Cache explanation in the paper - dependant on the model
+    # n as described in CFG-Cache explanation in the paper - dependent on the model
     unconditional_batch_skip_range: int = 5
     unconditional_batch_timestep_skip_range: Tuple[int, int] = (-1, 641)

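For readers unfamiliar with FasterCache, a hedged sketch of constructing this config, using only the attributes visible in the hunk (the top-level import path is an assumption):

```python
from diffusers import FasterCacheConfig  # import path assumed

config = FasterCacheConfig(
    # low/high frequency weights from the hunk above
    alpha_low_frequency=1.1,
    alpha_high_frequency=1.1,
    # "n" from the paper's CFG-Cache explanation: re-use the unconditional
    # branch for up to 5 steps, but only inside this timestep window
    unconditional_batch_skip_range=5,
    unconditional_batch_timestep_skip_range=(-1, 641),
)
```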
@@ -45,7 +45,7 @@ class ModelHook:

     def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
         r"""
-        Hook that is executed when a model is deinitalized.
+        Hook that is executed when a model is deinitialized.

         Args:
             module (`torch.nn.Module`):

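The interface being documented can be sketched with a toy subclass; `ModelHook` lives in an internal module, so the import path is an assumption:

```python
import torch
from diffusers.hooks.hooks import ModelHook  # internal module; path assumed

class LoggingHook(ModelHook):
    def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
        print(f"hook attached to {module.__class__.__name__}")
        return module

    def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
        # The method name mirrors the (still misspelled) interface above;
        # only its docstring was corrected in this commit.
        print("hook removed")
        return module
```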
@@ -62,7 +62,7 @@ class LayerwiseCastingHook(ModelHook):

     def deinitalize_hook(self, module: torch.nn.Module):
         raise NotImplementedError(
-            "LayerwiseCastingHook does not support deinitalization. A model once enabled with layerwise casting will "
+            "LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will "
             "have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype "
             "will lead to precision loss, which might have an impact on the model's generation quality. The model should "
             "be re-initialized and loaded in the original dtype."

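To see why deinitialization is disallowed, here is a sketch of enabling layerwise casting through the public `enable_layerwise_casting` helper; the checkpoint name is illustrative. Once the weights are stored in float8, there is no lossless way back:

```python
import torch
from diffusers import CogVideoXTransformer3DModel

# illustrative checkpoint; any ModelMixin-based model works the same way
transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)
# weights are stored in float8 and upcast to bfloat16 per-layer at compute time
transformer.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
)
# there is deliberately no disable path: reload the model to get bfloat16 back
```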
@@ -251,7 +251,7 @@ class PeftAdapterMixin:

         rank = {}
         for key, val in state_dict.items():
-            # Cannot figure out rank from lora layers that don't have atleast 2 dimensions.
+            # Cannot figure out rank from lora layers that don't have at least 2 dimensions.
             # Bias layers in LoRA only have a single dimension
             if "lora_B" in key and val.ndim > 1:
                 # Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol.

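A toy illustration of the rank-detection loop above: a `lora_B` weight has shape `(out_features, rank)`, so the rank is its trailing dimension, while 1-D bias entries carry no rank information. The state dict below is fabricated for the example:

```python
import torch

state_dict = {
    "unet.to_q.lora_B.weight": torch.zeros(64, 4),  # rank 4
    "unet.to_q.lora_B.bias": torch.zeros(64),       # ndim == 1, skipped
}
rank = {}
for key, val in state_dict.items():
    # Cannot figure out rank from lora layers that don't have at least 2 dimensions.
    if "lora_B" in key and val.ndim > 1:
        rank[key] = val.shape[1]
assert rank == {"unet.to_q.lora_B.weight": 4}
```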
@@ -63,8 +63,8 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapter
             Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
         force_upcast (`bool`, *optional*, default to `True`):
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
-            can be fine-tuned / trained to a lower range without loosing too much precision in which case
-            `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+            can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
+            can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
         mid_block_add_attention (`bool`, *optional*, default to `True`):
             If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the
             mid_block will only have resnet blocks

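The docstring's suggestion in practice, as a sketch: load the fp16-safe SDXL VAE from the link above (whose shipped config is expected to already set `force_upcast=False`) and run the whole pipeline in float16:

```python
import torch
from diffusers import AutoencoderKL, StableDiffusionXLPipeline

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", vae=vae, torch_dtype=torch.float16
)
# with force_upcast disabled, decoding stays in float16 end to end
```

The same two-line docstring rewrap is applied to the three VAE variants below.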
@@ -715,8 +715,8 @@ class AutoencoderKLAllegro(ModelMixin, ConfigMixin):
             Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
         force_upcast (`bool`, default to `True`):
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
-            can be fine-tuned / trained to a lower range without loosing too much precision in which case
-            `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+            can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
+            can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
     """

     _supports_gradient_checkpointing = True

@@ -983,8 +983,8 @@ class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, FromOriginalModelMixin):
             Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
         force_upcast (`bool`, *optional*, default to `True`):
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
-            can be fine-tuned / trained to a lower range without loosing too much precision in which case
-            `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+            can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
+            can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
     """

     _supports_gradient_checkpointing = True

@@ -161,8 +161,8 @@ class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin):
             Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
         force_upcast (`bool`, *optional*, default to `True`):
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
-            can be fine-tuned / trained to a lower range without loosing too much precision in which case
-            `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+            can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
+            can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
     """

     _supports_gradient_checkpointing = True

@@ -166,7 +166,7 @@ def process_face_embeddings(
         raise RuntimeError("facexlib align face fail")
     align_face = face_helper_1.cropped_faces[0]  # (512, 512, 3) # RGB

-    # incase insightface didn't detect face
+    # in case insightface didn't detect face
     if id_ante_embedding is None:
         logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
         id_ante_embedding = face_helper_2.get_feat(align_face)

@@ -1092,7 +1092,7 @@ class PeftLoraLoaderMixinTests:
     def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self):
         """
         Tests a simple inference with lora attached to text encoder and unet, attaches
-        multiple adapters and set differnt weights for different blocks (i.e. block lora)
+        multiple adapters and set different weights for different blocks (i.e. block lora)
         """
         for scheduler_cls in self.scheduler_classes:
             components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)

@@ -1636,7 +1636,7 @@ class PeftLoraLoaderMixinTests:
         pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"])
         self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")

-        # Fusing should still keep the LoRA layers so outpout should remain the same
+        # Fusing should still keep the LoRA layers so output should remain the same
         outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]

         self.assertTrue(

@@ -270,7 +270,7 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
         generator_device = "cpu"
         components = self.get_dummy_components()

-        # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initalization.
+        # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initialization.
         # This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
         # See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
         components["transformer"] = CogVideoXTransformer3DModel.from_config(

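The comment corrected here (and repeated in the next hunk) also explains why the resolution is pinned: learned positional embeddings are a fixed-size parameter table, while sincos or RoPE embeddings can be computed for any sequence length. A toy contrast:

```python
import torch

num_patches, dim = 16, 8

# learned: a table whose size is frozen at initialization
learned = torch.nn.Parameter(torch.zeros(num_patches, dim))

# sincos: computable on the fly for any number of positions
def sincos(positions: torch.Tensor, dim: int) -> torch.Tensor:
    freqs = torch.exp(-torch.arange(0, dim, 2) * (torch.log(torch.tensor(10000.0)) / dim))
    angles = positions[:, None] * freqs[None, :]
    return torch.cat([angles.sin(), angles.cos()], dim=-1)

print(sincos(torch.arange(32, dtype=torch.float32), dim).shape)  # torch.Size([32, 8])
```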
@@ -280,7 +280,7 @@ class ConsisIDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         generator_device = "cpu"
         components = self.get_dummy_components()

-        # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initalization.
+        # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initialization.
         # This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
         # See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
         components["transformer"] = ConsisIDTransformer3DModel.from_config(

@@ -155,6 +155,6 @@ class KolorsPipelineImg2ImgFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_float16_inference(self):
         super().test_float16_inference(expected_max_diff=7e-2)

-    @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unline kolors t2i.")
+    @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unlike kolors t2i.")
     def test_encode_prompt_works_in_isolation(self):
         pass

@@ -254,7 +254,7 @@ class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0]))
         assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0]))

-    # Because we have `pag_applied_layers` we cannot direcly apply
+    # Because we have `pag_applied_layers` we cannot directly apply
     # `set_default_attn_processor`
     def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
         components = self.get_dummy_components()

@@ -227,7 +227,7 @@ class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
         pipe.enable_sequential_cpu_offload()

         generator = torch.Generator(device="cpu").manual_seed(0)
-        output = pipe("anime turle", generator=generator, output_type="np")
+        output = pipe("anime turtle", generator=generator, output_type="np")

         image = output.images[0]

@@ -250,7 +250,7 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
         pipe.enable_sequential_cpu_offload()

         generator = torch.Generator(device="cpu").manual_seed(0)
-        output = pipe(input_image, "anime turle", generator=generator, output_type="np")
+        output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

         image = output.images[0]

@@ -277,7 +277,7 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
         pipe.enable_sequential_cpu_offload()

         generator = torch.Generator(device="cpu").manual_seed(0)
-        output = pipe(input_image, "anime turle", generator=generator, output_type="np")
+        output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

         image = output.images[0]

@@ -2096,11 +2096,11 @@ class PipelineTesterMixin:
         with torch.no_grad():
             encoded_prompt_outputs = pipe_with_just_text_encoder.encode_prompt(**encode_prompt_inputs)

-        # Programatically determine the reutrn names of `encode_prompt.`
-        ast_vistor = ReturnNameVisitor()
-        encode_prompt_tree = ast_vistor.get_ast_tree(cls=self.pipeline_class)
-        ast_vistor.visit(encode_prompt_tree)
-        prompt_embed_kwargs = ast_vistor.return_names
+        # Programmatically determine the return names of `encode_prompt.`
+        ast_visitor = ReturnNameVisitor()
+        encode_prompt_tree = ast_visitor.get_ast_tree(cls=self.pipeline_class)
+        ast_visitor.visit(encode_prompt_tree)
+        prompt_embed_kwargs = ast_visitor.return_names
         prompt_embeds_kwargs = dict(zip(prompt_embed_kwargs, encoded_prompt_outputs))

         # Pack the outputs of `encode_prompt`.

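For readers curious what the renamed `ast_visitor` actually does, here is a self-contained sketch of an AST visitor that recovers return-value names. The real `ReturnNameVisitor` lives in the test utilities, so this simplified version is an assumption about its shape, not a copy of it:

```python
import ast
import inspect
import textwrap

class ReturnNameVisitor(ast.NodeVisitor):
    """Collect the variable names a function's return statement yields."""

    def __init__(self):
        self.return_names = ()

    def visit_Return(self, node: ast.Return) -> None:
        if isinstance(node.value, ast.Tuple):
            self.return_names = tuple(
                elt.id for elt in node.value.elts if isinstance(elt, ast.Name)
            )
        elif isinstance(node.value, ast.Name):
            self.return_names = (node.value.id,)

def get_ast_tree(func) -> ast.Module:
    return ast.parse(textwrap.dedent(inspect.getsource(func)))

def encode_prompt(prompt):
    prompt_embeds, negative_prompt_embeds = prompt, None
    return prompt_embeds, negative_prompt_embeds

visitor = ReturnNameVisitor()
visitor.visit(get_ast_tree(encode_prompt))
assert visitor.return_names == ("prompt_embeds", "negative_prompt_embeds")
```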
@@ -205,7 +205,7 @@ class BnB4BitBasicTests(Base4bitTests):

     def test_original_dtype(self):
         r"""
-        A simple test to check if the model succesfully stores the original dtype
+        A simple test to check if the model successfully stores the original dtype
         """
         self.assertTrue("_pre_quantization_dtype" in self.model_4bit.config)
         self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)

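The assertion pair makes sense once you see where `_pre_quantization_dtype` comes from: it is recorded in the model config when a quantized checkpoint is loaded, and absent otherwise. A hedged sketch; the SD3 checkpoint matching `Base4bitTests` is an assumption and is license-gated:

```python
import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel

model_4bit = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # assumed test checkpoint
    subfolder="transformer",
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
    ),
)
# the original dtype is kept so it can be reported after quantization
assert "_pre_quantization_dtype" in model_4bit.config
```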
@@ -195,7 +195,7 @@ class BnB8bitBasicTests(Base8bitTests):

     def test_original_dtype(self):
         r"""
-        A simple test to check if the model succesfully stores the original dtype
+        A simple test to check if the model successfully stores the original dtype
         """
         self.assertTrue("_pre_quantization_dtype" in self.model_8bit.config)
         self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)

@@ -95,7 +95,7 @@ class AutoencoderDCSingleFileTests(unittest.TestCase):
     # `in` variant checkpoints require passing in a `config` parameter
     # in order to set the scaling factor correctly.
     # `in` and `mix` variants have the same keys and we cannot automatically infer a scaling factor.
-    # We default to using teh `mix` config
+    # We default to using the `mix` config
     repo_id = "mit-han-lab/dc-ae-f128c512-in-1.0-diffusers"
     ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors"

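The comment corrected here describes a real sharp edge of single-file loading: the `in` and `mix` checkpoint variants share identical keys, so the right scaling factor cannot be inferred and a `config` must be passed explicitly. A sketch using the exact paths from the test above:

```python
from diffusers import AutoencoderDC

ae = AutoencoderDC.from_single_file(
    "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors",
    # without this, the `mix` config (and its scaling factor) would be assumed
    config="mit-han-lab/dc-ae-f128c512-in-1.0-diffusers",
)
```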
@@ -252,7 +252,7 @@ def sort_imports(file: str, check_only: bool = True):
         code, start_prompt="_import_structure = {", end_prompt="if TYPE_CHECKING:"
     )

-    # We ignore block 0 (everything untils start_prompt) and the last block (everything after end_prompt).
+    # We ignore block 0 (everything until start_prompt) and the last block (everything after end_prompt).
     for block_idx in range(1, len(main_blocks) - 1):
         # Check if the block contains some `_import_structure`s thingy to sort.
         block = main_blocks[block_idx]