diff --git a/src/diffusers/pipelines/transformers_loading_utils.py b/src/diffusers/pipelines/transformers_loading_utils.py index c750292f59..6f17c2a8cc 100644 --- a/src/diffusers/pipelines/transformers_loading_utils.py +++ b/src/diffusers/pipelines/transformers_loading_utils.py @@ -112,10 +112,13 @@ def _load_transformers_model_from_dduf( tensors = safetensors.torch.load(mmap) # Update the state dictionary with tensors state_dict.update(tensors) - return cls.from_pretrained( + model = cls.from_pretrained( pretrained_model_name_or_path=None, config=config, generation_config=generation_config, state_dict=state_dict, **kwargs, ) + # from_pretrained already returns the model in eval mode; leave the mode untouched + # so DDUF loading stays consistent with non-DDUF loading (callers opt into .train()). + return model