Mirror of https://github.com/huggingface/diffusers.git (synced 2025-12-24 05:14:55 +08:00)

Comparing main...torchao-co (1 commit)

Commit: d957cd816d
@@ -21,8 +21,8 @@ from transformers import (
     BertModel,
     BertTokenizer,
     CLIPImageProcessor,
+    MT5Tokenizer,
     T5EncoderModel,
-    T5Tokenizer,
 )

 from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
@@ -260,7 +260,7 @@ class HunyuanDiTDifferentialImg2ImgPipeline(DiffusionPipeline):
             The HunyuanDiT model designed by Tencent Hunyuan.
         text_encoder_2 (`T5EncoderModel`):
             The mT5 embedder. Specifically, it is 't5-v1_1-xxl'.
-        tokenizer_2 (`T5Tokenizer`):
+        tokenizer_2 (`MT5Tokenizer`):
             The tokenizer for the mT5 embedder.
         scheduler ([`DDPMScheduler`]):
             A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents.
@@ -295,7 +295,7 @@ class HunyuanDiTDifferentialImg2ImgPipeline(DiffusionPipeline):
         feature_extractor: CLIPImageProcessor,
         requires_safety_checker: bool = True,
         text_encoder_2=T5EncoderModel,
-        tokenizer_2=T5Tokenizer,
+        tokenizer_2=MT5Tokenizer,
     ):
         super().__init__()

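Note: the `T5Tokenizer` → `MT5Tokenizer` swap in the hunks above matters because HunyuanDiT's second text encoder is an mT5 model, and `MT5Tokenizer` is the transformers class for its multilingual SentencePiece vocabulary. A minimal sketch of pairing the two components; the repo id and subfolder names are assumptions from the usual diffusers layout, not taken from this diff:

```python
# Sketch: load HunyuanDiT's second text encoder with the matching mT5 tokenizer.
from transformers import MT5Tokenizer, T5EncoderModel

repo = "Tencent-Hunyuan/HunyuanDiT-Diffusers"  # assumed repo id
tokenizer_2 = MT5Tokenizer.from_pretrained(repo, subfolder="tokenizer_2")
text_encoder_2 = T5EncoderModel.from_pretrained(repo, subfolder="text_encoder_2")

# The mT5-specific tokenizer class matches the mT5 text encoder, whose
# vocabulary covers multilingual (e.g. Chinese) prompts.
inputs = tokenizer_2("一个宇航员在骑马", return_tensors="pt")
prompt_embeds = text_encoder_2(**inputs).last_hidden_state
```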
@@ -29,52 +29,13 @@ hf download nvidia/Cosmos-Predict2.5-2B

 Convert checkpoint
 ```bash
-# pre-trained
 transformer_ckpt_path=~/.cache/huggingface/hub/models--nvidia--Cosmos-Predict2.5-2B/snapshots/865baf084d4c9e850eac59a021277d5a9b9e8b63/base/pre-trained/d20b7120-df3e-4911-919d-db6e08bad31c_ema_bf16.pt

 python scripts/convert_cosmos_to_diffusers.py \
   --transformer_type Cosmos-2.5-Predict-Base-2B \
   --transformer_ckpt_path $transformer_ckpt_path \
   --vae_type wan2.1 \
-  --output_path converted/2b/d20b7120-df3e-4911-919d-db6e08bad31c \
-  --save_pipeline
-
-# post-trained
-transformer_ckpt_path=~/.cache/huggingface/hub/models--nvidia--Cosmos-Predict2.5-2B/snapshots/865baf084d4c9e850eac59a021277d5a9b9e8b63/base/post-trained/81edfebe-bd6a-4039-8c1d-737df1a790bf_ema_bf16.pt
-
-python scripts/convert_cosmos_to_diffusers.py \
-  --transformer_type Cosmos-2.5-Predict-Base-2B \
-  --transformer_ckpt_path $transformer_ckpt_path \
-  --vae_type wan2.1 \
-  --output_path converted/2b/81edfebe-bd6a-4039-8c1d-737df1a790bf \
-  --save_pipeline
-```
-
-## 14B
-
-```bash
-hf download nvidia/Cosmos-Predict2.5-14B
-```
-
-```bash
-# pre-trained
-transformer_ckpt_path=~/.cache/huggingface/hub/models--nvidia--Cosmos-Predict2.5-14B/snapshots/71ebf3e8af30ecfe440bf0481115975fcc052b46/base/pre-trained/54937b8c-29de-4f04-862c-e67b04ec41e8_ema_bf16.pt
-
-python scripts/convert_cosmos_to_diffusers.py \
-  --transformer_type Cosmos-2.5-Predict-Base-14B \
-  --transformer_ckpt_path $transformer_ckpt_path \
-  --vae_type wan2.1 \
-  --output_path converted/14b/54937b8c-29de-4f04-862c-e67b04ec41e8/ \
-  --save_pipeline
-
-# post-trained
-transformer_ckpt_path=~/.cache/huggingface/hub/models--nvidia--Cosmos-Predict2.5-14B/snapshots/71ebf3e8af30ecfe440bf0481115975fcc052b46/base/post-trained/e21d2a49-4747-44c8-ba44-9f6f9243715f_ema_bf16.pt
-
-python scripts/convert_cosmos_to_diffusers.py \
-  --transformer_type Cosmos-2.5-Predict-Base-14B \
-  --transformer_ckpt_path $transformer_ckpt_path \
-  --vae_type wan2.1 \
-  --output_path converted/14b/e21d2a49-4747-44c8-ba44-9f6f9243715f/ \
+  --output_path converted/cosmos-p2.5-base-2b \
   --save_pipeline
 ```

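Note: `--save_pipeline` writes a complete diffusers pipeline folder, so the conversion above can be smoke-tested by loading the output directory back. A minimal sketch, assuming the `converted/cosmos-p2.5-base-2b` output path from the command above (the concrete pipeline class is resolved from the saved `model_index.json`):

```python
# Sketch: reload the converted checkpoint from the local output directory.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "converted/cosmos-p2.5-base-2b", torch_dtype=torch.bfloat16
)
print(pipe.components.keys())  # expect transformer, vae, scheduler, ...
```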
@@ -337,25 +298,6 @@ TRANSFORMER_CONFIGS = {
         "crossattn_proj_in_channels": 100352,
         "encoder_hidden_states_channels": 1024,
     },
-    "Cosmos-2.5-Predict-Base-14B": {
-        "in_channels": 16 + 1,
-        "out_channels": 16,
-        "num_attention_heads": 40,
-        "attention_head_dim": 128,
-        "num_layers": 36,
-        "mlp_ratio": 4.0,
-        "text_embed_dim": 1024,
-        "adaln_lora_dim": 256,
-        "max_size": (128, 240, 240),
-        "patch_size": (1, 2, 2),
-        "rope_scale": (1.0, 3.0, 3.0),
-        "concat_padding_mask": True,
-        # NOTE: source config has pos_emb_learnable: 'True' - but params are missing
-        "extra_pos_embed_type": None,
-        "use_crossattn_projection": True,
-        "crossattn_proj_in_channels": 100352,
-        "encoder_hidden_states_channels": 1024,
-    },
 }

 VAE_KEYS_RENAME_DICT = {
@@ -73,7 +73,6 @@ from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
 from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
 from .lumina import LuminaPipeline
 from .lumina2 import Lumina2Pipeline
-from .ovis_image import OvisImagePipeline
 from .pag import (
     HunyuanDiTPAGPipeline,
     PixArtSigmaPAGPipeline,
@@ -165,7 +164,6 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("qwenimage", QwenImagePipeline),
         ("qwenimage-controlnet", QwenImageControlNetPipeline),
         ("z-image", ZImagePipeline),
-        ("ovis", OvisImagePipeline),
     ]
 )

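Note: `AUTO_TEXT2IMAGE_PIPELINES_MAPPING` is the table `AutoPipelineForText2Image` consults to resolve a checkpoint to a concrete pipeline class, so dropping the `("ovis", OvisImagePipeline)` entry (together with its import above) removes Ovis from auto-resolution. A minimal sketch of the lookup path; the repo id below is a placeholder, not a real model:

```python
# Sketch: the auto pipeline picks the concrete class from the mapping, keyed
# by the pipeline declared in the checkpoint's model_index.json.
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("some-org/some-text2image-model")
print(type(pipe).__name__)  # e.g. QwenImagePipeline, ZImagePipeline, ...
```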
@@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
 import torch
-from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer
+from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel

 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput

@@ -185,7 +185,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
             The HunyuanDiT model designed by Tencent Hunyuan.
         text_encoder_2 (`T5EncoderModel`):
             The mT5 embedder. Specifically, it is 't5-v1_1-xxl'.
-        tokenizer_2 (`T5Tokenizer`):
+        tokenizer_2 (`MT5Tokenizer`):
             The tokenizer for the mT5 embedder.
         scheduler ([`DDPMScheduler`]):
             A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents.
@@ -229,7 +229,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
             HunyuanDiT2DMultiControlNetModel,
         ],
         text_encoder_2: Optional[T5EncoderModel] = None,
-        tokenizer_2: Optional[T5Tokenizer] = None,
+        tokenizer_2: Optional[MT5Tokenizer] = None,
         requires_safety_checker: bool = True,
     ):
         super().__init__()
@@ -133,7 +133,7 @@ EXAMPLE_DOC_STRING = """
         ...     num_frames=93,
         ...     generator=torch.Generator().manual_seed(1),
         ... ).frames[0]
-        >>> export_to_video(video, "image2world.mp4", fps=16)
+        >>> # export_to_video(video, "image2world.mp4", fps=16)

         >>> # Video2World: condition on an input clip and predict a 93-frame world video.
         >>> prompt = (
@@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
 import torch
-from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer
+from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel

 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput

@@ -169,7 +169,7 @@ class HunyuanDiTPipeline(DiffusionPipeline):
             The HunyuanDiT model designed by Tencent Hunyuan.
         text_encoder_2 (`T5EncoderModel`):
             The mT5 embedder. Specifically, it is 't5-v1_1-xxl'.
-        tokenizer_2 (`T5Tokenizer`):
+        tokenizer_2 (`MT5Tokenizer`):
             The tokenizer for the mT5 embedder.
         scheduler ([`DDPMScheduler`]):
             A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents.
@@ -204,7 +204,7 @@ class HunyuanDiTPipeline(DiffusionPipeline):
         feature_extractor: CLIPImageProcessor,
         requires_safety_checker: bool = True,
         text_encoder_2: Optional[T5EncoderModel] = None,
-        tokenizer_2: Optional[T5Tokenizer] = None,
+        tokenizer_2: Optional[MT5Tokenizer] = None,
     ):
         super().__init__()

@@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
 import torch
-from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer
+from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel

 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput

@@ -173,7 +173,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
             The HunyuanDiT model designed by Tencent Hunyuan.
         text_encoder_2 (`T5EncoderModel`):
             The mT5 embedder. Specifically, it is 't5-v1_1-xxl'.
-        tokenizer_2 (`T5Tokenizer`):
+        tokenizer_2 (`MT5Tokenizer`):
             The tokenizer for the mT5 embedder.
         scheduler ([`DDPMScheduler`]):
             A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents.
@@ -208,7 +208,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
         feature_extractor: Optional[CLIPImageProcessor] = None,
         requires_safety_checker: bool = True,
         text_encoder_2: Optional[T5EncoderModel] = None,
-        tokenizer_2: Optional[T5Tokenizer] = None,
+        tokenizer_2: Optional[MT5Tokenizer] = None,
         pag_applied_layers: Union[str, List[str]] = "blocks.1",  # "blocks.16.attn1", "blocks.16", "16", 16
     ):
         super().__init__()
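Note: the `pag_applied_layers` default visible in the hunk above selects which transformer blocks receive perturbed-attention guidance in `HunyuanDiTPAGPipeline`. A minimal sketch of constructing the PAG variant through the auto pipeline; the repo id is an assumption, not taken from this diff:

```python
# Sketch: enable_pag=True routes AutoPipelineForText2Image to the PAG variant;
# pag_applied_layers chooses the attention blocks to perturb (default "blocks.1").
import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "Tencent-Hunyuan/HunyuanDiT-Diffusers",  # assumed repo id
    enable_pag=True,
    pag_applied_layers=["blocks.16"],
    torch_dtype=torch.float16,
)
image = pipe("an astronaut riding a horse", pag_scale=3.0).images[0]
```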
@@ -671,44 +671,46 @@ class TorchAoSerializationTest(unittest.TestCase):
 class TorchAoCompileTest(QuantCompileTests, unittest.TestCase):
     @property
     def quantization_config(self):
+        from torchao.quantization import Int8WeightOnlyConfig
+
         return PipelineQuantizationConfig(
             quant_mapping={
-                "transformer": TorchAoConfig(quant_type="int8_weight_only"),
+                "transformer": TorchAoConfig(Int8WeightOnlyConfig()),
             },
         )

-    @unittest.skip(
-        "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work "
-        "when compiling."
-    )
-    def test_torch_compile_with_cpu_offload(self):
-        # RuntimeError: _apply(): Couldn't swap Linear.weight
-        super().test_torch_compile_with_cpu_offload()
+    # @unittest.skip(
+    #     "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work "
+    #     "when compiling."
+    # )
+    # def test_torch_compile_with_cpu_offload(self):
+    #     # RuntimeError: _apply(): Couldn't swap Linear.weight
+    #     super().test_torch_compile_with_cpu_offload()

-    @parameterized.expand([False, True])
-    @unittest.skip(
-        """
-        For `use_stream=False`:
-        - Changing the device of AQT tensor, with `param.data = param.data.to(device)` as done in group offloading implementation
-        is unsupported in TorchAO. When compiling, FakeTensor device mismatch causes failure.
-        For `use_stream=True`:
-        Using non-default stream requires ability to pin tensors. AQT does not seem to support this yet in TorchAO.
-        """
-    )
-    def test_torch_compile_with_group_offload_leaf(self, use_stream):
-        # For use_stream=False:
-        # If we run group offloading without compilation, we will see:
-        # RuntimeError: Attempted to set the storage of a tensor on device "cpu" to a storage on different device "cuda:0". This is no longer allowed; the devices must match.
-        # When running with compilation, the error ends up being different:
-        # Dynamo failed to run FX node with fake tensors: call_function <built-in function linear>(*(FakeTensor(..., device='cuda:0', size=(s0, 256), dtype=torch.bfloat16), AffineQuantizedTensor(tensor_impl=PlainAQTTensorImpl(data=FakeTensor(..., size=(1536, 256), dtype=torch.int8)... , scale=FakeTensor(..., size=(1536,), dtype=torch.bfloat16)... , zero_point=FakeTensor(..., size=(1536,), dtype=torch.int64)... , _layout=PlainLayout()), block_size=(1, 256), shape=torch.Size([1536, 256]), device=cpu, dtype=torch.bfloat16, requires_grad=False), Parameter(FakeTensor(..., device='cuda:0', size=(1536,), dtype=torch.bfloat16,
-        # requires_grad=True))), **{}): got RuntimeError('Unhandled FakeTensor Device Propagation for aten.mm.default, found two different devices cuda:0, cpu')
-        # Looks like something that will have to be looked into upstream.
-        # for linear layers, weight.tensor_impl shows cuda... but:
-        # weight.tensor_impl.{data,scale,zero_point}.device will be cpu
+    # @parameterized.expand([False, True])
+    # @unittest.skip(
+    #     """
+    #     For `use_stream=False`:
+    #     - Changing the device of AQT tensor, with `param.data = param.data.to(device)` as done in group offloading implementation
+    #     is unsupported in TorchAO. When compiling, FakeTensor device mismatch causes failure.
+    #     For `use_stream=True`:
+    #     Using non-default stream requires ability to pin tensors. AQT does not seem to support this yet in TorchAO.
+    #     """
+    # )
+    # def test_torch_compile_with_group_offload_leaf(self, use_stream):
+    #     # For use_stream=False:
+    #     # If we run group offloading without compilation, we will see:
+    #     # RuntimeError: Attempted to set the storage of a tensor on device "cpu" to a storage on different device "cuda:0". This is no longer allowed; the devices must match.
+    #     # When running with compilation, the error ends up being different:
+    #     # Dynamo failed to run FX node with fake tensors: call_function <built-in function linear>(*(FakeTensor(..., device='cuda:0', size=(s0, 256), dtype=torch.bfloat16), AffineQuantizedTensor(tensor_impl=PlainAQTTensorImpl(data=FakeTensor(..., size=(1536, 256), dtype=torch.int8)... , scale=FakeTensor(..., size=(1536,), dtype=torch.bfloat16)... , zero_point=FakeTensor(..., size=(1536,), dtype=torch.int64)... , _layout=PlainLayout()), block_size=(1, 256), shape=torch.Size([1536, 256]), device=cpu, dtype=torch.bfloat16, requires_grad=False), Parameter(FakeTensor(..., device='cuda:0', size=(1536,), dtype=torch.bfloat16,
+    #     # requires_grad=True))), **{}): got RuntimeError('Unhandled FakeTensor Device Propagation for aten.mm.default, found two different devices cuda:0, cpu')
+    #     # Looks like something that will have to be looked into upstream.
+    #     # for linear layers, weight.tensor_impl shows cuda... but:
+    #     # weight.tensor_impl.{data,scale,zero_point}.device will be cpu

-        # For use_stream=True:
-        # NotImplementedError: AffineQuantizedTensor dispatch: attempting to run unimplemented operator/function: func=<OpOverload(op='aten.is_pinned', overload='default')>, types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), arg_types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), kwarg_types={}
-        super()._test_torch_compile_with_group_offload_leaf(use_stream=use_stream)
+    #     # For use_stream=True:
+    #     # NotImplementedError: AffineQuantizedTensor dispatch: attempting to run unimplemented operator/function: func=<OpOverload(op='aten.is_pinned', overload='default')>, types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), arg_types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), kwarg_types={}
+    #     super()._test_torch_compile_with_group_offload_leaf(use_stream=use_stream)


 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
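Note: the test change above tracks torchao's move from string `quant_type` identifiers to `AOBaseConfig` instances, i.e. `TorchAoConfig(Int8WeightOnlyConfig())` instead of `TorchAoConfig(quant_type="int8_weight_only")`. A minimal sketch of the new-style usage at pipeline load time; the repo id is a placeholder:

```python
# Sketch: quantize only the transformer component with torchao's config-object API.
import torch
from diffusers import DiffusionPipeline, TorchAoConfig
from diffusers.quantizers import PipelineQuantizationConfig
from torchao.quantization import Int8WeightOnlyConfig

pipeline_quant_config = PipelineQuantizationConfig(
    quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())}
)
pipe = DiffusionPipeline.from_pretrained(
    "some-org/some-diffusion-model",  # placeholder repo id
    quantization_config=pipeline_quant_config,
    torch_dtype=torch.bfloat16,
)
```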