Update modeling_hyperclovax.py
Browse files- modeling_hyperclovax.py +2 -2
modeling_hyperclovax.py
CHANGED
|
@@ -43,7 +43,7 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_u
|
|
| 43 |
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
| 44 |
from transformers.processing_utils import Unpack
|
| 45 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 46 |
-
from transformers.utils import auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
|
| 47 |
from .configuration_hyperclovax import HyperCLOVAXConfig
|
| 48 |
if is_torch_flex_attn_available():
|
| 49 |
from torch.nn.attention.flex_attention import BlockMask
|
|
@@ -620,7 +620,7 @@ class HyperCLOVAXModel(HyperCLOVAXPreTrainedModel):
|
|
| 620 |
return causal_mask
|
| 621 |
|
| 622 |
|
| 623 |
-
class KwargsForCausalLM(FlashAttentionKwargs): ...
|
| 624 |
|
| 625 |
|
| 626 |
@auto_docstring
|
|
|
|
| 43 |
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
| 44 |
from transformers.processing_utils import Unpack
|
| 45 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 46 |
+
from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
|
| 47 |
from .configuration_hyperclovax import HyperCLOVAXConfig
|
| 48 |
if is_torch_flex_attn_available():
|
| 49 |
from torch.nn.attention.flex_attention import BlockMask
|
|
|
|
| 620 |
return causal_mask
|
| 621 |
|
| 622 |
|
| 623 |
+
class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...
|
| 624 |
|
| 625 |
|
| 626 |
@auto_docstring
|