K-Compression committed on
Commit 8ecf0aa · verified · 1 parent: ea69e2c

Update modeling_hyperclovax.py

Files changed (1)
  1. modeling_hyperclovax.py +2 -2
modeling_hyperclovax.py CHANGED
@@ -43,7 +43,7 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
-from transformers.utils import LossKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
+from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
 from .configuration_hyperclovax import HyperCLOVAXConfig
 if is_torch_flex_attn_available():
     from torch.nn.attention.flex_attention import BlockMask
@@ -620,7 +620,7 @@ class HyperCLOVAXModel(HyperCLOVAXPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
+class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...


 @auto_docstring
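
The two hunks make one logical change: recent transformers releases dropped LossKwargs from transformers.utils in favor of TransformersKwargs, so both the import and the typed-kwargs mixin for the causal-LM path move to the new name. A minimal sketch of how such a mixin is typically consumed, assumed from the Unpack and FlashAttentionKwargs names already in the file (the forward signature below is illustrative, not copied from this commit):

# Illustrative sketch, not code from this commit: the transformers
# typed-kwargs pattern that KwargsForCausalLM plugs into.
from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
from transformers.processing_utils import Unpack
from transformers.utils import TransformersKwargs

class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...

def forward(self, input_ids, **kwargs: Unpack[KwargsForCausalLM]):
    # Static checkers validate kwargs against both the flash-attention
    # keyword arguments and the generic transformers ones, which
    # previously came in via the now-removed LossKwargs.
    ...

If the file also had to import on older transformers versions that predate TransformersKwargs, one option (an assumption, not something this commit does) would be a guarded fallback:

try:
    from transformers.utils import TransformersKwargs
except ImportError:  # older transformers exposed the class as LossKwargs
    from transformers.utils import LossKwargs as TransformersKwargs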