remove LossKwargs

#1
by kashif (HF Staff), opened
Files changed (1)
  1. modeling_sdar.py +2 -2
modeling_sdar.py CHANGED
@@ -43,7 +43,7 @@ from transformers.modeling_outputs import (
 from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
-from transformers.utils import LossKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
+from transformers.utils import auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
 from .configuration_sdar import SDARConfig

 from flash_attn.ops.triton.layer_norm import rms_norm_fn as flash_rms_norm
@@ -734,7 +734,7 @@ class SDARModel(SDARPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs):
+class KwargsForCausalLM(FlashAttentionKwargs):
    ...
 
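For context: `LossKwargs` is no longer exported from `transformers.utils` in recent transformers releases, which presumably breaks this import; dropping it from `KwargsForCausalLM`'s bases leaves the class as a plain flash-attention kwargs bundle. Below is a minimal, self-contained sketch of the `Unpack`-kwargs pattern this diff touches. The `FlashAttentionKwargs` stand-in and the bare `forward` signature are illustrative assumptions, not the actual SDAR code (the real `FlashAttentionKwargs` ships in `transformers.modeling_flash_attention_utils`).

```python
# Sketch of the Unpack-kwargs pattern, assuming a simplified stand-in
# for transformers' FlashAttentionKwargs TypedDict.
from typing_extensions import TypedDict, Unpack


class FlashAttentionKwargs(TypedDict, total=False):
    cu_seq_lens_q: object  # illustrative field, not the full real set


# After this PR the bundle carries only flash-attention kwargs;
# the loss-related fields once contributed by LossKwargs are gone.
class KwargsForCausalLM(FlashAttentionKwargs):
    ...


def forward(**kwargs: Unpack[KwargsForCausalLM]) -> None:
    # Type checkers validate keyword arguments against the TypedDict;
    # at runtime this is an ordinary **kwargs dict.
    print(sorted(kwargs))


forward(cu_seq_lens_q=None)  # prints ['cu_seq_lens_q']
```

The actual SDAR `forward` keeps its full signature; the sketch only shows how `Unpack[KwargsForCausalLM]` types the trailing `**kwargs`.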