remove LossKwargs (#1)
opened by kashif (HF Staff)

modeling_sdar.py CHANGED (+2 -2)
@@ -43,7 +43,7 @@ from transformers.modeling_outputs import (
 from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
-from transformers.utils import LossKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
+from transformers.utils import auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
 from .configuration_sdar import SDARConfig

 from flash_attn.ops.triton.layer_norm import rms_norm_fn as flash_rms_norm

@@ -734,7 +734,7 @@ class SDARModel(SDARPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs):
+class KwargsForCausalLM(FlashAttentionKwargs):
     ...
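For context on the second hunk: `KwargsForCausalLM` is the TypedDict-style kwargs bundle that gets unpacked into the model's `forward` through `Unpack` (imported from `transformers.processing_utils` in the first hunk). `LossKwargs` is dropped here, presumably because recent `transformers` releases no longer export it from `transformers.utils`. Below is a minimal, self-contained sketch of the pattern, not the library's actual definitions; the stand-in field names (`cu_seq_lens_q`, `max_length_q`, `num_items_in_batch`) are assumptions for illustration.

```python
# Minimal sketch of the Unpack/TypedDict kwargs pattern (assumed field names).
from typing import Optional

from typing_extensions import TypedDict, Unpack  # in typing on Python >= 3.12


class FlashAttentionKwargs(TypedDict, total=False):
    # Stand-in for transformers' FlashAttentionKwargs (assumed schema).
    cu_seq_lens_q: Optional[list]
    max_length_q: Optional[int]


# After this PR: the causal-LM kwargs bundle is the flash-attention
# kwargs alone, with no LossKwargs base class mixed in.
class KwargsForCausalLM(FlashAttentionKwargs):
    ...


def forward(input_ids: list, **kwargs: Unpack[KwargsForCausalLM]) -> None:
    # Static checkers now reject loss-only keys (e.g. num_items_in_batch)
    # on this signature; at runtime the call behaves as before.
    print(sorted(kwargs))


forward([1, 2, 3], max_length_q=128)
```

Since `Unpack` only constrains the type-checked signature, removing the `LossKwargs` base changes which keyword arguments type checkers accept, while runtime behavior of `forward` is unchanged.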