YongganFu committed on
Commit
6bb63c1
·
verified ·
1 Parent(s): be9e4da

Update modeling_qwen3.py: switch from LossKwargs to TransformersKwargs

Browse files
Files changed (1) hide show
  1. modeling_qwen3.py +2 -2
modeling_qwen3.py CHANGED
@@ -35,7 +35,7 @@ from transformers.modeling_outputs import (
35
  from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
36
  from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
37
  from transformers.processing_utils import Unpack
38
- from transformers.utils import LossKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
39
  from .configuration_nvrdiff import NVRDiffConfig
40
 
41
 
@@ -752,7 +752,7 @@ class Qwen3Model(Qwen3PreTrainedModel):
752
  return causal_mask
753
 
754
 
755
- class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
756
 
757
 
758
  @auto_docstring
 
35
  from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
36
  from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
37
  from transformers.processing_utils import Unpack
38
+ from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torch_flex_attn_available, logging
39
  from .configuration_nvrdiff import NVRDiffConfig
40
 
41
 
 
752
  return causal_mask
753
 
754
 
755
+ class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...
756
 
757
 
758
  @auto_docstring