exdysa committed on
Commit 730e1d7 · verified · 1 Parent(s): c65ea85

Update modeling_sdar.py (#2)


- Update modeling_sdar.py (fb3dd55ee5da84e0c564a5201f2ca224a7a7dedd)

Files changed (1)
  1. modeling_sdar.py +4 -11
modeling_sdar.py CHANGED
@@ -1,11 +1,8 @@
+# SPDX-License-Identifier: MIT
+# Adapted from https://huggingface.co/Gen-Verse/TraDo-8B-Instruct/blob/main/modeling_sdar.py
 # This file is modified based on https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/qwen3/modeling_qwen3.py.
 #
-# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-# This file was automatically generated from src/transformers/models/qwen3/modular_qwen3.py.
-# Do NOT edit this file manually as any edits will be overwritten by the generation of
-# the file from the modular. If any change should be done, please apply the change to the
-# modular_qwen3.py file directly. One of our CI enforces this.
-# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+
 # coding=utf-8
 # Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
 #
@@ -49,7 +46,6 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     auto_docstring,
     can_return_tuple,
     is_torch_flex_attn_available,
@@ -781,9 +777,6 @@ class SDARModel(SDARPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
-
-
 @auto_docstring
 class SDARForCausalLM(SDARPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
@@ -832,7 +825,7 @@ class SDARForCausalLM(SDARPreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs: Unpack[KwargsForCausalLM],
+        **kwargs: dict,
     ) -> CausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
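
Taken together, the edits drop every reference to `LossKwargs`: the import from `transformers.utils`, the `KwargsForCausalLM(FlashAttentionKwargs, LossKwargs)` helper, and the `Unpack[KwargsForCausalLM]` annotation on `forward`. The likely motivation (an assumption; the commit message does not say) is that releases of transformers newer than the v4.52 line this file was forked from no longer export `LossKwargs`, so the remote-code file failed at import time. Because the annotation is only a typing aid and `**kwargs` are forwarded unchanged at runtime, `**kwargs: dict` is a safe replacement. A minimal sketch of an alternative fix that keeps the annotation, assuming only that the import may fail on newer installs:

```python
from typing import TypedDict

try:
    # Exported by transformers v4.52.x, the version this file was forked from.
    from transformers.utils import LossKwargs
except ImportError:
    # Newer releases no longer ship LossKwargs; substitute an empty stub so
    # annotations like Unpack[KwargsForCausalLM] still resolve.
    class LossKwargs(TypedDict, total=False):
        """Fallback stub for transformers versions without LossKwargs."""
```

The committed fix is simpler than the guard above: rather than keeping the typed-kwargs machinery alive across versions, it deletes the dependency outright, which is the usual trade-off for a single vendored file.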
 
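For completeness, a hypothetical usage sketch showing why the fix matters downstream: `modeling_sdar.py` is executed via `trust_remote_code=True`, so an import error in this file breaks `from_pretrained` no matter which transformers version is installed. The model id below is assumed from the "Adapted from" URL in the new header, not confirmed by the diff; substitute the repository this commit actually belongs to.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed model id (taken from the "Adapted from" comment); adjust as needed.
model_id = "Gen-Verse/TraDo-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # any dtype the hardware supports
    trust_remote_code=True,      # required: the repo ships modeling_sdar.py
)
```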