Update modeling_sdar.py (#2)
Commit fb3dd55ee5da84e0c564a5201f2ca224a7a7dedd

modeling_sdar.py  +4 -11
@@ -1,11 +1,8 @@
+# SPDX-License-Identifier: MIT
+# Adapted from https://huggingface.co/Gen-Verse/TraDo-8B-Instruct/blob/main/modeling_sdar.py
 # This file is modified based on https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/qwen3/modeling_qwen3.py.
 #
-# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-# This file was automatically generated from src/transformers/models/qwen3/modular_qwen3.py.
-# Do NOT edit this file manually as any edits will be overwritten by the generation of
-# the file from the modular. If any change should be done, please apply the change to the
-# modular_qwen3.py file directly. One of our CI enforces this.
-# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+
 # coding=utf-8
 # Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
 #

@@ -49,7 +46,6 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     auto_docstring,
     can_return_tuple,
     is_torch_flex_attn_available,

@@ -781,9 +777,6 @@ class SDARModel(SDARPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
-
-
 @auto_docstring
 class SDARForCausalLM(SDARPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]

@@ -832,7 +825,7 @@ class SDARForCausalLM(SDARPreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs: Unpack[KwargsForCausalLM],
+        **kwargs: dict,
     ) -> CausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):