# File size: 837 Bytes
"""Qwen3ScaleSeq model configuration.

Extends Qwen3Config with scale_seq_times for embedding replication
to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md.
"""
from transformers import Qwen3Config
class Qwen3ScaleSeqConfig(Qwen3Config):
    """Configuration for Qwen3 with scaled sequence length via embedding replication.

    Adds one parameter on top of ``Qwen3Config``; every other keyword
    argument is forwarded unchanged to ``Qwen3Config.__init__``.

    Args:
        scale_seq_times (int): Number of additional embedding copies
            (``n - 1`` in the doc). ``0`` means no scaling (standard Qwen3
            behavior), ``1`` means 2x sequence length (original + 1 copy),
            etc. Defaults to ``0``.
        **kwargs: Passed through to ``Qwen3Config.__init__``.

    Raises:
        ValueError: If ``scale_seq_times`` is not a non-negative integer.
    """

    model_type = "qwen3_scale_seq"

    def __init__(self, scale_seq_times: int = 0, **kwargs):
        # Reject bad values up front so a malformed config fails at
        # construction time instead of wherever the value is first consumed.
        # ``bool`` is an ``int`` subclass, so it is excluded explicitly.
        if (
            isinstance(scale_seq_times, bool)
            or not isinstance(scale_seq_times, int)
            or scale_seq_times < 0
        ):
            raise ValueError(
                "`scale_seq_times` must be a non-negative integer, "
                f"got {scale_seq_times!r}."
            )
        # Assigned before delegating to the parent, as in the original code.
        self.scale_seq_times = scale_seq_times
        super().__init__(**kwargs)
# Public API of this module: only the config class is exported by
# ``from <module> import *``.
__all__ = [
    "Qwen3ScaleSeqConfig",
]
|