File size: 837 Bytes
0d62c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
"""Qwen3ScaleSeq model configuration.

Extends Qwen3Config with scale_seq_times for embedding replication
to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md.
"""

from transformers import Qwen3Config


class Qwen3ScaleSeqConfig(Qwen3Config):
    """
    Configuration for Qwen3 with scaled sequence length via embedding replication.

    Adds one parameter on top of Qwen3Config:
        scale_seq_times (int): Number of additional embedding copies (n-1 in the doc).
            0 means no scaling (standard Qwen3 behavior).
            1 means 2x sequence length (original + 1 copy), etc.
            Must be a non-negative integer.

    All other keyword arguments are forwarded unchanged to ``Qwen3Config``.

    Raises:
        TypeError: If ``scale_seq_times`` is not an integer (booleans are
            rejected too, since ``True``/``False`` are almost certainly a
            caller mistake for a count).
        ValueError: If ``scale_seq_times`` is negative.
    """

    model_type = "qwen3_scale_seq"

    def __init__(self, scale_seq_times: int = 0, **kwargs) -> None:
        # Fail fast on values that cannot represent "number of extra copies";
        # otherwise the bad value would only surface later, far from its source.
        if isinstance(scale_seq_times, bool) or not isinstance(scale_seq_times, int):
            raise TypeError(
                "scale_seq_times must be a non-negative int, got "
                f"{type(scale_seq_times).__name__}: {scale_seq_times!r}"
            )
        if scale_seq_times < 0:
            raise ValueError(
                f"scale_seq_times must be >= 0, got {scale_seq_times}"
            )

        # Set our own field first, then let the parent consume the rest.
        self.scale_seq_times = scale_seq_times
        super().__init__(**kwargs)


# Public API of this module: only the config class is exported via
# ``from <module> import *``.
__all__ = ["Qwen3ScaleSeqConfig"]