| """Qwen3ScaleSeq model configuration. | |
| Extends Qwen3Config with scale_seq_times for embedding replication | |
| to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md. | |
| """ | |
import operator

from transformers import Qwen3Config
class Qwen3ScaleSeqConfig(Qwen3Config):
    """Configuration for Qwen3 with scaled sequence length via embedding replication.

    Adds one parameter on top of ``Qwen3Config``; every other keyword argument
    is forwarded unchanged to ``Qwen3Config.__init__``.

    Args:
        scale_seq_times: Number of additional embedding copies (``n - 1`` in
            the doc). ``0`` means no scaling (standard Qwen3 behavior),
            ``1`` means 2x sequence length (original + 1 copy), etc.
            Must be a non-negative integer.
        **kwargs: Passed through to ``Qwen3Config``.

    Raises:
        TypeError: If ``scale_seq_times`` is not an integer (for example a
            float or a string).
        ValueError: If ``scale_seq_times`` is negative.
    """

    model_type = "qwen3_scale_seq"

    def __init__(self, scale_seq_times: int = 0, **kwargs) -> None:
        try:
            # operator.index accepts integer-like objects (anything that
            # implements __index__) and rejects floats and strings, so a
            # fractional or textual copy count fails here instead of later.
            scale_seq_times = operator.index(scale_seq_times)
        except TypeError as err:
            raise TypeError(
                "scale_seq_times must be an integer, got "
                f"{type(scale_seq_times).__name__}"
            ) from err
        if scale_seq_times < 0:
            # A negative number of additional copies has no meaning.
            raise ValueError(
                f"scale_seq_times must be >= 0, got {scale_seq_times}"
            )

        # Assigned before delegating to the parent initializer, as in the
        # original implementation.
        self.scale_seq_times = scale_seq_times
        super().__init__(**kwargs)
# Public API of this module: only the configuration class is exported.
__all__ = [
    "Qwen3ScaleSeqConfig",
]