"""Qwen3ScaleSeq model configuration. Extends Qwen3Config with scale_seq_times for embedding replication to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md. """ from transformers import Qwen3Config class Qwen3ScaleSeqConfig(Qwen3Config): """ Configuration for Qwen3 with scaled sequence length via embedding replication. Adds one parameter on top of Qwen3Config: scale_seq_times (int): Number of additional embedding copies (n-1 in the doc). 0 means no scaling (standard Qwen3 behavior). 1 means 2x sequence length (original + 1 copy), etc. """ model_type = "qwen3_scale_seq" def __init__(self, scale_seq_times=0, **kwargs): self.scale_seq_times = scale_seq_times super().__init__(**kwargs) __all__ = ["Qwen3ScaleSeqConfig"]