Sequential-Hidden-Decoding-8B-n2 / configuration_qwen3_scale_seq.py
exlaw's picture
Upload folder using huggingface_hub
0d62c3c verified
"""Qwen3ScaleSeq model configuration.
Extends Qwen3Config with scale_seq_times for embedding replication
to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md.
"""
from transformers import Qwen3Config
class Qwen3ScaleSeqConfig(Qwen3Config):
    """Qwen3 configuration extended with an embedding-replication factor.

    Everything else is inherited from ``Qwen3Config``; this subclass
    contributes a single extra field.

    Args:
        scale_seq_times (int): How many additional embedding copies to use
            (``n - 1`` in the accompanying document). ``0`` disables scaling
            (standard Qwen3 behavior); ``1`` yields 2x sequence length
            (original + 1 copy), and so on.
        **kwargs: Forwarded unchanged to ``Qwen3Config.__init__``.
    """

    model_type = "qwen3_scale_seq"

    def __init__(self, scale_seq_times: int = 0, **kwargs) -> None:
        # The attribute is stored before delegating to the parent constructor.
        # NOTE(review): this ordering is presumably intentional -- confirm
        # against the parent class before moving the assignment.
        self.scale_seq_times = scale_seq_times
        super().__init__(**kwargs)
# Names exported by `from <module> import *`: only the config class.
__all__ = ["Qwen3ScaleSeqConfig"]