Sequential-Hidden-Decoding-8B-n2 / configuration_qwen3_scale_seq.py

Upload folder using huggingface_hub

0d62c3c verified 6 days ago

837 Bytes

	"""Qwen3ScaleSeq model configuration.

	Extends Qwen3Config with scale_seq_times for embedding replication
	to scale effective sequence length. See Scale_SeqLen_via_Embedding_Replication.md.
	"""

	from transformers import Qwen3Config


	class Qwen3ScaleSeqConfig(Qwen3Config):
	    """Qwen3 configuration with sequence-length scaling via embedding replication.

	    Behaves like ``Qwen3Config`` and carries one extra setting.

	    Args:
	        scale_seq_times (int): How many additional embedding copies to use
	            (``n - 1`` in the design doc). ``0`` (the default) means no
	            scaling, i.e. standard Qwen3 behavior; ``1`` means 2x sequence
	            length (the original plus one copy), and so on.
	        **kwargs: Passed through unchanged to ``Qwen3Config.__init__``.
	    """

	    model_type = "qwen3_scale_seq"

	    def __init__(self, scale_seq_times=0, **kwargs):
	        """Store ``scale_seq_times``, then run the ``Qwen3Config`` initializer."""
	        # The extra field is assigned before delegating, so it is already
	        # present on the instance while the parent initializer executes.
	        self.scale_seq_times = scale_seq_times
	        super().__init__(**kwargs)


	# Public API of this module: the only name exported by a star-import.
	__all__ = ["Qwen3ScaleSeqConfig"]