JulianHJR
/

v2

Model card Files Files and versions

v2 / configs /model.py

JulianHJR's picture

Upload folder using huggingface_hub

e53f10b verified 22 days ago

history blame contribute delete

3.32 kB

	"""
	Qwen3-30B-A3B-Thinking model configuration.

	Architecture (from HF config):
	- 48 layers
	- 32 attention heads, head_dim=128
	- hidden_size=2048
	- 128 routed experts per layer
	- 8 experts per token (top-k)
	- Thinking mode: enable_thinking=True in chat template
	"""
	from configs.paths import MODELS_DIR

	# Static description of the Qwen3-30B-A3B-Thinking checkpoint used by this project:
	# where the weights live, the architecture numbers hooks rely on, and chat-template knobs.
	MODEL_CONFIG = dict(
	    # --- Checkpoint location and loading ---
	    hf_id="Qwen/Qwen3-30B-A3B-Thinking-2507",
	    local_dir=MODELS_DIR / "Qwen3-30B-A3B-Thinking-2507",
	    load_dtype="bfloat16",
	    trust_remote_code=True,
	    # --- Architecture (meant to be cross-checked against the loaded model) ---
	    num_layers=48,
	    num_attention_heads=32,
	    head_dim=128,
	    hidden_size=2048,
	    num_experts=128,
	    num_experts_per_tok=8,
	    # --- Hook attribute paths, relative to a decoder layer (Qwen3-MoE HF naming) ---
	    gate_attr_path="mlp.gate",  # router; expected to be an nn.Linear emitting expert logits
	    moe_attr_path="mlp",  # the complete MoE block
	    attn_o_proj_path="self_attn.o_proj",  # kept for possible head-level analysis
	    # --- Chat template ---
	    default_system_prompt="You are a helpful math assistant.",
	    chat_template_enable_thinking=True,  # turns on the <think> reasoning mode
	    # --- Delimiters around the reasoning trace ---
	    # NOTE(review): the upstream model card for the -2507 Thinking checkpoints reportedly
	    # has the chat template insert the opening <think> itself, so generated text may
	    # contain only </think>. Confirm that parsers do not require the opening tag.
	    thinking_begin_token="<think>",
	    thinking_end_token="</think>",
	)

	# Default generation settings.
	# The Thinking model emits a long chain of thought before its answer. If max_new_tokens
	# is too small the reasoning is cut off mid-way, the final answer never appears, and the
	# measured error rate is inflated. 12000 is treated as the practical minimum.
	GEN_CONFIG = dict(
	    max_new_tokens=12000,
	    temperature=0.7,
	    top_p=0.95,
	    top_k=20,
	    do_sample=True,
	    repetition_penalty=1.0,
	)

	# Reduced-budget profile for sweeps / quick inference (still must allow full thinking).
	# Only the token budget is meant to differ from GEN_CONFIG. top_k and repetition_penalty
	# are spelled out with the same values so the fast profile samples from the same
	# distribution instead of silently falling back to whatever defaults the backend uses.
	# NOTE(review): 8192 is below the 12000 "practical floor" stated for GEN_CONFIG; confirm
	# the resulting truncation rate is acceptable for sweeps.
	GEN_CONFIG_FAST = {
	    "max_new_tokens": 8192,
	    "temperature": 0.7,
	    "top_p": 0.95,
	    "top_k": 20,
	    "do_sample": True,
	    "repetition_penalty": 1.0,
	}

	# Alpha grid for the steering sweep (semantics revised Apr 2026).
	# Update rule: x_new = x - (1 - alpha) * Q^T Q · h, i.e. the applied scale is (1 - alpha):
	#   alpha = 1.0  -> scale 0    no change (baseline / "full ability student")
	#   alpha = 0.5  -> scale 0.5  half suppression
	#   alpha = 0.0  -> scale 1    full suppression of the cognitive subspace
	#   alpha < 0    -> scale > 1  over-suppression (degrades / collapses)
	#   alpha > 1    -> scale < 0  AMPLIFICATION
	#
	# The grid deliberately stays inside [0, 1]: values outside that range collapsed the
	# model and polluted the "best alpha" selection (observed empirically in the v1 results).
	NEUTRAL_ALPHA = 1.0  # the alpha that corresponds to "no steering"
	ALPHA_SWEEP = [
	    0.0,
	    0.1,
	    0.2,
	    0.3,
	    0.5,
	    0.75,
	    1.0,
	]

	# Steering-direction variants that get evaluated.
	# Cut down from four to two: in earlier results v2/v3/v4 all had cosine similarity > 0.96
	# with v1 and so added no extra signal. v_pca_subspace is the replacement for the old,
	# broken v4.
	DIRECTION_VERSIONS = [
	    "v1_raw",
	    "v_pca_subspace",
	]

	# Expert selection: how many of the highest-ranked experts are kept per dimension.
	# The ranking itself comes from the routing analysis, which is done outside this module.
	TOP_K_EXPERTS: int = 32 # top-K experts per dimension (from routing analysis)

	# Number of principal components kept for the v_pca_subspace direction.
	# Small k = strong constraint; large k = closer to full 1-D projection
	# NOTE(review): the "large k = closer to full 1-D projection" wording is confusing
	# (a 1-D projection would normally correspond to k=1) — confirm the intended meaning.
	# k=3 is a reasonable balance for 5-subtype regex
	PCA_SUBSPACE_K: int = 3

	# Cross-dimension coupling factor for joint suppression (anti-leak).
	# While one dimension is being steered, the other one is also suppressed, scaled by
	# this factor:
	#   0.0 = independent steering; 0.3 = mild coupling; 1.0 = full coupling
	ANTI_LEAK_BETA: float = 0.3