""" Qwen3-30B-A3B-Thinking model configuration. Architecture (from HF config): - 48 layers - 32 attention heads, head_dim=128 - hidden_size=2048 - 128 routed experts per layer - 8 experts per token (top-k) - Thinking mode: enable_thinking=True in chat template """ from configs.paths import MODELS_DIR MODEL_CONFIG = { "hf_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "local_dir": MODELS_DIR / "Qwen3-30B-A3B-Thinking-2507", "load_dtype": "bfloat16", "trust_remote_code": True, # Architecture (will be validated at load time) "num_layers": 48, "num_attention_heads": 32, "head_dim": 128, "hidden_size": 2048, "num_experts": 128, "num_experts_per_tok": 8, # Hook paths: Qwen3-MoE standard HF naming "gate_attr_path": "mlp.gate", # nn.Linear -> logits over experts "moe_attr_path": "mlp", # whole MoE block "attn_o_proj_path": "self_attn.o_proj", # for potential head-level analysis # Chat template "default_system_prompt": "You are a helpful math assistant.", "chat_template_enable_thinking": True, # key: enable the mode # Special tokens for thinking mode (if any) "thinking_begin_token": "", "thinking_end_token": "", } # Generation (general) # Qwen3-Thinking produces long CoT; max_new_tokens too small TRUNCATES the thinking, # leading to cut-off answers and inflated error rate. 12000 is the practical floor. GEN_CONFIG = { "max_new_tokens": 12000, "temperature": 0.7, "top_p": 0.95, "top_k": 20, "do_sample": True, "repetition_penalty": 1.0, } # For faster sweep / inference (still must allow full thinking) GEN_CONFIG_FAST = { "max_new_tokens": 8192, "temperature": 0.7, "top_p": 0.95, "do_sample": True, } # Steering alpha sweep values (NEW SEMANTICS — Apr 2026) # With the formula x_new = x - (1 - alpha) * Q^T Q · h we have: # alpha=1.0 → scale=0 → no change (baseline / "full ability student") # alpha=0.5 → scale=0.5 → half suppression # alpha=0.0 → scale=1 → full suppression of the cognitive subspace # alpha<0 → scale>1 → over-suppression (degrades / collapses) # alpha>1 → scale<0 → AMPLIFICATION # # Sweep is restricted to [0, 1] now. Outside this range causes model collapse and # pollutes the "best alpha" selection (verified empirically in v1 results). ALPHA_SWEEP = [0.0, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0] NEUTRAL_ALPHA = 1.0 # the value that means "no steering" # Direction versions to evaluate. # Reduced from 4 to 2 — empirically v2/v3/v4 had >0.96 cosine to v1 in earlier results, # providing no additional signal. New v_pca_subspace replaces the old broken v4. DIRECTION_VERSIONS = ["v1_raw", "v_pca_subspace"] # Expert selection TOP_K_EXPERTS = 32 # top-K experts per dimension (from routing analysis) # PCA subspace dimensionality for v_pca_subspace # Small k = strong constraint; large k = closer to full 1-D projection # k=3 is a reasonable balance for 5-subtype regex PCA_SUBSPACE_K = 3 # Cross-dimension coupling for joint suppression (anti-leak) # When steering one dimension, also suppress the other by this factor # 0.0 = independent steering; 0.3 = mild coupling; 1.0 = full coupling ANTI_LEAK_BETA = 0.3