# v2/configs/model.py
# Uploaded by JulianHJR using huggingface_hub (commit e53f10b, verified).
"""
Qwen3-30B-A3B-Thinking model configuration.
Architecture (from HF config):
- 48 layers
- 32 attention heads, head_dim=128
- hidden_size=2048
- 128 routed experts per layer
- 8 experts per token (top-k)
- Thinking mode: enable_thinking=True in chat template
"""
from configs.paths import MODELS_DIR
# Static description of the Qwen3-30B-A3B-Thinking checkpoint: where it lives,
# how it is loaded, the architecture it is expected to have, the submodule
# paths used for hooks, and the chat-template settings.
MODEL_CONFIG = {
    # --- Checkpoint identity and loading ---
    "hf_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
    "local_dir": MODELS_DIR / "Qwen3-30B-A3B-Thinking-2507",
    "load_dtype": "bfloat16",
    # NOTE(review): this permits running code shipped with the checkpoint;
    # confirm it is still required for this model.
    "trust_remote_code": True,

    # --- Expected architecture ---
    # These values are meant to be validated at load time (the validation
    # itself is not part of this dict).
    "num_layers": 48,
    "num_attention_heads": 32,
    "head_dim": 128,
    "hidden_size": 2048,
    "num_experts": 128,         # routed experts per layer
    "num_experts_per_tok": 8,   # top-k experts chosen per token

    # --- Hook attachment points (standard HF naming for Qwen3-MoE) ---
    "gate_attr_path": "mlp.gate",            # nn.Linear producing logits over experts
    "moe_attr_path": "mlp",                  # the entire MoE block
    "attn_o_proj_path": "self_attn.o_proj",  # kept for possible head-level analysis

    # --- Chat template ---
    "default_system_prompt": "You are a helpful math assistant.",
    # Turns on thinking mode (the <think> ... </think> span) in the chat template.
    # NOTE(review): the Thinking-2507 model card describes this checkpoint as
    # thinking-only, so this flag may be redundant - confirm.
    "chat_template_enable_thinking": True,

    # --- Thinking-mode delimiters (if any) ---
    # NOTE(review): the model card says the chat template already opens
    # <think>, so generated text may contain only the closing tag - confirm
    # that parsers do not require the begin token.
    "thinking_begin_token": "<think>",
    "thinking_end_token": "</think>",
}
# Default generation settings.
# Qwen3-Thinking emits a long chain of thought. If max_new_tokens is too small
# the thinking is TRUNCATED, which cuts answers off and inflates the measured
# error rate; treat 12000 as the practical minimum.
# NOTE(review): the model card's suggested sampling reportedly uses
# temperature 0.6 - confirm that 0.7 is deliberate.
GEN_CONFIG = {
    "max_new_tokens": 12_000,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 20,
    "do_sample": True,
    "repetition_penalty": 1.0,
}
# Cheaper settings for sweeps and quick inference runs. The token budget is
# smaller but is still meant to leave room for the full thinking trace.
# NOTE(review): 8192 is below the 12000 "practical floor" stated for
# GEN_CONFIG, and top_k / repetition_penalty are not set here (unlike
# GEN_CONFIG) - confirm both are intentional.
GEN_CONFIG_FAST = {
    "max_new_tokens": 8_192,
    "temperature": 0.7,
    "top_p": 0.95,
    "do_sample": True,
}
# Alpha values swept when steering (NEW SEMANTICS — Apr 2026).
# Steering uses x_new = x - (1 - alpha) * Q^T Q · h, so scale = 1 - alpha:
#   alpha = 1.0 → scale = 0    no change (baseline / "full ability student")
#   alpha = 0.5 → scale = 0.5  half suppression
#   alpha = 0.0 → scale = 1    full suppression of the cognitive subspace
#   alpha < 0   → scale > 1    over-suppression (degrades / collapses)
#   alpha > 1   → scale < 0    AMPLIFICATION
#
# The sweep is now limited to [0, 1]. Values outside that range cause model
# collapse and pollute the "best alpha" selection (verified empirically in the
# v1 results).
ALPHA_SWEEP = [
    0.0,
    0.1,
    0.2,
    0.3,
    0.5,
    0.75,
    1.0,
]

# The alpha value that means "no steering".
NEUTRAL_ALPHA = 1.0
# Steering-direction variants to evaluate.
# Cut from 4 variants to 2 — in earlier results v2/v3/v4 had cosine similarity
# > 0.96 with v1 and so added no signal. v_pca_subspace is new and replaces the
# old, broken v4.
DIRECTION_VERSIONS = [
    "v1_raw",
    "v_pca_subspace",
]
# Expert selection: how many top-ranked experts to keep per dimension, as
# ranked by the routing analysis.
TOP_K_EXPERTS = 32

# Dimensionality k of the PCA subspace used by v_pca_subspace.
# A small k is a strong constraint; a large k moves closer to the full 1-D
# projection. k=3 is a reasonable balance for the 5-subtype regex.
PCA_SUBSPACE_K = 3

# Anti-leak coupling between dimensions for joint suppression: while one
# dimension is steered, the other is suppressed by this factor as well.
#   0.0 = independent steering, 0.3 = mild coupling, 1.0 = full coupling
ANTI_LEAK_BETA = 0.3