File size: 3,323 Bytes
e53f10b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Qwen3-30B-A3B-Thinking model configuration.

Architecture (from HF config):
    - 48 layers
    - 32 attention heads, head_dim=128
    - hidden_size=2048
    - 128 routed experts per layer
    - 8 experts per token (top-k)
    - Thinking mode: enable_thinking=True in chat template
"""
from configs.paths import MODELS_DIR

# Static description of the Qwen3-30B-A3B-Thinking checkpoint: where the weights
# come from, how they are loaded, the expected architecture, the module paths
# used for hooks, and the chat-template settings.
MODEL_CONFIG = dict(
    # --- Checkpoint location and loading ---
    hf_id="Qwen/Qwen3-30B-A3B-Thinking-2507",
    local_dir=MODELS_DIR / "Qwen3-30B-A3B-Thinking-2507",
    load_dtype="bfloat16",
    trust_remote_code=True,
    # --- Architecture (will be validated at load time) ---
    num_layers=48,
    num_attention_heads=32,
    head_dim=128,
    hidden_size=2048,
    num_experts=128,
    num_experts_per_tok=8,
    # --- Hook paths: Qwen3-MoE standard HF naming ---
    gate_attr_path="mlp.gate",  # nn.Linear -> logits over experts
    moe_attr_path="mlp",  # whole MoE block
    attn_o_proj_path="self_attn.o_proj",  # for potential head-level analysis
    # --- Chat template ---
    default_system_prompt="You are a helpful math assistant.",
    chat_template_enable_thinking=True,  # key: enable the <think> reasoning mode
    # --- Special tokens for thinking mode (if any) ---
    # NOTE(review): the 2507 Thinking model card indicates the chat template
    # already opens the <think> block, so generated text may contain only the
    # closing tag -- confirm parsers do not require thinking_begin_token.
    thinking_begin_token="<think>",
    thinking_end_token="</think>",
)

# Generation (general)
# Qwen3-Thinking produces long chains of thought; a max_new_tokens that is too
# small TRUNCATES the thinking, leading to cut-off answers and an inflated error
# rate. 12000 is the practical floor.
GEN_CONFIG = {
    "max_new_tokens": 12000,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 20,
    "do_sample": True,
    "repetition_penalty": 1.0,
}

# For faster sweep / inference (still must allow full thinking).
# Built from GEN_CONFIG so that ONLY the token budget differs: every sampling
# knob (temperature, top_p, top_k, do_sample, repetition_penalty) is inherited,
# which keeps sweep results comparable with runs that use the general preset.
# The copy is independent, so mutating one preset never affects the other.
# NOTE(review): 8192 is below the 12000 "practical floor" stated above --
# confirm the truncation rate at this budget is acceptable for sweeps.
GEN_CONFIG_FAST = {**GEN_CONFIG, "max_new_tokens": 8192}

# Steering alpha sweep values (NEW SEMANTICS — Apr 2026)
#   Steering applies  x_new = x - (1 - alpha) * Q^T Q · h,  i.e. the projection
#   is scaled by  scale = 1 - alpha:
#     alpha=1.0  → scale=0   → no change (baseline / "full ability student")
#     alpha=0.5  → scale=0.5 → half suppression
#     alpha=0.0  → scale=1   → full suppression of the cognitive subspace
#     alpha<0    → scale>1   → over-suppression (degrades / collapses)
#     alpha>1    → scale<0   → AMPLIFICATION
#
# The sweep is now restricted to [0, 1]. Values outside this range cause model
# collapse and pollute the "best alpha" selection (verified empirically in v1
# results).
NEUTRAL_ALPHA = 1.0    # the value that means "no steering"
ALPHA_SWEEP = [0.0, 0.1, 0.2, 0.3, 0.5, 0.75, NEUTRAL_ALPHA]

# Direction versions to evaluate.
# Cut from 4 to 2 — in earlier results v2/v3/v4 each had >0.96 cosine to v1 and
# so provided no additional signal. The new v_pca_subspace replaces the old,
# broken v4.
DIRECTION_VERSIONS = [
    "v1_raw",
    "v_pca_subspace",
]

# Expert selection
# NOTE(review): this is a separate quantity from
# MODEL_CONFIG["num_experts_per_tok"] (8); presumably it must not exceed
# MODEL_CONFIG["num_experts"] (128) -- confirm against the consumer.
TOP_K_EXPERTS = 32      # top-K experts per dimension (from routing analysis)

# PCA subspace dimensionality for v_pca_subspace
# Small k = strong constraint; large k = closer to full 1-D projection
# k=3 is a reasonable balance for 5-subtype regex
PCA_SUBSPACE_K = 3

# Cross-dimension coupling for joint suppression (anti-leak)
# When steering one dimension, also suppress the other by this factor
# 0.0 = independent steering; 0.3 = mild coupling; 1.0 = full coupling
# NOTE(review): the code that applies this factor is not in this file; the
# scale above suggests values are expected to lie in [0.0, 1.0] -- confirm.
ANTI_LEAK_BETA = 0.3