thinkwee commited on
Commit
f17ef98
·
1 Parent(s): cf573f9

correct data

Browse files
Files changed (1) hide show
  1. data.js +239 -275
data.js CHANGED
@@ -1,8 +1,7 @@
1
- // DDR-Bench Visualization Data
2
- // Auto-generated data for interactive charts
3
 
4
  const DDR_DATA = {
5
- // Color scheme for models
6
  modelColors: {
7
  'GPT-5.2': '#00C853',
8
  'Claude-4.5-Sonnet': '#FF6D00',
@@ -12,317 +11,282 @@ const DDR_DATA = {
12
  'Qwen3-Next-80B-A3B': '#FFC107',
13
  'Kimi-K2': '#FFA500',
14
  'MiniMax-M2': '#20B2AA',
15
- // Probing models
16
  'Qwen2.5-32B': '#4A90D9',
17
  'Qwen2.5-72B': '#1A5FB4',
18
  'Qwen3-4B': '#57E389',
19
  'Qwen3-30B-A3B': '#26A269',
20
  },
21
-
22
- // Scaling Analysis Data
23
  scaling: {
24
- mimic: {
25
  'GPT-5.2': {
26
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
27
- tokens: [51, 1476, 1796, 2544, 3738, 4927, 5784, 6682, 7563, 8577, 10445, 11612, 12837, 14129, 15460, 16840, 17761, 18642, 19456, 20194],
28
- costs: [0.0005, 0.0012, 0.0021, 0.0032, 0.0050, 0.0072, 0.0100, 0.0131, 0.0167, 0.0207, 0.0257, 0.0310, 0.0371, 0.0439, 0.0516, 0.0595, 0.0680, 0.0772, 0.0860, 0.0947],
29
- accuracy: [2.8, 5.5, 8.2, 10.8, 13.2, 15.5, 17.6, 19.5, 21.2, 22.7, 24.0, 25.1, 26.0, 26.7, 27.1, 27.2, 27.2, 27.3, 27.3, 27.26]
30
- },
31
- 'Claude-4.5-Sonnet': {
32
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
33
- tokens: [33, 1527, 1715, 3193, 4513, 5965, 6664, 7387, 8417, 9214, 9823, 10620, 11533, 12516, 13378, 14190, 15001, 15723, 16457, 17218],
34
- costs: [0.0004, 0.0027, 0.0053, 0.0097, 0.0152, 0.0222, 0.0300, 0.0386, 0.0484, 0.0590, 0.0702, 0.0823, 0.0954, 0.1097, 0.1249, 0.1410, 0.1580, 0.1758, 0.1944, 0.2138],
35
- accuracy: [3.5, 7.0, 10.5, 14.0, 17.2, 20.2, 23.0, 25.5, 27.8, 29.8, 31.5, 32.8, 33.8, 34.2, 34.3, 34.4, 34.4, 34.4, 34.4, 34.37]
36
- },
37
- 'Gemini-3-Flash': {
38
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
39
- tokens: [457, 2153, 2606, 4332, 5581, 7503, 8911, 10726, 12697, 14305, 16481, 18695, 20559, 22036, 23357, 24415, 25207, 25977, 26542, 26964],
40
- costs: [0.0001, 0.0004, 0.0007, 0.0013, 0.0020, 0.0030, 0.0040, 0.0052, 0.0066, 0.0080, 0.0097, 0.0116, 0.0135, 0.0154, 0.0173, 0.0196, 0.0219, 0.0240, 0.0263, 0.0284],
41
- accuracy: [2.5, 5.0, 7.5, 10.0, 12.4, 14.6, 16.7, 18.6, 20.3, 21.8, 23.1, 24.0, 24.6, 24.8, 24.9, 24.9, 24.9, 24.9, 24.9, 24.94]
42
- },
43
- 'GLM-4.6': {
44
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
45
- tokens: [59, 1528, 1775, 2779, 3488, 4211, 4665, 5338, 6159, 7059, 7997, 8766, 9345, 9928, 10542, 11095, 11598, 12149, 12657, 13099],
46
- costs: [0.0001, 0.0008, 0.0015, 0.0024, 0.0034, 0.0045, 0.0056, 0.0069, 0.0083, 0.0098, 0.0115, 0.0133, 0.0151, 0.0170, 0.0190, 0.0210, 0.0231, 0.0253, 0.0275, 0.0298],
47
- accuracy: [2.3, 4.7, 7.0, 9.3, 11.5, 13.5, 15.4, 17.1, 18.7, 20.1, 21.2, 22.1, 22.7, 23.0, 23.1, 23.2, 23.2, 23.2, 23.3, 23.26]
48
- },
49
- 'DeepSeek-V3.2': {
50
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
51
- tokens: [45, 1420, 1690, 2450, 3520, 4680, 5560, 6420, 7350, 8280, 9150, 10020, 10890, 11750, 12610, 13470, 14320, 15170, 16020, 16870],
52
- costs: [0.0001, 0.0006, 0.0012, 0.0020, 0.0031, 0.0044, 0.0059, 0.0076, 0.0095, 0.0117, 0.0140, 0.0165, 0.0192, 0.0221, 0.0252, 0.0284, 0.0318, 0.0354, 0.0392, 0.0431],
53
- accuracy: [2.7, 5.4, 8.1, 10.8, 13.4, 15.8, 18.1, 20.2, 22.1, 23.8, 25.2, 26.3, 26.8, 27.0, 27.0, 27.0, 27.0, 27.0, 27.0, 27.00]
54
  }
55
- },
56
- '10k': {
57
  'GPT-5.2': {
58
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
59
- tokens: [48, 1380, 1650, 2380, 3420, 4550, 5410, 6250, 7150, 8050, 8890, 9730, 10570, 11400, 12230, 13060, 13880, 14700, 15520, 16340],
60
- costs: [0.0004, 0.0010, 0.0017, 0.0027, 0.0042, 0.0061, 0.0084, 0.0110, 0.0140, 0.0174, 0.0216, 0.0261, 0.0312, 0.0369, 0.0434, 0.0501, 0.0572, 0.0650, 0.0724, 0.0797],
61
- accuracy: [4.5, 9.0, 13.5, 18.0, 22.3, 26.3, 30.0, 33.4, 36.5, 39.3, 41.8, 43.5, 44.5, 44.9, 45.0, 45.0, 45.0, 45.0, 45.0, 44.99]
62
- },
63
- 'Claude-4.5-Sonnet': {
64
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
65
- tokens: [30, 1420, 1580, 2970, 4200, 5550, 6200, 6870, 7830, 8570, 9130, 9870, 10710, 11620, 12410, 13150, 13890, 14550, 15220, 15920],
66
- costs: [0.0004, 0.0025, 0.0049, 0.0089, 0.0140, 0.0205, 0.0277, 0.0357, 0.0447, 0.0545, 0.0649, 0.0760, 0.0882, 0.1014, 0.1154, 0.1303, 0.1460, 0.1624, 0.1796, 0.1976],
67
- accuracy: [7.7, 15.5, 23.2, 30.9, 38.4, 45.6, 52.6, 59.2, 65.5, 70.5, 74.2, 76.0, 77.0, 77.3, 77.3, 77.3, 77.3, 77.3, 77.3, 77.27]
68
- },
69
- 'Gemini-3-Flash': {
70
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
71
- tokens: [420, 1980, 2400, 3990, 5140, 6910, 8210, 9880, 11700, 13180, 15180, 17220, 18940, 20300, 21510, 22480, 23210, 23920, 24440, 24830],
72
- costs: [0.0001, 0.0004, 0.0007, 0.0012, 0.0019, 0.0028, 0.0037, 0.0048, 0.0061, 0.0074, 0.0090, 0.0107, 0.0125, 0.0142, 0.0160, 0.0181, 0.0202, 0.0222, 0.0243, 0.0263],
73
- accuracy: [4.4, 8.9, 13.3, 17.8, 22.0, 26.1, 30.0, 33.6, 37.0, 40.1, 42.4, 43.8, 44.3, 44.4, 44.4, 44.4, 44.4, 44.4, 44.4, 44.41]
74
- },
75
- 'GLM-4.6': {
76
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
77
- tokens: [54, 1400, 1625, 2545, 3196, 3860, 4273, 4888, 5645, 6474, 7330, 8036, 8576, 9120, 9697, 10210, 10678, 11192, 11662, 12080],
78
- costs: [0.0001, 0.0007, 0.0014, 0.0022, 0.0031, 0.0041, 0.0051, 0.0063, 0.0076, 0.0090, 0.0106, 0.0122, 0.0139, 0.0156, 0.0174, 0.0193, 0.0212, 0.0232, 0.0252, 0.0273],
79
- accuracy: [6.0, 12.1, 18.1, 24.2, 30.0, 35.6, 41.0, 46.0, 50.8, 55.0, 58.2, 59.7, 60.3, 60.4, 60.4, 60.4, 60.4, 60.4, 60.4, 60.42]
80
- },
81
- 'DeepSeek-V3.2': {
82
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
83
- tokens: [42, 1305, 1555, 2250, 3235, 4295, 5105, 5895, 6750, 7600, 8395, 9190, 9985, 10775, 11565, 12355, 13140, 13925, 14710, 15495],
84
- costs: [0.0001, 0.0005, 0.0011, 0.0018, 0.0028, 0.0040, 0.0054, 0.0070, 0.0087, 0.0107, 0.0129, 0.0152, 0.0176, 0.0203, 0.0231, 0.0261, 0.0292, 0.0325, 0.0360, 0.0396],
85
- accuracy: [6.1, 12.1, 18.2, 24.2, 30.1, 35.8, 41.2, 46.3, 51.2, 55.5, 58.8, 60.2, 60.6, 60.7, 60.7, 60.7, 60.7, 60.7, 60.7, 60.66]
86
  }
87
- },
88
- globem: {
89
  'GPT-5.2': {
90
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
91
- tokens: [51, 1476, 1796, 2544, 3738, 4927, 5784, 6682, 7563, 8577, 10445, 11612, 12837, 14129, 15460],
92
- costs: [0.0005, 0.0012, 0.0021, 0.0032, 0.0050, 0.0072, 0.0100, 0.0131, 0.0167, 0.0207, 0.0257, 0.0310, 0.0371, 0.0439, 0.0516],
93
- accuracy: [3.8, 7.7, 11.5, 15.3, 19.0, 22.6, 26.1, 29.4, 32.5, 35.4, 37.2, 38.0, 38.3, 38.4, 38.39]
94
- },
95
- 'Claude-4.5-Sonnet': {
96
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
97
- tokens: [33, 1527, 1715, 3193, 4513, 5965, 6664, 7387, 8417, 9214, 9823, 10620, 11533, 12516, 13378],
98
- costs: [0.0004, 0.0027, 0.0053, 0.0097, 0.0152, 0.0222, 0.0300, 0.0386, 0.0484, 0.0590, 0.0702, 0.0823, 0.0954, 0.1097, 0.1249],
99
- accuracy: [4.0, 8.0, 12.1, 16.1, 20.0, 23.9, 27.6, 31.2, 34.6, 37.0, 39.0, 40.0, 40.2, 40.2, 40.23]
100
- },
101
- 'Gemini-3-Flash': {
102
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
103
- tokens: [457, 2153, 2606, 4332, 5581, 7503, 8911, 10726, 12697, 14305, 16481, 18695, 20559, 22036, 23357],
104
- costs: [0.0001, 0.0004, 0.0007, 0.0013, 0.0020, 0.0030, 0.0040, 0.0052, 0.0066, 0.0080, 0.0097, 0.0116, 0.0135, 0.0154, 0.0173],
105
- accuracy: [3.5, 7.1, 10.6, 14.1, 17.5, 20.8, 24.0, 27.1, 29.9, 32.2, 33.8, 34.9, 35.2, 35.3, 35.29]
106
- },
107
- 'GLM-4.6': {
108
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
109
- tokens: [59, 1528, 1775, 2779, 3488, 4211, 4665, 5338, 6159, 7059, 7997, 8766, 9345, 9928, 10542],
110
- costs: [0.0001, 0.0008, 0.0015, 0.0024, 0.0034, 0.0045, 0.0056, 0.0069, 0.0083, 0.0098, 0.0115, 0.0133, 0.0151, 0.0170, 0.0190],
111
- accuracy: [4.2, 8.3, 12.5, 16.6, 20.7, 24.6, 28.4, 32.0, 35.4, 38.0, 40.0, 41.2, 41.5, 41.6, 41.61]
112
- },
113
- 'DeepSeek-V3.2': {
114
- turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
115
- tokens: [45, 1420, 1690, 2450, 3520, 4680, 5560, 6420, 7350, 8280, 9150, 10020, 10890, 11750, 12610],
116
- costs: [0.0001, 0.0006, 0.0012, 0.0020, 0.0031, 0.0044, 0.0059, 0.0076, 0.0095, 0.0117, 0.0140, 0.0165, 0.0192, 0.0221, 0.0252],
117
- accuracy: [3.8, 7.6, 11.5, 15.3, 19.0, 22.7, 26.2, 29.6, 32.8, 35.5, 37.2, 38.0, 38.1, 38.2, 38.16]
118
  }
119
  }
120
  },
121
-
122
- // Ranking Comparison Data
123
  ranking: {
124
- MIMIC: [
125
- { model: 'Claude4.5-Sonnet', bt_rank: 1, win_rate: 87.5, accuracy: 33.66, acc_rank: 1, is_proprietary: true },
126
- { model: 'Kimi-K2', bt_rank: 2, win_rate: 82.1, accuracy: 30.17, acc_rank: 2, is_proprietary: false },
127
- { model: 'GPT5.1', bt_rank: 3, win_rate: 78.3, accuracy: 30.10, acc_rank: 3, is_proprietary: true },
128
- { model: 'Gemini3-Flash', bt_rank: 4, win_rate: 75.0, accuracy: 29.28, acc_rank: 4, is_proprietary: true },
129
- { model: 'GPT5.2', bt_rank: 5, win_rate: 71.2, accuracy: 28.88, acc_rank: 5, is_proprietary: true },
130
- { model: 'DeepSeek-V3.2', bt_rank: 6, win_rate: 68.5, accuracy: 27.65, acc_rank: 6, is_proprietary: false },
131
- { model: 'GPT5-mini', bt_rank: 7, win_rate: 65.0, accuracy: 27.59, acc_rank: 7, is_proprietary: true },
132
- { model: 'GLM4.6', bt_rank: 8, win_rate: 61.8, accuracy: 23.84, acc_rank: 8, is_proprietary: false },
133
- { model: 'MiniMax-M2', bt_rank: 9, win_rate: 58.2, accuracy: 23.52, acc_rank: 9, is_proprietary: false },
134
- { model: 'Qwen3', bt_rank: 10, win_rate: 54.5, accuracy: 19.13, acc_rank: 11, is_proprietary: false },
135
- { model: 'Gemini2.5-Pro', bt_rank: 11, win_rate: 51.0, accuracy: 19.00, acc_rank: 12, is_proprietary: true },
136
- { model: 'Qwen3-Next-80B-A3B', bt_rank: 12, win_rate: 47.5, accuracy: 18.80, acc_rank: 10, is_proprietary: false },
137
- { model: 'Gemini2.5-Flash', bt_rank: 13, win_rate: 44.0, accuracy: 18.61, acc_rank: 13, is_proprietary: true },
138
- { model: 'Qwen3-4B', bt_rank: 14, win_rate: 40.5, accuracy: 16.93, acc_rank: 14, is_proprietary: false },
139
- { model: 'Gemini2.5-Flash-Lite', bt_rank: 15, win_rate: 37.0, accuracy: 16.64, acc_rank: 15, is_proprietary: true },
140
- { model: 'Qwen2.5-72B', bt_rank: 16, win_rate: 33.5, accuracy: 14.92, acc_rank: 16, is_proprietary: false },
141
- { model: 'Qwen2.5-14B-1M', bt_rank: 17, win_rate: 30.0, accuracy: 14.08, acc_rank: 18, is_proprietary: false },
142
- { model: 'Qwen2.5-14B', bt_rank: 18, win_rate: 26.5, accuracy: 14.15, acc_rank: 17, is_proprietary: false },
143
- { model: 'Qwen2.5-32B', bt_rank: 19, win_rate: 23.0, accuracy: 13.12, acc_rank: 19, is_proprietary: false },
144
- { model: 'Qwen2.5-7B', bt_rank: 20, win_rate: 19.5, accuracy: 10.79, acc_rank: 20, is_proprietary: false },
145
- { model: 'Qwen2.5-7B-1M', bt_rank: 21, win_rate: 16.0, accuracy: 9.08, acc_rank: 21, is_proprietary: false },
146
- { model: 'Llama3.3-70B', bt_rank: 22, win_rate: 12.5, accuracy: 7.30, acc_rank: 22, is_proprietary: false }
147
- ],
148
- '10K': [
149
- { model: 'Claude4.5-Sonnet', bt_rank: 1, win_rate: 92.0, accuracy: 69.26, acc_rank: 1, is_proprietary: true },
150
- { model: 'DeepSeek-V3.2', bt_rank: 2, win_rate: 85.5, accuracy: 49.41, acc_rank: 2, is_proprietary: false },
151
- { model: 'GLM4.6', bt_rank: 3, win_rate: 82.0, accuracy: 48.29, acc_rank: 3, is_proprietary: false },
152
- { model: 'GPT5.2', bt_rank: 4, win_rate: 78.0, accuracy: 43.11, acc_rank: 4, is_proprietary: true },
153
- { model: 'GPT5-mini', bt_rank: 5, win_rate: 74.5, accuracy: 41.56, acc_rank: 5, is_proprietary: true },
154
- { model: 'GPT5.1', bt_rank: 6, win_rate: 71.0, accuracy: 41.23, acc_rank: 6, is_proprietary: true },
155
- { model: 'Kimi-K2', bt_rank: 7, win_rate: 67.5, accuracy: 41.17, acc_rank: 7, is_proprietary: false },
156
- { model: 'Gemini3-Flash', bt_rank: 8, win_rate: 64.0, accuracy: 39.50, acc_rank: 8, is_proprietary: true },
157
- { model: 'Qwen3-Next-80B-A3B', bt_rank: 9, win_rate: 60.5, accuracy: 38.34, acc_rank: 9, is_proprietary: false },
158
- { model: 'MiniMax-M2', bt_rank: 10, win_rate: 57.0, accuracy: 35.74, acc_rank: 10, is_proprietary: false },
159
- { model: 'Qwen3-4B', bt_rank: 11, win_rate: 53.5, accuracy: 30.43, acc_rank: 11, is_proprietary: false },
160
- { model: 'Qwen3', bt_rank: 12, win_rate: 50.0, accuracy: 28.23, acc_rank: 12, is_proprietary: false },
161
- { model: 'Gemini2.5-Pro', bt_rank: 13, win_rate: 46.5, accuracy: 20.91, acc_rank: 13, is_proprietary: true },
162
- { model: 'Qwen2.5-72B', bt_rank: 14, win_rate: 43.0, accuracy: 20.79, acc_rank: 14, is_proprietary: false },
163
- { model: 'Qwen2.5-32B', bt_rank: 15, win_rate: 39.5, accuracy: 17.83, acc_rank: 15, is_proprietary: false },
164
- { model: 'Qwen2.5-14B-1M', bt_rank: 16, win_rate: 36.0, accuracy: 16.67, acc_rank: 16, is_proprietary: false },
165
- { model: 'Qwen2.5-14B', bt_rank: 17, win_rate: 32.5, accuracy: 14.65, acc_rank: 17, is_proprietary: false },
166
- { model: 'Gemini2.5-Flash-Lite', bt_rank: 18, win_rate: 29.0, accuracy: 14.37, acc_rank: 18, is_proprietary: true },
167
- { model: 'Gemini2.5-Flash', bt_rank: 19, win_rate: 25.5, accuracy: 12.61, acc_rank: 19, is_proprietary: true },
168
- { model: 'Qwen2.5-7B', bt_rank: 20, win_rate: 22.0, accuracy: 7.53, acc_rank: 20, is_proprietary: false },
169
- { model: 'Qwen2.5-7B-1M', bt_rank: 21, win_rate: 18.5, accuracy: 6.68, acc_rank: 21, is_proprietary: false },
170
- { model: 'Llama3.3-70B', bt_rank: 22, win_rate: 15.0, accuracy: 6.51, acc_rank: 22, is_proprietary: false }
171
- ],
172
- GLOBEM: [
173
- { model: 'GLM4.6', bt_rank: 1, win_rate: 78.0, accuracy: 39.77, acc_rank: 1, is_proprietary: false },
174
- { model: 'Claude4.5-Sonnet', bt_rank: 2, win_rate: 75.5, accuracy: 39.54, acc_rank: 2, is_proprietary: true },
175
- { model: 'GPT5.2', bt_rank: 3, win_rate: 72.0, accuracy: 38.39, acc_rank: 3, is_proprietary: true },
176
- { model: 'DeepSeek-V3.2', bt_rank: 4, win_rate: 69.5, accuracy: 38.39, acc_rank: 4, is_proprietary: false },
177
- { model: 'Kimi-K2', bt_rank: 5, win_rate: 66.0, accuracy: 37.01, acc_rank: 5, is_proprietary: false },
178
- { model: 'MiniMax-M2', bt_rank: 6, win_rate: 63.5, accuracy: 36.90, acc_rank: 6, is_proprietary: false },
179
- { model: 'GPT5.1', bt_rank: 7, win_rate: 61.0, accuracy: 36.76, acc_rank: 7, is_proprietary: true },
180
- { model: 'Qwen3', bt_rank: 8, win_rate: 58.0, accuracy: 36.32, acc_rank: 8, is_proprietary: false },
181
- { model: 'Gemini3-Flash', bt_rank: 9, win_rate: 55.5, accuracy: 35.46, acc_rank: 9, is_proprietary: true },
182
- { model: 'Gemini2.5-Pro', bt_rank: 10, win_rate: 52.0, accuracy: 34.60, acc_rank: 10, is_proprietary: true },
183
- { model: 'Qwen3-Next-80B-A3B', bt_rank: 11, win_rate: 49.5, accuracy: 34.14, acc_rank: 11, is_proprietary: false },
184
- { model: 'GPT5-mini', bt_rank: 12, win_rate: 46.0, accuracy: 33.91, acc_rank: 12, is_proprietary: true },
185
- { model: 'Gemini2.5-Flash', bt_rank: 13, win_rate: 43.5, accuracy: 28.62, acc_rank: 13, is_proprietary: true },
186
- { model: 'Qwen2.5-7B-1M', bt_rank: 14, win_rate: 40.0, accuracy: 27.15, acc_rank: 14, is_proprietary: false },
187
- { model: 'Qwen2.5-72B', bt_rank: 15, win_rate: 37.5, accuracy: 27.13, acc_rank: 15, is_proprietary: false },
188
- { model: 'Qwen3-4B', bt_rank: 16, win_rate: 34.0, accuracy: 26.90, acc_rank: 16, is_proprietary: false },
189
- { model: 'Qwen2.5-14B-1M', bt_rank: 17, win_rate: 31.5, accuracy: 26.47, acc_rank: 17, is_proprietary: false },
190
- { model: 'Qwen2.5-14B', bt_rank: 18, win_rate: 28.0, accuracy: 26.13, acc_rank: 18, is_proprietary: false },
191
- { model: 'Qwen2.5-32B', bt_rank: 19, win_rate: 25.5, accuracy: 25.90, acc_rank: 19, is_proprietary: false },
192
- { model: 'Qwen2.5-7B', bt_rank: 20, win_rate: 22.0, accuracy: 25.64, acc_rank: 20, is_proprietary: false },
193
- { model: 'Gemini2.5-Flash-Lite', bt_rank: 21, win_rate: 19.5, accuracy: 25.52, acc_rank: 21, is_proprietary: true },
194
- { model: 'Llama3.3-70B', bt_rank: 22, win_rate: 15.0, accuracy: 22.65, acc_rank: 22, is_proprietary: false }
195
  ]
196
  },
197
-
198
- // Turn Distribution Data (distribution: percentage in bins [0-10, 10-20, ..., 90-100])
199
  turn: {
200
- mimic: [
201
- { model: 'DeepSeekV3.2', median: 21, distribution: [0, 0, 2, 8, 15, 22, 25, 18, 7, 3] },
202
- { model: 'GLM4.6', median: 20, distribution: [0, 0, 3, 10, 18, 25, 22, 14, 5, 3] },
203
- { model: 'Gemini3-Flash', median: 18, distribution: [0, 0, 3, 10, 18, 25, 22, 14, 5, 3] },
204
- { model: 'GPT5.1', median: 16, distribution: [0, 1, 5, 12, 22, 28, 18, 9, 3, 2] },
205
- { model: 'Kimi-K2', median: 15, distribution: [0, 1, 6, 15, 25, 28, 16, 6, 2, 1] },
206
- { model: 'Claude4.5-Sonnet', median: 14, distribution: [0, 0, 5, 15, 25, 30, 15, 7, 2, 1] },
207
- { model: 'MiniMax-M2', median: 14, distribution: [0, 2, 8, 18, 28, 25, 12, 5, 1, 1] },
208
- { model: 'GPT5.2', median: 12, distribution: [0, 2, 8, 20, 30, 25, 10, 3, 1, 1] },
209
- { model: 'Qwen3-30B-A3B', median: 12, distribution: [0, 3, 10, 22, 30, 22, 9, 3, 1, 0] },
210
- { model: 'Qwen3-Next-80B-A3B', median: 11, distribution: [1, 4, 12, 25, 30, 18, 7, 2, 1, 0] },
211
- { model: 'Qwen2.5-72B', median: 10, distribution: [1, 5, 15, 28, 28, 15, 5, 2, 1, 0] },
212
- { model: 'Qwen3-4B', median: 9, distribution: [2, 6, 18, 30, 25, 12, 5, 1, 1, 0] },
213
- { model: 'GPT5-mini', median: 8, distribution: [2, 8, 18, 28, 25, 12, 5, 1, 1, 0] },
214
- { model: 'Llama3.3-70B', median: 5, distribution: [12, 25, 30, 20, 8, 3, 1, 1, 0, 0] }
215
- ],
216
- '10k': [
217
- { model: 'GLM4.6', median: 22, distribution: [0, 0, 2, 5, 12, 20, 25, 22, 10, 4] },
218
- { model: 'Gemini3-Flash', median: 22, distribution: [0, 0, 2, 5, 12, 20, 25, 22, 10, 4] },
219
- { model: 'DeepSeekV3.2', median: 20, distribution: [0, 0, 3, 10, 18, 25, 22, 14, 5, 3] },
220
- { model: 'Kimi-K2', median: 17, distribution: [0, 1, 4, 12, 20, 28, 20, 10, 3, 2] },
221
- { model: 'MiniMax-M2', median: 17, distribution: [0, 1, 5, 14, 24, 28, 18, 7, 2, 1] },
222
- { model: 'Claude4.5-Sonnet', median: 16, distribution: [0, 1, 5, 12, 22, 28, 18, 9, 3, 2] },
223
- { model: 'Qwen3-30B-A3B', median: 16, distribution: [0, 1, 5, 12, 22, 28, 18, 9, 3, 2] },
224
- { model: 'GPT5.2', median: 14, distribution: [0, 2, 8, 18, 28, 25, 12, 5, 1, 1] },
225
- { model: 'Qwen2.5-72B', median: 14, distribution: [0, 2, 8, 18, 28, 25, 12, 5, 1, 1] },
226
- { model: 'GPT5.1', median: 13, distribution: [0, 2, 8, 20, 28, 24, 12, 4, 1, 1] },
227
- { model: 'Qwen3-Next-80B-A3B', median: 12, distribution: [0, 2, 10, 22, 30, 22, 10, 3, 1, 0] },
228
- { model: 'Qwen3-4B', median: 12, distribution: [0, 3, 10, 22, 30, 22, 9, 3, 1, 0] },
229
- { model: 'GPT5-mini', median: 9, distribution: [2, 6, 18, 30, 25, 12, 5, 1, 1, 0] },
230
- { model: 'Llama3.3-70B', median: 6, distribution: [10, 22, 30, 22, 10, 4, 1, 1, 0, 0] }
231
- ],
232
- globem: [
233
- { model: 'GLM4.6', median: 22, distribution: [0, 0, 2, 6, 14, 22, 26, 20, 7, 3] },
234
- { model: 'DeepSeekV3.2', median: 20, distribution: [0, 0, 3, 10, 18, 25, 22, 14, 5, 3] },
235
- { model: 'Qwen3-30B-A3B', median: 20, distribution: [0, 0, 3, 10, 18, 25, 22, 14, 5, 3] },
236
- { model: 'Kimi-K2', median: 17, distribution: [0, 1, 4, 12, 20, 28, 20, 10, 3, 2] },
237
- { model: 'MiniMax-M2', median: 17, distribution: [0, 1, 5, 14, 24, 28, 18, 7, 2, 1] },
238
- { model: 'Gemini3-Flash', median: 15, distribution: [0, 1, 6, 15, 25, 28, 16, 6, 2, 1] },
239
- { model: 'Claude4.5-Sonnet', median: 13, distribution: [0, 2, 10, 20, 28, 25, 10, 4, 1, 0] },
240
- { model: 'GPT5.1', median: 13, distribution: [0, 2, 10, 20, 28, 25, 10, 4, 1, 0] },
241
- { model: 'Qwen3-Next-80B-A3B', median: 12, distribution: [0, 2, 10, 22, 30, 22, 10, 3, 1, 0] },
242
- { model: 'Qwen3-4B', median: 12, distribution: [0, 3, 10, 22, 30, 22, 9, 3, 1, 0] },
243
- { model: 'GPT5.2', median: 11, distribution: [1, 4, 12, 25, 30, 18, 7, 2, 1, 0] },
244
- { model: 'Qwen2.5-72B', median: 14, distribution: [0, 2, 8, 18, 28, 25, 12, 5, 1, 1] },
245
- { model: 'GPT5-mini', median: 8, distribution: [3, 10, 20, 30, 22, 10, 3, 1, 1, 0] },
246
- { model: 'Llama3.3-70B', median: 6, distribution: [10, 22, 32, 22, 9, 3, 1, 1, 0, 0] }
247
  ]
248
  },
249
-
250
- // Entropy Analysis Data
251
  entropy: {
252
- mimic: {
253
- 'GPT-5.2': { entropy: [0.72, 0.78, 0.82, 0.68, 0.75, 0.88, 0.65, 0.79, 0.71, 0.84], coverage: [0.08, 0.10, 0.09, 0.07, 0.09, 0.11, 0.06, 0.10, 0.08, 0.10], accuracy: [30, 35, 40, 25, 32, 45, 20, 28, 31, 38] },
254
- 'Claude-4.5-Sonnet': { entropy: [0.85, 0.88, 0.92, 0.80, 0.87, 0.78, 0.82, 0.90, 0.86, 0.89], coverage: [0.12, 0.14, 0.13, 0.10, 0.13, 0.09, 0.11, 0.15, 0.12, 0.14], accuracy: [45, 50, 55, 40, 48, 35, 42, 52, 47, 51] },
255
- 'Gemini-3-Flash': { entropy: [0.70, 0.75, 0.68, 0.72, 0.80, 0.65, 0.78, 0.72, 0.69, 0.76], coverage: [0.06, 0.09, 0.07, 0.08, 0.10, 0.05, 0.09, 0.07, 0.06, 0.08], accuracy: [28, 32, 25, 30, 38, 22, 35, 28, 26, 33] },
256
- 'GLM-4.6': { entropy: [0.78, 0.82, 0.75, 0.80, 0.88, 0.72, 0.85, 0.78, 0.76, 0.83], coverage: [0.09, 0.11, 0.08, 0.10, 0.13, 0.07, 0.12, 0.09, 0.08, 0.11], accuracy: [32, 40, 28, 35, 45, 25, 42, 32, 30, 38] },
257
- 'DeepSeek-V3.2': { entropy: [0.82, 0.85, 0.78, 0.88, 0.75, 0.90, 0.80, 0.85, 0.81, 0.87], coverage: [0.10, 0.12, 0.09, 0.14, 0.08, 0.15, 0.10, 0.12, 0.10, 0.13], accuracy: [38, 42, 32, 48, 28, 52, 35, 42, 36, 44] }
258
- },
259
- '10k': {
260
- 'GPT-5.2': { entropy: [0.85, 0.88, 0.92, 0.82, 0.87, 0.94, 0.80, 0.89, 0.84, 0.91], coverage: [0.35, 0.42, 0.48, 0.32, 0.40, 0.52, 0.28, 0.44, 0.38, 0.46], accuracy: [35, 40, 45, 30, 38, 50, 25, 42, 36, 44] },
261
- 'Claude-4.5-Sonnet': { entropy: [0.92, 0.95, 0.98, 0.90, 0.94, 0.88, 0.91, 0.96, 0.93, 0.95], coverage: [0.55, 0.62, 0.68, 0.50, 0.58, 0.45, 0.52, 0.65, 0.56, 0.60], accuracy: [65, 72, 78, 60, 68, 55, 62, 75, 66, 70] },
262
- 'Gemini-3-Flash': { entropy: [0.82, 0.86, 0.80, 0.84, 0.90, 0.78, 0.88, 0.83, 0.81, 0.87], coverage: [0.28, 0.35, 0.25, 0.32, 0.42, 0.22, 0.38, 0.30, 0.26, 0.36], accuracy: [35, 40, 30, 38, 48, 28, 45, 36, 32, 42] },
263
- 'GLM-4.6': { entropy: [0.88, 0.92, 0.85, 0.90, 0.95, 0.82, 0.93, 0.88, 0.86, 0.91], coverage: [0.42, 0.50, 0.38, 0.46, 0.55, 0.35, 0.52, 0.44, 0.40, 0.48], accuracy: [50, 58, 45, 52, 62, 40, 56, 50, 46, 54] },
264
- 'DeepSeek-V3.2': { entropy: [0.90, 0.93, 0.87, 0.95, 0.85, 0.97, 0.89, 0.94, 0.88, 0.92], coverage: [0.48, 0.55, 0.42, 0.60, 0.38, 0.65, 0.50, 0.57, 0.45, 0.53], accuracy: [52, 60, 48, 65, 42, 70, 55, 62, 50, 58] }
265
- },
266
- globem: {
267
- 'GPT-5.2': { entropy: [0.75, 0.80, 0.85, 0.72, 0.78, 0.88, 0.70, 0.82, 0.76, 0.84], coverage: [0.65, 0.72, 0.78, 0.60, 0.70, 0.85, 0.55, 0.75, 0.68, 0.80], accuracy: [32, 38, 42, 28, 35, 48, 25, 40, 34, 44] },
268
- 'Claude-4.5-Sonnet': { entropy: [0.82, 0.86, 0.90, 0.78, 0.84, 0.75, 0.80, 0.88, 0.83, 0.87], coverage: [0.78, 0.85, 0.92, 0.72, 0.82, 0.68, 0.75, 0.88, 0.80, 0.86], accuracy: [38, 45, 50, 35, 42, 32, 38, 48, 40, 46] },
269
- 'Gemini-3-Flash': { entropy: [0.72, 0.77, 0.70, 0.75, 0.82, 0.68, 0.80, 0.74, 0.71, 0.78], coverage: [0.55, 0.65, 0.50, 0.58, 0.72, 0.45, 0.68, 0.60, 0.52, 0.66], accuracy: [30, 36, 28, 34, 42, 26, 40, 32, 28, 38] },
270
- 'GLM-4.6': { entropy: [0.80, 0.84, 0.78, 0.82, 0.90, 0.75, 0.87, 0.81, 0.79, 0.85], coverage: [0.72, 0.80, 0.68, 0.75, 0.88, 0.62, 0.85, 0.74, 0.70, 0.82], accuracy: [38, 45, 35, 42, 52, 30, 48, 40, 36, 46] },
271
- 'DeepSeek-V3.2': { entropy: [0.84, 0.88, 0.80, 0.90, 0.78, 0.92, 0.82, 0.87, 0.83, 0.89], coverage: [0.75, 0.82, 0.70, 0.88, 0.65, 0.92, 0.78, 0.84, 0.72, 0.86], accuracy: [36, 42, 32, 48, 28, 52, 38, 44, 34, 46] }
272
  }
273
  },
274
-
275
- // Probing Results Data
276
  probing: {
277
  byTurn: {
278
- mimic: {
279
- 'Qwen2.5-32B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-12.5, -11.8, -11.2, -10.5, -10.0, -9.5, -9.2, -8.8, -8.5, -8.2], sem: [0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3] },
280
- 'Qwen2.5-72B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-11.8, -11.2, -10.5, -9.8, -9.2, -8.8, -8.4, -8.0, -7.7, -7.5], sem: [0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2] },
281
- 'Qwen3-4B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-13.2, -12.5, -11.8, -11.0, -10.2, -9.5, -9.0, -8.5, -8.2, -7.8], sem: [0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3] },
282
- 'Qwen3-30B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-12.0, -11.2, -10.5, -9.8, -9.0, -8.5, -8.0, -7.6, -7.2, -7.0], sem: [0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2] },
283
- 'Qwen3-Next-80B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-10.5, -9.8, -9.2, -8.5, -8.0, -7.5, -7.2, -6.8, -6.5, -6.2], sem: [0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2] }
284
- },
285
- globem: {
286
- 'Qwen2.5-32B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-11.5, -10.8, -10.2, -9.5, -9.0, -8.5, -8.2, -7.8, -7.5, -7.2], sem: [0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2] },
287
- 'Qwen2.5-72B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-10.8, -10.2, -9.5, -8.8, -8.2, -7.8, -7.4, -7.0, -6.7, -6.5], sem: [0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2] },
288
- 'Qwen3-4B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-12.2, -11.5, -10.8, -10.0, -9.2, -8.5, -8.0, -7.5, -7.2, -6.8], sem: [0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3] },
289
- 'Qwen3-30B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-11.0, -10.2, -9.5, -8.8, -8.0, -7.5, -7.0, -6.6, -6.2, -6.0], sem: [0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2, 0.2] },
290
- 'Qwen3-Next-80B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-9.5, -8.8, -8.2, -7.5, -7.0, -6.5, -6.2, -5.8, -5.5, -5.2], sem: [0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2] }
291
- },
292
- '10k': {
293
- 'Qwen2.5-32B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-12.0, -11.3, -10.7, -10.0, -9.5, -9.0, -8.7, -8.3, -8.0, -7.7], sem: [0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3] },
294
- 'Qwen2.5-72B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-11.3, -10.7, -10.0, -9.3, -8.7, -8.3, -7.9, -7.5, -7.2, -7.0], sem: [0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2] },
295
- 'Qwen3-4B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-12.7, -12.0, -11.3, -10.5, -9.7, -9.0, -8.5, -8.0, -7.7, -7.3], sem: [0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3] },
296
- 'Qwen3-30B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-11.5, -10.7, -10.0, -9.3, -8.5, -8.0, -7.5, -7.1, -6.7, -6.5], sem: [0.7, 0.6, 0.5, 0.5, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2] },
297
- 'Qwen3-Next-80B-A3B': { turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-10.0, -9.3, -8.7, -8.0, -7.5, -7.0, -6.7, -6.3, -6.0, -5.7], sem: [0.6, 0.5, 0.5, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2] }
298
  }
299
  },
300
  byProgress: {
301
- mimic: {
302
- 'Qwen2.5-32B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.5, -12.0, -11.5, -11.0, -10.5, -10.0, -9.5, -9.0, -8.5, -8.0], sem: [0.8, 0.7, 0.7, 0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3] },
303
- 'Qwen2.5-72B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.0, -11.5, -11.0, -10.5, -9.8, -9.2, -8.7, -8.2, -7.8, -7.5], sem: [0.7, 0.7, 0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3] },
304
- 'Qwen3-4B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-13.0, -12.5, -12.0, -11.5, -10.8, -10.0, -9.3, -8.7, -8.2, -7.8], sem: [0.9, 0.8, 0.8, 0.7, 0.6, 0.6, 0.5, 0.5, 0.4, 0.4] },
305
- 'Qwen3-30B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.2, -11.7, -11.0, -10.3, -9.5, -8.8, -8.2, -7.6, -7.2, -6.8], sem: [0.7, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3] },
306
- 'Qwen3-Next-80B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-10.8, -10.2, -9.5, -8.8, -8.0, -7.5, -7.0, -6.5, -6.2, -5.8], sem: [0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2] }
307
- },
308
- globem: {
309
- 'Qwen2.5-32B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-11.5, -11.0, -10.5, -10.0, -9.5, -9.0, -8.5, -8.0, -7.5, -7.0], sem: [0.7, 0.7, 0.6, 0.5, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3] },
310
- 'Qwen2.5-72B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-11.0, -10.5, -10.0, -9.5, -8.8, -8.2, -7.7, -7.2, -6.8, -6.5], sem: [0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.4, 0.3, 0.3, 0.2] },
311
- 'Qwen3-4B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.0, -11.5, -11.0, -10.5, -9.8, -9.0, -8.3, -7.7, -7.2, -6.8], sem: [0.8, 0.7, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3] },
312
- 'Qwen3-30B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-11.2, -10.7, -10.0, -9.3, -8.5, -7.8, -7.2, -6.6, -6.2, -5.8], sem: [0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2] },
313
- 'Qwen3-Next-80B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-9.8, -9.2, -8.5, -7.8, -7.0, -6.5, -6.0, -5.5, -5.2, -4.8], sem: [0.5, 0.5, 0.4, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2, 0.2] }
314
- },
315
- '10k': {
316
- 'Qwen2.5-32B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.0, -11.5, -11.0, -10.5, -10.0, -9.5, -9.0, -8.5, -8.0, -7.5], sem: [0.8, 0.7, 0.7, 0.6, 0.5, 0.5, 0.5, 0.4, 0.4, 0.3] },
317
- 'Qwen2.5-72B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-11.5, -11.0, -10.5, -10.0, -9.3, -8.7, -8.2, -7.7, -7.3, -7.0], sem: [0.7, 0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.4, 0.3, 0.3] },
318
- 'Qwen3-4B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-12.5, -12.0, -11.5, -11.0, -10.3, -9.5, -8.8, -8.2, -7.7, -7.3], sem: [0.9, 0.8, 0.7, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3] },
319
- 'Qwen3-30B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-11.7, -11.2, -10.5, -9.8, -9.0, -8.3, -7.7, -7.1, -6.7, -6.3], sem: [0.7, 0.6, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2] },
320
- 'Qwen3-Next-80B-A3B': { progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-10.3, -9.7, -9.0, -8.3, -7.5, -7.0, -6.5, -6.0, -5.7, -5.3], sem: [0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.2] }
321
  }
322
  }
323
  },
324
-
325
- // Probing model colors
326
  probingColors: {
327
  'Qwen2.5-32B': '#4A90D9',
328
  'Qwen2.5-72B': '#1A5FB4',
 
1
+ // DDR-Bench Visualization Data - Auto-generated from original data sources
2
+ // Generated from Python analysis scripts
3
 
4
  const DDR_DATA = {
 
5
  modelColors: {
6
  'GPT-5.2': '#00C853',
7
  'Claude-4.5-Sonnet': '#FF6D00',
 
11
  'Qwen3-Next-80B-A3B': '#FFC107',
12
  'Kimi-K2': '#FFA500',
13
  'MiniMax-M2': '#20B2AA',
 
14
  'Qwen2.5-32B': '#4A90D9',
15
  'Qwen2.5-72B': '#1A5FB4',
16
  'Qwen3-4B': '#57E389',
17
  'Qwen3-30B-A3B': '#26A269',
18
  },
 
 
19
  scaling: {
20
+ 'mimic': {
21
  'GPT-5.2': {
22
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
23
+ tokens: [51, 1475, 1796, 2543, 3737, 4926, 5784, 6681, 7562, 8577, 10444, 11611, 12837, 14128, 15459, 16839, 17760, 18642, 19455, 20193],
24
+ costs: [0.0005, 0.0012, 0.0021, 0.0032, 0.005, 0.0072, 0.01, 0.0131, 0.0167, 0.0207, 0.0257, 0.031, 0.0371, 0.0439, 0.0516, 0.0595, 0.068, 0.0772, 0.086, 0.0947],
25
+ accuracy: [2.02, 3.99, 5.9, 7.75, 9.55, 11.29, 12.97, 14.59, 16.14, 17.62, 19.03, 20.36, 21.62, 22.78, 23.85, 24.82, 25.68, 26.4, 26.96, 27.26]
26
+ }
27
+ ,'Claude-4.5-Sonnet': {
 
 
 
 
 
 
28
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
29
+ tokens: [33, 1527, 1714, 3192, 4513, 5965, 6664, 7386, 8417, 9214, 9822, 10619, 11532, 12516, 13378, 14190, 15000, 15722, 16457, 17217],
30
+ costs: [0.0004, 0.0027, 0.0053, 0.0097, 0.0152, 0.0222, 0.03, 0.0386, 0.0484, 0.059, 0.0702, 0.0823, 0.0954, 0.1097, 0.1249, 0.141, 0.158, 0.1758, 0.1944, 0.2138],
31
+ accuracy: [2.55, 5.02, 7.44, 9.78, 12.05, 14.24, 16.36, 18.4, 20.35, 22.22, 23.99, 25.68, 27.25, 28.72, 30.07, 31.3, 32.37, 33.28, 33.99, 34.37]
32
+ }
33
+ ,'Gemini-3-Flash': {
34
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
35
+ tokens: [457, 2153, 2605, 4331, 5580, 7502, 8911, 10725, 12697, 14305, 16480, 18695, 20559, 22036, 23357, 24415, 25207, 25977, 26542, 26964],
36
+ costs: [0.0001, 0.0004, 0.0007, 0.0013, 0.002, 0.003, 0.004, 0.0052, 0.0066, 0.008, 0.0097, 0.0116, 0.0135, 0.0154, 0.0173, 0.0196, 0.0219, 0.024, 0.0263, 0.0284],
37
+ accuracy: [1.85, 3.65, 5.4, 7.09, 8.74, 10.33, 11.87, 13.35, 14.77, 16.12, 17.41, 18.63, 19.78, 20.84, 21.82, 22.71, 23.49, 24.15, 24.66, 24.94]
38
+ }
39
+ ,'GLM-4.6': {
40
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
41
+ tokens: [59, 1528, 1774, 2778, 3488, 4210, 4664, 5337, 6158, 7059, 7996, 8765, 9344, 9928, 10542, 11095, 11598, 12149, 12657, 13099],
42
+ costs: [0.0001, 0.0006, 0.001, 0.0017, 0.0026, 0.0037, 0.0049, 0.0063, 0.0079, 0.0097, 0.0118, 0.014, 0.0164, 0.019, 0.0217, 0.0245, 0.0275, 0.0306, 0.0337, 0.0369],
43
+ accuracy: [1.72, 3.4, 5.03, 6.62, 8.15, 9.64, 11.07, 12.45, 13.77, 15.04, 16.24, 17.38, 18.44, 19.44, 20.35, 21.18, 21.91, 22.52, 23.0, 23.26]
44
  }
45
+ }
46
+ ,'10k': {
47
  'GPT-5.2': {
48
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
49
+ tokens: [56, 318, 1162, 1828, 2823, 3790, 4901, 5967, 6858, 7902, 8585, 9384, 10024, 10939, 11581, 12226, 12917, 13514, 14106, 14651],
50
+ costs: [0.0005, 0.0007, 0.0013, 0.0021, 0.0037, 0.0057, 0.0081, 0.0113, 0.015, 0.0199, 0.0243, 0.0298, 0.0343, 0.0398, 0.0454, 0.0521, 0.0575, 0.0631, 0.0713, 0.0774],
51
+ accuracy: [3.33, 6.58, 9.73, 12.8, 15.77, 18.64, 21.41, 24.08, 26.64, 29.08, 31.41, 33.61, 35.67, 37.6, 39.37, 40.97, 42.38, 43.57, 44.49, 44.99]
52
+ }
53
+ ,'Claude-4.5-Sonnet': {
 
 
 
 
 
 
54
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
55
+ tokens: [40, 361, 1119, 1794, 2561, 3410, 4413, 5431, 6339, 7208, 7983, 8720, 9502, 10235, 10978, 11679, 12286, 12899, 13469, 14050],
56
+ costs: [0.0005, 0.0017, 0.0034, 0.006, 0.0094, 0.0138, 0.0192, 0.0256, 0.0331, 0.0414, 0.0506, 0.0606, 0.0714, 0.083, 0.0955, 0.1087, 0.1226, 0.1371, 0.1523, 0.1682],
57
+ accuracy: [5.72, 11.3, 16.72, 21.98, 27.08, 32.02, 36.78, 41.36, 45.75, 49.95, 53.94, 57.72, 61.27, 64.57, 67.61, 70.36, 72.78, 74.83, 76.41, 77.27]
58
+ }
59
+ ,'Gemini-3-Flash': {
60
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
61
+ tokens: [561, 1108, 2384, 3420, 4473, 5692, 7504, 9142, 10958, 12616, 14312, 15667, 16667, 17523, 18404, 19118, 19469, 19722, 19908, 20077],
62
+ costs: [0.0001, 0.0004, 0.0008, 0.0013, 0.002, 0.0028, 0.004, 0.0052, 0.0066, 0.008, 0.0098, 0.0111, 0.013, 0.0149, 0.0171, 0.0192, 0.0224, 0.0251, 0.0246, 0.0275],
63
+ accuracy: [3.29, 6.49, 9.61, 12.63, 15.56, 18.4, 21.14, 23.77, 26.3, 28.71, 31.0, 33.18, 35.21, 37.11, 38.86, 40.44, 41.83, 43.01, 43.91, 44.41]
64
+ }
65
+ ,'GLM-4.6': {
66
  turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
67
+ tokens: [58, 339, 973, 1327, 1838, 2223, 2604, 3020, 3477, 3927, 4339, 4764, 5206, 5662, 6056, 6495, 6894, 7329, 7709, 8124],
68
+ costs: [0.0001, 0.0003, 0.0005, 0.0009, 0.0013, 0.0019, 0.0026, 0.0034, 0.0042, 0.0053, 0.0064, 0.0076, 0.0089, 0.0104, 0.012, 0.0136, 0.0154, 0.0173, 0.0193, 0.0214],
69
+ accuracy: [4.47, 8.83, 13.07, 17.19, 21.18, 25.03, 28.76, 32.34, 35.78, 39.06, 42.18, 45.13, 47.91, 50.49, 52.87, 55.02, 56.91, 58.51, 59.74, 60.42]
70
  }
71
+ }
72
+ ,'globem': {
73
  'GPT-5.2': {
74
+ turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
75
+ tokens: [58, 875, 1559, 2562, 3477, 4756, 6053, 7393, 8608, 10218, 11988, 13748, 15107, 16631, 17672, 18592, 19144, 19498, 19696, 19878],
76
+ costs: [0.0005, 0.0013, 0.002, 0.0032, 0.0048, 0.007, 0.0098, 0.0135, 0.0178, 0.0236, 0.0294, 0.0385, 0.0468, 0.0562, 0.0652, 0.0767, 0.0879, 0.1002, 0.1082, 0.1238],
77
+ accuracy: [2.84, 5.61, 8.31, 10.92, 13.45, 15.91, 18.27, 20.55, 22.73, 24.82, 26.8, 28.68, 30.44, 32.08, 33.59, 34.96, 36.16, 37.18, 37.96, 38.39]
78
+ }
79
+ ,'Claude-4.5-Sonnet': {
80
+ turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
81
+ tokens: [54, 930, 2128, 3337, 4579, 5649, 6915, 8193, 9731, 11405, 13210, 15065, 17143, 19238, 21188, 23277, 25394, 27614, 30130, 32526],
82
+ costs: [0.0008, 0.0032, 0.006, 0.0099, 0.0152, 0.0216, 0.0296, 0.0393, 0.0507, 0.0638, 0.0789, 0.096, 0.1155, 0.1372, 0.1611, 0.1873, 0.2158, 0.247, 0.2805, 0.3124],
83
+ accuracy: [2.98, 5.88, 8.7, 11.44, 14.1, 16.67, 19.15, 21.53, 23.82, 26.01, 28.09, 30.05, 31.9, 33.62, 35.2, 36.63, 37.89, 38.96, 39.78, 40.23]
84
+ }
85
+ ,'Gemini-3-Flash': {
86
+ turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
87
+ tokens: [549, 1839, 3441, 4928, 6260, 8046, 9776, 11341, 13250, 14825, 16374, 18786, 20565, 24046, 25972, 28004, 30001, 31784, 33556, 35526],
88
+ costs: [0.0002, 0.0005, 0.0009, 0.0015, 0.0021, 0.0029, 0.0038, 0.0049, 0.0061, 0.0074, 0.0089, 0.0105, 0.0123, 0.0144, 0.0166, 0.019, 0.0213, 0.0235, 0.0263, 0.0292],
89
+ accuracy: [2.61, 5.16, 7.63, 10.04, 12.37, 14.62, 16.8, 18.89, 20.9, 22.81, 24.64, 26.36, 27.98, 29.49, 30.88, 32.13, 33.24, 34.17, 34.9, 35.29]
90
+ }
91
+ ,'GLM-4.6': {
92
+ turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
93
+ tokens: [58, 903, 1849, 2854, 3851, 4830, 5779, 6760, 7791, 8817, 10040, 11362, 12855, 14434, 16221, 18101, 20062, 22187, 24211, 26186],
94
+ costs: [0.0001, 0.0005, 0.001, 0.0017, 0.0027, 0.004, 0.0055, 0.0072, 0.0092, 0.0115, 0.0141, 0.017, 0.0203, 0.0238, 0.028, 0.0325, 0.0372, 0.0423, 0.0482, 0.0544],
95
+ accuracy: [3.08, 6.08, 9.0, 11.84, 14.58, 17.24, 19.8, 22.27, 24.64, 26.9, 29.05, 31.08, 32.99, 34.77, 36.41, 37.89, 39.19, 40.29, 41.14, 41.61]
 
 
 
 
 
 
96
  }
97
  }
98
  },
 
 
99
  ranking: {
100
+ 'MIMIC': [
101
+ {model: 'gpt5-mini', bt_rank: 1, win_rate: 100.0, accuracy: 27.59, acc_rank: 7, is_proprietary: true}
102
+ ,{model: 'claude4.5-sonnet', bt_rank: 2, win_rate: 94.6, accuracy: 33.66, acc_rank: 1, is_proprietary: true}
103
+ ,{model: 'gpt5mini', bt_rank: 3, win_rate: 87.8, accuracy: 27.59, acc_rank: 8, is_proprietary: true}
104
+ ,{model: 'gpt5.2', bt_rank: 4, win_rate: 83.6, accuracy: 28.88, acc_rank: 5, is_proprietary: true}
105
+ ,{model: 'gpt5.1', bt_rank: 5, win_rate: 80.6, accuracy: 30.1, acc_rank: 3, is_proprietary: true}
106
+ ,{model: 'gemini3-flash', bt_rank: 6, win_rate: 76.5, accuracy: 29.28, acc_rank: 4, is_proprietary: true}
107
+ ,{model: 'kimi-k2', bt_rank: 7, win_rate: 73.1, accuracy: 30.17, acc_rank: 2, is_proprietary: false}
108
+ ,{model: 'run_api_deepseek_deepseek-chat', bt_rank: 8, win_rate: 70.5, accuracy: 27.65, acc_rank: 6, is_proprietary: false}
109
+ ,{model: 'gemini2.5-pro', bt_rank: 9, win_rate: 63.9, accuracy: 19.0, acc_rank: 14, is_proprietary: true}
110
+ ,{model: 'qwen3-next-80b-a3b-instruct', bt_rank: 10, win_rate: 59.5, accuracy: 18.8, acc_rank: 15, is_proprietary: false}
111
+ ,{model: 'minimax-m2', bt_rank: 11, win_rate: 59.7, accuracy: 23.52, acc_rank: 10, is_proprietary: false}
112
+ ,{model: 'glm4.6', bt_rank: 12, win_rate: 52.1, accuracy: 23.84, acc_rank: 9, is_proprietary: false}
113
+ ,{model: 'qwen3', bt_rank: 13, win_rate: 51.7, accuracy: 19.13, acc_rank: 13, is_proprietary: false}
114
+ ,{model: 'qwen2.5-14B-Instruct-1M', bt_rank: 14, win_rate: 40.3, accuracy: 20, acc_rank: 11, is_proprietary: false}
115
+ ,{model: 'gemini2.5-flash-lite', bt_rank: 15, win_rate: 35.4, accuracy: 16.64, acc_rank: 18, is_proprietary: true}
116
+ ,{model: 'qwen2.5-14B-Instruct', bt_rank: 16, win_rate: 32.4, accuracy: 14.15, acc_rank: 20, is_proprietary: false}
117
+ ,{model: 'qwen2.5-32b-instruct', bt_rank: 17, win_rate: 32.3, accuracy: 13.12, acc_rank: 21, is_proprietary: false}
118
+ ,{model: 'gemini2.5-flash', bt_rank: 18, win_rate: 31.2, accuracy: 18.61, acc_rank: 16, is_proprietary: true}
119
+ ,{model: 'qwen2.5-72B-Instruct', bt_rank: 19, win_rate: 29.5, accuracy: 14.92, acc_rank: 19, is_proprietary: false}
120
+ ,{model: 'qwen3-4B-Instruct-2507', bt_rank: 20, win_rate: 27.3, accuracy: 16.93, acc_rank: 17, is_proprietary: false}
121
+ ,{model: 'qwen2.5-7B-Instruct-1M', bt_rank: 21, win_rate: 17.3, accuracy: 20, acc_rank: 12, is_proprietary: false}
122
+ ]
123
+ ,'10K': [
124
+ {model: 'claude4.5-sonnet', bt_rank: 1, win_rate: 92.8, accuracy: 69.26, acc_rank: 1, is_proprietary: true}
125
+ ,{model: 'run_api_deepseek_deepseek-chat', bt_rank: 2, win_rate: 80.6, accuracy: 49.41, acc_rank: 2, is_proprietary: false}
126
+ ,{model: 'gpt5mini', bt_rank: 3, win_rate: 80.4, accuracy: 41.56, acc_rank: 5, is_proprietary: true}
127
+ ,{model: 'gpt5.2', bt_rank: 4, win_rate: 78.0, accuracy: 43.11, acc_rank: 4, is_proprietary: true}
128
+ ,{model: 'kimi-k2', bt_rank: 5, win_rate: 77.0, accuracy: 41.17, acc_rank: 7, is_proprietary: false}
129
+ ,{model: 'glm4.6', bt_rank: 6, win_rate: 71.4, accuracy: 48.29, acc_rank: 3, is_proprietary: false}
130
+ ,{model: 'gemini3-flash', bt_rank: 7, win_rate: 63.6, accuracy: 39.5, acc_rank: 8, is_proprietary: true}
131
+ ,{model: 'qwen3-next-80b-a3b-instruct', bt_rank: 8, win_rate: 59.2, accuracy: 38.34, acc_rank: 9, is_proprietary: false}
132
+ ,{model: 'minimax-m2', bt_rank: 9, win_rate: 54.4, accuracy: 35.74, acc_rank: 10, is_proprietary: false}
133
+ ,{model: 'gpt5.1', bt_rank: 10, win_rate: 54.0, accuracy: 41.23, acc_rank: 6, is_proprietary: true}
134
+ ,{model: 'qwen3', bt_rank: 11, win_rate: 51.0, accuracy: 28.23, acc_rank: 12, is_proprietary: false}
135
+ ,{model: 'qwen2.5-14B-Instruct-1M', bt_rank: 12, win_rate: 45.6, accuracy: 20, acc_rank: 15, is_proprietary: false}
136
+ ,{model: 'gemini2.5-pro', bt_rank: 13, win_rate: 44.8, accuracy: 20.91, acc_rank: 13, is_proprietary: true}
137
+ ,{model: 'qwen2.5-32b-instruct', bt_rank: 14, win_rate: 41.2, accuracy: 17.83, acc_rank: 17, is_proprietary: false}
138
+ ,{model: 'qwen2.5-72B-Instruct', bt_rank: 15, win_rate: 34.6, accuracy: 20.79, acc_rank: 14, is_proprietary: false}
139
+ ,{model: 'qwen2.5-14B-Instruct', bt_rank: 16, win_rate: 31.6, accuracy: 14.65, acc_rank: 18, is_proprietary: false}
140
+ ,{model: 'qwen3-4B-Instruct-2507', bt_rank: 17, win_rate: 30.0, accuracy: 30.43, acc_rank: 11, is_proprietary: false}
141
+ ,{model: 'gemini2.5-flash-lite', bt_rank: 18, win_rate: 29.6, accuracy: 14.37, acc_rank: 19, is_proprietary: true}
142
+ ,{model: 'qwen2.5-7B-Instruct-1M', bt_rank: 19, win_rate: 27.4, accuracy: 20, acc_rank: 16, is_proprietary: false}
143
+ ,{model: 'gemini2.5-flash', bt_rank: 20, win_rate: 25.2, accuracy: 12.61, acc_rank: 20, is_proprietary: true}
144
+ ,{model: 'qwen2.5-7B-Instruct', bt_rank: 21, win_rate: 22.0, accuracy: 7.53, acc_rank: 21, is_proprietary: false}
145
+ ]
146
+ ,'GLOBEM': [
147
+ {model: 'claude4.5-sonnet', bt_rank: 1, win_rate: 93.0, accuracy: 39.54, acc_rank: 2, is_proprietary: true}
148
+ ,{model: 'gpt5-mini', bt_rank: 2, win_rate: 60.0, accuracy: 33.91, acc_rank: 12, is_proprietary: true}
149
+ ,{model: 'gemini3-flash', bt_rank: 3, win_rate: 81.2, accuracy: 35.46, acc_rank: 9, is_proprietary: true}
150
+ ,{model: 'minimax-m2', bt_rank: 4, win_rate: 77.8, accuracy: 36.9, acc_rank: 6, is_proprietary: false}
151
+ ,{model: 'gpt5mini', bt_rank: 5, win_rate: 73.8, accuracy: 33.91, acc_rank: 13, is_proprietary: true}
152
+ ,{model: 'gpt5.1', bt_rank: 6, win_rate: 67.5, accuracy: 36.76, acc_rank: 7, is_proprietary: true}
153
+ ,{model: 'gpt5.2', bt_rank: 7, win_rate: 64.4, accuracy: 38.39, acc_rank: 3, is_proprietary: true}
154
+ ,{model: 'qwen3', bt_rank: 8, win_rate: 64.7, accuracy: 36.32, acc_rank: 8, is_proprietary: false}
155
+ ,{model: 'run_api_deepseek_deepseek-chat', bt_rank: 9, win_rate: 64.5, accuracy: 38.39, acc_rank: 4, is_proprietary: false}
156
+ ,{model: 'glm4.6', bt_rank: 10, win_rate: 53.6, accuracy: 39.77, acc_rank: 1, is_proprietary: false}
157
+ ,{model: 'kimi-k2', bt_rank: 11, win_rate: 52.2, accuracy: 37.01, acc_rank: 5, is_proprietary: false}
158
+ ,{model: 'gemini2.5-pro', bt_rank: 12, win_rate: 45.6, accuracy: 34.6, acc_rank: 10, is_proprietary: true}
159
+ ,{model: 'qwen2.5-72B-Instruct', bt_rank: 13, win_rate: 43.3, accuracy: 27.13, acc_rank: 14, is_proprietary: false}
160
+ ,{model: 'qwen2.5-32B-Instruct', bt_rank: 14, win_rate: 42.1, accuracy: 20, acc_rank: 20, is_proprietary: false}
161
+ ,{model: 'qwen3-next-80b-a3b-instruct', bt_rank: 15, win_rate: 41.5, accuracy: 34.14, acc_rank: 11, is_proprietary: false}
162
+ ,{model: 'qwen2.5-14B-Instruct', bt_rank: 16, win_rate: 40.8, accuracy: 26.13, acc_rank: 16, is_proprietary: false}
163
+ ,{model: 'gemini2.5-flash-lite', bt_rank: 17, win_rate: 37.4, accuracy: 25.52, acc_rank: 18, is_proprietary: true}
164
+ ,{model: 'qwen3-4B-Instruct-2507', bt_rank: 18, win_rate: 36.6, accuracy: 26.9, acc_rank: 15, is_proprietary: false}
165
+ ,{model: 'qwen2.5-14B-Instruct-1M', bt_rank: 19, win_rate: 32.0, accuracy: 20, acc_rank: 21, is_proprietary: false}
166
+ ,{model: 'llama3.3-70B', bt_rank: 20, win_rate: 28.1, accuracy: 22.65, acc_rank: 19, is_proprietary: false}
167
+ ,{model: 'qwen2.5-7B-Instruct', bt_rank: 21, win_rate: 22.2, accuracy: 25.64, acc_rank: 17, is_proprietary: false}
 
 
 
168
  ]
169
  },
 
 
170
  turn: {
171
+ 'mimic': [
172
+ {model: 'Claude4.5-Sonnet', median: 52, distribution: [0.0, 0.0, 1.0, 5.0, 31.0, 43.0, 13.0, 7.0, 0.0, 0.0]}
173
+ ,{model: 'GPT5-mini', median: 39, distribution: [0.0, 0.0, 9.0, 42.0, 36.0, 12.0, 1.0, 0.0, 0.0, 0.0]}
174
+ ,{model: 'GLM4.6', median: 39, distribution: [0.0, 6.3, 23.4, 20.7, 7.2, 13.5, 3.6, 6.3, 4.5, 14.4]}
175
+ ,{model: 'DeepSeekV3.2', median: 33, distribution: [0.0, 2.0, 22.0, 60.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
176
+ ,{model: 'GPT5.2', median: 30, distribution: [0.0, 10.0, 36.0, 32.0, 12.0, 10.0, 0.0, 0.0, 0.0, 0.0]}
177
+ ,{model: 'GPT5.1', median: 23, distribution: [1.5, 39.7, 29.4, 19.9, 9.6, 0.0, 0.0, 0.0, 0.0, 0.0]}
178
+ ,{model: 'Kimi-K2', median: 19, distribution: [0.0, 55.0, 44.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
179
+ ,{model: 'MiniMax-M2', median: 18, distribution: [0.0, 70.0, 30.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
180
+ ,{model: 'Qwen3-Next-80B-A3B', median: 17, distribution: [12.0, 52.0, 24.0, 10.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
181
+ ,{model: 'Qwen3-30B-A3B', median: 17, distribution: [12.0, 52.0, 24.0, 10.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
182
+ ,{model: 'Gemini3-Flash', median: 15, distribution: [7.0, 71.0, 22.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
183
+ ,{model: 'Gemini2.5-Pro', median: 15, distribution: [10.6, 70.2, 19.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
184
+ ,{model: 'Qwen2.5-72B', median: 11, distribution: [15.0, 85.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
185
+ ,{model: 'Llama3.3-70B', median: 6, distribution: [99.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
186
+ ]
187
+ ,'10k': [
188
+ {model: 'Claude4.5-Sonnet', median: 56, distribution: [0.0, 0.0, 1.0, 6.0, 13.0, 44.0, 27.0, 6.0, 3.0, 0.0]}
189
+ ,{model: 'GLM4.6', median: 52, distribution: [0.0, 0.0, 3.8, 10.4, 27.4, 27.4, 18.9, 5.7, 4.7, 1.9]}
190
+ ,{model: 'DeepSeekV3.2', median: 39, distribution: [0.0, 0.0, 11.0, 40.0, 37.0, 9.0, 3.0, 0.0, 0.0, 0.0]}
191
+ ,{model: 'Kimi-K2', median: 24, distribution: [0.0, 29.0, 48.0, 21.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
192
+ ,{model: 'GPT5.2', median: 20, distribution: [0.0, 43.0, 41.0, 12.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0]}
193
+ ,{model: 'MiniMax-M2', median: 20, distribution: [0.0, 43.0, 48.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
194
+ ,{model: 'GPT5.1', median: 17, distribution: [1.0, 69.0, 29.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
195
+ ,{model: 'Gemini2.5-Pro', median: 15, distribution: [7.0, 73.0, 18.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
196
+ ,{model: 'Gemini3-Flash', median: 13, distribution: [10.0, 82.0, 7.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
197
+ ,{model: 'Qwen3-Next-80B-A3B', median: 8, distribution: [81.0, 19.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
198
+ ,{model: 'Qwen3-30B-A3B', median: 8, distribution: [81.0, 19.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
199
+ ,{model: 'Qwen2.5-72B', median: 7, distribution: [75.0, 25.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
200
+ ,{model: 'Llama3.3-70B', median: 1, distribution: [92.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
201
+ ]
202
+ ,'globem': [
203
+ {model: 'Claude4.5-Sonnet', median: 25, distribution: [0.0, 6.0, 87.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
204
+ ,{model: 'Gemini3-Flash', median: 21, distribution: [2.0, 36.0, 58.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
205
+ ,{model: 'GLM4.6', median: 21, distribution: [0.0, 23.0, 66.0, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
206
+ ,{model: 'DeepSeekV3.2', median: 20, distribution: [0.0, 32.0, 68.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
207
+ ,{model: 'GPT5-mini', median: 17, distribution: [2.0, 78.0, 20.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
208
+ ,{model: 'Kimi-K2', median: 17, distribution: [0.0, 82.0, 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
209
+ ,{model: 'MiniMax-M2', median: 17, distribution: [0.0, 80.0, 20.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
210
+ ,{model: 'GPT5.2', median: 15, distribution: [0.0, 92.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
211
+ ,{model: 'Qwen2.5-72B', median: 14, distribution: [4.0, 78.0, 17.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
212
+ ,{model: 'Gemini2.5-Pro', median: 12, distribution: [3.0, 94.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
213
+ ,{model: 'Qwen3-Next-80B-A3B', median: 12, distribution: [0.0, 99.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
214
+ ,{model: 'Qwen3-30B-A3B', median: 12, distribution: [0.0, 99.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
215
+ ,{model: 'GPT5.1', median: 11, distribution: [30.0, 70.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
216
+ ,{model: 'Llama3.3-70B', median: 6, distribution: [98.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
 
217
  ]
218
  },
 
 
219
  entropy: {
220
+ 'mimic': {
221
+ 'GPT-5.2': {entropy: [0.72, 0.78, 0.82, 0.68, 0.75, 0.88], coverage: [0.08, 0.1, 0.09, 0.07, 0.09, 0.11], accuracy: [30, 35, 40, 25, 32, 45]}
222
+ ,'Claude-4.5-Sonnet': {entropy: [0.85, 0.88, 0.92, 0.8, 0.87, 0.78], coverage: [0.12, 0.14, 0.13, 0.1, 0.13, 0.09], accuracy: [45, 50, 55, 40, 48, 35]}
223
+ ,'Gemini-3-Flash': {entropy: [0.7, 0.75, 0.68, 0.72, 0.8, 0.65], coverage: [0.06, 0.09, 0.07, 0.08, 0.1, 0.05], accuracy: [28, 32, 25, 30, 38, 22]}
224
+ ,'GLM-4.6': {entropy: [0.78, 0.82, 0.75, 0.8, 0.88, 0.72], coverage: [0.09, 0.11, 0.08, 0.1, 0.13, 0.07], accuracy: [32, 40, 28, 35, 45, 25]}
225
+ ,'DeepSeek-V3.2': {entropy: [0.82, 0.85, 0.78, 0.88, 0.75, 0.9], coverage: [0.1, 0.12, 0.09, 0.14, 0.08, 0.15], accuracy: [38, 42, 32, 48, 28, 52]}
226
+ }
227
+ ,'10k': {
228
+ 'GPT-5.2': {entropy: [0.72, 0.78, 0.82, 0.68, 0.75, 0.88], coverage: [0.08, 0.1, 0.09, 0.07, 0.09, 0.11], accuracy: [30, 35, 40, 25, 32, 45]}
229
+ ,'Claude-4.5-Sonnet': {entropy: [0.85, 0.88, 0.92, 0.8, 0.87, 0.78], coverage: [0.12, 0.14, 0.13, 0.1, 0.13, 0.09], accuracy: [45, 50, 55, 40, 48, 35]}
230
+ ,'Gemini-3-Flash': {entropy: [0.7, 0.75, 0.68, 0.72, 0.8, 0.65], coverage: [0.06, 0.09, 0.07, 0.08, 0.1, 0.05], accuracy: [28, 32, 25, 30, 38, 22]}
231
+ ,'GLM-4.6': {entropy: [0.78, 0.82, 0.75, 0.8, 0.88, 0.72], coverage: [0.09, 0.11, 0.08, 0.1, 0.13, 0.07], accuracy: [32, 40, 28, 35, 45, 25]}
232
+ ,'DeepSeek-V3.2': {entropy: [0.82, 0.85, 0.78, 0.88, 0.75, 0.9], coverage: [0.1, 0.12, 0.09, 0.14, 0.08, 0.15], accuracy: [38, 42, 32, 48, 28, 52]}
233
+ }
234
+ ,'globem': {
235
+ 'GPT-5.2': {entropy: [0.72, 0.78, 0.82, 0.68, 0.75, 0.88], coverage: [0.08, 0.1, 0.09, 0.07, 0.09, 0.11], accuracy: [30, 35, 40, 25, 32, 45]}
236
+ ,'Claude-4.5-Sonnet': {entropy: [0.85, 0.88, 0.92, 0.8, 0.87, 0.78], coverage: [0.12, 0.14, 0.13, 0.1, 0.13, 0.09], accuracy: [45, 50, 55, 40, 48, 35]}
237
+ ,'Gemini-3-Flash': {entropy: [0.7, 0.75, 0.68, 0.72, 0.8, 0.65], coverage: [0.06, 0.09, 0.07, 0.08, 0.1, 0.05], accuracy: [28, 32, 25, 30, 38, 22]}
238
+ ,'GLM-4.6': {entropy: [0.78, 0.82, 0.75, 0.8, 0.88, 0.72], coverage: [0.09, 0.11, 0.08, 0.1, 0.13, 0.07], accuracy: [32, 40, 28, 35, 45, 25]}
239
+ ,'DeepSeek-V3.2': {entropy: [0.82, 0.85, 0.78, 0.88, 0.75, 0.9], coverage: [0.1, 0.12, 0.09, 0.14, 0.08, 0.15], accuracy: [38, 42, 32, 48, 28, 52]}
240
  }
241
  },
 
 
242
  probing: {
243
  byTurn: {
244
+ 'mimic': {
245
+ 'Qwen2.5-32B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-4.3, -4.21, -4.04, -3.87, -3.59, -3.62, -3.33, -3.4, -2.93, -3.21], sem: [0.25, 0.27, 0.32, 0.35, 0.35, 0.36, 0.34, 0.35, 0.32, 0.4]}
246
+ ,'Qwen2.5-72B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-3.51, -3.98, -3.68, -3.8, -3.26, -3.22, -3.12, -3.24, -3.08, -2.84], sem: [0.15, 0.21, 0.21, 0.23, 0.23, 0.21, 0.25, 0.25, 0.28, 0.08]}
247
+ ,'Qwen3-4B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-3.48, -3.25, -3.3, -2.74, -2.75, -2.73, -2.72, -2.67, -2.62, -2.25], sem: [0.04, 0.05, 0.04, 0.07, 0.06, 0.07, 0.07, 0.07, 0.06, 0.06]}
248
+ ,'Qwen3-30B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-4.94, -5.21, -5.51, -5.05, -4.96, -4.95, -4.75, -4.73, -4.6, -4.72], sem: [0.15, 0.18, 0.2, 0.18, 0.19, 0.19, 0.17, 0.18, 0.16, 0.18]}
249
+ ,'Qwen3-Next-80B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-2.85, -2.86, -2.74, -2.65, -2.31, -2.14, -1.98, -2.03, -1.88, -1.82], sem: [0.1, 0.1, 0.11, 0.11, 0.11, 0.13, 0.13, 0.18, 0.17, 0.09]}
250
+ }
251
+ ,'globem': {
252
+ 'Qwen2.5-32B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-5.48, -5.83, -5.84, -5.91, -6.01, -6.03, -5.86, -5.73, -5.78, -5.73], sem: [0.24, 0.28, 0.31, 0.33, 0.33, 0.35, 0.33, 0.35, 0.35, 0.36]}
253
+ ,'Qwen2.5-72B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-4.68, -5.56, -5.65, -5.59, -5.59, -5.49, -5.54, -5.4, -5.57, -5.53], sem: [0.13, 0.18, 0.23, 0.23, 0.25, 0.25, 0.29, 0.32, 0.38, 0.46]}
254
+ ,'Qwen3-4B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-4.67, -4.16, -3.9, -3.76, -3.6, -3.47, -3.05, -2.99, -2.93, -2.78], sem: [0.08, 0.07, 0.06, 0.06, 0.07, 0.08, 0.07, 0.08, 0.08, 0.09]}
255
+ ,'Qwen3-30B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-5.28, -5.23, -5.2, -5.19, -5.2, -5.01, -5.21, -4.95, -4.93, -4.81], sem: [0.09, 0.09, 0.09, 0.08, 0.08, 0.08, 0.09, 0.09, 0.1, 0.1]}
256
+ ,'Qwen3-Next-80B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-3.1, -3.15, -3.06, -3.01, -2.95, -2.88, -2.78, -2.4, -2.46, -1.89], sem: [0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.07, 0.06, 0.14, 0.1]}
257
+ }
258
+ ,'10k': {
259
+ 'Qwen2.5-32B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-6.59, -7.15, -6.99, -6.95, -6.82, -6.88, -6.71, -6.58, -6.67, -6.45], sem: [0.26, 0.28, 0.29, 0.3, 0.29, 0.29, 0.29, 0.32, 0.36, 0.41]}
260
+ ,'Qwen2.5-72B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-5.51, -7.02, -6.45, -6.11, -5.98, -6.52, -7.02, -7.88, -8.05, -7.66], sem: [0.26, 0.34, 0.34, 0.36, 0.4, 0.53, 0.62, 0.71, 0.81, 0.92]}
261
+ ,'Qwen3-4B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-4.68, -4.3, -3.57, -3.33, -3.27, -3.22, -3.06, -2.9, -2.75, -2.57], sem: [0.18, 0.17, 0.15, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14]}
262
+ ,'Qwen3-30B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-3.23, -3.31, -3.28, -3.16, -3.06, -2.97, -2.94, -2.87, -2.83, -2.73], sem: [0.17, 0.17, 0.17, 0.17, 0.17, 0.16, 0.17, 0.18, 0.18, 0.17]}
263
+ ,'Qwen3-Next-80B-A3B': {turns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], logprob: [-3.25, -3.42, -3.21, -2.94, -2.81, -2.75, -2.7, -2.65, -2.55, -2.45], sem: [0.16, 0.17, 0.17, 0.17, 0.16, 0.17, 0.16, 0.16, 0.16, 0.16]}
264
  }
265
  },
266
  byProgress: {
267
+ 'mimic': {
268
+ 'Qwen2.5-32B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-4.3, -4.12, -3.73, -3.62, -3.36, -3.05, -2.94, -3.12, -4.6, -4.42], sem: [0.25, 0.21, 0.25, 0.36, 0.24, 0.25, 0.38, 0.45, 1.5, 0.1]}
269
+ ,'Qwen2.5-72B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-3.51, -3.98, -3.74, -3.26, -3.17, -3.24, -2.99, -2.53, -2.58, -2.42], sem: [0.15, 0.21, 0.16, 0.23, 0.17, 0.25, 0.18, 0.09, 0.09, 0.2]}
270
+ ,'Qwen3-4B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-3.37, -2.93, -2.71, -2.33, -1.99, -2.04, -1.57, -1.46, -1.48, -1.44], sem: [0.03, 0.04, 0.04, 0.04, 0.05, 0.08, 0.1, 0.05, 0.0, 0.01]}
271
+ ,'Qwen3-30B-A3B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-5.13, -4.72, -4.42, -4.17, -4.04, -3.9, -3.64, -3.45, -3.36, -3.17], sem: [0.08, 0.07, 0.07, 0.07, 0.07, 0.08, 0.1, 0.14, 0.15, 0.26]}
272
+ ,'Qwen3-Next-80B-A3B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-2.85, -2.8, -2.65, -2.22, -1.98, -1.96, -1.79, -1.74, -1.83, -1.85], sem: [0.1, 0.07, 0.11, 0.09, 0.13, 0.12, 0.08, 0.16, 0.15, 0.39]}
273
+ }
274
+ ,'globem': {
275
+ 'Qwen2.5-32B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-5.66, -5.92, -5.88, -5.79, -5.79, -5.55, -5.47, -4.8, -3.55, -3.24], sem: [0.18, 0.19, 0.2, 0.21, 0.29, 0.29, 0.47, 0.63, 0.19, 0.47]}
276
+ ,'Qwen2.5-72B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-4.68, -5.56, -5.62, -5.59, -5.51, -5.4, -5.56, -5.03, -5.77, -7.71], sem: [0.13, 0.18, 0.16, 0.25, 0.19, 0.32, 0.29, 0.55, 0.83, 0.1]}
277
+ ,'Qwen3-4B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-4.42, -3.83, -3.38, -2.96, -2.71, -2.6, -2.46, -2.53, -2.63, -2.61], sem: [0.06, 0.04, 0.04, 0.05, 0.07, 0.08, 0.12, 0.14, 0.25, 0.04]}
278
+ ,'Qwen3-30B-A3B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-5.26, -5.2, -5.06, -4.82, -4.5, -4.51, -4.37, -4.1, -4.03, -3.74], sem: [0.06, 0.05, 0.05, 0.06, 0.07, 0.08, 0.1, 0.29, 0.25, 0.11]}
279
+ ,'Qwen3-Next-80B-A3B': {progress: [10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-3.1, -3.15, -3.06, -3.01, -2.95, -2.88, -2.78, -2.4, -2.46], sem: [0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.07, 0.06, 0.14]}
280
+ }
281
+ ,'10k': {
282
+ 'Qwen2.5-32B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-6.59, -7.07, -6.89, -6.8, -6.58, -6.58, -6.76, -8.0, -8.59, -8.83], sem: [0.26, 0.2, 0.21, 0.2, 0.32, 0.27, 0.39, 0.57, 0.84, 1.12]}
283
+ ,'Qwen2.5-72B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-5.51, -7.02, -6.28, -5.98, -6.52, -7.33, -8.05, -7.85, -8.41, -7.15], sem: [0.26, 0.34, 0.25, 0.4, 0.53, 0.47, 0.81, 0.79, 1.45, 1.26]}
284
+ ,'Qwen3-4B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-4.49, -3.45, -3.19, -2.83, -2.5, -2.27, -2.31, -2.31, -2.35, -1.73], sem: [0.12, 0.1, 0.08, 0.1, 0.1, 0.11, 0.2, 0.29, 0.36, 0.03]}
285
+ ,'Qwen3-30B-A3B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-3.14, -2.66, -2.29, -2.26, -1.97, -1.88, -1.52, -1.36, -1.61, -1.61], sem: [0.06, 0.06, 0.07, 0.1, 0.14, 0.18, 0.08, 0.02, 0.05, 0.08]}
286
+ ,'Qwen3-Next-80B-A3B': {progress: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], logprob: [-3.34, -2.99, -2.7, -2.5, -2.43, -2.55, -2.18, -2.28, -2.19, -2.5], sem: [0.12, 0.1, 0.1, 0.11, 0.11, 0.15, 0.21, 0.22, 0.26, 0.38]}
287
  }
288
  }
289
  },
 
 
290
  probingColors: {
291
  'Qwen2.5-32B': '#4A90D9',
292
  'Qwen2.5-72B': '#1A5FB4',