amirali1985 committed on
Commit
087cbaa
·
verified ·
1 Parent(s): 2909ff3

Upload add_sub_sorl_v1_abs30_K1_10K

Browse files
add_sub_sorl_v1_abs30_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151674
37
+ }
add_sub_sorl_v1_abs30_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs30_K1_10K/metrics.json ADDED
@@ -0,0 +1,1557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563,
34
+ 1620,
35
+ 1670,
36
+ 1720,
37
+ 1777,
38
+ 1827,
39
+ 1877,
40
+ 1934,
41
+ 1984,
42
+ 2034,
43
+ 2091,
44
+ 2141,
45
+ 2191,
46
+ 2248,
47
+ 2298,
48
+ 2348,
49
+ 2405,
50
+ 2455,
51
+ 2505,
52
+ 2562,
53
+ 2612,
54
+ 2662,
55
+ 2719,
56
+ 2769,
57
+ 2819,
58
+ 2876,
59
+ 2926,
60
+ 2976,
61
+ 3033,
62
+ 3083,
63
+ 3133
64
+ ],
65
+ "loss": [
66
+ 7.419157981872559,
67
+ 4.167179584503174,
68
+ 2.757251739501953,
69
+ 2.7242889404296875,
70
+ 2.4392311573028564,
71
+ 2.12624454498291,
72
+ 1.7021116018295288,
73
+ 0.1742163896560669,
74
+ -4.171255588531494,
75
+ -7.291606426239014,
76
+ -7.83780574798584,
77
+ -4.589130401611328,
78
+ -2.949950695037842,
79
+ -1.528172254562378,
80
+ -0.9651578664779663,
81
+ -0.8742300271987915,
82
+ -0.550557017326355,
83
+ -0.38707196712493896,
84
+ -0.5141674876213074,
85
+ -0.6698199510574341,
86
+ -0.7084141373634338,
87
+ -0.47245967388153076,
88
+ -0.31419694423675537,
89
+ -0.31762227416038513,
90
+ -0.24917703866958618,
91
+ -0.5615812540054321,
92
+ -0.11299914121627808,
93
+ -0.3741247057914734,
94
+ -0.1392630785703659,
95
+ -0.4355674088001251,
96
+ -0.7360162138938904,
97
+ -0.5835363268852234,
98
+ -0.5749945640563965,
99
+ -0.4588763415813446,
100
+ -0.5538105964660645,
101
+ -0.009306885302066803,
102
+ -0.4344196915626526,
103
+ -0.23470139503479004,
104
+ -0.4863656759262085,
105
+ -0.6077032685279846,
106
+ -0.41746845841407776,
107
+ -0.4240177273750305,
108
+ -0.17653952538967133,
109
+ -0.39587831497192383,
110
+ -0.1691063493490219,
111
+ -0.12267603725194931,
112
+ -0.3334910571575165,
113
+ -0.11227956414222717,
114
+ -0.2049349546432495,
115
+ -0.3078603148460388,
116
+ -0.17479164898395538,
117
+ -0.25693076848983765,
118
+ -0.22230546176433563,
119
+ -0.24037127196788788,
120
+ -0.07592999190092087,
121
+ -0.35476192831993103,
122
+ -0.3569074273109436,
123
+ -0.22312912344932556,
124
+ -0.22871972620487213,
125
+ -0.16561374068260193
126
+ ],
127
+ "base_loss": [
128
+ 7.4173455238342285,
129
+ 3.6480441093444824,
130
+ 1.9713058471679688,
131
+ 1.8219451904296875,
132
+ 1.865543007850647,
133
+ 1.8542261123657227,
134
+ 1.805886149406433,
135
+ 1.7817356586456299,
136
+ 1.8499161005020142,
137
+ 1.6429988145828247,
138
+ 1.4622584581375122,
139
+ 0.8687496185302734,
140
+ 0.5731961131095886,
141
+ 0.35015660524368286,
142
+ 0.2690192759037018,
143
+ 0.22296881675720215,
144
+ 0.1571018397808075,
145
+ 0.16091905534267426,
146
+ 0.1413668543100357,
147
+ 0.14242176711559296,
148
+ 0.15262103080749512,
149
+ 0.11749549955129623,
150
+ 0.1065789982676506,
151
+ 0.08413096517324448,
152
+ 0.07932066917419434,
153
+ 0.11860263347625732,
154
+ 0.07409992069005966,
155
+ 0.07803652435541153,
156
+ 0.047815799713134766,
157
+ 0.10229728370904922,
158
+ 0.11566775292158127,
159
+ 0.10984303057193756,
160
+ 0.09876441955566406,
161
+ 0.0688941702246666,
162
+ 0.08261072635650635,
163
+ 0.053319063037633896,
164
+ 0.06365398317575455,
165
+ 0.0505535863339901,
166
+ 0.06858514994382858,
167
+ 0.07909546047449112,
168
+ 0.060411665588617325,
169
+ 0.0625351145863533,
170
+ 0.032719288021326065,
171
+ 0.05432973429560661,
172
+ 0.045225996524095535,
173
+ 0.021227678284049034,
174
+ 0.050432708114385605,
175
+ 0.027083681896328926,
176
+ 0.031196508556604385,
177
+ 0.04615318775177002,
178
+ 0.027639741078019142,
179
+ 0.05231175199151039,
180
+ 0.03158637136220932,
181
+ 0.032559193670749664,
182
+ 0.01280138734728098,
183
+ 0.04558839276432991,
184
+ 0.04613020643591881,
185
+ 0.03275144845247269,
186
+ 0.03132258728146553,
187
+ 0.024606991559267044
188
+ ],
189
+ "info_loss": [
190
+ -0.5185031890869141,
191
+ -0.09349203109741211,
192
+ -0.03490471839904785,
193
+ -0.02159249782562256,
194
+ -0.054520368576049805,
195
+ -0.08344614505767822,
196
+ -0.1214907169342041,
197
+ -0.27150094509124756,
198
+ -0.7121474742889404,
199
+ -1.0041959285736084,
200
+ -1.0389212369918823,
201
+ -0.6523160934448242,
202
+ -0.45543068647384644,
203
+ -0.28470802307128906,
204
+ -0.21153677999973297,
205
+ -0.1875189244747162,
206
+ -0.14062973856925964,
207
+ -0.12236915528774261,
208
+ -0.12718062102794647,
209
+ -0.1365828663110733,
210
+ -0.13193942606449127,
211
+ -0.10466521978378296,
212
+ -0.08482377231121063,
213
+ -0.0746171772480011,
214
+ -0.07218538224697113,
215
+ -0.10219592601060867,
216
+ -0.04982271045446396,
217
+ -0.07324571162462234,
218
+ -0.038586750626564026,
219
+ -0.07255075871944427,
220
+ -0.10315456241369247,
221
+ -0.08851015567779541,
222
+ -0.08371994644403458,
223
+ -0.06724002212285995,
224
+ -0.0802496150135994,
225
+ -0.021388381719589233,
226
+ -0.06261564046144485,
227
+ -0.0424460805952549,
228
+ -0.06758096814155579,
229
+ -0.07665054500102997,
230
+ -0.058085739612579346,
231
+ -0.05829145759344101,
232
+ -0.030241522938013077,
233
+ -0.053621694445610046,
234
+ -0.03214117884635925,
235
+ -0.020935092121362686,
236
+ -0.04635276272892952,
237
+ -0.02176225744187832,
238
+ -0.03096679225564003,
239
+ -0.04063677042722702,
240
+ -0.02714877389371395,
241
+ -0.036504216492176056,
242
+ -0.031483057886362076,
243
+ -0.032049935311079025,
244
+ -0.012561824172735214,
245
+ -0.045204248279333115,
246
+ -0.04523665830492973,
247
+ -0.0305631123483181,
248
+ -0.0310970451682806,
249
+ -0.023466313257813454
250
+ ],
251
+ "abs_loss": [
252
+ 3.381772994995117,
253
+ 2.774198293685913,
254
+ 2.777165412902832,
255
+ 2.7333767414093018,
256
+ 2.7525675296783447,
257
+ 2.701953649520874,
258
+ 2.742412567138672,
259
+ 2.6781795024871826,
260
+ 2.65073823928833,
261
+ 2.6559343338012695,
262
+ 2.530367612838745,
263
+ 2.4332215785980225,
264
+ 2.2735555171966553,
265
+ 1.9705966711044312,
266
+ 1.622453212738037,
267
+ 1.4155868291854858,
268
+ 1.1430540084838867,
269
+ 0.9845269322395325,
270
+ 0.9937983155250549,
271
+ 0.802861750125885,
272
+ 0.7566664814949036,
273
+ 0.6577289700508118,
274
+ 0.7418577075004578,
275
+ 0.5929294228553772,
276
+ 0.5775941014289856,
277
+ 0.5590793490409851,
278
+ 0.6108124256134033,
279
+ 0.4465348720550537,
280
+ 0.393932968378067,
281
+ 0.3919040262699127,
282
+ 0.41778573393821716,
283
+ 0.4128030240535736,
284
+ 0.36697685718536377,
285
+ 0.3254868686199188,
286
+ 0.3693109452724457,
287
+ 0.3870414197444916,
288
+ 0.2671542167663574,
289
+ 0.23766420781612396,
290
+ 0.27502405643463135,
291
+ 0.19808150827884674,
292
+ 0.2262984663248062,
293
+ 0.24667154252529144,
294
+ 0.24236536026000977,
295
+ 0.20162977278232574,
296
+ 0.2073269635438919,
297
+ 0.18350571393966675,
298
+ 0.1846071034669876,
299
+ 0.15442465245723724,
300
+ 0.1817246526479721,
301
+ 0.14412124454975128,
302
+ 0.14306403696537018,
303
+ 0.12268459051847458,
304
+ 0.13687384128570557,
305
+ 0.11778446286916733,
306
+ 0.11796697229146957,
307
+ 0.10396292060613632,
308
+ 0.11473483592271805,
309
+ 0.10343429446220398,
310
+ 0.07652456313371658,
311
+ 0.10590269416570663
312
+ ],
313
+ "zipf_loss": [
314
+ 4.848667144775391,
315
+ 1.176635980606079,
316
+ 0.8572765588760376,
317
+ 0.8449309468269348,
318
+ 0.8436350226402283,
319
+ 0.8362844586372375,
320
+ 0.8368913531303406,
321
+ 0.8396722078323364,
322
+ 0.8352292776107788,
323
+ 0.8417611122131348,
324
+ 0.836111843585968,
325
+ 0.821958601474762,
326
+ 0.8038046956062317,
327
+ 0.7716918587684631,
328
+ 0.7189455032348633,
329
+ 0.6364318132400513,
330
+ 0.5843331813812256,
331
+ 0.5772478580474854,
332
+ 0.51689213514328,
333
+ 0.4733008146286011,
334
+ 0.3826923966407776,
335
+ 0.3909241557121277,
336
+ 0.35327601432800293,
337
+ 0.28512564301490784,
338
+ 0.3355967402458191,
339
+ 0.28586745262145996,
340
+ 0.25004681944847107,
341
+ 0.23564238846302032,
342
+ 0.15939532220363617,
343
+ 0.148452490568161,
344
+ 0.13808311522006989,
345
+ 0.15044189989566803,
346
+ 0.12674278020858765,
347
+ 0.11208108067512512,
348
+ 0.12914372980594635,
349
+ 0.11255372315645218,
350
+ 0.10136730968952179,
351
+ 0.11543937772512436,
352
+ 0.09335644543170929,
353
+ 0.05989854037761688,
354
+ 0.08034739643335342,
355
+ 0.07169462740421295,
356
+ 0.06891988217830658,
357
+ 0.06584595888853073,
358
+ 0.08634673058986664,
359
+ 0.04709663242101669,
360
+ 0.061143141239881516,
361
+ 0.06281687319278717,
362
+ 0.055363979190588,
363
+ 0.037942074239254,
364
+ 0.054749950766563416,
365
+ 0.04353116825222969,
366
+ 0.04725135862827301,
367
+ 0.03579045459628105,
368
+ 0.02509017288684845,
369
+ 0.041295863687992096,
370
+ 0.03785546123981476,
371
+ 0.03940713033080101,
372
+ 0.04327566549181938,
373
+ 0.03385213017463684
374
+ ],
375
+ "denoise_loss": [],
376
+ "ortho_loss": [
377
+ 0.3230772912502289,
378
+ 0.1277298629283905,
379
+ 0.10143587738275528,
380
+ 0.08668191730976105,
381
+ 0.09006865322589874,
382
+ 0.07957521826028824,
383
+ 0.09151509404182434,
384
+ 0.09135851263999939,
385
+ 0.1040191501379013,
386
+ 0.118146151304245,
387
+ 0.12644976377487183,
388
+ 0.13958248496055603,
389
+ 0.15548494458198547,
390
+ 0.16445577144622803,
391
+ 0.17663978040218353,
392
+ 0.17986539006233215,
393
+ 0.1787496656179428,
394
+ 0.18172365427017212,
395
+ 0.184878870844841,
396
+ 0.18665480613708496,
397
+ 0.18443602323532104,
398
+ 0.17972944676876068,
399
+ 0.1847168207168579,
400
+ 0.18398721516132355,
401
+ 0.18395553529262543,
402
+ 0.1859062761068344,
403
+ 0.1844593584537506,
404
+ 0.19130879640579224,
405
+ 0.18957658112049103,
406
+ 0.18175643682479858,
407
+ 0.18003104627132416,
408
+ 0.18403305113315582,
409
+ 0.1801728755235672,
410
+ 0.17507442831993103,
411
+ 0.18054728209972382,
412
+ 0.17856059968471527,
413
+ 0.1795228123664856,
414
+ 0.17617978155612946,
415
+ 0.17519906163215637,
416
+ 0.17287763953208923,
417
+ 0.1746804416179657,
418
+ 0.1751648634672165,
419
+ 0.17409953474998474,
420
+ 0.17315584421157837,
421
+ 0.17171329259872437,
422
+ 0.17051003873348236,
423
+ 0.17108182609081268,
424
+ 0.1729588657617569,
425
+ 0.16980983316898346,
426
+ 0.1726285070180893,
427
+ 0.1682920753955841,
428
+ 0.1723119467496872,
429
+ 0.1724519282579422,
430
+ 0.17354536056518555,
431
+ 0.17300596833229065,
432
+ 0.17255157232284546,
433
+ 0.17369846999645233,
434
+ 0.17459796369075775,
435
+ 0.17422601580619812,
436
+ 0.17440278828144073
437
+ ],
438
+ "lr": [
439
+ 3.9200000000000004e-05,
440
+ 7.92e-05,
441
+ 8e-05,
442
+ 8e-05,
443
+ 8e-05,
444
+ 8e-05,
445
+ 8e-05,
446
+ 8e-05,
447
+ 8e-05,
448
+ 8e-05,
449
+ 8e-05,
450
+ 8e-05,
451
+ 8e-05,
452
+ 8e-05,
453
+ 8e-05,
454
+ 8e-05,
455
+ 8e-05,
456
+ 8e-05,
457
+ 8e-05,
458
+ 8e-05,
459
+ 8e-05,
460
+ 8e-05,
461
+ 8e-05,
462
+ 8e-05,
463
+ 8e-05,
464
+ 8e-05,
465
+ 8e-05,
466
+ 8e-05,
467
+ 8e-05,
468
+ 8e-05,
469
+ 8e-05,
470
+ 8e-05,
471
+ 8e-05,
472
+ 8e-05,
473
+ 8e-05,
474
+ 8e-05,
475
+ 7.946710526315791e-05,
476
+ 7.650657894736843e-05,
477
+ 7.354605263157895e-05,
478
+ 7.017105263157896e-05,
479
+ 6.721052631578948e-05,
480
+ 6.425e-05,
481
+ 6.0875e-05,
482
+ 5.791447368421054e-05,
483
+ 5.495394736842105e-05,
484
+ 5.157894736842105e-05,
485
+ 4.861842105263157e-05,
486
+ 4.565789473684212e-05,
487
+ 4.2282894736842104e-05,
488
+ 3.9322368421052625e-05,
489
+ 3.636184210526315e-05,
490
+ 3.2986842105263165e-05,
491
+ 3.0026315789473686e-05,
492
+ 2.7065789473684206e-05,
493
+ 2.3690789473684223e-05,
494
+ 2.0730263157894743e-05,
495
+ 1.7769736842105264e-05,
496
+ 1.4394736842105275e-05,
497
+ 1.1434210526315796e-05,
498
+ 8.473684210526318e-06
499
+ ],
500
+ "emb_lr": [],
501
+ "eval_step": [
502
+ 150,
503
+ 307,
504
+ 464,
505
+ 621,
506
+ 778,
507
+ 935,
508
+ 1092,
509
+ 1199,
510
+ 1356,
511
+ 1513,
512
+ 1670,
513
+ 1827,
514
+ 1984,
515
+ 2141,
516
+ 2298,
517
+ 2455,
518
+ 2612,
519
+ 2769,
520
+ 2926,
521
+ 3083
522
+ ],
523
+ "eval_accuracy": [
524
+ 0.01,
525
+ 0.0,
526
+ 0.0,
527
+ 0.0,
528
+ 0.0,
529
+ 0.0,
530
+ 0.0,
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0,
537
+ 0.0,
538
+ 0.0,
539
+ 0.0,
540
+ 0.0,
541
+ 0.0,
542
+ 0.0,
543
+ 0.0
544
+ ]
545
+ },
546
+ "final_accuracy": 0.96125,
547
+ "sft_eval": {
548
+ "config": {
549
+ "ops": "add_sub",
550
+ "K": null,
551
+ "mode": "sft",
552
+ "n_digits": 6,
553
+ "n_per_split": 100
554
+ },
555
+ "splits": {
556
+ "add_S0": {
557
+ "full_accuracy": 0.97,
558
+ "n_examples": 100,
559
+ "per_subtask": {
560
+ "SA": {
561
+ "accuracy": 0.9950413223140496,
562
+ "count": 605
563
+ },
564
+ "SS": {
565
+ "accuracy": 1.0,
566
+ "count": 95
567
+ }
568
+ }
569
+ },
570
+ "add_S1": {
571
+ "full_accuracy": 1.0,
572
+ "n_examples": 100,
573
+ "per_subtask": {
574
+ "SA": {
575
+ "accuracy": 1.0,
576
+ "count": 204
577
+ },
578
+ "SC": {
579
+ "accuracy": 1.0,
580
+ "count": 169
581
+ },
582
+ "SS": {
583
+ "accuracy": 1.0,
584
+ "count": 31
585
+ },
586
+ "UC": {
587
+ "accuracy": 1.0,
588
+ "count": 296
589
+ }
590
+ }
591
+ },
592
+ "add_S2": {
593
+ "full_accuracy": 0.96,
594
+ "n_examples": 100,
595
+ "per_subtask": {
596
+ "SA": {
597
+ "accuracy": 1.0,
598
+ "count": 163
599
+ },
600
+ "SC": {
601
+ "accuracy": 0.9769230769230769,
602
+ "count": 130
603
+ },
604
+ "SS": {
605
+ "accuracy": 0.9770114942528736,
606
+ "count": 87
607
+ },
608
+ "UC": {
609
+ "accuracy": 1.0,
610
+ "count": 203
611
+ },
612
+ "US": {
613
+ "accuracy": 1.0,
614
+ "count": 117
615
+ }
616
+ }
617
+ },
618
+ "add_S3": {
619
+ "full_accuracy": 0.64,
620
+ "n_examples": 100,
621
+ "per_subtask": {
622
+ "SA": {
623
+ "accuracy": 1.0,
624
+ "count": 121
625
+ },
626
+ "SC": {
627
+ "accuracy": 0.9834710743801653,
628
+ "count": 121
629
+ },
630
+ "SS": {
631
+ "accuracy": 1.0,
632
+ "count": 49
633
+ },
634
+ "UC": {
635
+ "accuracy": 0.8172043010752689,
636
+ "count": 186
637
+ },
638
+ "US": {
639
+ "accuracy": 1.0,
640
+ "count": 223
641
+ }
642
+ }
643
+ },
644
+ "add_S4": {
645
+ "full_accuracy": 0.59,
646
+ "n_examples": 100,
647
+ "per_subtask": {
648
+ "SA": {
649
+ "accuracy": 1.0,
650
+ "count": 104
651
+ },
652
+ "SC": {
653
+ "accuracy": 1.0,
654
+ "count": 106
655
+ },
656
+ "SS": {
657
+ "accuracy": 1.0,
658
+ "count": 23
659
+ },
660
+ "UC": {
661
+ "accuracy": 0.8125,
662
+ "count": 160
663
+ },
664
+ "US": {
665
+ "accuracy": 0.9153094462540716,
666
+ "count": 307
667
+ }
668
+ }
669
+ },
670
+ "add_S5": {
671
+ "full_accuracy": 0.62,
672
+ "n_examples": 100,
673
+ "per_subtask": {
674
+ "SA": {
675
+ "accuracy": 1.0,
676
+ "count": 100
677
+ },
678
+ "SC": {
679
+ "accuracy": 1.0,
680
+ "count": 100
681
+ },
682
+ "UC": {
683
+ "accuracy": 0.71,
684
+ "count": 100
685
+ },
686
+ "US": {
687
+ "accuracy": 0.86,
688
+ "count": 400
689
+ }
690
+ }
691
+ },
692
+ "add_S6": {
693
+ "full_accuracy": 1.0,
694
+ "n_examples": 100,
695
+ "per_subtask": {
696
+ "SC": {
697
+ "accuracy": 1.0,
698
+ "count": 100
699
+ },
700
+ "UC": {
701
+ "accuracy": 1.0,
702
+ "count": 100
703
+ },
704
+ "US": {
705
+ "accuracy": 1.0,
706
+ "count": 500
707
+ }
708
+ }
709
+ },
710
+ "add_random": {
711
+ "full_accuracy": 0.99,
712
+ "n_examples": 200,
713
+ "per_subtask": {
714
+ "SA": {
715
+ "accuracy": 1.0,
716
+ "count": 447
717
+ },
718
+ "SC": {
719
+ "accuracy": 0.996875,
720
+ "count": 320
721
+ },
722
+ "SS": {
723
+ "accuracy": 1.0,
724
+ "count": 56
725
+ },
726
+ "UC": {
727
+ "accuracy": 0.998109640831758,
728
+ "count": 529
729
+ },
730
+ "US": {
731
+ "accuracy": 1.0,
732
+ "count": 48
733
+ }
734
+ }
735
+ },
736
+ "add_C3": {
737
+ "full_accuracy": 0.84,
738
+ "n_examples": 100,
739
+ "per_subtask": {
740
+ "SA": {
741
+ "accuracy": 1.0,
742
+ "count": 300
743
+ },
744
+ "SC": {
745
+ "accuracy": 1.0,
746
+ "count": 100
747
+ },
748
+ "UC": {
749
+ "accuracy": 0.917098445595855,
750
+ "count": 193
751
+ },
752
+ "US": {
753
+ "accuracy": 1.0,
754
+ "count": 107
755
+ }
756
+ }
757
+ },
758
+ "add_C4": {
759
+ "full_accuracy": 0.78,
760
+ "n_examples": 100,
761
+ "per_subtask": {
762
+ "SA": {
763
+ "accuracy": 1.0,
764
+ "count": 200
765
+ },
766
+ "SC": {
767
+ "accuracy": 1.0,
768
+ "count": 100
769
+ },
770
+ "UC": {
771
+ "accuracy": 0.92578125,
772
+ "count": 256
773
+ },
774
+ "US": {
775
+ "accuracy": 0.9583333333333334,
776
+ "count": 144
777
+ }
778
+ }
779
+ },
780
+ "add_C5": {
781
+ "full_accuracy": 0.72,
782
+ "n_examples": 100,
783
+ "per_subtask": {
784
+ "SA": {
785
+ "accuracy": 1.0,
786
+ "count": 100
787
+ },
788
+ "SC": {
789
+ "accuracy": 1.0,
790
+ "count": 100
791
+ },
792
+ "UC": {
793
+ "accuracy": 0.9313725490196079,
794
+ "count": 306
795
+ },
796
+ "US": {
797
+ "accuracy": 0.9536082474226805,
798
+ "count": 194
799
+ }
800
+ }
801
+ },
802
+ "add_C6": {
803
+ "full_accuracy": 0.82,
804
+ "n_examples": 100,
805
+ "per_subtask": {
806
+ "SC": {
807
+ "accuracy": 1.0,
808
+ "count": 100
809
+ },
810
+ "UC": {
811
+ "accuracy": 0.953551912568306,
812
+ "count": 366
813
+ },
814
+ "US": {
815
+ "accuracy": 0.9957264957264957,
816
+ "count": 234
817
+ }
818
+ }
819
+ },
820
+ "sub_M0": {
821
+ "full_accuracy": 0.96,
822
+ "n_examples": 100,
823
+ "per_subtask": {
824
+ "MD": {
825
+ "accuracy": 0.9933444259567388,
826
+ "count": 601
827
+ },
828
+ "ME": {
829
+ "accuracy": 1.0,
830
+ "count": 99
831
+ }
832
+ }
833
+ },
834
+ "sub_M1": {
835
+ "full_accuracy": 1.0,
836
+ "n_examples": 100,
837
+ "per_subtask": {
838
+ "MD": {
839
+ "accuracy": 1.0,
840
+ "count": 279
841
+ },
842
+ "MB": {
843
+ "accuracy": 1.0,
844
+ "count": 145
845
+ },
846
+ "ME": {
847
+ "accuracy": 1.0,
848
+ "count": 24
849
+ },
850
+ "UB": {
851
+ "accuracy": 1.0,
852
+ "count": 252
853
+ }
854
+ }
855
+ },
856
+ "sub_M2": {
857
+ "full_accuracy": 1.0,
858
+ "n_examples": 100,
859
+ "per_subtask": {
860
+ "MD": {
861
+ "accuracy": 1.0,
862
+ "count": 213
863
+ },
864
+ "MB": {
865
+ "accuracy": 1.0,
866
+ "count": 113
867
+ },
868
+ "ME": {
869
+ "accuracy": 1.0,
870
+ "count": 85
871
+ },
872
+ "UB": {
873
+ "accuracy": 1.0,
874
+ "count": 181
875
+ },
876
+ "UD": {
877
+ "accuracy": 1.0,
878
+ "count": 108
879
+ }
880
+ }
881
+ },
882
+ "sub_M3": {
883
+ "full_accuracy": 0.35,
884
+ "n_examples": 100,
885
+ "per_subtask": {
886
+ "MD": {
887
+ "accuracy": 1.0,
888
+ "count": 179
889
+ },
890
+ "MB": {
891
+ "accuracy": 1.0,
892
+ "count": 103
893
+ },
894
+ "ME": {
895
+ "accuracy": 1.0,
896
+ "count": 56
897
+ },
898
+ "UB": {
899
+ "accuracy": 0.5637583892617449,
900
+ "count": 149
901
+ },
902
+ "UD": {
903
+ "accuracy": 1.0,
904
+ "count": 213
905
+ }
906
+ }
907
+ },
908
+ "sub_M4": {
909
+ "full_accuracy": 0.03,
910
+ "n_examples": 100,
911
+ "per_subtask": {
912
+ "MD": {
913
+ "accuracy": 1.0,
914
+ "count": 200
915
+ },
916
+ "MB": {
917
+ "accuracy": 1.0,
918
+ "count": 100
919
+ },
920
+ "UB": {
921
+ "accuracy": 0.49,
922
+ "count": 100
923
+ },
924
+ "UD": {
925
+ "accuracy": 0.6766666666666666,
926
+ "count": 300
927
+ }
928
+ }
929
+ },
930
+ "sub_M5": {
931
+ "full_accuracy": 0.0,
932
+ "n_examples": 100,
933
+ "per_subtask": {
934
+ "MD": {
935
+ "accuracy": 1.0,
936
+ "count": 100
937
+ },
938
+ "MB": {
939
+ "accuracy": 1.0,
940
+ "count": 100
941
+ },
942
+ "UB": {
943
+ "accuracy": 0.38,
944
+ "count": 100
945
+ },
946
+ "UD": {
947
+ "accuracy": 0.4975,
948
+ "count": 400
949
+ }
950
+ }
951
+ },
952
+ "sub_random": {
953
+ "full_accuracy": 0.995,
954
+ "n_examples": 200,
955
+ "per_subtask": {
956
+ "MD": {
957
+ "accuracy": 1.0,
958
+ "count": 600
959
+ },
960
+ "MB": {
961
+ "accuracy": 1.0,
962
+ "count": 267
963
+ },
964
+ "ME": {
965
+ "accuracy": 1.0,
966
+ "count": 53
967
+ },
968
+ "UB": {
969
+ "accuracy": 0.9977220956719818,
970
+ "count": 439
971
+ },
972
+ "UD": {
973
+ "accuracy": 1.0,
974
+ "count": 41
975
+ }
976
+ }
977
+ },
978
+ "sub_B3": {
979
+ "full_accuracy": 0.84,
980
+ "n_examples": 100,
981
+ "per_subtask": {
982
+ "MD": {
983
+ "accuracy": 1.0,
984
+ "count": 300
985
+ },
986
+ "MB": {
987
+ "accuracy": 1.0,
988
+ "count": 100
989
+ },
990
+ "UB": {
991
+ "accuracy": 0.9187817258883249,
992
+ "count": 197
993
+ },
994
+ "UD": {
995
+ "accuracy": 1.0,
996
+ "count": 103
997
+ }
998
+ }
999
+ },
1000
+ "sub_B4": {
1001
+ "full_accuracy": 0.76,
1002
+ "n_examples": 100,
1003
+ "per_subtask": {
1004
+ "MD": {
1005
+ "accuracy": 1.0,
1006
+ "count": 200
1007
+ },
1008
+ "MB": {
1009
+ "accuracy": 1.0,
1010
+ "count": 100
1011
+ },
1012
+ "UB": {
1013
+ "accuracy": 0.9311740890688259,
1014
+ "count": 247
1015
+ },
1016
+ "UD": {
1017
+ "accuracy": 0.9084967320261438,
1018
+ "count": 153
1019
+ }
1020
+ }
1021
+ },
1022
+ "sub_B5": {
1023
+ "full_accuracy": 0.59,
1024
+ "n_examples": 100,
1025
+ "per_subtask": {
1026
+ "MD": {
1027
+ "accuracy": 1.0,
1028
+ "count": 100
1029
+ },
1030
+ "MB": {
1031
+ "accuracy": 1.0,
1032
+ "count": 100
1033
+ },
1034
+ "UB": {
1035
+ "accuracy": 0.8859060402684564,
1036
+ "count": 298
1037
+ },
1038
+ "UD": {
1039
+ "accuracy": 0.8861386138613861,
1040
+ "count": 202
1041
+ }
1042
+ }
1043
+ }
1044
+ },
1045
+ "summary": {
1046
+ "overall_accuracy": 0.7683333333333333,
1047
+ "total_examples": 2400,
1048
+ "n_splits": 22
1049
+ }
1050
+ },
1051
+ "sorl_eval": {
1052
+ "config": {
1053
+ "ops": "add_sub",
1054
+ "K": 1,
1055
+ "mode": "sorl",
1056
+ "n_digits": 6,
1057
+ "n_per_split": 100
1058
+ },
1059
+ "splits": {
1060
+ "add_S0": {
1061
+ "full_accuracy": 1.0,
1062
+ "n_examples": 100,
1063
+ "per_subtask": {
1064
+ "SA": {
1065
+ "accuracy": 1.0,
1066
+ "count": 605
1067
+ },
1068
+ "SS": {
1069
+ "accuracy": 1.0,
1070
+ "count": 95
1071
+ }
1072
+ }
1073
+ },
1074
+ "add_S1": {
1075
+ "full_accuracy": 1.0,
1076
+ "n_examples": 100,
1077
+ "per_subtask": {
1078
+ "SA": {
1079
+ "accuracy": 1.0,
1080
+ "count": 204
1081
+ },
1082
+ "SC": {
1083
+ "accuracy": 1.0,
1084
+ "count": 169
1085
+ },
1086
+ "SS": {
1087
+ "accuracy": 1.0,
1088
+ "count": 31
1089
+ },
1090
+ "UC": {
1091
+ "accuracy": 1.0,
1092
+ "count": 296
1093
+ }
1094
+ }
1095
+ },
1096
+ "add_S2": {
1097
+ "full_accuracy": 1.0,
1098
+ "n_examples": 100,
1099
+ "per_subtask": {
1100
+ "SA": {
1101
+ "accuracy": 1.0,
1102
+ "count": 163
1103
+ },
1104
+ "SC": {
1105
+ "accuracy": 1.0,
1106
+ "count": 130
1107
+ },
1108
+ "SS": {
1109
+ "accuracy": 1.0,
1110
+ "count": 87
1111
+ },
1112
+ "UC": {
1113
+ "accuracy": 1.0,
1114
+ "count": 203
1115
+ },
1116
+ "US": {
1117
+ "accuracy": 1.0,
1118
+ "count": 117
1119
+ }
1120
+ }
1121
+ },
1122
+ "add_S3": {
1123
+ "full_accuracy": 1.0,
1124
+ "n_examples": 100,
1125
+ "per_subtask": {
1126
+ "SA": {
1127
+ "accuracy": 1.0,
1128
+ "count": 121
1129
+ },
1130
+ "SC": {
1131
+ "accuracy": 1.0,
1132
+ "count": 121
1133
+ },
1134
+ "SS": {
1135
+ "accuracy": 1.0,
1136
+ "count": 49
1137
+ },
1138
+ "UC": {
1139
+ "accuracy": 1.0,
1140
+ "count": 186
1141
+ },
1142
+ "US": {
1143
+ "accuracy": 1.0,
1144
+ "count": 223
1145
+ }
1146
+ }
1147
+ },
1148
+ "add_S4": {
1149
+ "full_accuracy": 1.0,
1150
+ "n_examples": 100,
1151
+ "per_subtask": {
1152
+ "SA": {
1153
+ "accuracy": 1.0,
1154
+ "count": 104
1155
+ },
1156
+ "SC": {
1157
+ "accuracy": 1.0,
1158
+ "count": 106
1159
+ },
1160
+ "SS": {
1161
+ "accuracy": 1.0,
1162
+ "count": 23
1163
+ },
1164
+ "UC": {
1165
+ "accuracy": 1.0,
1166
+ "count": 160
1167
+ },
1168
+ "US": {
1169
+ "accuracy": 1.0,
1170
+ "count": 307
1171
+ }
1172
+ }
1173
+ },
1174
+ "add_S5": {
1175
+ "full_accuracy": 0.99,
1176
+ "n_examples": 100,
1177
+ "per_subtask": {
1178
+ "SA": {
1179
+ "accuracy": 1.0,
1180
+ "count": 100
1181
+ },
1182
+ "SC": {
1183
+ "accuracy": 1.0,
1184
+ "count": 100
1185
+ },
1186
+ "UC": {
1187
+ "accuracy": 0.99,
1188
+ "count": 100
1189
+ },
1190
+ "US": {
1191
+ "accuracy": 1.0,
1192
+ "count": 400
1193
+ }
1194
+ }
1195
+ },
1196
+ "add_S6": {
1197
+ "full_accuracy": 1.0,
1198
+ "n_examples": 100,
1199
+ "per_subtask": {
1200
+ "SC": {
1201
+ "accuracy": 1.0,
1202
+ "count": 100
1203
+ },
1204
+ "UC": {
1205
+ "accuracy": 1.0,
1206
+ "count": 100
1207
+ },
1208
+ "US": {
1209
+ "accuracy": 1.0,
1210
+ "count": 500
1211
+ }
1212
+ }
1213
+ },
1214
+ "add_random": {
1215
+ "full_accuracy": 1.0,
1216
+ "n_examples": 200,
1217
+ "per_subtask": {
1218
+ "SA": {
1219
+ "accuracy": 1.0,
1220
+ "count": 447
1221
+ },
1222
+ "SC": {
1223
+ "accuracy": 1.0,
1224
+ "count": 320
1225
+ },
1226
+ "SS": {
1227
+ "accuracy": 1.0,
1228
+ "count": 56
1229
+ },
1230
+ "UC": {
1231
+ "accuracy": 1.0,
1232
+ "count": 529
1233
+ },
1234
+ "US": {
1235
+ "accuracy": 1.0,
1236
+ "count": 48
1237
+ }
1238
+ }
1239
+ },
1240
+ "add_C3": {
1241
+ "full_accuracy": 1.0,
1242
+ "n_examples": 100,
1243
+ "per_subtask": {
1244
+ "SA": {
1245
+ "accuracy": 1.0,
1246
+ "count": 300
1247
+ },
1248
+ "SC": {
1249
+ "accuracy": 1.0,
1250
+ "count": 100
1251
+ },
1252
+ "UC": {
1253
+ "accuracy": 1.0,
1254
+ "count": 193
1255
+ },
1256
+ "US": {
1257
+ "accuracy": 1.0,
1258
+ "count": 107
1259
+ }
1260
+ }
1261
+ },
1262
+ "add_C4": {
1263
+ "full_accuracy": 1.0,
1264
+ "n_examples": 100,
1265
+ "per_subtask": {
1266
+ "SA": {
1267
+ "accuracy": 1.0,
1268
+ "count": 200
1269
+ },
1270
+ "SC": {
1271
+ "accuracy": 1.0,
1272
+ "count": 100
1273
+ },
1274
+ "UC": {
1275
+ "accuracy": 1.0,
1276
+ "count": 256
1277
+ },
1278
+ "US": {
1279
+ "accuracy": 1.0,
1280
+ "count": 144
1281
+ }
1282
+ }
1283
+ },
1284
+ "add_C5": {
1285
+ "full_accuracy": 1.0,
1286
+ "n_examples": 100,
1287
+ "per_subtask": {
1288
+ "SA": {
1289
+ "accuracy": 1.0,
1290
+ "count": 100
1291
+ },
1292
+ "SC": {
1293
+ "accuracy": 1.0,
1294
+ "count": 100
1295
+ },
1296
+ "UC": {
1297
+ "accuracy": 1.0,
1298
+ "count": 306
1299
+ },
1300
+ "US": {
1301
+ "accuracy": 1.0,
1302
+ "count": 194
1303
+ }
1304
+ }
1305
+ },
1306
+ "add_C6": {
1307
+ "full_accuracy": 1.0,
1308
+ "n_examples": 100,
1309
+ "per_subtask": {
1310
+ "SC": {
1311
+ "accuracy": 1.0,
1312
+ "count": 100
1313
+ },
1314
+ "UC": {
1315
+ "accuracy": 1.0,
1316
+ "count": 366
1317
+ },
1318
+ "US": {
1319
+ "accuracy": 1.0,
1320
+ "count": 234
1321
+ }
1322
+ }
1323
+ },
1324
+ "sub_M0": {
1325
+ "full_accuracy": 1.0,
1326
+ "n_examples": 100,
1327
+ "per_subtask": {
1328
+ "MD": {
1329
+ "accuracy": 1.0,
1330
+ "count": 601
1331
+ },
1332
+ "ME": {
1333
+ "accuracy": 1.0,
1334
+ "count": 99
1335
+ }
1336
+ }
1337
+ },
1338
+ "sub_M1": {
1339
+ "full_accuracy": 1.0,
1340
+ "n_examples": 100,
1341
+ "per_subtask": {
1342
+ "MD": {
1343
+ "accuracy": 1.0,
1344
+ "count": 279
1345
+ },
1346
+ "MB": {
1347
+ "accuracy": 1.0,
1348
+ "count": 145
1349
+ },
1350
+ "ME": {
1351
+ "accuracy": 1.0,
1352
+ "count": 24
1353
+ },
1354
+ "UB": {
1355
+ "accuracy": 1.0,
1356
+ "count": 252
1357
+ }
1358
+ }
1359
+ },
1360
+ "sub_M2": {
1361
+ "full_accuracy": 1.0,
1362
+ "n_examples": 100,
1363
+ "per_subtask": {
1364
+ "MD": {
1365
+ "accuracy": 1.0,
1366
+ "count": 213
1367
+ },
1368
+ "MB": {
1369
+ "accuracy": 1.0,
1370
+ "count": 113
1371
+ },
1372
+ "ME": {
1373
+ "accuracy": 1.0,
1374
+ "count": 85
1375
+ },
1376
+ "UB": {
1377
+ "accuracy": 1.0,
1378
+ "count": 181
1379
+ },
1380
+ "UD": {
1381
+ "accuracy": 1.0,
1382
+ "count": 108
1383
+ }
1384
+ }
1385
+ },
1386
+ "sub_M3": {
1387
+ "full_accuracy": 1.0,
1388
+ "n_examples": 100,
1389
+ "per_subtask": {
1390
+ "MD": {
1391
+ "accuracy": 1.0,
1392
+ "count": 179
1393
+ },
1394
+ "MB": {
1395
+ "accuracy": 1.0,
1396
+ "count": 103
1397
+ },
1398
+ "ME": {
1399
+ "accuracy": 1.0,
1400
+ "count": 56
1401
+ },
1402
+ "UB": {
1403
+ "accuracy": 1.0,
1404
+ "count": 149
1405
+ },
1406
+ "UD": {
1407
+ "accuracy": 1.0,
1408
+ "count": 213
1409
+ }
1410
+ }
1411
+ },
1412
+ "sub_M4": {
1413
+ "full_accuracy": 1.0,
1414
+ "n_examples": 100,
1415
+ "per_subtask": {
1416
+ "MD": {
1417
+ "accuracy": 1.0,
1418
+ "count": 200
1419
+ },
1420
+ "MB": {
1421
+ "accuracy": 1.0,
1422
+ "count": 100
1423
+ },
1424
+ "UB": {
1425
+ "accuracy": 1.0,
1426
+ "count": 100
1427
+ },
1428
+ "UD": {
1429
+ "accuracy": 1.0,
1430
+ "count": 300
1431
+ }
1432
+ }
1433
+ },
1434
+ "sub_M5": {
1435
+ "full_accuracy": 0.14,
1436
+ "n_examples": 100,
1437
+ "per_subtask": {
1438
+ "MD": {
1439
+ "accuracy": 1.0,
1440
+ "count": 100
1441
+ },
1442
+ "MB": {
1443
+ "accuracy": 1.0,
1444
+ "count": 100
1445
+ },
1446
+ "UB": {
1447
+ "accuracy": 0.14,
1448
+ "count": 100
1449
+ },
1450
+ "UD": {
1451
+ "accuracy": 1.0,
1452
+ "count": 400
1453
+ }
1454
+ }
1455
+ },
1456
+ "sub_random": {
1457
+ "full_accuracy": 1.0,
1458
+ "n_examples": 200,
1459
+ "per_subtask": {
1460
+ "MD": {
1461
+ "accuracy": 1.0,
1462
+ "count": 600
1463
+ },
1464
+ "MB": {
1465
+ "accuracy": 1.0,
1466
+ "count": 267
1467
+ },
1468
+ "ME": {
1469
+ "accuracy": 1.0,
1470
+ "count": 53
1471
+ },
1472
+ "UB": {
1473
+ "accuracy": 1.0,
1474
+ "count": 439
1475
+ },
1476
+ "UD": {
1477
+ "accuracy": 1.0,
1478
+ "count": 41
1479
+ }
1480
+ }
1481
+ },
1482
+ "sub_B3": {
1483
+ "full_accuracy": 1.0,
1484
+ "n_examples": 100,
1485
+ "per_subtask": {
1486
+ "MD": {
1487
+ "accuracy": 1.0,
1488
+ "count": 300
1489
+ },
1490
+ "MB": {
1491
+ "accuracy": 1.0,
1492
+ "count": 100
1493
+ },
1494
+ "UB": {
1495
+ "accuracy": 1.0,
1496
+ "count": 197
1497
+ },
1498
+ "UD": {
1499
+ "accuracy": 1.0,
1500
+ "count": 103
1501
+ }
1502
+ }
1503
+ },
1504
+ "sub_B4": {
1505
+ "full_accuracy": 1.0,
1506
+ "n_examples": 100,
1507
+ "per_subtask": {
1508
+ "MD": {
1509
+ "accuracy": 1.0,
1510
+ "count": 200
1511
+ },
1512
+ "MB": {
1513
+ "accuracy": 1.0,
1514
+ "count": 100
1515
+ },
1516
+ "UB": {
1517
+ "accuracy": 1.0,
1518
+ "count": 247
1519
+ },
1520
+ "UD": {
1521
+ "accuracy": 1.0,
1522
+ "count": 153
1523
+ }
1524
+ }
1525
+ },
1526
+ "sub_B5": {
1527
+ "full_accuracy": 0.94,
1528
+ "n_examples": 100,
1529
+ "per_subtask": {
1530
+ "MD": {
1531
+ "accuracy": 1.0,
1532
+ "count": 100
1533
+ },
1534
+ "MB": {
1535
+ "accuracy": 1.0,
1536
+ "count": 100
1537
+ },
1538
+ "UB": {
1539
+ "accuracy": 0.9798657718120806,
1540
+ "count": 298
1541
+ },
1542
+ "UD": {
1543
+ "accuracy": 1.0,
1544
+ "count": 202
1545
+ }
1546
+ }
1547
+ }
1548
+ },
1549
+ "summary": {
1550
+ "overall_accuracy": 0.96125,
1551
+ "total_examples": 2400,
1552
+ "n_splits": 22
1553
+ }
1554
+ },
1555
+ "sorl_overall_accuracy": 0.96125,
1556
+ "sft_overall_accuracy": 0.7683333333333333
1557
+ }
add_sub_sorl_v1_abs30_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36997c56cc0957d831d187e34abff36823520d50c5fbaaa3cdca415484c9628
3
+ size 650385300
add_sub_sorl_v1_abs30_K1_10K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs30_K1_10K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 30,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162519662,
71
+ "run_name": "add_sub_sorl_v1_abs30_K1_10K",
72
+ "git_commit": "57deaa28d9c21e39ddac5ef448d6e1be992fba91",
73
+ "timestamp": "2026-04-13T09:57:42.064112+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "6mbrz6zi",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/6mbrz6zi",
81
+ "final_accuracy": 0.96125,
82
+ "sft_accuracy": 0.7683333333333333,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }