amirali1985 committed on
Commit
0fba29e
·
verified ·
1 Parent(s): fb6692e

Upload add_sub_sorl_v1_abs10_K1_10K

Browse files
add_sub_sorl_v1_abs10_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_K1_10K/metrics.json ADDED
@@ -0,0 +1,1297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563
34
+ ],
35
+ "loss": [
36
+ 4.547272682189941,
37
+ 7.030660629272461,
38
+ 3.9523563385009766,
39
+ 3.047678232192993,
40
+ 3.085705280303955,
41
+ 2.618412494659424,
42
+ 2.579326629638672,
43
+ 1.4848873615264893,
44
+ -0.47891831398010254,
45
+ -5.560091495513916,
46
+ -4.520766735076904,
47
+ -2.9085822105407715,
48
+ -2.024510383605957,
49
+ -1.1992690563201904,
50
+ -0.9796783924102783,
51
+ -0.5373831987380981,
52
+ -0.5736587047576904,
53
+ -0.7218008041381836,
54
+ -0.23317885398864746,
55
+ -0.339227557182312,
56
+ -0.03128504753112793,
57
+ -0.006059408187866211,
58
+ 0.16202318668365479,
59
+ 0.23520565032958984,
60
+ 0.015602707862854004,
61
+ 0.4141479730606079,
62
+ 0.1808193325996399,
63
+ 0.15000015497207642,
64
+ 0.025604248046875,
65
+ 0.30849093198776245
66
+ ],
67
+ "base_loss": [
68
+ 8.916278839111328,
69
+ 6.119595050811768,
70
+ 3.673598527908325,
71
+ 2.020040273666382,
72
+ 1.944067120552063,
73
+ 1.8892498016357422,
74
+ 1.86012864112854,
75
+ 1.8146944046020508,
76
+ 1.869857907295227,
77
+ 1.7928285598754883,
78
+ 1.4957207441329956,
79
+ 1.1758774518966675,
80
+ 0.93010014295578,
81
+ 0.6884778141975403,
82
+ 0.6303120255470276,
83
+ 0.5103054046630859,
84
+ 0.4800644516944885,
85
+ 0.431017130613327,
86
+ 0.3630550801753998,
87
+ 0.3632798194885254,
88
+ 0.32823804020881653,
89
+ 0.2981429398059845,
90
+ 0.24132393300533295,
91
+ 0.23764917254447937,
92
+ 0.2843627333641052,
93
+ 0.19818377494812012,
94
+ 0.2125009298324585,
95
+ 0.18415263295173645,
96
+ 0.19887767732143402,
97
+ 0.1734028160572052
98
+ ],
99
+ "info_loss": [
100
+ -1.2568669319152832,
101
+ -0.31183719635009766,
102
+ -0.1900501251220703,
103
+ -0.09034490585327148,
104
+ -0.07560265064239502,
105
+ -0.11611974239349365,
106
+ -0.11614418029785156,
107
+ -0.2208923101425171,
108
+ -0.42284250259399414,
109
+ -0.9240456819534302,
110
+ -0.7907073497772217,
111
+ -0.5979017615318298,
112
+ -0.4831601679325104,
113
+ -0.3740352690219879,
114
+ -0.34062066674232483,
115
+ -0.2785865366458893,
116
+ -0.27203837037086487,
117
+ -0.27023085951805115,
118
+ -0.20767968893051147,
119
+ -0.21481892466545105,
120
+ -0.16902248561382294,
121
+ -0.1613115668296814,
122
+ -0.1327231526374817,
123
+ -0.12193454056978226,
124
+ -0.14406734704971313,
125
+ -0.09546159207820892,
126
+ -0.1119590476155281,
127
+ -0.11057599633932114,
128
+ -0.12191784381866455,
129
+ -0.09515686333179474
130
+ ],
131
+ "abs_loss": [
132
+ 2.28905987739563,
133
+ 2.068211555480957,
134
+ 1.8956918716430664,
135
+ 1.865671157836914,
136
+ 1.8401455879211426,
137
+ 1.8589462041854858,
138
+ 1.8197526931762695,
139
+ 1.8445491790771484,
140
+ 1.8481858968734741,
141
+ 1.8446826934814453,
142
+ 1.8715795278549194,
143
+ 1.8394945859909058,
144
+ 1.7485918998718262,
145
+ 1.6203765869140625,
146
+ 1.4851690530776978,
147
+ 1.3853811025619507,
148
+ 1.1656250953674316,
149
+ 1.0630995035171509,
150
+ 0.8788065910339355,
151
+ 0.8097158074378967,
152
+ 0.7307209968566895,
153
+ 0.6055489182472229,
154
+ 0.6438609957695007,
155
+ 0.5721376538276672,
156
+ 0.5374541878700256,
157
+ 0.5297055840492249,
158
+ 0.45829495787620544,
159
+ 0.428969144821167,
160
+ 0.47001174092292786,
161
+ 0.4526701867580414
162
+ ],
163
+ "zipf_loss": [
164
+ 7.970757007598877,
165
+ 3.8226165771484375,
166
+ 1.9896899461746216,
167
+ 1.7445199489593506,
168
+ 1.713650107383728,
169
+ 1.704465627670288,
170
+ 1.6986644268035889,
171
+ 1.6946611404418945,
172
+ 1.6948304176330566,
173
+ 1.7030682563781738,
174
+ 1.7034276723861694,
175
+ 1.7106086015701294,
176
+ 1.702131748199463,
177
+ 1.690568208694458,
178
+ 1.6476993560791016,
179
+ 1.59963858127594,
180
+ 1.550097942352295,
181
+ 1.443180799484253,
182
+ 1.392682433128357,
183
+ 1.3647103309631348,
184
+ 1.2576297521591187,
185
+ 1.2483584880828857,
186
+ 1.183544635772705,
187
+ 1.159688115119934,
188
+ 1.1181681156158447,
189
+ 1.1176096200942993,
190
+ 1.0420794486999512,
191
+ 1.0287106037139893,
192
+ 0.9989038109779358,
193
+ 1.0413897037506104
194
+ ],
195
+ "denoise_loss": [],
196
+ "ortho_loss": [
197
+ 0.5579876899719238,
198
+ 0.252128541469574,
199
+ 0.21211421489715576,
200
+ 0.18933552503585815,
201
+ 0.12901675701141357,
202
+ 0.13211114704608917,
203
+ 0.14735311269760132,
204
+ 0.14610400795936584,
205
+ 0.1557065099477768,
206
+ 0.17582964897155762,
207
+ 0.1939730942249298,
208
+ 0.20376113057136536,
209
+ 0.21460682153701782,
210
+ 0.23509620130062103,
211
+ 0.24628381431102753,
212
+ 0.2590982913970947,
213
+ 0.25852254033088684,
214
+ 0.2667267620563507,
215
+ 0.26971182227134705,
216
+ 0.2820512354373932,
217
+ 0.2821248769760132,
218
+ 0.28232815861701965,
219
+ 0.2819784879684448,
220
+ 0.2866751551628113,
221
+ 0.2907232642173767,
222
+ 0.2921256422996521,
223
+ 0.2953319251537323,
224
+ 0.29460301995277405,
225
+ 0.295316606760025,
226
+ 0.2968752384185791
227
+ ],
228
+ "lr": [
229
+ 1.9600000000000002e-05,
230
+ 3.96e-05,
231
+ 4e-05,
232
+ 4e-05,
233
+ 4e-05,
234
+ 4e-05,
235
+ 4e-05,
236
+ 4e-05,
237
+ 4e-05,
238
+ 4e-05,
239
+ 4e-05,
240
+ 4e-05,
241
+ 4e-05,
242
+ 4e-05,
243
+ 4e-05,
244
+ 4e-05,
245
+ 4e-05,
246
+ 4e-05,
247
+ 3.944897959183673e-05,
248
+ 3.638775510204082e-05,
249
+ 3.3326530612244897e-05,
250
+ 2.983673469387755e-05,
251
+ 2.6775510204081637e-05,
252
+ 2.3714285714285713e-05,
253
+ 2.022448979591837e-05,
254
+ 1.7163265306122454e-05,
255
+ 1.4102040816326535e-05,
256
+ 1.0612244897959182e-05,
257
+ 7.551020408163262e-06,
258
+ 4.48979591836735e-06
259
+ ],
260
+ "emb_lr": [],
261
+ "eval_step": [
262
+ 150,
263
+ 307,
264
+ 464,
265
+ 621,
266
+ 778,
267
+ 935,
268
+ 1092,
269
+ 1199,
270
+ 1356,
271
+ 1513
272
+ ],
273
+ "eval_accuracy": [
274
+ 0.01,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0
284
+ ]
285
+ },
286
+ "final_accuracy": 0.6116666666666667,
287
+ "sft_eval": {
288
+ "config": {
289
+ "ops": "add_sub",
290
+ "K": null,
291
+ "mode": "sft",
292
+ "n_digits": 6,
293
+ "n_per_split": 100
294
+ },
295
+ "splits": {
296
+ "add_S0": {
297
+ "full_accuracy": 0.89,
298
+ "n_examples": 100,
299
+ "per_subtask": {
300
+ "SA": {
301
+ "accuracy": 0.9834710743801653,
302
+ "count": 605
303
+ },
304
+ "SS": {
305
+ "accuracy": 0.9473684210526315,
306
+ "count": 95
307
+ }
308
+ }
309
+ },
310
+ "add_S1": {
311
+ "full_accuracy": 0.74,
312
+ "n_examples": 100,
313
+ "per_subtask": {
314
+ "SA": {
315
+ "accuracy": 0.9803921568627451,
316
+ "count": 204
317
+ },
318
+ "SC": {
319
+ "accuracy": 0.9467455621301775,
320
+ "count": 169
321
+ },
322
+ "SS": {
323
+ "accuracy": 1.0,
324
+ "count": 31
325
+ },
326
+ "UC": {
327
+ "accuracy": 0.9391891891891891,
328
+ "count": 296
329
+ }
330
+ }
331
+ },
332
+ "add_S2": {
333
+ "full_accuracy": 0.5,
334
+ "n_examples": 100,
335
+ "per_subtask": {
336
+ "SA": {
337
+ "accuracy": 0.9631901840490797,
338
+ "count": 163
339
+ },
340
+ "SC": {
341
+ "accuracy": 0.9230769230769231,
342
+ "count": 130
343
+ },
344
+ "SS": {
345
+ "accuracy": 0.8505747126436781,
346
+ "count": 87
347
+ },
348
+ "UC": {
349
+ "accuracy": 0.8325123152709359,
350
+ "count": 203
351
+ },
352
+ "US": {
353
+ "accuracy": 0.9743589743589743,
354
+ "count": 117
355
+ }
356
+ }
357
+ },
358
+ "add_S3": {
359
+ "full_accuracy": 0.29,
360
+ "n_examples": 100,
361
+ "per_subtask": {
362
+ "SA": {
363
+ "accuracy": 0.9917355371900827,
364
+ "count": 121
365
+ },
366
+ "SC": {
367
+ "accuracy": 0.9421487603305785,
368
+ "count": 121
369
+ },
370
+ "SS": {
371
+ "accuracy": 0.8979591836734694,
372
+ "count": 49
373
+ },
374
+ "UC": {
375
+ "accuracy": 0.7204301075268817,
376
+ "count": 186
377
+ },
378
+ "US": {
379
+ "accuracy": 0.8340807174887892,
380
+ "count": 223
381
+ }
382
+ }
383
+ },
384
+ "add_S4": {
385
+ "full_accuracy": 0.19,
386
+ "n_examples": 100,
387
+ "per_subtask": {
388
+ "SA": {
389
+ "accuracy": 0.9711538461538461,
390
+ "count": 104
391
+ },
392
+ "SC": {
393
+ "accuracy": 0.9622641509433962,
394
+ "count": 106
395
+ },
396
+ "SS": {
397
+ "accuracy": 0.9565217391304348,
398
+ "count": 23
399
+ },
400
+ "UC": {
401
+ "accuracy": 0.59375,
402
+ "count": 160
403
+ },
404
+ "US": {
405
+ "accuracy": 0.6156351791530945,
406
+ "count": 307
407
+ }
408
+ }
409
+ },
410
+ "add_S5": {
411
+ "full_accuracy": 0.19,
412
+ "n_examples": 100,
413
+ "per_subtask": {
414
+ "SA": {
415
+ "accuracy": 1.0,
416
+ "count": 100
417
+ },
418
+ "SC": {
419
+ "accuracy": 0.96,
420
+ "count": 100
421
+ },
422
+ "UC": {
423
+ "accuracy": 0.31,
424
+ "count": 100
425
+ },
426
+ "US": {
427
+ "accuracy": 0.4775,
428
+ "count": 400
429
+ }
430
+ }
431
+ },
432
+ "add_S6": {
433
+ "full_accuracy": 0.53,
434
+ "n_examples": 100,
435
+ "per_subtask": {
436
+ "SC": {
437
+ "accuracy": 0.99,
438
+ "count": 100
439
+ },
440
+ "UC": {
441
+ "accuracy": 0.65,
442
+ "count": 100
443
+ },
444
+ "US": {
445
+ "accuracy": 0.672,
446
+ "count": 500
447
+ }
448
+ }
449
+ },
450
+ "add_random": {
451
+ "full_accuracy": 0.73,
452
+ "n_examples": 200,
453
+ "per_subtask": {
454
+ "SA": {
455
+ "accuracy": 0.9776286353467561,
456
+ "count": 447
457
+ },
458
+ "SC": {
459
+ "accuracy": 0.975,
460
+ "count": 320
461
+ },
462
+ "SS": {
463
+ "accuracy": 0.9821428571428571,
464
+ "count": 56
465
+ },
466
+ "UC": {
467
+ "accuracy": 0.9338374291115312,
468
+ "count": 529
469
+ },
470
+ "US": {
471
+ "accuracy": 0.9583333333333334,
472
+ "count": 48
473
+ }
474
+ }
475
+ },
476
+ "add_C3": {
477
+ "full_accuracy": 0.42,
478
+ "n_examples": 100,
479
+ "per_subtask": {
480
+ "SA": {
481
+ "accuracy": 1.0,
482
+ "count": 300
483
+ },
484
+ "SC": {
485
+ "accuracy": 0.99,
486
+ "count": 100
487
+ },
488
+ "UC": {
489
+ "accuracy": 0.7253886010362695,
490
+ "count": 193
491
+ },
492
+ "US": {
493
+ "accuracy": 0.8504672897196262,
494
+ "count": 107
495
+ }
496
+ }
497
+ },
498
+ "add_C4": {
499
+ "full_accuracy": 0.43,
500
+ "n_examples": 100,
501
+ "per_subtask": {
502
+ "SA": {
503
+ "accuracy": 1.0,
504
+ "count": 200
505
+ },
506
+ "SC": {
507
+ "accuracy": 0.96,
508
+ "count": 100
509
+ },
510
+ "UC": {
511
+ "accuracy": 0.76953125,
512
+ "count": 256
513
+ },
514
+ "US": {
515
+ "accuracy": 0.8055555555555556,
516
+ "count": 144
517
+ }
518
+ }
519
+ },
520
+ "add_C5": {
521
+ "full_accuracy": 0.31,
522
+ "n_examples": 100,
523
+ "per_subtask": {
524
+ "SA": {
525
+ "accuracy": 1.0,
526
+ "count": 100
527
+ },
528
+ "SC": {
529
+ "accuracy": 0.98,
530
+ "count": 100
531
+ },
532
+ "UC": {
533
+ "accuracy": 0.7745098039215687,
534
+ "count": 306
535
+ },
536
+ "US": {
537
+ "accuracy": 0.7783505154639175,
538
+ "count": 194
539
+ }
540
+ }
541
+ },
542
+ "add_C6": {
543
+ "full_accuracy": 0.44,
544
+ "n_examples": 100,
545
+ "per_subtask": {
546
+ "SC": {
547
+ "accuracy": 1.0,
548
+ "count": 100
549
+ },
550
+ "UC": {
551
+ "accuracy": 0.8306010928961749,
552
+ "count": 366
553
+ },
554
+ "US": {
555
+ "accuracy": 0.8931623931623932,
556
+ "count": 234
557
+ }
558
+ }
559
+ },
560
+ "sub_M0": {
561
+ "full_accuracy": 0.87,
562
+ "n_examples": 100,
563
+ "per_subtask": {
564
+ "MD": {
565
+ "accuracy": 0.9750415973377704,
566
+ "count": 601
567
+ },
568
+ "ME": {
569
+ "accuracy": 1.0,
570
+ "count": 99
571
+ }
572
+ }
573
+ },
574
+ "sub_M1": {
575
+ "full_accuracy": 0.82,
576
+ "n_examples": 100,
577
+ "per_subtask": {
578
+ "MD": {
579
+ "accuracy": 0.989247311827957,
580
+ "count": 279
581
+ },
582
+ "MB": {
583
+ "accuracy": 0.9586206896551724,
584
+ "count": 145
585
+ },
586
+ "ME": {
587
+ "accuracy": 1.0,
588
+ "count": 24
589
+ },
590
+ "UB": {
591
+ "accuracy": 0.9563492063492064,
592
+ "count": 252
593
+ }
594
+ }
595
+ },
596
+ "sub_M2": {
597
+ "full_accuracy": 0.47,
598
+ "n_examples": 100,
599
+ "per_subtask": {
600
+ "MD": {
601
+ "accuracy": 0.9953051643192489,
602
+ "count": 213
603
+ },
604
+ "MB": {
605
+ "accuracy": 0.9380530973451328,
606
+ "count": 113
607
+ },
608
+ "ME": {
609
+ "accuracy": 1.0,
610
+ "count": 85
611
+ },
612
+ "UB": {
613
+ "accuracy": 0.7237569060773481,
614
+ "count": 181
615
+ },
616
+ "UD": {
617
+ "accuracy": 0.9444444444444444,
618
+ "count": 108
619
+ }
620
+ }
621
+ },
622
+ "sub_M3": {
623
+ "full_accuracy": 0.12,
624
+ "n_examples": 100,
625
+ "per_subtask": {
626
+ "MD": {
627
+ "accuracy": 0.994413407821229,
628
+ "count": 179
629
+ },
630
+ "MB": {
631
+ "accuracy": 0.9805825242718447,
632
+ "count": 103
633
+ },
634
+ "ME": {
635
+ "accuracy": 1.0,
636
+ "count": 56
637
+ },
638
+ "UB": {
639
+ "accuracy": 0.5302013422818792,
640
+ "count": 149
641
+ },
642
+ "UD": {
643
+ "accuracy": 0.7417840375586855,
644
+ "count": 213
645
+ }
646
+ }
647
+ },
648
+ "sub_M4": {
649
+ "full_accuracy": 0.0,
650
+ "n_examples": 100,
651
+ "per_subtask": {
652
+ "MD": {
653
+ "accuracy": 1.0,
654
+ "count": 200
655
+ },
656
+ "MB": {
657
+ "accuracy": 1.0,
658
+ "count": 100
659
+ },
660
+ "UB": {
661
+ "accuracy": 0.36,
662
+ "count": 100
663
+ },
664
+ "UD": {
665
+ "accuracy": 0.2966666666666667,
666
+ "count": 300
667
+ }
668
+ }
669
+ },
670
+ "sub_M5": {
671
+ "full_accuracy": 0.01,
672
+ "n_examples": 100,
673
+ "per_subtask": {
674
+ "MD": {
675
+ "accuracy": 1.0,
676
+ "count": 100
677
+ },
678
+ "MB": {
679
+ "accuracy": 1.0,
680
+ "count": 100
681
+ },
682
+ "UB": {
683
+ "accuracy": 0.3,
684
+ "count": 100
685
+ },
686
+ "UD": {
687
+ "accuracy": 0.215,
688
+ "count": 400
689
+ }
690
+ }
691
+ },
692
+ "sub_random": {
693
+ "full_accuracy": 0.765,
694
+ "n_examples": 200,
695
+ "per_subtask": {
696
+ "MD": {
697
+ "accuracy": 0.9783333333333334,
698
+ "count": 600
699
+ },
700
+ "MB": {
701
+ "accuracy": 0.9700374531835206,
702
+ "count": 267
703
+ },
704
+ "ME": {
705
+ "accuracy": 1.0,
706
+ "count": 53
707
+ },
708
+ "UB": {
709
+ "accuracy": 0.9384965831435079,
710
+ "count": 439
711
+ },
712
+ "UD": {
713
+ "accuracy": 0.9512195121951219,
714
+ "count": 41
715
+ }
716
+ }
717
+ },
718
+ "sub_B3": {
719
+ "full_accuracy": 0.49,
720
+ "n_examples": 100,
721
+ "per_subtask": {
722
+ "MD": {
723
+ "accuracy": 0.9966666666666667,
724
+ "count": 300
725
+ },
726
+ "MB": {
727
+ "accuracy": 0.98,
728
+ "count": 100
729
+ },
730
+ "UB": {
731
+ "accuracy": 0.766497461928934,
732
+ "count": 197
733
+ },
734
+ "UD": {
735
+ "accuracy": 0.7572815533980582,
736
+ "count": 103
737
+ }
738
+ }
739
+ },
740
+ "sub_B4": {
741
+ "full_accuracy": 0.4,
742
+ "n_examples": 100,
743
+ "per_subtask": {
744
+ "MD": {
745
+ "accuracy": 0.995,
746
+ "count": 200
747
+ },
748
+ "MB": {
749
+ "accuracy": 1.0,
750
+ "count": 100
751
+ },
752
+ "UB": {
753
+ "accuracy": 0.7975708502024291,
754
+ "count": 247
755
+ },
756
+ "UD": {
757
+ "accuracy": 0.6928104575163399,
758
+ "count": 153
759
+ }
760
+ }
761
+ },
762
+ "sub_B5": {
763
+ "full_accuracy": 0.2,
764
+ "n_examples": 100,
765
+ "per_subtask": {
766
+ "MD": {
767
+ "accuracy": 1.0,
768
+ "count": 100
769
+ },
770
+ "MB": {
771
+ "accuracy": 1.0,
772
+ "count": 100
773
+ },
774
+ "UB": {
775
+ "accuracy": 0.7348993288590604,
776
+ "count": 298
777
+ },
778
+ "UD": {
779
+ "accuracy": 0.6782178217821783,
780
+ "count": 202
781
+ }
782
+ }
783
+ }
784
+ },
785
+ "summary": {
786
+ "overall_accuracy": 0.47041666666666665,
787
+ "total_examples": 2400,
788
+ "n_splits": 22
789
+ }
790
+ },
791
+ "sorl_eval": {
792
+ "config": {
793
+ "ops": "add_sub",
794
+ "K": 1,
795
+ "mode": "sorl",
796
+ "n_digits": 6,
797
+ "n_per_split": 100
798
+ },
799
+ "splits": {
800
+ "add_S0": {
801
+ "full_accuracy": 0.97,
802
+ "n_examples": 100,
803
+ "per_subtask": {
804
+ "SA": {
805
+ "accuracy": 0.9950413223140496,
806
+ "count": 605
807
+ },
808
+ "SS": {
809
+ "accuracy": 1.0,
810
+ "count": 95
811
+ }
812
+ }
813
+ },
814
+ "add_S1": {
815
+ "full_accuracy": 0.96,
816
+ "n_examples": 100,
817
+ "per_subtask": {
818
+ "SA": {
819
+ "accuracy": 0.9901960784313726,
820
+ "count": 204
821
+ },
822
+ "SC": {
823
+ "accuracy": 1.0,
824
+ "count": 169
825
+ },
826
+ "SS": {
827
+ "accuracy": 0.967741935483871,
828
+ "count": 31
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.9932432432432432,
832
+ "count": 296
833
+ }
834
+ }
835
+ },
836
+ "add_S2": {
837
+ "full_accuracy": 0.83,
838
+ "n_examples": 100,
839
+ "per_subtask": {
840
+ "SA": {
841
+ "accuracy": 0.9815950920245399,
842
+ "count": 163
843
+ },
844
+ "SC": {
845
+ "accuracy": 0.9769230769230769,
846
+ "count": 130
847
+ },
848
+ "SS": {
849
+ "accuracy": 0.9885057471264368,
850
+ "count": 87
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.9408866995073891,
854
+ "count": 203
855
+ },
856
+ "US": {
857
+ "accuracy": 1.0,
858
+ "count": 117
859
+ }
860
+ }
861
+ },
862
+ "add_S3": {
863
+ "full_accuracy": 0.55,
864
+ "n_examples": 100,
865
+ "per_subtask": {
866
+ "SA": {
867
+ "accuracy": 0.9917355371900827,
868
+ "count": 121
869
+ },
870
+ "SC": {
871
+ "accuracy": 0.9917355371900827,
872
+ "count": 121
873
+ },
874
+ "SS": {
875
+ "accuracy": 1.0,
876
+ "count": 49
877
+ },
878
+ "UC": {
879
+ "accuracy": 0.7903225806451613,
880
+ "count": 186
881
+ },
882
+ "US": {
883
+ "accuracy": 0.9461883408071748,
884
+ "count": 223
885
+ }
886
+ }
887
+ },
888
+ "add_S4": {
889
+ "full_accuracy": 0.4,
890
+ "n_examples": 100,
891
+ "per_subtask": {
892
+ "SA": {
893
+ "accuracy": 1.0,
894
+ "count": 104
895
+ },
896
+ "SC": {
897
+ "accuracy": 1.0,
898
+ "count": 106
899
+ },
900
+ "SS": {
901
+ "accuracy": 1.0,
902
+ "count": 23
903
+ },
904
+ "UC": {
905
+ "accuracy": 0.7,
906
+ "count": 160
907
+ },
908
+ "US": {
909
+ "accuracy": 0.8045602605863192,
910
+ "count": 307
911
+ }
912
+ }
913
+ },
914
+ "add_S5": {
915
+ "full_accuracy": 0.27,
916
+ "n_examples": 100,
917
+ "per_subtask": {
918
+ "SA": {
919
+ "accuracy": 1.0,
920
+ "count": 100
921
+ },
922
+ "SC": {
923
+ "accuracy": 1.0,
924
+ "count": 100
925
+ },
926
+ "UC": {
927
+ "accuracy": 0.41,
928
+ "count": 100
929
+ },
930
+ "US": {
931
+ "accuracy": 0.59,
932
+ "count": 400
933
+ }
934
+ }
935
+ },
936
+ "add_S6": {
937
+ "full_accuracy": 0.32,
938
+ "n_examples": 100,
939
+ "per_subtask": {
940
+ "SC": {
941
+ "accuracy": 1.0,
942
+ "count": 100
943
+ },
944
+ "UC": {
945
+ "accuracy": 0.52,
946
+ "count": 100
947
+ },
948
+ "US": {
949
+ "accuracy": 0.586,
950
+ "count": 500
951
+ }
952
+ }
953
+ },
954
+ "add_random": {
955
+ "full_accuracy": 0.955,
956
+ "n_examples": 200,
957
+ "per_subtask": {
958
+ "SA": {
959
+ "accuracy": 0.9977628635346756,
960
+ "count": 447
961
+ },
962
+ "SC": {
963
+ "accuracy": 0.996875,
964
+ "count": 320
965
+ },
966
+ "SS": {
967
+ "accuracy": 1.0,
968
+ "count": 56
969
+ },
970
+ "UC": {
971
+ "accuracy": 0.9867674858223062,
972
+ "count": 529
973
+ },
974
+ "US": {
975
+ "accuracy": 1.0,
976
+ "count": 48
977
+ }
978
+ }
979
+ },
980
+ "add_C3": {
981
+ "full_accuracy": 0.67,
982
+ "n_examples": 100,
983
+ "per_subtask": {
984
+ "SA": {
985
+ "accuracy": 1.0,
986
+ "count": 300
987
+ },
988
+ "SC": {
989
+ "accuracy": 1.0,
990
+ "count": 100
991
+ },
992
+ "UC": {
993
+ "accuracy": 0.8341968911917098,
994
+ "count": 193
995
+ },
996
+ "US": {
997
+ "accuracy": 0.9065420560747663,
998
+ "count": 107
999
+ }
1000
+ }
1001
+ },
1002
+ "add_C4": {
1003
+ "full_accuracy": 0.63,
1004
+ "n_examples": 100,
1005
+ "per_subtask": {
1006
+ "SA": {
1007
+ "accuracy": 1.0,
1008
+ "count": 200
1009
+ },
1010
+ "SC": {
1011
+ "accuracy": 1.0,
1012
+ "count": 100
1013
+ },
1014
+ "UC": {
1015
+ "accuracy": 0.86328125,
1016
+ "count": 256
1017
+ },
1018
+ "US": {
1019
+ "accuracy": 0.9097222222222222,
1020
+ "count": 144
1021
+ }
1022
+ }
1023
+ },
1024
+ "add_C5": {
1025
+ "full_accuracy": 0.57,
1026
+ "n_examples": 100,
1027
+ "per_subtask": {
1028
+ "SA": {
1029
+ "accuracy": 1.0,
1030
+ "count": 100
1031
+ },
1032
+ "SC": {
1033
+ "accuracy": 1.0,
1034
+ "count": 100
1035
+ },
1036
+ "UC": {
1037
+ "accuracy": 0.8725490196078431,
1038
+ "count": 306
1039
+ },
1040
+ "US": {
1041
+ "accuracy": 0.9020618556701031,
1042
+ "count": 194
1043
+ }
1044
+ }
1045
+ },
1046
+ "add_C6": {
1047
+ "full_accuracy": 0.66,
1048
+ "n_examples": 100,
1049
+ "per_subtask": {
1050
+ "SC": {
1051
+ "accuracy": 1.0,
1052
+ "count": 100
1053
+ },
1054
+ "UC": {
1055
+ "accuracy": 0.9098360655737705,
1056
+ "count": 366
1057
+ },
1058
+ "US": {
1059
+ "accuracy": 0.9487179487179487,
1060
+ "count": 234
1061
+ }
1062
+ }
1063
+ },
1064
+ "sub_M0": {
1065
+ "full_accuracy": 0.9,
1066
+ "n_examples": 100,
1067
+ "per_subtask": {
1068
+ "MD": {
1069
+ "accuracy": 0.9850249584026622,
1070
+ "count": 601
1071
+ },
1072
+ "ME": {
1073
+ "accuracy": 0.98989898989899,
1074
+ "count": 99
1075
+ }
1076
+ }
1077
+ },
1078
+ "sub_M1": {
1079
+ "full_accuracy": 0.99,
1080
+ "n_examples": 100,
1081
+ "per_subtask": {
1082
+ "MD": {
1083
+ "accuracy": 1.0,
1084
+ "count": 279
1085
+ },
1086
+ "MB": {
1087
+ "accuracy": 0.993103448275862,
1088
+ "count": 145
1089
+ },
1090
+ "ME": {
1091
+ "accuracy": 1.0,
1092
+ "count": 24
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 1.0,
1096
+ "count": 252
1097
+ }
1098
+ }
1099
+ },
1100
+ "sub_M2": {
1101
+ "full_accuracy": 0.6,
1102
+ "n_examples": 100,
1103
+ "per_subtask": {
1104
+ "MD": {
1105
+ "accuracy": 0.9953051643192489,
1106
+ "count": 213
1107
+ },
1108
+ "MB": {
1109
+ "accuracy": 0.9734513274336283,
1110
+ "count": 113
1111
+ },
1112
+ "ME": {
1113
+ "accuracy": 1.0,
1114
+ "count": 85
1115
+ },
1116
+ "UB": {
1117
+ "accuracy": 0.7845303867403315,
1118
+ "count": 181
1119
+ },
1120
+ "UD": {
1121
+ "accuracy": 1.0,
1122
+ "count": 108
1123
+ }
1124
+ }
1125
+ },
1126
+ "sub_M3": {
1127
+ "full_accuracy": 0.07,
1128
+ "n_examples": 100,
1129
+ "per_subtask": {
1130
+ "MD": {
1131
+ "accuracy": 1.0,
1132
+ "count": 179
1133
+ },
1134
+ "MB": {
1135
+ "accuracy": 0.9805825242718447,
1136
+ "count": 103
1137
+ },
1138
+ "ME": {
1139
+ "accuracy": 1.0,
1140
+ "count": 56
1141
+ },
1142
+ "UB": {
1143
+ "accuracy": 0.4563758389261745,
1144
+ "count": 149
1145
+ },
1146
+ "UD": {
1147
+ "accuracy": 0.7699530516431925,
1148
+ "count": 213
1149
+ }
1150
+ }
1151
+ },
1152
+ "sub_M4": {
1153
+ "full_accuracy": 0.07,
1154
+ "n_examples": 100,
1155
+ "per_subtask": {
1156
+ "MD": {
1157
+ "accuracy": 1.0,
1158
+ "count": 200
1159
+ },
1160
+ "MB": {
1161
+ "accuracy": 1.0,
1162
+ "count": 100
1163
+ },
1164
+ "UB": {
1165
+ "accuracy": 0.38,
1166
+ "count": 100
1167
+ },
1168
+ "UD": {
1169
+ "accuracy": 0.48,
1170
+ "count": 300
1171
+ }
1172
+ }
1173
+ },
1174
+ "sub_M5": {
1175
+ "full_accuracy": 0.02,
1176
+ "n_examples": 100,
1177
+ "per_subtask": {
1178
+ "MD": {
1179
+ "accuracy": 1.0,
1180
+ "count": 100
1181
+ },
1182
+ "MB": {
1183
+ "accuracy": 1.0,
1184
+ "count": 100
1185
+ },
1186
+ "UB": {
1187
+ "accuracy": 0.26,
1188
+ "count": 100
1189
+ },
1190
+ "UD": {
1191
+ "accuracy": 0.38,
1192
+ "count": 400
1193
+ }
1194
+ }
1195
+ },
1196
+ "sub_random": {
1197
+ "full_accuracy": 0.89,
1198
+ "n_examples": 200,
1199
+ "per_subtask": {
1200
+ "MD": {
1201
+ "accuracy": 0.9966666666666667,
1202
+ "count": 600
1203
+ },
1204
+ "MB": {
1205
+ "accuracy": 0.9962546816479401,
1206
+ "count": 267
1207
+ },
1208
+ "ME": {
1209
+ "accuracy": 1.0,
1210
+ "count": 53
1211
+ },
1212
+ "UB": {
1213
+ "accuracy": 0.9567198177676538,
1214
+ "count": 439
1215
+ },
1216
+ "UD": {
1217
+ "accuracy": 1.0,
1218
+ "count": 41
1219
+ }
1220
+ }
1221
+ },
1222
+ "sub_B3": {
1223
+ "full_accuracy": 0.68,
1224
+ "n_examples": 100,
1225
+ "per_subtask": {
1226
+ "MD": {
1227
+ "accuracy": 1.0,
1228
+ "count": 300
1229
+ },
1230
+ "MB": {
1231
+ "accuracy": 1.0,
1232
+ "count": 100
1233
+ },
1234
+ "UB": {
1235
+ "accuracy": 0.8578680203045685,
1236
+ "count": 197
1237
+ },
1238
+ "UD": {
1239
+ "accuracy": 0.8932038834951457,
1240
+ "count": 103
1241
+ }
1242
+ }
1243
+ },
1244
+ "sub_B4": {
1245
+ "full_accuracy": 0.51,
1246
+ "n_examples": 100,
1247
+ "per_subtask": {
1248
+ "MD": {
1249
+ "accuracy": 1.0,
1250
+ "count": 200
1251
+ },
1252
+ "MB": {
1253
+ "accuracy": 1.0,
1254
+ "count": 100
1255
+ },
1256
+ "UB": {
1257
+ "accuracy": 0.8380566801619433,
1258
+ "count": 247
1259
+ },
1260
+ "UD": {
1261
+ "accuracy": 0.7647058823529411,
1262
+ "count": 153
1263
+ }
1264
+ }
1265
+ },
1266
+ "sub_B5": {
1267
+ "full_accuracy": 0.33,
1268
+ "n_examples": 100,
1269
+ "per_subtask": {
1270
+ "MD": {
1271
+ "accuracy": 1.0,
1272
+ "count": 100
1273
+ },
1274
+ "MB": {
1275
+ "accuracy": 1.0,
1276
+ "count": 100
1277
+ },
1278
+ "UB": {
1279
+ "accuracy": 0.8187919463087249,
1280
+ "count": 298
1281
+ },
1282
+ "UD": {
1283
+ "accuracy": 0.7524752475247525,
1284
+ "count": 202
1285
+ }
1286
+ }
1287
+ }
1288
+ },
1289
+ "summary": {
1290
+ "overall_accuracy": 0.6116666666666667,
1291
+ "total_examples": 2400,
1292
+ "n_splits": 22
1293
+ }
1294
+ },
1295
+ "sorl_overall_accuracy": 0.6116666666666667,
1296
+ "sft_overall_accuracy": 0.47041666666666665
1297
+ }
add_sub_sorl_v1_abs10_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b512b0970baed990d0164c674044de1463fd819596795fb6a137261185804ce5
3
+ size 650303660
add_sub_sorl_v1_abs10_K1_10K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K1_10K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 10,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162499262,
71
+ "run_name": "add_sub_sorl_v1_abs10_K1_10K",
72
+ "git_commit": "78d46f8665a87f4b44bd5894bd34f393f2dea51f",
73
+ "timestamp": "2026-04-12T08:59:10.916999+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "pdbywwrx",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/pdbywwrx",
81
+ "final_accuracy": 0.6116666666666667,
82
+ "sft_accuracy": 0.47041666666666665,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }