amirali1985 commited on
Commit
b614410
·
verified ·
1 Parent(s): c0a1871

Upload add_sub_sorl_v1_abs30_10K

Browse files
add_sub_sorl_v1_abs30_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151674
37
+ }
add_sub_sorl_v1_abs30_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs30_10K/metrics.json ADDED
@@ -0,0 +1,1297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563
34
+ ],
35
+ "loss": [
36
+ 7.360226631164551,
37
+ 3.0451724529266357,
38
+ 2.099494457244873,
39
+ 1.8974230289459229,
40
+ 1.6028515100479126,
41
+ 1.6202638149261475,
42
+ 1.0471231937408447,
43
+ 1.206010103225708,
44
+ 1.220455527305603,
45
+ 0.6421937942504883,
46
+ 0.9361663460731506,
47
+ 0.6581725478172302,
48
+ 0.3893754482269287,
49
+ 0.10067030787467957,
50
+ -0.18137793242931366,
51
+ -1.2401902675628662,
52
+ -6.036858558654785,
53
+ -9.205570220947266,
54
+ -12.021049499511719,
55
+ -12.291423797607422,
56
+ -13.102083206176758,
57
+ -13.575828552246094,
58
+ -14.400162696838379,
59
+ -14.627618789672852,
60
+ -13.902620315551758,
61
+ -14.419426918029785,
62
+ -14.45729923248291,
63
+ -14.640434265136719,
64
+ -14.702298164367676,
65
+ -15.077300071716309
66
+ ],
67
+ "base_loss": [
68
+ 6.145678997039795,
69
+ 2.279780149459839,
70
+ 1.8546440601348877,
71
+ 1.8970987796783447,
72
+ 1.9150477647781372,
73
+ 1.7819044589996338,
74
+ 1.8386508226394653,
75
+ 1.8494256734848022,
76
+ 1.8876153230667114,
77
+ 1.8227612972259521,
78
+ 1.820282220840454,
79
+ 1.8904969692230225,
80
+ 1.850385069847107,
81
+ 1.8309259414672852,
82
+ 1.726029634475708,
83
+ 1.879884958267212,
84
+ 1.999297857284546,
85
+ 2.0075831413269043,
86
+ 2.0371410846710205,
87
+ 1.9125654697418213,
88
+ 1.8942102193832397,
89
+ 1.8235619068145752,
90
+ 1.891092300415039,
91
+ 1.8860607147216797,
92
+ 1.773101806640625,
93
+ 1.819642186164856,
94
+ 1.8203500509262085,
95
+ 1.796560287475586,
96
+ 1.7861425876617432,
97
+ 1.820271611213684
98
+ ],
99
+ "info_loss": [
100
+ -0.22868061065673828,
101
+ -0.04863715171813965,
102
+ -0.08824169635772705,
103
+ -0.11128807067871094,
104
+ -0.1426929235458374,
105
+ -0.12672185897827148,
106
+ -0.1895982027053833,
107
+ -0.17402803897857666,
108
+ -0.1781005859375,
109
+ -0.2276695966720581,
110
+ -0.19690930843353271,
111
+ -0.2245497703552246,
112
+ -0.19946801662445068,
113
+ -0.19788849353790283,
114
+ -0.2113332748413086,
115
+ -0.33197808265686035,
116
+ -0.8245757818222046,
117
+ -1.1410942077636719,
118
+ -1.4241830110549927,
119
+ -1.4409539699554443,
120
+ -1.5185356140136719,
121
+ -1.5580085515975952,
122
+ -1.6468720436096191,
123
+ -1.6693123579025269,
124
+ -1.586121678352356,
125
+ -1.6420375108718872,
126
+ -1.645477294921875,
127
+ -1.6611346006393433,
128
+ -1.665205478668213,
129
+ -1.7057867050170898
130
+ ],
131
+ "abs_loss": [
132
+ 3.21622633934021,
133
+ 2.56597900390625,
134
+ 2.6010055541992188,
135
+ 2.561192750930786,
136
+ 2.6244845390319824,
137
+ 2.52919340133667,
138
+ 2.517479419708252,
139
+ 2.517756462097168,
140
+ 2.585357427597046,
141
+ 2.5825083255767822,
142
+ 2.569108009338379,
143
+ 2.4620120525360107,
144
+ 1.8434478044509888,
145
+ 1.0516579151153564,
146
+ 0.9096152782440186,
147
+ 0.9197292923927307,
148
+ 1.1487579345703125,
149
+ 1.1388944387435913,
150
+ 0.9813919067382812,
151
+ 1.1320276260375977,
152
+ 1.004498839378357,
153
+ 0.922298789024353,
154
+ 0.9164756536483765,
155
+ 0.9354845285415649,
156
+ 0.9864010810852051,
157
+ 1.0181562900543213,
158
+ 0.943196177482605,
159
+ 0.9554719924926758,
160
+ 0.8894187808036804,
161
+ 0.8351554274559021
162
+ ],
163
+ "zipf_loss": [
164
+ 3.1797308921813965,
165
+ 0.9951657652854919,
166
+ 0.8671668171882629,
167
+ 0.8570857048034668,
168
+ 0.8522845506668091,
169
+ 0.8526585698127747,
170
+ 0.8527064323425293,
171
+ 0.8450891971588135,
172
+ 0.855310320854187,
173
+ 0.837877631187439,
174
+ 0.8280664086341858,
175
+ 0.7669720649719238,
176
+ 0.3493257761001587,
177
+ 0.14346350729465485,
178
+ 0.11496366560459137,
179
+ 0.1077326238155365,
180
+ 0.09472579509019852,
181
+ 0.08389829844236374,
182
+ 0.08550038933753967,
183
+ 0.09234830737113953,
184
+ 0.08861316740512848,
185
+ 0.08846604079008102,
186
+ 0.08581779152154922,
187
+ 0.08589503169059753,
188
+ 0.08685382455587387,
189
+ 0.07949158549308777,
190
+ 0.08280487358570099,
191
+ 0.07880416512489319,
192
+ 0.07467135041952133,
193
+ 0.07677992433309555
194
+ ],
195
+ "denoise_loss": [],
196
+ "ortho_loss": [
197
+ 0.1771664321422577,
198
+ 0.06714396178722382,
199
+ 0.044829387217760086,
200
+ 0.03473972529172897,
201
+ 0.03302708640694618,
202
+ 0.0372856929898262,
203
+ 0.03956683725118637,
204
+ 0.03881165385246277,
205
+ 0.03860178962349892,
206
+ 0.038966186344623566,
207
+ 0.0423412024974823,
208
+ 0.04346398264169693,
209
+ 0.046835098415613174,
210
+ 0.05101684108376503,
211
+ 0.05338294431567192,
212
+ 0.0558658242225647,
213
+ 0.0594053715467453,
214
+ 0.06552360206842422,
215
+ 0.06966850161552429,
216
+ 0.07646190375089645,
217
+ 0.0794626846909523,
218
+ 0.0848257839679718,
219
+ 0.08905336260795593,
220
+ 0.0941048339009285,
221
+ 0.09721637517213821,
222
+ 0.09776473045349121,
223
+ 0.09905572980642319,
224
+ 0.10016800463199615,
225
+ 0.10210232436656952,
226
+ 0.10285182297229767
227
+ ],
228
+ "lr": [
229
+ 7.840000000000001e-05,
230
+ 8e-05,
231
+ 8e-05,
232
+ 8e-05,
233
+ 8e-05,
234
+ 8e-05,
235
+ 8e-05,
236
+ 8e-05,
237
+ 8e-05,
238
+ 8e-05,
239
+ 8e-05,
240
+ 8e-05,
241
+ 8e-05,
242
+ 8e-05,
243
+ 8e-05,
244
+ 8e-05,
245
+ 8e-05,
246
+ 8e-05,
247
+ 7.656578947368422e-05,
248
+ 7.064473684210527e-05,
249
+ 6.472368421052631e-05,
250
+ 5.7973684210526325e-05,
251
+ 5.2052631578947374e-05,
252
+ 4.613157894736842e-05,
253
+ 3.938157894736842e-05,
254
+ 3.346052631578948e-05,
255
+ 2.7539473684210523e-05,
256
+ 2.078947368421053e-05,
257
+ 1.4868421052631572e-05,
258
+ 8.947368421052635e-06
259
+ ],
260
+ "emb_lr": [],
261
+ "eval_step": [
262
+ 150,
263
+ 307,
264
+ 464,
265
+ 621,
266
+ 778,
267
+ 935,
268
+ 1092,
269
+ 1199,
270
+ 1356,
271
+ 1513
272
+ ],
273
+ "eval_accuracy": [
274
+ 0.0,
275
+ 0.02,
276
+ 0.04,
277
+ 0.02,
278
+ 0.0,
279
+ 0.05,
280
+ 0.4,
281
+ 0.59,
282
+ 0.7,
283
+ 0.8
284
+ ]
285
+ },
286
+ "final_accuracy": 0.6585714285714286,
287
+ "sft_eval": {
288
+ "config": {
289
+ "ops": "add_sub",
290
+ "K": null,
291
+ "mode": "sft",
292
+ "n_digits": 6,
293
+ "n_per_split": 50
294
+ },
295
+ "splits": {
296
+ "add_S0": {
297
+ "full_accuracy": 0.0,
298
+ "n_examples": 50,
299
+ "per_subtask": {
300
+ "SA": {
301
+ "accuracy": 0.24067796610169492,
302
+ "count": 295
303
+ },
304
+ "SS": {
305
+ "accuracy": 0.7454545454545455,
306
+ "count": 55
307
+ }
308
+ }
309
+ },
310
+ "add_S1": {
311
+ "full_accuracy": 0.0,
312
+ "n_examples": 50,
313
+ "per_subtask": {
314
+ "SA": {
315
+ "accuracy": 0.2777777777777778,
316
+ "count": 126
317
+ },
318
+ "SC": {
319
+ "accuracy": 0.13924050632911392,
320
+ "count": 79
321
+ },
322
+ "SS": {
323
+ "accuracy": 0.7142857142857143,
324
+ "count": 21
325
+ },
326
+ "UC": {
327
+ "accuracy": 0.22580645161290322,
328
+ "count": 124
329
+ }
330
+ }
331
+ },
332
+ "add_S2": {
333
+ "full_accuracy": 0.0,
334
+ "n_examples": 50,
335
+ "per_subtask": {
336
+ "SA": {
337
+ "accuracy": 0.32,
338
+ "count": 75
339
+ },
340
+ "SC": {
341
+ "accuracy": 0.12903225806451613,
342
+ "count": 62
343
+ },
344
+ "SS": {
345
+ "accuracy": 0.4358974358974359,
346
+ "count": 39
347
+ },
348
+ "UC": {
349
+ "accuracy": 0.35135135135135137,
350
+ "count": 111
351
+ },
352
+ "US": {
353
+ "accuracy": 0.5396825396825397,
354
+ "count": 63
355
+ }
356
+ }
357
+ },
358
+ "add_S3": {
359
+ "full_accuracy": 0.0,
360
+ "n_examples": 50,
361
+ "per_subtask": {
362
+ "SA": {
363
+ "accuracy": 0.4166666666666667,
364
+ "count": 60
365
+ },
366
+ "SC": {
367
+ "accuracy": 0.07017543859649122,
368
+ "count": 57
369
+ },
370
+ "SS": {
371
+ "accuracy": 0.47368421052631576,
372
+ "count": 19
373
+ },
374
+ "UC": {
375
+ "accuracy": 0.375,
376
+ "count": 104
377
+ },
378
+ "US": {
379
+ "accuracy": 0.44545454545454544,
380
+ "count": 110
381
+ }
382
+ }
383
+ },
384
+ "add_S4": {
385
+ "full_accuracy": 0.0,
386
+ "n_examples": 50,
387
+ "per_subtask": {
388
+ "SA": {
389
+ "accuracy": 0.3541666666666667,
390
+ "count": 48
391
+ },
392
+ "SC": {
393
+ "accuracy": 0.15384615384615385,
394
+ "count": 52
395
+ },
396
+ "SS": {
397
+ "accuracy": 0.14285714285714285,
398
+ "count": 7
399
+ },
400
+ "UC": {
401
+ "accuracy": 0.4943820224719101,
402
+ "count": 89
403
+ },
404
+ "US": {
405
+ "accuracy": 0.6233766233766234,
406
+ "count": 154
407
+ }
408
+ }
409
+ },
410
+ "add_S5": {
411
+ "full_accuracy": 0.0,
412
+ "n_examples": 50,
413
+ "per_subtask": {
414
+ "SA": {
415
+ "accuracy": 0.54,
416
+ "count": 50
417
+ },
418
+ "SC": {
419
+ "accuracy": 0.16,
420
+ "count": 50
421
+ },
422
+ "UC": {
423
+ "accuracy": 0.38,
424
+ "count": 50
425
+ },
426
+ "US": {
427
+ "accuracy": 0.355,
428
+ "count": 200
429
+ }
430
+ }
431
+ },
432
+ "add_S6": {
433
+ "full_accuracy": 0.08,
434
+ "n_examples": 50,
435
+ "per_subtask": {
436
+ "SC": {
437
+ "accuracy": 0.14,
438
+ "count": 50
439
+ },
440
+ "UC": {
441
+ "accuracy": 0.42,
442
+ "count": 50
443
+ },
444
+ "US": {
445
+ "accuracy": 0.396,
446
+ "count": 250
447
+ }
448
+ }
449
+ },
450
+ "add_random": {
451
+ "full_accuracy": 0.0,
452
+ "n_examples": 200,
453
+ "per_subtask": {
454
+ "SA": {
455
+ "accuracy": 0.2668213457076566,
456
+ "count": 431
457
+ },
458
+ "SC": {
459
+ "accuracy": 0.11075949367088607,
460
+ "count": 316
461
+ },
462
+ "SS": {
463
+ "accuracy": 0.7435897435897436,
464
+ "count": 39
465
+ },
466
+ "UC": {
467
+ "accuracy": 0.2714285714285714,
468
+ "count": 560
469
+ },
470
+ "US": {
471
+ "accuracy": 0.42592592592592593,
472
+ "count": 54
473
+ }
474
+ }
475
+ },
476
+ "add_C3": {
477
+ "full_accuracy": 0.0,
478
+ "n_examples": 50,
479
+ "per_subtask": {
480
+ "SA": {
481
+ "accuracy": 0.36,
482
+ "count": 150
483
+ },
484
+ "SC": {
485
+ "accuracy": 0.06,
486
+ "count": 50
487
+ },
488
+ "UC": {
489
+ "accuracy": 0.15384615384615385,
490
+ "count": 104
491
+ },
492
+ "US": {
493
+ "accuracy": 0.3695652173913043,
494
+ "count": 46
495
+ }
496
+ }
497
+ },
498
+ "add_C4": {
499
+ "full_accuracy": 0.0,
500
+ "n_examples": 50,
501
+ "per_subtask": {
502
+ "SA": {
503
+ "accuracy": 0.39,
504
+ "count": 100
505
+ },
506
+ "SC": {
507
+ "accuracy": 0.1,
508
+ "count": 50
509
+ },
510
+ "UC": {
511
+ "accuracy": 0.18699186991869918,
512
+ "count": 123
513
+ },
514
+ "US": {
515
+ "accuracy": 0.37662337662337664,
516
+ "count": 77
517
+ }
518
+ }
519
+ },
520
+ "add_C5": {
521
+ "full_accuracy": 0.0,
522
+ "n_examples": 50,
523
+ "per_subtask": {
524
+ "SA": {
525
+ "accuracy": 0.42,
526
+ "count": 50
527
+ },
528
+ "SC": {
529
+ "accuracy": 0.18,
530
+ "count": 50
531
+ },
532
+ "UC": {
533
+ "accuracy": 0.2662337662337662,
534
+ "count": 154
535
+ },
536
+ "US": {
537
+ "accuracy": 0.5,
538
+ "count": 96
539
+ }
540
+ }
541
+ },
542
+ "add_C6": {
543
+ "full_accuracy": 0.02,
544
+ "n_examples": 50,
545
+ "per_subtask": {
546
+ "SC": {
547
+ "accuracy": 0.08,
548
+ "count": 50
549
+ },
550
+ "UC": {
551
+ "accuracy": 0.3241758241758242,
552
+ "count": 182
553
+ },
554
+ "US": {
555
+ "accuracy": 0.3644067796610169,
556
+ "count": 118
557
+ }
558
+ }
559
+ },
560
+ "sub_M0": {
561
+ "full_accuracy": 0.0,
562
+ "n_examples": 50,
563
+ "per_subtask": {
564
+ "MD": {
565
+ "accuracy": 0.21768707482993196,
566
+ "count": 294
567
+ },
568
+ "ME": {
569
+ "accuracy": 0.9107142857142857,
570
+ "count": 56
571
+ }
572
+ }
573
+ },
574
+ "sub_M1": {
575
+ "full_accuracy": 0.0,
576
+ "n_examples": 50,
577
+ "per_subtask": {
578
+ "MD": {
579
+ "accuracy": 0.38461538461538464,
580
+ "count": 143
581
+ },
582
+ "MB": {
583
+ "accuracy": 0.0,
584
+ "count": 69
585
+ },
586
+ "ME": {
587
+ "accuracy": 0.8666666666666667,
588
+ "count": 15
589
+ },
590
+ "UB": {
591
+ "accuracy": 0.12195121951219512,
592
+ "count": 123
593
+ }
594
+ }
595
+ },
596
+ "sub_M2": {
597
+ "full_accuracy": 0.0,
598
+ "n_examples": 50,
599
+ "per_subtask": {
600
+ "MD": {
601
+ "accuracy": 0.6296296296296297,
602
+ "count": 108
603
+ },
604
+ "MB": {
605
+ "accuracy": 0.038461538461538464,
606
+ "count": 52
607
+ },
608
+ "ME": {
609
+ "accuracy": 0.9038461538461539,
610
+ "count": 52
611
+ },
612
+ "UB": {
613
+ "accuracy": 0.16091954022988506,
614
+ "count": 87
615
+ },
616
+ "UD": {
617
+ "accuracy": 0.058823529411764705,
618
+ "count": 51
619
+ }
620
+ }
621
+ },
622
+ "sub_M3": {
623
+ "full_accuracy": 0.0,
624
+ "n_examples": 50,
625
+ "per_subtask": {
626
+ "MD": {
627
+ "accuracy": 0.648936170212766,
628
+ "count": 94
629
+ },
630
+ "MB": {
631
+ "accuracy": 0.0,
632
+ "count": 51
633
+ },
634
+ "ME": {
635
+ "accuracy": 1.0,
636
+ "count": 25
637
+ },
638
+ "UB": {
639
+ "accuracy": 0.08974358974358974,
640
+ "count": 78
641
+ },
642
+ "UD": {
643
+ "accuracy": 0.029411764705882353,
644
+ "count": 102
645
+ }
646
+ }
647
+ },
648
+ "sub_M4": {
649
+ "full_accuracy": 0.0,
650
+ "n_examples": 50,
651
+ "per_subtask": {
652
+ "MD": {
653
+ "accuracy": 0.51,
654
+ "count": 100
655
+ },
656
+ "MB": {
657
+ "accuracy": 0.0,
658
+ "count": 50
659
+ },
660
+ "UB": {
661
+ "accuracy": 0.3,
662
+ "count": 50
663
+ },
664
+ "UD": {
665
+ "accuracy": 0.013333333333333334,
666
+ "count": 150
667
+ }
668
+ }
669
+ },
670
+ "sub_M5": {
671
+ "full_accuracy": 0.0,
672
+ "n_examples": 50,
673
+ "per_subtask": {
674
+ "MD": {
675
+ "accuracy": 1.0,
676
+ "count": 50
677
+ },
678
+ "MB": {
679
+ "accuracy": 0.04,
680
+ "count": 50
681
+ },
682
+ "UB": {
683
+ "accuracy": 0.26,
684
+ "count": 50
685
+ },
686
+ "UD": {
687
+ "accuracy": 0.04,
688
+ "count": 200
689
+ }
690
+ }
691
+ },
692
+ "sub_random": {
693
+ "full_accuracy": 0.0,
694
+ "n_examples": 200,
695
+ "per_subtask": {
696
+ "MD": {
697
+ "accuracy": 0.3741496598639456,
698
+ "count": 588
699
+ },
700
+ "MB": {
701
+ "accuracy": 0.014925373134328358,
702
+ "count": 268
703
+ },
704
+ "ME": {
705
+ "accuracy": 0.9666666666666667,
706
+ "count": 60
707
+ },
708
+ "UB": {
709
+ "accuracy": 0.1610738255033557,
710
+ "count": 447
711
+ },
712
+ "UD": {
713
+ "accuracy": 0.0,
714
+ "count": 37
715
+ }
716
+ }
717
+ },
718
+ "sub_B3": {
719
+ "full_accuracy": 0.0,
720
+ "n_examples": 50,
721
+ "per_subtask": {
722
+ "MD": {
723
+ "accuracy": 0.34,
724
+ "count": 150
725
+ },
726
+ "MB": {
727
+ "accuracy": 0.0,
728
+ "count": 50
729
+ },
730
+ "UB": {
731
+ "accuracy": 0.11214953271028037,
732
+ "count": 107
733
+ },
734
+ "UD": {
735
+ "accuracy": 0.023255813953488372,
736
+ "count": 43
737
+ }
738
+ }
739
+ },
740
+ "sub_B4": {
741
+ "full_accuracy": 0.0,
742
+ "n_examples": 50,
743
+ "per_subtask": {
744
+ "MD": {
745
+ "accuracy": 0.51,
746
+ "count": 100
747
+ },
748
+ "MB": {
749
+ "accuracy": 0.04,
750
+ "count": 50
751
+ },
752
+ "UB": {
753
+ "accuracy": 0.14035087719298245,
754
+ "count": 114
755
+ },
756
+ "UD": {
757
+ "accuracy": 0.011627906976744186,
758
+ "count": 86
759
+ }
760
+ }
761
+ },
762
+ "sub_B5": {
763
+ "full_accuracy": 0.0,
764
+ "n_examples": 50,
765
+ "per_subtask": {
766
+ "MD": {
767
+ "accuracy": 1.0,
768
+ "count": 50
769
+ },
770
+ "MB": {
771
+ "accuracy": 0.0,
772
+ "count": 50
773
+ },
774
+ "UB": {
775
+ "accuracy": 0.10457516339869281,
776
+ "count": 153
777
+ },
778
+ "UD": {
779
+ "accuracy": 0.020618556701030927,
780
+ "count": 97
781
+ }
782
+ }
783
+ }
784
+ },
785
+ "summary": {
786
+ "overall_accuracy": 0.0035714285714285713,
787
+ "total_examples": 1400,
788
+ "n_splits": 22
789
+ }
790
+ },
791
+ "sorl_eval": {
792
+ "config": {
793
+ "ops": "add_sub",
794
+ "K": 4,
795
+ "mode": "sorl",
796
+ "n_digits": 6,
797
+ "n_per_split": 50
798
+ },
799
+ "splits": {
800
+ "add_S0": {
801
+ "full_accuracy": 1.0,
802
+ "n_examples": 50,
803
+ "per_subtask": {
804
+ "SA": {
805
+ "accuracy": 1.0,
806
+ "count": 295
807
+ },
808
+ "SS": {
809
+ "accuracy": 1.0,
810
+ "count": 55
811
+ }
812
+ }
813
+ },
814
+ "add_S1": {
815
+ "full_accuracy": 0.86,
816
+ "n_examples": 50,
817
+ "per_subtask": {
818
+ "SA": {
819
+ "accuracy": 0.9920634920634921,
820
+ "count": 126
821
+ },
822
+ "SC": {
823
+ "accuracy": 0.9746835443037974,
824
+ "count": 79
825
+ },
826
+ "SS": {
827
+ "accuracy": 1.0,
828
+ "count": 21
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.9516129032258065,
832
+ "count": 124
833
+ }
834
+ }
835
+ },
836
+ "add_S2": {
837
+ "full_accuracy": 0.72,
838
+ "n_examples": 50,
839
+ "per_subtask": {
840
+ "SA": {
841
+ "accuracy": 1.0,
842
+ "count": 75
843
+ },
844
+ "SC": {
845
+ "accuracy": 0.9193548387096774,
846
+ "count": 62
847
+ },
848
+ "SS": {
849
+ "accuracy": 0.9743589743589743,
850
+ "count": 39
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.9369369369369369,
854
+ "count": 111
855
+ },
856
+ "US": {
857
+ "accuracy": 0.9682539682539683,
858
+ "count": 63
859
+ }
860
+ }
861
+ },
862
+ "add_S3": {
863
+ "full_accuracy": 0.52,
864
+ "n_examples": 50,
865
+ "per_subtask": {
866
+ "SA": {
867
+ "accuracy": 1.0,
868
+ "count": 60
869
+ },
870
+ "SC": {
871
+ "accuracy": 0.9473684210526315,
872
+ "count": 57
873
+ },
874
+ "SS": {
875
+ "accuracy": 1.0,
876
+ "count": 19
877
+ },
878
+ "UC": {
879
+ "accuracy": 0.7692307692307693,
880
+ "count": 104
881
+ },
882
+ "US": {
883
+ "accuracy": 0.9545454545454546,
884
+ "count": 110
885
+ }
886
+ }
887
+ },
888
+ "add_S4": {
889
+ "full_accuracy": 0.48,
890
+ "n_examples": 50,
891
+ "per_subtask": {
892
+ "SA": {
893
+ "accuracy": 1.0,
894
+ "count": 48
895
+ },
896
+ "SC": {
897
+ "accuracy": 1.0,
898
+ "count": 52
899
+ },
900
+ "SS": {
901
+ "accuracy": 1.0,
902
+ "count": 7
903
+ },
904
+ "UC": {
905
+ "accuracy": 0.7303370786516854,
906
+ "count": 89
907
+ },
908
+ "US": {
909
+ "accuracy": 0.8701298701298701,
910
+ "count": 154
911
+ }
912
+ }
913
+ },
914
+ "add_S5": {
915
+ "full_accuracy": 0.08,
916
+ "n_examples": 50,
917
+ "per_subtask": {
918
+ "SA": {
919
+ "accuracy": 1.0,
920
+ "count": 50
921
+ },
922
+ "SC": {
923
+ "accuracy": 1.0,
924
+ "count": 50
925
+ },
926
+ "UC": {
927
+ "accuracy": 0.18,
928
+ "count": 50
929
+ },
930
+ "US": {
931
+ "accuracy": 0.575,
932
+ "count": 200
933
+ }
934
+ }
935
+ },
936
+ "add_S6": {
937
+ "full_accuracy": 0.14,
938
+ "n_examples": 50,
939
+ "per_subtask": {
940
+ "SC": {
941
+ "accuracy": 1.0,
942
+ "count": 50
943
+ },
944
+ "UC": {
945
+ "accuracy": 0.22,
946
+ "count": 50
947
+ },
948
+ "US": {
949
+ "accuracy": 0.604,
950
+ "count": 250
951
+ }
952
+ }
953
+ },
954
+ "add_random": {
955
+ "full_accuracy": 0.84,
956
+ "n_examples": 200,
957
+ "per_subtask": {
958
+ "SA": {
959
+ "accuracy": 0.9930394431554525,
960
+ "count": 431
961
+ },
962
+ "SC": {
963
+ "accuracy": 0.9873417721518988,
964
+ "count": 316
965
+ },
966
+ "SS": {
967
+ "accuracy": 0.9743589743589743,
968
+ "count": 39
969
+ },
970
+ "UC": {
971
+ "accuracy": 0.9571428571428572,
972
+ "count": 560
973
+ },
974
+ "US": {
975
+ "accuracy": 0.9814814814814815,
976
+ "count": 54
977
+ }
978
+ }
979
+ },
980
+ "add_C3": {
981
+ "full_accuracy": 0.74,
982
+ "n_examples": 50,
983
+ "per_subtask": {
984
+ "SA": {
985
+ "accuracy": 1.0,
986
+ "count": 150
987
+ },
988
+ "SC": {
989
+ "accuracy": 0.98,
990
+ "count": 50
991
+ },
992
+ "UC": {
993
+ "accuracy": 0.875,
994
+ "count": 104
995
+ },
996
+ "US": {
997
+ "accuracy": 0.9782608695652174,
998
+ "count": 46
999
+ }
1000
+ }
1001
+ },
1002
+ "add_C4": {
1003
+ "full_accuracy": 0.7,
1004
+ "n_examples": 50,
1005
+ "per_subtask": {
1006
+ "SA": {
1007
+ "accuracy": 1.0,
1008
+ "count": 100
1009
+ },
1010
+ "SC": {
1011
+ "accuracy": 1.0,
1012
+ "count": 50
1013
+ },
1014
+ "UC": {
1015
+ "accuracy": 0.8780487804878049,
1016
+ "count": 123
1017
+ },
1018
+ "US": {
1019
+ "accuracy": 0.948051948051948,
1020
+ "count": 77
1021
+ }
1022
+ }
1023
+ },
1024
+ "add_C5": {
1025
+ "full_accuracy": 0.72,
1026
+ "n_examples": 50,
1027
+ "per_subtask": {
1028
+ "SA": {
1029
+ "accuracy": 1.0,
1030
+ "count": 50
1031
+ },
1032
+ "SC": {
1033
+ "accuracy": 0.98,
1034
+ "count": 50
1035
+ },
1036
+ "UC": {
1037
+ "accuracy": 0.9025974025974026,
1038
+ "count": 154
1039
+ },
1040
+ "US": {
1041
+ "accuracy": 0.9895833333333334,
1042
+ "count": 96
1043
+ }
1044
+ }
1045
+ },
1046
+ "add_C6": {
1047
+ "full_accuracy": 0.62,
1048
+ "n_examples": 50,
1049
+ "per_subtask": {
1050
+ "SC": {
1051
+ "accuracy": 1.0,
1052
+ "count": 50
1053
+ },
1054
+ "UC": {
1055
+ "accuracy": 0.9010989010989011,
1056
+ "count": 182
1057
+ },
1058
+ "US": {
1059
+ "accuracy": 0.9152542372881356,
1060
+ "count": 118
1061
+ }
1062
+ }
1063
+ },
1064
+ "sub_M0": {
1065
+ "full_accuracy": 0.9,
1066
+ "n_examples": 50,
1067
+ "per_subtask": {
1068
+ "MD": {
1069
+ "accuracy": 0.9829931972789115,
1070
+ "count": 294
1071
+ },
1072
+ "ME": {
1073
+ "accuracy": 1.0,
1074
+ "count": 56
1075
+ }
1076
+ }
1077
+ },
1078
+ "sub_M1": {
1079
+ "full_accuracy": 0.94,
1080
+ "n_examples": 50,
1081
+ "per_subtask": {
1082
+ "MD": {
1083
+ "accuracy": 0.993006993006993,
1084
+ "count": 143
1085
+ },
1086
+ "MB": {
1087
+ "accuracy": 0.9855072463768116,
1088
+ "count": 69
1089
+ },
1090
+ "ME": {
1091
+ "accuracy": 1.0,
1092
+ "count": 15
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.991869918699187,
1096
+ "count": 123
1097
+ }
1098
+ }
1099
+ },
1100
+ "sub_M2": {
1101
+ "full_accuracy": 0.82,
1102
+ "n_examples": 50,
1103
+ "per_subtask": {
1104
+ "MD": {
1105
+ "accuracy": 0.9814814814814815,
1106
+ "count": 108
1107
+ },
1108
+ "MB": {
1109
+ "accuracy": 0.9615384615384616,
1110
+ "count": 52
1111
+ },
1112
+ "ME": {
1113
+ "accuracy": 1.0,
1114
+ "count": 52
1115
+ },
1116
+ "UB": {
1117
+ "accuracy": 0.9425287356321839,
1118
+ "count": 87
1119
+ },
1120
+ "UD": {
1121
+ "accuracy": 1.0,
1122
+ "count": 51
1123
+ }
1124
+ }
1125
+ },
1126
+ "sub_M3": {
1127
+ "full_accuracy": 0.4,
1128
+ "n_examples": 50,
1129
+ "per_subtask": {
1130
+ "MD": {
1131
+ "accuracy": 1.0,
1132
+ "count": 94
1133
+ },
1134
+ "MB": {
1135
+ "accuracy": 1.0,
1136
+ "count": 51
1137
+ },
1138
+ "ME": {
1139
+ "accuracy": 1.0,
1140
+ "count": 25
1141
+ },
1142
+ "UB": {
1143
+ "accuracy": 0.6153846153846154,
1144
+ "count": 78
1145
+ },
1146
+ "UD": {
1147
+ "accuracy": 1.0,
1148
+ "count": 102
1149
+ }
1150
+ }
1151
+ },
1152
+ "sub_M4": {
1153
+ "full_accuracy": 0.14,
1154
+ "n_examples": 50,
1155
+ "per_subtask": {
1156
+ "MD": {
1157
+ "accuracy": 1.0,
1158
+ "count": 100
1159
+ },
1160
+ "MB": {
1161
+ "accuracy": 1.0,
1162
+ "count": 50
1163
+ },
1164
+ "UB": {
1165
+ "accuracy": 0.44,
1166
+ "count": 50
1167
+ },
1168
+ "UD": {
1169
+ "accuracy": 0.84,
1170
+ "count": 150
1171
+ }
1172
+ }
1173
+ },
1174
+ "sub_M5": {
1175
+ "full_accuracy": 0.04,
1176
+ "n_examples": 50,
1177
+ "per_subtask": {
1178
+ "MD": {
1179
+ "accuracy": 1.0,
1180
+ "count": 50
1181
+ },
1182
+ "MB": {
1183
+ "accuracy": 1.0,
1184
+ "count": 50
1185
+ },
1186
+ "UB": {
1187
+ "accuracy": 0.32,
1188
+ "count": 50
1189
+ },
1190
+ "UD": {
1191
+ "accuracy": 0.69,
1192
+ "count": 200
1193
+ }
1194
+ }
1195
+ },
1196
+ "sub_random": {
1197
+ "full_accuracy": 0.83,
1198
+ "n_examples": 200,
1199
+ "per_subtask": {
1200
+ "MD": {
1201
+ "accuracy": 0.9829931972789115,
1202
+ "count": 588
1203
+ },
1204
+ "MB": {
1205
+ "accuracy": 0.9738805970149254,
1206
+ "count": 268
1207
+ },
1208
+ "ME": {
1209
+ "accuracy": 0.9666666666666667,
1210
+ "count": 60
1211
+ },
1212
+ "UB": {
1213
+ "accuracy": 0.9664429530201343,
1214
+ "count": 447
1215
+ },
1216
+ "UD": {
1217
+ "accuracy": 1.0,
1218
+ "count": 37
1219
+ }
1220
+ }
1221
+ },
1222
+ "sub_B3": {
1223
+ "full_accuracy": 0.72,
1224
+ "n_examples": 50,
1225
+ "per_subtask": {
1226
+ "MD": {
1227
+ "accuracy": 0.9666666666666667,
1228
+ "count": 150
1229
+ },
1230
+ "MB": {
1231
+ "accuracy": 0.98,
1232
+ "count": 50
1233
+ },
1234
+ "UB": {
1235
+ "accuracy": 0.9158878504672897,
1236
+ "count": 107
1237
+ },
1238
+ "UD": {
1239
+ "accuracy": 1.0,
1240
+ "count": 43
1241
+ }
1242
+ }
1243
+ },
1244
+ "sub_B4": {
1245
+ "full_accuracy": 0.54,
1246
+ "n_examples": 50,
1247
+ "per_subtask": {
1248
+ "MD": {
1249
+ "accuracy": 0.98,
1250
+ "count": 100
1251
+ },
1252
+ "MB": {
1253
+ "accuracy": 1.0,
1254
+ "count": 50
1255
+ },
1256
+ "UB": {
1257
+ "accuracy": 0.8157894736842105,
1258
+ "count": 114
1259
+ },
1260
+ "UD": {
1261
+ "accuracy": 0.9534883720930233,
1262
+ "count": 86
1263
+ }
1264
+ }
1265
+ },
1266
+ "sub_B5": {
1267
+ "full_accuracy": 0.68,
1268
+ "n_examples": 50,
1269
+ "per_subtask": {
1270
+ "MD": {
1271
+ "accuracy": 1.0,
1272
+ "count": 50
1273
+ },
1274
+ "MB": {
1275
+ "accuracy": 1.0,
1276
+ "count": 50
1277
+ },
1278
+ "UB": {
1279
+ "accuracy": 0.9084967320261438,
1280
+ "count": 153
1281
+ },
1282
+ "UD": {
1283
+ "accuracy": 0.8969072164948454,
1284
+ "count": 97
1285
+ }
1286
+ }
1287
+ }
1288
+ },
1289
+ "summary": {
1290
+ "overall_accuracy": 0.6585714285714286,
1291
+ "total_examples": 1400,
1292
+ "n_splits": 22
1293
+ }
1294
+ },
1295
+ "sorl_overall_accuracy": 0.6585714285714286,
1296
+ "sft_overall_accuracy": 0.0035714285714285713
1297
+ }
add_sub_sorl_v1_abs30_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b571ce1a2decbbd505920faf2f1291b7d97212c47921a3415100c4a150125a
3
+ size 650385300
add_sub_sorl_v1_abs30_10K/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 2,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 30,
9
+ "K": 4,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 10,
15
+ "dataset_size": 10000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/sweep/as_sorl_abs30_K4_10K",
18
+ "device": "cuda",
19
+ "push_to_hub": true,
20
+ "no_wandb": false,
21
+ "n_params": 162519662,
22
+ "run_name": "add_sub_sorl_v1_abs30_10K",
23
+ "git_commit": "8b149cd78544f600d46584ae2f143c0261d8eeb8",
24
+ "timestamp": "2026-04-12T05:24:42.766172+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": "jcc4cn17",
31
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/jcc4cn17",
32
+ "final_accuracy": 0.6585714285714286,
33
+ "sft_accuracy": 0.0035714285714285713,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }