amirali1985 commited on
Commit
4194189
·
verified ·
1 Parent(s): 73638b8

Upload add_sub_sorl_v1_abs10_K1_10K

Browse files
add_sub_sorl_v1_abs10_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_K1_10K/metrics.json ADDED
@@ -0,0 +1,1297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563
34
+ ],
35
+ "loss": [
36
+ 8.773706436157227,
37
+ 4.004265308380127,
38
+ 3.2570762634277344,
39
+ 3.041517734527588,
40
+ 2.8982486724853516,
41
+ 2.4261059761047363,
42
+ -1.0320148468017578,
43
+ -6.537303924560547,
44
+ -7.572015285491943,
45
+ -8.820948600769043,
46
+ -9.133442878723145,
47
+ -9.698243141174316,
48
+ -10.157323837280273,
49
+ -9.581876754760742,
50
+ -10.299582481384277,
51
+ -8.835819244384766,
52
+ -7.875391006469727,
53
+ -6.175548553466797,
54
+ -4.922896385192871,
55
+ -5.274993896484375,
56
+ -3.509742021560669,
57
+ -2.9279913902282715,
58
+ -2.248248815536499,
59
+ -1.4432270526885986,
60
+ -0.9849211573600769,
61
+ -0.7613394260406494,
62
+ -0.9093202948570251,
63
+ -0.20168478786945343,
64
+ -0.5009633302688599,
65
+ -0.4359404742717743
66
+ ],
67
+ "base_loss": [
68
+ 6.400631904602051,
69
+ 2.4112229347229004,
70
+ 1.8464170694351196,
71
+ 1.8753893375396729,
72
+ 1.8903639316558838,
73
+ 1.8746846914291382,
74
+ 1.8820133209228516,
75
+ 1.855467438697815,
76
+ 1.7718533277511597,
77
+ 1.7219583988189697,
78
+ 1.7517011165618896,
79
+ 1.6724598407745361,
80
+ 1.647750973701477,
81
+ 1.4754587411880493,
82
+ 1.4356294870376587,
83
+ 1.2360666990280151,
84
+ 1.1430914402008057,
85
+ 0.8601657748222351,
86
+ 0.6953770518302917,
87
+ 0.6893697381019592,
88
+ 0.4810207784175873,
89
+ 0.39060816168785095,
90
+ 0.3150084316730499,
91
+ 0.2136605978012085,
92
+ 0.163777157664299,
93
+ 0.12215958535671234,
94
+ 0.13819025456905365,
95
+ 0.08133143931627274,
96
+ 0.08307908475399017,
97
+ 0.07490883022546768
98
+ ],
99
+ "info_loss": [
100
+ -0.21113967895507812,
101
+ -0.04423642158508301,
102
+ -0.04978370666503906,
103
+ -0.0719677209854126,
104
+ -0.087005615234375,
105
+ -0.13271820545196533,
106
+ -0.4793226718902588,
107
+ -1.025976300239563,
108
+ -1.1140811443328857,
109
+ -1.2247107028961182,
110
+ -1.256365180015564,
111
+ -1.3006340265274048,
112
+ -1.3388965129852295,
113
+ -1.2538009881973267,
114
+ -1.3090555667877197,
115
+ -1.1411150693893433,
116
+ -1.0133163928985596,
117
+ -0.8001939654350281,
118
+ -0.6398270130157471,
119
+ -0.6569589376449585,
120
+ -0.44588613510131836,
121
+ -0.368633508682251,
122
+ -0.2934108078479767,
123
+ -0.1994001418352127,
124
+ -0.14748595654964447,
125
+ -0.11586211621761322,
126
+ -0.12713156640529633,
127
+ -0.04881247505545616,
128
+ -0.07891618460416794,
129
+ -0.06897100806236267
130
+ ],
131
+ "abs_loss": [
132
+ 2.0780529975891113,
133
+ 1.8440923690795898,
134
+ 1.8333244323730469,
135
+ 1.8426448106765747,
136
+ 1.8403841257095337,
137
+ 1.8378067016601562,
138
+ 1.8460584878921509,
139
+ 1.7702795267105103,
140
+ 1.5657355785369873,
141
+ 1.3797943592071533,
142
+ 1.3185033798217773,
143
+ 1.2104538679122925,
144
+ 1.125502586364746,
145
+ 0.9860450029373169,
146
+ 0.8728498220443726,
147
+ 0.7434267997741699,
148
+ 0.7268884778022766,
149
+ 0.5966061949729919,
150
+ 0.5761818885803223,
151
+ 0.4872754216194153,
152
+ 0.42560893297195435,
153
+ 0.3502724766731262,
154
+ 0.29791387915611267,
155
+ 0.267427533864975,
156
+ 0.24475598335266113,
157
+ 0.22809018194675446,
158
+ 0.13244910538196564,
159
+ 0.15626277029514313,
160
+ 0.18973781168460846,
161
+ 0.15587982535362244
162
+ ],
163
+ "zipf_loss": [
164
+ 4.276666164398193,
165
+ 1.8509973287582397,
166
+ 1.7251636981964111,
167
+ 1.7015411853790283,
168
+ 1.6939024925231934,
169
+ 1.6948225498199463,
170
+ 1.6945927143096924,
171
+ 1.6899633407592773,
172
+ 1.6403698921203613,
173
+ 1.5662205219268799,
174
+ 1.546657681465149,
175
+ 1.514591932296753,
176
+ 1.4713406562805176,
177
+ 1.3820693492889404,
178
+ 1.2680578231811523,
179
+ 1.26492178440094,
180
+ 1.0419921875,
181
+ 0.9065651893615723,
182
+ 0.7223787903785706,
183
+ 0.5564987659454346,
184
+ 0.42553776502609253,
185
+ 0.33270835876464844,
186
+ 0.3410593867301941,
187
+ 0.3103709816932678,
188
+ 0.30168575048446655,
189
+ 0.25231313705444336,
190
+ 0.21056024730205536,
191
+ 0.18948225677013397,
192
+ 0.18614569306373596,
193
+ 0.16327276825904846
194
+ ],
195
+ "denoise_loss": [],
196
+ "ortho_loss": [
197
+ 0.3211989104747772,
198
+ 0.19110506772994995,
199
+ 0.11888417601585388,
200
+ 0.0979497954249382,
201
+ 0.09260708838701248,
202
+ 0.10610710084438324,
203
+ 0.14702288806438446,
204
+ 0.21001708507537842,
205
+ 0.2301187366247177,
206
+ 0.24748258292675018,
207
+ 0.2629227936267853,
208
+ 0.26881012320518494,
209
+ 0.269039511680603,
210
+ 0.26941126585006714,
211
+ 0.26823461055755615,
212
+ 0.26427075266838074,
213
+ 0.2743089199066162,
214
+ 0.2734338045120239,
215
+ 0.27655836939811707,
216
+ 0.2692953944206238,
217
+ 0.27160370349884033,
218
+ 0.27964547276496887,
219
+ 0.2874664068222046,
220
+ 0.2921176552772522,
221
+ 0.2956086993217468,
222
+ 0.29919639229774475,
223
+ 0.3037196099758148,
224
+ 0.3071216940879822,
225
+ 0.311845988035202,
226
+ 0.3125845789909363
227
+ ],
228
+ "lr": [
229
+ 7.840000000000001e-05,
230
+ 8e-05,
231
+ 8e-05,
232
+ 8e-05,
233
+ 8e-05,
234
+ 8e-05,
235
+ 8e-05,
236
+ 8e-05,
237
+ 8e-05,
238
+ 8e-05,
239
+ 8e-05,
240
+ 8e-05,
241
+ 8e-05,
242
+ 8e-05,
243
+ 8e-05,
244
+ 8e-05,
245
+ 8e-05,
246
+ 8e-05,
247
+ 7.656578947368422e-05,
248
+ 7.064473684210527e-05,
249
+ 6.472368421052631e-05,
250
+ 5.7973684210526325e-05,
251
+ 5.2052631578947374e-05,
252
+ 4.613157894736842e-05,
253
+ 3.938157894736842e-05,
254
+ 3.346052631578948e-05,
255
+ 2.7539473684210523e-05,
256
+ 2.078947368421053e-05,
257
+ 1.4868421052631572e-05,
258
+ 8.947368421052635e-06
259
+ ],
260
+ "emb_lr": [],
261
+ "eval_step": [
262
+ 150,
263
+ 307,
264
+ 464,
265
+ 621,
266
+ 778,
267
+ 935,
268
+ 1092,
269
+ 1199,
270
+ 1356,
271
+ 1513
272
+ ],
273
+ "eval_accuracy": [
274
+ 0.0,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0
284
+ ]
285
+ },
286
+ "final_accuracy": 0.0,
287
+ "sft_eval": {
288
+ "config": {
289
+ "ops": "add_sub",
290
+ "K": null,
291
+ "mode": "sft",
292
+ "n_digits": 6,
293
+ "n_per_split": 250
294
+ },
295
+ "splits": {
296
+ "add_S0": {
297
+ "full_accuracy": 0.944,
298
+ "n_examples": 250,
299
+ "per_subtask": {
300
+ "SA": {
301
+ "accuracy": 0.9908076165462902,
302
+ "count": 1523
303
+ },
304
+ "SS": {
305
+ "accuracy": 1.0,
306
+ "count": 227
307
+ }
308
+ }
309
+ },
310
+ "add_S1": {
311
+ "full_accuracy": 0.952,
312
+ "n_examples": 250,
313
+ "per_subtask": {
314
+ "SA": {
315
+ "accuracy": 0.9870848708487084,
316
+ "count": 542
317
+ },
318
+ "SC": {
319
+ "accuracy": 0.9952038369304557,
320
+ "count": 417
321
+ },
322
+ "SS": {
323
+ "accuracy": 1.0,
324
+ "count": 70
325
+ },
326
+ "UC": {
327
+ "accuracy": 0.9958391123439667,
328
+ "count": 721
329
+ }
330
+ }
331
+ },
332
+ "add_S2": {
333
+ "full_accuracy": 0.912,
334
+ "n_examples": 250,
335
+ "per_subtask": {
336
+ "SA": {
337
+ "accuracy": 0.970108695652174,
338
+ "count": 368
339
+ },
340
+ "SC": {
341
+ "accuracy": 0.9781931464174455,
342
+ "count": 321
343
+ },
344
+ "SS": {
345
+ "accuracy": 0.9736842105263158,
346
+ "count": 228
347
+ },
348
+ "UC": {
349
+ "accuracy": 0.9962335216572504,
350
+ "count": 531
351
+ },
352
+ "US": {
353
+ "accuracy": 1.0,
354
+ "count": 302
355
+ }
356
+ }
357
+ },
358
+ "add_S3": {
359
+ "full_accuracy": 0.756,
360
+ "n_examples": 250,
361
+ "per_subtask": {
362
+ "SA": {
363
+ "accuracy": 0.990228013029316,
364
+ "count": 307
365
+ },
366
+ "SC": {
367
+ "accuracy": 0.993127147766323,
368
+ "count": 291
369
+ },
370
+ "SS": {
371
+ "accuracy": 0.9823008849557522,
372
+ "count": 113
373
+ },
374
+ "UC": {
375
+ "accuracy": 0.8799171842650103,
376
+ "count": 483
377
+ },
378
+ "US": {
379
+ "accuracy": 1.0,
380
+ "count": 556
381
+ }
382
+ }
383
+ },
384
+ "add_S4": {
385
+ "full_accuracy": 0.544,
386
+ "n_examples": 250,
387
+ "per_subtask": {
388
+ "SA": {
389
+ "accuracy": 1.0,
390
+ "count": 238
391
+ },
392
+ "SC": {
393
+ "accuracy": 0.988929889298893,
394
+ "count": 271
395
+ },
396
+ "SS": {
397
+ "accuracy": 1.0,
398
+ "count": 59
399
+ },
400
+ "UC": {
401
+ "accuracy": 0.782716049382716,
402
+ "count": 405
403
+ },
404
+ "US": {
405
+ "accuracy": 0.9047619047619048,
406
+ "count": 777
407
+ }
408
+ }
409
+ },
410
+ "add_S5": {
411
+ "full_accuracy": 0.26,
412
+ "n_examples": 250,
413
+ "per_subtask": {
414
+ "SA": {
415
+ "accuracy": 0.988,
416
+ "count": 250
417
+ },
418
+ "SC": {
419
+ "accuracy": 1.0,
420
+ "count": 250
421
+ },
422
+ "UC": {
423
+ "accuracy": 0.452,
424
+ "count": 250
425
+ },
426
+ "US": {
427
+ "accuracy": 0.736,
428
+ "count": 1000
429
+ }
430
+ }
431
+ },
432
+ "add_S6": {
433
+ "full_accuracy": 0.656,
434
+ "n_examples": 250,
435
+ "per_subtask": {
436
+ "SC": {
437
+ "accuracy": 1.0,
438
+ "count": 250
439
+ },
440
+ "UC": {
441
+ "accuracy": 0.812,
442
+ "count": 250
443
+ },
444
+ "US": {
445
+ "accuracy": 0.8448,
446
+ "count": 1250
447
+ }
448
+ }
449
+ },
450
+ "add_random": {
451
+ "full_accuracy": 0.91,
452
+ "n_examples": 200,
453
+ "per_subtask": {
454
+ "SA": {
455
+ "accuracy": 0.9886621315192744,
456
+ "count": 441
457
+ },
458
+ "SC": {
459
+ "accuracy": 0.9936908517350158,
460
+ "count": 317
461
+ },
462
+ "SS": {
463
+ "accuracy": 0.9814814814814815,
464
+ "count": 54
465
+ },
466
+ "UC": {
467
+ "accuracy": 0.981203007518797,
468
+ "count": 532
469
+ },
470
+ "US": {
471
+ "accuracy": 0.9642857142857143,
472
+ "count": 56
473
+ }
474
+ }
475
+ },
476
+ "add_C3": {
477
+ "full_accuracy": 0.772,
478
+ "n_examples": 250,
479
+ "per_subtask": {
480
+ "SA": {
481
+ "accuracy": 0.9786666666666667,
482
+ "count": 750
483
+ },
484
+ "SC": {
485
+ "accuracy": 0.996,
486
+ "count": 250
487
+ },
488
+ "UC": {
489
+ "accuracy": 0.9096638655462185,
490
+ "count": 476
491
+ },
492
+ "US": {
493
+ "accuracy": 1.0,
494
+ "count": 274
495
+ }
496
+ }
497
+ },
498
+ "add_C4": {
499
+ "full_accuracy": 0.836,
500
+ "n_examples": 250,
501
+ "per_subtask": {
502
+ "SA": {
503
+ "accuracy": 0.986,
504
+ "count": 500
505
+ },
506
+ "SC": {
507
+ "accuracy": 0.996,
508
+ "count": 250
509
+ },
510
+ "UC": {
511
+ "accuracy": 0.9515885022692889,
512
+ "count": 661
513
+ },
514
+ "US": {
515
+ "accuracy": 0.9616519174041298,
516
+ "count": 339
517
+ }
518
+ }
519
+ },
520
+ "add_C5": {
521
+ "full_accuracy": 0.78,
522
+ "n_examples": 250,
523
+ "per_subtask": {
524
+ "SA": {
525
+ "accuracy": 0.996,
526
+ "count": 250
527
+ },
528
+ "SC": {
529
+ "accuracy": 1.0,
530
+ "count": 250
531
+ },
532
+ "UC": {
533
+ "accuracy": 0.9344262295081968,
534
+ "count": 732
535
+ },
536
+ "US": {
537
+ "accuracy": 0.9305019305019305,
538
+ "count": 518
539
+ }
540
+ }
541
+ },
542
+ "add_C6": {
543
+ "full_accuracy": 0.86,
544
+ "n_examples": 250,
545
+ "per_subtask": {
546
+ "SC": {
547
+ "accuracy": 1.0,
548
+ "count": 250
549
+ },
550
+ "UC": {
551
+ "accuracy": 0.9673423423423423,
552
+ "count": 888
553
+ },
554
+ "US": {
555
+ "accuracy": 0.9673202614379085,
556
+ "count": 612
557
+ }
558
+ }
559
+ },
560
+ "sub_M0": {
561
+ "full_accuracy": 0.988,
562
+ "n_examples": 250,
563
+ "per_subtask": {
564
+ "MD": {
565
+ "accuracy": 0.998671096345515,
566
+ "count": 1505
567
+ },
568
+ "ME": {
569
+ "accuracy": 0.9959183673469387,
570
+ "count": 245
571
+ }
572
+ }
573
+ },
574
+ "sub_M1": {
575
+ "full_accuracy": 0.932,
576
+ "n_examples": 250,
577
+ "per_subtask": {
578
+ "MD": {
579
+ "accuracy": 0.9929971988795518,
580
+ "count": 714
581
+ },
582
+ "MB": {
583
+ "accuracy": 0.9786096256684492,
584
+ "count": 374
585
+ },
586
+ "ME": {
587
+ "accuracy": 0.9866666666666667,
588
+ "count": 75
589
+ },
590
+ "UB": {
591
+ "accuracy": 0.9948892674616695,
592
+ "count": 587
593
+ }
594
+ }
595
+ },
596
+ "sub_M2": {
597
+ "full_accuracy": 0.896,
598
+ "n_examples": 250,
599
+ "per_subtask": {
600
+ "MD": {
601
+ "accuracy": 0.9909747292418772,
602
+ "count": 554
603
+ },
604
+ "MB": {
605
+ "accuracy": 0.9743589743589743,
606
+ "count": 273
607
+ },
608
+ "ME": {
609
+ "accuracy": 0.9908675799086758,
610
+ "count": 219
611
+ },
612
+ "UB": {
613
+ "accuracy": 0.9674418604651163,
614
+ "count": 430
615
+ },
616
+ "UD": {
617
+ "accuracy": 0.9963503649635036,
618
+ "count": 274
619
+ }
620
+ }
621
+ },
622
+ "sub_M3": {
623
+ "full_accuracy": 0.272,
624
+ "n_examples": 250,
625
+ "per_subtask": {
626
+ "MD": {
627
+ "accuracy": 0.9978165938864629,
628
+ "count": 458
629
+ },
630
+ "MB": {
631
+ "accuracy": 0.9923371647509579,
632
+ "count": 261
633
+ },
634
+ "ME": {
635
+ "accuracy": 1.0,
636
+ "count": 124
637
+ },
638
+ "UB": {
639
+ "accuracy": 0.5438144329896907,
640
+ "count": 388
641
+ },
642
+ "UD": {
643
+ "accuracy": 0.9730250481695568,
644
+ "count": 519
645
+ }
646
+ }
647
+ },
648
+ "sub_M4": {
649
+ "full_accuracy": 0.06,
650
+ "n_examples": 250,
651
+ "per_subtask": {
652
+ "MD": {
653
+ "accuracy": 1.0,
654
+ "count": 500
655
+ },
656
+ "MB": {
657
+ "accuracy": 0.98,
658
+ "count": 250
659
+ },
660
+ "UB": {
661
+ "accuracy": 0.192,
662
+ "count": 250
663
+ },
664
+ "UD": {
665
+ "accuracy": 0.6426666666666667,
666
+ "count": 750
667
+ }
668
+ }
669
+ },
670
+ "sub_M5": {
671
+ "full_accuracy": 0.016,
672
+ "n_examples": 250,
673
+ "per_subtask": {
674
+ "MD": {
675
+ "accuracy": 1.0,
676
+ "count": 250
677
+ },
678
+ "MB": {
679
+ "accuracy": 0.984,
680
+ "count": 250
681
+ },
682
+ "UB": {
683
+ "accuracy": 0.096,
684
+ "count": 250
685
+ },
686
+ "UD": {
687
+ "accuracy": 0.476,
688
+ "count": 1000
689
+ }
690
+ }
691
+ },
692
+ "sub_random": {
693
+ "full_accuracy": 0.935,
694
+ "n_examples": 200,
695
+ "per_subtask": {
696
+ "MD": {
697
+ "accuracy": 0.993103448275862,
698
+ "count": 580
699
+ },
700
+ "MB": {
701
+ "accuracy": 0.9887640449438202,
702
+ "count": 267
703
+ },
704
+ "ME": {
705
+ "accuracy": 1.0,
706
+ "count": 63
707
+ },
708
+ "UB": {
709
+ "accuracy": 0.984375,
710
+ "count": 448
711
+ },
712
+ "UD": {
713
+ "accuracy": 1.0,
714
+ "count": 42
715
+ }
716
+ }
717
+ },
718
+ "sub_B3": {
719
+ "full_accuracy": 0.724,
720
+ "n_examples": 250,
721
+ "per_subtask": {
722
+ "MD": {
723
+ "accuracy": 0.9986666666666667,
724
+ "count": 750
725
+ },
726
+ "MB": {
727
+ "accuracy": 0.996,
728
+ "count": 250
729
+ },
730
+ "UB": {
731
+ "accuracy": 0.8700787401574803,
732
+ "count": 508
733
+ },
734
+ "UD": {
735
+ "accuracy": 0.9628099173553719,
736
+ "count": 242
737
+ }
738
+ }
739
+ },
740
+ "sub_B4": {
741
+ "full_accuracy": 0.56,
742
+ "n_examples": 250,
743
+ "per_subtask": {
744
+ "MD": {
745
+ "accuracy": 0.988,
746
+ "count": 500
747
+ },
748
+ "MB": {
749
+ "accuracy": 0.976,
750
+ "count": 250
751
+ },
752
+ "UB": {
753
+ "accuracy": 0.8257328990228013,
754
+ "count": 614
755
+ },
756
+ "UD": {
757
+ "accuracy": 0.8860103626943006,
758
+ "count": 386
759
+ }
760
+ }
761
+ },
762
+ "sub_B5": {
763
+ "full_accuracy": 0.52,
764
+ "n_examples": 250,
765
+ "per_subtask": {
766
+ "MD": {
767
+ "accuracy": 1.0,
768
+ "count": 250
769
+ },
770
+ "MB": {
771
+ "accuracy": 0.992,
772
+ "count": 250
773
+ },
774
+ "UB": {
775
+ "accuracy": 0.8597168597168597,
776
+ "count": 777
777
+ },
778
+ "UD": {
779
+ "accuracy": 0.8752642706131079,
780
+ "count": 473
781
+ }
782
+ }
783
+ }
784
+ },
785
+ "summary": {
786
+ "overall_accuracy": 0.6812962962962963,
787
+ "total_examples": 5400,
788
+ "n_splits": 22
789
+ }
790
+ },
791
+ "sorl_eval": {
792
+ "config": {
793
+ "ops": "add_sub",
794
+ "K": 1,
795
+ "mode": "sorl",
796
+ "n_digits": 6,
797
+ "n_per_split": 250
798
+ },
799
+ "splits": {
800
+ "add_S0": {
801
+ "full_accuracy": 0.0,
802
+ "n_examples": 250,
803
+ "per_subtask": {
804
+ "SA": {
805
+ "accuracy": 0.2593565331582403,
806
+ "count": 1523
807
+ },
808
+ "SS": {
809
+ "accuracy": 0.23348017621145375,
810
+ "count": 227
811
+ }
812
+ }
813
+ },
814
+ "add_S1": {
815
+ "full_accuracy": 0.0,
816
+ "n_examples": 250,
817
+ "per_subtask": {
818
+ "SA": {
819
+ "accuracy": 0.31365313653136534,
820
+ "count": 542
821
+ },
822
+ "SC": {
823
+ "accuracy": 0.09112709832134293,
824
+ "count": 417
825
+ },
826
+ "SS": {
827
+ "accuracy": 0.12857142857142856,
828
+ "count": 70
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.2565880721220527,
832
+ "count": 721
833
+ }
834
+ }
835
+ },
836
+ "add_S2": {
837
+ "full_accuracy": 0.0,
838
+ "n_examples": 250,
839
+ "per_subtask": {
840
+ "SA": {
841
+ "accuracy": 0.29891304347826086,
842
+ "count": 368
843
+ },
844
+ "SC": {
845
+ "accuracy": 0.13707165109034267,
846
+ "count": 321
847
+ },
848
+ "SS": {
849
+ "accuracy": 0.20614035087719298,
850
+ "count": 228
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.3465160075329567,
854
+ "count": 531
855
+ },
856
+ "US": {
857
+ "accuracy": 0.0695364238410596,
858
+ "count": 302
859
+ }
860
+ }
861
+ },
862
+ "add_S3": {
863
+ "full_accuracy": 0.0,
864
+ "n_examples": 250,
865
+ "per_subtask": {
866
+ "SA": {
867
+ "accuracy": 0.2931596091205212,
868
+ "count": 307
869
+ },
870
+ "SC": {
871
+ "accuracy": 0.12027491408934708,
872
+ "count": 291
873
+ },
874
+ "SS": {
875
+ "accuracy": 0.3185840707964602,
876
+ "count": 113
877
+ },
878
+ "UC": {
879
+ "accuracy": 0.40993788819875776,
880
+ "count": 483
881
+ },
882
+ "US": {
883
+ "accuracy": 0.04316546762589928,
884
+ "count": 556
885
+ }
886
+ }
887
+ },
888
+ "add_S4": {
889
+ "full_accuracy": 0.0,
890
+ "n_examples": 250,
891
+ "per_subtask": {
892
+ "SA": {
893
+ "accuracy": 0.2857142857142857,
894
+ "count": 238
895
+ },
896
+ "SC": {
897
+ "accuracy": 0.08487084870848709,
898
+ "count": 271
899
+ },
900
+ "SS": {
901
+ "accuracy": 0.4576271186440678,
902
+ "count": 59
903
+ },
904
+ "UC": {
905
+ "accuracy": 0.5308641975308642,
906
+ "count": 405
907
+ },
908
+ "US": {
909
+ "accuracy": 0.04504504504504504,
910
+ "count": 777
911
+ }
912
+ }
913
+ },
914
+ "add_S5": {
915
+ "full_accuracy": 0.0,
916
+ "n_examples": 250,
917
+ "per_subtask": {
918
+ "SA": {
919
+ "accuracy": 0.588,
920
+ "count": 250
921
+ },
922
+ "SC": {
923
+ "accuracy": 0.084,
924
+ "count": 250
925
+ },
926
+ "UC": {
927
+ "accuracy": 0.488,
928
+ "count": 250
929
+ },
930
+ "US": {
931
+ "accuracy": 0.059,
932
+ "count": 1000
933
+ }
934
+ }
935
+ },
936
+ "add_S6": {
937
+ "full_accuracy": 0.0,
938
+ "n_examples": 250,
939
+ "per_subtask": {
940
+ "SC": {
941
+ "accuracy": 0.08,
942
+ "count": 250
943
+ },
944
+ "UC": {
945
+ "accuracy": 0.992,
946
+ "count": 250
947
+ },
948
+ "US": {
949
+ "accuracy": 0.1616,
950
+ "count": 1250
951
+ }
952
+ }
953
+ },
954
+ "add_random": {
955
+ "full_accuracy": 0.0,
956
+ "n_examples": 200,
957
+ "per_subtask": {
958
+ "SA": {
959
+ "accuracy": 0.2925170068027211,
960
+ "count": 441
961
+ },
962
+ "SC": {
963
+ "accuracy": 0.11356466876971609,
964
+ "count": 317
965
+ },
966
+ "SS": {
967
+ "accuracy": 0.2962962962962963,
968
+ "count": 54
969
+ },
970
+ "UC": {
971
+ "accuracy": 0.2725563909774436,
972
+ "count": 532
973
+ },
974
+ "US": {
975
+ "accuracy": 0.07142857142857142,
976
+ "count": 56
977
+ }
978
+ }
979
+ },
980
+ "add_C3": {
981
+ "full_accuracy": 0.0,
982
+ "n_examples": 250,
983
+ "per_subtask": {
984
+ "SA": {
985
+ "accuracy": 0.33866666666666667,
986
+ "count": 750
987
+ },
988
+ "SC": {
989
+ "accuracy": 0.044,
990
+ "count": 250
991
+ },
992
+ "UC": {
993
+ "accuracy": 0.17016806722689076,
994
+ "count": 476
995
+ },
996
+ "US": {
997
+ "accuracy": 0.0036496350364963502,
998
+ "count": 274
999
+ }
1000
+ }
1001
+ },
1002
+ "add_C4": {
1003
+ "full_accuracy": 0.0,
1004
+ "n_examples": 250,
1005
+ "per_subtask": {
1006
+ "SA": {
1007
+ "accuracy": 0.412,
1008
+ "count": 500
1009
+ },
1010
+ "SC": {
1011
+ "accuracy": 0.056,
1012
+ "count": 250
1013
+ },
1014
+ "UC": {
1015
+ "accuracy": 0.21331316187594554,
1016
+ "count": 661
1017
+ },
1018
+ "US": {
1019
+ "accuracy": 0.0,
1020
+ "count": 339
1021
+ }
1022
+ }
1023
+ },
1024
+ "add_C5": {
1025
+ "full_accuracy": 0.0,
1026
+ "n_examples": 250,
1027
+ "per_subtask": {
1028
+ "SA": {
1029
+ "accuracy": 0.62,
1030
+ "count": 250
1031
+ },
1032
+ "SC": {
1033
+ "accuracy": 0.092,
1034
+ "count": 250
1035
+ },
1036
+ "UC": {
1037
+ "accuracy": 0.22814207650273224,
1038
+ "count": 732
1039
+ },
1040
+ "US": {
1041
+ "accuracy": 0.0694980694980695,
1042
+ "count": 518
1043
+ }
1044
+ }
1045
+ },
1046
+ "add_C6": {
1047
+ "full_accuracy": 0.0,
1048
+ "n_examples": 250,
1049
+ "per_subtask": {
1050
+ "SC": {
1051
+ "accuracy": 0.1,
1052
+ "count": 250
1053
+ },
1054
+ "UC": {
1055
+ "accuracy": 0.3536036036036036,
1056
+ "count": 888
1057
+ },
1058
+ "US": {
1059
+ "accuracy": 0.1323529411764706,
1060
+ "count": 612
1061
+ }
1062
+ }
1063
+ },
1064
+ "sub_M0": {
1065
+ "full_accuracy": 0.0,
1066
+ "n_examples": 250,
1067
+ "per_subtask": {
1068
+ "MD": {
1069
+ "accuracy": 0.3282392026578073,
1070
+ "count": 1505
1071
+ },
1072
+ "ME": {
1073
+ "accuracy": 0.5918367346938775,
1074
+ "count": 245
1075
+ }
1076
+ }
1077
+ },
1078
+ "sub_M1": {
1079
+ "full_accuracy": 0.0,
1080
+ "n_examples": 250,
1081
+ "per_subtask": {
1082
+ "MD": {
1083
+ "accuracy": 0.4887955182072829,
1084
+ "count": 714
1085
+ },
1086
+ "MB": {
1087
+ "accuracy": 0.26737967914438504,
1088
+ "count": 374
1089
+ },
1090
+ "ME": {
1091
+ "accuracy": 0.52,
1092
+ "count": 75
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.1839863713798978,
1096
+ "count": 587
1097
+ }
1098
+ }
1099
+ },
1100
+ "sub_M2": {
1101
+ "full_accuracy": 0.0,
1102
+ "n_examples": 250,
1103
+ "per_subtask": {
1104
+ "MD": {
1105
+ "accuracy": 0.5379061371841155,
1106
+ "count": 554
1107
+ },
1108
+ "MB": {
1109
+ "accuracy": 0.18681318681318682,
1110
+ "count": 273
1111
+ },
1112
+ "ME": {
1113
+ "accuracy": 0.410958904109589,
1114
+ "count": 219
1115
+ },
1116
+ "UB": {
1117
+ "accuracy": 0.19534883720930232,
1118
+ "count": 430
1119
+ },
1120
+ "UD": {
1121
+ "accuracy": 0.4124087591240876,
1122
+ "count": 274
1123
+ }
1124
+ }
1125
+ },
1126
+ "sub_M3": {
1127
+ "full_accuracy": 0.0,
1128
+ "n_examples": 250,
1129
+ "per_subtask": {
1130
+ "MD": {
1131
+ "accuracy": 0.611353711790393,
1132
+ "count": 458
1133
+ },
1134
+ "MB": {
1135
+ "accuracy": 0.19157088122605365,
1136
+ "count": 261
1137
+ },
1138
+ "ME": {
1139
+ "accuracy": 0.41935483870967744,
1140
+ "count": 124
1141
+ },
1142
+ "UB": {
1143
+ "accuracy": 0.18814432989690721,
1144
+ "count": 388
1145
+ },
1146
+ "UD": {
1147
+ "accuracy": 0.23121387283236994,
1148
+ "count": 519
1149
+ }
1150
+ }
1151
+ },
1152
+ "sub_M4": {
1153
+ "full_accuracy": 0.0,
1154
+ "n_examples": 250,
1155
+ "per_subtask": {
1156
+ "MD": {
1157
+ "accuracy": 0.602,
1158
+ "count": 500
1159
+ },
1160
+ "MB": {
1161
+ "accuracy": 0.16,
1162
+ "count": 250
1163
+ },
1164
+ "UB": {
1165
+ "accuracy": 0.06,
1166
+ "count": 250
1167
+ },
1168
+ "UD": {
1169
+ "accuracy": 0.13466666666666666,
1170
+ "count": 750
1171
+ }
1172
+ }
1173
+ },
1174
+ "sub_M5": {
1175
+ "full_accuracy": 0.0,
1176
+ "n_examples": 250,
1177
+ "per_subtask": {
1178
+ "MD": {
1179
+ "accuracy": 1.0,
1180
+ "count": 250
1181
+ },
1182
+ "MB": {
1183
+ "accuracy": 0.072,
1184
+ "count": 250
1185
+ },
1186
+ "UB": {
1187
+ "accuracy": 0.084,
1188
+ "count": 250
1189
+ },
1190
+ "UD": {
1191
+ "accuracy": 0.413,
1192
+ "count": 1000
1193
+ }
1194
+ }
1195
+ },
1196
+ "sub_random": {
1197
+ "full_accuracy": 0.0,
1198
+ "n_examples": 200,
1199
+ "per_subtask": {
1200
+ "MD": {
1201
+ "accuracy": 0.47586206896551725,
1202
+ "count": 580
1203
+ },
1204
+ "MB": {
1205
+ "accuracy": 0.20599250936329588,
1206
+ "count": 267
1207
+ },
1208
+ "ME": {
1209
+ "accuracy": 0.5714285714285714,
1210
+ "count": 63
1211
+ },
1212
+ "UB": {
1213
+ "accuracy": 0.18080357142857142,
1214
+ "count": 448
1215
+ },
1216
+ "UD": {
1217
+ "accuracy": 0.30952380952380953,
1218
+ "count": 42
1219
+ }
1220
+ }
1221
+ },
1222
+ "sub_B3": {
1223
+ "full_accuracy": 0.0,
1224
+ "n_examples": 250,
1225
+ "per_subtask": {
1226
+ "MD": {
1227
+ "accuracy": 0.44,
1228
+ "count": 750
1229
+ },
1230
+ "MB": {
1231
+ "accuracy": 0.212,
1232
+ "count": 250
1233
+ },
1234
+ "UB": {
1235
+ "accuracy": 0.20078740157480315,
1236
+ "count": 508
1237
+ },
1238
+ "UD": {
1239
+ "accuracy": 0.16942148760330578,
1240
+ "count": 242
1241
+ }
1242
+ }
1243
+ },
1244
+ "sub_B4": {
1245
+ "full_accuracy": 0.0,
1246
+ "n_examples": 250,
1247
+ "per_subtask": {
1248
+ "MD": {
1249
+ "accuracy": 0.622,
1250
+ "count": 500
1251
+ },
1252
+ "MB": {
1253
+ "accuracy": 0.184,
1254
+ "count": 250
1255
+ },
1256
+ "UB": {
1257
+ "accuracy": 0.14006514657980457,
1258
+ "count": 614
1259
+ },
1260
+ "UD": {
1261
+ "accuracy": 0.19948186528497408,
1262
+ "count": 386
1263
+ }
1264
+ }
1265
+ },
1266
+ "sub_B5": {
1267
+ "full_accuracy": 0.0,
1268
+ "n_examples": 250,
1269
+ "per_subtask": {
1270
+ "MD": {
1271
+ "accuracy": 1.0,
1272
+ "count": 250
1273
+ },
1274
+ "MB": {
1275
+ "accuracy": 0.068,
1276
+ "count": 250
1277
+ },
1278
+ "UB": {
1279
+ "accuracy": 0.17631917631917632,
1280
+ "count": 777
1281
+ },
1282
+ "UD": {
1283
+ "accuracy": 0.3678646934460888,
1284
+ "count": 473
1285
+ }
1286
+ }
1287
+ }
1288
+ },
1289
+ "summary": {
1290
+ "overall_accuracy": 0.0,
1291
+ "total_examples": 5400,
1292
+ "n_splits": 22
1293
+ }
1294
+ },
1295
+ "sorl_overall_accuracy": 0.0,
1296
+ "sft_overall_accuracy": 0.6812962962962963
1297
+ }
add_sub_sorl_v1_abs10_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc469755c92ef1653fb3f6ae13eac568959c2ae6791d4bfece6a727903c7bc8
3
+ size 650303660
add_sub_sorl_v1_abs10_K1_10K/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 2,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 10,
9
+ "K": 1,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 10,
15
+ "dataset_size": 10000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K1_10K",
18
+ "device": "cuda",
19
+ "push_to_hub": true,
20
+ "no_wandb": false,
21
+ "n_params": 162499262,
22
+ "run_name": "add_sub_sorl_v1_abs10_K1_10K",
23
+ "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
+ "timestamp": "2026-04-11T21:33:55.579231+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": "ulvxvuq3",
31
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/ulvxvuq3",
32
+ "final_accuracy": 0.0,
33
+ "sft_accuracy": 0.6812962962962963,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }