amirali1985 commited on
Commit
abf49a6
·
verified ·
1 Parent(s): a94a95e

Upload add_sub_sorl_v1_abs16_1K

Browse files
add_sub_sorl_v1_abs16_1K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151660
37
+ }
add_sub_sorl_v1_abs16_1K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs16_1K/metrics.json ADDED
@@ -0,0 +1,1031 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [],
4
+ "loss": [],
5
+ "base_loss": [],
6
+ "info_loss": [],
7
+ "abs_loss": [],
8
+ "zipf_loss": [],
9
+ "denoise_loss": [],
10
+ "ortho_loss": [],
11
+ "lr": [],
12
+ "emb_lr": [],
13
+ "eval_step": [
14
+ 0
15
+ ],
16
+ "eval_accuracy": [
17
+ 0.01
18
+ ]
19
+ },
20
+ "final_accuracy": 0.0,
21
+ "sft_eval": {
22
+ "config": {
23
+ "ops": "add_sub",
24
+ "K": null,
25
+ "mode": "sft",
26
+ "n_digits": 6,
27
+ "n_per_split": 50
28
+ },
29
+ "splits": {
30
+ "add_S0": {
31
+ "full_accuracy": 0.0,
32
+ "n_examples": 50,
33
+ "per_subtask": {
34
+ "SA": {
35
+ "accuracy": 0.17627118644067796,
36
+ "count": 295
37
+ },
38
+ "SS": {
39
+ "accuracy": 0.0,
40
+ "count": 55
41
+ }
42
+ }
43
+ },
44
+ "add_S1": {
45
+ "full_accuracy": 0.0,
46
+ "n_examples": 50,
47
+ "per_subtask": {
48
+ "SA": {
49
+ "accuracy": 0.30158730158730157,
50
+ "count": 126
51
+ },
52
+ "SC": {
53
+ "accuracy": 0.25316455696202533,
54
+ "count": 79
55
+ },
56
+ "SS": {
57
+ "accuracy": 0.0,
58
+ "count": 21
59
+ },
60
+ "UC": {
61
+ "accuracy": 0.0,
62
+ "count": 124
63
+ }
64
+ }
65
+ },
66
+ "add_S2": {
67
+ "full_accuracy": 0.0,
68
+ "n_examples": 50,
69
+ "per_subtask": {
70
+ "SA": {
71
+ "accuracy": 0.24,
72
+ "count": 75
73
+ },
74
+ "SC": {
75
+ "accuracy": 0.2903225806451613,
76
+ "count": 62
77
+ },
78
+ "SS": {
79
+ "accuracy": 0.0,
80
+ "count": 39
81
+ },
82
+ "UC": {
83
+ "accuracy": 0.0,
84
+ "count": 111
85
+ },
86
+ "US": {
87
+ "accuracy": 1.0,
88
+ "count": 63
89
+ }
90
+ }
91
+ },
92
+ "add_S3": {
93
+ "full_accuracy": 0.0,
94
+ "n_examples": 50,
95
+ "per_subtask": {
96
+ "SA": {
97
+ "accuracy": 0.26666666666666666,
98
+ "count": 60
99
+ },
100
+ "SC": {
101
+ "accuracy": 0.21052631578947367,
102
+ "count": 57
103
+ },
104
+ "SS": {
105
+ "accuracy": 0.0,
106
+ "count": 19
107
+ },
108
+ "UC": {
109
+ "accuracy": 0.0,
110
+ "count": 104
111
+ },
112
+ "US": {
113
+ "accuracy": 1.0,
114
+ "count": 110
115
+ }
116
+ }
117
+ },
118
+ "add_S4": {
119
+ "full_accuracy": 0.0,
120
+ "n_examples": 50,
121
+ "per_subtask": {
122
+ "SA": {
123
+ "accuracy": 0.20833333333333334,
124
+ "count": 48
125
+ },
126
+ "SC": {
127
+ "accuracy": 0.2692307692307692,
128
+ "count": 52
129
+ },
130
+ "SS": {
131
+ "accuracy": 0.0,
132
+ "count": 7
133
+ },
134
+ "UC": {
135
+ "accuracy": 0.0,
136
+ "count": 89
137
+ },
138
+ "US": {
139
+ "accuracy": 1.0,
140
+ "count": 154
141
+ }
142
+ }
143
+ },
144
+ "add_S5": {
145
+ "full_accuracy": 0.0,
146
+ "n_examples": 50,
147
+ "per_subtask": {
148
+ "SA": {
149
+ "accuracy": 0.54,
150
+ "count": 50
151
+ },
152
+ "SC": {
153
+ "accuracy": 0.36,
154
+ "count": 50
155
+ },
156
+ "UC": {
157
+ "accuracy": 0.0,
158
+ "count": 50
159
+ },
160
+ "US": {
161
+ "accuracy": 1.0,
162
+ "count": 200
163
+ }
164
+ }
165
+ },
166
+ "add_S6": {
167
+ "full_accuracy": 0.0,
168
+ "n_examples": 50,
169
+ "per_subtask": {
170
+ "SC": {
171
+ "accuracy": 0.34,
172
+ "count": 50
173
+ },
174
+ "UC": {
175
+ "accuracy": 0.0,
176
+ "count": 50
177
+ },
178
+ "US": {
179
+ "accuracy": 1.0,
180
+ "count": 250
181
+ }
182
+ }
183
+ },
184
+ "add_random": {
185
+ "full_accuracy": 0.0,
186
+ "n_examples": 200,
187
+ "per_subtask": {
188
+ "SA": {
189
+ "accuracy": 0.23665893271461716,
190
+ "count": 431
191
+ },
192
+ "SC": {
193
+ "accuracy": 0.1962025316455696,
194
+ "count": 316
195
+ },
196
+ "SS": {
197
+ "accuracy": 0.0,
198
+ "count": 39
199
+ },
200
+ "UC": {
201
+ "accuracy": 0.0,
202
+ "count": 560
203
+ },
204
+ "US": {
205
+ "accuracy": 1.0,
206
+ "count": 54
207
+ }
208
+ }
209
+ },
210
+ "add_C3": {
211
+ "full_accuracy": 0.0,
212
+ "n_examples": 50,
213
+ "per_subtask": {
214
+ "SA": {
215
+ "accuracy": 0.3466666666666667,
216
+ "count": 150
217
+ },
218
+ "SC": {
219
+ "accuracy": 0.22,
220
+ "count": 50
221
+ },
222
+ "UC": {
223
+ "accuracy": 0.0,
224
+ "count": 104
225
+ },
226
+ "US": {
227
+ "accuracy": 1.0,
228
+ "count": 46
229
+ }
230
+ }
231
+ },
232
+ "add_C4": {
233
+ "full_accuracy": 0.0,
234
+ "n_examples": 50,
235
+ "per_subtask": {
236
+ "SA": {
237
+ "accuracy": 0.35,
238
+ "count": 100
239
+ },
240
+ "SC": {
241
+ "accuracy": 0.26,
242
+ "count": 50
243
+ },
244
+ "UC": {
245
+ "accuracy": 0.0,
246
+ "count": 123
247
+ },
248
+ "US": {
249
+ "accuracy": 1.0,
250
+ "count": 77
251
+ }
252
+ }
253
+ },
254
+ "add_C5": {
255
+ "full_accuracy": 0.0,
256
+ "n_examples": 50,
257
+ "per_subtask": {
258
+ "SA": {
259
+ "accuracy": 0.42,
260
+ "count": 50
261
+ },
262
+ "SC": {
263
+ "accuracy": 0.34,
264
+ "count": 50
265
+ },
266
+ "UC": {
267
+ "accuracy": 0.0,
268
+ "count": 154
269
+ },
270
+ "US": {
271
+ "accuracy": 1.0,
272
+ "count": 96
273
+ }
274
+ }
275
+ },
276
+ "add_C6": {
277
+ "full_accuracy": 0.0,
278
+ "n_examples": 50,
279
+ "per_subtask": {
280
+ "SC": {
281
+ "accuracy": 0.2,
282
+ "count": 50
283
+ },
284
+ "UC": {
285
+ "accuracy": 0.0,
286
+ "count": 182
287
+ },
288
+ "US": {
289
+ "accuracy": 1.0,
290
+ "count": 118
291
+ }
292
+ }
293
+ },
294
+ "sub_M0": {
295
+ "full_accuracy": 0.0,
296
+ "n_examples": 50,
297
+ "per_subtask": {
298
+ "MD": {
299
+ "accuracy": 0.21428571428571427,
300
+ "count": 294
301
+ },
302
+ "ME": {
303
+ "accuracy": 1.0,
304
+ "count": 56
305
+ }
306
+ }
307
+ },
308
+ "sub_M1": {
309
+ "full_accuracy": 0.0,
310
+ "n_examples": 50,
311
+ "per_subtask": {
312
+ "MD": {
313
+ "accuracy": 0.38461538461538464,
314
+ "count": 143
315
+ },
316
+ "MB": {
317
+ "accuracy": 0.0,
318
+ "count": 69
319
+ },
320
+ "ME": {
321
+ "accuracy": 1.0,
322
+ "count": 15
323
+ },
324
+ "UB": {
325
+ "accuracy": 0.10569105691056911,
326
+ "count": 123
327
+ }
328
+ }
329
+ },
330
+ "sub_M2": {
331
+ "full_accuracy": 0.0,
332
+ "n_examples": 50,
333
+ "per_subtask": {
334
+ "MD": {
335
+ "accuracy": 0.6759259259259259,
336
+ "count": 108
337
+ },
338
+ "MB": {
339
+ "accuracy": 0.0,
340
+ "count": 52
341
+ },
342
+ "ME": {
343
+ "accuracy": 1.0,
344
+ "count": 52
345
+ },
346
+ "UB": {
347
+ "accuracy": 0.14942528735632185,
348
+ "count": 87
349
+ },
350
+ "UD": {
351
+ "accuracy": 0.0,
352
+ "count": 51
353
+ }
354
+ }
355
+ },
356
+ "sub_M3": {
357
+ "full_accuracy": 0.0,
358
+ "n_examples": 50,
359
+ "per_subtask": {
360
+ "MD": {
361
+ "accuracy": 0.6276595744680851,
362
+ "count": 94
363
+ },
364
+ "MB": {
365
+ "accuracy": 0.0,
366
+ "count": 51
367
+ },
368
+ "ME": {
369
+ "accuracy": 1.0,
370
+ "count": 25
371
+ },
372
+ "UB": {
373
+ "accuracy": 0.08974358974358974,
374
+ "count": 78
375
+ },
376
+ "UD": {
377
+ "accuracy": 0.0,
378
+ "count": 102
379
+ }
380
+ }
381
+ },
382
+ "sub_M4": {
383
+ "full_accuracy": 0.0,
384
+ "n_examples": 50,
385
+ "per_subtask": {
386
+ "MD": {
387
+ "accuracy": 0.5,
388
+ "count": 100
389
+ },
390
+ "MB": {
391
+ "accuracy": 0.0,
392
+ "count": 50
393
+ },
394
+ "UB": {
395
+ "accuracy": 0.32,
396
+ "count": 50
397
+ },
398
+ "UD": {
399
+ "accuracy": 0.0,
400
+ "count": 150
401
+ }
402
+ }
403
+ },
404
+ "sub_M5": {
405
+ "full_accuracy": 0.0,
406
+ "n_examples": 50,
407
+ "per_subtask": {
408
+ "MD": {
409
+ "accuracy": 1.0,
410
+ "count": 50
411
+ },
412
+ "MB": {
413
+ "accuracy": 0.0,
414
+ "count": 50
415
+ },
416
+ "UB": {
417
+ "accuracy": 0.2,
418
+ "count": 50
419
+ },
420
+ "UD": {
421
+ "accuracy": 0.0,
422
+ "count": 200
423
+ }
424
+ }
425
+ },
426
+ "sub_random": {
427
+ "full_accuracy": 0.0,
428
+ "n_examples": 200,
429
+ "per_subtask": {
430
+ "MD": {
431
+ "accuracy": 0.3758503401360544,
432
+ "count": 588
433
+ },
434
+ "MB": {
435
+ "accuracy": 0.0,
436
+ "count": 268
437
+ },
438
+ "ME": {
439
+ "accuracy": 1.0,
440
+ "count": 60
441
+ },
442
+ "UB": {
443
+ "accuracy": 0.1610738255033557,
444
+ "count": 447
445
+ },
446
+ "UD": {
447
+ "accuracy": 0.0,
448
+ "count": 37
449
+ }
450
+ }
451
+ },
452
+ "sub_B3": {
453
+ "full_accuracy": 0.0,
454
+ "n_examples": 50,
455
+ "per_subtask": {
456
+ "MD": {
457
+ "accuracy": 0.3333333333333333,
458
+ "count": 150
459
+ },
460
+ "MB": {
461
+ "accuracy": 0.0,
462
+ "count": 50
463
+ },
464
+ "UB": {
465
+ "accuracy": 0.11214953271028037,
466
+ "count": 107
467
+ },
468
+ "UD": {
469
+ "accuracy": 0.0,
470
+ "count": 43
471
+ }
472
+ }
473
+ },
474
+ "sub_B4": {
475
+ "full_accuracy": 0.0,
476
+ "n_examples": 50,
477
+ "per_subtask": {
478
+ "MD": {
479
+ "accuracy": 0.5,
480
+ "count": 100
481
+ },
482
+ "MB": {
483
+ "accuracy": 0.0,
484
+ "count": 50
485
+ },
486
+ "UB": {
487
+ "accuracy": 0.14035087719298245,
488
+ "count": 114
489
+ },
490
+ "UD": {
491
+ "accuracy": 0.0,
492
+ "count": 86
493
+ }
494
+ }
495
+ },
496
+ "sub_B5": {
497
+ "full_accuracy": 0.0,
498
+ "n_examples": 50,
499
+ "per_subtask": {
500
+ "MD": {
501
+ "accuracy": 1.0,
502
+ "count": 50
503
+ },
504
+ "MB": {
505
+ "accuracy": 0.0,
506
+ "count": 50
507
+ },
508
+ "UB": {
509
+ "accuracy": 0.09803921568627451,
510
+ "count": 153
511
+ },
512
+ "UD": {
513
+ "accuracy": 0.0,
514
+ "count": 97
515
+ }
516
+ }
517
+ }
518
+ },
519
+ "summary": {
520
+ "overall_accuracy": 0.0,
521
+ "total_examples": 1400,
522
+ "n_splits": 22
523
+ }
524
+ },
525
+ "sorl_eval": {
526
+ "config": {
527
+ "ops": "add_sub",
528
+ "K": 4,
529
+ "mode": "sorl",
530
+ "n_digits": 6,
531
+ "n_per_split": 50
532
+ },
533
+ "splits": {
534
+ "add_S0": {
535
+ "full_accuracy": 0.0,
536
+ "n_examples": 50,
537
+ "per_subtask": {
538
+ "SA": {
539
+ "accuracy": 0.20677966101694914,
540
+ "count": 295
541
+ },
542
+ "SS": {
543
+ "accuracy": 0.34545454545454546,
544
+ "count": 55
545
+ }
546
+ }
547
+ },
548
+ "add_S1": {
549
+ "full_accuracy": 0.0,
550
+ "n_examples": 50,
551
+ "per_subtask": {
552
+ "SA": {
553
+ "accuracy": 0.2857142857142857,
554
+ "count": 126
555
+ },
556
+ "SC": {
557
+ "accuracy": 0.17721518987341772,
558
+ "count": 79
559
+ },
560
+ "SS": {
561
+ "accuracy": 0.47619047619047616,
562
+ "count": 21
563
+ },
564
+ "UC": {
565
+ "accuracy": 0.03225806451612903,
566
+ "count": 124
567
+ }
568
+ }
569
+ },
570
+ "add_S2": {
571
+ "full_accuracy": 0.0,
572
+ "n_examples": 50,
573
+ "per_subtask": {
574
+ "SA": {
575
+ "accuracy": 0.32,
576
+ "count": 75
577
+ },
578
+ "SC": {
579
+ "accuracy": 0.1774193548387097,
580
+ "count": 62
581
+ },
582
+ "SS": {
583
+ "accuracy": 0.3076923076923077,
584
+ "count": 39
585
+ },
586
+ "UC": {
587
+ "accuracy": 0.02702702702702703,
588
+ "count": 111
589
+ },
590
+ "US": {
591
+ "accuracy": 0.7301587301587301,
592
+ "count": 63
593
+ }
594
+ }
595
+ },
596
+ "add_S3": {
597
+ "full_accuracy": 0.0,
598
+ "n_examples": 50,
599
+ "per_subtask": {
600
+ "SA": {
601
+ "accuracy": 0.43333333333333335,
602
+ "count": 60
603
+ },
604
+ "SC": {
605
+ "accuracy": 0.10526315789473684,
606
+ "count": 57
607
+ },
608
+ "SS": {
609
+ "accuracy": 0.5263157894736842,
610
+ "count": 19
611
+ },
612
+ "UC": {
613
+ "accuracy": 0.019230769230769232,
614
+ "count": 104
615
+ },
616
+ "US": {
617
+ "accuracy": 0.4909090909090909,
618
+ "count": 110
619
+ }
620
+ }
621
+ },
622
+ "add_S4": {
623
+ "full_accuracy": 0.0,
624
+ "n_examples": 50,
625
+ "per_subtask": {
626
+ "SA": {
627
+ "accuracy": 0.2916666666666667,
628
+ "count": 48
629
+ },
630
+ "SC": {
631
+ "accuracy": 0.19230769230769232,
632
+ "count": 52
633
+ },
634
+ "SS": {
635
+ "accuracy": 0.0,
636
+ "count": 7
637
+ },
638
+ "UC": {
639
+ "accuracy": 0.011235955056179775,
640
+ "count": 89
641
+ },
642
+ "US": {
643
+ "accuracy": 0.7012987012987013,
644
+ "count": 154
645
+ }
646
+ }
647
+ },
648
+ "add_S5": {
649
+ "full_accuracy": 0.0,
650
+ "n_examples": 50,
651
+ "per_subtask": {
652
+ "SA": {
653
+ "accuracy": 0.5,
654
+ "count": 50
655
+ },
656
+ "SC": {
657
+ "accuracy": 0.28,
658
+ "count": 50
659
+ },
660
+ "UC": {
661
+ "accuracy": 0.02,
662
+ "count": 50
663
+ },
664
+ "US": {
665
+ "accuracy": 0.7,
666
+ "count": 200
667
+ }
668
+ }
669
+ },
670
+ "add_S6": {
671
+ "full_accuracy": 0.0,
672
+ "n_examples": 50,
673
+ "per_subtask": {
674
+ "SC": {
675
+ "accuracy": 0.22,
676
+ "count": 50
677
+ },
678
+ "UC": {
679
+ "accuracy": 0.0,
680
+ "count": 50
681
+ },
682
+ "US": {
683
+ "accuracy": 0.552,
684
+ "count": 250
685
+ }
686
+ }
687
+ },
688
+ "add_random": {
689
+ "full_accuracy": 0.0,
690
+ "n_examples": 200,
691
+ "per_subtask": {
692
+ "SA": {
693
+ "accuracy": 0.2482598607888631,
694
+ "count": 431
695
+ },
696
+ "SC": {
697
+ "accuracy": 0.10759493670886076,
698
+ "count": 316
699
+ },
700
+ "SS": {
701
+ "accuracy": 0.28205128205128205,
702
+ "count": 39
703
+ },
704
+ "UC": {
705
+ "accuracy": 0.03571428571428571,
706
+ "count": 560
707
+ },
708
+ "US": {
709
+ "accuracy": 0.5925925925925926,
710
+ "count": 54
711
+ }
712
+ }
713
+ },
714
+ "add_C3": {
715
+ "full_accuracy": 0.0,
716
+ "n_examples": 50,
717
+ "per_subtask": {
718
+ "SA": {
719
+ "accuracy": 0.3,
720
+ "count": 150
721
+ },
722
+ "SC": {
723
+ "accuracy": 0.16,
724
+ "count": 50
725
+ },
726
+ "UC": {
727
+ "accuracy": 0.0,
728
+ "count": 104
729
+ },
730
+ "US": {
731
+ "accuracy": 0.6086956521739131,
732
+ "count": 46
733
+ }
734
+ }
735
+ },
736
+ "add_C4": {
737
+ "full_accuracy": 0.0,
738
+ "n_examples": 50,
739
+ "per_subtask": {
740
+ "SA": {
741
+ "accuracy": 0.34,
742
+ "count": 100
743
+ },
744
+ "SC": {
745
+ "accuracy": 0.14,
746
+ "count": 50
747
+ },
748
+ "UC": {
749
+ "accuracy": 0.016260162601626018,
750
+ "count": 123
751
+ },
752
+ "US": {
753
+ "accuracy": 0.5974025974025974,
754
+ "count": 77
755
+ }
756
+ }
757
+ },
758
+ "add_C5": {
759
+ "full_accuracy": 0.0,
760
+ "n_examples": 50,
761
+ "per_subtask": {
762
+ "SA": {
763
+ "accuracy": 0.42,
764
+ "count": 50
765
+ },
766
+ "SC": {
767
+ "accuracy": 0.2,
768
+ "count": 50
769
+ },
770
+ "UC": {
771
+ "accuracy": 0.0,
772
+ "count": 154
773
+ },
774
+ "US": {
775
+ "accuracy": 0.71875,
776
+ "count": 96
777
+ }
778
+ }
779
+ },
780
+ "add_C6": {
781
+ "full_accuracy": 0.0,
782
+ "n_examples": 50,
783
+ "per_subtask": {
784
+ "SC": {
785
+ "accuracy": 0.18,
786
+ "count": 50
787
+ },
788
+ "UC": {
789
+ "accuracy": 0.0,
790
+ "count": 182
791
+ },
792
+ "US": {
793
+ "accuracy": 0.711864406779661,
794
+ "count": 118
795
+ }
796
+ }
797
+ },
798
+ "sub_M0": {
799
+ "full_accuracy": 0.0,
800
+ "n_examples": 50,
801
+ "per_subtask": {
802
+ "MD": {
803
+ "accuracy": 0.2108843537414966,
804
+ "count": 294
805
+ },
806
+ "ME": {
807
+ "accuracy": 0.9821428571428571,
808
+ "count": 56
809
+ }
810
+ }
811
+ },
812
+ "sub_M1": {
813
+ "full_accuracy": 0.0,
814
+ "n_examples": 50,
815
+ "per_subtask": {
816
+ "MD": {
817
+ "accuracy": 0.38461538461538464,
818
+ "count": 143
819
+ },
820
+ "MB": {
821
+ "accuracy": 0.0,
822
+ "count": 69
823
+ },
824
+ "ME": {
825
+ "accuracy": 1.0,
826
+ "count": 15
827
+ },
828
+ "UB": {
829
+ "accuracy": 0.10569105691056911,
830
+ "count": 123
831
+ }
832
+ }
833
+ },
834
+ "sub_M2": {
835
+ "full_accuracy": 0.0,
836
+ "n_examples": 50,
837
+ "per_subtask": {
838
+ "MD": {
839
+ "accuracy": 0.6666666666666666,
840
+ "count": 108
841
+ },
842
+ "MB": {
843
+ "accuracy": 0.0,
844
+ "count": 52
845
+ },
846
+ "ME": {
847
+ "accuracy": 0.9807692307692307,
848
+ "count": 52
849
+ },
850
+ "UB": {
851
+ "accuracy": 0.14942528735632185,
852
+ "count": 87
853
+ },
854
+ "UD": {
855
+ "accuracy": 0.0,
856
+ "count": 51
857
+ }
858
+ }
859
+ },
860
+ "sub_M3": {
861
+ "full_accuracy": 0.0,
862
+ "n_examples": 50,
863
+ "per_subtask": {
864
+ "MD": {
865
+ "accuracy": 0.6276595744680851,
866
+ "count": 94
867
+ },
868
+ "MB": {
869
+ "accuracy": 0.0196078431372549,
870
+ "count": 51
871
+ },
872
+ "ME": {
873
+ "accuracy": 1.0,
874
+ "count": 25
875
+ },
876
+ "UB": {
877
+ "accuracy": 0.08974358974358974,
878
+ "count": 78
879
+ },
880
+ "UD": {
881
+ "accuracy": 0.00980392156862745,
882
+ "count": 102
883
+ }
884
+ }
885
+ },
886
+ "sub_M4": {
887
+ "full_accuracy": 0.0,
888
+ "n_examples": 50,
889
+ "per_subtask": {
890
+ "MD": {
891
+ "accuracy": 0.5,
892
+ "count": 100
893
+ },
894
+ "MB": {
895
+ "accuracy": 0.02,
896
+ "count": 50
897
+ },
898
+ "UB": {
899
+ "accuracy": 0.32,
900
+ "count": 50
901
+ },
902
+ "UD": {
903
+ "accuracy": 0.006666666666666667,
904
+ "count": 150
905
+ }
906
+ }
907
+ },
908
+ "sub_M5": {
909
+ "full_accuracy": 0.0,
910
+ "n_examples": 50,
911
+ "per_subtask": {
912
+ "MD": {
913
+ "accuracy": 1.0,
914
+ "count": 50
915
+ },
916
+ "MB": {
917
+ "accuracy": 0.0,
918
+ "count": 50
919
+ },
920
+ "UB": {
921
+ "accuracy": 0.2,
922
+ "count": 50
923
+ },
924
+ "UD": {
925
+ "accuracy": 0.02,
926
+ "count": 200
927
+ }
928
+ }
929
+ },
930
+ "sub_random": {
931
+ "full_accuracy": 0.0,
932
+ "n_examples": 200,
933
+ "per_subtask": {
934
+ "MD": {
935
+ "accuracy": 0.37755102040816324,
936
+ "count": 588
937
+ },
938
+ "MB": {
939
+ "accuracy": 0.0037313432835820895,
940
+ "count": 268
941
+ },
942
+ "ME": {
943
+ "accuracy": 0.9666666666666667,
944
+ "count": 60
945
+ },
946
+ "UB": {
947
+ "accuracy": 0.1476510067114094,
948
+ "count": 447
949
+ },
950
+ "UD": {
951
+ "accuracy": 0.0,
952
+ "count": 37
953
+ }
954
+ }
955
+ },
956
+ "sub_B3": {
957
+ "full_accuracy": 0.0,
958
+ "n_examples": 50,
959
+ "per_subtask": {
960
+ "MD": {
961
+ "accuracy": 0.3333333333333333,
962
+ "count": 150
963
+ },
964
+ "MB": {
965
+ "accuracy": 0.0,
966
+ "count": 50
967
+ },
968
+ "UB": {
969
+ "accuracy": 0.102803738317757,
970
+ "count": 107
971
+ },
972
+ "UD": {
973
+ "accuracy": 0.06976744186046512,
974
+ "count": 43
975
+ }
976
+ }
977
+ },
978
+ "sub_B4": {
979
+ "full_accuracy": 0.0,
980
+ "n_examples": 50,
981
+ "per_subtask": {
982
+ "MD": {
983
+ "accuracy": 0.5,
984
+ "count": 100
985
+ },
986
+ "MB": {
987
+ "accuracy": 0.0,
988
+ "count": 50
989
+ },
990
+ "UB": {
991
+ "accuracy": 0.13157894736842105,
992
+ "count": 114
993
+ },
994
+ "UD": {
995
+ "accuracy": 0.03488372093023256,
996
+ "count": 86
997
+ }
998
+ }
999
+ },
1000
+ "sub_B5": {
1001
+ "full_accuracy": 0.0,
1002
+ "n_examples": 50,
1003
+ "per_subtask": {
1004
+ "MD": {
1005
+ "accuracy": 1.0,
1006
+ "count": 50
1007
+ },
1008
+ "MB": {
1009
+ "accuracy": 0.02,
1010
+ "count": 50
1011
+ },
1012
+ "UB": {
1013
+ "accuracy": 0.0784313725490196,
1014
+ "count": 153
1015
+ },
1016
+ "UD": {
1017
+ "accuracy": 0.041237113402061855,
1018
+ "count": 97
1019
+ }
1020
+ }
1021
+ }
1022
+ },
1023
+ "summary": {
1024
+ "overall_accuracy": 0.0,
1025
+ "total_examples": 1400,
1026
+ "n_splits": 22
1027
+ }
1028
+ },
1029
+ "sorl_overall_accuracy": 0.0,
1030
+ "sft_overall_accuracy": 0.0
1031
+ }
add_sub_sorl_v1_abs16_1K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43959975cf6b60c99df3f5ee2d6977064967c94752ab5550e4d46030cdbb8767
3
+ size 650328152
add_sub_sorl_v1_abs16_1K/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 2,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 16,
9
+ "K": 4,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 1,
15
+ "dataset_size": 1000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/smoke_sorl",
18
+ "device": "cuda:0",
19
+ "push_to_hub": true,
20
+ "no_wandb": true,
21
+ "n_params": 162505382,
22
+ "run_name": "add_sub_sorl_v1_abs16_1K",
23
+ "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
+ "timestamp": "2026-04-12T01:47:31.805763+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": null,
31
+ "wandb_url": null,
32
+ "final_accuracy": 0.0,
33
+ "sft_accuracy": 0.0,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }