amirali1985 commited on
Commit
515acdf
·
verified ·
1 Parent(s): 30d4af3

Upload add_sub_sorl_v1_abs10_25K

Browse files
add_sub_sorl_v1_abs10_25K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_25K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_25K/metrics.json ADDED
@@ -0,0 +1,1617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 441,
12
+ 491,
13
+ 541,
14
+ 591,
15
+ 641,
16
+ 691,
17
+ 741,
18
+ 832,
19
+ 882,
20
+ 932,
21
+ 982,
22
+ 1032,
23
+ 1082,
24
+ 1132,
25
+ 1223,
26
+ 1273,
27
+ 1323,
28
+ 1373,
29
+ 1423,
30
+ 1473,
31
+ 1523,
32
+ 1614,
33
+ 1664,
34
+ 1714,
35
+ 1764,
36
+ 1814,
37
+ 1864,
38
+ 1914,
39
+ 2005,
40
+ 2055,
41
+ 2105,
42
+ 2155,
43
+ 2205,
44
+ 2255,
45
+ 2305,
46
+ 2396,
47
+ 2446,
48
+ 2496,
49
+ 2546,
50
+ 2596,
51
+ 2646,
52
+ 2696,
53
+ 2787,
54
+ 2837,
55
+ 2887,
56
+ 2937,
57
+ 2987,
58
+ 3037,
59
+ 3087,
60
+ 3178,
61
+ 3228,
62
+ 3278,
63
+ 3328,
64
+ 3378,
65
+ 3428,
66
+ 3478,
67
+ 3569,
68
+ 3619,
69
+ 3669,
70
+ 3719,
71
+ 3769,
72
+ 3819,
73
+ 3869
74
+ ],
75
+ "loss": [
76
+ 8.62352180480957,
77
+ 3.690471649169922,
78
+ 3.1680498123168945,
79
+ 2.650057792663574,
80
+ 2.4546055793762207,
81
+ 2.2286038398742676,
82
+ 2.2743122577667236,
83
+ 1.4608670473098755,
84
+ 1.8613238334655762,
85
+ 2.046741485595703,
86
+ 1.4064021110534668,
87
+ 1.2203834056854248,
88
+ 0.418628990650177,
89
+ -0.6658121347427368,
90
+ -6.851266860961914,
91
+ -8.283329010009766,
92
+ -9.706988334655762,
93
+ -10.902172088623047,
94
+ -11.940679550170898,
95
+ -12.042911529541016,
96
+ -13.00434398651123,
97
+ -12.94082260131836,
98
+ -13.265328407287598,
99
+ -13.784520149230957,
100
+ -13.905801773071289,
101
+ -13.609413146972656,
102
+ -13.715292930603027,
103
+ -14.133524894714355,
104
+ -14.123821258544922,
105
+ -14.485600471496582,
106
+ -13.75143814086914,
107
+ -14.315217018127441,
108
+ -14.236422538757324,
109
+ -13.65060806274414,
110
+ -13.904733657836914,
111
+ -12.59363842010498,
112
+ -10.049785614013672,
113
+ -6.803697109222412,
114
+ -5.973206043243408,
115
+ -5.757765293121338,
116
+ -5.669365406036377,
117
+ -5.0058441162109375,
118
+ -5.555595397949219,
119
+ -5.24139404296875,
120
+ -5.709163665771484,
121
+ -5.130561351776123,
122
+ -4.8264265060424805,
123
+ -4.819240570068359,
124
+ -4.386547565460205,
125
+ -4.0266289710998535,
126
+ -3.970654010772705,
127
+ -3.916658639907837,
128
+ -3.5560050010681152,
129
+ -3.3773417472839355,
130
+ -3.473850727081299,
131
+ -3.083798408508301,
132
+ -3.1898467540740967,
133
+ -2.947633981704712,
134
+ -2.676503896713257,
135
+ -2.8399879932403564,
136
+ -2.484224319458008,
137
+ -2.9060072898864746,
138
+ -2.8863208293914795,
139
+ -2.229196310043335,
140
+ -2.3845486640930176,
141
+ -2.294647216796875,
142
+ -2.293665647506714,
143
+ -2.026386022567749,
144
+ -2.0004825592041016,
145
+ -2.2800354957580566
146
+ ],
147
+ "base_loss": [
148
+ 6.053040504455566,
149
+ 2.548206090927124,
150
+ 1.8746731281280518,
151
+ 1.9164220094680786,
152
+ 1.9539568424224854,
153
+ 1.8462482690811157,
154
+ 1.808115005493164,
155
+ 1.893051266670227,
156
+ 1.8119051456451416,
157
+ 1.7294063568115234,
158
+ 1.7713595628738403,
159
+ 1.796744704246521,
160
+ 1.7207460403442383,
161
+ 1.7722727060317993,
162
+ 1.8661140203475952,
163
+ 1.768760323524475,
164
+ 1.732330083847046,
165
+ 1.7520471811294556,
166
+ 1.7128150463104248,
167
+ 1.6991585493087769,
168
+ 1.7630579471588135,
169
+ 1.6526309251785278,
170
+ 1.7100765705108643,
171
+ 1.7131121158599854,
172
+ 1.7331666946411133,
173
+ 1.635122299194336,
174
+ 1.6380380392074585,
175
+ 1.6857143640518188,
176
+ 1.6658002138137817,
177
+ 1.696215271949768,
178
+ 1.5709830522537231,
179
+ 1.6710329055786133,
180
+ 1.6389342546463013,
181
+ 1.6025257110595703,
182
+ 1.59456467628479,
183
+ 1.466896653175354,
184
+ 1.178689956665039,
185
+ 0.8142209053039551,
186
+ 0.7484551668167114,
187
+ 0.7151921987533569,
188
+ 0.6592262983322144,
189
+ 0.6459842324256897,
190
+ 0.6727160215377808,
191
+ 0.6381062269210815,
192
+ 0.6567009687423706,
193
+ 0.5900193452835083,
194
+ 0.5629978775978088,
195
+ 0.5609976649284363,
196
+ 0.5069475769996643,
197
+ 0.49048009514808655,
198
+ 0.4646568298339844,
199
+ 0.4576375186443329,
200
+ 0.4242294132709503,
201
+ 0.4161135256290436,
202
+ 0.4000745117664337,
203
+ 0.3608091175556183,
204
+ 0.375986248254776,
205
+ 0.3414735198020935,
206
+ 0.3162396252155304,
207
+ 0.3193356692790985,
208
+ 0.3032837212085724,
209
+ 0.3419392704963684,
210
+ 0.3334921896457672,
211
+ 0.2505092918872833,
212
+ 0.26888325810432434,
213
+ 0.2601575553417206,
214
+ 0.26341256499290466,
215
+ 0.2315676510334015,
216
+ 0.23400746285915375,
217
+ 0.263536274433136
218
+ ],
219
+ "info_loss": [
220
+ -0.1942458152770996,
221
+ -0.09092450141906738,
222
+ -0.061556458473205566,
223
+ -0.11549854278564453,
224
+ -0.13766586780548096,
225
+ -0.149794340133667,
226
+ -0.14053452014923096,
227
+ -0.22515976428985596,
228
+ -0.16753578186035156,
229
+ -0.13045012950897217,
230
+ -0.18136727809906006,
231
+ -0.15206265449523926,
232
+ -0.1525585651397705,
233
+ -0.2565147876739502,
234
+ -0.888375461101532,
235
+ -1.0233092308044434,
236
+ -1.1549559831619263,
237
+ -1.2769179344177246,
238
+ -1.3757619857788086,
239
+ -1.3825740814208984,
240
+ -1.485039234161377,
241
+ -1.4669829607009888,
242
+ -1.505444049835205,
243
+ -1.5557503700256348,
244
+ -1.5698777437210083,
245
+ -1.5294302701950073,
246
+ -1.5414831638336182,
247
+ -1.5889732837677002,
248
+ -1.583970308303833,
249
+ -1.624945044517517,
250
+ -1.5382747650146484,
251
+ -1.604049801826477,
252
+ -1.5935652256011963,
253
+ -1.5305627584457397,
254
+ -1.556932806968689,
255
+ -1.4123308658599854,
256
+ -1.128017544746399,
257
+ -0.7671623229980469,
258
+ -0.6771054863929749,
259
+ -0.6523333191871643,
260
+ -0.6362993121147156,
261
+ -0.5681959390640259,
262
+ -0.6264157891273499,
263
+ -0.5906312465667725,
264
+ -0.6391703486442566,
265
+ -0.57457435131073,
266
+ -0.5411813259124756,
267
+ -0.5413545966148376,
268
+ -0.49299928545951843,
269
+ -0.4549095034599304,
270
+ -0.44715359807014465,
271
+ -0.44042420387268066,
272
+ -0.3996829688549042,
273
+ -0.38102421164512634,
274
+ -0.38971224427223206,
275
+ -0.34713271260261536,
276
+ -0.3601379096508026,
277
+ -0.3310556411743164,
278
+ -0.30128398537635803,
279
+ -0.3171699047088623,
280
+ -0.27994272112846375,
281
+ -0.3267364501953125,
282
+ -0.32436403632164,
283
+ -0.24935299158096313,
284
+ -0.26726189255714417,
285
+ -0.257479190826416,
286
+ -0.259423166513443,
287
+ -0.22976627945899963,
288
+ -0.22536085546016693,
289
+ -0.25629565119743347
290
+ ],
291
+ "abs_loss": [
292
+ 2.104123115539551,
293
+ 1.8524173498153687,
294
+ 1.8307222127914429,
295
+ 1.8496146202087402,
296
+ 1.7867794036865234,
297
+ 1.8567798137664795,
298
+ 1.8096632957458496,
299
+ 1.64436674118042,
300
+ 1.5395714044570923,
301
+ 1.423869013786316,
302
+ 1.209989309310913,
303
+ 0.868823766708374,
304
+ 0.4312920570373535,
305
+ 0.49663904309272766,
306
+ 0.513419508934021,
307
+ 0.4742699861526489,
308
+ 0.4636421799659729,
309
+ 0.4861566424369812,
310
+ 0.40548932552337646,
311
+ 0.32250314950942993,
312
+ 0.33680111169815063,
313
+ 0.2677531838417053,
314
+ 0.28694623708724976,
315
+ 0.23017606139183044,
316
+ 0.2637716829776764,
317
+ 0.2513338029384613,
318
+ 0.3043477237224579,
319
+ 0.3003198504447937,
320
+ 0.2580410838127136,
321
+ 0.28733474016189575,
322
+ 0.34056228399276733,
323
+ 0.31581076979637146,
324
+ 0.2850354313850403,
325
+ 0.27487286925315857,
326
+ 0.22147104144096375,
327
+ 0.2475605010986328,
328
+ 0.29944801330566406,
329
+ 0.3343636691570282,
330
+ 0.24871471524238586,
331
+ 0.23551389575004578,
332
+ 0.16640277206897736,
333
+ 0.09375327825546265,
334
+ 0.09696707129478455,
335
+ 0.10278100520372391,
336
+ 0.05310269445180893,
337
+ 0.04737993702292442,
338
+ 0.08958013355731964,
339
+ 0.04776681959629059,
340
+ 0.06892611086368561,
341
+ 0.04011531174182892,
342
+ 0.040765777230262756,
343
+ 0.040000010281801224,
344
+ 0.04502242058515549,
345
+ 0.031056642532348633,
346
+ 0.0263521671295166,
347
+ 0.028736591339111328,
348
+ 0.05925783887505531,
349
+ 0.015155295841395855,
350
+ 0.0065899500623345375,
351
+ 0.01911187544465065,
352
+ 0.011467419564723969,
353
+ 0.0392417348921299,
354
+ 0.03916466236114502,
355
+ 0.014848295599222183,
356
+ 0.011679599061608315,
357
+ 0.01484740898013115,
358
+ 0.009665056131780148,
359
+ 0.009428434073925018,
360
+ 0.009266960434615612,
361
+ 0.01994548924267292
362
+ ],
363
+ "zipf_loss": [
364
+ 4.30252742767334,
365
+ 1.8662687540054321,
366
+ 1.725869059562683,
367
+ 1.7036597728729248,
368
+ 1.69862961769104,
369
+ 1.6946208477020264,
370
+ 1.690576195716858,
371
+ 1.654976725578308,
372
+ 1.57081937789917,
373
+ 1.4794496297836304,
374
+ 1.32771635055542,
375
+ 0.8573828339576721,
376
+ 0.18033941090106964,
377
+ 0.07739913463592529,
378
+ 0.11503171920776367,
379
+ 0.13357549905776978,
380
+ 0.06387755274772644,
381
+ 0.06634452939033508,
382
+ 0.06357603520154953,
383
+ 0.0514199323952198,
384
+ 0.049310360103845596,
385
+ 0.04960052669048309,
386
+ 0.050341784954071045,
387
+ 0.03685367852449417,
388
+ 0.03343234211206436,
389
+ 0.024634700268507004,
390
+ 0.031066298484802246,
391
+ 0.04046095907688141,
392
+ 0.02427714876830578,
393
+ 0.03890116512775421,
394
+ 0.026270316913723946,
395
+ 0.022668270394206047,
396
+ 0.03179142251610756,
397
+ 0.0250064916908741,
398
+ 0.0478830523788929,
399
+ 0.03801628574728966,
400
+ 0.021755080670118332,
401
+ 0.02026906982064247,
402
+ 0.0245220847427845,
403
+ 0.026824040338397026,
404
+ 0.017761364579200745,
405
+ 0.02075556106865406,
406
+ 0.02614957094192505,
407
+ 0.016534019261598587,
408
+ 0.02052861452102661,
409
+ 0.02042551152408123,
410
+ 0.013430889695882797,
411
+ 0.028531325981020927,
412
+ 0.029604997485876083,
413
+ 0.027974452823400497,
414
+ 0.032148949801921844,
415
+ 0.02594580501317978,
416
+ 0.01209304854273796,
417
+ 0.013681085780262947,
418
+ 0.0205619428306818,
419
+ 0.02384582720696926,
420
+ 0.029620153829455376,
421
+ 0.019933151081204414,
422
+ 0.01943730190396309,
423
+ 0.010464150458574295,
424
+ 0.010772374458611012,
425
+ 0.015493839979171753,
426
+ 0.019910695031285286,
427
+ 0.012339326553046703,
428
+ 0.018018856644630432,
429
+ 0.01850241795182228,
430
+ 0.03618703410029411,
431
+ 0.038766246289014816,
432
+ 0.018191780894994736,
433
+ 0.01739019714295864
434
+ ],
435
+ "denoise_loss": [],
436
+ "ortho_loss": [
437
+ 0.21182507276535034,
438
+ 0.08677027374505997,
439
+ 0.0639583095908165,
440
+ 0.049765028059482574,
441
+ 0.04796711727976799,
442
+ 0.04935513809323311,
443
+ 0.06076208874583244,
444
+ 0.0743061751127243,
445
+ 0.0790928527712822,
446
+ 0.08323631435632706,
447
+ 0.08230259269475937,
448
+ 0.08976364135742188,
449
+ 0.09978095442056656,
450
+ 0.10736638307571411,
451
+ 0.10517165064811707,
452
+ 0.10643617808818817,
453
+ 0.10798347741365433,
454
+ 0.10828098654747009,
455
+ 0.11405002325773239,
456
+ 0.12065489590167999,
457
+ 0.12608422338962555,
458
+ 0.1312580555677414,
459
+ 0.13284286856651306,
460
+ 0.13367591798305511,
461
+ 0.135928213596344,
462
+ 0.13187600672245026,
463
+ 0.12774690985679626,
464
+ 0.13383778929710388,
465
+ 0.12453404814004898,
466
+ 0.12623989582061768,
467
+ 0.12295584380626678,
468
+ 0.12713001668453217,
469
+ 0.1253580003976822,
470
+ 0.12130653113126755,
471
+ 0.12213171273469925,
472
+ 0.13524377346038818,
473
+ 0.13765917718410492,
474
+ 0.13814543187618256,
475
+ 0.13984543085098267,
476
+ 0.14626188576221466,
477
+ 0.14723531901836395,
478
+ 0.14748001098632812,
479
+ 0.14777269959449768,
480
+ 0.1560324877500534,
481
+ 0.15685218572616577,
482
+ 0.16409175097942352,
483
+ 0.16843700408935547,
484
+ 0.16872097551822662,
485
+ 0.16776931285858154,
486
+ 0.16298750042915344,
487
+ 0.16492323577404022,
488
+ 0.16564075648784637,
489
+ 0.17178279161453247,
490
+ 0.17316776514053345,
491
+ 0.17469987273216248,
492
+ 0.1731606125831604,
493
+ 0.17828720808029175,
494
+ 0.1789001077413559,
495
+ 0.18193966150283813,
496
+ 0.18010179698467255,
497
+ 0.18138062953948975,
498
+ 0.18183080852031708,
499
+ 0.18499328196048737,
500
+ 0.18659844994544983,
501
+ 0.18622401356697083,
502
+ 0.19045618176460266,
503
+ 0.1923171579837799,
504
+ 0.1990203708410263,
505
+ 0.2016829252243042,
506
+ 0.20334956049919128
507
+ ],
508
+ "lr": [
509
+ 7.840000000000001e-05,
510
+ 8e-05,
511
+ 8e-05,
512
+ 8e-05,
513
+ 8e-05,
514
+ 8e-05,
515
+ 8e-05,
516
+ 8e-05,
517
+ 8e-05,
518
+ 8e-05,
519
+ 8e-05,
520
+ 8e-05,
521
+ 8e-05,
522
+ 8e-05,
523
+ 8e-05,
524
+ 8e-05,
525
+ 8e-05,
526
+ 8e-05,
527
+ 8e-05,
528
+ 8e-05,
529
+ 8e-05,
530
+ 8e-05,
531
+ 8e-05,
532
+ 8e-05,
533
+ 8e-05,
534
+ 8e-05,
535
+ 8e-05,
536
+ 8e-05,
537
+ 8e-05,
538
+ 8e-05,
539
+ 8e-05,
540
+ 8e-05,
541
+ 8e-05,
542
+ 8e-05,
543
+ 8e-05,
544
+ 8e-05,
545
+ 8e-05,
546
+ 8e-05,
547
+ 8e-05,
548
+ 8e-05,
549
+ 8e-05,
550
+ 8e-05,
551
+ 7.864766839378239e-05,
552
+ 7.63160621761658e-05,
553
+ 7.398445595854923e-05,
554
+ 7.165284974093265e-05,
555
+ 6.932124352331606e-05,
556
+ 6.69896373056995e-05,
557
+ 6.465803108808292e-05,
558
+ 6.041450777202072e-05,
559
+ 5.8082901554404154e-05,
560
+ 5.5751295336787566e-05,
561
+ 5.3419689119171e-05,
562
+ 5.108808290155441e-05,
563
+ 4.8756476683937825e-05,
564
+ 4.642487046632125e-05,
565
+ 4.218134715025906e-05,
566
+ 3.98497409326425e-05,
567
+ 3.7518134715025914e-05,
568
+ 3.518652849740933e-05,
569
+ 3.285492227979275e-05,
570
+ 3.0523316062176166e-05,
571
+ 2.8191709844559595e-05,
572
+ 2.3948186528497416e-05,
573
+ 2.1616580310880825e-05,
574
+ 1.9284974093264255e-05,
575
+ 1.6953367875647667e-05,
576
+ 1.4621761658031097e-05,
577
+ 1.2290155440414508e-05,
578
+ 9.958549222797919e-06
579
+ ],
580
+ "emb_lr": [],
581
+ "eval_step": [
582
+ 350,
583
+ 741,
584
+ 1132,
585
+ 1523,
586
+ 1914,
587
+ 2305,
588
+ 2696,
589
+ 3087,
590
+ 3478,
591
+ 3869
592
+ ],
593
+ "eval_accuracy": [
594
+ 0.03,
595
+ 0.03,
596
+ 0.53,
597
+ 0.7,
598
+ 0.88,
599
+ 0.92,
600
+ 0.96,
601
+ 0.92,
602
+ 0.97,
603
+ 0.96
604
+ ]
605
+ },
606
+ "final_accuracy": 0.95,
607
+ "sft_eval": {
608
+ "config": {
609
+ "ops": "add_sub",
610
+ "K": null,
611
+ "mode": "sft",
612
+ "n_digits": 6,
613
+ "n_per_split": 50
614
+ },
615
+ "splits": {
616
+ "add_S0": {
617
+ "full_accuracy": 0.76,
618
+ "n_examples": 50,
619
+ "per_subtask": {
620
+ "SA": {
621
+ "accuracy": 0.9661016949152542,
622
+ "count": 295
623
+ },
624
+ "SS": {
625
+ "accuracy": 0.9454545454545454,
626
+ "count": 55
627
+ }
628
+ }
629
+ },
630
+ "add_S1": {
631
+ "full_accuracy": 0.7,
632
+ "n_examples": 50,
633
+ "per_subtask": {
634
+ "SA": {
635
+ "accuracy": 0.9761904761904762,
636
+ "count": 126
637
+ },
638
+ "SC": {
639
+ "accuracy": 0.9746835443037974,
640
+ "count": 79
641
+ },
642
+ "SS": {
643
+ "accuracy": 0.9523809523809523,
644
+ "count": 21
645
+ },
646
+ "UC": {
647
+ "accuracy": 0.9274193548387096,
648
+ "count": 124
649
+ }
650
+ }
651
+ },
652
+ "add_S2": {
653
+ "full_accuracy": 0.4,
654
+ "n_examples": 50,
655
+ "per_subtask": {
656
+ "SA": {
657
+ "accuracy": 0.9466666666666667,
658
+ "count": 75
659
+ },
660
+ "SC": {
661
+ "accuracy": 0.8548387096774194,
662
+ "count": 62
663
+ },
664
+ "SS": {
665
+ "accuracy": 0.717948717948718,
666
+ "count": 39
667
+ },
668
+ "UC": {
669
+ "accuracy": 0.8378378378378378,
670
+ "count": 111
671
+ },
672
+ "US": {
673
+ "accuracy": 0.9365079365079365,
674
+ "count": 63
675
+ }
676
+ }
677
+ },
678
+ "add_S3": {
679
+ "full_accuracy": 0.28,
680
+ "n_examples": 50,
681
+ "per_subtask": {
682
+ "SA": {
683
+ "accuracy": 0.9666666666666667,
684
+ "count": 60
685
+ },
686
+ "SC": {
687
+ "accuracy": 0.8596491228070176,
688
+ "count": 57
689
+ },
690
+ "SS": {
691
+ "accuracy": 0.9473684210526315,
692
+ "count": 19
693
+ },
694
+ "UC": {
695
+ "accuracy": 0.7980769230769231,
696
+ "count": 104
697
+ },
698
+ "US": {
699
+ "accuracy": 0.8090909090909091,
700
+ "count": 110
701
+ }
702
+ }
703
+ },
704
+ "add_S4": {
705
+ "full_accuracy": 0.4,
706
+ "n_examples": 50,
707
+ "per_subtask": {
708
+ "SA": {
709
+ "accuracy": 1.0,
710
+ "count": 48
711
+ },
712
+ "SC": {
713
+ "accuracy": 0.9423076923076923,
714
+ "count": 52
715
+ },
716
+ "SS": {
717
+ "accuracy": 0.8571428571428571,
718
+ "count": 7
719
+ },
720
+ "UC": {
721
+ "accuracy": 0.7528089887640449,
722
+ "count": 89
723
+ },
724
+ "US": {
725
+ "accuracy": 0.7857142857142857,
726
+ "count": 154
727
+ }
728
+ }
729
+ },
730
+ "add_S5": {
731
+ "full_accuracy": 0.36,
732
+ "n_examples": 50,
733
+ "per_subtask": {
734
+ "SA": {
735
+ "accuracy": 1.0,
736
+ "count": 50
737
+ },
738
+ "SC": {
739
+ "accuracy": 1.0,
740
+ "count": 50
741
+ },
742
+ "UC": {
743
+ "accuracy": 0.52,
744
+ "count": 50
745
+ },
746
+ "US": {
747
+ "accuracy": 0.63,
748
+ "count": 200
749
+ }
750
+ }
751
+ },
752
+ "add_S6": {
753
+ "full_accuracy": 0.66,
754
+ "n_examples": 50,
755
+ "per_subtask": {
756
+ "SC": {
757
+ "accuracy": 1.0,
758
+ "count": 50
759
+ },
760
+ "UC": {
761
+ "accuracy": 0.76,
762
+ "count": 50
763
+ },
764
+ "US": {
765
+ "accuracy": 0.772,
766
+ "count": 250
767
+ }
768
+ }
769
+ },
770
+ "add_random": {
771
+ "full_accuracy": 0.74,
772
+ "n_examples": 200,
773
+ "per_subtask": {
774
+ "SA": {
775
+ "accuracy": 0.9675174013921114,
776
+ "count": 431
777
+ },
778
+ "SC": {
779
+ "accuracy": 0.9525316455696202,
780
+ "count": 316
781
+ },
782
+ "SS": {
783
+ "accuracy": 0.9487179487179487,
784
+ "count": 39
785
+ },
786
+ "UC": {
787
+ "accuracy": 0.9553571428571429,
788
+ "count": 560
789
+ },
790
+ "US": {
791
+ "accuracy": 0.9444444444444444,
792
+ "count": 54
793
+ }
794
+ }
795
+ },
796
+ "add_C3": {
797
+ "full_accuracy": 0.6,
798
+ "n_examples": 50,
799
+ "per_subtask": {
800
+ "SA": {
801
+ "accuracy": 0.9933333333333333,
802
+ "count": 150
803
+ },
804
+ "SC": {
805
+ "accuracy": 1.0,
806
+ "count": 50
807
+ },
808
+ "UC": {
809
+ "accuracy": 0.7692307692307693,
810
+ "count": 104
811
+ },
812
+ "US": {
813
+ "accuracy": 0.9565217391304348,
814
+ "count": 46
815
+ }
816
+ }
817
+ },
818
+ "add_C4": {
819
+ "full_accuracy": 0.52,
820
+ "n_examples": 50,
821
+ "per_subtask": {
822
+ "SA": {
823
+ "accuracy": 0.99,
824
+ "count": 100
825
+ },
826
+ "SC": {
827
+ "accuracy": 0.98,
828
+ "count": 50
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.8373983739837398,
832
+ "count": 123
833
+ },
834
+ "US": {
835
+ "accuracy": 0.7662337662337663,
836
+ "count": 77
837
+ }
838
+ }
839
+ },
840
+ "add_C5": {
841
+ "full_accuracy": 0.4,
842
+ "n_examples": 50,
843
+ "per_subtask": {
844
+ "SA": {
845
+ "accuracy": 1.0,
846
+ "count": 50
847
+ },
848
+ "SC": {
849
+ "accuracy": 0.94,
850
+ "count": 50
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.8181818181818182,
854
+ "count": 154
855
+ },
856
+ "US": {
857
+ "accuracy": 0.78125,
858
+ "count": 96
859
+ }
860
+ }
861
+ },
862
+ "add_C6": {
863
+ "full_accuracy": 0.5,
864
+ "n_examples": 50,
865
+ "per_subtask": {
866
+ "SC": {
867
+ "accuracy": 1.0,
868
+ "count": 50
869
+ },
870
+ "UC": {
871
+ "accuracy": 0.8846153846153846,
872
+ "count": 182
873
+ },
874
+ "US": {
875
+ "accuracy": 0.8050847457627118,
876
+ "count": 118
877
+ }
878
+ }
879
+ },
880
+ "sub_M0": {
881
+ "full_accuracy": 0.96,
882
+ "n_examples": 50,
883
+ "per_subtask": {
884
+ "MD": {
885
+ "accuracy": 0.9931972789115646,
886
+ "count": 294
887
+ },
888
+ "ME": {
889
+ "accuracy": 1.0,
890
+ "count": 56
891
+ }
892
+ }
893
+ },
894
+ "sub_M1": {
895
+ "full_accuracy": 0.86,
896
+ "n_examples": 50,
897
+ "per_subtask": {
898
+ "MD": {
899
+ "accuracy": 0.986013986013986,
900
+ "count": 143
901
+ },
902
+ "MB": {
903
+ "accuracy": 1.0,
904
+ "count": 69
905
+ },
906
+ "ME": {
907
+ "accuracy": 1.0,
908
+ "count": 15
909
+ },
910
+ "UB": {
911
+ "accuracy": 0.959349593495935,
912
+ "count": 123
913
+ }
914
+ }
915
+ },
916
+ "sub_M2": {
917
+ "full_accuracy": 0.3,
918
+ "n_examples": 50,
919
+ "per_subtask": {
920
+ "MD": {
921
+ "accuracy": 1.0,
922
+ "count": 108
923
+ },
924
+ "MB": {
925
+ "accuracy": 0.9807692307692307,
926
+ "count": 52
927
+ },
928
+ "ME": {
929
+ "accuracy": 1.0,
930
+ "count": 52
931
+ },
932
+ "UB": {
933
+ "accuracy": 0.5862068965517241,
934
+ "count": 87
935
+ },
936
+ "UD": {
937
+ "accuracy": 0.9803921568627451,
938
+ "count": 51
939
+ }
940
+ }
941
+ },
942
+ "sub_M3": {
943
+ "full_accuracy": 0.1,
944
+ "n_examples": 50,
945
+ "per_subtask": {
946
+ "MD": {
947
+ "accuracy": 1.0,
948
+ "count": 94
949
+ },
950
+ "MB": {
951
+ "accuracy": 0.9411764705882353,
952
+ "count": 51
953
+ },
954
+ "ME": {
955
+ "accuracy": 1.0,
956
+ "count": 25
957
+ },
958
+ "UB": {
959
+ "accuracy": 0.46153846153846156,
960
+ "count": 78
961
+ },
962
+ "UD": {
963
+ "accuracy": 0.5882352941176471,
964
+ "count": 102
965
+ }
966
+ }
967
+ },
968
+ "sub_M4": {
969
+ "full_accuracy": 0.04,
970
+ "n_examples": 50,
971
+ "per_subtask": {
972
+ "MD": {
973
+ "accuracy": 1.0,
974
+ "count": 100
975
+ },
976
+ "MB": {
977
+ "accuracy": 0.98,
978
+ "count": 50
979
+ },
980
+ "UB": {
981
+ "accuracy": 0.22,
982
+ "count": 50
983
+ },
984
+ "UD": {
985
+ "accuracy": 0.34,
986
+ "count": 150
987
+ }
988
+ }
989
+ },
990
+ "sub_M5": {
991
+ "full_accuracy": 0.0,
992
+ "n_examples": 50,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 50
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 50
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.26,
1004
+ "count": 50
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.275,
1008
+ "count": 200
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 0.745,
1014
+ "n_examples": 200,
1015
+ "per_subtask": {
1016
+ "MD": {
1017
+ "accuracy": 0.9948979591836735,
1018
+ "count": 588
1019
+ },
1020
+ "MB": {
1021
+ "accuracy": 0.9738805970149254,
1022
+ "count": 268
1023
+ },
1024
+ "ME": {
1025
+ "accuracy": 0.9666666666666667,
1026
+ "count": 60
1027
+ },
1028
+ "UB": {
1029
+ "accuracy": 0.8903803131991052,
1030
+ "count": 447
1031
+ },
1032
+ "UD": {
1033
+ "accuracy": 0.918918918918919,
1034
+ "count": 37
1035
+ }
1036
+ }
1037
+ },
1038
+ "sub_B3": {
1039
+ "full_accuracy": 0.48,
1040
+ "n_examples": 50,
1041
+ "per_subtask": {
1042
+ "MD": {
1043
+ "accuracy": 1.0,
1044
+ "count": 150
1045
+ },
1046
+ "MB": {
1047
+ "accuracy": 0.96,
1048
+ "count": 50
1049
+ },
1050
+ "UB": {
1051
+ "accuracy": 0.7663551401869159,
1052
+ "count": 107
1053
+ },
1054
+ "UD": {
1055
+ "accuracy": 0.8604651162790697,
1056
+ "count": 43
1057
+ }
1058
+ }
1059
+ },
1060
+ "sub_B4": {
1061
+ "full_accuracy": 0.3,
1062
+ "n_examples": 50,
1063
+ "per_subtask": {
1064
+ "MD": {
1065
+ "accuracy": 1.0,
1066
+ "count": 100
1067
+ },
1068
+ "MB": {
1069
+ "accuracy": 1.0,
1070
+ "count": 50
1071
+ },
1072
+ "UB": {
1073
+ "accuracy": 0.7192982456140351,
1074
+ "count": 114
1075
+ },
1076
+ "UD": {
1077
+ "accuracy": 0.627906976744186,
1078
+ "count": 86
1079
+ }
1080
+ }
1081
+ },
1082
+ "sub_B5": {
1083
+ "full_accuracy": 0.22,
1084
+ "n_examples": 50,
1085
+ "per_subtask": {
1086
+ "MD": {
1087
+ "accuracy": 1.0,
1088
+ "count": 50
1089
+ },
1090
+ "MB": {
1091
+ "accuracy": 0.98,
1092
+ "count": 50
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.7124183006535948,
1096
+ "count": 153
1097
+ },
1098
+ "UD": {
1099
+ "accuracy": 0.6391752577319587,
1100
+ "count": 97
1101
+ }
1102
+ }
1103
+ }
1104
+ },
1105
+ "summary": {
1106
+ "overall_accuracy": 0.5278571428571428,
1107
+ "total_examples": 1400,
1108
+ "n_splits": 22
1109
+ }
1110
+ },
1111
+ "sorl_eval": {
1112
+ "config": {
1113
+ "ops": "add_sub",
1114
+ "K": 4,
1115
+ "mode": "sorl",
1116
+ "n_digits": 6,
1117
+ "n_per_split": 50
1118
+ },
1119
+ "splits": {
1120
+ "add_S0": {
1121
+ "full_accuracy": 1.0,
1122
+ "n_examples": 50,
1123
+ "per_subtask": {
1124
+ "SA": {
1125
+ "accuracy": 1.0,
1126
+ "count": 295
1127
+ },
1128
+ "SS": {
1129
+ "accuracy": 1.0,
1130
+ "count": 55
1131
+ }
1132
+ }
1133
+ },
1134
+ "add_S1": {
1135
+ "full_accuracy": 1.0,
1136
+ "n_examples": 50,
1137
+ "per_subtask": {
1138
+ "SA": {
1139
+ "accuracy": 1.0,
1140
+ "count": 126
1141
+ },
1142
+ "SC": {
1143
+ "accuracy": 1.0,
1144
+ "count": 79
1145
+ },
1146
+ "SS": {
1147
+ "accuracy": 1.0,
1148
+ "count": 21
1149
+ },
1150
+ "UC": {
1151
+ "accuracy": 1.0,
1152
+ "count": 124
1153
+ }
1154
+ }
1155
+ },
1156
+ "add_S2": {
1157
+ "full_accuracy": 1.0,
1158
+ "n_examples": 50,
1159
+ "per_subtask": {
1160
+ "SA": {
1161
+ "accuracy": 1.0,
1162
+ "count": 75
1163
+ },
1164
+ "SC": {
1165
+ "accuracy": 1.0,
1166
+ "count": 62
1167
+ },
1168
+ "SS": {
1169
+ "accuracy": 1.0,
1170
+ "count": 39
1171
+ },
1172
+ "UC": {
1173
+ "accuracy": 1.0,
1174
+ "count": 111
1175
+ },
1176
+ "US": {
1177
+ "accuracy": 1.0,
1178
+ "count": 63
1179
+ }
1180
+ }
1181
+ },
1182
+ "add_S3": {
1183
+ "full_accuracy": 0.96,
1184
+ "n_examples": 50,
1185
+ "per_subtask": {
1186
+ "SA": {
1187
+ "accuracy": 1.0,
1188
+ "count": 60
1189
+ },
1190
+ "SC": {
1191
+ "accuracy": 1.0,
1192
+ "count": 57
1193
+ },
1194
+ "SS": {
1195
+ "accuracy": 1.0,
1196
+ "count": 19
1197
+ },
1198
+ "UC": {
1199
+ "accuracy": 0.9807692307692307,
1200
+ "count": 104
1201
+ },
1202
+ "US": {
1203
+ "accuracy": 1.0,
1204
+ "count": 110
1205
+ }
1206
+ }
1207
+ },
1208
+ "add_S4": {
1209
+ "full_accuracy": 0.96,
1210
+ "n_examples": 50,
1211
+ "per_subtask": {
1212
+ "SA": {
1213
+ "accuracy": 1.0,
1214
+ "count": 48
1215
+ },
1216
+ "SC": {
1217
+ "accuracy": 1.0,
1218
+ "count": 52
1219
+ },
1220
+ "SS": {
1221
+ "accuracy": 1.0,
1222
+ "count": 7
1223
+ },
1224
+ "UC": {
1225
+ "accuracy": 0.9775280898876404,
1226
+ "count": 89
1227
+ },
1228
+ "US": {
1229
+ "accuracy": 0.9935064935064936,
1230
+ "count": 154
1231
+ }
1232
+ }
1233
+ },
1234
+ "add_S5": {
1235
+ "full_accuracy": 0.68,
1236
+ "n_examples": 50,
1237
+ "per_subtask": {
1238
+ "SA": {
1239
+ "accuracy": 1.0,
1240
+ "count": 50
1241
+ },
1242
+ "SC": {
1243
+ "accuracy": 1.0,
1244
+ "count": 50
1245
+ },
1246
+ "UC": {
1247
+ "accuracy": 0.7,
1248
+ "count": 50
1249
+ },
1250
+ "US": {
1251
+ "accuracy": 0.97,
1252
+ "count": 200
1253
+ }
1254
+ }
1255
+ },
1256
+ "add_S6": {
1257
+ "full_accuracy": 0.8,
1258
+ "n_examples": 50,
1259
+ "per_subtask": {
1260
+ "SC": {
1261
+ "accuracy": 1.0,
1262
+ "count": 50
1263
+ },
1264
+ "UC": {
1265
+ "accuracy": 0.8,
1266
+ "count": 50
1267
+ },
1268
+ "US": {
1269
+ "accuracy": 0.944,
1270
+ "count": 250
1271
+ }
1272
+ }
1273
+ },
1274
+ "add_random": {
1275
+ "full_accuracy": 1.0,
1276
+ "n_examples": 200,
1277
+ "per_subtask": {
1278
+ "SA": {
1279
+ "accuracy": 1.0,
1280
+ "count": 431
1281
+ },
1282
+ "SC": {
1283
+ "accuracy": 1.0,
1284
+ "count": 316
1285
+ },
1286
+ "SS": {
1287
+ "accuracy": 1.0,
1288
+ "count": 39
1289
+ },
1290
+ "UC": {
1291
+ "accuracy": 1.0,
1292
+ "count": 560
1293
+ },
1294
+ "US": {
1295
+ "accuracy": 1.0,
1296
+ "count": 54
1297
+ }
1298
+ }
1299
+ },
1300
+ "add_C3": {
1301
+ "full_accuracy": 1.0,
1302
+ "n_examples": 50,
1303
+ "per_subtask": {
1304
+ "SA": {
1305
+ "accuracy": 1.0,
1306
+ "count": 150
1307
+ },
1308
+ "SC": {
1309
+ "accuracy": 1.0,
1310
+ "count": 50
1311
+ },
1312
+ "UC": {
1313
+ "accuracy": 1.0,
1314
+ "count": 104
1315
+ },
1316
+ "US": {
1317
+ "accuracy": 1.0,
1318
+ "count": 46
1319
+ }
1320
+ }
1321
+ },
1322
+ "add_C4": {
1323
+ "full_accuracy": 0.94,
1324
+ "n_examples": 50,
1325
+ "per_subtask": {
1326
+ "SA": {
1327
+ "accuracy": 1.0,
1328
+ "count": 100
1329
+ },
1330
+ "SC": {
1331
+ "accuracy": 1.0,
1332
+ "count": 50
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.975609756097561,
1336
+ "count": 123
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 1.0,
1340
+ "count": 77
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_C5": {
1345
+ "full_accuracy": 0.98,
1346
+ "n_examples": 50,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 50
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 1.0,
1354
+ "count": 50
1355
+ },
1356
+ "UC": {
1357
+ "accuracy": 0.9935064935064936,
1358
+ "count": 154
1359
+ },
1360
+ "US": {
1361
+ "accuracy": 1.0,
1362
+ "count": 96
1363
+ }
1364
+ }
1365
+ },
1366
+ "add_C6": {
1367
+ "full_accuracy": 0.94,
1368
+ "n_examples": 50,
1369
+ "per_subtask": {
1370
+ "SC": {
1371
+ "accuracy": 1.0,
1372
+ "count": 50
1373
+ },
1374
+ "UC": {
1375
+ "accuracy": 0.9835164835164835,
1376
+ "count": 182
1377
+ },
1378
+ "US": {
1379
+ "accuracy": 1.0,
1380
+ "count": 118
1381
+ }
1382
+ }
1383
+ },
1384
+ "sub_M0": {
1385
+ "full_accuracy": 1.0,
1386
+ "n_examples": 50,
1387
+ "per_subtask": {
1388
+ "MD": {
1389
+ "accuracy": 1.0,
1390
+ "count": 294
1391
+ },
1392
+ "ME": {
1393
+ "accuracy": 1.0,
1394
+ "count": 56
1395
+ }
1396
+ }
1397
+ },
1398
+ "sub_M1": {
1399
+ "full_accuracy": 1.0,
1400
+ "n_examples": 50,
1401
+ "per_subtask": {
1402
+ "MD": {
1403
+ "accuracy": 1.0,
1404
+ "count": 143
1405
+ },
1406
+ "MB": {
1407
+ "accuracy": 1.0,
1408
+ "count": 69
1409
+ },
1410
+ "ME": {
1411
+ "accuracy": 1.0,
1412
+ "count": 15
1413
+ },
1414
+ "UB": {
1415
+ "accuracy": 1.0,
1416
+ "count": 123
1417
+ }
1418
+ }
1419
+ },
1420
+ "sub_M2": {
1421
+ "full_accuracy": 1.0,
1422
+ "n_examples": 50,
1423
+ "per_subtask": {
1424
+ "MD": {
1425
+ "accuracy": 1.0,
1426
+ "count": 108
1427
+ },
1428
+ "MB": {
1429
+ "accuracy": 1.0,
1430
+ "count": 52
1431
+ },
1432
+ "ME": {
1433
+ "accuracy": 1.0,
1434
+ "count": 52
1435
+ },
1436
+ "UB": {
1437
+ "accuracy": 1.0,
1438
+ "count": 87
1439
+ },
1440
+ "UD": {
1441
+ "accuracy": 1.0,
1442
+ "count": 51
1443
+ }
1444
+ }
1445
+ },
1446
+ "sub_M3": {
1447
+ "full_accuracy": 0.98,
1448
+ "n_examples": 50,
1449
+ "per_subtask": {
1450
+ "MD": {
1451
+ "accuracy": 1.0,
1452
+ "count": 94
1453
+ },
1454
+ "MB": {
1455
+ "accuracy": 1.0,
1456
+ "count": 51
1457
+ },
1458
+ "ME": {
1459
+ "accuracy": 1.0,
1460
+ "count": 25
1461
+ },
1462
+ "UB": {
1463
+ "accuracy": 0.9871794871794872,
1464
+ "count": 78
1465
+ },
1466
+ "UD": {
1467
+ "accuracy": 1.0,
1468
+ "count": 102
1469
+ }
1470
+ }
1471
+ },
1472
+ "sub_M4": {
1473
+ "full_accuracy": 0.84,
1474
+ "n_examples": 50,
1475
+ "per_subtask": {
1476
+ "MD": {
1477
+ "accuracy": 1.0,
1478
+ "count": 100
1479
+ },
1480
+ "MB": {
1481
+ "accuracy": 1.0,
1482
+ "count": 50
1483
+ },
1484
+ "UB": {
1485
+ "accuracy": 0.84,
1486
+ "count": 50
1487
+ },
1488
+ "UD": {
1489
+ "accuracy": 0.9933333333333333,
1490
+ "count": 150
1491
+ }
1492
+ }
1493
+ },
1494
+ "sub_M5": {
1495
+ "full_accuracy": 0.72,
1496
+ "n_examples": 50,
1497
+ "per_subtask": {
1498
+ "MD": {
1499
+ "accuracy": 1.0,
1500
+ "count": 50
1501
+ },
1502
+ "MB": {
1503
+ "accuracy": 1.0,
1504
+ "count": 50
1505
+ },
1506
+ "UB": {
1507
+ "accuracy": 0.72,
1508
+ "count": 50
1509
+ },
1510
+ "UD": {
1511
+ "accuracy": 0.995,
1512
+ "count": 200
1513
+ }
1514
+ }
1515
+ },
1516
+ "sub_random": {
1517
+ "full_accuracy": 1.0,
1518
+ "n_examples": 200,
1519
+ "per_subtask": {
1520
+ "MD": {
1521
+ "accuracy": 1.0,
1522
+ "count": 588
1523
+ },
1524
+ "MB": {
1525
+ "accuracy": 1.0,
1526
+ "count": 268
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 1.0,
1530
+ "count": 60
1531
+ },
1532
+ "UB": {
1533
+ "accuracy": 1.0,
1534
+ "count": 447
1535
+ },
1536
+ "UD": {
1537
+ "accuracy": 1.0,
1538
+ "count": 37
1539
+ }
1540
+ }
1541
+ },
1542
+ "sub_B3": {
1543
+ "full_accuracy": 0.96,
1544
+ "n_examples": 50,
1545
+ "per_subtask": {
1546
+ "MD": {
1547
+ "accuracy": 1.0,
1548
+ "count": 150
1549
+ },
1550
+ "MB": {
1551
+ "accuracy": 1.0,
1552
+ "count": 50
1553
+ },
1554
+ "UB": {
1555
+ "accuracy": 0.9813084112149533,
1556
+ "count": 107
1557
+ },
1558
+ "UD": {
1559
+ "accuracy": 1.0,
1560
+ "count": 43
1561
+ }
1562
+ }
1563
+ },
1564
+ "sub_B4": {
1565
+ "full_accuracy": 0.92,
1566
+ "n_examples": 50,
1567
+ "per_subtask": {
1568
+ "MD": {
1569
+ "accuracy": 1.0,
1570
+ "count": 100
1571
+ },
1572
+ "MB": {
1573
+ "accuracy": 1.0,
1574
+ "count": 50
1575
+ },
1576
+ "UB": {
1577
+ "accuracy": 0.9649122807017544,
1578
+ "count": 114
1579
+ },
1580
+ "UD": {
1581
+ "accuracy": 1.0,
1582
+ "count": 86
1583
+ }
1584
+ }
1585
+ },
1586
+ "sub_B5": {
1587
+ "full_accuracy": 0.92,
1588
+ "n_examples": 50,
1589
+ "per_subtask": {
1590
+ "MD": {
1591
+ "accuracy": 1.0,
1592
+ "count": 50
1593
+ },
1594
+ "MB": {
1595
+ "accuracy": 1.0,
1596
+ "count": 50
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.9869281045751634,
1600
+ "count": 153
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.979381443298969,
1604
+ "count": 97
1605
+ }
1606
+ }
1607
+ }
1608
+ },
1609
+ "summary": {
1610
+ "overall_accuracy": 0.95,
1611
+ "total_examples": 1400,
1612
+ "n_splits": 22
1613
+ }
1614
+ },
1615
+ "sorl_overall_accuracy": 0.95,
1616
+ "sft_overall_accuracy": 0.5278571428571428
1617
+ }
add_sub_sorl_v1_abs10_25K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d40fff70ff6d0020577cd516415675f4669c580b180636378d076a80f6caede
3
+ size 650303660
add_sub_sorl_v1_abs10_25K/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 2,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 10,
9
+ "K": 4,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 10,
15
+ "dataset_size": 25000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K4_25K",
18
+ "device": "cuda",
19
+ "push_to_hub": true,
20
+ "no_wandb": false,
21
+ "n_params": 162499262,
22
+ "run_name": "add_sub_sorl_v1_abs10_25K",
23
+ "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
+ "timestamp": "2026-04-12T02:37:37.086430+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": "iblkjg9n",
31
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/iblkjg9n",
32
+ "final_accuracy": 0.95,
33
+ "sft_accuracy": 0.5278571428571428,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }