amirali1985 commited on
Commit
ae342ff
·
verified ·
1 Parent(s): 31ed6c2

Upload add_sub_sorl_v1_abs10_K1_25K

Browse files
add_sub_sorl_v1_abs10_K1_25K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_K1_25K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_K1_25K/metrics.json ADDED
@@ -0,0 +1,1617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 441,
12
+ 491,
13
+ 541,
14
+ 591,
15
+ 641,
16
+ 691,
17
+ 741,
18
+ 832,
19
+ 882,
20
+ 932,
21
+ 982,
22
+ 1032,
23
+ 1082,
24
+ 1132,
25
+ 1223,
26
+ 1273,
27
+ 1323,
28
+ 1373,
29
+ 1423,
30
+ 1473,
31
+ 1523,
32
+ 1614,
33
+ 1664,
34
+ 1714,
35
+ 1764,
36
+ 1814,
37
+ 1864,
38
+ 1914,
39
+ 2005,
40
+ 2055,
41
+ 2105,
42
+ 2155,
43
+ 2205,
44
+ 2255,
45
+ 2305,
46
+ 2396,
47
+ 2446,
48
+ 2496,
49
+ 2546,
50
+ 2596,
51
+ 2646,
52
+ 2696,
53
+ 2787,
54
+ 2837,
55
+ 2887,
56
+ 2937,
57
+ 2987,
58
+ 3037,
59
+ 3087,
60
+ 3178,
61
+ 3228,
62
+ 3278,
63
+ 3328,
64
+ 3378,
65
+ 3428,
66
+ 3478,
67
+ 3569,
68
+ 3619,
69
+ 3669,
70
+ 3719,
71
+ 3769,
72
+ 3819,
73
+ 3869
74
+ ],
75
+ "loss": [
76
+ 8.440483093261719,
77
+ 3.670170307159424,
78
+ 3.2093284130096436,
79
+ 3.1004648208618164,
80
+ 3.070091724395752,
81
+ 3.0389091968536377,
82
+ 2.5894856452941895,
83
+ 0.7675281763076782,
84
+ -3.0276317596435547,
85
+ -4.935643196105957,
86
+ -4.584414958953857,
87
+ -2.691551923751831,
88
+ -1.9160187244415283,
89
+ -1.2295455932617188,
90
+ -1.3427369594573975,
91
+ -1.812907099723816,
92
+ -1.9342525005340576,
93
+ -1.293236255645752,
94
+ -1.3251506090164185,
95
+ -1.4479036331176758,
96
+ -1.5105352401733398,
97
+ -2.014406204223633,
98
+ -1.9828301668167114,
99
+ -1.7031790018081665,
100
+ -2.5977749824523926,
101
+ -2.3531222343444824,
102
+ -2.0115716457366943,
103
+ -1.536534070968628,
104
+ -1.4918975830078125,
105
+ -1.0497915744781494,
106
+ -1.3855948448181152,
107
+ -1.9143116474151611,
108
+ -1.2018264532089233,
109
+ -0.9971982836723328,
110
+ -1.4220505952835083,
111
+ -0.9136620759963989,
112
+ -0.9068200588226318,
113
+ -1.1527667045593262,
114
+ -0.6265409588813782,
115
+ -0.8574066162109375,
116
+ -0.42161595821380615,
117
+ -0.434919536113739,
118
+ -0.4632154107093811,
119
+ -0.5082250237464905,
120
+ -0.5800994634628296,
121
+ -0.789692759513855,
122
+ -0.2702712416648865,
123
+ -0.4394497275352478,
124
+ -0.5490878820419312,
125
+ -0.5185199975967407,
126
+ -0.32845696806907654,
127
+ -0.2897554337978363,
128
+ -0.39504489302635193,
129
+ -0.27258846163749695,
130
+ -0.25301221013069153,
131
+ -0.08269159495830536,
132
+ -0.18398964405059814,
133
+ -0.23646067082881927,
134
+ -0.10407372564077377,
135
+ -0.09299999475479126,
136
+ -0.08140064775943756,
137
+ -0.09688820689916611,
138
+ -0.05483222380280495,
139
+ -0.03655329346656799,
140
+ -0.05594222992658615,
141
+ -0.008253187872469425,
142
+ -0.09734220057725906,
143
+ -0.05176986753940582,
144
+ -0.05214906856417656,
145
+ -0.07431081682443619
146
+ ],
147
+ "base_loss": [
148
+ 6.488893985748291,
149
+ 2.3548998832702637,
150
+ 1.9527490139007568,
151
+ 1.8523368835449219,
152
+ 1.8802011013031006,
153
+ 1.8292043209075928,
154
+ 1.8652232885360718,
155
+ 1.815848469734192,
156
+ 1.848860740661621,
157
+ 1.741455316543579,
158
+ 1.3914107084274292,
159
+ 1.068152666091919,
160
+ 0.9446887969970703,
161
+ 0.7281270623207092,
162
+ 0.599748432636261,
163
+ 0.5804276466369629,
164
+ 0.5249930024147034,
165
+ 0.454721599817276,
166
+ 0.3896333873271942,
167
+ 0.32255008816719055,
168
+ 0.34916767477989197,
169
+ 0.32530543208122253,
170
+ 0.2995271384716034,
171
+ 0.2648261487483978,
172
+ 0.3412800133228302,
173
+ 0.28785938024520874,
174
+ 0.24230030179023743,
175
+ 0.19102659821510315,
176
+ 0.17416465282440186,
177
+ 0.1800290048122406,
178
+ 0.1682046502828598,
179
+ 0.24820947647094727,
180
+ 0.14416953921318054,
181
+ 0.11789846420288086,
182
+ 0.16254489123821259,
183
+ 0.11386360228061676,
184
+ 0.12063971906900406,
185
+ 0.13949914276599884,
186
+ 0.07232778519392014,
187
+ 0.10645522177219391,
188
+ 0.04950258880853653,
189
+ 0.05214657261967659,
190
+ 0.0541020967066288,
191
+ 0.06009664013981819,
192
+ 0.07329648733139038,
193
+ 0.09814182668924332,
194
+ 0.031828977167606354,
195
+ 0.05065033212304115,
196
+ 0.06576144695281982,
197
+ 0.06058066338300705,
198
+ 0.037494949996471405,
199
+ 0.03307747095823288,
200
+ 0.04483410716056824,
201
+ 0.03311517834663391,
202
+ 0.030032988637685776,
203
+ 0.00985543243587017,
204
+ 0.02162509225308895,
205
+ 0.02781860902905464,
206
+ 0.012631254270672798,
207
+ 0.011143012903630733,
208
+ 0.009497015736997128,
209
+ 0.01135705504566431,
210
+ 0.008042754605412483,
211
+ 0.005164582747966051,
212
+ 0.006489872932434082,
213
+ 0.0014688527444377542,
214
+ 0.011420228518545628,
215
+ 0.006360507570207119,
216
+ 0.006299526430666447,
217
+ 0.008574886247515678
218
+ ],
219
+ "info_loss": [
220
+ -0.2627429962158203,
221
+ -0.0675959587097168,
222
+ -0.06391513347625732,
223
+ -0.06318938732147217,
224
+ -0.068672776222229,
225
+ -0.06626904010772705,
226
+ -0.11490964889526367,
227
+ -0.29147613048553467,
228
+ -0.6731091737747192,
229
+ -0.8447501063346863,
230
+ -0.7606219053268433,
231
+ -0.516476035118103,
232
+ -0.41259610652923584,
233
+ -0.31304433941841125,
234
+ -0.2955251634120941,
235
+ -0.3318001329898834,
236
+ -0.33346521854400635,
237
+ -0.25125789642333984,
238
+ -0.23548583686351776,
239
+ -0.21368299424648285,
240
+ -0.20429086685180664,
241
+ -0.24210147559642792,
242
+ -0.23295104503631592,
243
+ -0.19994628429412842,
244
+ -0.2960708439350128,
245
+ -0.2660777270793915,
246
+ -0.22968997061252594,
247
+ -0.17489448189735413,
248
+ -0.16804951429367065,
249
+ -0.12430460005998611,
250
+ -0.15695549547672272,
251
+ -0.2182704359292984,
252
+ -0.13539204001426697,
253
+ -0.11300598084926605,
254
+ -0.15940292179584503,
255
+ -0.10396484285593033,
256
+ -0.10412997007369995,
257
+ -0.13012480735778809,
258
+ -0.07035745680332184,
259
+ -0.09725693613290787,
260
+ -0.0479651540517807,
261
+ -0.04964497312903404,
262
+ -0.05318129435181618,
263
+ -0.05739910155534744,
264
+ -0.06728154420852661,
265
+ -0.09058966487646103,
266
+ -0.03128577023744583,
267
+ -0.04990841820836067,
268
+ -0.06233469396829605,
269
+ -0.05823947489261627,
270
+ -0.03711993992328644,
271
+ -0.03283752128481865,
272
+ -0.04459574073553085,
273
+ -0.031027397140860558,
274
+ -0.028812875971198082,
275
+ -0.009709836915135384,
276
+ -0.021456001326441765,
277
+ -0.02661510370671749,
278
+ -0.012435774318873882,
279
+ -0.011000837199389935,
280
+ -0.009408645331859589,
281
+ -0.011241589672863483,
282
+ -0.006964982487261295,
283
+ -0.005082710646092892,
284
+ -0.006405099760740995,
285
+ -0.0013885911321267486,
286
+ -0.011316562071442604,
287
+ -0.006281006615608931,
288
+ -0.006225514691323042,
289
+ -0.008512690663337708
290
+ ],
291
+ "abs_loss": [
292
+ 2.062779664993286,
293
+ 1.8660074472427368,
294
+ 1.8702964782714844,
295
+ 1.8314086198806763,
296
+ 1.8298479318618774,
297
+ 1.8219594955444336,
298
+ 1.854802131652832,
299
+ 1.825403094291687,
300
+ 1.8147176504135132,
301
+ 1.718223214149475,
302
+ 1.5588526725769043,
303
+ 1.2659032344818115,
304
+ 1.0050292015075684,
305
+ 0.8381269574165344,
306
+ 0.7240123748779297,
307
+ 0.6084690690040588,
308
+ 0.56196129322052,
309
+ 0.43736353516578674,
310
+ 0.4195970594882965,
311
+ 0.3356196880340576,
312
+ 0.3217657208442688,
313
+ 0.18604372441768646,
314
+ 0.1829766184091568,
315
+ 0.15309858322143555,
316
+ 0.12250169366598129,
317
+ 0.09407112747430801,
318
+ 0.11580777168273926,
319
+ 0.09737056493759155,
320
+ 0.06344643235206604,
321
+ 0.06409666687250137,
322
+ 0.06125688925385475,
323
+ 0.04178151860833168,
324
+ 0.03453303501009941,
325
+ 0.045983314514160156,
326
+ 0.0417625792324543,
327
+ 0.037050385028123856,
328
+ 0.03088521957397461,
329
+ 0.01876666396856308,
330
+ 0.02520829439163208,
331
+ 0.032921046018600464,
332
+ 0.02231394685804844,
333
+ 0.013566468842327595,
334
+ 0.01799575798213482,
335
+ 0.020950788632035255,
336
+ 0.025772780179977417,
337
+ 0.014466878958046436,
338
+ 0.010489404201507568,
339
+ 0.007517005782574415,
340
+ 0.012727208435535431,
341
+ 0.010788698680698872,
342
+ 0.012184095568954945,
343
+ 0.009171529673039913,
344
+ 0.003999281208962202,
345
+ 0.00763775035738945,
346
+ 0.008048650808632374,
347
+ 0.007546226028352976,
348
+ 0.04280756413936615,
349
+ 0.004729445558041334,
350
+ 0.0336616076529026,
351
+ 0.003961015492677689,
352
+ 0.003912051673978567,
353
+ 0.002861393615603447,
354
+ 0.006178874522447586,
355
+ 0.002068899804726243,
356
+ 0.004654192831367254,
357
+ 0.004825535695999861,
358
+ 0.0032716484274715185,
359
+ 0.004482524935156107,
360
+ 0.004428912419825792,
361
+ 0.004628469236195087
362
+ ],
363
+ "zipf_loss": [
364
+ 4.37274169921875,
365
+ 1.8046292066574097,
366
+ 1.7087011337280273,
367
+ 1.6968810558319092,
368
+ 1.6936335563659668,
369
+ 1.690199375152588,
370
+ 1.6878787279129028,
371
+ 1.6839007139205933,
372
+ 1.6731270551681519,
373
+ 1.5985805988311768,
374
+ 1.4745081663131714,
375
+ 1.278465986251831,
376
+ 1.1647508144378662,
377
+ 1.0889580249786377,
378
+ 0.9403650760650635,
379
+ 0.8638197183609009,
380
+ 0.8192105293273926,
381
+ 0.7208845019340515,
382
+ 0.5981146097183228,
383
+ 0.33281421661376953,
384
+ 0.15102912485599518,
385
+ 0.06269875168800354,
386
+ 0.02885546162724495,
387
+ 0.016147876158356667,
388
+ 0.009403230622410774,
389
+ 0.010388587601482868,
390
+ 0.031447142362594604,
391
+ 0.011647149920463562,
392
+ 0.008088257163763046,
393
+ 0.006815792992711067,
394
+ 0.009629786014556885,
395
+ 0.01600518450140953,
396
+ 0.004471144638955593,
397
+ 0.010364695452153683,
398
+ 0.00525753665715456,
399
+ 0.008417721837759018,
400
+ 0.010751370340585709,
401
+ 0.007105482742190361,
402
+ 0.0021849004551768303,
403
+ 0.005415467079728842,
404
+ 0.006301610730588436,
405
+ 0.00802694447338581,
406
+ 0.012695873156189919,
407
+ 0.0035742726176977158,
408
+ 0.016842156648635864,
409
+ 0.01661541312932968,
410
+ 0.009708529338240623,
411
+ 0.008232427760958672,
412
+ 0.007224865257740021,
413
+ 0.002215202199295163,
414
+ 0.004029060248285532,
415
+ 0.004625140223652124,
416
+ 0.005678504705429077,
417
+ 0.0038065649569034576,
418
+ 0.004278676584362984,
419
+ 0.0037967078387737274,
420
+ 0.0046645235270261765,
421
+ 0.0013988213613629341,
422
+ 0.004286607727408409,
423
+ 0.0054692612029612064,
424
+ 0.002797577530145645,
425
+ 0.0038844943046569824,
426
+ 0.006156958639621735,
427
+ 0.00890234112739563,
428
+ 0.0011534709483385086,
429
+ 0.0036813169717788696,
430
+ 0.0040760282427072525,
431
+ 0.004231432918459177,
432
+ 0.003363661468029022,
433
+ 0.0017783557996153831
434
+ ],
435
+ "denoise_loss": [],
436
+ "ortho_loss": [
437
+ 0.3933292031288147,
438
+ 0.19537021219730377,
439
+ 0.1488562375307083,
440
+ 0.13526654243469238,
441
+ 0.1290234625339508,
442
+ 0.12106598168611526,
443
+ 0.13526804745197296,
444
+ 0.1508757472038269,
445
+ 0.21096870303153992,
446
+ 0.2514957785606384,
447
+ 0.27831581234931946,
448
+ 0.2901284098625183,
449
+ 0.29795151948928833,
450
+ 0.32490643858909607,
451
+ 0.3221665322780609,
452
+ 0.3283940255641937,
453
+ 0.3153337240219116,
454
+ 0.3107602894306183,
455
+ 0.30619168281555176,
456
+ 0.3000865578651428,
457
+ 0.29851052165031433,
458
+ 0.29726409912109375,
459
+ 0.2884874641895294,
460
+ 0.28946927189826965,
461
+ 0.27607646584510803,
462
+ 0.28382158279418945,
463
+ 0.28244829177856445,
464
+ 0.29492366313934326,
465
+ 0.28803184628486633,
466
+ 0.2858043313026428,
467
+ 0.27292492985725403,
468
+ 0.2727271318435669,
469
+ 0.27270883321762085,
470
+ 0.277759850025177,
471
+ 0.28240156173706055,
472
+ 0.2703619599342346,
473
+ 0.2714839279651642,
474
+ 0.27064526081085205,
475
+ 0.2735464572906494,
476
+ 0.2756211757659912,
477
+ 0.28339874744415283,
478
+ 0.27706512808799744,
479
+ 0.2764289081096649,
480
+ 0.2683185040950775,
481
+ 0.2532169818878174,
482
+ 0.24946703016757965,
483
+ 0.2466099113225937,
484
+ 0.2510632872581482,
485
+ 0.2696925401687622,
486
+ 0.2670671343803406,
487
+ 0.2715094983577728,
488
+ 0.26723748445510864,
489
+ 0.26079726219177246,
490
+ 0.2603513300418854,
491
+ 0.25771471858024597,
492
+ 0.25748154520988464,
493
+ 0.2570980489253998,
494
+ 0.2605780363082886,
495
+ 0.26496967673301697,
496
+ 0.2605254054069519,
497
+ 0.25853437185287476,
498
+ 0.2556053102016449,
499
+ 0.24565964937210083,
500
+ 0.2401438057422638,
501
+ 0.23765115439891815,
502
+ 0.23894348740577698,
503
+ 0.23828937113285065,
504
+ 0.23862119019031525,
505
+ 0.23789578676223755,
506
+ 0.23580653965473175
507
+ ],
508
+ "lr": [
509
+ 7.840000000000001e-05,
510
+ 8e-05,
511
+ 8e-05,
512
+ 8e-05,
513
+ 8e-05,
514
+ 8e-05,
515
+ 8e-05,
516
+ 8e-05,
517
+ 8e-05,
518
+ 8e-05,
519
+ 8e-05,
520
+ 8e-05,
521
+ 8e-05,
522
+ 8e-05,
523
+ 8e-05,
524
+ 8e-05,
525
+ 8e-05,
526
+ 8e-05,
527
+ 8e-05,
528
+ 8e-05,
529
+ 8e-05,
530
+ 8e-05,
531
+ 8e-05,
532
+ 8e-05,
533
+ 8e-05,
534
+ 8e-05,
535
+ 8e-05,
536
+ 8e-05,
537
+ 8e-05,
538
+ 8e-05,
539
+ 8e-05,
540
+ 8e-05,
541
+ 8e-05,
542
+ 8e-05,
543
+ 8e-05,
544
+ 8e-05,
545
+ 8e-05,
546
+ 8e-05,
547
+ 8e-05,
548
+ 8e-05,
549
+ 8e-05,
550
+ 8e-05,
551
+ 7.864766839378239e-05,
552
+ 7.63160621761658e-05,
553
+ 7.398445595854923e-05,
554
+ 7.165284974093265e-05,
555
+ 6.932124352331606e-05,
556
+ 6.69896373056995e-05,
557
+ 6.465803108808292e-05,
558
+ 6.041450777202072e-05,
559
+ 5.8082901554404154e-05,
560
+ 5.5751295336787566e-05,
561
+ 5.3419689119171e-05,
562
+ 5.108808290155441e-05,
563
+ 4.8756476683937825e-05,
564
+ 4.642487046632125e-05,
565
+ 4.218134715025906e-05,
566
+ 3.98497409326425e-05,
567
+ 3.7518134715025914e-05,
568
+ 3.518652849740933e-05,
569
+ 3.285492227979275e-05,
570
+ 3.0523316062176166e-05,
571
+ 2.8191709844559595e-05,
572
+ 2.3948186528497416e-05,
573
+ 2.1616580310880825e-05,
574
+ 1.9284974093264255e-05,
575
+ 1.6953367875647667e-05,
576
+ 1.4621761658031097e-05,
577
+ 1.2290155440414508e-05,
578
+ 9.958549222797919e-06
579
+ ],
580
+ "emb_lr": [],
581
+ "eval_step": [
582
+ 350,
583
+ 741,
584
+ 1132,
585
+ 1523,
586
+ 1914,
587
+ 2305,
588
+ 2696,
589
+ 3087,
590
+ 3478,
591
+ 3869
592
+ ],
593
+ "eval_accuracy": [
594
+ 0.0,
595
+ 0.0,
596
+ 0.0,
597
+ 0.0,
598
+ 0.0,
599
+ 0.0,
600
+ 0.0,
601
+ 0.0,
602
+ 0.0,
603
+ 0.0
604
+ ]
605
+ },
606
+ "final_accuracy": 0.9707142857142858,
607
+ "sft_eval": {
608
+ "config": {
609
+ "ops": "add_sub",
610
+ "K": null,
611
+ "mode": "sft",
612
+ "n_digits": 6,
613
+ "n_per_split": 50
614
+ },
615
+ "splits": {
616
+ "add_S0": {
617
+ "full_accuracy": 1.0,
618
+ "n_examples": 50,
619
+ "per_subtask": {
620
+ "SA": {
621
+ "accuracy": 1.0,
622
+ "count": 295
623
+ },
624
+ "SS": {
625
+ "accuracy": 1.0,
626
+ "count": 55
627
+ }
628
+ }
629
+ },
630
+ "add_S1": {
631
+ "full_accuracy": 0.98,
632
+ "n_examples": 50,
633
+ "per_subtask": {
634
+ "SA": {
635
+ "accuracy": 1.0,
636
+ "count": 126
637
+ },
638
+ "SC": {
639
+ "accuracy": 1.0,
640
+ "count": 79
641
+ },
642
+ "SS": {
643
+ "accuracy": 1.0,
644
+ "count": 21
645
+ },
646
+ "UC": {
647
+ "accuracy": 0.9919354838709677,
648
+ "count": 124
649
+ }
650
+ }
651
+ },
652
+ "add_S2": {
653
+ "full_accuracy": 1.0,
654
+ "n_examples": 50,
655
+ "per_subtask": {
656
+ "SA": {
657
+ "accuracy": 1.0,
658
+ "count": 75
659
+ },
660
+ "SC": {
661
+ "accuracy": 1.0,
662
+ "count": 62
663
+ },
664
+ "SS": {
665
+ "accuracy": 1.0,
666
+ "count": 39
667
+ },
668
+ "UC": {
669
+ "accuracy": 1.0,
670
+ "count": 111
671
+ },
672
+ "US": {
673
+ "accuracy": 1.0,
674
+ "count": 63
675
+ }
676
+ }
677
+ },
678
+ "add_S3": {
679
+ "full_accuracy": 1.0,
680
+ "n_examples": 50,
681
+ "per_subtask": {
682
+ "SA": {
683
+ "accuracy": 1.0,
684
+ "count": 60
685
+ },
686
+ "SC": {
687
+ "accuracy": 1.0,
688
+ "count": 57
689
+ },
690
+ "SS": {
691
+ "accuracy": 1.0,
692
+ "count": 19
693
+ },
694
+ "UC": {
695
+ "accuracy": 1.0,
696
+ "count": 104
697
+ },
698
+ "US": {
699
+ "accuracy": 1.0,
700
+ "count": 110
701
+ }
702
+ }
703
+ },
704
+ "add_S4": {
705
+ "full_accuracy": 0.88,
706
+ "n_examples": 50,
707
+ "per_subtask": {
708
+ "SA": {
709
+ "accuracy": 1.0,
710
+ "count": 48
711
+ },
712
+ "SC": {
713
+ "accuracy": 1.0,
714
+ "count": 52
715
+ },
716
+ "SS": {
717
+ "accuracy": 1.0,
718
+ "count": 7
719
+ },
720
+ "UC": {
721
+ "accuracy": 0.9325842696629213,
722
+ "count": 89
723
+ },
724
+ "US": {
725
+ "accuracy": 1.0,
726
+ "count": 154
727
+ }
728
+ }
729
+ },
730
+ "add_S5": {
731
+ "full_accuracy": 0.92,
732
+ "n_examples": 50,
733
+ "per_subtask": {
734
+ "SA": {
735
+ "accuracy": 1.0,
736
+ "count": 50
737
+ },
738
+ "SC": {
739
+ "accuracy": 1.0,
740
+ "count": 50
741
+ },
742
+ "UC": {
743
+ "accuracy": 0.96,
744
+ "count": 50
745
+ },
746
+ "US": {
747
+ "accuracy": 0.99,
748
+ "count": 200
749
+ }
750
+ }
751
+ },
752
+ "add_S6": {
753
+ "full_accuracy": 1.0,
754
+ "n_examples": 50,
755
+ "per_subtask": {
756
+ "SC": {
757
+ "accuracy": 1.0,
758
+ "count": 50
759
+ },
760
+ "UC": {
761
+ "accuracy": 1.0,
762
+ "count": 50
763
+ },
764
+ "US": {
765
+ "accuracy": 1.0,
766
+ "count": 250
767
+ }
768
+ }
769
+ },
770
+ "add_random": {
771
+ "full_accuracy": 1.0,
772
+ "n_examples": 200,
773
+ "per_subtask": {
774
+ "SA": {
775
+ "accuracy": 1.0,
776
+ "count": 431
777
+ },
778
+ "SC": {
779
+ "accuracy": 1.0,
780
+ "count": 316
781
+ },
782
+ "SS": {
783
+ "accuracy": 1.0,
784
+ "count": 39
785
+ },
786
+ "UC": {
787
+ "accuracy": 1.0,
788
+ "count": 560
789
+ },
790
+ "US": {
791
+ "accuracy": 1.0,
792
+ "count": 54
793
+ }
794
+ }
795
+ },
796
+ "add_C3": {
797
+ "full_accuracy": 0.98,
798
+ "n_examples": 50,
799
+ "per_subtask": {
800
+ "SA": {
801
+ "accuracy": 1.0,
802
+ "count": 150
803
+ },
804
+ "SC": {
805
+ "accuracy": 1.0,
806
+ "count": 50
807
+ },
808
+ "UC": {
809
+ "accuracy": 0.9903846153846154,
810
+ "count": 104
811
+ },
812
+ "US": {
813
+ "accuracy": 1.0,
814
+ "count": 46
815
+ }
816
+ }
817
+ },
818
+ "add_C4": {
819
+ "full_accuracy": 0.96,
820
+ "n_examples": 50,
821
+ "per_subtask": {
822
+ "SA": {
823
+ "accuracy": 1.0,
824
+ "count": 100
825
+ },
826
+ "SC": {
827
+ "accuracy": 1.0,
828
+ "count": 50
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.983739837398374,
832
+ "count": 123
833
+ },
834
+ "US": {
835
+ "accuracy": 1.0,
836
+ "count": 77
837
+ }
838
+ }
839
+ },
840
+ "add_C5": {
841
+ "full_accuracy": 0.98,
842
+ "n_examples": 50,
843
+ "per_subtask": {
844
+ "SA": {
845
+ "accuracy": 1.0,
846
+ "count": 50
847
+ },
848
+ "SC": {
849
+ "accuracy": 1.0,
850
+ "count": 50
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.9935064935064936,
854
+ "count": 154
855
+ },
856
+ "US": {
857
+ "accuracy": 1.0,
858
+ "count": 96
859
+ }
860
+ }
861
+ },
862
+ "add_C6": {
863
+ "full_accuracy": 0.94,
864
+ "n_examples": 50,
865
+ "per_subtask": {
866
+ "SC": {
867
+ "accuracy": 1.0,
868
+ "count": 50
869
+ },
870
+ "UC": {
871
+ "accuracy": 0.9835164835164835,
872
+ "count": 182
873
+ },
874
+ "US": {
875
+ "accuracy": 1.0,
876
+ "count": 118
877
+ }
878
+ }
879
+ },
880
+ "sub_M0": {
881
+ "full_accuracy": 1.0,
882
+ "n_examples": 50,
883
+ "per_subtask": {
884
+ "MD": {
885
+ "accuracy": 1.0,
886
+ "count": 294
887
+ },
888
+ "ME": {
889
+ "accuracy": 1.0,
890
+ "count": 56
891
+ }
892
+ }
893
+ },
894
+ "sub_M1": {
895
+ "full_accuracy": 1.0,
896
+ "n_examples": 50,
897
+ "per_subtask": {
898
+ "MD": {
899
+ "accuracy": 1.0,
900
+ "count": 143
901
+ },
902
+ "MB": {
903
+ "accuracy": 1.0,
904
+ "count": 69
905
+ },
906
+ "ME": {
907
+ "accuracy": 1.0,
908
+ "count": 15
909
+ },
910
+ "UB": {
911
+ "accuracy": 1.0,
912
+ "count": 123
913
+ }
914
+ }
915
+ },
916
+ "sub_M2": {
917
+ "full_accuracy": 1.0,
918
+ "n_examples": 50,
919
+ "per_subtask": {
920
+ "MD": {
921
+ "accuracy": 1.0,
922
+ "count": 108
923
+ },
924
+ "MB": {
925
+ "accuracy": 1.0,
926
+ "count": 52
927
+ },
928
+ "ME": {
929
+ "accuracy": 1.0,
930
+ "count": 52
931
+ },
932
+ "UB": {
933
+ "accuracy": 1.0,
934
+ "count": 87
935
+ },
936
+ "UD": {
937
+ "accuracy": 1.0,
938
+ "count": 51
939
+ }
940
+ }
941
+ },
942
+ "sub_M3": {
943
+ "full_accuracy": 1.0,
944
+ "n_examples": 50,
945
+ "per_subtask": {
946
+ "MD": {
947
+ "accuracy": 1.0,
948
+ "count": 94
949
+ },
950
+ "MB": {
951
+ "accuracy": 1.0,
952
+ "count": 51
953
+ },
954
+ "ME": {
955
+ "accuracy": 1.0,
956
+ "count": 25
957
+ },
958
+ "UB": {
959
+ "accuracy": 1.0,
960
+ "count": 78
961
+ },
962
+ "UD": {
963
+ "accuracy": 1.0,
964
+ "count": 102
965
+ }
966
+ }
967
+ },
968
+ "sub_M4": {
969
+ "full_accuracy": 0.48,
970
+ "n_examples": 50,
971
+ "per_subtask": {
972
+ "MD": {
973
+ "accuracy": 1.0,
974
+ "count": 100
975
+ },
976
+ "MB": {
977
+ "accuracy": 1.0,
978
+ "count": 50
979
+ },
980
+ "UB": {
981
+ "accuracy": 0.48,
982
+ "count": 50
983
+ },
984
+ "UD": {
985
+ "accuracy": 1.0,
986
+ "count": 150
987
+ }
988
+ }
989
+ },
990
+ "sub_M5": {
991
+ "full_accuracy": 0.6,
992
+ "n_examples": 50,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 50
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 50
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.94,
1004
+ "count": 50
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.9,
1008
+ "count": 200
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 1.0,
1014
+ "n_examples": 200,
1015
+ "per_subtask": {
1016
+ "MD": {
1017
+ "accuracy": 1.0,
1018
+ "count": 588
1019
+ },
1020
+ "MB": {
1021
+ "accuracy": 1.0,
1022
+ "count": 268
1023
+ },
1024
+ "ME": {
1025
+ "accuracy": 1.0,
1026
+ "count": 60
1027
+ },
1028
+ "UB": {
1029
+ "accuracy": 1.0,
1030
+ "count": 447
1031
+ },
1032
+ "UD": {
1033
+ "accuracy": 1.0,
1034
+ "count": 37
1035
+ }
1036
+ }
1037
+ },
1038
+ "sub_B3": {
1039
+ "full_accuracy": 1.0,
1040
+ "n_examples": 50,
1041
+ "per_subtask": {
1042
+ "MD": {
1043
+ "accuracy": 1.0,
1044
+ "count": 150
1045
+ },
1046
+ "MB": {
1047
+ "accuracy": 1.0,
1048
+ "count": 50
1049
+ },
1050
+ "UB": {
1051
+ "accuracy": 1.0,
1052
+ "count": 107
1053
+ },
1054
+ "UD": {
1055
+ "accuracy": 1.0,
1056
+ "count": 43
1057
+ }
1058
+ }
1059
+ },
1060
+ "sub_B4": {
1061
+ "full_accuracy": 0.88,
1062
+ "n_examples": 50,
1063
+ "per_subtask": {
1064
+ "MD": {
1065
+ "accuracy": 1.0,
1066
+ "count": 100
1067
+ },
1068
+ "MB": {
1069
+ "accuracy": 1.0,
1070
+ "count": 50
1071
+ },
1072
+ "UB": {
1073
+ "accuracy": 0.9473684210526315,
1074
+ "count": 114
1075
+ },
1076
+ "UD": {
1077
+ "accuracy": 1.0,
1078
+ "count": 86
1079
+ }
1080
+ }
1081
+ },
1082
+ "sub_B5": {
1083
+ "full_accuracy": 0.84,
1084
+ "n_examples": 50,
1085
+ "per_subtask": {
1086
+ "MD": {
1087
+ "accuracy": 1.0,
1088
+ "count": 50
1089
+ },
1090
+ "MB": {
1091
+ "accuracy": 1.0,
1092
+ "count": 50
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.9738562091503268,
1096
+ "count": 153
1097
+ },
1098
+ "UD": {
1099
+ "accuracy": 0.9484536082474226,
1100
+ "count": 97
1101
+ }
1102
+ }
1103
+ }
1104
+ },
1105
+ "summary": {
1106
+ "overall_accuracy": 0.9442857142857143,
1107
+ "total_examples": 1400,
1108
+ "n_splits": 22
1109
+ }
1110
+ },
1111
+ "sorl_eval": {
1112
+ "config": {
1113
+ "ops": "add_sub",
1114
+ "K": 1,
1115
+ "mode": "sorl",
1116
+ "n_digits": 6,
1117
+ "n_per_split": 50
1118
+ },
1119
+ "splits": {
1120
+ "add_S0": {
1121
+ "full_accuracy": 1.0,
1122
+ "n_examples": 50,
1123
+ "per_subtask": {
1124
+ "SA": {
1125
+ "accuracy": 1.0,
1126
+ "count": 295
1127
+ },
1128
+ "SS": {
1129
+ "accuracy": 1.0,
1130
+ "count": 55
1131
+ }
1132
+ }
1133
+ },
1134
+ "add_S1": {
1135
+ "full_accuracy": 1.0,
1136
+ "n_examples": 50,
1137
+ "per_subtask": {
1138
+ "SA": {
1139
+ "accuracy": 1.0,
1140
+ "count": 126
1141
+ },
1142
+ "SC": {
1143
+ "accuracy": 1.0,
1144
+ "count": 79
1145
+ },
1146
+ "SS": {
1147
+ "accuracy": 1.0,
1148
+ "count": 21
1149
+ },
1150
+ "UC": {
1151
+ "accuracy": 1.0,
1152
+ "count": 124
1153
+ }
1154
+ }
1155
+ },
1156
+ "add_S2": {
1157
+ "full_accuracy": 1.0,
1158
+ "n_examples": 50,
1159
+ "per_subtask": {
1160
+ "SA": {
1161
+ "accuracy": 1.0,
1162
+ "count": 75
1163
+ },
1164
+ "SC": {
1165
+ "accuracy": 1.0,
1166
+ "count": 62
1167
+ },
1168
+ "SS": {
1169
+ "accuracy": 1.0,
1170
+ "count": 39
1171
+ },
1172
+ "UC": {
1173
+ "accuracy": 1.0,
1174
+ "count": 111
1175
+ },
1176
+ "US": {
1177
+ "accuracy": 1.0,
1178
+ "count": 63
1179
+ }
1180
+ }
1181
+ },
1182
+ "add_S3": {
1183
+ "full_accuracy": 1.0,
1184
+ "n_examples": 50,
1185
+ "per_subtask": {
1186
+ "SA": {
1187
+ "accuracy": 1.0,
1188
+ "count": 60
1189
+ },
1190
+ "SC": {
1191
+ "accuracy": 1.0,
1192
+ "count": 57
1193
+ },
1194
+ "SS": {
1195
+ "accuracy": 1.0,
1196
+ "count": 19
1197
+ },
1198
+ "UC": {
1199
+ "accuracy": 1.0,
1200
+ "count": 104
1201
+ },
1202
+ "US": {
1203
+ "accuracy": 1.0,
1204
+ "count": 110
1205
+ }
1206
+ }
1207
+ },
1208
+ "add_S4": {
1209
+ "full_accuracy": 1.0,
1210
+ "n_examples": 50,
1211
+ "per_subtask": {
1212
+ "SA": {
1213
+ "accuracy": 1.0,
1214
+ "count": 48
1215
+ },
1216
+ "SC": {
1217
+ "accuracy": 1.0,
1218
+ "count": 52
1219
+ },
1220
+ "SS": {
1221
+ "accuracy": 1.0,
1222
+ "count": 7
1223
+ },
1224
+ "UC": {
1225
+ "accuracy": 1.0,
1226
+ "count": 89
1227
+ },
1228
+ "US": {
1229
+ "accuracy": 1.0,
1230
+ "count": 154
1231
+ }
1232
+ }
1233
+ },
1234
+ "add_S5": {
1235
+ "full_accuracy": 0.62,
1236
+ "n_examples": 50,
1237
+ "per_subtask": {
1238
+ "SA": {
1239
+ "accuracy": 1.0,
1240
+ "count": 50
1241
+ },
1242
+ "SC": {
1243
+ "accuracy": 1.0,
1244
+ "count": 50
1245
+ },
1246
+ "UC": {
1247
+ "accuracy": 0.62,
1248
+ "count": 50
1249
+ },
1250
+ "US": {
1251
+ "accuracy": 1.0,
1252
+ "count": 200
1253
+ }
1254
+ }
1255
+ },
1256
+ "add_S6": {
1257
+ "full_accuracy": 1.0,
1258
+ "n_examples": 50,
1259
+ "per_subtask": {
1260
+ "SC": {
1261
+ "accuracy": 1.0,
1262
+ "count": 50
1263
+ },
1264
+ "UC": {
1265
+ "accuracy": 1.0,
1266
+ "count": 50
1267
+ },
1268
+ "US": {
1269
+ "accuracy": 1.0,
1270
+ "count": 250
1271
+ }
1272
+ }
1273
+ },
1274
+ "add_random": {
1275
+ "full_accuracy": 1.0,
1276
+ "n_examples": 200,
1277
+ "per_subtask": {
1278
+ "SA": {
1279
+ "accuracy": 1.0,
1280
+ "count": 431
1281
+ },
1282
+ "SC": {
1283
+ "accuracy": 1.0,
1284
+ "count": 316
1285
+ },
1286
+ "SS": {
1287
+ "accuracy": 1.0,
1288
+ "count": 39
1289
+ },
1290
+ "UC": {
1291
+ "accuracy": 1.0,
1292
+ "count": 560
1293
+ },
1294
+ "US": {
1295
+ "accuracy": 1.0,
1296
+ "count": 54
1297
+ }
1298
+ }
1299
+ },
1300
+ "add_C3": {
1301
+ "full_accuracy": 1.0,
1302
+ "n_examples": 50,
1303
+ "per_subtask": {
1304
+ "SA": {
1305
+ "accuracy": 1.0,
1306
+ "count": 150
1307
+ },
1308
+ "SC": {
1309
+ "accuracy": 1.0,
1310
+ "count": 50
1311
+ },
1312
+ "UC": {
1313
+ "accuracy": 1.0,
1314
+ "count": 104
1315
+ },
1316
+ "US": {
1317
+ "accuracy": 1.0,
1318
+ "count": 46
1319
+ }
1320
+ }
1321
+ },
1322
+ "add_C4": {
1323
+ "full_accuracy": 1.0,
1324
+ "n_examples": 50,
1325
+ "per_subtask": {
1326
+ "SA": {
1327
+ "accuracy": 1.0,
1328
+ "count": 100
1329
+ },
1330
+ "SC": {
1331
+ "accuracy": 1.0,
1332
+ "count": 50
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 1.0,
1336
+ "count": 123
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 1.0,
1340
+ "count": 77
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_C5": {
1345
+ "full_accuracy": 1.0,
1346
+ "n_examples": 50,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 50
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 1.0,
1354
+ "count": 50
1355
+ },
1356
+ "UC": {
1357
+ "accuracy": 1.0,
1358
+ "count": 154
1359
+ },
1360
+ "US": {
1361
+ "accuracy": 1.0,
1362
+ "count": 96
1363
+ }
1364
+ }
1365
+ },
1366
+ "add_C6": {
1367
+ "full_accuracy": 1.0,
1368
+ "n_examples": 50,
1369
+ "per_subtask": {
1370
+ "SC": {
1371
+ "accuracy": 1.0,
1372
+ "count": 50
1373
+ },
1374
+ "UC": {
1375
+ "accuracy": 1.0,
1376
+ "count": 182
1377
+ },
1378
+ "US": {
1379
+ "accuracy": 1.0,
1380
+ "count": 118
1381
+ }
1382
+ }
1383
+ },
1384
+ "sub_M0": {
1385
+ "full_accuracy": 1.0,
1386
+ "n_examples": 50,
1387
+ "per_subtask": {
1388
+ "MD": {
1389
+ "accuracy": 1.0,
1390
+ "count": 294
1391
+ },
1392
+ "ME": {
1393
+ "accuracy": 1.0,
1394
+ "count": 56
1395
+ }
1396
+ }
1397
+ },
1398
+ "sub_M1": {
1399
+ "full_accuracy": 1.0,
1400
+ "n_examples": 50,
1401
+ "per_subtask": {
1402
+ "MD": {
1403
+ "accuracy": 1.0,
1404
+ "count": 143
1405
+ },
1406
+ "MB": {
1407
+ "accuracy": 1.0,
1408
+ "count": 69
1409
+ },
1410
+ "ME": {
1411
+ "accuracy": 1.0,
1412
+ "count": 15
1413
+ },
1414
+ "UB": {
1415
+ "accuracy": 1.0,
1416
+ "count": 123
1417
+ }
1418
+ }
1419
+ },
1420
+ "sub_M2": {
1421
+ "full_accuracy": 1.0,
1422
+ "n_examples": 50,
1423
+ "per_subtask": {
1424
+ "MD": {
1425
+ "accuracy": 1.0,
1426
+ "count": 108
1427
+ },
1428
+ "MB": {
1429
+ "accuracy": 1.0,
1430
+ "count": 52
1431
+ },
1432
+ "ME": {
1433
+ "accuracy": 1.0,
1434
+ "count": 52
1435
+ },
1436
+ "UB": {
1437
+ "accuracy": 1.0,
1438
+ "count": 87
1439
+ },
1440
+ "UD": {
1441
+ "accuracy": 1.0,
1442
+ "count": 51
1443
+ }
1444
+ }
1445
+ },
1446
+ "sub_M3": {
1447
+ "full_accuracy": 1.0,
1448
+ "n_examples": 50,
1449
+ "per_subtask": {
1450
+ "MD": {
1451
+ "accuracy": 1.0,
1452
+ "count": 94
1453
+ },
1454
+ "MB": {
1455
+ "accuracy": 1.0,
1456
+ "count": 51
1457
+ },
1458
+ "ME": {
1459
+ "accuracy": 1.0,
1460
+ "count": 25
1461
+ },
1462
+ "UB": {
1463
+ "accuracy": 1.0,
1464
+ "count": 78
1465
+ },
1466
+ "UD": {
1467
+ "accuracy": 1.0,
1468
+ "count": 102
1469
+ }
1470
+ }
1471
+ },
1472
+ "sub_M4": {
1473
+ "full_accuracy": 1.0,
1474
+ "n_examples": 50,
1475
+ "per_subtask": {
1476
+ "MD": {
1477
+ "accuracy": 1.0,
1478
+ "count": 100
1479
+ },
1480
+ "MB": {
1481
+ "accuracy": 1.0,
1482
+ "count": 50
1483
+ },
1484
+ "UB": {
1485
+ "accuracy": 1.0,
1486
+ "count": 50
1487
+ },
1488
+ "UD": {
1489
+ "accuracy": 1.0,
1490
+ "count": 150
1491
+ }
1492
+ }
1493
+ },
1494
+ "sub_M5": {
1495
+ "full_accuracy": 0.58,
1496
+ "n_examples": 50,
1497
+ "per_subtask": {
1498
+ "MD": {
1499
+ "accuracy": 1.0,
1500
+ "count": 50
1501
+ },
1502
+ "MB": {
1503
+ "accuracy": 1.0,
1504
+ "count": 50
1505
+ },
1506
+ "UB": {
1507
+ "accuracy": 0.58,
1508
+ "count": 50
1509
+ },
1510
+ "UD": {
1511
+ "accuracy": 1.0,
1512
+ "count": 200
1513
+ }
1514
+ }
1515
+ },
1516
+ "sub_random": {
1517
+ "full_accuracy": 1.0,
1518
+ "n_examples": 200,
1519
+ "per_subtask": {
1520
+ "MD": {
1521
+ "accuracy": 1.0,
1522
+ "count": 588
1523
+ },
1524
+ "MB": {
1525
+ "accuracy": 1.0,
1526
+ "count": 268
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 1.0,
1530
+ "count": 60
1531
+ },
1532
+ "UB": {
1533
+ "accuracy": 1.0,
1534
+ "count": 447
1535
+ },
1536
+ "UD": {
1537
+ "accuracy": 1.0,
1538
+ "count": 37
1539
+ }
1540
+ }
1541
+ },
1542
+ "sub_B3": {
1543
+ "full_accuracy": 1.0,
1544
+ "n_examples": 50,
1545
+ "per_subtask": {
1546
+ "MD": {
1547
+ "accuracy": 1.0,
1548
+ "count": 150
1549
+ },
1550
+ "MB": {
1551
+ "accuracy": 1.0,
1552
+ "count": 50
1553
+ },
1554
+ "UB": {
1555
+ "accuracy": 1.0,
1556
+ "count": 107
1557
+ },
1558
+ "UD": {
1559
+ "accuracy": 1.0,
1560
+ "count": 43
1561
+ }
1562
+ }
1563
+ },
1564
+ "sub_B4": {
1565
+ "full_accuracy": 1.0,
1566
+ "n_examples": 50,
1567
+ "per_subtask": {
1568
+ "MD": {
1569
+ "accuracy": 1.0,
1570
+ "count": 100
1571
+ },
1572
+ "MB": {
1573
+ "accuracy": 1.0,
1574
+ "count": 50
1575
+ },
1576
+ "UB": {
1577
+ "accuracy": 1.0,
1578
+ "count": 114
1579
+ },
1580
+ "UD": {
1581
+ "accuracy": 1.0,
1582
+ "count": 86
1583
+ }
1584
+ }
1585
+ },
1586
+ "sub_B5": {
1587
+ "full_accuracy": 1.0,
1588
+ "n_examples": 50,
1589
+ "per_subtask": {
1590
+ "MD": {
1591
+ "accuracy": 1.0,
1592
+ "count": 50
1593
+ },
1594
+ "MB": {
1595
+ "accuracy": 1.0,
1596
+ "count": 50
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 1.0,
1600
+ "count": 153
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 1.0,
1604
+ "count": 97
1605
+ }
1606
+ }
1607
+ }
1608
+ },
1609
+ "summary": {
1610
+ "overall_accuracy": 0.9707142857142858,
1611
+ "total_examples": 1400,
1612
+ "n_splits": 22
1613
+ }
1614
+ },
1615
+ "sorl_overall_accuracy": 0.9707142857142858,
1616
+ "sft_overall_accuracy": 0.9442857142857143
1617
+ }
add_sub_sorl_v1_abs10_K1_25K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccab70b2517264fbfc0cc6c61a4eb7e47555f03c4db8db8c06ccd93ca673eac1
3
+ size 650303660
add_sub_sorl_v1_abs10_K1_25K/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 2,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 10,
9
+ "K": 1,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 10,
15
+ "dataset_size": 25000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K1_25K",
18
+ "device": "cuda",
19
+ "push_to_hub": true,
20
+ "no_wandb": false,
21
+ "n_params": 162499262,
22
+ "run_name": "add_sub_sorl_v1_abs10_K1_25K",
23
+ "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
+ "timestamp": "2026-04-12T01:58:11.626522+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": "nv7szjmr",
31
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/nv7szjmr",
32
+ "final_accuracy": 0.9707142857142858,
33
+ "sft_accuracy": 0.9442857142857143,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }