amirali1985 commited on
Commit
b8b942a
·
verified ·
1 Parent(s): 0fba29e

Upload add_sub_sorl_v1_abs10_25K

Browse files
add_sub_sorl_v1_abs10_25K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_25K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_25K/metrics.json ADDED
@@ -0,0 +1,1617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 441,
12
+ 491,
13
+ 541,
14
+ 591,
15
+ 641,
16
+ 691,
17
+ 741,
18
+ 832,
19
+ 882,
20
+ 932,
21
+ 982,
22
+ 1032,
23
+ 1082,
24
+ 1132,
25
+ 1223,
26
+ 1273,
27
+ 1323,
28
+ 1373,
29
+ 1423,
30
+ 1473,
31
+ 1523,
32
+ 1614,
33
+ 1664,
34
+ 1714,
35
+ 1764,
36
+ 1814,
37
+ 1864,
38
+ 1914,
39
+ 2005,
40
+ 2055,
41
+ 2105,
42
+ 2155,
43
+ 2205,
44
+ 2255,
45
+ 2305,
46
+ 2396,
47
+ 2446,
48
+ 2496,
49
+ 2546,
50
+ 2596,
51
+ 2646,
52
+ 2696,
53
+ 2787,
54
+ 2837,
55
+ 2887,
56
+ 2937,
57
+ 2987,
58
+ 3037,
59
+ 3087,
60
+ 3178,
61
+ 3228,
62
+ 3278,
63
+ 3328,
64
+ 3378,
65
+ 3428,
66
+ 3478,
67
+ 3569,
68
+ 3619,
69
+ 3669,
70
+ 3719,
71
+ 3769,
72
+ 3819,
73
+ 3869
74
+ ],
75
+ "loss": [
76
+ 14.256978034973145,
77
+ 8.812192916870117,
78
+ 5.189567565917969,
79
+ 3.4612081050872803,
80
+ 2.9352636337280273,
81
+ 3.010927438735962,
82
+ 2.87626314163208,
83
+ 2.229984998703003,
84
+ 1.9719955921173096,
85
+ 1.5822030305862427,
86
+ 1.6735219955444336,
87
+ 1.3240095376968384,
88
+ 1.6034573316574097,
89
+ 1.584771752357483,
90
+ 1.3375678062438965,
91
+ 1.5081782341003418,
92
+ 1.2838478088378906,
93
+ 1.442265510559082,
94
+ 1.8373520374298096,
95
+ 1.5873284339904785,
96
+ 1.0017588138580322,
97
+ 0.5844299793243408,
98
+ 0.4599916934967041,
99
+ 0.4710693061351776,
100
+ -0.016015514731407166,
101
+ -0.03885940462350845,
102
+ -1.8101730346679688,
103
+ -6.263668537139893,
104
+ -8.383745193481445,
105
+ -8.508172035217285,
106
+ -9.05787181854248,
107
+ -9.528923988342285,
108
+ -10.230069160461426,
109
+ -10.578496932983398,
110
+ -10.426430702209473,
111
+ -10.69153118133545,
112
+ -11.126657485961914,
113
+ -10.420424461364746,
114
+ -10.6273775100708,
115
+ -8.421341896057129,
116
+ -7.00389289855957,
117
+ -6.054770469665527,
118
+ -3.3519654273986816,
119
+ -2.9612250328063965,
120
+ -2.2057387828826904,
121
+ -1.278799057006836,
122
+ -1.6747382879257202,
123
+ -0.8737431168556213,
124
+ -0.8648754358291626,
125
+ -0.9642059803009033,
126
+ -1.3165982961654663,
127
+ -0.8842989802360535,
128
+ -0.5905858278274536,
129
+ -0.7463504076004028,
130
+ -1.1404083967208862,
131
+ -0.4939427971839905,
132
+ -0.732154905796051,
133
+ -0.5326835513114929,
134
+ -0.6656410694122314,
135
+ -0.5514172315597534,
136
+ -0.4456074833869934,
137
+ -0.8364714980125427,
138
+ -0.6598624587059021,
139
+ -0.48375943303108215,
140
+ -0.8276984691619873,
141
+ -0.6905249953269958,
142
+ -0.7523627281188965,
143
+ -0.6108655333518982,
144
+ -0.634128749370575,
145
+ -0.6320534944534302
146
+ ],
147
+ "base_loss": [
148
+ 8.179495811462402,
149
+ 6.115806579589844,
150
+ 4.016663551330566,
151
+ 2.338233232498169,
152
+ 2.049792528152466,
153
+ 1.9500830173492432,
154
+ 1.8704925775527954,
155
+ 1.915109634399414,
156
+ 1.9027211666107178,
157
+ 1.822627067565918,
158
+ 1.7563773393630981,
159
+ 1.8559844493865967,
160
+ 1.810850739479065,
161
+ 1.8188568353652954,
162
+ 1.8670803308486938,
163
+ 1.7376196384429932,
164
+ 1.7410252094268799,
165
+ 1.6789716482162476,
166
+ 1.7815697193145752,
167
+ 1.7297338247299194,
168
+ 1.76444411277771,
169
+ 1.7341679334640503,
170
+ 1.745019555091858,
171
+ 1.7294776439666748,
172
+ 1.7048413753509521,
173
+ 1.7179557085037231,
174
+ 1.7174227237701416,
175
+ 1.8109327554702759,
176
+ 1.6843860149383545,
177
+ 1.691207766532898,
178
+ 1.6398429870605469,
179
+ 1.6830883026123047,
180
+ 1.7041343450546265,
181
+ 1.7096806764602661,
182
+ 1.6330136060714722,
183
+ 1.572530746459961,
184
+ 1.6290007829666138,
185
+ 1.4542722702026367,
186
+ 1.4407005310058594,
187
+ 1.222497820854187,
188
+ 1.0295950174331665,
189
+ 0.9284564256668091,
190
+ 0.6118221282958984,
191
+ 0.5249842405319214,
192
+ 0.4513547718524933,
193
+ 0.31663909554481506,
194
+ 0.3410135805606842,
195
+ 0.3127397894859314,
196
+ 0.23937572538852692,
197
+ 0.23965051770210266,
198
+ 0.2704034447669983,
199
+ 0.22560283541679382,
200
+ 0.20714624226093292,
201
+ 0.18898002803325653,
202
+ 0.21477903425693512,
203
+ 0.18460628390312195,
204
+ 0.20580320060253143,
205
+ 0.16052566468715668,
206
+ 0.17262044548988342,
207
+ 0.14774015545845032,
208
+ 0.14181342720985413,
209
+ 0.2022370994091034,
210
+ 0.15932251513004303,
211
+ 0.11304645985364914,
212
+ 0.16339722275733948,
213
+ 0.16573865711688995,
214
+ 0.17047688364982605,
215
+ 0.13443663716316223,
216
+ 0.14277073740959167,
217
+ 0.12313588708639145
218
+ ],
219
+ "info_loss": [
220
+ -0.23002290725708008,
221
+ -0.16527986526489258,
222
+ -0.11838364601135254,
223
+ -0.08787274360656738,
224
+ -0.10383403301239014,
225
+ -0.0843881368637085,
226
+ -0.08847904205322266,
227
+ -0.1563432216644287,
228
+ -0.1807953119277954,
229
+ -0.21196186542510986,
230
+ -0.19581389427185059,
231
+ -0.24078822135925293,
232
+ -0.2084348201751709,
233
+ -0.21077227592468262,
234
+ -0.2398141622543335,
235
+ -0.2098095417022705,
236
+ -0.231908917427063,
237
+ -0.20753967761993408,
238
+ -0.17290937900543213,
239
+ -0.17037701606750488,
240
+ -0.12913298606872559,
241
+ -0.12954926490783691,
242
+ -0.1401742696762085,
243
+ -0.13750088214874268,
244
+ -0.18243885040283203,
245
+ -0.18511807918548584,
246
+ -0.36848652362823486,
247
+ -0.8207617402076721,
248
+ -1.0179661512374878,
249
+ -1.0324950218200684,
250
+ -1.079706072807312,
251
+ -1.1315070390701294,
252
+ -1.2022056579589844,
253
+ -1.2376500368118286,
254
+ -1.2149099111557007,
255
+ -1.234117865562439,
256
+ -1.2836066484451294,
257
+ -1.1954413652420044,
258
+ -1.2143906354904175,
259
+ -0.9723633527755737,
260
+ -0.8120318651199341,
261
+ -0.7064701318740845,
262
+ -0.40395891666412354,
263
+ -0.3564797043800354,
264
+ -0.2730256915092468,
265
+ -0.1676408350467682,
266
+ -0.2091994285583496,
267
+ -0.12594006955623627,
268
+ -0.1181057021021843,
269
+ -0.12815025448799133,
270
+ -0.16598695516586304,
271
+ -0.11813508719205856,
272
+ -0.08626975864171982,
273
+ -0.09994277358055115,
274
+ -0.14211693406105042,
275
+ -0.07376568019390106,
276
+ -0.10002028197050095,
277
+ -0.07552778720855713,
278
+ -0.08972552418708801,
279
+ -0.0756625384092331,
280
+ -0.06394617259502411,
281
+ -0.10925082862377167,
282
+ -0.08743764460086823,
283
+ -0.06474234163761139,
284
+ -0.10444813966751099,
285
+ -0.09072288870811462,
286
+ -0.09688038378953934,
287
+ -0.07976917922496796,
288
+ -0.08310963958501816,
289
+ -0.08014579117298126
290
+ ],
291
+ "abs_loss": [
292
+ 2.2766036987304688,
293
+ 2.091233730316162,
294
+ 1.9168267250061035,
295
+ 1.879494547843933,
296
+ 1.8019380569458008,
297
+ 1.82309889793396,
298
+ 1.8274445533752441,
299
+ 1.8011195659637451,
300
+ 1.8016701936721802,
301
+ 1.8573944568634033,
302
+ 1.8407838344573975,
303
+ 1.8531955480575562,
304
+ 1.851827621459961,
305
+ 1.843540906906128,
306
+ 1.8486919403076172,
307
+ 1.8462592363357544,
308
+ 1.8527169227600098,
309
+ 1.8005094528198242,
310
+ 1.8157070875167847,
311
+ 1.8012362718582153,
312
+ 1.0213181972503662,
313
+ 0.5417872667312622,
314
+ 0.5030105113983154,
315
+ 0.5118759274482727,
316
+ 0.4955863952636719,
317
+ 0.44640424847602844,
318
+ 0.6110158562660217,
319
+ 0.558056116104126,
320
+ 0.486600399017334,
321
+ 0.562961757183075,
322
+ 0.47528234124183655,
323
+ 0.49910861253738403,
324
+ 0.46877163648605347,
325
+ 0.44574135541915894,
326
+ 0.5675186514854431,
327
+ 0.4594317078590393,
328
+ 0.49148818850517273,
329
+ 0.5518755912780762,
330
+ 0.5050919055938721,
331
+ 0.4664192795753479,
332
+ 0.5244419574737549,
333
+ 0.47135239839553833,
334
+ 0.5251164436340332,
335
+ 0.5231102705001831,
336
+ 0.5319302678108215,
337
+ 0.5147315263748169,
338
+ 0.4716704487800598,
339
+ 0.5043060779571533,
340
+ 0.5314348936080933,
341
+ 0.6154720783233643,
342
+ 0.6017305850982666,
343
+ 0.5053713321685791,
344
+ 0.5168009996414185,
345
+ 0.5287708044052124,
346
+ 0.5177637338638306,
347
+ 0.46657508611679077,
348
+ 0.4951359033584595,
349
+ 0.4861401319503784,
350
+ 0.40949398279190063,
351
+ 0.38447850942611694,
352
+ 0.40168479084968567,
353
+ 0.3637966215610504,
354
+ 0.41874241828918457,
355
+ 0.4040564298629761,
356
+ 0.3729878067970276,
357
+ 0.36216187477111816,
358
+ 0.3261665105819702,
359
+ 0.3961992561817169,
360
+ 0.40525567531585693,
361
+ 0.347781777381897
362
+ ],
363
+ "zipf_loss": [
364
+ 8.15005111694336,
365
+ 4.140061378479004,
366
+ 2.16505765914917,
367
+ 1.8137528896331787,
368
+ 1.7436177730560303,
369
+ 1.7224159240722656,
370
+ 1.707816481590271,
371
+ 1.6981955766677856,
372
+ 1.697060465812683,
373
+ 1.6934552192687988,
374
+ 1.6912052631378174,
375
+ 1.6905877590179443,
376
+ 1.6917719841003418,
377
+ 1.6892836093902588,
378
+ 1.6837599277496338,
379
+ 1.6840280294418335,
380
+ 1.6766400337219238,
381
+ 1.658639669418335,
382
+ 1.6033053398132324,
383
+ 1.3812410831451416,
384
+ 0.426512748003006,
385
+ 0.0915759801864624,
386
+ 0.06641378998756409,
387
+ 0.06541288644075394,
388
+ 0.05397297441959381,
389
+ 0.049725256860256195,
390
+ 0.09616798162460327,
391
+ 0.07721105217933655,
392
+ 0.06286955624818802,
393
+ 0.06927378475666046,
394
+ 0.05181824788451195,
395
+ 0.05314686521887779,
396
+ 0.04097625985741615,
397
+ 0.04374873265624046,
398
+ 0.032902587205171585,
399
+ 0.03117380663752556,
400
+ 0.03125934302806854,
401
+ 0.02452976070344448,
402
+ 0.025319188833236694,
403
+ 0.03315161168575287,
404
+ 0.03438675403594971,
405
+ 0.03433896601200104,
406
+ 0.023289574310183525,
407
+ 0.02627655677497387,
408
+ 0.019970480352640152,
409
+ 0.029496990144252777,
410
+ 0.029075412079691887,
411
+ 0.022487249225378036,
412
+ 0.02366235852241516,
413
+ 0.016098806634545326,
414
+ 0.01269473321735859,
415
+ 0.02091185189783573,
416
+ 0.01328546553850174,
417
+ 0.011220241896808147,
418
+ 0.014205502346158028,
419
+ 0.01245021354407072,
420
+ 0.012731088325381279,
421
+ 0.013454608619213104,
422
+ 0.01804427243769169,
423
+ 0.019020183011889458,
424
+ 0.011872355826199055,
425
+ 0.017420047894120216,
426
+ 0.013317206874489784,
427
+ 0.010211849585175514,
428
+ 0.016086921095848083,
429
+ 0.014749030582606792,
430
+ 0.013347554951906204,
431
+ 0.012769741006195545,
432
+ 0.013671365566551685,
433
+ 0.011490320786833763
434
+ ],
435
+ "denoise_loss": [],
436
+ "ortho_loss": [
437
+ 0.5640959739685059,
438
+ 0.2631226181983948,
439
+ 0.14999699592590332,
440
+ 0.12239736318588257,
441
+ 0.09714409708976746,
442
+ 0.07303234189748764,
443
+ 0.06550216674804688,
444
+ 0.06051327660679817,
445
+ 0.07507379353046417,
446
+ 0.084007628262043,
447
+ 0.08215909451246262,
448
+ 0.08330931514501572,
449
+ 0.08743671327829361,
450
+ 0.09553566575050354,
451
+ 0.08920977264642715,
452
+ 0.09277433902025223,
453
+ 0.09217482805252075,
454
+ 0.09592001140117645,
455
+ 0.10014844685792923,
456
+ 0.10119500756263733,
457
+ 0.09707954525947571,
458
+ 0.097630575299263,
459
+ 0.09751289337873459,
460
+ 0.09296217560768127,
461
+ 0.08584143221378326,
462
+ 0.08582761883735657,
463
+ 0.08925554901361465,
464
+ 0.09694031625986099,
465
+ 0.10842876881361008,
466
+ 0.11049525439739227,
467
+ 0.11342362314462662,
468
+ 0.11486317962408066,
469
+ 0.11290319263935089,
470
+ 0.1154816746711731,
471
+ 0.11996712535619736,
472
+ 0.13185496628284454,
473
+ 0.12799306213855743,
474
+ 0.13368353247642517,
475
+ 0.13123612105846405,
476
+ 0.13329772651195526,
477
+ 0.1350865513086319,
478
+ 0.13771839439868927,
479
+ 0.1451946496963501,
480
+ 0.15061640739440918,
481
+ 0.1515263319015503,
482
+ 0.1512250006198883,
483
+ 0.14959852397441864,
484
+ 0.15471874177455902,
485
+ 0.1546550989151001,
486
+ 0.15259236097335815,
487
+ 0.1592143177986145,
488
+ 0.16225320100784302,
489
+ 0.16515889763832092,
490
+ 0.16539309918880463,
491
+ 0.16331565380096436,
492
+ 0.16385121643543243,
493
+ 0.17084619402885437,
494
+ 0.17373836040496826,
495
+ 0.17300517857074738,
496
+ 0.17302757501602173,
497
+ 0.17285387217998505,
498
+ 0.17389743030071259,
499
+ 0.17504020035266876,
500
+ 0.17735148966312408,
501
+ 0.18059377372264862,
502
+ 0.18277068436145782,
503
+ 0.1846488118171692,
504
+ 0.18622788786888123,
505
+ 0.18851174414157867,
506
+ 0.19029369950294495
507
+ ],
508
+ "lr": [
509
+ 1.6752136752136756e-05,
510
+ 3.384615384615385e-05,
511
+ 4e-05,
512
+ 4e-05,
513
+ 4e-05,
514
+ 4e-05,
515
+ 4e-05,
516
+ 4e-05,
517
+ 4e-05,
518
+ 4e-05,
519
+ 4e-05,
520
+ 4e-05,
521
+ 4e-05,
522
+ 4e-05,
523
+ 4e-05,
524
+ 4e-05,
525
+ 4e-05,
526
+ 4e-05,
527
+ 4e-05,
528
+ 4e-05,
529
+ 4e-05,
530
+ 4e-05,
531
+ 4e-05,
532
+ 4e-05,
533
+ 4e-05,
534
+ 4e-05,
535
+ 4e-05,
536
+ 4e-05,
537
+ 4e-05,
538
+ 4e-05,
539
+ 4e-05,
540
+ 4e-05,
541
+ 4e-05,
542
+ 4e-05,
543
+ 4e-05,
544
+ 4e-05,
545
+ 4e-05,
546
+ 4e-05,
547
+ 4e-05,
548
+ 4e-05,
549
+ 4e-05,
550
+ 4e-05,
551
+ 3.9947798576324814e-05,
552
+ 3.8761402583706826e-05,
553
+ 3.757500659108885e-05,
554
+ 3.6388610598470864e-05,
555
+ 3.5202214605852884e-05,
556
+ 3.401581861323491e-05,
557
+ 3.282942262061693e-05,
558
+ 3.0670181914052204e-05,
559
+ 2.948378592143422e-05,
560
+ 2.8297389928816243e-05,
561
+ 2.711099393619826e-05,
562
+ 2.5924597943580284e-05,
563
+ 2.4738201950962303e-05,
564
+ 2.3551805958344316e-05,
565
+ 2.1392565251779595e-05,
566
+ 2.020616925916161e-05,
567
+ 1.901977326654364e-05,
568
+ 1.783337727392566e-05,
569
+ 1.6646981281307675e-05,
570
+ 1.546058528868969e-05,
571
+ 1.427418929607171e-05,
572
+ 1.2114948589506984e-05,
573
+ 1.0928552596889013e-05,
574
+ 9.742156604271029e-06,
575
+ 8.555760611653046e-06,
576
+ 7.369364619035064e-06,
577
+ 6.182968626417082e-06,
578
+ 4.996572633799099e-06
579
+ ],
580
+ "emb_lr": [],
581
+ "eval_step": [
582
+ 350,
583
+ 741,
584
+ 1132,
585
+ 1523,
586
+ 1914,
587
+ 2305,
588
+ 2696,
589
+ 3087,
590
+ 3478,
591
+ 3869
592
+ ],
593
+ "eval_accuracy": [
594
+ 0.01,
595
+ 0.0,
596
+ 0.01,
597
+ 0.12,
598
+ 0.26,
599
+ 0.6,
600
+ 0.59,
601
+ 0.76,
602
+ 0.78,
603
+ 0.83
604
+ ]
605
+ },
606
+ "final_accuracy": 0.6191666666666666,
607
+ "sft_eval": {
608
+ "config": {
609
+ "ops": "add_sub",
610
+ "K": null,
611
+ "mode": "sft",
612
+ "n_digits": 6,
613
+ "n_per_split": 100
614
+ },
615
+ "splits": {
616
+ "add_S0": {
617
+ "full_accuracy": 0.82,
618
+ "n_examples": 100,
619
+ "per_subtask": {
620
+ "SA": {
621
+ "accuracy": 0.9735537190082645,
622
+ "count": 605
623
+ },
624
+ "SS": {
625
+ "accuracy": 0.9473684210526315,
626
+ "count": 95
627
+ }
628
+ }
629
+ },
630
+ "add_S1": {
631
+ "full_accuracy": 0.83,
632
+ "n_examples": 100,
633
+ "per_subtask": {
634
+ "SA": {
635
+ "accuracy": 0.9754901960784313,
636
+ "count": 204
637
+ },
638
+ "SC": {
639
+ "accuracy": 0.9881656804733728,
640
+ "count": 169
641
+ },
642
+ "SS": {
643
+ "accuracy": 1.0,
644
+ "count": 31
645
+ },
646
+ "UC": {
647
+ "accuracy": 0.9628378378378378,
648
+ "count": 296
649
+ }
650
+ }
651
+ },
652
+ "add_S2": {
653
+ "full_accuracy": 0.59,
654
+ "n_examples": 100,
655
+ "per_subtask": {
656
+ "SA": {
657
+ "accuracy": 0.9815950920245399,
658
+ "count": 163
659
+ },
660
+ "SC": {
661
+ "accuracy": 0.9384615384615385,
662
+ "count": 130
663
+ },
664
+ "SS": {
665
+ "accuracy": 0.9425287356321839,
666
+ "count": 87
667
+ },
668
+ "UC": {
669
+ "accuracy": 0.8669950738916257,
670
+ "count": 203
671
+ },
672
+ "US": {
673
+ "accuracy": 0.9145299145299145,
674
+ "count": 117
675
+ }
676
+ }
677
+ },
678
+ "add_S3": {
679
+ "full_accuracy": 0.38,
680
+ "n_examples": 100,
681
+ "per_subtask": {
682
+ "SA": {
683
+ "accuracy": 0.9586776859504132,
684
+ "count": 121
685
+ },
686
+ "SC": {
687
+ "accuracy": 0.9752066115702479,
688
+ "count": 121
689
+ },
690
+ "SS": {
691
+ "accuracy": 0.9387755102040817,
692
+ "count": 49
693
+ },
694
+ "UC": {
695
+ "accuracy": 0.7204301075268817,
696
+ "count": 186
697
+ },
698
+ "US": {
699
+ "accuracy": 0.7982062780269058,
700
+ "count": 223
701
+ }
702
+ }
703
+ },
704
+ "add_S4": {
705
+ "full_accuracy": 0.31,
706
+ "n_examples": 100,
707
+ "per_subtask": {
708
+ "SA": {
709
+ "accuracy": 0.9807692307692307,
710
+ "count": 104
711
+ },
712
+ "SC": {
713
+ "accuracy": 0.9716981132075472,
714
+ "count": 106
715
+ },
716
+ "SS": {
717
+ "accuracy": 1.0,
718
+ "count": 23
719
+ },
720
+ "UC": {
721
+ "accuracy": 0.63125,
722
+ "count": 160
723
+ },
724
+ "US": {
725
+ "accuracy": 0.6742671009771987,
726
+ "count": 307
727
+ }
728
+ }
729
+ },
730
+ "add_S5": {
731
+ "full_accuracy": 0.28,
732
+ "n_examples": 100,
733
+ "per_subtask": {
734
+ "SA": {
735
+ "accuracy": 1.0,
736
+ "count": 100
737
+ },
738
+ "SC": {
739
+ "accuracy": 1.0,
740
+ "count": 100
741
+ },
742
+ "UC": {
743
+ "accuracy": 0.46,
744
+ "count": 100
745
+ },
746
+ "US": {
747
+ "accuracy": 0.51,
748
+ "count": 400
749
+ }
750
+ }
751
+ },
752
+ "add_S6": {
753
+ "full_accuracy": 0.41,
754
+ "n_examples": 100,
755
+ "per_subtask": {
756
+ "SC": {
757
+ "accuracy": 1.0,
758
+ "count": 100
759
+ },
760
+ "UC": {
761
+ "accuracy": 0.44,
762
+ "count": 100
763
+ },
764
+ "US": {
765
+ "accuracy": 0.582,
766
+ "count": 500
767
+ }
768
+ }
769
+ },
770
+ "add_random": {
771
+ "full_accuracy": 0.785,
772
+ "n_examples": 200,
773
+ "per_subtask": {
774
+ "SA": {
775
+ "accuracy": 0.9753914988814317,
776
+ "count": 447
777
+ },
778
+ "SC": {
779
+ "accuracy": 0.98125,
780
+ "count": 320
781
+ },
782
+ "SS": {
783
+ "accuracy": 0.9821428571428571,
784
+ "count": 56
785
+ },
786
+ "UC": {
787
+ "accuracy": 0.9489603024574669,
788
+ "count": 529
789
+ },
790
+ "US": {
791
+ "accuracy": 0.7708333333333334,
792
+ "count": 48
793
+ }
794
+ }
795
+ },
796
+ "add_C3": {
797
+ "full_accuracy": 0.56,
798
+ "n_examples": 100,
799
+ "per_subtask": {
800
+ "SA": {
801
+ "accuracy": 0.9966666666666667,
802
+ "count": 300
803
+ },
804
+ "SC": {
805
+ "accuracy": 1.0,
806
+ "count": 100
807
+ },
808
+ "UC": {
809
+ "accuracy": 0.772020725388601,
810
+ "count": 193
811
+ },
812
+ "US": {
813
+ "accuracy": 0.8037383177570093,
814
+ "count": 107
815
+ }
816
+ }
817
+ },
818
+ "add_C4": {
819
+ "full_accuracy": 0.54,
820
+ "n_examples": 100,
821
+ "per_subtask": {
822
+ "SA": {
823
+ "accuracy": 1.0,
824
+ "count": 200
825
+ },
826
+ "SC": {
827
+ "accuracy": 1.0,
828
+ "count": 100
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.8359375,
832
+ "count": 256
833
+ },
834
+ "US": {
835
+ "accuracy": 0.8611111111111112,
836
+ "count": 144
837
+ }
838
+ }
839
+ },
840
+ "add_C5": {
841
+ "full_accuracy": 0.4,
842
+ "n_examples": 100,
843
+ "per_subtask": {
844
+ "SA": {
845
+ "accuracy": 1.0,
846
+ "count": 100
847
+ },
848
+ "SC": {
849
+ "accuracy": 1.0,
850
+ "count": 100
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.803921568627451,
854
+ "count": 306
855
+ },
856
+ "US": {
857
+ "accuracy": 0.7731958762886598,
858
+ "count": 194
859
+ }
860
+ }
861
+ },
862
+ "add_C6": {
863
+ "full_accuracy": 0.52,
864
+ "n_examples": 100,
865
+ "per_subtask": {
866
+ "SC": {
867
+ "accuracy": 1.0,
868
+ "count": 100
869
+ },
870
+ "UC": {
871
+ "accuracy": 0.8688524590163934,
872
+ "count": 366
873
+ },
874
+ "US": {
875
+ "accuracy": 0.8675213675213675,
876
+ "count": 234
877
+ }
878
+ }
879
+ },
880
+ "sub_M0": {
881
+ "full_accuracy": 0.9,
882
+ "n_examples": 100,
883
+ "per_subtask": {
884
+ "MD": {
885
+ "accuracy": 0.9850249584026622,
886
+ "count": 601
887
+ },
888
+ "ME": {
889
+ "accuracy": 0.9797979797979798,
890
+ "count": 99
891
+ }
892
+ }
893
+ },
894
+ "sub_M1": {
895
+ "full_accuracy": 0.9,
896
+ "n_examples": 100,
897
+ "per_subtask": {
898
+ "MD": {
899
+ "accuracy": 0.989247311827957,
900
+ "count": 279
901
+ },
902
+ "MB": {
903
+ "accuracy": 0.9793103448275862,
904
+ "count": 145
905
+ },
906
+ "ME": {
907
+ "accuracy": 0.9583333333333334,
908
+ "count": 24
909
+ },
910
+ "UB": {
911
+ "accuracy": 0.9880952380952381,
912
+ "count": 252
913
+ }
914
+ }
915
+ },
916
+ "sub_M2": {
917
+ "full_accuracy": 0.65,
918
+ "n_examples": 100,
919
+ "per_subtask": {
920
+ "MD": {
921
+ "accuracy": 0.9953051643192489,
922
+ "count": 213
923
+ },
924
+ "MB": {
925
+ "accuracy": 1.0,
926
+ "count": 113
927
+ },
928
+ "ME": {
929
+ "accuracy": 0.9764705882352941,
930
+ "count": 85
931
+ },
932
+ "UB": {
933
+ "accuracy": 0.8176795580110497,
934
+ "count": 181
935
+ },
936
+ "UD": {
937
+ "accuracy": 0.9537037037037037,
938
+ "count": 108
939
+ }
940
+ }
941
+ },
942
+ "sub_M3": {
943
+ "full_accuracy": 0.24,
944
+ "n_examples": 100,
945
+ "per_subtask": {
946
+ "MD": {
947
+ "accuracy": 1.0,
948
+ "count": 179
949
+ },
950
+ "MB": {
951
+ "accuracy": 0.9902912621359223,
952
+ "count": 103
953
+ },
954
+ "ME": {
955
+ "accuracy": 1.0,
956
+ "count": 56
957
+ },
958
+ "UB": {
959
+ "accuracy": 0.5234899328859061,
960
+ "count": 149
961
+ },
962
+ "UD": {
963
+ "accuracy": 0.7934272300469484,
964
+ "count": 213
965
+ }
966
+ }
967
+ },
968
+ "sub_M4": {
969
+ "full_accuracy": 0.01,
970
+ "n_examples": 100,
971
+ "per_subtask": {
972
+ "MD": {
973
+ "accuracy": 1.0,
974
+ "count": 200
975
+ },
976
+ "MB": {
977
+ "accuracy": 0.99,
978
+ "count": 100
979
+ },
980
+ "UB": {
981
+ "accuracy": 0.39,
982
+ "count": 100
983
+ },
984
+ "UD": {
985
+ "accuracy": 0.43666666666666665,
986
+ "count": 300
987
+ }
988
+ }
989
+ },
990
+ "sub_M5": {
991
+ "full_accuracy": 0.0,
992
+ "n_examples": 100,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 100
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 100
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.39,
1004
+ "count": 100
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.275,
1008
+ "count": 400
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 0.81,
1014
+ "n_examples": 200,
1015
+ "per_subtask": {
1016
+ "MD": {
1017
+ "accuracy": 0.9933333333333333,
1018
+ "count": 600
1019
+ },
1020
+ "MB": {
1021
+ "accuracy": 0.9625468164794008,
1022
+ "count": 267
1023
+ },
1024
+ "ME": {
1025
+ "accuracy": 0.9622641509433962,
1026
+ "count": 53
1027
+ },
1028
+ "UB": {
1029
+ "accuracy": 0.9476082004555809,
1030
+ "count": 439
1031
+ },
1032
+ "UD": {
1033
+ "accuracy": 0.975609756097561,
1034
+ "count": 41
1035
+ }
1036
+ }
1037
+ },
1038
+ "sub_B3": {
1039
+ "full_accuracy": 0.62,
1040
+ "n_examples": 100,
1041
+ "per_subtask": {
1042
+ "MD": {
1043
+ "accuracy": 0.9966666666666667,
1044
+ "count": 300
1045
+ },
1046
+ "MB": {
1047
+ "accuracy": 1.0,
1048
+ "count": 100
1049
+ },
1050
+ "UB": {
1051
+ "accuracy": 0.8274111675126904,
1052
+ "count": 197
1053
+ },
1054
+ "UD": {
1055
+ "accuracy": 0.7669902912621359,
1056
+ "count": 103
1057
+ }
1058
+ }
1059
+ },
1060
+ "sub_B4": {
1061
+ "full_accuracy": 0.38,
1062
+ "n_examples": 100,
1063
+ "per_subtask": {
1064
+ "MD": {
1065
+ "accuracy": 1.0,
1066
+ "count": 200
1067
+ },
1068
+ "MB": {
1069
+ "accuracy": 1.0,
1070
+ "count": 100
1071
+ },
1072
+ "UB": {
1073
+ "accuracy": 0.7732793522267206,
1074
+ "count": 247
1075
+ },
1076
+ "UD": {
1077
+ "accuracy": 0.7450980392156863,
1078
+ "count": 153
1079
+ }
1080
+ }
1081
+ },
1082
+ "sub_B5": {
1083
+ "full_accuracy": 0.37,
1084
+ "n_examples": 100,
1085
+ "per_subtask": {
1086
+ "MD": {
1087
+ "accuracy": 1.0,
1088
+ "count": 100
1089
+ },
1090
+ "MB": {
1091
+ "accuracy": 1.0,
1092
+ "count": 100
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.785234899328859,
1096
+ "count": 298
1097
+ },
1098
+ "UD": {
1099
+ "accuracy": 0.7524752475247525,
1100
+ "count": 202
1101
+ }
1102
+ }
1103
+ }
1104
+ },
1105
+ "summary": {
1106
+ "overall_accuracy": 0.5375,
1107
+ "total_examples": 2400,
1108
+ "n_splits": 22
1109
+ }
1110
+ },
1111
+ "sorl_eval": {
1112
+ "config": {
1113
+ "ops": "add_sub",
1114
+ "K": 4,
1115
+ "mode": "sorl",
1116
+ "n_digits": 6,
1117
+ "n_per_split": 100
1118
+ },
1119
+ "splits": {
1120
+ "add_S0": {
1121
+ "full_accuracy": 0.97,
1122
+ "n_examples": 100,
1123
+ "per_subtask": {
1124
+ "SA": {
1125
+ "accuracy": 0.9950413223140496,
1126
+ "count": 605
1127
+ },
1128
+ "SS": {
1129
+ "accuracy": 1.0,
1130
+ "count": 95
1131
+ }
1132
+ }
1133
+ },
1134
+ "add_S1": {
1135
+ "full_accuracy": 0.9,
1136
+ "n_examples": 100,
1137
+ "per_subtask": {
1138
+ "SA": {
1139
+ "accuracy": 0.9852941176470589,
1140
+ "count": 204
1141
+ },
1142
+ "SC": {
1143
+ "accuracy": 0.9822485207100592,
1144
+ "count": 169
1145
+ },
1146
+ "SS": {
1147
+ "accuracy": 1.0,
1148
+ "count": 31
1149
+ },
1150
+ "UC": {
1151
+ "accuracy": 0.9864864864864865,
1152
+ "count": 296
1153
+ }
1154
+ }
1155
+ },
1156
+ "add_S2": {
1157
+ "full_accuracy": 0.76,
1158
+ "n_examples": 100,
1159
+ "per_subtask": {
1160
+ "SA": {
1161
+ "accuracy": 0.9815950920245399,
1162
+ "count": 163
1163
+ },
1164
+ "SC": {
1165
+ "accuracy": 0.9615384615384616,
1166
+ "count": 130
1167
+ },
1168
+ "SS": {
1169
+ "accuracy": 0.9425287356321839,
1170
+ "count": 87
1171
+ },
1172
+ "UC": {
1173
+ "accuracy": 0.9261083743842364,
1174
+ "count": 203
1175
+ },
1176
+ "US": {
1177
+ "accuracy": 1.0,
1178
+ "count": 117
1179
+ }
1180
+ }
1181
+ },
1182
+ "add_S3": {
1183
+ "full_accuracy": 0.57,
1184
+ "n_examples": 100,
1185
+ "per_subtask": {
1186
+ "SA": {
1187
+ "accuracy": 0.9917355371900827,
1188
+ "count": 121
1189
+ },
1190
+ "SC": {
1191
+ "accuracy": 0.9834710743801653,
1192
+ "count": 121
1193
+ },
1194
+ "SS": {
1195
+ "accuracy": 0.9591836734693877,
1196
+ "count": 49
1197
+ },
1198
+ "UC": {
1199
+ "accuracy": 0.7956989247311828,
1200
+ "count": 186
1201
+ },
1202
+ "US": {
1203
+ "accuracy": 0.9192825112107623,
1204
+ "count": 223
1205
+ }
1206
+ }
1207
+ },
1208
+ "add_S4": {
1209
+ "full_accuracy": 0.38,
1210
+ "n_examples": 100,
1211
+ "per_subtask": {
1212
+ "SA": {
1213
+ "accuracy": 1.0,
1214
+ "count": 104
1215
+ },
1216
+ "SC": {
1217
+ "accuracy": 1.0,
1218
+ "count": 106
1219
+ },
1220
+ "SS": {
1221
+ "accuracy": 1.0,
1222
+ "count": 23
1223
+ },
1224
+ "UC": {
1225
+ "accuracy": 0.68125,
1226
+ "count": 160
1227
+ },
1228
+ "US": {
1229
+ "accuracy": 0.7654723127035831,
1230
+ "count": 307
1231
+ }
1232
+ }
1233
+ },
1234
+ "add_S5": {
1235
+ "full_accuracy": 0.19,
1236
+ "n_examples": 100,
1237
+ "per_subtask": {
1238
+ "SA": {
1239
+ "accuracy": 1.0,
1240
+ "count": 100
1241
+ },
1242
+ "SC": {
1243
+ "accuracy": 1.0,
1244
+ "count": 100
1245
+ },
1246
+ "UC": {
1247
+ "accuracy": 0.32,
1248
+ "count": 100
1249
+ },
1250
+ "US": {
1251
+ "accuracy": 0.505,
1252
+ "count": 400
1253
+ }
1254
+ }
1255
+ },
1256
+ "add_S6": {
1257
+ "full_accuracy": 0.38,
1258
+ "n_examples": 100,
1259
+ "per_subtask": {
1260
+ "SC": {
1261
+ "accuracy": 1.0,
1262
+ "count": 100
1263
+ },
1264
+ "UC": {
1265
+ "accuracy": 0.59,
1266
+ "count": 100
1267
+ },
1268
+ "US": {
1269
+ "accuracy": 0.634,
1270
+ "count": 500
1271
+ }
1272
+ }
1273
+ },
1274
+ "add_random": {
1275
+ "full_accuracy": 0.885,
1276
+ "n_examples": 200,
1277
+ "per_subtask": {
1278
+ "SA": {
1279
+ "accuracy": 0.9865771812080537,
1280
+ "count": 447
1281
+ },
1282
+ "SC": {
1283
+ "accuracy": 0.98125,
1284
+ "count": 320
1285
+ },
1286
+ "SS": {
1287
+ "accuracy": 0.9642857142857143,
1288
+ "count": 56
1289
+ },
1290
+ "UC": {
1291
+ "accuracy": 0.9792060491493384,
1292
+ "count": 529
1293
+ },
1294
+ "US": {
1295
+ "accuracy": 0.9375,
1296
+ "count": 48
1297
+ }
1298
+ }
1299
+ },
1300
+ "add_C3": {
1301
+ "full_accuracy": 0.71,
1302
+ "n_examples": 100,
1303
+ "per_subtask": {
1304
+ "SA": {
1305
+ "accuracy": 1.0,
1306
+ "count": 300
1307
+ },
1308
+ "SC": {
1309
+ "accuracy": 1.0,
1310
+ "count": 100
1311
+ },
1312
+ "UC": {
1313
+ "accuracy": 0.8497409326424871,
1314
+ "count": 193
1315
+ },
1316
+ "US": {
1317
+ "accuracy": 0.9345794392523364,
1318
+ "count": 107
1319
+ }
1320
+ }
1321
+ },
1322
+ "add_C4": {
1323
+ "full_accuracy": 0.76,
1324
+ "n_examples": 100,
1325
+ "per_subtask": {
1326
+ "SA": {
1327
+ "accuracy": 1.0,
1328
+ "count": 200
1329
+ },
1330
+ "SC": {
1331
+ "accuracy": 1.0,
1332
+ "count": 100
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.91015625,
1336
+ "count": 256
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 0.9236111111111112,
1340
+ "count": 144
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_C5": {
1345
+ "full_accuracy": 0.54,
1346
+ "n_examples": 100,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 100
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 1.0,
1354
+ "count": 100
1355
+ },
1356
+ "UC": {
1357
+ "accuracy": 0.8594771241830066,
1358
+ "count": 306
1359
+ },
1360
+ "US": {
1361
+ "accuracy": 0.8402061855670103,
1362
+ "count": 194
1363
+ }
1364
+ }
1365
+ },
1366
+ "add_C6": {
1367
+ "full_accuracy": 0.68,
1368
+ "n_examples": 100,
1369
+ "per_subtask": {
1370
+ "SC": {
1371
+ "accuracy": 1.0,
1372
+ "count": 100
1373
+ },
1374
+ "UC": {
1375
+ "accuracy": 0.9098360655737705,
1376
+ "count": 366
1377
+ },
1378
+ "US": {
1379
+ "accuracy": 0.9487179487179487,
1380
+ "count": 234
1381
+ }
1382
+ }
1383
+ },
1384
+ "sub_M0": {
1385
+ "full_accuracy": 0.98,
1386
+ "n_examples": 100,
1387
+ "per_subtask": {
1388
+ "MD": {
1389
+ "accuracy": 0.9966722129783694,
1390
+ "count": 601
1391
+ },
1392
+ "ME": {
1393
+ "accuracy": 1.0,
1394
+ "count": 99
1395
+ }
1396
+ }
1397
+ },
1398
+ "sub_M1": {
1399
+ "full_accuracy": 0.97,
1400
+ "n_examples": 100,
1401
+ "per_subtask": {
1402
+ "MD": {
1403
+ "accuracy": 1.0,
1404
+ "count": 279
1405
+ },
1406
+ "MB": {
1407
+ "accuracy": 0.9862068965517241,
1408
+ "count": 145
1409
+ },
1410
+ "ME": {
1411
+ "accuracy": 0.9583333333333334,
1412
+ "count": 24
1413
+ },
1414
+ "UB": {
1415
+ "accuracy": 1.0,
1416
+ "count": 252
1417
+ }
1418
+ }
1419
+ },
1420
+ "sub_M2": {
1421
+ "full_accuracy": 0.63,
1422
+ "n_examples": 100,
1423
+ "per_subtask": {
1424
+ "MD": {
1425
+ "accuracy": 0.9906103286384976,
1426
+ "count": 213
1427
+ },
1428
+ "MB": {
1429
+ "accuracy": 0.9911504424778761,
1430
+ "count": 113
1431
+ },
1432
+ "ME": {
1433
+ "accuracy": 1.0,
1434
+ "count": 85
1435
+ },
1436
+ "UB": {
1437
+ "accuracy": 0.8121546961325967,
1438
+ "count": 181
1439
+ },
1440
+ "UD": {
1441
+ "accuracy": 1.0,
1442
+ "count": 108
1443
+ }
1444
+ }
1445
+ },
1446
+ "sub_M3": {
1447
+ "full_accuracy": 0.2,
1448
+ "n_examples": 100,
1449
+ "per_subtask": {
1450
+ "MD": {
1451
+ "accuracy": 0.994413407821229,
1452
+ "count": 179
1453
+ },
1454
+ "MB": {
1455
+ "accuracy": 0.9902912621359223,
1456
+ "count": 103
1457
+ },
1458
+ "ME": {
1459
+ "accuracy": 1.0,
1460
+ "count": 56
1461
+ },
1462
+ "UB": {
1463
+ "accuracy": 0.4966442953020134,
1464
+ "count": 149
1465
+ },
1466
+ "UD": {
1467
+ "accuracy": 0.8075117370892019,
1468
+ "count": 213
1469
+ }
1470
+ }
1471
+ },
1472
+ "sub_M4": {
1473
+ "full_accuracy": 0.06,
1474
+ "n_examples": 100,
1475
+ "per_subtask": {
1476
+ "MD": {
1477
+ "accuracy": 1.0,
1478
+ "count": 200
1479
+ },
1480
+ "MB": {
1481
+ "accuracy": 1.0,
1482
+ "count": 100
1483
+ },
1484
+ "UB": {
1485
+ "accuracy": 0.25,
1486
+ "count": 100
1487
+ },
1488
+ "UD": {
1489
+ "accuracy": 0.5,
1490
+ "count": 300
1491
+ }
1492
+ }
1493
+ },
1494
+ "sub_M5": {
1495
+ "full_accuracy": 0.08,
1496
+ "n_examples": 100,
1497
+ "per_subtask": {
1498
+ "MD": {
1499
+ "accuracy": 1.0,
1500
+ "count": 100
1501
+ },
1502
+ "MB": {
1503
+ "accuracy": 1.0,
1504
+ "count": 100
1505
+ },
1506
+ "UB": {
1507
+ "accuracy": 0.22,
1508
+ "count": 100
1509
+ },
1510
+ "UD": {
1511
+ "accuracy": 0.425,
1512
+ "count": 400
1513
+ }
1514
+ }
1515
+ },
1516
+ "sub_random": {
1517
+ "full_accuracy": 0.87,
1518
+ "n_examples": 200,
1519
+ "per_subtask": {
1520
+ "MD": {
1521
+ "accuracy": 0.9933333333333333,
1522
+ "count": 600
1523
+ },
1524
+ "MB": {
1525
+ "accuracy": 0.9887640449438202,
1526
+ "count": 267
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 1.0,
1530
+ "count": 53
1531
+ },
1532
+ "UB": {
1533
+ "accuracy": 0.9567198177676538,
1534
+ "count": 439
1535
+ },
1536
+ "UD": {
1537
+ "accuracy": 0.975609756097561,
1538
+ "count": 41
1539
+ }
1540
+ }
1541
+ },
1542
+ "sub_B3": {
1543
+ "full_accuracy": 0.67,
1544
+ "n_examples": 100,
1545
+ "per_subtask": {
1546
+ "MD": {
1547
+ "accuracy": 0.9933333333333333,
1548
+ "count": 300
1549
+ },
1550
+ "MB": {
1551
+ "accuracy": 1.0,
1552
+ "count": 100
1553
+ },
1554
+ "UB": {
1555
+ "accuracy": 0.8629441624365483,
1556
+ "count": 197
1557
+ },
1558
+ "UD": {
1559
+ "accuracy": 0.8349514563106796,
1560
+ "count": 103
1561
+ }
1562
+ }
1563
+ },
1564
+ "sub_B4": {
1565
+ "full_accuracy": 0.55,
1566
+ "n_examples": 100,
1567
+ "per_subtask": {
1568
+ "MD": {
1569
+ "accuracy": 1.0,
1570
+ "count": 200
1571
+ },
1572
+ "MB": {
1573
+ "accuracy": 1.0,
1574
+ "count": 100
1575
+ },
1576
+ "UB": {
1577
+ "accuracy": 0.8259109311740891,
1578
+ "count": 247
1579
+ },
1580
+ "UD": {
1581
+ "accuracy": 0.8169934640522876,
1582
+ "count": 153
1583
+ }
1584
+ }
1585
+ },
1586
+ "sub_B5": {
1587
+ "full_accuracy": 0.38,
1588
+ "n_examples": 100,
1589
+ "per_subtask": {
1590
+ "MD": {
1591
+ "accuracy": 1.0,
1592
+ "count": 100
1593
+ },
1594
+ "MB": {
1595
+ "accuracy": 1.0,
1596
+ "count": 100
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.7986577181208053,
1600
+ "count": 298
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.7574257425742574,
1604
+ "count": 202
1605
+ }
1606
+ }
1607
+ }
1608
+ },
1609
+ "summary": {
1610
+ "overall_accuracy": 0.6191666666666666,
1611
+ "total_examples": 2400,
1612
+ "n_splits": 22
1613
+ }
1614
+ },
1615
+ "sorl_overall_accuracy": 0.6191666666666666,
1616
+ "sft_overall_accuracy": 0.5375
1617
+ }
add_sub_sorl_v1_abs10_25K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c44e7d1362819ab8f5d7105db96ac5f8cd27c038610420dcca40684cc135fc24
3
+ size 650303660
add_sub_sorl_v1_abs10_25K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 117,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 390,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K4_25K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 10,
65
+ "dataset_size": 25000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162499262,
71
+ "run_name": "add_sub_sorl_v1_abs10_25K",
72
+ "git_commit": "78d46f8665a87f4b44bd5894bd34f393f2dea51f",
73
+ "timestamp": "2026-04-12T08:59:10.961931+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "pmvjbi05",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/pmvjbi05",
81
+ "final_accuracy": 0.6191666666666666,
82
+ "sft_accuracy": 0.5375,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }