amirali1985 commited on
Commit
f9d6435
·
verified ·
1 Parent(s): fd60996

Upload add_sub_sorl_v1_abs10_K1_25K_1L3H510d

Browse files
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention"
17
+ ],
18
+ "max_position_embeddings": 128,
19
+ "max_window_layers": 28,
20
+ "model_type": "qwen3",
21
+ "num_attention_heads": 3,
22
+ "num_hidden_layers": 1,
23
+ "num_key_value_heads": 3,
24
+ "pad_token_id": null,
25
+ "rms_norm_eps": 1e-06,
26
+ "rope_parameters": {
27
+ "rope_theta": 10000.0,
28
+ "rope_type": "default"
29
+ },
30
+ "sliding_window": null,
31
+ "tie_word_embeddings": false,
32
+ "transformers_version": "5.5.0",
33
+ "use_cache": true,
34
+ "use_sliding_window": false,
35
+ "vocab_size": 151654
36
+ }
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/metrics.json ADDED
@@ -0,0 +1,1617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 441,
12
+ 491,
13
+ 541,
14
+ 591,
15
+ 641,
16
+ 691,
17
+ 741,
18
+ 832,
19
+ 882,
20
+ 932,
21
+ 982,
22
+ 1032,
23
+ 1082,
24
+ 1132,
25
+ 1223,
26
+ 1273,
27
+ 1323,
28
+ 1373,
29
+ 1423,
30
+ 1473,
31
+ 1523,
32
+ 1614,
33
+ 1664,
34
+ 1714,
35
+ 1764,
36
+ 1814,
37
+ 1864,
38
+ 1914,
39
+ 2005,
40
+ 2055,
41
+ 2105,
42
+ 2155,
43
+ 2205,
44
+ 2255,
45
+ 2305,
46
+ 2396,
47
+ 2446,
48
+ 2496,
49
+ 2546,
50
+ 2596,
51
+ 2646,
52
+ 2696,
53
+ 2787,
54
+ 2837,
55
+ 2887,
56
+ 2937,
57
+ 2987,
58
+ 3037,
59
+ 3087,
60
+ 3178,
61
+ 3228,
62
+ 3278,
63
+ 3328,
64
+ 3378,
65
+ 3428,
66
+ 3478,
67
+ 3569,
68
+ 3619,
69
+ 3669,
70
+ 3719,
71
+ 3769,
72
+ 3819,
73
+ 3869
74
+ ],
75
+ "loss": [
76
+ -2.59329891204834,
77
+ 7.806329250335693,
78
+ 4.891926288604736,
79
+ 3.912823438644409,
80
+ 3.36497163772583,
81
+ 3.413069248199463,
82
+ 3.1604514122009277,
83
+ 3.0855469703674316,
84
+ 2.895873546600342,
85
+ 2.8154397010803223,
86
+ 2.8238906860351562,
87
+ 2.5073657035827637,
88
+ 1.2554247379302979,
89
+ -0.46714138984680176,
90
+ -5.484283447265625,
91
+ -6.567154884338379,
92
+ -6.758439064025879,
93
+ -7.20958137512207,
94
+ -8.60386848449707,
95
+ -9.11019515991211,
96
+ -9.290766716003418,
97
+ -9.791807174682617,
98
+ -10.01004695892334,
99
+ -10.105368614196777,
100
+ -9.49570083618164,
101
+ -10.431818008422852,
102
+ -10.326833724975586,
103
+ -10.126893997192383,
104
+ -10.160858154296875,
105
+ -10.941141128540039,
106
+ -10.631235122680664,
107
+ -10.644250869750977,
108
+ -10.591297149658203,
109
+ -11.348092079162598,
110
+ -11.023357391357422,
111
+ -11.110491752624512,
112
+ -11.359291076660156,
113
+ -11.153099060058594,
114
+ -10.987231254577637,
115
+ -11.629621505737305,
116
+ -10.848411560058594,
117
+ -11.752678871154785,
118
+ -11.179244995117188,
119
+ -11.764345169067383,
120
+ -11.807055473327637,
121
+ -11.234673500061035,
122
+ -12.306229591369629,
123
+ -11.79053783416748,
124
+ -12.127859115600586,
125
+ -11.52390193939209,
126
+ -11.549543380737305,
127
+ -13.031827926635742,
128
+ -12.10395336151123,
129
+ -11.681001663208008,
130
+ -12.33035945892334,
131
+ -12.12756633758545,
132
+ -12.05741024017334,
133
+ -12.84146785736084,
134
+ -12.221896171569824,
135
+ -12.554245948791504,
136
+ -12.93739128112793,
137
+ -12.630762100219727,
138
+ -12.096063613891602,
139
+ -12.651780128479004,
140
+ -13.059272766113281,
141
+ -12.436356544494629,
142
+ -12.79887580871582,
143
+ -12.472254753112793,
144
+ -11.984771728515625,
145
+ -13.45839786529541
146
+ ],
147
+ "base_loss": [
148
+ 10.196748733520508,
149
+ 6.539699077606201,
150
+ 4.092434406280518,
151
+ 2.346903085708618,
152
+ 2.0746912956237793,
153
+ 1.9594690799713135,
154
+ 1.8837043046951294,
155
+ 1.8864774703979492,
156
+ 1.8490782976150513,
157
+ 1.7932301759719849,
158
+ 1.7640596628189087,
159
+ 1.8651959896087646,
160
+ 1.8547264337539673,
161
+ 1.886150598526001,
162
+ 1.9799963235855103,
163
+ 1.80038583278656,
164
+ 1.8168869018554688,
165
+ 1.7473572492599487,
166
+ 1.8770685195922852,
167
+ 1.8492628335952759,
168
+ 1.8485273122787476,
169
+ 1.834829330444336,
170
+ 1.843698263168335,
171
+ 1.8508756160736084,
172
+ 1.804511308670044,
173
+ 1.85122549533844,
174
+ 1.8213146924972534,
175
+ 1.8022531270980835,
176
+ 1.8216105699539185,
177
+ 1.8896723985671997,
178
+ 1.7682313919067383,
179
+ 1.851951003074646,
180
+ 1.8244448900222778,
181
+ 1.860870122909546,
182
+ 1.844719648361206,
183
+ 1.8215335607528687,
184
+ 1.8681211471557617,
185
+ 1.7685410976409912,
186
+ 1.77374267578125,
187
+ 1.83017098903656,
188
+ 1.780069351196289,
189
+ 1.8695632219314575,
190
+ 1.803731918334961,
191
+ 1.794657588005066,
192
+ 1.8132097721099854,
193
+ 1.744333028793335,
194
+ 1.850972056388855,
195
+ 1.79466712474823,
196
+ 1.865286111831665,
197
+ 1.775697112083435,
198
+ 1.831557273864746,
199
+ 1.8576253652572632,
200
+ 1.7820489406585693,
201
+ 1.7218695878982544,
202
+ 1.7814966440200806,
203
+ 1.771523356437683,
204
+ 1.748187780380249,
205
+ 1.839253544807434,
206
+ 1.7425659894943237,
207
+ 1.7759649753570557,
208
+ 1.8560504913330078,
209
+ 1.794585108757019,
210
+ 1.7335294485092163,
211
+ 1.806968092918396,
212
+ 1.8292748928070068,
213
+ 1.7885210514068604,
214
+ 1.8298585414886475,
215
+ 1.7961997985839844,
216
+ 1.723584532737732,
217
+ 1.8568757772445679
218
+ ],
219
+ "info_loss": [
220
+ -2.1446304321289062,
221
+ -0.32575225830078125,
222
+ -0.1383652687072754,
223
+ -0.03788328170776367,
224
+ -0.06099653244018555,
225
+ -0.043364644050598145,
226
+ -0.060524702072143555,
227
+ -0.06769096851348877,
228
+ -0.08283662796020508,
229
+ -0.08506667613983154,
230
+ -0.08107435703277588,
231
+ -0.12192976474761963,
232
+ -0.23974978923797607,
233
+ -0.40476346015930176,
234
+ -0.9116840362548828,
235
+ -1.002124309539795,
236
+ -1.0228478908538818,
237
+ -1.060563087463379,
238
+ -1.2133022546768188,
239
+ -1.2610199451446533,
240
+ -1.2785308361053467,
241
+ -1.326535940170288,
242
+ -1.3484688997268677,
243
+ -1.3588534593582153,
244
+ -1.2928407192230225,
245
+ -1.3912382125854492,
246
+ -1.3773154020309448,
247
+ -1.3551386594772339,
248
+ -1.3596680164337158,
249
+ -1.44418203830719,
250
+ -1.3997809886932373,
251
+ -1.4097068309783936,
252
+ -1.400420069694519,
253
+ -1.4767446517944336,
254
+ -1.4412634372711182,
255
+ -1.4422601461410522,
256
+ -1.468010663986206,
257
+ -1.437137484550476,
258
+ -1.421332597732544,
259
+ -1.4890285730361938,
260
+ -1.4050776958465576,
261
+ -1.5023729801177979,
262
+ -1.4319555759429932,
263
+ -1.4917011260986328,
264
+ -1.4962141513824463,
265
+ -1.4313055276870728,
266
+ -1.5444698333740234,
267
+ -1.487035870552063,
268
+ -1.5295355319976807,
269
+ -1.4528287649154663,
270
+ -1.4570941925048828,
271
+ -1.6042754650115967,
272
+ -1.5032556056976318,
273
+ -1.4498255252838135,
274
+ -1.5202281475067139,
275
+ -1.4980217218399048,
276
+ -1.488149881362915,
277
+ -1.574017882347107,
278
+ -1.498759150505066,
279
+ -1.5380897521972656,
280
+ -1.5861152410507202,
281
+ -1.5452888011932373,
282
+ -1.4880478382110596,
283
+ -1.5506173372268677,
284
+ -1.5903542041778564,
285
+ -1.5227900743484497,
286
+ -1.5617271661758423,
287
+ -1.5277729034423828,
288
+ -1.4715402126312256,
289
+ -1.62960946559906
290
+ ],
291
+ "abs_loss": [
292
+ 2.2833845615386963,
293
+ 2.1349236965179443,
294
+ 1.895667552947998,
295
+ 1.8790363073349,
296
+ 1.8763422966003418,
297
+ 1.863279938697815,
298
+ 1.8707689046859741,
299
+ 1.8497987985610962,
300
+ 1.8467472791671753,
301
+ 1.8501476049423218,
302
+ 1.8510236740112305,
303
+ 1.7979803085327148,
304
+ 1.6018939018249512,
305
+ 1.4351223707199097,
306
+ 1.3627341985702515,
307
+ 1.3827515840530396,
308
+ 1.378796935081482,
309
+ 1.3818906545639038,
310
+ 1.3893027305603027,
311
+ 1.376089096069336,
312
+ 1.3647276163101196,
313
+ 1.341958999633789,
314
+ 1.3429460525512695,
315
+ 1.2983181476593018,
316
+ 1.3058513402938843,
317
+ 1.3161550760269165,
318
+ 1.269581913948059,
319
+ 1.2819956541061401,
320
+ 1.225730061531067,
321
+ 1.2145700454711914,
322
+ 1.2128896713256836,
323
+ 1.181650161743164,
324
+ 1.1704578399658203,
325
+ 1.100685477256775,
326
+ 1.1276469230651855,
327
+ 1.0256637334823608,
328
+ 0.9807112812995911,
329
+ 0.9855987429618835,
330
+ 1.0340598821640015,
331
+ 0.962256133556366,
332
+ 0.9177753925323486,
333
+ 0.923234760761261,
334
+ 0.9172429442405701,
335
+ 0.9422297477722168,
336
+ 0.9557555317878723,
337
+ 0.959925651550293,
338
+ 0.8680426478385925,
339
+ 0.8737877011299133,
340
+ 0.9473252296447754,
341
+ 0.8711683750152588,
342
+ 0.8707416653633118,
343
+ 0.7836803793907166,
344
+ 0.847979724407196,
345
+ 0.7654786109924316,
346
+ 0.793886661529541,
347
+ 0.7679941058158875,
348
+ 0.7534409165382385,
349
+ 0.7690866589546204,
350
+ 0.7134169936180115,
351
+ 0.7436110973358154,
352
+ 0.743655264377594,
353
+ 0.6757664084434509,
354
+ 0.6782504916191101,
355
+ 0.684099018573761,
356
+ 0.6790602207183838,
357
+ 0.6778237223625183,
358
+ 0.6263924241065979,
359
+ 0.6698463559150696,
360
+ 0.650520384311676,
361
+ 0.6476595401763916
362
+ ],
363
+ "zipf_loss": [
364
+ 8.427918434143066,
365
+ 4.310660362243652,
366
+ 1.9935778379440308,
367
+ 1.7568495273590088,
368
+ 1.7126115560531616,
369
+ 1.7009185552597046,
370
+ 1.6949173212051392,
371
+ 1.6909992694854736,
372
+ 1.6904866695404053,
373
+ 1.687861680984497,
374
+ 1.6854722499847412,
375
+ 1.6816692352294922,
376
+ 1.6380068063735962,
377
+ 1.550830364227295,
378
+ 1.5162874460220337,
379
+ 1.5154277086257935,
380
+ 1.5152742862701416,
381
+ 1.510502576828003,
382
+ 1.513155460357666,
383
+ 1.513131856918335,
384
+ 1.5095428228378296,
385
+ 1.5045267343521118,
386
+ 1.4966490268707275,
387
+ 1.5024579763412476,
388
+ 1.4976110458374023,
389
+ 1.4977227449417114,
390
+ 1.4980478286743164,
391
+ 1.4940409660339355,
392
+ 1.49163818359375,
393
+ 1.4895493984222412,
394
+ 1.4770550727844238,
395
+ 1.4827024936676025,
396
+ 1.4714136123657227,
397
+ 1.4484158754348755,
398
+ 1.4317924976348877,
399
+ 1.3880096673965454,
400
+ 1.354623794555664,
401
+ 1.3511745929718018,
402
+ 1.3489454984664917,
403
+ 1.3342679738998413,
404
+ 1.3305190801620483,
405
+ 1.309164047241211,
406
+ 1.2448537349700928,
407
+ 1.2637850046157837,
408
+ 1.2463005781173706,
409
+ 1.2380549907684326,
410
+ 1.2006921768188477,
411
+ 1.1977745294570923,
412
+ 1.2074775695800781,
413
+ 1.141572117805481,
414
+ 1.1027672290802002,
415
+ 1.0749335289001465,
416
+ 1.061755895614624,
417
+ 1.0188356637954712,
418
+ 1.0110366344451904,
419
+ 1.0043277740478516,
420
+ 1.0005559921264648,
421
+ 0.9825484752655029,
422
+ 0.9517877101898193,
423
+ 0.9763262271881104,
424
+ 0.9933456182479858,
425
+ 0.959964394569397,
426
+ 0.9830599427223206,
427
+ 0.9790149927139282,
428
+ 0.9470869898796082,
429
+ 0.935240626335144,
430
+ 0.9258978962898254,
431
+ 0.9422906637191772,
432
+ 0.9419942498207092,
433
+ 0.9160555005073547
434
+ ],
435
+ "denoise_loss": [],
436
+ "ortho_loss": [
437
+ 0.7092215418815613,
438
+ 0.46743276715278625,
439
+ 0.372333288192749,
440
+ 0.3163504898548126,
441
+ 0.29026153683662415,
442
+ 0.2792986333370209,
443
+ 0.27686527371406555,
444
+ 0.262327641248703,
445
+ 0.24570715427398682,
446
+ 0.2475493848323822,
447
+ 0.24840138852596283,
448
+ 0.25525224208831787,
449
+ 0.27400314807891846,
450
+ 0.2834039628505707,
451
+ 0.28120777010917664,
452
+ 0.2767947316169739,
453
+ 0.2838645875453949,
454
+ 0.26357901096343994,
455
+ 0.24860280752182007,
456
+ 0.2444341778755188,
457
+ 0.2471916675567627,
458
+ 0.2405933439731598,
459
+ 0.24468615651130676,
460
+ 0.24482561647891998,
461
+ 0.240675151348114,
462
+ 0.24134141206741333,
463
+ 0.24261823296546936,
464
+ 0.24529020488262177,
465
+ 0.24550136923789978,
466
+ 0.24657639861106873,
467
+ 0.24326443672180176,
468
+ 0.24152472615242004,
469
+ 0.24578434228897095,
470
+ 0.24196375906467438,
471
+ 0.24180641770362854,
472
+ 0.24166396260261536,
473
+ 0.23602785170078278,
474
+ 0.2411782592535019,
475
+ 0.23615865409374237,
476
+ 0.23971572518348694,
477
+ 0.23798082768917084,
478
+ 0.2350902259349823,
479
+ 0.23152871429920197,
480
+ 0.23074424266815186,
481
+ 0.22700950503349304,
482
+ 0.2275175005197525,
483
+ 0.23137938976287842,
484
+ 0.22892071306705475,
485
+ 0.22994598746299744,
486
+ 0.2339673489332199,
487
+ 0.23359638452529907,
488
+ 0.23373164236545563,
489
+ 0.23107723891735077,
490
+ 0.2339753359556198,
491
+ 0.23272015154361725,
492
+ 0.2336493581533432,
493
+ 0.233825221657753,
494
+ 0.2339244931936264,
495
+ 0.235467329621315,
496
+ 0.2342061847448349,
497
+ 0.23358942568302155,
498
+ 0.23396462202072144,
499
+ 0.23547381162643433,
500
+ 0.23512768745422363,
501
+ 0.23589524626731873,
502
+ 0.2354237139225006,
503
+ 0.2354665845632553,
504
+ 0.23491325974464417,
505
+ 0.23564963042736053,
506
+ 0.2355274111032486
507
+ ],
508
+ "lr": [
509
+ 1.6752136752136756e-05,
510
+ 3.384615384615385e-05,
511
+ 4e-05,
512
+ 4e-05,
513
+ 4e-05,
514
+ 4e-05,
515
+ 4e-05,
516
+ 4e-05,
517
+ 4e-05,
518
+ 4e-05,
519
+ 4e-05,
520
+ 4e-05,
521
+ 4e-05,
522
+ 4e-05,
523
+ 4e-05,
524
+ 4e-05,
525
+ 4e-05,
526
+ 4e-05,
527
+ 4e-05,
528
+ 4e-05,
529
+ 4e-05,
530
+ 4e-05,
531
+ 4e-05,
532
+ 4e-05,
533
+ 4e-05,
534
+ 4e-05,
535
+ 4e-05,
536
+ 4e-05,
537
+ 4e-05,
538
+ 4e-05,
539
+ 4e-05,
540
+ 4e-05,
541
+ 4e-05,
542
+ 4e-05,
543
+ 4e-05,
544
+ 4e-05,
545
+ 4e-05,
546
+ 4e-05,
547
+ 4e-05,
548
+ 4e-05,
549
+ 4e-05,
550
+ 4e-05,
551
+ 3.9947798576324814e-05,
552
+ 3.8761402583706826e-05,
553
+ 3.757500659108885e-05,
554
+ 3.6388610598470864e-05,
555
+ 3.5202214605852884e-05,
556
+ 3.401581861323491e-05,
557
+ 3.282942262061693e-05,
558
+ 3.0670181914052204e-05,
559
+ 2.948378592143422e-05,
560
+ 2.8297389928816243e-05,
561
+ 2.711099393619826e-05,
562
+ 2.5924597943580284e-05,
563
+ 2.4738201950962303e-05,
564
+ 2.3551805958344316e-05,
565
+ 2.1392565251779595e-05,
566
+ 2.020616925916161e-05,
567
+ 1.901977326654364e-05,
568
+ 1.783337727392566e-05,
569
+ 1.6646981281307675e-05,
570
+ 1.546058528868969e-05,
571
+ 1.427418929607171e-05,
572
+ 1.2114948589506984e-05,
573
+ 1.0928552596889013e-05,
574
+ 9.742156604271029e-06,
575
+ 8.555760611653046e-06,
576
+ 7.369364619035064e-06,
577
+ 6.182968626417082e-06,
578
+ 4.996572633799099e-06
579
+ ],
580
+ "emb_lr": [],
581
+ "eval_step": [
582
+ 350,
583
+ 741,
584
+ 1132,
585
+ 1523,
586
+ 1914,
587
+ 2305,
588
+ 2696,
589
+ 3087,
590
+ 3478,
591
+ 3869
592
+ ],
593
+ "eval_accuracy": [
594
+ 0.01,
595
+ 0.0,
596
+ 0.0,
597
+ 0.0,
598
+ 0.0,
599
+ 0.0,
600
+ 0.0,
601
+ 0.0,
602
+ 0.0,
603
+ 0.0
604
+ ]
605
+ },
606
+ "final_accuracy": 0.28291666666666665,
607
+ "sft_eval": {
608
+ "config": {
609
+ "ops": "add_sub",
610
+ "K": null,
611
+ "mode": "sft",
612
+ "n_digits": 6,
613
+ "n_per_split": 100
614
+ },
615
+ "splits": {
616
+ "add_S0": {
617
+ "full_accuracy": 0.0,
618
+ "n_examples": 100,
619
+ "per_subtask": {
620
+ "SA": {
621
+ "accuracy": 0.2066115702479339,
622
+ "count": 605
623
+ },
624
+ "SS": {
625
+ "accuracy": 1.0,
626
+ "count": 95
627
+ }
628
+ }
629
+ },
630
+ "add_S1": {
631
+ "full_accuracy": 0.0,
632
+ "n_examples": 100,
633
+ "per_subtask": {
634
+ "SA": {
635
+ "accuracy": 0.2647058823529412,
636
+ "count": 204
637
+ },
638
+ "SC": {
639
+ "accuracy": 0.11242603550295859,
640
+ "count": 169
641
+ },
642
+ "SS": {
643
+ "accuracy": 0.6451612903225806,
644
+ "count": 31
645
+ },
646
+ "UC": {
647
+ "accuracy": 0.21621621621621623,
648
+ "count": 296
649
+ }
650
+ }
651
+ },
652
+ "add_S2": {
653
+ "full_accuracy": 0.0,
654
+ "n_examples": 100,
655
+ "per_subtask": {
656
+ "SA": {
657
+ "accuracy": 0.3987730061349693,
658
+ "count": 163
659
+ },
660
+ "SC": {
661
+ "accuracy": 0.13846153846153847,
662
+ "count": 130
663
+ },
664
+ "SS": {
665
+ "accuracy": 0.3448275862068966,
666
+ "count": 87
667
+ },
668
+ "UC": {
669
+ "accuracy": 0.35960591133004927,
670
+ "count": 203
671
+ },
672
+ "US": {
673
+ "accuracy": 0.6324786324786325,
674
+ "count": 117
675
+ }
676
+ }
677
+ },
678
+ "add_S3": {
679
+ "full_accuracy": 0.0,
680
+ "n_examples": 100,
681
+ "per_subtask": {
682
+ "SA": {
683
+ "accuracy": 0.48760330578512395,
684
+ "count": 121
685
+ },
686
+ "SC": {
687
+ "accuracy": 0.04132231404958678,
688
+ "count": 121
689
+ },
690
+ "SS": {
691
+ "accuracy": 0.6530612244897959,
692
+ "count": 49
693
+ },
694
+ "UC": {
695
+ "accuracy": 0.3548387096774194,
696
+ "count": 186
697
+ },
698
+ "US": {
699
+ "accuracy": 0.57847533632287,
700
+ "count": 223
701
+ }
702
+ }
703
+ },
704
+ "add_S4": {
705
+ "full_accuracy": 0.0,
706
+ "n_examples": 100,
707
+ "per_subtask": {
708
+ "SA": {
709
+ "accuracy": 0.47115384615384615,
710
+ "count": 104
711
+ },
712
+ "SC": {
713
+ "accuracy": 0.10377358490566038,
714
+ "count": 106
715
+ },
716
+ "SS": {
717
+ "accuracy": 0.5652173913043478,
718
+ "count": 23
719
+ },
720
+ "UC": {
721
+ "accuracy": 0.3875,
722
+ "count": 160
723
+ },
724
+ "US": {
725
+ "accuracy": 0.5700325732899023,
726
+ "count": 307
727
+ }
728
+ }
729
+ },
730
+ "add_S5": {
731
+ "full_accuracy": 0.0,
732
+ "n_examples": 100,
733
+ "per_subtask": {
734
+ "SA": {
735
+ "accuracy": 0.46,
736
+ "count": 100
737
+ },
738
+ "SC": {
739
+ "accuracy": 0.01,
740
+ "count": 100
741
+ },
742
+ "UC": {
743
+ "accuracy": 0.18,
744
+ "count": 100
745
+ },
746
+ "US": {
747
+ "accuracy": 0.09,
748
+ "count": 400
749
+ }
750
+ }
751
+ },
752
+ "add_S6": {
753
+ "full_accuracy": 0.02,
754
+ "n_examples": 100,
755
+ "per_subtask": {
756
+ "SC": {
757
+ "accuracy": 0.02,
758
+ "count": 100
759
+ },
760
+ "UC": {
761
+ "accuracy": 0.65,
762
+ "count": 100
763
+ },
764
+ "US": {
765
+ "accuracy": 0.648,
766
+ "count": 500
767
+ }
768
+ }
769
+ },
770
+ "add_random": {
771
+ "full_accuracy": 0.0,
772
+ "n_examples": 200,
773
+ "per_subtask": {
774
+ "SA": {
775
+ "accuracy": 0.2639821029082774,
776
+ "count": 447
777
+ },
778
+ "SC": {
779
+ "accuracy": 0.1125,
780
+ "count": 320
781
+ },
782
+ "SS": {
783
+ "accuracy": 0.5178571428571429,
784
+ "count": 56
785
+ },
786
+ "UC": {
787
+ "accuracy": 0.23062381852551986,
788
+ "count": 529
789
+ },
790
+ "US": {
791
+ "accuracy": 0.5625,
792
+ "count": 48
793
+ }
794
+ }
795
+ },
796
+ "add_C3": {
797
+ "full_accuracy": 0.0,
798
+ "n_examples": 100,
799
+ "per_subtask": {
800
+ "SA": {
801
+ "accuracy": 0.25,
802
+ "count": 300
803
+ },
804
+ "SC": {
805
+ "accuracy": 0.08,
806
+ "count": 100
807
+ },
808
+ "UC": {
809
+ "accuracy": 0.11398963730569948,
810
+ "count": 193
811
+ },
812
+ "US": {
813
+ "accuracy": 0.14953271028037382,
814
+ "count": 107
815
+ }
816
+ }
817
+ },
818
+ "add_C4": {
819
+ "full_accuracy": 0.0,
820
+ "n_examples": 100,
821
+ "per_subtask": {
822
+ "SA": {
823
+ "accuracy": 0.355,
824
+ "count": 200
825
+ },
826
+ "SC": {
827
+ "accuracy": 0.03,
828
+ "count": 100
829
+ },
830
+ "UC": {
831
+ "accuracy": 0.10546875,
832
+ "count": 256
833
+ },
834
+ "US": {
835
+ "accuracy": 0.24305555555555555,
836
+ "count": 144
837
+ }
838
+ }
839
+ },
840
+ "add_C5": {
841
+ "full_accuracy": 0.0,
842
+ "n_examples": 100,
843
+ "per_subtask": {
844
+ "SA": {
845
+ "accuracy": 0.5,
846
+ "count": 100
847
+ },
848
+ "SC": {
849
+ "accuracy": 0.09,
850
+ "count": 100
851
+ },
852
+ "UC": {
853
+ "accuracy": 0.13725490196078433,
854
+ "count": 306
855
+ },
856
+ "US": {
857
+ "accuracy": 0.3556701030927835,
858
+ "count": 194
859
+ }
860
+ }
861
+ },
862
+ "add_C6": {
863
+ "full_accuracy": 0.0,
864
+ "n_examples": 100,
865
+ "per_subtask": {
866
+ "SC": {
867
+ "accuracy": 0.12,
868
+ "count": 100
869
+ },
870
+ "UC": {
871
+ "accuracy": 0.2540983606557377,
872
+ "count": 366
873
+ },
874
+ "US": {
875
+ "accuracy": 0.8632478632478633,
876
+ "count": 234
877
+ }
878
+ }
879
+ },
880
+ "sub_M0": {
881
+ "full_accuracy": 0.0,
882
+ "n_examples": 100,
883
+ "per_subtask": {
884
+ "MD": {
885
+ "accuracy": 0.20465890183028287,
886
+ "count": 601
887
+ },
888
+ "ME": {
889
+ "accuracy": 1.0,
890
+ "count": 99
891
+ }
892
+ }
893
+ },
894
+ "sub_M1": {
895
+ "full_accuracy": 0.0,
896
+ "n_examples": 100,
897
+ "per_subtask": {
898
+ "MD": {
899
+ "accuracy": 0.3835125448028674,
900
+ "count": 279
901
+ },
902
+ "MB": {
903
+ "accuracy": 0.0,
904
+ "count": 145
905
+ },
906
+ "ME": {
907
+ "accuracy": 1.0,
908
+ "count": 24
909
+ },
910
+ "UB": {
911
+ "accuracy": 0.09523809523809523,
912
+ "count": 252
913
+ }
914
+ }
915
+ },
916
+ "sub_M2": {
917
+ "full_accuracy": 0.0,
918
+ "n_examples": 100,
919
+ "per_subtask": {
920
+ "MD": {
921
+ "accuracy": 0.6150234741784038,
922
+ "count": 213
923
+ },
924
+ "MB": {
925
+ "accuracy": 0.0,
926
+ "count": 113
927
+ },
928
+ "ME": {
929
+ "accuracy": 1.0,
930
+ "count": 85
931
+ },
932
+ "UB": {
933
+ "accuracy": 0.16574585635359115,
934
+ "count": 181
935
+ },
936
+ "UD": {
937
+ "accuracy": 0.0,
938
+ "count": 108
939
+ }
940
+ }
941
+ },
942
+ "sub_M3": {
943
+ "full_accuracy": 0.0,
944
+ "n_examples": 100,
945
+ "per_subtask": {
946
+ "MD": {
947
+ "accuracy": 0.7597765363128491,
948
+ "count": 179
949
+ },
950
+ "MB": {
951
+ "accuracy": 0.0,
952
+ "count": 103
953
+ },
954
+ "ME": {
955
+ "accuracy": 1.0,
956
+ "count": 56
957
+ },
958
+ "UB": {
959
+ "accuracy": 0.12080536912751678,
960
+ "count": 149
961
+ },
962
+ "UD": {
963
+ "accuracy": 0.0,
964
+ "count": 213
965
+ }
966
+ }
967
+ },
968
+ "sub_M4": {
969
+ "full_accuracy": 0.0,
970
+ "n_examples": 100,
971
+ "per_subtask": {
972
+ "MD": {
973
+ "accuracy": 0.5,
974
+ "count": 200
975
+ },
976
+ "MB": {
977
+ "accuracy": 0.0,
978
+ "count": 100
979
+ },
980
+ "UB": {
981
+ "accuracy": 0.3,
982
+ "count": 100
983
+ },
984
+ "UD": {
985
+ "accuracy": 0.0,
986
+ "count": 300
987
+ }
988
+ }
989
+ },
990
+ "sub_M5": {
991
+ "full_accuracy": 0.0,
992
+ "n_examples": 100,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 100
997
+ },
998
+ "MB": {
999
+ "accuracy": 0.0,
1000
+ "count": 100
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.31,
1004
+ "count": 100
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.0,
1008
+ "count": 400
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 0.0,
1014
+ "n_examples": 200,
1015
+ "per_subtask": {
1016
+ "MD": {
1017
+ "accuracy": 0.3616666666666667,
1018
+ "count": 600
1019
+ },
1020
+ "MB": {
1021
+ "accuracy": 0.0,
1022
+ "count": 267
1023
+ },
1024
+ "ME": {
1025
+ "accuracy": 1.0,
1026
+ "count": 53
1027
+ },
1028
+ "UB": {
1029
+ "accuracy": 0.12072892938496584,
1030
+ "count": 439
1031
+ },
1032
+ "UD": {
1033
+ "accuracy": 0.0,
1034
+ "count": 41
1035
+ }
1036
+ }
1037
+ },
1038
+ "sub_B3": {
1039
+ "full_accuracy": 0.0,
1040
+ "n_examples": 100,
1041
+ "per_subtask": {
1042
+ "MD": {
1043
+ "accuracy": 0.3333333333333333,
1044
+ "count": 300
1045
+ },
1046
+ "MB": {
1047
+ "accuracy": 0.0,
1048
+ "count": 100
1049
+ },
1050
+ "UB": {
1051
+ "accuracy": 0.17766497461928935,
1052
+ "count": 197
1053
+ },
1054
+ "UD": {
1055
+ "accuracy": 0.0,
1056
+ "count": 103
1057
+ }
1058
+ }
1059
+ },
1060
+ "sub_B4": {
1061
+ "full_accuracy": 0.0,
1062
+ "n_examples": 100,
1063
+ "per_subtask": {
1064
+ "MD": {
1065
+ "accuracy": 0.5,
1066
+ "count": 200
1067
+ },
1068
+ "MB": {
1069
+ "accuracy": 0.0,
1070
+ "count": 100
1071
+ },
1072
+ "UB": {
1073
+ "accuracy": 0.145748987854251,
1074
+ "count": 247
1075
+ },
1076
+ "UD": {
1077
+ "accuracy": 0.0,
1078
+ "count": 153
1079
+ }
1080
+ }
1081
+ },
1082
+ "sub_B5": {
1083
+ "full_accuracy": 0.0,
1084
+ "n_examples": 100,
1085
+ "per_subtask": {
1086
+ "MD": {
1087
+ "accuracy": 1.0,
1088
+ "count": 100
1089
+ },
1090
+ "MB": {
1091
+ "accuracy": 0.0,
1092
+ "count": 100
1093
+ },
1094
+ "UB": {
1095
+ "accuracy": 0.11073825503355705,
1096
+ "count": 298
1097
+ },
1098
+ "UD": {
1099
+ "accuracy": 0.0,
1100
+ "count": 202
1101
+ }
1102
+ }
1103
+ }
1104
+ },
1105
+ "summary": {
1106
+ "overall_accuracy": 0.0008333333333333334,
1107
+ "total_examples": 2400,
1108
+ "n_splits": 22
1109
+ }
1110
+ },
1111
+ "sorl_eval": {
1112
+ "config": {
1113
+ "ops": "add_sub",
1114
+ "K": 1,
1115
+ "mode": "sorl",
1116
+ "n_digits": 6,
1117
+ "n_per_split": 100
1118
+ },
1119
+ "splits": {
1120
+ "add_S0": {
1121
+ "full_accuracy": 0.74,
1122
+ "n_examples": 100,
1123
+ "per_subtask": {
1124
+ "SA": {
1125
+ "accuracy": 0.9603305785123967,
1126
+ "count": 605
1127
+ },
1128
+ "SS": {
1129
+ "accuracy": 0.9578947368421052,
1130
+ "count": 95
1131
+ }
1132
+ }
1133
+ },
1134
+ "add_S1": {
1135
+ "full_accuracy": 0.58,
1136
+ "n_examples": 100,
1137
+ "per_subtask": {
1138
+ "SA": {
1139
+ "accuracy": 0.9362745098039216,
1140
+ "count": 204
1141
+ },
1142
+ "SC": {
1143
+ "accuracy": 0.9585798816568047,
1144
+ "count": 169
1145
+ },
1146
+ "SS": {
1147
+ "accuracy": 1.0,
1148
+ "count": 31
1149
+ },
1150
+ "UC": {
1151
+ "accuracy": 0.875,
1152
+ "count": 296
1153
+ }
1154
+ }
1155
+ },
1156
+ "add_S2": {
1157
+ "full_accuracy": 0.35,
1158
+ "n_examples": 100,
1159
+ "per_subtask": {
1160
+ "SA": {
1161
+ "accuracy": 0.9693251533742331,
1162
+ "count": 163
1163
+ },
1164
+ "SC": {
1165
+ "accuracy": 0.9076923076923077,
1166
+ "count": 130
1167
+ },
1168
+ "SS": {
1169
+ "accuracy": 0.9080459770114943,
1170
+ "count": 87
1171
+ },
1172
+ "UC": {
1173
+ "accuracy": 0.7192118226600985,
1174
+ "count": 203
1175
+ },
1176
+ "US": {
1177
+ "accuracy": 0.9316239316239316,
1178
+ "count": 117
1179
+ }
1180
+ }
1181
+ },
1182
+ "add_S3": {
1183
+ "full_accuracy": 0.19,
1184
+ "n_examples": 100,
1185
+ "per_subtask": {
1186
+ "SA": {
1187
+ "accuracy": 0.9834710743801653,
1188
+ "count": 121
1189
+ },
1190
+ "SC": {
1191
+ "accuracy": 0.9504132231404959,
1192
+ "count": 121
1193
+ },
1194
+ "SS": {
1195
+ "accuracy": 0.9183673469387755,
1196
+ "count": 49
1197
+ },
1198
+ "UC": {
1199
+ "accuracy": 0.6612903225806451,
1200
+ "count": 186
1201
+ },
1202
+ "US": {
1203
+ "accuracy": 0.6860986547085202,
1204
+ "count": 223
1205
+ }
1206
+ }
1207
+ },
1208
+ "add_S4": {
1209
+ "full_accuracy": 0.2,
1210
+ "n_examples": 100,
1211
+ "per_subtask": {
1212
+ "SA": {
1213
+ "accuracy": 1.0,
1214
+ "count": 104
1215
+ },
1216
+ "SC": {
1217
+ "accuracy": 0.9433962264150944,
1218
+ "count": 106
1219
+ },
1220
+ "SS": {
1221
+ "accuracy": 1.0,
1222
+ "count": 23
1223
+ },
1224
+ "UC": {
1225
+ "accuracy": 0.64375,
1226
+ "count": 160
1227
+ },
1228
+ "US": {
1229
+ "accuracy": 0.5309446254071661,
1230
+ "count": 307
1231
+ }
1232
+ }
1233
+ },
1234
+ "add_S5": {
1235
+ "full_accuracy": 0.05,
1236
+ "n_examples": 100,
1237
+ "per_subtask": {
1238
+ "SA": {
1239
+ "accuracy": 1.0,
1240
+ "count": 100
1241
+ },
1242
+ "SC": {
1243
+ "accuracy": 0.98,
1244
+ "count": 100
1245
+ },
1246
+ "UC": {
1247
+ "accuracy": 0.36,
1248
+ "count": 100
1249
+ },
1250
+ "US": {
1251
+ "accuracy": 0.3325,
1252
+ "count": 400
1253
+ }
1254
+ }
1255
+ },
1256
+ "add_S6": {
1257
+ "full_accuracy": 0.03,
1258
+ "n_examples": 100,
1259
+ "per_subtask": {
1260
+ "SC": {
1261
+ "accuracy": 1.0,
1262
+ "count": 100
1263
+ },
1264
+ "UC": {
1265
+ "accuracy": 0.28,
1266
+ "count": 100
1267
+ },
1268
+ "US": {
1269
+ "accuracy": 0.298,
1270
+ "count": 500
1271
+ }
1272
+ }
1273
+ },
1274
+ "add_random": {
1275
+ "full_accuracy": 0.575,
1276
+ "n_examples": 200,
1277
+ "per_subtask": {
1278
+ "SA": {
1279
+ "accuracy": 0.9552572706935123,
1280
+ "count": 447
1281
+ },
1282
+ "SC": {
1283
+ "accuracy": 0.9625,
1284
+ "count": 320
1285
+ },
1286
+ "SS": {
1287
+ "accuracy": 0.9821428571428571,
1288
+ "count": 56
1289
+ },
1290
+ "UC": {
1291
+ "accuracy": 0.8657844990548205,
1292
+ "count": 529
1293
+ },
1294
+ "US": {
1295
+ "accuracy": 0.7916666666666666,
1296
+ "count": 48
1297
+ }
1298
+ }
1299
+ },
1300
+ "add_C3": {
1301
+ "full_accuracy": 0.41,
1302
+ "n_examples": 100,
1303
+ "per_subtask": {
1304
+ "SA": {
1305
+ "accuracy": 0.9833333333333333,
1306
+ "count": 300
1307
+ },
1308
+ "SC": {
1309
+ "accuracy": 1.0,
1310
+ "count": 100
1311
+ },
1312
+ "UC": {
1313
+ "accuracy": 0.7150259067357513,
1314
+ "count": 193
1315
+ },
1316
+ "US": {
1317
+ "accuracy": 0.7102803738317757,
1318
+ "count": 107
1319
+ }
1320
+ }
1321
+ },
1322
+ "add_C4": {
1323
+ "full_accuracy": 0.32,
1324
+ "n_examples": 100,
1325
+ "per_subtask": {
1326
+ "SA": {
1327
+ "accuracy": 1.0,
1328
+ "count": 200
1329
+ },
1330
+ "SC": {
1331
+ "accuracy": 1.0,
1332
+ "count": 100
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.71875,
1336
+ "count": 256
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 0.7291666666666666,
1340
+ "count": 144
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_C5": {
1345
+ "full_accuracy": 0.23,
1346
+ "n_examples": 100,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 100
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 0.97,
1354
+ "count": 100
1355
+ },
1356
+ "UC": {
1357
+ "accuracy": 0.696078431372549,
1358
+ "count": 306
1359
+ },
1360
+ "US": {
1361
+ "accuracy": 0.6752577319587629,
1362
+ "count": 194
1363
+ }
1364
+ }
1365
+ },
1366
+ "add_C6": {
1367
+ "full_accuracy": 0.23,
1368
+ "n_examples": 100,
1369
+ "per_subtask": {
1370
+ "SC": {
1371
+ "accuracy": 1.0,
1372
+ "count": 100
1373
+ },
1374
+ "UC": {
1375
+ "accuracy": 0.7486338797814208,
1376
+ "count": 366
1377
+ },
1378
+ "US": {
1379
+ "accuracy": 0.7136752136752137,
1380
+ "count": 234
1381
+ }
1382
+ }
1383
+ },
1384
+ "sub_M0": {
1385
+ "full_accuracy": 0.61,
1386
+ "n_examples": 100,
1387
+ "per_subtask": {
1388
+ "MD": {
1389
+ "accuracy": 0.9234608985024958,
1390
+ "count": 601
1391
+ },
1392
+ "ME": {
1393
+ "accuracy": 0.9797979797979798,
1394
+ "count": 99
1395
+ }
1396
+ }
1397
+ },
1398
+ "sub_M1": {
1399
+ "full_accuracy": 0.45,
1400
+ "n_examples": 100,
1401
+ "per_subtask": {
1402
+ "MD": {
1403
+ "accuracy": 0.9605734767025089,
1404
+ "count": 279
1405
+ },
1406
+ "MB": {
1407
+ "accuracy": 0.9241379310344827,
1408
+ "count": 145
1409
+ },
1410
+ "ME": {
1411
+ "accuracy": 0.875,
1412
+ "count": 24
1413
+ },
1414
+ "UB": {
1415
+ "accuracy": 0.8015873015873016,
1416
+ "count": 252
1417
+ }
1418
+ }
1419
+ },
1420
+ "sub_M2": {
1421
+ "full_accuracy": 0.21,
1422
+ "n_examples": 100,
1423
+ "per_subtask": {
1424
+ "MD": {
1425
+ "accuracy": 0.9530516431924883,
1426
+ "count": 213
1427
+ },
1428
+ "MB": {
1429
+ "accuracy": 0.9380530973451328,
1430
+ "count": 113
1431
+ },
1432
+ "ME": {
1433
+ "accuracy": 0.9764705882352941,
1434
+ "count": 85
1435
+ },
1436
+ "UB": {
1437
+ "accuracy": 0.5524861878453039,
1438
+ "count": 181
1439
+ },
1440
+ "UD": {
1441
+ "accuracy": 0.7685185185185185,
1442
+ "count": 108
1443
+ }
1444
+ }
1445
+ },
1446
+ "sub_M3": {
1447
+ "full_accuracy": 0.07,
1448
+ "n_examples": 100,
1449
+ "per_subtask": {
1450
+ "MD": {
1451
+ "accuracy": 0.9888268156424581,
1452
+ "count": 179
1453
+ },
1454
+ "MB": {
1455
+ "accuracy": 0.941747572815534,
1456
+ "count": 103
1457
+ },
1458
+ "ME": {
1459
+ "accuracy": 1.0,
1460
+ "count": 56
1461
+ },
1462
+ "UB": {
1463
+ "accuracy": 0.4563758389261745,
1464
+ "count": 149
1465
+ },
1466
+ "UD": {
1467
+ "accuracy": 0.4788732394366197,
1468
+ "count": 213
1469
+ }
1470
+ }
1471
+ },
1472
+ "sub_M4": {
1473
+ "full_accuracy": 0.03,
1474
+ "n_examples": 100,
1475
+ "per_subtask": {
1476
+ "MD": {
1477
+ "accuracy": 0.935,
1478
+ "count": 200
1479
+ },
1480
+ "MB": {
1481
+ "accuracy": 0.96,
1482
+ "count": 100
1483
+ },
1484
+ "UB": {
1485
+ "accuracy": 0.34,
1486
+ "count": 100
1487
+ },
1488
+ "UD": {
1489
+ "accuracy": 0.23333333333333334,
1490
+ "count": 300
1491
+ }
1492
+ }
1493
+ },
1494
+ "sub_M5": {
1495
+ "full_accuracy": 0.06,
1496
+ "n_examples": 100,
1497
+ "per_subtask": {
1498
+ "MD": {
1499
+ "accuracy": 1.0,
1500
+ "count": 100
1501
+ },
1502
+ "MB": {
1503
+ "accuracy": 1.0,
1504
+ "count": 100
1505
+ },
1506
+ "UB": {
1507
+ "accuracy": 0.38,
1508
+ "count": 100
1509
+ },
1510
+ "UD": {
1511
+ "accuracy": 0.235,
1512
+ "count": 400
1513
+ }
1514
+ }
1515
+ },
1516
+ "sub_random": {
1517
+ "full_accuracy": 0.32,
1518
+ "n_examples": 200,
1519
+ "per_subtask": {
1520
+ "MD": {
1521
+ "accuracy": 0.935,
1522
+ "count": 600
1523
+ },
1524
+ "MB": {
1525
+ "accuracy": 0.898876404494382,
1526
+ "count": 267
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 0.9433962264150944,
1530
+ "count": 53
1531
+ },
1532
+ "UB": {
1533
+ "accuracy": 0.7562642369020501,
1534
+ "count": 439
1535
+ },
1536
+ "UD": {
1537
+ "accuracy": 0.7804878048780488,
1538
+ "count": 41
1539
+ }
1540
+ }
1541
+ },
1542
+ "sub_B3": {
1543
+ "full_accuracy": 0.13,
1544
+ "n_examples": 100,
1545
+ "per_subtask": {
1546
+ "MD": {
1547
+ "accuracy": 0.8833333333333333,
1548
+ "count": 300
1549
+ },
1550
+ "MB": {
1551
+ "accuracy": 0.95,
1552
+ "count": 100
1553
+ },
1554
+ "UB": {
1555
+ "accuracy": 0.5482233502538071,
1556
+ "count": 197
1557
+ },
1558
+ "UD": {
1559
+ "accuracy": 0.5533980582524272,
1560
+ "count": 103
1561
+ }
1562
+ }
1563
+ },
1564
+ "sub_B4": {
1565
+ "full_accuracy": 0.09,
1566
+ "n_examples": 100,
1567
+ "per_subtask": {
1568
+ "MD": {
1569
+ "accuracy": 0.95,
1570
+ "count": 200
1571
+ },
1572
+ "MB": {
1573
+ "accuracy": 0.97,
1574
+ "count": 100
1575
+ },
1576
+ "UB": {
1577
+ "accuracy": 0.5546558704453441,
1578
+ "count": 247
1579
+ },
1580
+ "UD": {
1581
+ "accuracy": 0.47058823529411764,
1582
+ "count": 153
1583
+ }
1584
+ }
1585
+ },
1586
+ "sub_B5": {
1587
+ "full_accuracy": 0.04,
1588
+ "n_examples": 100,
1589
+ "per_subtask": {
1590
+ "MD": {
1591
+ "accuracy": 1.0,
1592
+ "count": 100
1593
+ },
1594
+ "MB": {
1595
+ "accuracy": 1.0,
1596
+ "count": 100
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.540268456375839,
1600
+ "count": 298
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.4603960396039604,
1604
+ "count": 202
1605
+ }
1606
+ }
1607
+ }
1608
+ },
1609
+ "summary": {
1610
+ "overall_accuracy": 0.28291666666666665,
1611
+ "total_examples": 2400,
1612
+ "n_splits": 22
1613
+ }
1614
+ },
1615
+ "sorl_overall_accuracy": 0.28291666666666665,
1616
+ "sft_overall_accuracy": 0.0008333333333333334
1617
+ }
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83dc63f41a380d62231535ea7934fea7ce172db8bc245f6beae0be7d7b4c8b71
3
+ size 634679036
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 117,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 390,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K1_25K_1L3H510d",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 1,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 10,
65
+ "dataset_size": 25000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 158593426,
71
+ "run_name": "add_sub_sorl_v1_abs10_K1_25K_1L3H510d",
72
+ "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
+ "timestamp": "2026-04-12T16:20:24.628526+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "ybv7051n",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/ybv7051n",
81
+ "final_accuracy": 0.28291666666666665,
82
+ "sft_accuracy": 0.0008333333333333334,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }