amirali1985 commited on
Commit
185c5df
·
verified ·
1 Parent(s): 3756aa5

Upload add_sub_sorl_v1_abs20_K1_10K

Browse files
add_sub_sorl_v1_abs20_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151664
37
+ }
add_sub_sorl_v1_abs20_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs20_K1_10K/metrics.json ADDED
@@ -0,0 +1,1687 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563,
34
+ 1620,
35
+ 1670,
36
+ 1720,
37
+ 1777,
38
+ 1827,
39
+ 1877,
40
+ 1934,
41
+ 1984,
42
+ 2034,
43
+ 2091,
44
+ 2141,
45
+ 2191,
46
+ 2248,
47
+ 2298,
48
+ 2348,
49
+ 2405,
50
+ 2455,
51
+ 2505,
52
+ 2562,
53
+ 2612,
54
+ 2662,
55
+ 2719,
56
+ 2769,
57
+ 2819,
58
+ 2876,
59
+ 2926,
60
+ 2976,
61
+ 3033,
62
+ 3083,
63
+ 3133
64
+ ],
65
+ "loss": [
66
+ 6.609847068786621,
67
+ 4.313945770263672,
68
+ 2.8211135864257812,
69
+ 2.6098971366882324,
70
+ 2.8803181648254395,
71
+ 2.4080252647399902,
72
+ 2.382941722869873,
73
+ 0.3024674654006958,
74
+ -4.526322364807129,
75
+ -6.631197452545166,
76
+ -4.271163463592529,
77
+ -2.845456838607788,
78
+ -1.7134591341018677,
79
+ -1.919076681137085,
80
+ -1.5664957761764526,
81
+ -1.3899976015090942,
82
+ -0.7998488545417786,
83
+ -1.1952388286590576,
84
+ -0.8307008743286133,
85
+ -0.5860740542411804,
86
+ -0.6104287505149841,
87
+ -0.8407340049743652,
88
+ -1.2119009494781494,
89
+ -1.0129437446594238,
90
+ -1.313117504119873,
91
+ -1.1106709241867065,
92
+ -1.6603138446807861,
93
+ -1.6090965270996094,
94
+ -1.0733165740966797,
95
+ -1.0066807270050049,
96
+ -1.2794181108474731,
97
+ -0.9843854904174805,
98
+ -1.123490333557129,
99
+ -1.0440940856933594,
100
+ -0.9606919884681702,
101
+ -1.414717674255371,
102
+ -0.8292436599731445,
103
+ -1.2538599967956543,
104
+ -0.919226884841919,
105
+ -0.5217360854148865,
106
+ -1.245757818222046,
107
+ -0.7945197820663452,
108
+ -0.6292770504951477,
109
+ -0.7419818639755249,
110
+ -0.5777878761291504,
111
+ -0.28315025568008423,
112
+ -0.35084664821624756,
113
+ -0.6482582092285156,
114
+ -0.33098843693733215,
115
+ -0.37686067819595337,
116
+ -0.5184847116470337,
117
+ -0.0973658561706543,
118
+ -0.3080046772956848,
119
+ -0.2875412106513977,
120
+ -0.20791345834732056,
121
+ -0.2233448624610901,
122
+ -0.16375748813152313,
123
+ -0.13954374194145203,
124
+ -0.12580332159996033,
125
+ -0.0063927918672561646
126
+ ],
127
+ "base_loss": [
128
+ 7.595933437347412,
129
+ 3.8396108150482178,
130
+ 1.9869134426116943,
131
+ 1.833506464958191,
132
+ 1.7767173051834106,
133
+ 1.8865851163864136,
134
+ 1.8839255571365356,
135
+ 1.7337242364883423,
136
+ 1.8361998796463013,
137
+ 1.6329604387283325,
138
+ 1.2356112003326416,
139
+ 0.9798861742019653,
140
+ 0.7108522653579712,
141
+ 0.6530479788780212,
142
+ 0.5507955551147461,
143
+ 0.5035597085952759,
144
+ 0.3905493915081024,
145
+ 0.356679767370224,
146
+ 0.34553012251853943,
147
+ 0.28289204835891724,
148
+ 0.33556804060935974,
149
+ 0.24587801098823547,
150
+ 0.26413649320602417,
151
+ 0.21149785816669464,
152
+ 0.22125734388828278,
153
+ 0.21185027062892914,
154
+ 0.24059262871742249,
155
+ 0.2523518204689026,
156
+ 0.16996660828590393,
157
+ 0.17189233005046844,
158
+ 0.1756567806005478,
159
+ 0.15790970623493195,
160
+ 0.1541072428226471,
161
+ 0.14071570336818695,
162
+ 0.14612773060798645,
163
+ 0.17851118743419647,
164
+ 0.11484314501285553,
165
+ 0.16218499839305878,
166
+ 0.12426789849996567,
167
+ 0.08009054511785507,
168
+ 0.15919354557991028,
169
+ 0.10112188756465912,
170
+ 0.08774156123399734,
171
+ 0.09424220025539398,
172
+ 0.07499466091394424,
173
+ 0.04172312468290329,
174
+ 0.05625886842608452,
175
+ 0.08554046601057053,
176
+ 0.04787531867623329,
177
+ 0.05242893472313881,
178
+ 0.06812094151973724,
179
+ 0.016909128054976463,
180
+ 0.041645247489213943,
181
+ 0.037941548973321915,
182
+ 0.030796796083450317,
183
+ 0.029599543660879135,
184
+ 0.023928115144371986,
185
+ 0.02092134952545166,
186
+ 0.019392890855669975,
187
+ 0.006476817186921835
188
+ ],
189
+ "info_loss": [
190
+ -0.6565923690795898,
191
+ -0.12502765655517578,
192
+ -0.049420952796936035,
193
+ -0.05284583568572998,
194
+ -0.02037215232849121,
195
+ -0.07843160629272461,
196
+ -0.08011436462402344,
197
+ -0.2734527587890625,
198
+ -0.7668462991714478,
199
+ -0.9567966461181641,
200
+ -0.6803109645843506,
201
+ -0.5087734460830688,
202
+ -0.3593854606151581,
203
+ -0.3683965802192688,
204
+ -0.3137052655220032,
205
+ -0.2846810519695282,
206
+ -0.21488648653030396,
207
+ -0.23822584748268127,
208
+ -0.18467622995376587,
209
+ -0.15311263501644135,
210
+ -0.1545877903699875,
211
+ -0.16041716933250427,
212
+ -0.1945183277130127,
213
+ -0.16658000648021698,
214
+ -0.18946321308612823,
215
+ -0.1700718104839325,
216
+ -0.22037822008132935,
217
+ -0.2142937034368515,
218
+ -0.15489357709884644,
219
+ -0.14260269701480865,
220
+ -0.16857698559761047,
221
+ -0.13929885625839233,
222
+ -0.14750534296035767,
223
+ -0.13791777193546295,
224
+ -0.13023759424686432,
225
+ -0.17602205276489258,
226
+ -0.11079932749271393,
227
+ -0.1599888950586319,
228
+ -0.11851393431425095,
229
+ -0.07508832216262817,
230
+ -0.15482710301876068,
231
+ -0.09955955296754837,
232
+ -0.08393968641757965,
233
+ -0.09361077845096588,
234
+ -0.07459273934364319,
235
+ -0.04118003323674202,
236
+ -0.04896640405058861,
237
+ -0.08194470405578613,
238
+ -0.04743151739239693,
239
+ -0.05198490992188454,
240
+ -0.06646489351987839,
241
+ -0.016638053581118584,
242
+ -0.04144047945737839,
243
+ -0.03774741664528847,
244
+ -0.030647873878479004,
245
+ -0.029441749677062035,
246
+ -0.02379968948662281,
247
+ -0.020802199840545654,
248
+ -0.019249044358730316,
249
+ -0.006381101440638304
250
+ ],
251
+ "abs_loss": [
252
+ 2.9256432056427,
253
+ 2.4958276748657227,
254
+ 2.46895432472229,
255
+ 2.4088761806488037,
256
+ 2.4180774688720703,
257
+ 2.4649078845977783,
258
+ 2.4588210582733154,
259
+ 2.453974723815918,
260
+ 2.4324522018432617,
261
+ 2.415642023086548,
262
+ 2.352257490158081,
263
+ 2.2088191509246826,
264
+ 1.8896418809890747,
265
+ 1.6739274263381958,
266
+ 1.32405686378479,
267
+ 1.0811830759048462,
268
+ 1.0548852682113647,
269
+ 0.758519172668457,
270
+ 0.718182384967804,
271
+ 0.6602980494499207,
272
+ 0.6154454946517944,
273
+ 0.46965107321739197,
274
+ 0.44081011414527893,
275
+ 0.4427615702152252,
276
+ 0.35650837421417236,
277
+ 0.3500964641571045,
278
+ 0.3001129925251007,
279
+ 0.2999572455883026,
280
+ 0.25184276700019836,
281
+ 0.2689480483531952,
282
+ 0.2463725060224533,
283
+ 0.24224205315113068,
284
+ 0.2413814663887024,
285
+ 0.20568279922008514,
286
+ 0.1656782478094101,
287
+ 0.19907349348068237,
288
+ 0.16802407801151276,
289
+ 0.17790837585926056,
290
+ 0.16314257681369781,
291
+ 0.17447948455810547,
292
+ 0.1573050618171692,
293
+ 0.13456518948078156,
294
+ 0.12536819279193878,
295
+ 0.10822665691375732,
296
+ 0.09974483400583267,
297
+ 0.11742156744003296,
298
+ 0.11229091882705688,
299
+ 0.09441050887107849,
300
+ 0.08244535326957703,
301
+ 0.08592605590820312,
302
+ 0.07963317632675171,
303
+ 0.08054837584495544,
304
+ 0.07055239379405975,
305
+ 0.089305579662323,
306
+ 0.07961511611938477,
307
+ 0.08294523507356644,
308
+ 0.0843416154384613,
309
+ 0.07325228303670883,
310
+ 0.07107876986265182,
311
+ 0.08012231439352036
312
+ ],
313
+ "zipf_loss": [
314
+ 5.287272930145264,
315
+ 1.4750287532806396,
316
+ 1.0815143585205078,
317
+ 1.063961386680603,
318
+ 1.0655146837234497,
319
+ 1.059265375137329,
320
+ 1.0542776584625244,
321
+ 1.0578733682632446,
322
+ 1.0626957416534424,
323
+ 1.0622444152832031,
324
+ 1.0611090660095215,
325
+ 1.0415093898773193,
326
+ 0.9805790185928345,
327
+ 0.9444482326507568,
328
+ 0.8873554468154907,
329
+ 0.8451348543167114,
330
+ 0.8529779314994812,
331
+ 0.7544879913330078,
332
+ 0.5987130403518677,
333
+ 0.5961304306983948,
334
+ 0.5383365750312805,
335
+ 0.4705946445465088,
336
+ 0.4250647723674774,
337
+ 0.3970823884010315,
338
+ 0.32460635900497437,
339
+ 0.3431873321533203,
340
+ 0.27286437153816223,
341
+ 0.2514929175376892,
342
+ 0.2804683446884155,
343
+ 0.22055907547473907,
344
+ 0.20605769753456116,
345
+ 0.22646912932395935,
346
+ 0.17331784963607788,
347
+ 0.17379960417747498,
348
+ 0.17898838222026825,
349
+ 0.14708437025547028,
350
+ 0.14710399508476257,
351
+ 0.16605313122272491,
352
+ 0.12533023953437805,
353
+ 0.13160867989063263,
354
+ 0.12758919596672058,
355
+ 0.08649729937314987,
356
+ 0.10984141379594803,
357
+ 0.08906104415655136,
358
+ 0.08317035436630249,
359
+ 0.07518477737903595,
360
+ 0.07132942974567413,
361
+ 0.07620728760957718,
362
+ 0.0872068703174591,
363
+ 0.08196690678596497,
364
+ 0.07007996737957001,
365
+ 0.04405072703957558,
366
+ 0.05769962817430496,
367
+ 0.043060846626758575,
368
+ 0.05980696901679039,
369
+ 0.03317856416106224,
370
+ 0.04187712073326111,
371
+ 0.040231671184301376,
372
+ 0.04018634557723999,
373
+ 0.04292917251586914
374
+ ],
375
+ "denoise_loss": [],
376
+ "ortho_loss": [
377
+ 0.35471978783607483,
378
+ 0.10054542869329453,
379
+ 0.08056585490703583,
380
+ 0.06761873513460159,
381
+ 0.06051715835928917,
382
+ 0.054626185446977615,
383
+ 0.06290311366319656,
384
+ 0.08528075367212296,
385
+ 0.12851166725158691,
386
+ 0.17594954371452332,
387
+ 0.1883549988269806,
388
+ 0.20819245278835297,
389
+ 0.22447381913661957,
390
+ 0.23362860083580017,
391
+ 0.23352178931236267,
392
+ 0.2505631148815155,
393
+ 0.2545468807220459,
394
+ 0.25482049584388733,
395
+ 0.2553919851779938,
396
+ 0.2531490623950958,
397
+ 0.2515760660171509,
398
+ 0.25270405411720276,
399
+ 0.2550104260444641,
400
+ 0.24405382573604584,
401
+ 0.2355482131242752,
402
+ 0.23306143283843994,
403
+ 0.2306639552116394,
404
+ 0.23312798142433167,
405
+ 0.2199832797050476,
406
+ 0.2244483232498169,
407
+ 0.22459453344345093,
408
+ 0.2254096269607544,
409
+ 0.22532722353935242,
410
+ 0.22544094920158386,
411
+ 0.22568713128566742,
412
+ 0.22397875785827637,
413
+ 0.232473224401474,
414
+ 0.23453326523303986,
415
+ 0.2402927279472351,
416
+ 0.24208620190620422,
417
+ 0.23906201124191284,
418
+ 0.2491123527288437,
419
+ 0.2513620853424072,
420
+ 0.24948538839817047,
421
+ 0.25132355093955994,
422
+ 0.2519170641899109,
423
+ 0.25072360038757324,
424
+ 0.25337281823158264,
425
+ 0.25460267066955566,
426
+ 0.2526955306529999,
427
+ 0.2516002357006073,
428
+ 0.252238005399704,
429
+ 0.2524856626987457,
430
+ 0.2535747289657593,
431
+ 0.256831556558609,
432
+ 0.2574509084224701,
433
+ 0.2587626278400421,
434
+ 0.25879767537117004,
435
+ 0.25963112711906433,
436
+ 0.2598315477371216
437
+ ],
438
+ "lr": [
439
+ 3.9200000000000004e-05,
440
+ 7.92e-05,
441
+ 8e-05,
442
+ 8e-05,
443
+ 8e-05,
444
+ 8e-05,
445
+ 8e-05,
446
+ 8e-05,
447
+ 8e-05,
448
+ 8e-05,
449
+ 8e-05,
450
+ 8e-05,
451
+ 8e-05,
452
+ 8e-05,
453
+ 8e-05,
454
+ 8e-05,
455
+ 8e-05,
456
+ 8e-05,
457
+ 8e-05,
458
+ 8e-05,
459
+ 8e-05,
460
+ 8e-05,
461
+ 8e-05,
462
+ 8e-05,
463
+ 8e-05,
464
+ 8e-05,
465
+ 8e-05,
466
+ 8e-05,
467
+ 8e-05,
468
+ 8e-05,
469
+ 8e-05,
470
+ 8e-05,
471
+ 8e-05,
472
+ 8e-05,
473
+ 8e-05,
474
+ 8e-05,
475
+ 7.946710526315791e-05,
476
+ 7.650657894736843e-05,
477
+ 7.354605263157895e-05,
478
+ 7.017105263157896e-05,
479
+ 6.721052631578948e-05,
480
+ 6.425e-05,
481
+ 6.0875e-05,
482
+ 5.791447368421054e-05,
483
+ 5.495394736842105e-05,
484
+ 5.157894736842105e-05,
485
+ 4.861842105263157e-05,
486
+ 4.565789473684212e-05,
487
+ 4.2282894736842104e-05,
488
+ 3.9322368421052625e-05,
489
+ 3.636184210526315e-05,
490
+ 3.2986842105263165e-05,
491
+ 3.0026315789473686e-05,
492
+ 2.7065789473684206e-05,
493
+ 2.3690789473684223e-05,
494
+ 2.0730263157894743e-05,
495
+ 1.7769736842105264e-05,
496
+ 1.4394736842105275e-05,
497
+ 1.1434210526315796e-05,
498
+ 8.473684210526318e-06
499
+ ],
500
+ "emb_lr": [],
501
+ "eval_step": [
502
+ 150,
503
+ 307,
504
+ 464,
505
+ 621,
506
+ 778,
507
+ 935,
508
+ 1092,
509
+ 1199,
510
+ 1356,
511
+ 1513,
512
+ 1670,
513
+ 1827,
514
+ 1984,
515
+ 2141,
516
+ 2298,
517
+ 2455,
518
+ 2612,
519
+ 2769,
520
+ 2926,
521
+ 3083
522
+ ],
523
+ "eval_accuracy": [
524
+ 0.02,
525
+ 0.01,
526
+ 0.01,
527
+ 0.0,
528
+ 0.0,
529
+ 0.0,
530
+ 0.0,
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0,
537
+ 0.0,
538
+ 0.0,
539
+ 0.0,
540
+ 0.0,
541
+ 0.0,
542
+ 0.0,
543
+ 0.0
544
+ ]
545
+ },
546
+ "final_accuracy": 0.9519230769230769,
547
+ "sft_eval": {
548
+ "config": {
549
+ "ops": "add_sub",
550
+ "K": null,
551
+ "mode": "sft",
552
+ "n_digits": 6,
553
+ "n_per_split": 100
554
+ },
555
+ "splits": {
556
+ "add_S0": {
557
+ "full_accuracy": 1.0,
558
+ "digit_accuracy": 1.0,
559
+ "n_examples": 100,
560
+ "per_subtask": {
561
+ "SA": {
562
+ "accuracy": 1.0,
563
+ "count": 605
564
+ },
565
+ "SS": {
566
+ "accuracy": 1.0,
567
+ "count": 95
568
+ }
569
+ }
570
+ },
571
+ "add_S1": {
572
+ "full_accuracy": 0.98,
573
+ "digit_accuracy": 0.9971428571428571,
574
+ "n_examples": 100,
575
+ "per_subtask": {
576
+ "SA": {
577
+ "accuracy": 1.0,
578
+ "count": 204
579
+ },
580
+ "SC": {
581
+ "accuracy": 0.9940828402366864,
582
+ "count": 169
583
+ },
584
+ "SS": {
585
+ "accuracy": 1.0,
586
+ "count": 31
587
+ },
588
+ "UC": {
589
+ "accuracy": 0.9966216216216216,
590
+ "count": 296
591
+ }
592
+ }
593
+ },
594
+ "add_S2": {
595
+ "full_accuracy": 0.99,
596
+ "digit_accuracy": 0.9985714285714286,
597
+ "n_examples": 100,
598
+ "per_subtask": {
599
+ "SA": {
600
+ "accuracy": 1.0,
601
+ "count": 163
602
+ },
603
+ "SC": {
604
+ "accuracy": 1.0,
605
+ "count": 130
606
+ },
607
+ "SS": {
608
+ "accuracy": 0.9885057471264368,
609
+ "count": 87
610
+ },
611
+ "UC": {
612
+ "accuracy": 1.0,
613
+ "count": 203
614
+ },
615
+ "US": {
616
+ "accuracy": 1.0,
617
+ "count": 117
618
+ }
619
+ }
620
+ },
621
+ "add_S3": {
622
+ "full_accuracy": 0.83,
623
+ "digit_accuracy": 0.9757142857142858,
624
+ "n_examples": 100,
625
+ "per_subtask": {
626
+ "SA": {
627
+ "accuracy": 1.0,
628
+ "count": 121
629
+ },
630
+ "SC": {
631
+ "accuracy": 0.9917355371900827,
632
+ "count": 121
633
+ },
634
+ "SS": {
635
+ "accuracy": 1.0,
636
+ "count": 49
637
+ },
638
+ "UC": {
639
+ "accuracy": 0.9139784946236559,
640
+ "count": 186
641
+ },
642
+ "US": {
643
+ "accuracy": 1.0,
644
+ "count": 223
645
+ }
646
+ }
647
+ },
648
+ "add_S4": {
649
+ "full_accuracy": 0.77,
650
+ "digit_accuracy": 0.9585714285714285,
651
+ "n_examples": 100,
652
+ "per_subtask": {
653
+ "SA": {
654
+ "accuracy": 1.0,
655
+ "count": 104
656
+ },
657
+ "SC": {
658
+ "accuracy": 1.0,
659
+ "count": 106
660
+ },
661
+ "SS": {
662
+ "accuracy": 1.0,
663
+ "count": 23
664
+ },
665
+ "UC": {
666
+ "accuracy": 0.8875,
667
+ "count": 160
668
+ },
669
+ "US": {
670
+ "accuracy": 0.9641693811074918,
671
+ "count": 307
672
+ }
673
+ }
674
+ },
675
+ "add_S5": {
676
+ "full_accuracy": 0.54,
677
+ "digit_accuracy": 0.8628571428571429,
678
+ "n_examples": 100,
679
+ "per_subtask": {
680
+ "SA": {
681
+ "accuracy": 1.0,
682
+ "count": 100
683
+ },
684
+ "SC": {
685
+ "accuracy": 1.0,
686
+ "count": 100
687
+ },
688
+ "UC": {
689
+ "accuracy": 0.69,
690
+ "count": 100
691
+ },
692
+ "US": {
693
+ "accuracy": 0.8375,
694
+ "count": 400
695
+ }
696
+ }
697
+ },
698
+ "add_S6": {
699
+ "full_accuracy": 0.91,
700
+ "digit_accuracy": 0.9657142857142857,
701
+ "n_examples": 100,
702
+ "per_subtask": {
703
+ "SC": {
704
+ "accuracy": 1.0,
705
+ "count": 100
706
+ },
707
+ "UC": {
708
+ "accuracy": 0.96,
709
+ "count": 100
710
+ },
711
+ "US": {
712
+ "accuracy": 0.96,
713
+ "count": 500
714
+ }
715
+ }
716
+ },
717
+ "add_random": {
718
+ "full_accuracy": 0.985,
719
+ "digit_accuracy": 0.9978571428571429,
720
+ "n_examples": 200,
721
+ "per_subtask": {
722
+ "SA": {
723
+ "accuracy": 1.0,
724
+ "count": 447
725
+ },
726
+ "SC": {
727
+ "accuracy": 1.0,
728
+ "count": 320
729
+ },
730
+ "SS": {
731
+ "accuracy": 1.0,
732
+ "count": 56
733
+ },
734
+ "UC": {
735
+ "accuracy": 0.994328922495274,
736
+ "count": 529
737
+ },
738
+ "US": {
739
+ "accuracy": 1.0,
740
+ "count": 48
741
+ }
742
+ }
743
+ },
744
+ "add_C1": {
745
+ "full_accuracy": 1.0,
746
+ "digit_accuracy": 1.0,
747
+ "n_examples": 100,
748
+ "per_subtask": {
749
+ "SA": {
750
+ "accuracy": 1.0,
751
+ "count": 500
752
+ },
753
+ "SC": {
754
+ "accuracy": 1.0,
755
+ "count": 100
756
+ },
757
+ "UC": {
758
+ "accuracy": 1.0,
759
+ "count": 100
760
+ }
761
+ }
762
+ },
763
+ "add_C2": {
764
+ "full_accuracy": 0.98,
765
+ "digit_accuracy": 0.9971428571428571,
766
+ "n_examples": 100,
767
+ "per_subtask": {
768
+ "SA": {
769
+ "accuracy": 0.9975,
770
+ "count": 400
771
+ },
772
+ "SC": {
773
+ "accuracy": 1.0,
774
+ "count": 100
775
+ },
776
+ "UC": {
777
+ "accuracy": 0.9935897435897436,
778
+ "count": 156
779
+ },
780
+ "US": {
781
+ "accuracy": 1.0,
782
+ "count": 44
783
+ }
784
+ }
785
+ },
786
+ "add_C3": {
787
+ "full_accuracy": 0.9,
788
+ "digit_accuracy": 0.9857142857142858,
789
+ "n_examples": 100,
790
+ "per_subtask": {
791
+ "SA": {
792
+ "accuracy": 1.0,
793
+ "count": 300
794
+ },
795
+ "SC": {
796
+ "accuracy": 1.0,
797
+ "count": 100
798
+ },
799
+ "UC": {
800
+ "accuracy": 0.949748743718593,
801
+ "count": 199
802
+ },
803
+ "US": {
804
+ "accuracy": 1.0,
805
+ "count": 101
806
+ }
807
+ }
808
+ },
809
+ "add_C4": {
810
+ "full_accuracy": 0.88,
811
+ "digit_accuracy": 0.9828571428571429,
812
+ "n_examples": 100,
813
+ "per_subtask": {
814
+ "SA": {
815
+ "accuracy": 1.0,
816
+ "count": 200
817
+ },
818
+ "SC": {
819
+ "accuracy": 1.0,
820
+ "count": 100
821
+ },
822
+ "UC": {
823
+ "accuracy": 0.9545454545454546,
824
+ "count": 264
825
+ },
826
+ "US": {
827
+ "accuracy": 1.0,
828
+ "count": 136
829
+ }
830
+ }
831
+ },
832
+ "add_C5": {
833
+ "full_accuracy": 0.83,
834
+ "digit_accuracy": 0.97,
835
+ "n_examples": 100,
836
+ "per_subtask": {
837
+ "SA": {
838
+ "accuracy": 1.0,
839
+ "count": 100
840
+ },
841
+ "SC": {
842
+ "accuracy": 1.0,
843
+ "count": 100
844
+ },
845
+ "UC": {
846
+ "accuracy": 0.9483870967741935,
847
+ "count": 310
848
+ },
849
+ "US": {
850
+ "accuracy": 0.9736842105263158,
851
+ "count": 190
852
+ }
853
+ }
854
+ },
855
+ "add_C6": {
856
+ "full_accuracy": 0.84,
857
+ "digit_accuracy": 0.9685714285714285,
858
+ "n_examples": 100,
859
+ "per_subtask": {
860
+ "SC": {
861
+ "accuracy": 1.0,
862
+ "count": 100
863
+ },
864
+ "UC": {
865
+ "accuracy": 0.9675675675675676,
866
+ "count": 370
867
+ },
868
+ "US": {
869
+ "accuracy": 0.9565217391304348,
870
+ "count": 230
871
+ }
872
+ }
873
+ },
874
+ "sub_M0": {
875
+ "full_accuracy": 1.0,
876
+ "digit_accuracy": 1.0,
877
+ "n_examples": 100,
878
+ "per_subtask": {
879
+ "MD": {
880
+ "accuracy": 1.0,
881
+ "count": 615
882
+ },
883
+ "ME": {
884
+ "accuracy": 1.0,
885
+ "count": 85
886
+ }
887
+ }
888
+ },
889
+ "sub_M1": {
890
+ "full_accuracy": 1.0,
891
+ "digit_accuracy": 1.0,
892
+ "n_examples": 100,
893
+ "per_subtask": {
894
+ "MD": {
895
+ "accuracy": 1.0,
896
+ "count": 292
897
+ },
898
+ "MB": {
899
+ "accuracy": 1.0,
900
+ "count": 144
901
+ },
902
+ "ME": {
903
+ "accuracy": 1.0,
904
+ "count": 25
905
+ },
906
+ "UB": {
907
+ "accuracy": 1.0,
908
+ "count": 239
909
+ }
910
+ }
911
+ },
912
+ "sub_M2": {
913
+ "full_accuracy": 1.0,
914
+ "digit_accuracy": 1.0,
915
+ "n_examples": 100,
916
+ "per_subtask": {
917
+ "MD": {
918
+ "accuracy": 1.0,
919
+ "count": 211
920
+ },
921
+ "MB": {
922
+ "accuracy": 1.0,
923
+ "count": 115
924
+ },
925
+ "ME": {
926
+ "accuracy": 1.0,
927
+ "count": 85
928
+ },
929
+ "UB": {
930
+ "accuracy": 1.0,
931
+ "count": 181
932
+ },
933
+ "UD": {
934
+ "accuracy": 1.0,
935
+ "count": 108
936
+ }
937
+ }
938
+ },
939
+ "sub_M3": {
940
+ "full_accuracy": 0.84,
941
+ "digit_accuracy": 0.9771428571428571,
942
+ "n_examples": 100,
943
+ "per_subtask": {
944
+ "MD": {
945
+ "accuracy": 1.0,
946
+ "count": 179
947
+ },
948
+ "MB": {
949
+ "accuracy": 1.0,
950
+ "count": 103
951
+ },
952
+ "ME": {
953
+ "accuracy": 1.0,
954
+ "count": 56
955
+ },
956
+ "UB": {
957
+ "accuracy": 0.8926174496644296,
958
+ "count": 149
959
+ },
960
+ "UD": {
961
+ "accuracy": 1.0,
962
+ "count": 213
963
+ }
964
+ }
965
+ },
966
+ "sub_M4": {
967
+ "full_accuracy": 0.13,
968
+ "digit_accuracy": 0.81,
969
+ "n_examples": 100,
970
+ "per_subtask": {
971
+ "MD": {
972
+ "accuracy": 1.0,
973
+ "count": 200
974
+ },
975
+ "MB": {
976
+ "accuracy": 1.0,
977
+ "count": 100
978
+ },
979
+ "UB": {
980
+ "accuracy": 0.14,
981
+ "count": 100
982
+ },
983
+ "UD": {
984
+ "accuracy": 0.8433333333333334,
985
+ "count": 300
986
+ }
987
+ }
988
+ },
989
+ "sub_M5": {
990
+ "full_accuracy": 0.08,
991
+ "digit_accuracy": 0.7042857142857143,
992
+ "n_examples": 100,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 100
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 100
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.27,
1004
+ "count": 100
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.665,
1008
+ "count": 400
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 1.0,
1014
+ "digit_accuracy": 1.0,
1015
+ "n_examples": 200,
1016
+ "per_subtask": {
1017
+ "MD": {
1018
+ "accuracy": 1.0,
1019
+ "count": 600
1020
+ },
1021
+ "MB": {
1022
+ "accuracy": 1.0,
1023
+ "count": 267
1024
+ },
1025
+ "ME": {
1026
+ "accuracy": 1.0,
1027
+ "count": 53
1028
+ },
1029
+ "UB": {
1030
+ "accuracy": 1.0,
1031
+ "count": 439
1032
+ },
1033
+ "UD": {
1034
+ "accuracy": 1.0,
1035
+ "count": 41
1036
+ }
1037
+ }
1038
+ },
1039
+ "sub_B3": {
1040
+ "full_accuracy": 0.84,
1041
+ "digit_accuracy": 0.9771428571428571,
1042
+ "n_examples": 100,
1043
+ "per_subtask": {
1044
+ "MD": {
1045
+ "accuracy": 1.0,
1046
+ "count": 300
1047
+ },
1048
+ "MB": {
1049
+ "accuracy": 1.0,
1050
+ "count": 100
1051
+ },
1052
+ "UB": {
1053
+ "accuracy": 0.9187817258883249,
1054
+ "count": 197
1055
+ },
1056
+ "UD": {
1057
+ "accuracy": 1.0,
1058
+ "count": 103
1059
+ }
1060
+ }
1061
+ },
1062
+ "sub_B4": {
1063
+ "full_accuracy": 0.79,
1064
+ "digit_accuracy": 0.9642857142857143,
1065
+ "n_examples": 100,
1066
+ "per_subtask": {
1067
+ "MD": {
1068
+ "accuracy": 1.0,
1069
+ "count": 200
1070
+ },
1071
+ "MB": {
1072
+ "accuracy": 1.0,
1073
+ "count": 100
1074
+ },
1075
+ "UB": {
1076
+ "accuracy": 0.9149797570850202,
1077
+ "count": 247
1078
+ },
1079
+ "UD": {
1080
+ "accuracy": 0.9738562091503268,
1081
+ "count": 153
1082
+ }
1083
+ }
1084
+ },
1085
+ "sub_B5": {
1086
+ "full_accuracy": 0.84,
1087
+ "digit_accuracy": 0.9614285714285714,
1088
+ "n_examples": 100,
1089
+ "per_subtask": {
1090
+ "MD": {
1091
+ "accuracy": 1.0,
1092
+ "count": 100
1093
+ },
1094
+ "MB": {
1095
+ "accuracy": 1.0,
1096
+ "count": 100
1097
+ },
1098
+ "UB": {
1099
+ "accuracy": 0.9496644295302014,
1100
+ "count": 298
1101
+ },
1102
+ "UD": {
1103
+ "accuracy": 0.9405940594059405,
1104
+ "count": 202
1105
+ }
1106
+ }
1107
+ }
1108
+ },
1109
+ "summary": {
1110
+ "overall_accuracy": 0.8438461538461538,
1111
+ "digit_accuracy": 0.9634615384615385,
1112
+ "total_examples": 2600,
1113
+ "n_splits": 24
1114
+ }
1115
+ },
1116
+ "sorl_eval": {
1117
+ "config": {
1118
+ "ops": "add_sub",
1119
+ "K": 1,
1120
+ "mode": "sorl",
1121
+ "n_digits": 6,
1122
+ "n_per_split": 100
1123
+ },
1124
+ "splits": {
1125
+ "add_S0": {
1126
+ "full_accuracy": 1.0,
1127
+ "digit_accuracy": 1.0,
1128
+ "n_examples": 100,
1129
+ "per_subtask": {
1130
+ "SA": {
1131
+ "accuracy": 1.0,
1132
+ "count": 605
1133
+ },
1134
+ "SS": {
1135
+ "accuracy": 1.0,
1136
+ "count": 95
1137
+ }
1138
+ }
1139
+ },
1140
+ "add_S1": {
1141
+ "full_accuracy": 1.0,
1142
+ "digit_accuracy": 1.0,
1143
+ "n_examples": 100,
1144
+ "per_subtask": {
1145
+ "SA": {
1146
+ "accuracy": 1.0,
1147
+ "count": 204
1148
+ },
1149
+ "SC": {
1150
+ "accuracy": 1.0,
1151
+ "count": 169
1152
+ },
1153
+ "SS": {
1154
+ "accuracy": 1.0,
1155
+ "count": 31
1156
+ },
1157
+ "UC": {
1158
+ "accuracy": 1.0,
1159
+ "count": 296
1160
+ }
1161
+ }
1162
+ },
1163
+ "add_S2": {
1164
+ "full_accuracy": 1.0,
1165
+ "digit_accuracy": 1.0,
1166
+ "n_examples": 100,
1167
+ "per_subtask": {
1168
+ "SA": {
1169
+ "accuracy": 1.0,
1170
+ "count": 163
1171
+ },
1172
+ "SC": {
1173
+ "accuracy": 1.0,
1174
+ "count": 130
1175
+ },
1176
+ "SS": {
1177
+ "accuracy": 1.0,
1178
+ "count": 87
1179
+ },
1180
+ "UC": {
1181
+ "accuracy": 1.0,
1182
+ "count": 203
1183
+ },
1184
+ "US": {
1185
+ "accuracy": 1.0,
1186
+ "count": 117
1187
+ }
1188
+ }
1189
+ },
1190
+ "add_S3": {
1191
+ "full_accuracy": 1.0,
1192
+ "digit_accuracy": 1.0,
1193
+ "n_examples": 100,
1194
+ "per_subtask": {
1195
+ "SA": {
1196
+ "accuracy": 1.0,
1197
+ "count": 121
1198
+ },
1199
+ "SC": {
1200
+ "accuracy": 1.0,
1201
+ "count": 121
1202
+ },
1203
+ "SS": {
1204
+ "accuracy": 1.0,
1205
+ "count": 49
1206
+ },
1207
+ "UC": {
1208
+ "accuracy": 1.0,
1209
+ "count": 186
1210
+ },
1211
+ "US": {
1212
+ "accuracy": 1.0,
1213
+ "count": 223
1214
+ }
1215
+ }
1216
+ },
1217
+ "add_S4": {
1218
+ "full_accuracy": 1.0,
1219
+ "digit_accuracy": 1.0,
1220
+ "n_examples": 100,
1221
+ "per_subtask": {
1222
+ "SA": {
1223
+ "accuracy": 1.0,
1224
+ "count": 104
1225
+ },
1226
+ "SC": {
1227
+ "accuracy": 1.0,
1228
+ "count": 106
1229
+ },
1230
+ "SS": {
1231
+ "accuracy": 1.0,
1232
+ "count": 23
1233
+ },
1234
+ "UC": {
1235
+ "accuracy": 1.0,
1236
+ "count": 160
1237
+ },
1238
+ "US": {
1239
+ "accuracy": 1.0,
1240
+ "count": 307
1241
+ }
1242
+ }
1243
+ },
1244
+ "add_S5": {
1245
+ "full_accuracy": 0.56,
1246
+ "digit_accuracy": 0.9357142857142857,
1247
+ "n_examples": 100,
1248
+ "per_subtask": {
1249
+ "SA": {
1250
+ "accuracy": 1.0,
1251
+ "count": 100
1252
+ },
1253
+ "SC": {
1254
+ "accuracy": 1.0,
1255
+ "count": 100
1256
+ },
1257
+ "UC": {
1258
+ "accuracy": 0.56,
1259
+ "count": 100
1260
+ },
1261
+ "US": {
1262
+ "accuracy": 0.9975,
1263
+ "count": 400
1264
+ }
1265
+ }
1266
+ },
1267
+ "add_S6": {
1268
+ "full_accuracy": 1.0,
1269
+ "digit_accuracy": 1.0,
1270
+ "n_examples": 100,
1271
+ "per_subtask": {
1272
+ "SC": {
1273
+ "accuracy": 1.0,
1274
+ "count": 100
1275
+ },
1276
+ "UC": {
1277
+ "accuracy": 1.0,
1278
+ "count": 100
1279
+ },
1280
+ "US": {
1281
+ "accuracy": 1.0,
1282
+ "count": 500
1283
+ }
1284
+ }
1285
+ },
1286
+ "add_random": {
1287
+ "full_accuracy": 1.0,
1288
+ "digit_accuracy": 1.0,
1289
+ "n_examples": 200,
1290
+ "per_subtask": {
1291
+ "SA": {
1292
+ "accuracy": 1.0,
1293
+ "count": 447
1294
+ },
1295
+ "SC": {
1296
+ "accuracy": 1.0,
1297
+ "count": 320
1298
+ },
1299
+ "SS": {
1300
+ "accuracy": 1.0,
1301
+ "count": 56
1302
+ },
1303
+ "UC": {
1304
+ "accuracy": 1.0,
1305
+ "count": 529
1306
+ },
1307
+ "US": {
1308
+ "accuracy": 1.0,
1309
+ "count": 48
1310
+ }
1311
+ }
1312
+ },
1313
+ "add_C1": {
1314
+ "full_accuracy": 1.0,
1315
+ "digit_accuracy": 1.0,
1316
+ "n_examples": 100,
1317
+ "per_subtask": {
1318
+ "SA": {
1319
+ "accuracy": 1.0,
1320
+ "count": 500
1321
+ },
1322
+ "SC": {
1323
+ "accuracy": 1.0,
1324
+ "count": 100
1325
+ },
1326
+ "UC": {
1327
+ "accuracy": 1.0,
1328
+ "count": 100
1329
+ }
1330
+ }
1331
+ },
1332
+ "add_C2": {
1333
+ "full_accuracy": 1.0,
1334
+ "digit_accuracy": 1.0,
1335
+ "n_examples": 100,
1336
+ "per_subtask": {
1337
+ "SA": {
1338
+ "accuracy": 1.0,
1339
+ "count": 400
1340
+ },
1341
+ "SC": {
1342
+ "accuracy": 1.0,
1343
+ "count": 100
1344
+ },
1345
+ "UC": {
1346
+ "accuracy": 1.0,
1347
+ "count": 156
1348
+ },
1349
+ "US": {
1350
+ "accuracy": 1.0,
1351
+ "count": 44
1352
+ }
1353
+ }
1354
+ },
1355
+ "add_C3": {
1356
+ "full_accuracy": 1.0,
1357
+ "digit_accuracy": 1.0,
1358
+ "n_examples": 100,
1359
+ "per_subtask": {
1360
+ "SA": {
1361
+ "accuracy": 1.0,
1362
+ "count": 300
1363
+ },
1364
+ "SC": {
1365
+ "accuracy": 1.0,
1366
+ "count": 100
1367
+ },
1368
+ "UC": {
1369
+ "accuracy": 1.0,
1370
+ "count": 199
1371
+ },
1372
+ "US": {
1373
+ "accuracy": 1.0,
1374
+ "count": 101
1375
+ }
1376
+ }
1377
+ },
1378
+ "add_C4": {
1379
+ "full_accuracy": 0.99,
1380
+ "digit_accuracy": 0.9985714285714286,
1381
+ "n_examples": 100,
1382
+ "per_subtask": {
1383
+ "SA": {
1384
+ "accuracy": 1.0,
1385
+ "count": 200
1386
+ },
1387
+ "SC": {
1388
+ "accuracy": 1.0,
1389
+ "count": 100
1390
+ },
1391
+ "UC": {
1392
+ "accuracy": 0.9962121212121212,
1393
+ "count": 264
1394
+ },
1395
+ "US": {
1396
+ "accuracy": 1.0,
1397
+ "count": 136
1398
+ }
1399
+ }
1400
+ },
1401
+ "add_C5": {
1402
+ "full_accuracy": 0.99,
1403
+ "digit_accuracy": 0.9985714285714286,
1404
+ "n_examples": 100,
1405
+ "per_subtask": {
1406
+ "SA": {
1407
+ "accuracy": 1.0,
1408
+ "count": 100
1409
+ },
1410
+ "SC": {
1411
+ "accuracy": 1.0,
1412
+ "count": 100
1413
+ },
1414
+ "UC": {
1415
+ "accuracy": 0.9967741935483871,
1416
+ "count": 310
1417
+ },
1418
+ "US": {
1419
+ "accuracy": 1.0,
1420
+ "count": 190
1421
+ }
1422
+ }
1423
+ },
1424
+ "add_C6": {
1425
+ "full_accuracy": 0.96,
1426
+ "digit_accuracy": 0.9942857142857143,
1427
+ "n_examples": 100,
1428
+ "per_subtask": {
1429
+ "SC": {
1430
+ "accuracy": 1.0,
1431
+ "count": 100
1432
+ },
1433
+ "UC": {
1434
+ "accuracy": 0.9891891891891892,
1435
+ "count": 370
1436
+ },
1437
+ "US": {
1438
+ "accuracy": 1.0,
1439
+ "count": 230
1440
+ }
1441
+ }
1442
+ },
1443
+ "sub_M0": {
1444
+ "full_accuracy": 1.0,
1445
+ "digit_accuracy": 1.0,
1446
+ "n_examples": 100,
1447
+ "per_subtask": {
1448
+ "MD": {
1449
+ "accuracy": 1.0,
1450
+ "count": 615
1451
+ },
1452
+ "ME": {
1453
+ "accuracy": 1.0,
1454
+ "count": 85
1455
+ }
1456
+ }
1457
+ },
1458
+ "sub_M1": {
1459
+ "full_accuracy": 1.0,
1460
+ "digit_accuracy": 1.0,
1461
+ "n_examples": 100,
1462
+ "per_subtask": {
1463
+ "MD": {
1464
+ "accuracy": 1.0,
1465
+ "count": 292
1466
+ },
1467
+ "MB": {
1468
+ "accuracy": 1.0,
1469
+ "count": 144
1470
+ },
1471
+ "ME": {
1472
+ "accuracy": 1.0,
1473
+ "count": 25
1474
+ },
1475
+ "UB": {
1476
+ "accuracy": 1.0,
1477
+ "count": 239
1478
+ }
1479
+ }
1480
+ },
1481
+ "sub_M2": {
1482
+ "full_accuracy": 1.0,
1483
+ "digit_accuracy": 1.0,
1484
+ "n_examples": 100,
1485
+ "per_subtask": {
1486
+ "MD": {
1487
+ "accuracy": 1.0,
1488
+ "count": 211
1489
+ },
1490
+ "MB": {
1491
+ "accuracy": 1.0,
1492
+ "count": 115
1493
+ },
1494
+ "ME": {
1495
+ "accuracy": 1.0,
1496
+ "count": 85
1497
+ },
1498
+ "UB": {
1499
+ "accuracy": 1.0,
1500
+ "count": 181
1501
+ },
1502
+ "UD": {
1503
+ "accuracy": 1.0,
1504
+ "count": 108
1505
+ }
1506
+ }
1507
+ },
1508
+ "sub_M3": {
1509
+ "full_accuracy": 1.0,
1510
+ "digit_accuracy": 1.0,
1511
+ "n_examples": 100,
1512
+ "per_subtask": {
1513
+ "MD": {
1514
+ "accuracy": 1.0,
1515
+ "count": 179
1516
+ },
1517
+ "MB": {
1518
+ "accuracy": 1.0,
1519
+ "count": 103
1520
+ },
1521
+ "ME": {
1522
+ "accuracy": 1.0,
1523
+ "count": 56
1524
+ },
1525
+ "UB": {
1526
+ "accuracy": 1.0,
1527
+ "count": 149
1528
+ },
1529
+ "UD": {
1530
+ "accuracy": 1.0,
1531
+ "count": 213
1532
+ }
1533
+ }
1534
+ },
1535
+ "sub_M4": {
1536
+ "full_accuracy": 0.98,
1537
+ "digit_accuracy": 0.9971428571428571,
1538
+ "n_examples": 100,
1539
+ "per_subtask": {
1540
+ "MD": {
1541
+ "accuracy": 1.0,
1542
+ "count": 200
1543
+ },
1544
+ "MB": {
1545
+ "accuracy": 1.0,
1546
+ "count": 100
1547
+ },
1548
+ "UB": {
1549
+ "accuracy": 1.0,
1550
+ "count": 100
1551
+ },
1552
+ "UD": {
1553
+ "accuracy": 0.9933333333333333,
1554
+ "count": 300
1555
+ }
1556
+ }
1557
+ },
1558
+ "sub_M5": {
1559
+ "full_accuracy": 0.34,
1560
+ "digit_accuracy": 0.9057142857142857,
1561
+ "n_examples": 100,
1562
+ "per_subtask": {
1563
+ "MD": {
1564
+ "accuracy": 1.0,
1565
+ "count": 100
1566
+ },
1567
+ "MB": {
1568
+ "accuracy": 1.0,
1569
+ "count": 100
1570
+ },
1571
+ "UB": {
1572
+ "accuracy": 0.34,
1573
+ "count": 100
1574
+ },
1575
+ "UD": {
1576
+ "accuracy": 1.0,
1577
+ "count": 400
1578
+ }
1579
+ }
1580
+ },
1581
+ "sub_random": {
1582
+ "full_accuracy": 1.0,
1583
+ "digit_accuracy": 1.0,
1584
+ "n_examples": 200,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 1.0,
1588
+ "count": 600
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 1.0,
1592
+ "count": 267
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 1.0,
1596
+ "count": 53
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 1.0,
1600
+ "count": 439
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 1.0,
1604
+ "count": 41
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_B3": {
1609
+ "full_accuracy": 0.99,
1610
+ "digit_accuracy": 0.9985714285714286,
1611
+ "n_examples": 100,
1612
+ "per_subtask": {
1613
+ "MD": {
1614
+ "accuracy": 1.0,
1615
+ "count": 300
1616
+ },
1617
+ "MB": {
1618
+ "accuracy": 1.0,
1619
+ "count": 100
1620
+ },
1621
+ "UB": {
1622
+ "accuracy": 0.9949238578680203,
1623
+ "count": 197
1624
+ },
1625
+ "UD": {
1626
+ "accuracy": 1.0,
1627
+ "count": 103
1628
+ }
1629
+ }
1630
+ },
1631
+ "sub_B4": {
1632
+ "full_accuracy": 1.0,
1633
+ "digit_accuracy": 1.0,
1634
+ "n_examples": 100,
1635
+ "per_subtask": {
1636
+ "MD": {
1637
+ "accuracy": 1.0,
1638
+ "count": 200
1639
+ },
1640
+ "MB": {
1641
+ "accuracy": 1.0,
1642
+ "count": 100
1643
+ },
1644
+ "UB": {
1645
+ "accuracy": 1.0,
1646
+ "count": 247
1647
+ },
1648
+ "UD": {
1649
+ "accuracy": 1.0,
1650
+ "count": 153
1651
+ }
1652
+ }
1653
+ },
1654
+ "sub_B5": {
1655
+ "full_accuracy": 0.94,
1656
+ "digit_accuracy": 0.9914285714285714,
1657
+ "n_examples": 100,
1658
+ "per_subtask": {
1659
+ "MD": {
1660
+ "accuracy": 1.0,
1661
+ "count": 100
1662
+ },
1663
+ "MB": {
1664
+ "accuracy": 1.0,
1665
+ "count": 100
1666
+ },
1667
+ "UB": {
1668
+ "accuracy": 0.9798657718120806,
1669
+ "count": 298
1670
+ },
1671
+ "UD": {
1672
+ "accuracy": 1.0,
1673
+ "count": 202
1674
+ }
1675
+ }
1676
+ }
1677
+ },
1678
+ "summary": {
1679
+ "overall_accuracy": 0.9519230769230769,
1680
+ "digit_accuracy": 0.9930769230769231,
1681
+ "total_examples": 2600,
1682
+ "n_splits": 24
1683
+ }
1684
+ },
1685
+ "sorl_overall_accuracy": 0.9519230769230769,
1686
+ "sft_overall_accuracy": 0.8438461538461538
1687
+ }
add_sub_sorl_v1_abs20_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e372b9ada964d440d4df2367917a51ae15eb6d718813ee947798a1086f11d6
3
+ size 650344480
add_sub_sorl_v1_abs20_K1_10K/train_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs20_K1_10K_2L3H510d",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 20,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162509462,
71
+ "run_name": "add_sub_sorl_v1_abs20_K1_10K",
72
+ "git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
73
+ "timestamp": "2026-04-16T04:10:46.561234+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "train_dataset": "fixed_train/train_10K_seed42.pt",
78
+ "model_repo": "thoughtworks/arithmetic-sorl",
79
+ "trainer_version": "v1",
80
+ "wandb_run_id": "o9oa527y",
81
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/o9oa527y",
82
+ "eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
83
+ "eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
84
+ "eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
85
+ "config_hash": "8952e8187a2b",
86
+ "final_accuracy": 0.9519230769230769,
87
+ "sft_accuracy": 0.8438461538461538,
88
+ "eval_method": "ArithmeticEvaluator"
89
+ }