amirali1985 commited on
Commit
34efd60
·
verified ·
1 Parent(s): e86c790

Upload add_sub_sorl_v1_abs2_K1_10K

Browse files
add_sub_sorl_v1_abs2_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151646
37
+ }
add_sub_sorl_v1_abs2_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs2_K1_10K/metrics.json ADDED
@@ -0,0 +1,1687 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563,
34
+ 1620,
35
+ 1670,
36
+ 1720,
37
+ 1777,
38
+ 1827,
39
+ 1877,
40
+ 1934,
41
+ 1984,
42
+ 2034,
43
+ 2091,
44
+ 2141,
45
+ 2191,
46
+ 2248,
47
+ 2298,
48
+ 2348,
49
+ 2405,
50
+ 2455,
51
+ 2505,
52
+ 2562,
53
+ 2612,
54
+ 2662,
55
+ 2719,
56
+ 2769,
57
+ 2819,
58
+ 2876,
59
+ 2926,
60
+ 2976,
61
+ 3033,
62
+ 3083,
63
+ 3133
64
+ ],
65
+ "loss": [
66
+ 12.146018028259277,
67
+ 8.140860557556152,
68
+ 5.788841247558594,
69
+ 5.928689956665039,
70
+ 4.940187931060791,
71
+ -0.8538930416107178,
72
+ -3.957218647003174,
73
+ -5.3563971519470215,
74
+ -6.507109642028809,
75
+ -7.65699577331543,
76
+ -8.334919929504395,
77
+ -9.785987854003906,
78
+ -9.972005844116211,
79
+ -11.117347717285156,
80
+ -11.665057182312012,
81
+ -10.518767356872559,
82
+ -12.363883018493652,
83
+ -11.287307739257812,
84
+ -9.55109977722168,
85
+ -7.357775688171387,
86
+ -5.654560565948486,
87
+ -3.8458449840545654,
88
+ -1.607912540435791,
89
+ -1.7796285152435303,
90
+ -1.6869027614593506,
91
+ -1.2786306142807007,
92
+ -1.1483945846557617,
93
+ -0.8021516799926758,
94
+ -0.6225531101226807,
95
+ -0.4951012432575226,
96
+ -1.253838062286377,
97
+ -0.6078148484230042,
98
+ -0.5363032817840576,
99
+ -0.4771603047847748,
100
+ -0.4060630202293396,
101
+ -0.5651876926422119,
102
+ -0.4811541736125946,
103
+ -0.5336906313896179,
104
+ -0.29910436272621155,
105
+ -0.1743965446949005,
106
+ -0.0865909680724144,
107
+ -0.19921013712882996,
108
+ -0.09337317198514938,
109
+ -0.33079972863197327,
110
+ -0.22505944967269897,
111
+ -0.13146325945854187,
112
+ -0.16447202861309052,
113
+ -0.11612369120121002,
114
+ 0.009087484329938889,
115
+ 0.03794008493423462,
116
+ 0.02065785601735115,
117
+ 0.0008186213672161102,
118
+ -0.01969563215970993,
119
+ 0.0013124383985996246,
120
+ 0.02527482435107231,
121
+ 0.05165558308362961,
122
+ 0.051053162664175034,
123
+ 0.04289375990629196,
124
+ 0.05697126314043999,
125
+ 0.05128497630357742
126
+ ],
127
+ "base_loss": [
128
+ 7.628437042236328,
129
+ 4.0556206703186035,
130
+ 1.9530707597732544,
131
+ 1.8979682922363281,
132
+ 1.9241129159927368,
133
+ 1.8852487802505493,
134
+ 1.8353227376937866,
135
+ 1.8186378479003906,
136
+ 1.8541980981826782,
137
+ 1.817212462425232,
138
+ 1.7589912414550781,
139
+ 1.752821683883667,
140
+ 1.7150675058364868,
141
+ 1.7124515771865845,
142
+ 1.7245622873306274,
143
+ 1.5408344268798828,
144
+ 1.5466477870941162,
145
+ 1.374353051185608,
146
+ 1.1373178958892822,
147
+ 0.8995004296302795,
148
+ 0.6910373568534851,
149
+ 0.4638596177101135,
150
+ 0.231327623128891,
151
+ 0.24538195133209229,
152
+ 0.20177486538887024,
153
+ 0.15957574546337128,
154
+ 0.15221306681632996,
155
+ 0.10463942587375641,
156
+ 0.10045579820871353,
157
+ 0.06593646109104156,
158
+ 0.15611542761325836,
159
+ 0.07823602110147476,
160
+ 0.08280634135007858,
161
+ 0.06120121851563454,
162
+ 0.05883612856268883,
163
+ 0.07205647975206375,
164
+ 0.0748705118894577,
165
+ 0.06757231056690216,
166
+ 0.04013211280107498,
167
+ 0.02632850408554077,
168
+ 0.016306018456816673,
169
+ 0.029043469578027725,
170
+ 0.016971977427601814,
171
+ 0.04336889460682869,
172
+ 0.03189310058951378,
173
+ 0.0211532860994339,
174
+ 0.02499561943113804,
175
+ 0.019456015899777412,
176
+ 0.005540237762033939,
177
+ 0.0022714717779308558,
178
+ 0.004181382246315479,
179
+ 0.006391526199877262,
180
+ 0.00866654235869646,
181
+ 0.006326296832412481,
182
+ 0.0036668891552835703,
183
+ 0.0007290642824955285,
184
+ 0.0007950459257699549,
185
+ 0.0017015466000884771,
186
+ 0.00013872093404643238,
187
+ 0.000770941493101418
188
+ ],
189
+ "info_loss": [
190
+ -0.6176466941833496,
191
+ -0.08461570739746094,
192
+ -0.05701446533203125,
193
+ -0.035248637199401855,
194
+ -0.13141179084777832,
195
+ -0.6646270751953125,
196
+ -0.9369587898254395,
197
+ -1.0392465591430664,
198
+ -1.134619951248169,
199
+ -1.2192878723144531,
200
+ -1.2667748928070068,
201
+ -1.4063975811004639,
202
+ -1.4086241722106934,
203
+ -1.5143393278121948,
204
+ -1.5569674968719482,
205
+ -1.3761557340621948,
206
+ -1.4464160203933716,
207
+ -1.2794716358184814,
208
+ -1.0764862298965454,
209
+ -0.8341529369354248,
210
+ -0.6416376829147339,
211
+ -0.4403914213180542,
212
+ -0.1967826783657074,
213
+ -0.2110116183757782,
214
+ -0.19503700733184814,
215
+ -0.14979569613933563,
216
+ -0.1361372172832489,
217
+ -0.09671700745820999,
218
+ -0.07853375375270844,
219
+ -0.06197825074195862,
220
+ -0.14958061277866364,
221
+ -0.07456944137811661,
222
+ -0.0677691251039505,
223
+ -0.059750691056251526,
224
+ -0.05245509371161461,
225
+ -0.06968473643064499,
226
+ -0.06143011525273323,
227
+ -0.06598454713821411,
228
+ -0.03974508121609688,
229
+ -0.02589520253241062,
230
+ -0.016123393550515175,
231
+ -0.028640657663345337,
232
+ -0.01684783399105072,
233
+ -0.043257907032966614,
234
+ -0.03150363266468048,
235
+ -0.021070921793580055,
236
+ -0.02475244365632534,
237
+ -0.019364137202501297,
238
+ -0.005449273623526096,
239
+ -0.002236021449789405,
240
+ -0.004155138973146677,
241
+ -0.006360492669045925,
242
+ -0.008639117702841759,
243
+ -0.0063036042265594006,
244
+ -0.0036412517074495554,
245
+ -0.0007088789134286344,
246
+ -0.000775613822042942,
247
+ -0.0016821041936054826,
248
+ -0.00011882289254572242,
249
+ -0.0007504023960791528
250
+ ],
251
+ "abs_loss": [
252
+ 0.5970545411109924,
253
+ 0.07690786570310593,
254
+ 0.0035618257243186235,
255
+ 0.0012020958820357919,
256
+ 0.001016044756397605,
257
+ 0.002263190457597375,
258
+ 0.008593530394136906,
259
+ 0.012380805797874928,
260
+ 0.01136107463389635,
261
+ 0.021077027544379234,
262
+ 0.015056786127388477,
263
+ 0.015377345494925976,
264
+ 0.01237007137387991,
265
+ 0.009393605403602123,
266
+ 0.010074876248836517,
267
+ 0.008252273313701153,
268
+ 0.010502271354198456,
269
+ 0.005695981439203024,
270
+ 0.006076695863157511,
271
+ 0.004774859175086021,
272
+ 0.003744877176359296,
273
+ 0.002134924056008458,
274
+ 0.002635170938447118,
275
+ 0.001622178009711206,
276
+ 0.0014428232097998261,
277
+ 0.001420547254383564,
278
+ 0.0018245340324938297,
279
+ 0.0009455258841626346,
280
+ 0.001069836551323533,
281
+ 0.0005867307772859931,
282
+ 0.0007147423457354307,
283
+ 0.00043318382813595235,
284
+ 0.00048265376244671643,
285
+ 0.0004175094945821911,
286
+ 0.00023684576444793493,
287
+ 0.00024159869644790888,
288
+ 0.00029623997397720814,
289
+ 0.0002410489832982421,
290
+ 0.0002485735749360174,
291
+ 0.00016171969764400274,
292
+ 9.246024274034426e-05,
293
+ 0.00010874952567974105,
294
+ 9.772043995326385e-05,
295
+ 0.00010663767898222432,
296
+ 9.5514660642948e-05,
297
+ 6.556689186254516e-05,
298
+ 9.23834159038961e-05,
299
+ 5.149324351805262e-05,
300
+ 5.457560837385245e-05,
301
+ 3.461950836936012e-05,
302
+ 3.3307816920569167e-05,
303
+ 3.955264764954336e-05,
304
+ 3.886878039338626e-05,
305
+ 3.2628566259518266e-05,
306
+ 2.6662886739359237e-05,
307
+ 2.7881047572009265e-05,
308
+ 1.9944345694966614e-05,
309
+ 2.5962357540265657e-05,
310
+ 2.4488110284437425e-05,
311
+ 2.169463004975114e-05
312
+ ],
313
+ "zipf_loss": [
314
+ 10.634342193603516,
315
+ 4.9237060546875,
316
+ 4.405559062957764,
317
+ 4.383087635040283,
318
+ 4.33009147644043,
319
+ 3.906902551651001,
320
+ 3.5761871337890625,
321
+ 3.2161927223205566,
322
+ 2.983755111694336,
323
+ 2.7165632247924805,
324
+ 2.5723321437835693,
325
+ 2.523629665374756,
326
+ 2.3979315757751465,
327
+ 2.3126540184020996,
328
+ 2.1790482997894287,
329
+ 1.7011308670043945,
330
+ 0.5525786280632019,
331
+ 0.1324852854013443,
332
+ 0.07583678513765335,
333
+ 0.08377665281295776,
334
+ 0.07040485739707947,
335
+ 0.09399650245904922,
336
+ 0.12832312285900116,
337
+ 0.084943488240242,
338
+ 0.06154828518629074,
339
+ 0.0596085824072361,
340
+ 0.060582030564546585,
341
+ 0.06028445437550545,
342
+ 0.062221676111221313,
343
+ 0.05868617817759514,
344
+ 0.08578114211559296,
345
+ 0.05960015952587128,
346
+ 0.05853329226374626,
347
+ 0.059103649109601974,
348
+ 0.059628069400787354,
349
+ 0.05957905203104019,
350
+ 0.05824682116508484,
351
+ 0.05855847895145416,
352
+ 0.05818948894739151,
353
+ 0.05821080133318901,
354
+ 0.05832771211862564,
355
+ 0.058142099529504776,
356
+ 0.05812341719865799,
357
+ 0.05839976668357849,
358
+ 0.058074235916137695,
359
+ 0.058086104691028595,
360
+ 0.058047544211149216,
361
+ 0.058056507259607315,
362
+ 0.05803452432155609,
363
+ 0.05802536383271217,
364
+ 0.058024533092975616,
365
+ 0.05802806839346886,
366
+ 0.058025114238262177,
367
+ 0.058018919080495834,
368
+ 0.0580177865922451,
369
+ 0.05801251903176308,
370
+ 0.05801226198673248,
371
+ 0.058010656386613846,
372
+ 0.05801832303404808,
373
+ 0.058015890419483185
374
+ ],
375
+ "denoise_loss": [],
376
+ "ortho_loss": [
377
+ 0.12159737944602966,
378
+ 0.18038727343082428,
379
+ 0.03381244093179703,
380
+ 0.0021874275989830494,
381
+ 2.175434747186955e-05,
382
+ 0.00040765784797258675,
383
+ 0.0030973420944064856,
384
+ 0.00593204190954566,
385
+ 0.0047910804860293865,
386
+ 0.005361207760870457,
387
+ 3.893865709869715e-07,
388
+ 0.0006737676449120045,
389
+ 1.8460258388586226e-07,
390
+ 0.0002703091886360198,
391
+ 1.5977379916876089e-06,
392
+ 0.0005756249302066863,
393
+ 0.0026328135281801224,
394
+ 0.0036702558863908052,
395
+ 0.002528548240661621,
396
+ 0.003325717756524682,
397
+ 0.004215314518660307,
398
+ 0.005500412080436945,
399
+ 0.011995360255241394,
400
+ 0.015324790962040424,
401
+ 0.02708487957715988,
402
+ 0.025101546198129654,
403
+ 0.027320556342601776,
404
+ 0.03245297074317932,
405
+ 0.03193245083093643,
406
+ 0.03052922524511814,
407
+ 0.029022295027971268,
408
+ 0.026948127895593643,
409
+ 0.030607450753450394,
410
+ 0.028174009174108505,
411
+ 0.03190726414322853,
412
+ 0.02512122131884098,
413
+ 0.11264711618423462,
414
+ 0.11880636215209961,
415
+ 0.11841752380132675,
416
+ 0.1180657371878624,
417
+ 0.11860547959804535,
418
+ 0.11567839980125427,
419
+ 0.11602658033370972,
420
+ 0.11348988115787506,
421
+ 0.11054149270057678,
422
+ 0.1121196448802948,
423
+ 0.11147750914096832,
424
+ 0.11120431870222092,
425
+ 0.11274542659521103,
426
+ 0.1115327998995781,
427
+ 0.11288487911224365,
428
+ 0.11170242726802826,
429
+ 0.11522012948989868,
430
+ 0.11410203576087952,
431
+ 0.11274243891239166,
432
+ 0.11297151446342468,
433
+ 0.11337342858314514,
434
+ 0.11387932300567627,
435
+ 0.11410986632108688,
436
+ 0.1143156886100769
437
+ ],
438
+ "lr": [
439
+ 3.9200000000000004e-05,
440
+ 7.92e-05,
441
+ 8e-05,
442
+ 8e-05,
443
+ 8e-05,
444
+ 8e-05,
445
+ 8e-05,
446
+ 8e-05,
447
+ 8e-05,
448
+ 8e-05,
449
+ 8e-05,
450
+ 8e-05,
451
+ 8e-05,
452
+ 8e-05,
453
+ 8e-05,
454
+ 8e-05,
455
+ 8e-05,
456
+ 8e-05,
457
+ 8e-05,
458
+ 8e-05,
459
+ 8e-05,
460
+ 8e-05,
461
+ 8e-05,
462
+ 8e-05,
463
+ 8e-05,
464
+ 8e-05,
465
+ 8e-05,
466
+ 8e-05,
467
+ 8e-05,
468
+ 8e-05,
469
+ 8e-05,
470
+ 8e-05,
471
+ 8e-05,
472
+ 8e-05,
473
+ 8e-05,
474
+ 8e-05,
475
+ 7.946710526315791e-05,
476
+ 7.650657894736843e-05,
477
+ 7.354605263157895e-05,
478
+ 7.017105263157896e-05,
479
+ 6.721052631578948e-05,
480
+ 6.425e-05,
481
+ 6.0875e-05,
482
+ 5.791447368421054e-05,
483
+ 5.495394736842105e-05,
484
+ 5.157894736842105e-05,
485
+ 4.861842105263157e-05,
486
+ 4.565789473684212e-05,
487
+ 4.2282894736842104e-05,
488
+ 3.9322368421052625e-05,
489
+ 3.636184210526315e-05,
490
+ 3.2986842105263165e-05,
491
+ 3.0026315789473686e-05,
492
+ 2.7065789473684206e-05,
493
+ 2.3690789473684223e-05,
494
+ 2.0730263157894743e-05,
495
+ 1.7769736842105264e-05,
496
+ 1.4394736842105275e-05,
497
+ 1.1434210526315796e-05,
498
+ 8.473684210526318e-06
499
+ ],
500
+ "emb_lr": [],
501
+ "eval_step": [
502
+ 150,
503
+ 307,
504
+ 464,
505
+ 621,
506
+ 778,
507
+ 935,
508
+ 1092,
509
+ 1199,
510
+ 1356,
511
+ 1513,
512
+ 1670,
513
+ 1827,
514
+ 1984,
515
+ 2141,
516
+ 2298,
517
+ 2455,
518
+ 2612,
519
+ 2769,
520
+ 2926,
521
+ 3083
522
+ ],
523
+ "eval_accuracy": [
524
+ 0.02,
525
+ 0.02,
526
+ 0.02,
527
+ 0.02,
528
+ 0.0,
529
+ 0.0,
530
+ 0.0,
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0,
537
+ 0.0,
538
+ 0.0,
539
+ 0.0,
540
+ 0.0,
541
+ 0.0,
542
+ 0.0,
543
+ 0.0
544
+ ]
545
+ },
546
+ "final_accuracy": 0.9919230769230769,
547
+ "sft_eval": {
548
+ "config": {
549
+ "ops": "add_sub",
550
+ "K": null,
551
+ "mode": "sft",
552
+ "n_digits": 6,
553
+ "n_per_split": 100
554
+ },
555
+ "splits": {
556
+ "add_S0": {
557
+ "full_accuracy": 1.0,
558
+ "digit_accuracy": 1.0,
559
+ "n_examples": 100,
560
+ "per_subtask": {
561
+ "SA": {
562
+ "accuracy": 1.0,
563
+ "count": 605
564
+ },
565
+ "SS": {
566
+ "accuracy": 1.0,
567
+ "count": 95
568
+ }
569
+ }
570
+ },
571
+ "add_S1": {
572
+ "full_accuracy": 0.99,
573
+ "digit_accuracy": 0.9985714285714286,
574
+ "n_examples": 100,
575
+ "per_subtask": {
576
+ "SA": {
577
+ "accuracy": 1.0,
578
+ "count": 204
579
+ },
580
+ "SC": {
581
+ "accuracy": 1.0,
582
+ "count": 169
583
+ },
584
+ "SS": {
585
+ "accuracy": 1.0,
586
+ "count": 31
587
+ },
588
+ "UC": {
589
+ "accuracy": 0.9966216216216216,
590
+ "count": 296
591
+ }
592
+ }
593
+ },
594
+ "add_S2": {
595
+ "full_accuracy": 1.0,
596
+ "digit_accuracy": 1.0,
597
+ "n_examples": 100,
598
+ "per_subtask": {
599
+ "SA": {
600
+ "accuracy": 1.0,
601
+ "count": 163
602
+ },
603
+ "SC": {
604
+ "accuracy": 1.0,
605
+ "count": 130
606
+ },
607
+ "SS": {
608
+ "accuracy": 1.0,
609
+ "count": 87
610
+ },
611
+ "UC": {
612
+ "accuracy": 1.0,
613
+ "count": 203
614
+ },
615
+ "US": {
616
+ "accuracy": 1.0,
617
+ "count": 117
618
+ }
619
+ }
620
+ },
621
+ "add_S3": {
622
+ "full_accuracy": 0.99,
623
+ "digit_accuracy": 0.9985714285714286,
624
+ "n_examples": 100,
625
+ "per_subtask": {
626
+ "SA": {
627
+ "accuracy": 1.0,
628
+ "count": 121
629
+ },
630
+ "SC": {
631
+ "accuracy": 1.0,
632
+ "count": 121
633
+ },
634
+ "SS": {
635
+ "accuracy": 1.0,
636
+ "count": 49
637
+ },
638
+ "UC": {
639
+ "accuracy": 0.9946236559139785,
640
+ "count": 186
641
+ },
642
+ "US": {
643
+ "accuracy": 1.0,
644
+ "count": 223
645
+ }
646
+ }
647
+ },
648
+ "add_S4": {
649
+ "full_accuracy": 0.99,
650
+ "digit_accuracy": 0.9985714285714286,
651
+ "n_examples": 100,
652
+ "per_subtask": {
653
+ "SA": {
654
+ "accuracy": 1.0,
655
+ "count": 104
656
+ },
657
+ "SC": {
658
+ "accuracy": 1.0,
659
+ "count": 106
660
+ },
661
+ "SS": {
662
+ "accuracy": 1.0,
663
+ "count": 23
664
+ },
665
+ "UC": {
666
+ "accuracy": 0.99375,
667
+ "count": 160
668
+ },
669
+ "US": {
670
+ "accuracy": 1.0,
671
+ "count": 307
672
+ }
673
+ }
674
+ },
675
+ "add_S5": {
676
+ "full_accuracy": 0.85,
677
+ "digit_accuracy": 0.9785714285714285,
678
+ "n_examples": 100,
679
+ "per_subtask": {
680
+ "SA": {
681
+ "accuracy": 1.0,
682
+ "count": 100
683
+ },
684
+ "SC": {
685
+ "accuracy": 1.0,
686
+ "count": 100
687
+ },
688
+ "UC": {
689
+ "accuracy": 0.85,
690
+ "count": 100
691
+ },
692
+ "US": {
693
+ "accuracy": 1.0,
694
+ "count": 400
695
+ }
696
+ }
697
+ },
698
+ "add_S6": {
699
+ "full_accuracy": 0.95,
700
+ "digit_accuracy": 0.9928571428571429,
701
+ "n_examples": 100,
702
+ "per_subtask": {
703
+ "SC": {
704
+ "accuracy": 1.0,
705
+ "count": 100
706
+ },
707
+ "UC": {
708
+ "accuracy": 0.95,
709
+ "count": 100
710
+ },
711
+ "US": {
712
+ "accuracy": 1.0,
713
+ "count": 500
714
+ }
715
+ }
716
+ },
717
+ "add_random": {
718
+ "full_accuracy": 0.995,
719
+ "digit_accuracy": 0.9992857142857143,
720
+ "n_examples": 200,
721
+ "per_subtask": {
722
+ "SA": {
723
+ "accuracy": 1.0,
724
+ "count": 447
725
+ },
726
+ "SC": {
727
+ "accuracy": 1.0,
728
+ "count": 320
729
+ },
730
+ "SS": {
731
+ "accuracy": 0.9821428571428571,
732
+ "count": 56
733
+ },
734
+ "UC": {
735
+ "accuracy": 1.0,
736
+ "count": 529
737
+ },
738
+ "US": {
739
+ "accuracy": 1.0,
740
+ "count": 48
741
+ }
742
+ }
743
+ },
744
+ "add_C1": {
745
+ "full_accuracy": 1.0,
746
+ "digit_accuracy": 1.0,
747
+ "n_examples": 100,
748
+ "per_subtask": {
749
+ "SA": {
750
+ "accuracy": 1.0,
751
+ "count": 500
752
+ },
753
+ "SC": {
754
+ "accuracy": 1.0,
755
+ "count": 100
756
+ },
757
+ "UC": {
758
+ "accuracy": 1.0,
759
+ "count": 100
760
+ }
761
+ }
762
+ },
763
+ "add_C2": {
764
+ "full_accuracy": 1.0,
765
+ "digit_accuracy": 1.0,
766
+ "n_examples": 100,
767
+ "per_subtask": {
768
+ "SA": {
769
+ "accuracy": 1.0,
770
+ "count": 400
771
+ },
772
+ "SC": {
773
+ "accuracy": 1.0,
774
+ "count": 100
775
+ },
776
+ "UC": {
777
+ "accuracy": 1.0,
778
+ "count": 156
779
+ },
780
+ "US": {
781
+ "accuracy": 1.0,
782
+ "count": 44
783
+ }
784
+ }
785
+ },
786
+ "add_C3": {
787
+ "full_accuracy": 1.0,
788
+ "digit_accuracy": 1.0,
789
+ "n_examples": 100,
790
+ "per_subtask": {
791
+ "SA": {
792
+ "accuracy": 1.0,
793
+ "count": 300
794
+ },
795
+ "SC": {
796
+ "accuracy": 1.0,
797
+ "count": 100
798
+ },
799
+ "UC": {
800
+ "accuracy": 1.0,
801
+ "count": 199
802
+ },
803
+ "US": {
804
+ "accuracy": 1.0,
805
+ "count": 101
806
+ }
807
+ }
808
+ },
809
+ "add_C4": {
810
+ "full_accuracy": 1.0,
811
+ "digit_accuracy": 1.0,
812
+ "n_examples": 100,
813
+ "per_subtask": {
814
+ "SA": {
815
+ "accuracy": 1.0,
816
+ "count": 200
817
+ },
818
+ "SC": {
819
+ "accuracy": 1.0,
820
+ "count": 100
821
+ },
822
+ "UC": {
823
+ "accuracy": 1.0,
824
+ "count": 264
825
+ },
826
+ "US": {
827
+ "accuracy": 1.0,
828
+ "count": 136
829
+ }
830
+ }
831
+ },
832
+ "add_C5": {
833
+ "full_accuracy": 0.99,
834
+ "digit_accuracy": 0.9985714285714286,
835
+ "n_examples": 100,
836
+ "per_subtask": {
837
+ "SA": {
838
+ "accuracy": 1.0,
839
+ "count": 100
840
+ },
841
+ "SC": {
842
+ "accuracy": 1.0,
843
+ "count": 100
844
+ },
845
+ "UC": {
846
+ "accuracy": 0.9967741935483871,
847
+ "count": 310
848
+ },
849
+ "US": {
850
+ "accuracy": 1.0,
851
+ "count": 190
852
+ }
853
+ }
854
+ },
855
+ "add_C6": {
856
+ "full_accuracy": 1.0,
857
+ "digit_accuracy": 1.0,
858
+ "n_examples": 100,
859
+ "per_subtask": {
860
+ "SC": {
861
+ "accuracy": 1.0,
862
+ "count": 100
863
+ },
864
+ "UC": {
865
+ "accuracy": 1.0,
866
+ "count": 370
867
+ },
868
+ "US": {
869
+ "accuracy": 1.0,
870
+ "count": 230
871
+ }
872
+ }
873
+ },
874
+ "sub_M0": {
875
+ "full_accuracy": 1.0,
876
+ "digit_accuracy": 1.0,
877
+ "n_examples": 100,
878
+ "per_subtask": {
879
+ "MD": {
880
+ "accuracy": 1.0,
881
+ "count": 615
882
+ },
883
+ "ME": {
884
+ "accuracy": 1.0,
885
+ "count": 85
886
+ }
887
+ }
888
+ },
889
+ "sub_M1": {
890
+ "full_accuracy": 0.99,
891
+ "digit_accuracy": 0.9985714285714286,
892
+ "n_examples": 100,
893
+ "per_subtask": {
894
+ "MD": {
895
+ "accuracy": 1.0,
896
+ "count": 292
897
+ },
898
+ "MB": {
899
+ "accuracy": 1.0,
900
+ "count": 144
901
+ },
902
+ "ME": {
903
+ "accuracy": 1.0,
904
+ "count": 25
905
+ },
906
+ "UB": {
907
+ "accuracy": 0.99581589958159,
908
+ "count": 239
909
+ }
910
+ }
911
+ },
912
+ "sub_M2": {
913
+ "full_accuracy": 1.0,
914
+ "digit_accuracy": 1.0,
915
+ "n_examples": 100,
916
+ "per_subtask": {
917
+ "MD": {
918
+ "accuracy": 1.0,
919
+ "count": 211
920
+ },
921
+ "MB": {
922
+ "accuracy": 1.0,
923
+ "count": 115
924
+ },
925
+ "ME": {
926
+ "accuracy": 1.0,
927
+ "count": 85
928
+ },
929
+ "UB": {
930
+ "accuracy": 1.0,
931
+ "count": 181
932
+ },
933
+ "UD": {
934
+ "accuracy": 1.0,
935
+ "count": 108
936
+ }
937
+ }
938
+ },
939
+ "sub_M3": {
940
+ "full_accuracy": 0.99,
941
+ "digit_accuracy": 0.9985714285714286,
942
+ "n_examples": 100,
943
+ "per_subtask": {
944
+ "MD": {
945
+ "accuracy": 1.0,
946
+ "count": 179
947
+ },
948
+ "MB": {
949
+ "accuracy": 1.0,
950
+ "count": 103
951
+ },
952
+ "ME": {
953
+ "accuracy": 1.0,
954
+ "count": 56
955
+ },
956
+ "UB": {
957
+ "accuracy": 0.9932885906040269,
958
+ "count": 149
959
+ },
960
+ "UD": {
961
+ "accuracy": 1.0,
962
+ "count": 213
963
+ }
964
+ }
965
+ },
966
+ "sub_M4": {
967
+ "full_accuracy": 0.45,
968
+ "digit_accuracy": 0.9214285714285714,
969
+ "n_examples": 100,
970
+ "per_subtask": {
971
+ "MD": {
972
+ "accuracy": 1.0,
973
+ "count": 200
974
+ },
975
+ "MB": {
976
+ "accuracy": 1.0,
977
+ "count": 100
978
+ },
979
+ "UB": {
980
+ "accuracy": 0.45,
981
+ "count": 100
982
+ },
983
+ "UD": {
984
+ "accuracy": 1.0,
985
+ "count": 300
986
+ }
987
+ }
988
+ },
989
+ "sub_M5": {
990
+ "full_accuracy": 0.56,
991
+ "digit_accuracy": 0.92,
992
+ "n_examples": 100,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 100
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 100
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.81,
1004
+ "count": 100
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.9075,
1008
+ "count": 400
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 1.0,
1014
+ "digit_accuracy": 1.0,
1015
+ "n_examples": 200,
1016
+ "per_subtask": {
1017
+ "MD": {
1018
+ "accuracy": 1.0,
1019
+ "count": 600
1020
+ },
1021
+ "MB": {
1022
+ "accuracy": 1.0,
1023
+ "count": 267
1024
+ },
1025
+ "ME": {
1026
+ "accuracy": 1.0,
1027
+ "count": 53
1028
+ },
1029
+ "UB": {
1030
+ "accuracy": 1.0,
1031
+ "count": 439
1032
+ },
1033
+ "UD": {
1034
+ "accuracy": 1.0,
1035
+ "count": 41
1036
+ }
1037
+ }
1038
+ },
1039
+ "sub_B3": {
1040
+ "full_accuracy": 0.99,
1041
+ "digit_accuracy": 0.9985714285714286,
1042
+ "n_examples": 100,
1043
+ "per_subtask": {
1044
+ "MD": {
1045
+ "accuracy": 1.0,
1046
+ "count": 300
1047
+ },
1048
+ "MB": {
1049
+ "accuracy": 1.0,
1050
+ "count": 100
1051
+ },
1052
+ "UB": {
1053
+ "accuracy": 0.9949238578680203,
1054
+ "count": 197
1055
+ },
1056
+ "UD": {
1057
+ "accuracy": 1.0,
1058
+ "count": 103
1059
+ }
1060
+ }
1061
+ },
1062
+ "sub_B4": {
1063
+ "full_accuracy": 0.92,
1064
+ "digit_accuracy": 0.9885714285714285,
1065
+ "n_examples": 100,
1066
+ "per_subtask": {
1067
+ "MD": {
1068
+ "accuracy": 0.995,
1069
+ "count": 200
1070
+ },
1071
+ "MB": {
1072
+ "accuracy": 1.0,
1073
+ "count": 100
1074
+ },
1075
+ "UB": {
1076
+ "accuracy": 0.97165991902834,
1077
+ "count": 247
1078
+ },
1079
+ "UD": {
1080
+ "accuracy": 1.0,
1081
+ "count": 153
1082
+ }
1083
+ }
1084
+ },
1085
+ "sub_B5": {
1086
+ "full_accuracy": 0.95,
1087
+ "digit_accuracy": 0.99,
1088
+ "n_examples": 100,
1089
+ "per_subtask": {
1090
+ "MD": {
1091
+ "accuracy": 1.0,
1092
+ "count": 100
1093
+ },
1094
+ "MB": {
1095
+ "accuracy": 1.0,
1096
+ "count": 100
1097
+ },
1098
+ "UB": {
1099
+ "accuracy": 0.9899328859060402,
1100
+ "count": 298
1101
+ },
1102
+ "UD": {
1103
+ "accuracy": 0.9801980198019802,
1104
+ "count": 202
1105
+ }
1106
+ }
1107
+ }
1108
+ },
1109
+ "summary": {
1110
+ "overall_accuracy": 0.9461538461538461,
1111
+ "digit_accuracy": 0.9914835164835165,
1112
+ "total_examples": 2600,
1113
+ "n_splits": 24
1114
+ }
1115
+ },
1116
+ "sorl_eval": {
1117
+ "config": {
1118
+ "ops": "add_sub",
1119
+ "K": 1,
1120
+ "mode": "sorl",
1121
+ "n_digits": 6,
1122
+ "n_per_split": 100
1123
+ },
1124
+ "splits": {
1125
+ "add_S0": {
1126
+ "full_accuracy": 1.0,
1127
+ "digit_accuracy": 1.0,
1128
+ "n_examples": 100,
1129
+ "per_subtask": {
1130
+ "SA": {
1131
+ "accuracy": 1.0,
1132
+ "count": 605
1133
+ },
1134
+ "SS": {
1135
+ "accuracy": 1.0,
1136
+ "count": 95
1137
+ }
1138
+ }
1139
+ },
1140
+ "add_S1": {
1141
+ "full_accuracy": 1.0,
1142
+ "digit_accuracy": 1.0,
1143
+ "n_examples": 100,
1144
+ "per_subtask": {
1145
+ "SA": {
1146
+ "accuracy": 1.0,
1147
+ "count": 204
1148
+ },
1149
+ "SC": {
1150
+ "accuracy": 1.0,
1151
+ "count": 169
1152
+ },
1153
+ "SS": {
1154
+ "accuracy": 1.0,
1155
+ "count": 31
1156
+ },
1157
+ "UC": {
1158
+ "accuracy": 1.0,
1159
+ "count": 296
1160
+ }
1161
+ }
1162
+ },
1163
+ "add_S2": {
1164
+ "full_accuracy": 1.0,
1165
+ "digit_accuracy": 1.0,
1166
+ "n_examples": 100,
1167
+ "per_subtask": {
1168
+ "SA": {
1169
+ "accuracy": 1.0,
1170
+ "count": 163
1171
+ },
1172
+ "SC": {
1173
+ "accuracy": 1.0,
1174
+ "count": 130
1175
+ },
1176
+ "SS": {
1177
+ "accuracy": 1.0,
1178
+ "count": 87
1179
+ },
1180
+ "UC": {
1181
+ "accuracy": 1.0,
1182
+ "count": 203
1183
+ },
1184
+ "US": {
1185
+ "accuracy": 1.0,
1186
+ "count": 117
1187
+ }
1188
+ }
1189
+ },
1190
+ "add_S3": {
1191
+ "full_accuracy": 0.99,
1192
+ "digit_accuracy": 0.9985714285714286,
1193
+ "n_examples": 100,
1194
+ "per_subtask": {
1195
+ "SA": {
1196
+ "accuracy": 1.0,
1197
+ "count": 121
1198
+ },
1199
+ "SC": {
1200
+ "accuracy": 1.0,
1201
+ "count": 121
1202
+ },
1203
+ "SS": {
1204
+ "accuracy": 1.0,
1205
+ "count": 49
1206
+ },
1207
+ "UC": {
1208
+ "accuracy": 0.9946236559139785,
1209
+ "count": 186
1210
+ },
1211
+ "US": {
1212
+ "accuracy": 1.0,
1213
+ "count": 223
1214
+ }
1215
+ }
1216
+ },
1217
+ "add_S4": {
1218
+ "full_accuracy": 1.0,
1219
+ "digit_accuracy": 1.0,
1220
+ "n_examples": 100,
1221
+ "per_subtask": {
1222
+ "SA": {
1223
+ "accuracy": 1.0,
1224
+ "count": 104
1225
+ },
1226
+ "SC": {
1227
+ "accuracy": 1.0,
1228
+ "count": 106
1229
+ },
1230
+ "SS": {
1231
+ "accuracy": 1.0,
1232
+ "count": 23
1233
+ },
1234
+ "UC": {
1235
+ "accuracy": 1.0,
1236
+ "count": 160
1237
+ },
1238
+ "US": {
1239
+ "accuracy": 1.0,
1240
+ "count": 307
1241
+ }
1242
+ }
1243
+ },
1244
+ "add_S5": {
1245
+ "full_accuracy": 0.96,
1246
+ "digit_accuracy": 0.9942857142857143,
1247
+ "n_examples": 100,
1248
+ "per_subtask": {
1249
+ "SA": {
1250
+ "accuracy": 1.0,
1251
+ "count": 100
1252
+ },
1253
+ "SC": {
1254
+ "accuracy": 1.0,
1255
+ "count": 100
1256
+ },
1257
+ "UC": {
1258
+ "accuracy": 0.96,
1259
+ "count": 100
1260
+ },
1261
+ "US": {
1262
+ "accuracy": 1.0,
1263
+ "count": 400
1264
+ }
1265
+ }
1266
+ },
1267
+ "add_S6": {
1268
+ "full_accuracy": 1.0,
1269
+ "digit_accuracy": 1.0,
1270
+ "n_examples": 100,
1271
+ "per_subtask": {
1272
+ "SC": {
1273
+ "accuracy": 1.0,
1274
+ "count": 100
1275
+ },
1276
+ "UC": {
1277
+ "accuracy": 1.0,
1278
+ "count": 100
1279
+ },
1280
+ "US": {
1281
+ "accuracy": 1.0,
1282
+ "count": 500
1283
+ }
1284
+ }
1285
+ },
1286
+ "add_random": {
1287
+ "full_accuracy": 1.0,
1288
+ "digit_accuracy": 1.0,
1289
+ "n_examples": 200,
1290
+ "per_subtask": {
1291
+ "SA": {
1292
+ "accuracy": 1.0,
1293
+ "count": 447
1294
+ },
1295
+ "SC": {
1296
+ "accuracy": 1.0,
1297
+ "count": 320
1298
+ },
1299
+ "SS": {
1300
+ "accuracy": 1.0,
1301
+ "count": 56
1302
+ },
1303
+ "UC": {
1304
+ "accuracy": 1.0,
1305
+ "count": 529
1306
+ },
1307
+ "US": {
1308
+ "accuracy": 1.0,
1309
+ "count": 48
1310
+ }
1311
+ }
1312
+ },
1313
+ "add_C1": {
1314
+ "full_accuracy": 1.0,
1315
+ "digit_accuracy": 1.0,
1316
+ "n_examples": 100,
1317
+ "per_subtask": {
1318
+ "SA": {
1319
+ "accuracy": 1.0,
1320
+ "count": 500
1321
+ },
1322
+ "SC": {
1323
+ "accuracy": 1.0,
1324
+ "count": 100
1325
+ },
1326
+ "UC": {
1327
+ "accuracy": 1.0,
1328
+ "count": 100
1329
+ }
1330
+ }
1331
+ },
1332
+ "add_C2": {
1333
+ "full_accuracy": 1.0,
1334
+ "digit_accuracy": 1.0,
1335
+ "n_examples": 100,
1336
+ "per_subtask": {
1337
+ "SA": {
1338
+ "accuracy": 1.0,
1339
+ "count": 400
1340
+ },
1341
+ "SC": {
1342
+ "accuracy": 1.0,
1343
+ "count": 100
1344
+ },
1345
+ "UC": {
1346
+ "accuracy": 1.0,
1347
+ "count": 156
1348
+ },
1349
+ "US": {
1350
+ "accuracy": 1.0,
1351
+ "count": 44
1352
+ }
1353
+ }
1354
+ },
1355
+ "add_C3": {
1356
+ "full_accuracy": 1.0,
1357
+ "digit_accuracy": 1.0,
1358
+ "n_examples": 100,
1359
+ "per_subtask": {
1360
+ "SA": {
1361
+ "accuracy": 1.0,
1362
+ "count": 300
1363
+ },
1364
+ "SC": {
1365
+ "accuracy": 1.0,
1366
+ "count": 100
1367
+ },
1368
+ "UC": {
1369
+ "accuracy": 1.0,
1370
+ "count": 199
1371
+ },
1372
+ "US": {
1373
+ "accuracy": 1.0,
1374
+ "count": 101
1375
+ }
1376
+ }
1377
+ },
1378
+ "add_C4": {
1379
+ "full_accuracy": 1.0,
1380
+ "digit_accuracy": 1.0,
1381
+ "n_examples": 100,
1382
+ "per_subtask": {
1383
+ "SA": {
1384
+ "accuracy": 1.0,
1385
+ "count": 200
1386
+ },
1387
+ "SC": {
1388
+ "accuracy": 1.0,
1389
+ "count": 100
1390
+ },
1391
+ "UC": {
1392
+ "accuracy": 1.0,
1393
+ "count": 264
1394
+ },
1395
+ "US": {
1396
+ "accuracy": 1.0,
1397
+ "count": 136
1398
+ }
1399
+ }
1400
+ },
1401
+ "add_C5": {
1402
+ "full_accuracy": 1.0,
1403
+ "digit_accuracy": 1.0,
1404
+ "n_examples": 100,
1405
+ "per_subtask": {
1406
+ "SA": {
1407
+ "accuracy": 1.0,
1408
+ "count": 100
1409
+ },
1410
+ "SC": {
1411
+ "accuracy": 1.0,
1412
+ "count": 100
1413
+ },
1414
+ "UC": {
1415
+ "accuracy": 1.0,
1416
+ "count": 310
1417
+ },
1418
+ "US": {
1419
+ "accuracy": 1.0,
1420
+ "count": 190
1421
+ }
1422
+ }
1423
+ },
1424
+ "add_C6": {
1425
+ "full_accuracy": 1.0,
1426
+ "digit_accuracy": 1.0,
1427
+ "n_examples": 100,
1428
+ "per_subtask": {
1429
+ "SC": {
1430
+ "accuracy": 1.0,
1431
+ "count": 100
1432
+ },
1433
+ "UC": {
1434
+ "accuracy": 1.0,
1435
+ "count": 370
1436
+ },
1437
+ "US": {
1438
+ "accuracy": 1.0,
1439
+ "count": 230
1440
+ }
1441
+ }
1442
+ },
1443
+ "sub_M0": {
1444
+ "full_accuracy": 1.0,
1445
+ "digit_accuracy": 1.0,
1446
+ "n_examples": 100,
1447
+ "per_subtask": {
1448
+ "MD": {
1449
+ "accuracy": 1.0,
1450
+ "count": 615
1451
+ },
1452
+ "ME": {
1453
+ "accuracy": 1.0,
1454
+ "count": 85
1455
+ }
1456
+ }
1457
+ },
1458
+ "sub_M1": {
1459
+ "full_accuracy": 1.0,
1460
+ "digit_accuracy": 1.0,
1461
+ "n_examples": 100,
1462
+ "per_subtask": {
1463
+ "MD": {
1464
+ "accuracy": 1.0,
1465
+ "count": 292
1466
+ },
1467
+ "MB": {
1468
+ "accuracy": 1.0,
1469
+ "count": 144
1470
+ },
1471
+ "ME": {
1472
+ "accuracy": 1.0,
1473
+ "count": 25
1474
+ },
1475
+ "UB": {
1476
+ "accuracy": 1.0,
1477
+ "count": 239
1478
+ }
1479
+ }
1480
+ },
1481
+ "sub_M2": {
1482
+ "full_accuracy": 1.0,
1483
+ "digit_accuracy": 1.0,
1484
+ "n_examples": 100,
1485
+ "per_subtask": {
1486
+ "MD": {
1487
+ "accuracy": 1.0,
1488
+ "count": 211
1489
+ },
1490
+ "MB": {
1491
+ "accuracy": 1.0,
1492
+ "count": 115
1493
+ },
1494
+ "ME": {
1495
+ "accuracy": 1.0,
1496
+ "count": 85
1497
+ },
1498
+ "UB": {
1499
+ "accuracy": 1.0,
1500
+ "count": 181
1501
+ },
1502
+ "UD": {
1503
+ "accuracy": 1.0,
1504
+ "count": 108
1505
+ }
1506
+ }
1507
+ },
1508
+ "sub_M3": {
1509
+ "full_accuracy": 1.0,
1510
+ "digit_accuracy": 1.0,
1511
+ "n_examples": 100,
1512
+ "per_subtask": {
1513
+ "MD": {
1514
+ "accuracy": 1.0,
1515
+ "count": 179
1516
+ },
1517
+ "MB": {
1518
+ "accuracy": 1.0,
1519
+ "count": 103
1520
+ },
1521
+ "ME": {
1522
+ "accuracy": 1.0,
1523
+ "count": 56
1524
+ },
1525
+ "UB": {
1526
+ "accuracy": 1.0,
1527
+ "count": 149
1528
+ },
1529
+ "UD": {
1530
+ "accuracy": 1.0,
1531
+ "count": 213
1532
+ }
1533
+ }
1534
+ },
1535
+ "sub_M4": {
1536
+ "full_accuracy": 1.0,
1537
+ "digit_accuracy": 1.0,
1538
+ "n_examples": 100,
1539
+ "per_subtask": {
1540
+ "MD": {
1541
+ "accuracy": 1.0,
1542
+ "count": 200
1543
+ },
1544
+ "MB": {
1545
+ "accuracy": 1.0,
1546
+ "count": 100
1547
+ },
1548
+ "UB": {
1549
+ "accuracy": 1.0,
1550
+ "count": 100
1551
+ },
1552
+ "UD": {
1553
+ "accuracy": 1.0,
1554
+ "count": 300
1555
+ }
1556
+ }
1557
+ },
1558
+ "sub_M5": {
1559
+ "full_accuracy": 0.87,
1560
+ "digit_accuracy": 0.9814285714285714,
1561
+ "n_examples": 100,
1562
+ "per_subtask": {
1563
+ "MD": {
1564
+ "accuracy": 1.0,
1565
+ "count": 100
1566
+ },
1567
+ "MB": {
1568
+ "accuracy": 1.0,
1569
+ "count": 100
1570
+ },
1571
+ "UB": {
1572
+ "accuracy": 0.87,
1573
+ "count": 100
1574
+ },
1575
+ "UD": {
1576
+ "accuracy": 1.0,
1577
+ "count": 400
1578
+ }
1579
+ }
1580
+ },
1581
+ "sub_random": {
1582
+ "full_accuracy": 0.995,
1583
+ "digit_accuracy": 0.9992857142857143,
1584
+ "n_examples": 200,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 0.9983333333333333,
1588
+ "count": 600
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 1.0,
1592
+ "count": 267
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 1.0,
1596
+ "count": 53
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 1.0,
1600
+ "count": 439
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 1.0,
1604
+ "count": 41
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_B3": {
1609
+ "full_accuracy": 1.0,
1610
+ "digit_accuracy": 1.0,
1611
+ "n_examples": 100,
1612
+ "per_subtask": {
1613
+ "MD": {
1614
+ "accuracy": 1.0,
1615
+ "count": 300
1616
+ },
1617
+ "MB": {
1618
+ "accuracy": 1.0,
1619
+ "count": 100
1620
+ },
1621
+ "UB": {
1622
+ "accuracy": 1.0,
1623
+ "count": 197
1624
+ },
1625
+ "UD": {
1626
+ "accuracy": 1.0,
1627
+ "count": 103
1628
+ }
1629
+ }
1630
+ },
1631
+ "sub_B4": {
1632
+ "full_accuracy": 0.99,
1633
+ "digit_accuracy": 0.9985714285714286,
1634
+ "n_examples": 100,
1635
+ "per_subtask": {
1636
+ "MD": {
1637
+ "accuracy": 1.0,
1638
+ "count": 200
1639
+ },
1640
+ "MB": {
1641
+ "accuracy": 1.0,
1642
+ "count": 100
1643
+ },
1644
+ "UB": {
1645
+ "accuracy": 0.9959514170040485,
1646
+ "count": 247
1647
+ },
1648
+ "UD": {
1649
+ "accuracy": 1.0,
1650
+ "count": 153
1651
+ }
1652
+ }
1653
+ },
1654
+ "sub_B5": {
1655
+ "full_accuracy": 0.99,
1656
+ "digit_accuracy": 0.9985714285714286,
1657
+ "n_examples": 100,
1658
+ "per_subtask": {
1659
+ "MD": {
1660
+ "accuracy": 1.0,
1661
+ "count": 100
1662
+ },
1663
+ "MB": {
1664
+ "accuracy": 1.0,
1665
+ "count": 100
1666
+ },
1667
+ "UB": {
1668
+ "accuracy": 0.9966442953020134,
1669
+ "count": 298
1670
+ },
1671
+ "UD": {
1672
+ "accuracy": 1.0,
1673
+ "count": 202
1674
+ }
1675
+ }
1676
+ }
1677
+ },
1678
+ "summary": {
1679
+ "overall_accuracy": 0.9919230769230769,
1680
+ "digit_accuracy": 0.9988461538461538,
1681
+ "total_examples": 2600,
1682
+ "n_splits": 24
1683
+ }
1684
+ },
1685
+ "sorl_overall_accuracy": 0.9919230769230769,
1686
+ "sft_overall_accuracy": 0.9461538461538461
1687
+ }
add_sub_sorl_v1_abs2_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:556319ae59912bb52ef9f1eb1cf60b8fa2147c5b065a949f63161676310ad6ee
3
+ size 650271004
add_sub_sorl_v1_abs2_K1_10K/train_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs2_K1_10K_2L3H510d",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 2,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162491102,
71
+ "run_name": "add_sub_sorl_v1_abs2_K1_10K",
72
+ "git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
73
+ "timestamp": "2026-04-16T04:13:31.816099+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "train_dataset": "fixed_train/train_10K_seed42.pt",
78
+ "model_repo": "thoughtworks/arithmetic-sorl",
79
+ "trainer_version": "v1",
80
+ "wandb_run_id": "0zygf9qm",
81
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/0zygf9qm",
82
+ "eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
83
+ "eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
84
+ "eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
85
+ "config_hash": "5e90800683ec",
86
+ "final_accuracy": 0.9919230769230769,
87
+ "sft_accuracy": 0.9461538461538461,
88
+ "eval_method": "ArithmeticEvaluator"
89
+ }