amirali1985 commited on
Commit
b6e9080
·
verified ·
1 Parent(s): 52cde0a

Upload add_sub_sorl_v6_abs30_10K

Browse files
add_sub_sorl_v6_abs30_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151674
37
+ }
add_sub_sorl_v6_abs30_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v6_abs30_10K/metrics.json ADDED
@@ -0,0 +1,1759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563,
34
+ 1620,
35
+ 1670,
36
+ 1720,
37
+ 1777,
38
+ 1827,
39
+ 1877,
40
+ 1934,
41
+ 1984,
42
+ 2034,
43
+ 2091,
44
+ 2141,
45
+ 2191,
46
+ 2248,
47
+ 2298,
48
+ 2348,
49
+ 2405,
50
+ 2455,
51
+ 2505,
52
+ 2562,
53
+ 2612,
54
+ 2662,
55
+ 2719,
56
+ 2769,
57
+ 2819,
58
+ 2876,
59
+ 2926,
60
+ 2976,
61
+ 3033,
62
+ 3083,
63
+ 3133
64
+ ],
65
+ "loss": [
66
+ 6.981431007385254,
67
+ 3.4973084926605225,
68
+ 1.926186442375183,
69
+ 1.813720941543579,
70
+ 1.80864417552948,
71
+ 1.7347716093063354,
72
+ 1.663161039352417,
73
+ 1.4270867109298706,
74
+ 1.0012342929840088,
75
+ 0.6832233667373657,
76
+ 0.5323711633682251,
77
+ 0.46830829977989197,
78
+ 0.3200864791870117,
79
+ 0.27280735969543457,
80
+ 0.2319440096616745,
81
+ 0.16084234416484833,
82
+ 0.21135425567626953,
83
+ 0.128024622797966,
84
+ 0.15241578221321106,
85
+ 0.11899806559085846,
86
+ 0.13863572478294373,
87
+ 0.070130355656147,
88
+ 0.08487819135189056,
89
+ 0.059525031596422195,
90
+ 0.056034259498119354,
91
+ 0.04735085740685463,
92
+ 0.07451213896274567,
93
+ 0.03786834329366684,
94
+ 0.02611224539577961,
95
+ 0.049724213778972626,
96
+ 0.06763363629579544,
97
+ 0.04095302149653435,
98
+ 0.018686549738049507,
99
+ 0.029115790501236916,
100
+ 0.027573740109801292,
101
+ 0.027499454095959663,
102
+ 0.032373372465372086,
103
+ 0.045382943004369736,
104
+ 0.019351569935679436,
105
+ 0.022611157968640327,
106
+ 0.02561340294778347,
107
+ 0.04150475934147835,
108
+ 0.01612929441034794,
109
+ 0.011931669898331165,
110
+ 0.02427544631063938,
111
+ 0.009333401918411255,
112
+ 0.02278020977973938,
113
+ 0.018232468515634537,
114
+ 0.018840234726667404,
115
+ 0.028673674911260605,
116
+ 0.014240412041544914,
117
+ 0.01684337481856346,
118
+ 0.022692274302244186,
119
+ 0.02324347384274006,
120
+ 0.00509552750736475,
121
+ 0.014809842221438885,
122
+ 0.023364653810858727,
123
+ 0.010995187796652317,
124
+ 0.01633281260728836,
125
+ 0.009612202644348145
126
+ ],
127
+ "base_loss": [
128
+ 7.035663604736328,
129
+ 3.5270779132843018,
130
+ 1.9827979803085327,
131
+ 1.9955610036849976,
132
+ 2.0564560890197754,
133
+ 2.0580272674560547,
134
+ 2.080122709274292,
135
+ 2.5504348278045654,
136
+ 3.3047873973846436,
137
+ 3.790714979171753,
138
+ 4.123860836029053,
139
+ 4.4167375564575195,
140
+ 5.127987384796143,
141
+ 5.316103458404541,
142
+ 5.7927985191345215,
143
+ 5.74397611618042,
144
+ 6.057636260986328,
145
+ 6.407114505767822,
146
+ 6.399770259857178,
147
+ 6.664841651916504,
148
+ 6.508903503417969,
149
+ 6.839577674865723,
150
+ 6.989323139190674,
151
+ 7.3025078773498535,
152
+ 7.278931617736816,
153
+ 7.524968147277832,
154
+ 7.098843574523926,
155
+ 7.61893367767334,
156
+ 7.271321773529053,
157
+ 7.482024669647217,
158
+ 7.943897724151611,
159
+ 7.200656890869141,
160
+ 7.417972087860107,
161
+ 7.996192932128906,
162
+ 8.153302192687988,
163
+ 8.000033378601074,
164
+ 8.261277198791504,
165
+ 8.076943397521973,
166
+ 8.577622413635254,
167
+ 8.461385726928711,
168
+ 8.614644050598145,
169
+ 8.378890037536621,
170
+ 8.52759838104248,
171
+ 8.765730857849121,
172
+ 8.218759536743164,
173
+ 8.548806190490723,
174
+ 8.11020565032959,
175
+ 8.433223724365234,
176
+ 8.715895652770996,
177
+ 8.812418937683105,
178
+ 8.690309524536133,
179
+ 8.739039421081543,
180
+ 8.839436531066895,
181
+ 8.346253395080566,
182
+ 8.437798500061035,
183
+ 8.580765724182129,
184
+ 8.755929946899414,
185
+ 8.623879432678223,
186
+ 8.465852737426758,
187
+ 8.49194622039795
188
+ ],
189
+ "traj_loss": [
190
+ 6.981431007385254,
191
+ 3.4973084926605225,
192
+ 1.926186442375183,
193
+ 1.813720941543579,
194
+ 1.80864417552948,
195
+ 1.7347716093063354,
196
+ 1.663161039352417,
197
+ 1.4270867109298706,
198
+ 1.0012342929840088,
199
+ 0.6832233667373657,
200
+ 0.5323711633682251,
201
+ 0.46830829977989197,
202
+ 0.3200864791870117,
203
+ 0.27280735969543457,
204
+ 0.2319440096616745,
205
+ 0.16084234416484833,
206
+ 0.21135425567626953,
207
+ 0.128024622797966,
208
+ 0.15241578221321106,
209
+ 0.11899806559085846,
210
+ 0.13863572478294373,
211
+ 0.070130355656147,
212
+ 0.08487819135189056,
213
+ 0.059525031596422195,
214
+ 0.056034259498119354,
215
+ 0.04735085740685463,
216
+ 0.07451213896274567,
217
+ 0.03786834329366684,
218
+ 0.02611224539577961,
219
+ 0.049724213778972626,
220
+ 0.06763363629579544,
221
+ 0.04095302149653435,
222
+ 0.018686549738049507,
223
+ 0.029115790501236916,
224
+ 0.027573740109801292,
225
+ 0.027499454095959663,
226
+ 0.032373372465372086,
227
+ 0.045382943004369736,
228
+ 0.019351569935679436,
229
+ 0.022611157968640327,
230
+ 0.02561340294778347,
231
+ 0.04150475934147835,
232
+ 0.01612929441034794,
233
+ 0.011931669898331165,
234
+ 0.02427544631063938,
235
+ 0.009333401918411255,
236
+ 0.02278020977973938,
237
+ 0.018232468515634537,
238
+ 0.018840234726667404,
239
+ 0.028673674911260605,
240
+ 0.014240412041544914,
241
+ 0.01684337481856346,
242
+ 0.022692274302244186,
243
+ 0.02324347384274006,
244
+ 0.00509552750736475,
245
+ 0.014809842221438885,
246
+ 0.023364653810858727,
247
+ 0.010995187796652317,
248
+ 0.01633281260728836,
249
+ 0.009612202644348145
250
+ ],
251
+ "hinge_loss": [
252
+ 0.0,
253
+ 0.0,
254
+ 0.0,
255
+ 0.0,
256
+ 0.0,
257
+ 0.0,
258
+ 0.0,
259
+ 0.0,
260
+ 0.0,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0,
266
+ 0.0,
267
+ 0.0,
268
+ 0.0,
269
+ 0.0,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0,
284
+ 0.0,
285
+ 0.0,
286
+ 0.0,
287
+ 0.0,
288
+ 0.0,
289
+ 0.0,
290
+ 0.0,
291
+ 0.0,
292
+ 0.0,
293
+ 0.0,
294
+ 0.0,
295
+ 0.0,
296
+ 0.0,
297
+ 0.0,
298
+ 0.0,
299
+ 0.0,
300
+ 0.0,
301
+ 0.0,
302
+ 0.0,
303
+ 0.0,
304
+ 0.0,
305
+ 0.0,
306
+ 0.0,
307
+ 0.0,
308
+ 0.0,
309
+ 0.0,
310
+ 0.0,
311
+ 0.0
312
+ ],
313
+ "masked_traj_loss": [
314
+ 0.0,
315
+ 0.0,
316
+ 0.0,
317
+ 0.0,
318
+ 0.0,
319
+ 0.0,
320
+ 0.0,
321
+ 0.0,
322
+ 0.0,
323
+ 0.0,
324
+ 0.0,
325
+ 0.0,
326
+ 0.0,
327
+ 0.0,
328
+ 0.0,
329
+ 0.0,
330
+ 0.0,
331
+ 0.0,
332
+ 0.0,
333
+ 0.0,
334
+ 0.0,
335
+ 0.0,
336
+ 0.0,
337
+ 0.0,
338
+ 0.0,
339
+ 0.0,
340
+ 0.0,
341
+ 0.0,
342
+ 0.0,
343
+ 0.0,
344
+ 0.0,
345
+ 0.0,
346
+ 0.0,
347
+ 0.0,
348
+ 0.0,
349
+ 0.0,
350
+ 0.0,
351
+ 0.0,
352
+ 0.0,
353
+ 0.0,
354
+ 0.0,
355
+ 0.0,
356
+ 0.0,
357
+ 0.0,
358
+ 0.0,
359
+ 0.0,
360
+ 0.0,
361
+ 0.0,
362
+ 0.0,
363
+ 0.0,
364
+ 0.0,
365
+ 0.0,
366
+ 0.0,
367
+ 0.0,
368
+ 0.0,
369
+ 0.0,
370
+ 0.0,
371
+ 0.0,
372
+ 0.0,
373
+ 0.0
374
+ ],
375
+ "abs_loss": [
376
+ 0.0,
377
+ 0.0,
378
+ 0.0,
379
+ 0.0,
380
+ 0.0,
381
+ 0.0,
382
+ 0.0,
383
+ 0.0,
384
+ 0.0,
385
+ 0.0,
386
+ 0.0,
387
+ 0.0,
388
+ 0.0,
389
+ 0.0,
390
+ 0.0,
391
+ 0.0,
392
+ 0.0,
393
+ 0.0,
394
+ 0.0,
395
+ 0.0,
396
+ 0.0,
397
+ 0.0,
398
+ 0.0,
399
+ 0.0,
400
+ 0.0,
401
+ 0.0,
402
+ 0.0,
403
+ 0.0,
404
+ 0.0,
405
+ 0.0,
406
+ 0.0,
407
+ 0.0,
408
+ 0.0,
409
+ 0.0,
410
+ 0.0,
411
+ 0.0,
412
+ 0.0,
413
+ 0.0,
414
+ 0.0,
415
+ 0.0,
416
+ 0.0,
417
+ 0.0,
418
+ 0.0,
419
+ 0.0,
420
+ 0.0,
421
+ 0.0,
422
+ 0.0,
423
+ 0.0,
424
+ 0.0,
425
+ 0.0,
426
+ 0.0,
427
+ 0.0,
428
+ 0.0,
429
+ 0.0,
430
+ 0.0,
431
+ 0.0,
432
+ 0.0,
433
+ 0.0,
434
+ 0.0,
435
+ 0.0
436
+ ],
437
+ "zipf_loss": [
438
+ 0.0,
439
+ 0.0,
440
+ 0.0,
441
+ 0.0,
442
+ 0.0,
443
+ 0.0,
444
+ 0.0,
445
+ 0.0,
446
+ 0.0,
447
+ 0.0,
448
+ 0.0,
449
+ 0.0,
450
+ 0.0,
451
+ 0.0,
452
+ 0.0,
453
+ 0.0,
454
+ 0.0,
455
+ 0.0,
456
+ 0.0,
457
+ 0.0,
458
+ 0.0,
459
+ 0.0,
460
+ 0.0,
461
+ 0.0,
462
+ 0.0,
463
+ 0.0,
464
+ 0.0,
465
+ 0.0,
466
+ 0.0,
467
+ 0.0,
468
+ 0.0,
469
+ 0.0,
470
+ 0.0,
471
+ 0.0,
472
+ 0.0,
473
+ 0.0,
474
+ 0.0,
475
+ 0.0,
476
+ 0.0,
477
+ 0.0,
478
+ 0.0,
479
+ 0.0,
480
+ 0.0,
481
+ 0.0,
482
+ 0.0,
483
+ 0.0,
484
+ 0.0,
485
+ 0.0,
486
+ 0.0,
487
+ 0.0,
488
+ 0.0,
489
+ 0.0,
490
+ 0.0,
491
+ 0.0,
492
+ 0.0,
493
+ 0.0,
494
+ 0.0,
495
+ 0.0,
496
+ 0.0,
497
+ 0.0
498
+ ],
499
+ "ortho_loss": [
500
+ 0.0,
501
+ 0.0,
502
+ 0.0,
503
+ 0.0,
504
+ 0.0,
505
+ 0.0,
506
+ 0.0,
507
+ 0.0,
508
+ 0.0,
509
+ 0.0,
510
+ 0.0,
511
+ 0.0,
512
+ 0.0,
513
+ 0.0,
514
+ 0.0,
515
+ 0.0,
516
+ 0.0,
517
+ 0.0,
518
+ 0.0,
519
+ 0.0,
520
+ 0.0,
521
+ 0.0,
522
+ 0.0,
523
+ 0.0,
524
+ 0.0,
525
+ 0.0,
526
+ 0.0,
527
+ 0.0,
528
+ 0.0,
529
+ 0.0,
530
+ 0.0,
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0,
537
+ 0.0,
538
+ 0.0,
539
+ 0.0,
540
+ 0.0,
541
+ 0.0,
542
+ 0.0,
543
+ 0.0,
544
+ 0.0,
545
+ 0.0,
546
+ 0.0,
547
+ 0.0,
548
+ 0.0,
549
+ 0.0,
550
+ 0.0,
551
+ 0.0,
552
+ 0.0,
553
+ 0.0,
554
+ 0.0,
555
+ 0.0,
556
+ 0.0,
557
+ 0.0,
558
+ 0.0,
559
+ 0.0
560
+ ],
561
+ "anchor_loss": [
562
+ 0.0,
563
+ 0.0,
564
+ 0.0,
565
+ 0.0,
566
+ 0.0,
567
+ 0.0,
568
+ 0.0,
569
+ 0.0,
570
+ 0.0,
571
+ 0.0,
572
+ 0.0,
573
+ 0.0,
574
+ 0.0,
575
+ 0.0,
576
+ 0.0,
577
+ 0.0,
578
+ 0.0,
579
+ 0.0,
580
+ 0.0,
581
+ 0.0,
582
+ 0.0,
583
+ 0.0,
584
+ 0.0,
585
+ 0.0,
586
+ 0.0,
587
+ 0.0,
588
+ 0.0,
589
+ 0.0,
590
+ 0.0,
591
+ 0.0,
592
+ 0.0,
593
+ 0.0,
594
+ 0.0,
595
+ 0.0,
596
+ 0.0,
597
+ 0.0,
598
+ 0.0,
599
+ 0.0,
600
+ 0.0,
601
+ 0.0,
602
+ 0.0,
603
+ 0.0,
604
+ 0.0,
605
+ 0.0,
606
+ 0.0,
607
+ 0.0,
608
+ 0.0,
609
+ 0.0,
610
+ 0.0,
611
+ 0.0,
612
+ 0.0,
613
+ 0.0,
614
+ 0.0,
615
+ 0.0,
616
+ 0.0,
617
+ 0.0,
618
+ 0.0,
619
+ 0.0,
620
+ 0.0,
621
+ 0.0
622
+ ],
623
+ "jacobi_loss": [
624
+ 0.0,
625
+ 0.0,
626
+ 0.0,
627
+ 0.0,
628
+ 0.0,
629
+ 0.0,
630
+ 0.0,
631
+ 0.0,
632
+ 0.0,
633
+ 0.0,
634
+ 0.0,
635
+ 0.0,
636
+ 0.0,
637
+ 0.0,
638
+ 0.0,
639
+ 0.0,
640
+ 0.0,
641
+ 0.0,
642
+ 0.0,
643
+ 0.0,
644
+ 0.0,
645
+ 0.0,
646
+ 0.0,
647
+ 0.0,
648
+ 0.0,
649
+ 0.0,
650
+ 0.0,
651
+ 0.0,
652
+ 0.0,
653
+ 0.0,
654
+ 0.0,
655
+ 0.0,
656
+ 0.0,
657
+ 0.0,
658
+ 0.0,
659
+ 0.0,
660
+ 0.0,
661
+ 0.0,
662
+ 0.0,
663
+ 0.0,
664
+ 0.0,
665
+ 0.0,
666
+ 0.0,
667
+ 0.0,
668
+ 0.0,
669
+ 0.0,
670
+ 0.0,
671
+ 0.0,
672
+ 0.0,
673
+ 0.0,
674
+ 0.0,
675
+ 0.0,
676
+ 0.0,
677
+ 0.0,
678
+ 0.0,
679
+ 0.0,
680
+ 0.0,
681
+ 0.0,
682
+ 0.0,
683
+ 0.0
684
+ ],
685
+ "lr": [
686
+ 3.9200000000000004e-05,
687
+ 7.92e-05,
688
+ 8e-05,
689
+ 8e-05,
690
+ 8e-05,
691
+ 8e-05,
692
+ 8e-05,
693
+ 8e-05,
694
+ 8e-05,
695
+ 8e-05,
696
+ 8e-05,
697
+ 8e-05,
698
+ 8e-05,
699
+ 8e-05,
700
+ 8e-05,
701
+ 8e-05,
702
+ 8e-05,
703
+ 8e-05,
704
+ 8e-05,
705
+ 8e-05,
706
+ 8e-05,
707
+ 8e-05,
708
+ 8e-05,
709
+ 8e-05,
710
+ 8e-05,
711
+ 8e-05,
712
+ 8e-05,
713
+ 8e-05,
714
+ 8e-05,
715
+ 8e-05,
716
+ 8e-05,
717
+ 8e-05,
718
+ 8e-05,
719
+ 8e-05,
720
+ 8e-05,
721
+ 8e-05,
722
+ 7.946710526315791e-05,
723
+ 7.650657894736843e-05,
724
+ 7.354605263157895e-05,
725
+ 7.017105263157896e-05,
726
+ 6.721052631578948e-05,
727
+ 6.425e-05,
728
+ 6.0875e-05,
729
+ 5.791447368421054e-05,
730
+ 5.495394736842105e-05,
731
+ 5.157894736842105e-05,
732
+ 4.861842105263157e-05,
733
+ 4.565789473684212e-05,
734
+ 4.2282894736842104e-05,
735
+ 3.9322368421052625e-05,
736
+ 3.636184210526315e-05,
737
+ 3.2986842105263165e-05,
738
+ 3.0026315789473686e-05,
739
+ 2.7065789473684206e-05,
740
+ 2.3690789473684223e-05,
741
+ 2.0730263157894743e-05,
742
+ 1.7769736842105264e-05,
743
+ 1.4394736842105275e-05,
744
+ 1.1434210526315796e-05,
745
+ 8.473684210526318e-06
746
+ ]
747
+ },
748
+ "final_accuracy": 0.84625,
749
+ "sft_eval": {
750
+ "config": {
751
+ "ops": "add_sub",
752
+ "K": null,
753
+ "mode": "sft",
754
+ "n_digits": 6,
755
+ "n_per_split": 100
756
+ },
757
+ "splits": {
758
+ "add_S0": {
759
+ "full_accuracy": 0.0,
760
+ "n_examples": 100,
761
+ "per_subtask": {
762
+ "SA": {
763
+ "accuracy": 0.21652892561983472,
764
+ "count": 605
765
+ },
766
+ "SS": {
767
+ "accuracy": 0.09473684210526316,
768
+ "count": 95
769
+ }
770
+ }
771
+ },
772
+ "add_S1": {
773
+ "full_accuracy": 0.0,
774
+ "n_examples": 100,
775
+ "per_subtask": {
776
+ "SA": {
777
+ "accuracy": 0.20098039215686275,
778
+ "count": 204
779
+ },
780
+ "SC": {
781
+ "accuracy": 0.1301775147928994,
782
+ "count": 169
783
+ },
784
+ "SS": {
785
+ "accuracy": 0.0967741935483871,
786
+ "count": 31
787
+ },
788
+ "UC": {
789
+ "accuracy": 0.19256756756756757,
790
+ "count": 296
791
+ }
792
+ }
793
+ },
794
+ "add_S2": {
795
+ "full_accuracy": 0.0,
796
+ "n_examples": 100,
797
+ "per_subtask": {
798
+ "SA": {
799
+ "accuracy": 0.1901840490797546,
800
+ "count": 163
801
+ },
802
+ "SC": {
803
+ "accuracy": 0.1,
804
+ "count": 130
805
+ },
806
+ "SS": {
807
+ "accuracy": 0.08045977011494253,
808
+ "count": 87
809
+ },
810
+ "UC": {
811
+ "accuracy": 0.2315270935960591,
812
+ "count": 203
813
+ },
814
+ "US": {
815
+ "accuracy": 0.09401709401709402,
816
+ "count": 117
817
+ }
818
+ }
819
+ },
820
+ "add_S3": {
821
+ "full_accuracy": 0.0,
822
+ "n_examples": 100,
823
+ "per_subtask": {
824
+ "SA": {
825
+ "accuracy": 0.15702479338842976,
826
+ "count": 121
827
+ },
828
+ "SC": {
829
+ "accuracy": 0.11570247933884298,
830
+ "count": 121
831
+ },
832
+ "SS": {
833
+ "accuracy": 0.12244897959183673,
834
+ "count": 49
835
+ },
836
+ "UC": {
837
+ "accuracy": 0.27956989247311825,
838
+ "count": 186
839
+ },
840
+ "US": {
841
+ "accuracy": 0.09865470852017937,
842
+ "count": 223
843
+ }
844
+ }
845
+ },
846
+ "add_S4": {
847
+ "full_accuracy": 0.0,
848
+ "n_examples": 100,
849
+ "per_subtask": {
850
+ "SA": {
851
+ "accuracy": 0.2692307692307692,
852
+ "count": 104
853
+ },
854
+ "SC": {
855
+ "accuracy": 0.08490566037735849,
856
+ "count": 106
857
+ },
858
+ "SS": {
859
+ "accuracy": 0.21739130434782608,
860
+ "count": 23
861
+ },
862
+ "UC": {
863
+ "accuracy": 0.38125,
864
+ "count": 160
865
+ },
866
+ "US": {
867
+ "accuracy": 0.06188925081433225,
868
+ "count": 307
869
+ }
870
+ }
871
+ },
872
+ "add_S5": {
873
+ "full_accuracy": 0.0,
874
+ "n_examples": 100,
875
+ "per_subtask": {
876
+ "SA": {
877
+ "accuracy": 0.3,
878
+ "count": 100
879
+ },
880
+ "SC": {
881
+ "accuracy": 0.15,
882
+ "count": 100
883
+ },
884
+ "UC": {
885
+ "accuracy": 0.12,
886
+ "count": 100
887
+ },
888
+ "US": {
889
+ "accuracy": 0.105,
890
+ "count": 400
891
+ }
892
+ }
893
+ },
894
+ "add_S6": {
895
+ "full_accuracy": 0.0,
896
+ "n_examples": 100,
897
+ "per_subtask": {
898
+ "SC": {
899
+ "accuracy": 0.09,
900
+ "count": 100
901
+ },
902
+ "UC": {
903
+ "accuracy": 0.66,
904
+ "count": 100
905
+ },
906
+ "US": {
907
+ "accuracy": 0.106,
908
+ "count": 500
909
+ }
910
+ }
911
+ },
912
+ "add_random": {
913
+ "full_accuracy": 0.0,
914
+ "n_examples": 200,
915
+ "per_subtask": {
916
+ "SA": {
917
+ "accuracy": 0.19015659955257272,
918
+ "count": 447
919
+ },
920
+ "SC": {
921
+ "accuracy": 0.096875,
922
+ "count": 320
923
+ },
924
+ "SS": {
925
+ "accuracy": 0.17857142857142858,
926
+ "count": 56
927
+ },
928
+ "UC": {
929
+ "accuracy": 0.2060491493383743,
930
+ "count": 529
931
+ },
932
+ "US": {
933
+ "accuracy": 0.10416666666666667,
934
+ "count": 48
935
+ }
936
+ }
937
+ },
938
+ "add_C3": {
939
+ "full_accuracy": 0.0,
940
+ "n_examples": 100,
941
+ "per_subtask": {
942
+ "SA": {
943
+ "accuracy": 0.25333333333333335,
944
+ "count": 300
945
+ },
946
+ "SC": {
947
+ "accuracy": 0.1,
948
+ "count": 100
949
+ },
950
+ "UC": {
951
+ "accuracy": 0.13989637305699482,
952
+ "count": 193
953
+ },
954
+ "US": {
955
+ "accuracy": 0.19626168224299065,
956
+ "count": 107
957
+ }
958
+ }
959
+ },
960
+ "add_C4": {
961
+ "full_accuracy": 0.0,
962
+ "n_examples": 100,
963
+ "per_subtask": {
964
+ "SA": {
965
+ "accuracy": 0.34,
966
+ "count": 200
967
+ },
968
+ "SC": {
969
+ "accuracy": 0.09,
970
+ "count": 100
971
+ },
972
+ "UC": {
973
+ "accuracy": 0.109375,
974
+ "count": 256
975
+ },
976
+ "US": {
977
+ "accuracy": 0.2013888888888889,
978
+ "count": 144
979
+ }
980
+ }
981
+ },
982
+ "add_C5": {
983
+ "full_accuracy": 0.0,
984
+ "n_examples": 100,
985
+ "per_subtask": {
986
+ "SA": {
987
+ "accuracy": 0.21,
988
+ "count": 100
989
+ },
990
+ "SC": {
991
+ "accuracy": 0.06,
992
+ "count": 100
993
+ },
994
+ "UC": {
995
+ "accuracy": 0.1111111111111111,
996
+ "count": 306
997
+ },
998
+ "US": {
999
+ "accuracy": 0.07216494845360824,
1000
+ "count": 194
1001
+ }
1002
+ }
1003
+ },
1004
+ "add_C6": {
1005
+ "full_accuracy": 0.0,
1006
+ "n_examples": 100,
1007
+ "per_subtask": {
1008
+ "SC": {
1009
+ "accuracy": 0.07,
1010
+ "count": 100
1011
+ },
1012
+ "UC": {
1013
+ "accuracy": 0.24043715846994534,
1014
+ "count": 366
1015
+ },
1016
+ "US": {
1017
+ "accuracy": 0.07692307692307693,
1018
+ "count": 234
1019
+ }
1020
+ }
1021
+ },
1022
+ "sub_M0": {
1023
+ "full_accuracy": 0.0,
1024
+ "n_examples": 100,
1025
+ "per_subtask": {
1026
+ "MD": {
1027
+ "accuracy": 0.14309484193011648,
1028
+ "count": 601
1029
+ },
1030
+ "ME": {
1031
+ "accuracy": 0.16161616161616163,
1032
+ "count": 99
1033
+ }
1034
+ }
1035
+ },
1036
+ "sub_M1": {
1037
+ "full_accuracy": 0.0,
1038
+ "n_examples": 100,
1039
+ "per_subtask": {
1040
+ "MD": {
1041
+ "accuracy": 0.2616487455197133,
1042
+ "count": 279
1043
+ },
1044
+ "MB": {
1045
+ "accuracy": 0.0896551724137931,
1046
+ "count": 145
1047
+ },
1048
+ "ME": {
1049
+ "accuracy": 0.041666666666666664,
1050
+ "count": 24
1051
+ },
1052
+ "UB": {
1053
+ "accuracy": 0.07539682539682539,
1054
+ "count": 252
1055
+ }
1056
+ }
1057
+ },
1058
+ "sub_M2": {
1059
+ "full_accuracy": 0.0,
1060
+ "n_examples": 100,
1061
+ "per_subtask": {
1062
+ "MD": {
1063
+ "accuracy": 0.3568075117370892,
1064
+ "count": 213
1065
+ },
1066
+ "MB": {
1067
+ "accuracy": 0.061946902654867256,
1068
+ "count": 113
1069
+ },
1070
+ "ME": {
1071
+ "accuracy": 0.17647058823529413,
1072
+ "count": 85
1073
+ },
1074
+ "UB": {
1075
+ "accuracy": 0.09944751381215469,
1076
+ "count": 181
1077
+ },
1078
+ "UD": {
1079
+ "accuracy": 0.17592592592592593,
1080
+ "count": 108
1081
+ }
1082
+ }
1083
+ },
1084
+ "sub_M3": {
1085
+ "full_accuracy": 0.0,
1086
+ "n_examples": 100,
1087
+ "per_subtask": {
1088
+ "MD": {
1089
+ "accuracy": 0.37988826815642457,
1090
+ "count": 179
1091
+ },
1092
+ "MB": {
1093
+ "accuracy": 0.11650485436893204,
1094
+ "count": 103
1095
+ },
1096
+ "ME": {
1097
+ "accuracy": 0.10714285714285714,
1098
+ "count": 56
1099
+ },
1100
+ "UB": {
1101
+ "accuracy": 0.087248322147651,
1102
+ "count": 149
1103
+ },
1104
+ "UD": {
1105
+ "accuracy": 0.1643192488262911,
1106
+ "count": 213
1107
+ }
1108
+ }
1109
+ },
1110
+ "sub_M4": {
1111
+ "full_accuracy": 0.0,
1112
+ "n_examples": 100,
1113
+ "per_subtask": {
1114
+ "MD": {
1115
+ "accuracy": 0.42,
1116
+ "count": 200
1117
+ },
1118
+ "MB": {
1119
+ "accuracy": 0.08,
1120
+ "count": 100
1121
+ },
1122
+ "UB": {
1123
+ "accuracy": 0.11,
1124
+ "count": 100
1125
+ },
1126
+ "UD": {
1127
+ "accuracy": 0.15666666666666668,
1128
+ "count": 300
1129
+ }
1130
+ }
1131
+ },
1132
+ "sub_M5": {
1133
+ "full_accuracy": 0.0,
1134
+ "n_examples": 100,
1135
+ "per_subtask": {
1136
+ "MD": {
1137
+ "accuracy": 0.86,
1138
+ "count": 100
1139
+ },
1140
+ "MB": {
1141
+ "accuracy": 0.12,
1142
+ "count": 100
1143
+ },
1144
+ "UB": {
1145
+ "accuracy": 0.11,
1146
+ "count": 100
1147
+ },
1148
+ "UD": {
1149
+ "accuracy": 0.1875,
1150
+ "count": 400
1151
+ }
1152
+ }
1153
+ },
1154
+ "sub_random": {
1155
+ "full_accuracy": 0.0,
1156
+ "n_examples": 200,
1157
+ "per_subtask": {
1158
+ "MD": {
1159
+ "accuracy": 0.23,
1160
+ "count": 600
1161
+ },
1162
+ "MB": {
1163
+ "accuracy": 0.08239700374531835,
1164
+ "count": 267
1165
+ },
1166
+ "ME": {
1167
+ "accuracy": 0.24528301886792453,
1168
+ "count": 53
1169
+ },
1170
+ "UB": {
1171
+ "accuracy": 0.09111617312072894,
1172
+ "count": 439
1173
+ },
1174
+ "UD": {
1175
+ "accuracy": 0.14634146341463414,
1176
+ "count": 41
1177
+ }
1178
+ }
1179
+ },
1180
+ "sub_B3": {
1181
+ "full_accuracy": 0.0,
1182
+ "n_examples": 100,
1183
+ "per_subtask": {
1184
+ "MD": {
1185
+ "accuracy": 0.31,
1186
+ "count": 300
1187
+ },
1188
+ "MB": {
1189
+ "accuracy": 0.05,
1190
+ "count": 100
1191
+ },
1192
+ "UB": {
1193
+ "accuracy": 0.10152284263959391,
1194
+ "count": 197
1195
+ },
1196
+ "UD": {
1197
+ "accuracy": 0.14563106796116504,
1198
+ "count": 103
1199
+ }
1200
+ }
1201
+ },
1202
+ "sub_B4": {
1203
+ "full_accuracy": 0.0,
1204
+ "n_examples": 100,
1205
+ "per_subtask": {
1206
+ "MD": {
1207
+ "accuracy": 0.33,
1208
+ "count": 200
1209
+ },
1210
+ "MB": {
1211
+ "accuracy": 0.1,
1212
+ "count": 100
1213
+ },
1214
+ "UB": {
1215
+ "accuracy": 0.0728744939271255,
1216
+ "count": 247
1217
+ },
1218
+ "UD": {
1219
+ "accuracy": 0.30718954248366015,
1220
+ "count": 153
1221
+ }
1222
+ }
1223
+ },
1224
+ "sub_B5": {
1225
+ "full_accuracy": 0.0,
1226
+ "n_examples": 100,
1227
+ "per_subtask": {
1228
+ "MD": {
1229
+ "accuracy": 0.83,
1230
+ "count": 100
1231
+ },
1232
+ "MB": {
1233
+ "accuracy": 0.07,
1234
+ "count": 100
1235
+ },
1236
+ "UB": {
1237
+ "accuracy": 0.09060402684563758,
1238
+ "count": 298
1239
+ },
1240
+ "UD": {
1241
+ "accuracy": 0.20297029702970298,
1242
+ "count": 202
1243
+ }
1244
+ }
1245
+ }
1246
+ },
1247
+ "summary": {
1248
+ "overall_accuracy": 0.0,
1249
+ "total_examples": 2400,
1250
+ "n_splits": 22
1251
+ }
1252
+ },
1253
+ "sorl_eval": {
1254
+ "config": {
1255
+ "ops": "add_sub",
1256
+ "K": 4,
1257
+ "mode": "sorl",
1258
+ "n_digits": 6,
1259
+ "n_per_split": 100
1260
+ },
1261
+ "splits": {
1262
+ "add_S0": {
1263
+ "full_accuracy": 1.0,
1264
+ "n_examples": 100,
1265
+ "per_subtask": {
1266
+ "SA": {
1267
+ "accuracy": 1.0,
1268
+ "count": 605
1269
+ },
1270
+ "SS": {
1271
+ "accuracy": 1.0,
1272
+ "count": 95
1273
+ }
1274
+ }
1275
+ },
1276
+ "add_S1": {
1277
+ "full_accuracy": 1.0,
1278
+ "n_examples": 100,
1279
+ "per_subtask": {
1280
+ "SA": {
1281
+ "accuracy": 1.0,
1282
+ "count": 204
1283
+ },
1284
+ "SC": {
1285
+ "accuracy": 1.0,
1286
+ "count": 169
1287
+ },
1288
+ "SS": {
1289
+ "accuracy": 1.0,
1290
+ "count": 31
1291
+ },
1292
+ "UC": {
1293
+ "accuracy": 1.0,
1294
+ "count": 296
1295
+ }
1296
+ }
1297
+ },
1298
+ "add_S2": {
1299
+ "full_accuracy": 0.99,
1300
+ "n_examples": 100,
1301
+ "per_subtask": {
1302
+ "SA": {
1303
+ "accuracy": 1.0,
1304
+ "count": 163
1305
+ },
1306
+ "SC": {
1307
+ "accuracy": 0.9923076923076923,
1308
+ "count": 130
1309
+ },
1310
+ "SS": {
1311
+ "accuracy": 1.0,
1312
+ "count": 87
1313
+ },
1314
+ "UC": {
1315
+ "accuracy": 1.0,
1316
+ "count": 203
1317
+ },
1318
+ "US": {
1319
+ "accuracy": 1.0,
1320
+ "count": 117
1321
+ }
1322
+ }
1323
+ },
1324
+ "add_S3": {
1325
+ "full_accuracy": 0.89,
1326
+ "n_examples": 100,
1327
+ "per_subtask": {
1328
+ "SA": {
1329
+ "accuracy": 1.0,
1330
+ "count": 121
1331
+ },
1332
+ "SC": {
1333
+ "accuracy": 1.0,
1334
+ "count": 121
1335
+ },
1336
+ "SS": {
1337
+ "accuracy": 1.0,
1338
+ "count": 49
1339
+ },
1340
+ "UC": {
1341
+ "accuracy": 0.9408602150537635,
1342
+ "count": 186
1343
+ },
1344
+ "US": {
1345
+ "accuracy": 1.0,
1346
+ "count": 223
1347
+ }
1348
+ }
1349
+ },
1350
+ "add_S4": {
1351
+ "full_accuracy": 0.65,
1352
+ "n_examples": 100,
1353
+ "per_subtask": {
1354
+ "SA": {
1355
+ "accuracy": 1.0,
1356
+ "count": 104
1357
+ },
1358
+ "SC": {
1359
+ "accuracy": 1.0,
1360
+ "count": 106
1361
+ },
1362
+ "SS": {
1363
+ "accuracy": 1.0,
1364
+ "count": 23
1365
+ },
1366
+ "UC": {
1367
+ "accuracy": 0.7875,
1368
+ "count": 160
1369
+ },
1370
+ "US": {
1371
+ "accuracy": 0.9804560260586319,
1372
+ "count": 307
1373
+ }
1374
+ }
1375
+ },
1376
+ "add_S5": {
1377
+ "full_accuracy": 0.51,
1378
+ "n_examples": 100,
1379
+ "per_subtask": {
1380
+ "SA": {
1381
+ "accuracy": 1.0,
1382
+ "count": 100
1383
+ },
1384
+ "SC": {
1385
+ "accuracy": 1.0,
1386
+ "count": 100
1387
+ },
1388
+ "UC": {
1389
+ "accuracy": 0.61,
1390
+ "count": 100
1391
+ },
1392
+ "US": {
1393
+ "accuracy": 0.885,
1394
+ "count": 400
1395
+ }
1396
+ }
1397
+ },
1398
+ "add_S6": {
1399
+ "full_accuracy": 0.9,
1400
+ "n_examples": 100,
1401
+ "per_subtask": {
1402
+ "SC": {
1403
+ "accuracy": 1.0,
1404
+ "count": 100
1405
+ },
1406
+ "UC": {
1407
+ "accuracy": 1.0,
1408
+ "count": 100
1409
+ },
1410
+ "US": {
1411
+ "accuracy": 0.98,
1412
+ "count": 500
1413
+ }
1414
+ }
1415
+ },
1416
+ "add_random": {
1417
+ "full_accuracy": 1.0,
1418
+ "n_examples": 200,
1419
+ "per_subtask": {
1420
+ "SA": {
1421
+ "accuracy": 1.0,
1422
+ "count": 447
1423
+ },
1424
+ "SC": {
1425
+ "accuracy": 1.0,
1426
+ "count": 320
1427
+ },
1428
+ "SS": {
1429
+ "accuracy": 1.0,
1430
+ "count": 56
1431
+ },
1432
+ "UC": {
1433
+ "accuracy": 1.0,
1434
+ "count": 529
1435
+ },
1436
+ "US": {
1437
+ "accuracy": 1.0,
1438
+ "count": 48
1439
+ }
1440
+ }
1441
+ },
1442
+ "add_C3": {
1443
+ "full_accuracy": 0.96,
1444
+ "n_examples": 100,
1445
+ "per_subtask": {
1446
+ "SA": {
1447
+ "accuracy": 1.0,
1448
+ "count": 300
1449
+ },
1450
+ "SC": {
1451
+ "accuracy": 1.0,
1452
+ "count": 100
1453
+ },
1454
+ "UC": {
1455
+ "accuracy": 0.9792746113989638,
1456
+ "count": 193
1457
+ },
1458
+ "US": {
1459
+ "accuracy": 1.0,
1460
+ "count": 107
1461
+ }
1462
+ }
1463
+ },
1464
+ "add_C4": {
1465
+ "full_accuracy": 0.9,
1466
+ "n_examples": 100,
1467
+ "per_subtask": {
1468
+ "SA": {
1469
+ "accuracy": 1.0,
1470
+ "count": 200
1471
+ },
1472
+ "SC": {
1473
+ "accuracy": 1.0,
1474
+ "count": 100
1475
+ },
1476
+ "UC": {
1477
+ "accuracy": 0.96484375,
1478
+ "count": 256
1479
+ },
1480
+ "US": {
1481
+ "accuracy": 0.9861111111111112,
1482
+ "count": 144
1483
+ }
1484
+ }
1485
+ },
1486
+ "add_C5": {
1487
+ "full_accuracy": 0.86,
1488
+ "n_examples": 100,
1489
+ "per_subtask": {
1490
+ "SA": {
1491
+ "accuracy": 1.0,
1492
+ "count": 100
1493
+ },
1494
+ "SC": {
1495
+ "accuracy": 1.0,
1496
+ "count": 100
1497
+ },
1498
+ "UC": {
1499
+ "accuracy": 0.9575163398692811,
1500
+ "count": 306
1501
+ },
1502
+ "US": {
1503
+ "accuracy": 0.9845360824742269,
1504
+ "count": 194
1505
+ }
1506
+ }
1507
+ },
1508
+ "add_C6": {
1509
+ "full_accuracy": 0.93,
1510
+ "n_examples": 100,
1511
+ "per_subtask": {
1512
+ "SC": {
1513
+ "accuracy": 1.0,
1514
+ "count": 100
1515
+ },
1516
+ "UC": {
1517
+ "accuracy": 0.9836065573770492,
1518
+ "count": 366
1519
+ },
1520
+ "US": {
1521
+ "accuracy": 0.9957264957264957,
1522
+ "count": 234
1523
+ }
1524
+ }
1525
+ },
1526
+ "sub_M0": {
1527
+ "full_accuracy": 0.98,
1528
+ "n_examples": 100,
1529
+ "per_subtask": {
1530
+ "MD": {
1531
+ "accuracy": 0.9966722129783694,
1532
+ "count": 601
1533
+ },
1534
+ "ME": {
1535
+ "accuracy": 1.0,
1536
+ "count": 99
1537
+ }
1538
+ }
1539
+ },
1540
+ "sub_M1": {
1541
+ "full_accuracy": 1.0,
1542
+ "n_examples": 100,
1543
+ "per_subtask": {
1544
+ "MD": {
1545
+ "accuracy": 1.0,
1546
+ "count": 279
1547
+ },
1548
+ "MB": {
1549
+ "accuracy": 1.0,
1550
+ "count": 145
1551
+ },
1552
+ "ME": {
1553
+ "accuracy": 1.0,
1554
+ "count": 24
1555
+ },
1556
+ "UB": {
1557
+ "accuracy": 1.0,
1558
+ "count": 252
1559
+ }
1560
+ }
1561
+ },
1562
+ "sub_M2": {
1563
+ "full_accuracy": 1.0,
1564
+ "n_examples": 100,
1565
+ "per_subtask": {
1566
+ "MD": {
1567
+ "accuracy": 1.0,
1568
+ "count": 213
1569
+ },
1570
+ "MB": {
1571
+ "accuracy": 1.0,
1572
+ "count": 113
1573
+ },
1574
+ "ME": {
1575
+ "accuracy": 1.0,
1576
+ "count": 85
1577
+ },
1578
+ "UB": {
1579
+ "accuracy": 1.0,
1580
+ "count": 181
1581
+ },
1582
+ "UD": {
1583
+ "accuracy": 1.0,
1584
+ "count": 108
1585
+ }
1586
+ }
1587
+ },
1588
+ "sub_M3": {
1589
+ "full_accuracy": 0.84,
1590
+ "n_examples": 100,
1591
+ "per_subtask": {
1592
+ "MD": {
1593
+ "accuracy": 1.0,
1594
+ "count": 179
1595
+ },
1596
+ "MB": {
1597
+ "accuracy": 1.0,
1598
+ "count": 103
1599
+ },
1600
+ "ME": {
1601
+ "accuracy": 1.0,
1602
+ "count": 56
1603
+ },
1604
+ "UB": {
1605
+ "accuracy": 0.8926174496644296,
1606
+ "count": 149
1607
+ },
1608
+ "UD": {
1609
+ "accuracy": 1.0,
1610
+ "count": 213
1611
+ }
1612
+ }
1613
+ },
1614
+ "sub_M4": {
1615
+ "full_accuracy": 0.23,
1616
+ "n_examples": 100,
1617
+ "per_subtask": {
1618
+ "MD": {
1619
+ "accuracy": 1.0,
1620
+ "count": 200
1621
+ },
1622
+ "MB": {
1623
+ "accuracy": 1.0,
1624
+ "count": 100
1625
+ },
1626
+ "UB": {
1627
+ "accuracy": 0.26,
1628
+ "count": 100
1629
+ },
1630
+ "UD": {
1631
+ "accuracy": 0.9433333333333334,
1632
+ "count": 300
1633
+ }
1634
+ }
1635
+ },
1636
+ "sub_M5": {
1637
+ "full_accuracy": 0.01,
1638
+ "n_examples": 100,
1639
+ "per_subtask": {
1640
+ "MD": {
1641
+ "accuracy": 1.0,
1642
+ "count": 100
1643
+ },
1644
+ "MB": {
1645
+ "accuracy": 1.0,
1646
+ "count": 100
1647
+ },
1648
+ "UB": {
1649
+ "accuracy": 0.08,
1650
+ "count": 100
1651
+ },
1652
+ "UD": {
1653
+ "accuracy": 0.765,
1654
+ "count": 400
1655
+ }
1656
+ }
1657
+ },
1658
+ "sub_random": {
1659
+ "full_accuracy": 1.0,
1660
+ "n_examples": 200,
1661
+ "per_subtask": {
1662
+ "MD": {
1663
+ "accuracy": 1.0,
1664
+ "count": 600
1665
+ },
1666
+ "MB": {
1667
+ "accuracy": 1.0,
1668
+ "count": 267
1669
+ },
1670
+ "ME": {
1671
+ "accuracy": 1.0,
1672
+ "count": 53
1673
+ },
1674
+ "UB": {
1675
+ "accuracy": 1.0,
1676
+ "count": 439
1677
+ },
1678
+ "UD": {
1679
+ "accuracy": 1.0,
1680
+ "count": 41
1681
+ }
1682
+ }
1683
+ },
1684
+ "sub_B3": {
1685
+ "full_accuracy": 0.99,
1686
+ "n_examples": 100,
1687
+ "per_subtask": {
1688
+ "MD": {
1689
+ "accuracy": 1.0,
1690
+ "count": 300
1691
+ },
1692
+ "MB": {
1693
+ "accuracy": 1.0,
1694
+ "count": 100
1695
+ },
1696
+ "UB": {
1697
+ "accuracy": 0.9949238578680203,
1698
+ "count": 197
1699
+ },
1700
+ "UD": {
1701
+ "accuracy": 1.0,
1702
+ "count": 103
1703
+ }
1704
+ }
1705
+ },
1706
+ "sub_B4": {
1707
+ "full_accuracy": 0.83,
1708
+ "n_examples": 100,
1709
+ "per_subtask": {
1710
+ "MD": {
1711
+ "accuracy": 1.0,
1712
+ "count": 200
1713
+ },
1714
+ "MB": {
1715
+ "accuracy": 1.0,
1716
+ "count": 100
1717
+ },
1718
+ "UB": {
1719
+ "accuracy": 0.9352226720647774,
1720
+ "count": 247
1721
+ },
1722
+ "UD": {
1723
+ "accuracy": 0.9673202614379085,
1724
+ "count": 153
1725
+ }
1726
+ }
1727
+ },
1728
+ "sub_B5": {
1729
+ "full_accuracy": 0.84,
1730
+ "n_examples": 100,
1731
+ "per_subtask": {
1732
+ "MD": {
1733
+ "accuracy": 1.0,
1734
+ "count": 100
1735
+ },
1736
+ "MB": {
1737
+ "accuracy": 1.0,
1738
+ "count": 100
1739
+ },
1740
+ "UB": {
1741
+ "accuracy": 0.9563758389261745,
1742
+ "count": 298
1743
+ },
1744
+ "UD": {
1745
+ "accuracy": 0.9504950495049505,
1746
+ "count": 202
1747
+ }
1748
+ }
1749
+ }
1750
+ },
1751
+ "summary": {
1752
+ "overall_accuracy": 0.84625,
1753
+ "total_examples": 2400,
1754
+ "n_splits": 22
1755
+ }
1756
+ },
1757
+ "sorl_overall_accuracy": 0.84625,
1758
+ "sft_overall_accuracy": 0.0
1759
+ }
add_sub_sorl_v6_abs30_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c50346ad6978378c5479883daf3e64e8489a3a0f7e9ec5986a671a1e20fffbb9
3
+ size 650385300
add_sub_sorl_v6_abs30_10K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_v6_abs30_K4_10K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 30,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl_v6",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162519662,
71
+ "run_name": "add_sub_sorl_v6_abs30_10K",
72
+ "git_commit": "17e935f460a7f9595b705c1d614101a6b0e520f7",
73
+ "timestamp": "2026-04-14T10:04:29.277632+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v6",
79
+ "wandb_run_id": "f9fw4ljx",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/f9fw4ljx",
81
+ "final_accuracy": 0.84625,
82
+ "sft_accuracy": 0.0,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }