amirali1985 commited on
Commit
177d7d5
·
verified ·
1 Parent(s): 0cb851b

Upload add_sub_sorl_v1_abs10_50K

Browse files
add_sub_sorl_v1_abs10_50K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151654
37
+ }
add_sub_sorl_v1_abs10_50K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_50K/metrics.json ADDED
@@ -0,0 +1,2257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 400,
12
+ 450,
13
+ 500,
14
+ 550,
15
+ 600,
16
+ 650,
17
+ 700,
18
+ 750,
19
+ 832,
20
+ 882,
21
+ 932,
22
+ 982,
23
+ 1032,
24
+ 1082,
25
+ 1132,
26
+ 1182,
27
+ 1232,
28
+ 1282,
29
+ 1332,
30
+ 1382,
31
+ 1432,
32
+ 1482,
33
+ 1532,
34
+ 1614,
35
+ 1664,
36
+ 1714,
37
+ 1764,
38
+ 1814,
39
+ 1864,
40
+ 1914,
41
+ 1964,
42
+ 2014,
43
+ 2064,
44
+ 2114,
45
+ 2164,
46
+ 2214,
47
+ 2264,
48
+ 2314,
49
+ 2396,
50
+ 2446,
51
+ 2496,
52
+ 2546,
53
+ 2596,
54
+ 2646,
55
+ 2696,
56
+ 2746,
57
+ 2796,
58
+ 2846,
59
+ 2896,
60
+ 2946,
61
+ 2996,
62
+ 3046,
63
+ 3096,
64
+ 3178,
65
+ 3228,
66
+ 3278,
67
+ 3328,
68
+ 3378,
69
+ 3428,
70
+ 3478,
71
+ 3528,
72
+ 3578,
73
+ 3628,
74
+ 3678,
75
+ 3728,
76
+ 3778,
77
+ 3828,
78
+ 3878,
79
+ 3960,
80
+ 4010,
81
+ 4060,
82
+ 4110,
83
+ 4160,
84
+ 4210,
85
+ 4260,
86
+ 4310,
87
+ 4360,
88
+ 4410,
89
+ 4460,
90
+ 4510,
91
+ 4560,
92
+ 4610,
93
+ 4660,
94
+ 4742,
95
+ 4792,
96
+ 4842,
97
+ 4892,
98
+ 4942,
99
+ 4992,
100
+ 5042,
101
+ 5092,
102
+ 5142,
103
+ 5192,
104
+ 5242,
105
+ 5292,
106
+ 5342,
107
+ 5392,
108
+ 5442,
109
+ 5524,
110
+ 5574,
111
+ 5624,
112
+ 5674,
113
+ 5724,
114
+ 5774,
115
+ 5824,
116
+ 5874,
117
+ 5924,
118
+ 5974,
119
+ 6024,
120
+ 6074,
121
+ 6124,
122
+ 6174,
123
+ 6224,
124
+ 6306,
125
+ 6356,
126
+ 6406,
127
+ 6456,
128
+ 6506,
129
+ 6556,
130
+ 6606,
131
+ 6656,
132
+ 6706,
133
+ 6756,
134
+ 6806,
135
+ 6856,
136
+ 6906,
137
+ 6956,
138
+ 7006,
139
+ 7088,
140
+ 7138,
141
+ 7188,
142
+ 7238,
143
+ 7288,
144
+ 7338,
145
+ 7388,
146
+ 7438,
147
+ 7488,
148
+ 7538,
149
+ 7588,
150
+ 7638,
151
+ 7688,
152
+ 7738,
153
+ 7788
154
+ ],
155
+ "loss": [
156
+ 16.378482818603516,
157
+ 11.21066951751709,
158
+ 7.495655059814453,
159
+ 4.388947486877441,
160
+ 3.196079730987549,
161
+ 2.9686319828033447,
162
+ 2.8419575691223145,
163
+ 2.5052895545959473,
164
+ 2.5802419185638428,
165
+ 2.3319308757781982,
166
+ 2.3821346759796143,
167
+ 2.090925693511963,
168
+ 2.018442392349243,
169
+ 1.0070109367370605,
170
+ 1.1892414093017578,
171
+ 1.2099348306655884,
172
+ 1.1913542747497559,
173
+ 1.5283111333847046,
174
+ 1.493798017501831,
175
+ 1.1172080039978027,
176
+ 1.102172613143921,
177
+ 0.8972475528717041,
178
+ 0.4773584008216858,
179
+ -0.12886226177215576,
180
+ -0.2539653778076172,
181
+ -0.7412328124046326,
182
+ -0.963010311126709,
183
+ -1.3086572885513306,
184
+ -2.896714448928833,
185
+ -4.599146366119385,
186
+ -7.537700176239014,
187
+ -9.402212142944336,
188
+ -9.60422134399414,
189
+ -10.469030380249023,
190
+ -10.379827499389648,
191
+ -10.23600959777832,
192
+ -11.531538009643555,
193
+ -11.200029373168945,
194
+ -10.917357444763184,
195
+ -12.113883972167969,
196
+ -11.413084983825684,
197
+ -11.835735321044922,
198
+ -12.362313270568848,
199
+ -12.001949310302734,
200
+ -11.52999210357666,
201
+ -12.262264251708984,
202
+ -12.913427352905273,
203
+ -12.869568824768066,
204
+ -12.031846046447754,
205
+ -12.037793159484863,
206
+ -12.129172325134277,
207
+ -11.788479804992676,
208
+ -11.700554847717285,
209
+ -13.019804954528809,
210
+ -12.643450736999512,
211
+ -12.31265926361084,
212
+ -12.17251968383789,
213
+ -12.044828414916992,
214
+ -12.354959487915039,
215
+ -13.130356788635254,
216
+ -12.746546745300293,
217
+ -12.852694511413574,
218
+ -12.775166511535645,
219
+ -13.146623611450195,
220
+ -13.022551536560059,
221
+ -13.103012084960938,
222
+ -12.287353515625,
223
+ -12.436395645141602,
224
+ -12.934889793395996,
225
+ -13.008482933044434,
226
+ -12.999844551086426,
227
+ -12.155572891235352,
228
+ -12.309308052062988,
229
+ -12.694921493530273,
230
+ -12.600488662719727,
231
+ -11.684075355529785,
232
+ -13.753292083740234,
233
+ -12.823161125183105,
234
+ -12.862237930297852,
235
+ -12.892120361328125,
236
+ -13.087822914123535,
237
+ -13.00316047668457,
238
+ -12.494159698486328,
239
+ -13.454633712768555,
240
+ -12.264759063720703,
241
+ -11.953563690185547,
242
+ -13.270965576171875,
243
+ -13.304547309875488,
244
+ -13.360864639282227,
245
+ -12.359524726867676,
246
+ -12.799259185791016,
247
+ -12.921852111816406,
248
+ -12.180639266967773,
249
+ -12.332677841186523,
250
+ -11.831616401672363,
251
+ -12.805928230285645,
252
+ -12.48134994506836,
253
+ -12.441422462463379,
254
+ -12.271108627319336,
255
+ -12.153956413269043,
256
+ -12.179333686828613,
257
+ -12.23478889465332,
258
+ -11.327901840209961,
259
+ -12.195158004760742,
260
+ -12.080638885498047,
261
+ -11.152657508850098,
262
+ -11.550673484802246,
263
+ -10.325593948364258,
264
+ -9.875710487365723,
265
+ -8.455825805664062,
266
+ -8.968307495117188,
267
+ -8.065058708190918,
268
+ -6.983817100524902,
269
+ -7.070670127868652,
270
+ -6.46987771987915,
271
+ -6.365849018096924,
272
+ -6.478631496429443,
273
+ -5.721439838409424,
274
+ -4.883893013000488,
275
+ -5.037868022918701,
276
+ -4.996427059173584,
277
+ -5.036664009094238,
278
+ -5.050143718719482,
279
+ -4.510994911193848,
280
+ -4.2226057052612305,
281
+ -5.094910144805908,
282
+ -4.5359344482421875,
283
+ -4.703138828277588,
284
+ -4.337795257568359,
285
+ -3.8652913570404053,
286
+ -3.8839306831359863,
287
+ -3.3045520782470703,
288
+ -4.1146416664123535,
289
+ -3.769718647003174,
290
+ -3.929957628250122,
291
+ -3.768799066543579,
292
+ -3.972208261489868,
293
+ -3.634816884994507,
294
+ -3.598145008087158,
295
+ -3.5497827529907227,
296
+ -4.0864362716674805,
297
+ -3.5880346298217773,
298
+ -4.015559673309326,
299
+ -3.432640790939331,
300
+ -3.608142614364624,
301
+ -3.474095106124878,
302
+ -3.7869486808776855,
303
+ -3.5873405933380127,
304
+ -3.4235167503356934,
305
+ -3.184415578842163
306
+ ],
307
+ "base_loss": [
308
+ 9.474967002868652,
309
+ 7.123448371887207,
310
+ 6.168188571929932,
311
+ 4.6793622970581055,
312
+ 2.6044986248016357,
313
+ 2.013458728790283,
314
+ 1.9103567600250244,
315
+ 2.0000832080841064,
316
+ 1.8899085521697998,
317
+ 1.8412806987762451,
318
+ 1.8483163118362427,
319
+ 1.8022738695144653,
320
+ 1.847564458847046,
321
+ 1.8393151760101318,
322
+ 1.850179672241211,
323
+ 1.8068408966064453,
324
+ 1.824257731437683,
325
+ 1.7777620553970337,
326
+ 1.8727468252182007,
327
+ 1.7854875326156616,
328
+ 1.8274955749511719,
329
+ 1.8095120191574097,
330
+ 1.8368505239486694,
331
+ 1.684551477432251,
332
+ 1.8621140718460083,
333
+ 1.7836496829986572,
334
+ 1.767014741897583,
335
+ 1.770763635635376,
336
+ 1.8025929927825928,
337
+ 1.887298345565796,
338
+ 1.8839874267578125,
339
+ 1.8471672534942627,
340
+ 1.812026858329773,
341
+ 1.891275405883789,
342
+ 1.7659833431243896,
343
+ 1.7723257541656494,
344
+ 1.8089889287948608,
345
+ 1.76226806640625,
346
+ 1.7039188146591187,
347
+ 1.7891939878463745,
348
+ 1.719329833984375,
349
+ 1.7772554159164429,
350
+ 1.7795614004135132,
351
+ 1.6842670440673828,
352
+ 1.6844215393066406,
353
+ 1.7464462518692017,
354
+ 1.7909437417984009,
355
+ 1.7481502294540405,
356
+ 1.661051869392395,
357
+ 1.6622161865234375,
358
+ 1.6147782802581787,
359
+ 1.6070369482040405,
360
+ 1.657061219215393,
361
+ 1.714040756225586,
362
+ 1.6753875017166138,
363
+ 1.6686259508132935,
364
+ 1.6674834489822388,
365
+ 1.664836049079895,
366
+ 1.6701329946517944,
367
+ 1.7454251050949097,
368
+ 1.6487113237380981,
369
+ 1.6333528757095337,
370
+ 1.657004714012146,
371
+ 1.6842231750488281,
372
+ 1.6459062099456787,
373
+ 1.638092279434204,
374
+ 1.6258095502853394,
375
+ 1.6174780130386353,
376
+ 1.6530956029891968,
377
+ 1.6498746871948242,
378
+ 1.6689900159835815,
379
+ 1.57684326171875,
380
+ 1.6067333221435547,
381
+ 1.6402298212051392,
382
+ 1.5982598066329956,
383
+ 1.5155116319656372,
384
+ 1.692285180091858,
385
+ 1.6260746717453003,
386
+ 1.6221132278442383,
387
+ 1.6246081590652466,
388
+ 1.6117967367172241,
389
+ 1.6194757223129272,
390
+ 1.598102331161499,
391
+ 1.6332045793533325,
392
+ 1.5825088024139404,
393
+ 1.5107394456863403,
394
+ 1.602547526359558,
395
+ 1.6012418270111084,
396
+ 1.6358078718185425,
397
+ 1.532452940940857,
398
+ 1.5894438028335571,
399
+ 1.553748369216919,
400
+ 1.488343596458435,
401
+ 1.5165965557098389,
402
+ 1.4477319717407227,
403
+ 1.536694884300232,
404
+ 1.5032269954681396,
405
+ 1.5219676494598389,
406
+ 1.5278981924057007,
407
+ 1.496490716934204,
408
+ 1.4675737619400024,
409
+ 1.4852674007415771,
410
+ 1.3362356424331665,
411
+ 1.4154537916183472,
412
+ 1.444419026374817,
413
+ 1.3438326120376587,
414
+ 1.3477458953857422,
415
+ 1.24502432346344,
416
+ 1.1895085573196411,
417
+ 1.0301271677017212,
418
+ 1.0721133947372437,
419
+ 0.9904470443725586,
420
+ 0.8669577836990356,
421
+ 0.8616147637367249,
422
+ 0.8164688944816589,
423
+ 0.7701811194419861,
424
+ 0.7990650534629822,
425
+ 0.7189332246780396,
426
+ 0.6271408796310425,
427
+ 0.658858597278595,
428
+ 0.6362829208374023,
429
+ 0.6195059418678284,
430
+ 0.6116113662719727,
431
+ 0.5777509808540344,
432
+ 0.5383127331733704,
433
+ 0.6213598251342773,
434
+ 0.5587041974067688,
435
+ 0.5645622611045837,
436
+ 0.5378245115280151,
437
+ 0.5126824378967285,
438
+ 0.48708105087280273,
439
+ 0.45352664589881897,
440
+ 0.4964592754840851,
441
+ 0.4707101881504059,
442
+ 0.4800238609313965,
443
+ 0.48176077008247375,
444
+ 0.49011462926864624,
445
+ 0.4257812798023224,
446
+ 0.4405454993247986,
447
+ 0.45625683665275574,
448
+ 0.5016933083534241,
449
+ 0.4303135275840759,
450
+ 0.46569007635116577,
451
+ 0.4130376875400543,
452
+ 0.4513709545135498,
453
+ 0.42867225408554077,
454
+ 0.44723838567733765,
455
+ 0.4232305884361267,
456
+ 0.41526561975479126,
457
+ 0.3808412551879883
458
+ ],
459
+ "info_loss": [
460
+ -0.20977783203125,
461
+ -0.2442469596862793,
462
+ -0.26015424728393555,
463
+ -0.29062795639038086,
464
+ -0.148298978805542,
465
+ -0.09887826442718506,
466
+ -0.09743881225585938,
467
+ -0.13865768909454346,
468
+ -0.11879169940948486,
469
+ -0.13874804973602295,
470
+ -0.13430380821228027,
471
+ -0.15891695022583008,
472
+ -0.17007505893707275,
473
+ -0.27056288719177246,
474
+ -0.2535163164138794,
475
+ -0.24606919288635254,
476
+ -0.2501150369644165,
477
+ -0.21087312698364258,
478
+ -0.22212731838226318,
479
+ -0.2465611696243286,
480
+ -0.24265611171722412,
481
+ -0.23396706581115723,
482
+ -0.21633541584014893,
483
+ -0.22790491580963135,
484
+ -0.23808276653289795,
485
+ -0.27501964569091797,
486
+ -0.2901705503463745,
487
+ -0.32492220401763916,
488
+ -0.4848982095718384,
489
+ -0.6613947153091431,
490
+ -0.9592947363853455,
491
+ -1.1417292356491089,
492
+ -1.1531314849853516,
493
+ -1.2465033531188965,
494
+ -1.2249419689178467,
495
+ -1.2093983888626099,
496
+ -1.342968225479126,
497
+ -1.3036762475967407,
498
+ -1.2695143222808838,
499
+ -1.3967969417572021,
500
+ -1.3202941417694092,
501
+ -1.3677679300308228,
502
+ -1.4199516773223877,
503
+ -1.3747591972351074,
504
+ -1.327619194984436,
505
+ -1.4063856601715088,
506
+ -1.4757684469223022,
507
+ -1.4670329093933105,
508
+ -1.3743075132369995,
509
+ -1.3745038509368896,
510
+ -1.3793561458587646,
511
+ -1.3434416055679321,
512
+ -1.3401577472686768,
513
+ -1.4780397415161133,
514
+ -1.4364835023880005,
515
+ -1.403468132019043,
516
+ -1.3874809741973877,
517
+ -1.3744466304779053,
518
+ -1.406492829322815,
519
+ -1.4912915229797363,
520
+ -1.4426169395446777,
521
+ -1.4530000686645508,
522
+ -1.4468318223953247,
523
+ -1.4860813617706299,
524
+ -1.470852017402649,
525
+ -1.4778376817703247,
526
+ -1.3942036628723145,
527
+ -1.4087891578674316,
528
+ -1.461976408958435,
529
+ -1.4693336486816406,
530
+ -1.470320701599121,
531
+ -1.376815676689148,
532
+ -1.3944391012191772,
533
+ -1.4354804754257202,
534
+ -1.4221135377883911,
535
+ -1.3215875625610352,
536
+ -1.5468448400497437,
537
+ -1.4476895332336426,
538
+ -1.453835368156433,
539
+ -1.4539093971252441,
540
+ -1.4728425741195679,
541
+ -1.4641965627670288,
542
+ -1.4120572805404663,
543
+ -1.5112266540527344,
544
+ -1.3862770795822144,
545
+ -1.348540186882019,
546
+ -1.4900604486465454,
547
+ -1.4927648305892944,
548
+ -1.5019117593765259,
549
+ -1.3912949562072754,
550
+ -1.4404125213623047,
551
+ -1.4496182203292847,
552
+ -1.3697378635406494,
553
+ -1.3870716094970703,
554
+ -1.3299174308776855,
555
+ -1.4373135566711426,
556
+ -1.4011181592941284,
557
+ -1.3986619710922241,
558
+ -1.3821871280670166,
559
+ -1.3670028448104858,
560
+ -1.3672733306884766,
561
+ -1.3753691911697388,
562
+ -1.2679178714752197,
563
+ -1.3628404140472412,
564
+ -1.3553755283355713,
565
+ -1.2510477304458618,
566
+ -1.292446494102478,
567
+ -1.1589775085449219,
568
+ -1.1088510751724243,
569
+ -0.9512673616409302,
570
+ -1.0063751935958862,
571
+ -0.9080485701560974,
572
+ -0.7877959609031677,
573
+ -0.7959626913070679,
574
+ -0.7321491837501526,
575
+ -0.7169923782348633,
576
+ -0.7309204936027527,
577
+ -0.6458167433738708,
578
+ -0.5527870655059814,
579
+ -0.5714083909988403,
580
+ -0.5652658343315125,
581
+ -0.5687893033027649,
582
+ -0.5684319138526917,
583
+ -0.5115469694137573,
584
+ -0.47789427638053894,
585
+ -0.5751711130142212,
586
+ -0.510647177696228,
587
+ -0.5287784337997437,
588
+ -0.4918251633644104,
589
+ -0.4407775402069092,
590
+ -0.4399414360523224,
591
+ -0.3773854374885559,
592
+ -0.4628640413284302,
593
+ -0.4261557459831238,
594
+ -0.4435183107852936,
595
+ -0.4269271194934845,
596
+ -0.4480868875980377,
597
+ -0.4070889949798584,
598
+ -0.4065954089164734,
599
+ -0.4023192524909973,
600
+ -0.4620068371295929,
601
+ -0.40317302942276,
602
+ -0.4504946768283844,
603
+ -0.3861498236656189,
604
+ -0.4086449146270752,
605
+ -0.39321067929267883,
606
+ -0.4254463016986847,
607
+ -0.40254276990890503,
608
+ -0.3855791985988617,
609
+ -0.35843029618263245
610
+ ],
611
+ "abs_loss": [
612
+ 2.294851779937744,
613
+ 2.217968463897705,
614
+ 2.085430383682251,
615
+ 1.933550477027893,
616
+ 1.8507790565490723,
617
+ 1.8573724031448364,
618
+ 1.8490761518478394,
619
+ 1.8258271217346191,
620
+ 1.7650401592254639,
621
+ 1.8163961172103882,
622
+ 1.7979645729064941,
623
+ 1.81875479221344,
624
+ 1.8053371906280518,
625
+ 1.8457976579666138,
626
+ 1.8368562459945679,
627
+ 1.7651417255401611,
628
+ 1.7947784662246704,
629
+ 1.8123019933700562,
630
+ 1.75529146194458,
631
+ 1.7103999853134155,
632
+ 1.665234923362732,
633
+ 1.4871249198913574,
634
+ 1.1473619937896729,
635
+ 0.9188952445983887,
636
+ 0.835640549659729,
637
+ 0.7401660680770874,
638
+ 0.6749497652053833,
639
+ 0.623252272605896,
640
+ 0.6291333436965942,
641
+ 0.5610553026199341,
642
+ 0.6342823505401611,
643
+ 0.5864875316619873,
644
+ 0.5124613642692566,
645
+ 0.45354872941970825,
646
+ 0.505251407623291,
647
+ 0.3990432024002075,
648
+ 0.3562731146812439,
649
+ 0.3264189660549164,
650
+ 0.34121859073638916,
651
+ 0.29397594928741455,
652
+ 0.2657366991043091,
653
+ 0.28916534781455994,
654
+ 0.28594478964805603,
655
+ 0.2538009285926819,
656
+ 0.23509228229522705,
657
+ 0.18661481142044067,
658
+ 0.24677905440330505,
659
+ 0.17556226253509521,
660
+ 0.23916086554527283,
661
+ 0.15728092193603516,
662
+ 0.18109017610549927,
663
+ 0.1437540054321289,
664
+ 0.1634223461151123,
665
+ 0.19285456836223602,
666
+ 0.13049659132957458,
667
+ 0.14706766605377197,
668
+ 0.11795340478420258,
669
+ 0.12607064843177795,
670
+ 0.15534041821956635,
671
+ 0.16451779007911682,
672
+ 0.1086958646774292,
673
+ 0.14760926365852356,
674
+ 0.11080951988697052,
675
+ 0.1084408164024353,
676
+ 0.1662750542163849,
677
+ 0.09962759166955948,
678
+ 0.0982552170753479,
679
+ 0.16848281025886536,
680
+ 0.13386185467243195,
681
+ 0.06918559968471527,
682
+ 0.06314872950315475,
683
+ 0.07399706542491913,
684
+ 0.08023437857627869,
685
+ 0.06503905355930328,
686
+ 0.06501073390245438,
687
+ 0.0373649075627327,
688
+ 0.07079947739839554,
689
+ 0.06553753465414047,
690
+ 0.04528520256280899,
691
+ 0.05383242666721344,
692
+ 0.06707749515771866,
693
+ 0.06644883751869202,
694
+ 0.05952143669128418,
695
+ 0.04839048162102699,
696
+ 0.05015701800584793,
697
+ 0.035811927169561386,
698
+ 0.03779171034693718,
699
+ 0.04513540118932724,
700
+ 0.06980851292610168,
701
+ 0.053471483290195465,
702
+ 0.027496855705976486,
703
+ 0.025432242080569267,
704
+ 0.046835899353027344,
705
+ 0.04602156579494476,
706
+ 0.049780599772930145,
707
+ 0.04161358252167702,
708
+ 0.0522635355591774,
709
+ 0.043539658188819885,
710
+ 0.02877829782664776,
711
+ 0.03934263810515404,
712
+ 0.03931758925318718,
713
+ 0.03406631201505661,
714
+ 0.03732343763113022,
715
+ 0.03033389523625374,
716
+ 0.02612752467393875,
717
+ 0.017829855903983116,
718
+ 0.051261644810438156,
719
+ 0.017979038879275322,
720
+ 0.020870964974164963,
721
+ 0.03181803226470947,
722
+ 0.027377821505069733,
723
+ 0.04135163500905037,
724
+ 0.04830572381615639,
725
+ 0.025115353986620903,
726
+ 0.05764007568359375,
727
+ 0.024219660088419914,
728
+ 0.016297604888677597,
729
+ 0.04372173175215721,
730
+ 0.020716790109872818,
731
+ 0.04647689312696457,
732
+ 0.017766781151294708,
733
+ 0.035948872566223145,
734
+ 0.03585080802440643,
735
+ 0.02679501473903656,
736
+ 0.020538482815027237,
737
+ 0.023500392213463783,
738
+ 0.020186439156532288,
739
+ 0.019431833177804947,
740
+ 0.03605831041932106,
741
+ 0.03780654817819595,
742
+ 0.038680173456668854,
743
+ 0.03160756081342697,
744
+ 0.026255251839756966,
745
+ 0.017248544842004776,
746
+ 0.026952730491757393,
747
+ 0.02061101794242859,
748
+ 0.03785064071416855,
749
+ 0.023413699120283127,
750
+ 0.022951096296310425,
751
+ 0.023105671629309654,
752
+ 0.020348209887742996,
753
+ 0.01708866097033024,
754
+ 0.04392319917678833,
755
+ 0.03585944324731827,
756
+ 0.02257593162357807,
757
+ 0.02412750944495201,
758
+ 0.022317443042993546,
759
+ 0.013668009079992771,
760
+ 0.031480975449085236,
761
+ 0.025552885606884956
762
+ ],
763
+ "zipf_loss": [
764
+ 8.771808624267578,
765
+ 6.307893753051758,
766
+ 3.720466136932373,
767
+ 2.4225094318389893,
768
+ 1.8894931077957153,
769
+ 1.7582186460494995,
770
+ 1.7210813760757446,
771
+ 1.709200382232666,
772
+ 1.7017463445663452,
773
+ 1.6964911222457886,
774
+ 1.697059988975525,
775
+ 1.6959459781646729,
776
+ 1.6910948753356934,
777
+ 1.6887447834014893,
778
+ 1.6905392408370972,
779
+ 1.6872717142105103,
780
+ 1.688768982887268,
781
+ 1.67805016040802,
782
+ 1.666795253753662,
783
+ 1.6262922286987305,
784
+ 1.5347145795822144,
785
+ 1.278693675994873,
786
+ 0.6891258358955383,
787
+ 0.3737459182739258,
788
+ 0.18118418753147125,
789
+ 0.15129734575748444,
790
+ 0.10418544709682465,
791
+ 0.1074758768081665,
792
+ 0.08676093816757202,
793
+ 0.07139623165130615,
794
+ 0.10783088207244873,
795
+ 0.10926471650600433,
796
+ 0.06382100284099579,
797
+ 0.0593724325299263,
798
+ 0.05308477208018303,
799
+ 0.045744385570287704,
800
+ 0.05352798104286194,
801
+ 0.041822511702775955,
802
+ 0.039745353162288666,
803
+ 0.03549409657716751,
804
+ 0.04395335540175438,
805
+ 0.035772405564785004,
806
+ 0.02904796227812767,
807
+ 0.035995081067085266,
808
+ 0.038269124925136566,
809
+ 0.03648350387811661,
810
+ 0.02863462083041668,
811
+ 0.035054247826337814,
812
+ 0.026261046528816223,
813
+ 0.02930118702352047,
814
+ 0.03150211647152901,
815
+ 0.024524109438061714,
816
+ 0.02761816792190075,
817
+ 0.02726607210934162,
818
+ 0.03294641897082329,
819
+ 0.03868941217660904,
820
+ 0.023010730743408203,
821
+ 0.022195344790816307,
822
+ 0.024301059544086456,
823
+ 0.02068120427429676,
824
+ 0.020041795447468758,
825
+ 0.029191507026553154,
826
+ 0.025066332891583443,
827
+ 0.019122403115034103,
828
+ 0.023434175178408623,
829
+ 0.027309784665703773,
830
+ 0.019048001617193222,
831
+ 0.017169244587421417,
832
+ 0.018393322825431824,
833
+ 0.028059540316462517,
834
+ 0.028056636452674866,
835
+ 0.028341447934508324,
836
+ 0.02032645046710968,
837
+ 0.013149574398994446,
838
+ 0.01588503085076809,
839
+ 0.012552021071314812,
840
+ 0.01579051837325096,
841
+ 0.021106014028191566,
842
+ 0.04947477951645851,
843
+ 0.01698216237127781,
844
+ 0.022098727524280548,
845
+ 0.01268475130200386,
846
+ 0.022358544170856476,
847
+ 0.01958904229104519,
848
+ 0.010487602092325687,
849
+ 0.01751772314310074,
850
+ 0.023311246186494827,
851
+ 0.01734558492898941,
852
+ 0.015464305877685547,
853
+ 0.01562531292438507,
854
+ 0.012672880664467812,
855
+ 0.018038703128695488,
856
+ 0.023711949586868286,
857
+ 0.01683882251381874,
858
+ 0.014847498387098312,
859
+ 0.026351360604166985,
860
+ 0.021378500387072563,
861
+ 0.018875695765018463,
862
+ 0.019985806196928024,
863
+ 0.01564783975481987,
864
+ 0.02189333736896515,
865
+ 0.030228938907384872,
866
+ 0.01130809634923935,
867
+ 0.014759158715605736,
868
+ 0.026083514094352722,
869
+ 0.012203274294734001,
870
+ 0.020920155569911003,
871
+ 0.017358364537358284,
872
+ 0.021204819902777672,
873
+ 0.02353861555457115,
874
+ 0.02059362642467022,
875
+ 0.02084406465291977,
876
+ 0.022354595363140106,
877
+ 0.024830598384141922,
878
+ 0.02938147634267807,
879
+ 0.03147149458527565,
880
+ 0.029878588393330574,
881
+ 0.013422621414065361,
882
+ 0.014764735475182533,
883
+ 0.012709204107522964,
884
+ 0.01817169226706028,
885
+ 0.028127998113632202,
886
+ 0.018979191780090332,
887
+ 0.024044431746006012,
888
+ 0.015970000997185707,
889
+ 0.03309129178524017,
890
+ 0.009814741089940071,
891
+ 0.018140148371458054,
892
+ 0.03902601823210716,
893
+ 0.026020988821983337,
894
+ 0.024534741416573524,
895
+ 0.012614620849490166,
896
+ 0.014913609251379967,
897
+ 0.019403688609600067,
898
+ 0.022506240755319595,
899
+ 0.016650553792715073,
900
+ 0.014760621823370457,
901
+ 0.007950497791171074,
902
+ 0.02496860735118389,
903
+ 0.014842228963971138,
904
+ 0.02990444004535675,
905
+ 0.011673047207295895,
906
+ 0.0193045511841774,
907
+ 0.012233665212988853,
908
+ 0.02467804029583931,
909
+ 0.026926595717668533,
910
+ 0.018044285476207733,
911
+ 0.013489766046404839,
912
+ 0.013861648738384247,
913
+ 0.016490790992975235
914
+ ],
915
+ "denoise_loss": [],
916
+ "ortho_loss": [
917
+ 0.6935886740684509,
918
+ 0.48234307765960693,
919
+ 0.3088524043560028,
920
+ 0.20281840860843658,
921
+ 0.15621885657310486,
922
+ 0.10378851741552353,
923
+ 0.07804737240076065,
924
+ 0.06271085143089294,
925
+ 0.05669274926185608,
926
+ 0.05394528806209564,
927
+ 0.05465354025363922,
928
+ 0.05041395500302315,
929
+ 0.051717281341552734,
930
+ 0.049015142023563385,
931
+ 0.049808476120233536,
932
+ 0.05488650128245354,
933
+ 0.05550508573651314,
934
+ 0.05550394207239151,
935
+ 0.05379695072770119,
936
+ 0.0546729750931263,
937
+ 0.05207265913486481,
938
+ 0.05092643201351166,
939
+ 0.05123722553253174,
940
+ 0.051657721400260925,
941
+ 0.047756366431713104,
942
+ 0.0484192855656147,
943
+ 0.048870690166950226,
944
+ 0.047017455101013184,
945
+ 0.047359365969896317,
946
+ 0.05155592039227486,
947
+ 0.05629339441657066,
948
+ 0.059633731842041016,
949
+ 0.06442190706729889,
950
+ 0.06989780068397522,
951
+ 0.07373283058404922,
952
+ 0.07644601911306381,
953
+ 0.08206634968519211,
954
+ 0.08509311079978943,
955
+ 0.08663709461688995,
956
+ 0.08705299347639084,
957
+ 0.08799070119857788,
958
+ 0.0882645770907402,
959
+ 0.08959797769784927,
960
+ 0.09180758148431778,
961
+ 0.09077784419059753,
962
+ 0.09113264083862305,
963
+ 0.08888813853263855,
964
+ 0.09106779098510742,
965
+ 0.09238111227750778,
966
+ 0.09107761830091476,
967
+ 0.08944051712751389,
968
+ 0.08877777308225632,
969
+ 0.08691896498203278,
970
+ 0.09018930047750473,
971
+ 0.08876080811023712,
972
+ 0.08874350786209106,
973
+ 0.09306778758764267,
974
+ 0.09424176067113876,
975
+ 0.09464000165462494,
976
+ 0.0924631878733635,
977
+ 0.09248515218496323,
978
+ 0.0956466943025589,
979
+ 0.09548068791627884,
980
+ 0.09362413734197617,
981
+ 0.09517014771699905,
982
+ 0.09995341300964355,
983
+ 0.10262968391180038,
984
+ 0.10635028779506683,
985
+ 0.10774730145931244,
986
+ 0.11003488302230835,
987
+ 0.11236182600259781,
988
+ 0.11327866464853287,
989
+ 0.11391086876392365,
990
+ 0.11230804026126862,
991
+ 0.11398068070411682,
992
+ 0.11666402220726013,
993
+ 0.12519533932209015,
994
+ 0.12921561300754547,
995
+ 0.1298656314611435,
996
+ 0.13479764759540558,
997
+ 0.14023935794830322,
998
+ 0.14109951257705688,
999
+ 0.14126062393188477,
1000
+ 0.143479585647583,
1001
+ 0.14300858974456787,
1002
+ 0.1485244631767273,
1003
+ 0.1472393125295639,
1004
+ 0.14510539174079895,
1005
+ 0.14728957414627075,
1006
+ 0.15059445798397064,
1007
+ 0.15292133390903473,
1008
+ 0.15796807408332825,
1009
+ 0.15788158774375916,
1010
+ 0.15734679996967316,
1011
+ 0.15908081829547882,
1012
+ 0.16669081151485443,
1013
+ 0.17414961755275726,
1014
+ 0.18038775026798248,
1015
+ 0.17948652803897858,
1016
+ 0.1796618551015854,
1017
+ 0.17886602878570557,
1018
+ 0.17792661488056183,
1019
+ 0.18056391179561615,
1020
+ 0.1810716837644577,
1021
+ 0.19002854824066162,
1022
+ 0.1949450820684433,
1023
+ 0.20069141685962677,
1024
+ 0.1973830759525299,
1025
+ 0.2024090737104416,
1026
+ 0.20541264116764069,
1027
+ 0.2075474113225937,
1028
+ 0.21202749013900757,
1029
+ 0.21167878806591034,
1030
+ 0.2112017720937729,
1031
+ 0.21490934491157532,
1032
+ 0.21238234639167786,
1033
+ 0.21160206198692322,
1034
+ 0.21397870779037476,
1035
+ 0.21555015444755554,
1036
+ 0.2155567705631256,
1037
+ 0.2197999805212021,
1038
+ 0.223252072930336,
1039
+ 0.2264471799135208,
1040
+ 0.22776155173778534,
1041
+ 0.2280261218547821,
1042
+ 0.2283269762992859,
1043
+ 0.2301325798034668,
1044
+ 0.22769907116889954,
1045
+ 0.22877106070518494,
1046
+ 0.23092709481716156,
1047
+ 0.22949814796447754,
1048
+ 0.22870399057865143,
1049
+ 0.2302009016275406,
1050
+ 0.23301368951797485,
1051
+ 0.23054614663124084,
1052
+ 0.23147828876972198,
1053
+ 0.2322801649570465,
1054
+ 0.23291891813278198,
1055
+ 0.23302015662193298,
1056
+ 0.23411573469638824,
1057
+ 0.23496660590171814,
1058
+ 0.2342650145292282,
1059
+ 0.23444189131259918,
1060
+ 0.23619861900806427,
1061
+ 0.23654569685459137,
1062
+ 0.23704758286476135,
1063
+ 0.2376752644777298,
1064
+ 0.23932912945747375,
1065
+ 0.23906366527080536,
1066
+ 0.23852844536304474
1067
+ ],
1068
+ "lr": [
1069
+ 8.376068376068378e-06,
1070
+ 1.6923076923076924e-05,
1071
+ 2.5470085470085475e-05,
1072
+ 3.401709401709402e-05,
1073
+ 4e-05,
1074
+ 4e-05,
1075
+ 4e-05,
1076
+ 4e-05,
1077
+ 4e-05,
1078
+ 4e-05,
1079
+ 4e-05,
1080
+ 4e-05,
1081
+ 4e-05,
1082
+ 4e-05,
1083
+ 4e-05,
1084
+ 4e-05,
1085
+ 4e-05,
1086
+ 4e-05,
1087
+ 4e-05,
1088
+ 4e-05,
1089
+ 4e-05,
1090
+ 4e-05,
1091
+ 4e-05,
1092
+ 4e-05,
1093
+ 4e-05,
1094
+ 4e-05,
1095
+ 4e-05,
1096
+ 4e-05,
1097
+ 4e-05,
1098
+ 4e-05,
1099
+ 4e-05,
1100
+ 4e-05,
1101
+ 4e-05,
1102
+ 4e-05,
1103
+ 4e-05,
1104
+ 4e-05,
1105
+ 4e-05,
1106
+ 4e-05,
1107
+ 4e-05,
1108
+ 4e-05,
1109
+ 4e-05,
1110
+ 4e-05,
1111
+ 4e-05,
1112
+ 4e-05,
1113
+ 4e-05,
1114
+ 4e-05,
1115
+ 4e-05,
1116
+ 4e-05,
1117
+ 4e-05,
1118
+ 4e-05,
1119
+ 4e-05,
1120
+ 4e-05,
1121
+ 4e-05,
1122
+ 4e-05,
1123
+ 4e-05,
1124
+ 4e-05,
1125
+ 4e-05,
1126
+ 4e-05,
1127
+ 4e-05,
1128
+ 4e-05,
1129
+ 4e-05,
1130
+ 4e-05,
1131
+ 4e-05,
1132
+ 4e-05,
1133
+ 4e-05,
1134
+ 4e-05,
1135
+ 4e-05,
1136
+ 4e-05,
1137
+ 4e-05,
1138
+ 4e-05,
1139
+ 4e-05,
1140
+ 4e-05,
1141
+ 4e-05,
1142
+ 4e-05,
1143
+ 4e-05,
1144
+ 4e-05,
1145
+ 4e-05,
1146
+ 4e-05,
1147
+ 4e-05,
1148
+ 4e-05,
1149
+ 4e-05,
1150
+ 4e-05,
1151
+ 4e-05,
1152
+ 4e-05,
1153
+ 4e-05,
1154
+ 4e-05,
1155
+ 4e-05,
1156
+ 4e-05,
1157
+ 4e-05,
1158
+ 4e-05,
1159
+ 4e-05,
1160
+ 3.993593461639863e-05,
1161
+ 3.934273662008964e-05,
1162
+ 3.874953862378065e-05,
1163
+ 3.815634062747166e-05,
1164
+ 3.7563142631162665e-05,
1165
+ 3.696994463485368e-05,
1166
+ 3.637674663854469e-05,
1167
+ 3.5783548642235693e-05,
1168
+ 3.519035064592671e-05,
1169
+ 3.459715264961771e-05,
1170
+ 3.400395465330873e-05,
1171
+ 3.341075665699973e-05,
1172
+ 3.281755866069075e-05,
1173
+ 3.2224360664381764e-05,
1174
+ 3.1251515950435014e-05,
1175
+ 3.065831795412602e-05,
1176
+ 3.0065119957817037e-05,
1177
+ 2.947192196150804e-05,
1178
+ 2.887872396519905e-05,
1179
+ 2.8285525968890065e-05,
1180
+ 2.769232797258107e-05,
1181
+ 2.709912997627208e-05,
1182
+ 2.6505931979963087e-05,
1183
+ 2.5912733983654104e-05,
1184
+ 2.5319535987345117e-05,
1185
+ 2.472633799103612e-05,
1186
+ 2.4133139994727132e-05,
1187
+ 2.3539941998418135e-05,
1188
+ 2.2946744002109148e-05,
1189
+ 2.1973899288162408e-05,
1190
+ 2.138070129185341e-05,
1191
+ 2.0787503295544424e-05,
1192
+ 2.019430529923544e-05,
1193
+ 1.9601107302926443e-05,
1194
+ 1.900790930661746e-05,
1195
+ 1.8414711310308462e-05,
1196
+ 1.7821513313999475e-05,
1197
+ 1.7228315317690488e-05,
1198
+ 1.663511732138149e-05,
1199
+ 1.6041919325072507e-05,
1200
+ 1.544872132876351e-05,
1201
+ 1.4855523332454524e-05,
1202
+ 1.4262325336145537e-05,
1203
+ 1.366912733983654e-05,
1204
+ 1.2696282625889797e-05,
1205
+ 1.2103084629580812e-05,
1206
+ 1.1509886633271816e-05,
1207
+ 1.0916688636962829e-05,
1208
+ 1.0323490640653833e-05,
1209
+ 9.730292644344846e-06,
1210
+ 9.137094648035861e-06,
1211
+ 8.543896651726864e-06,
1212
+ 7.950698655417878e-06,
1213
+ 7.357500659108881e-06,
1214
+ 6.764302662799895e-06,
1215
+ 6.171104666490909e-06,
1216
+ 5.577906670181913e-06,
1217
+ 4.984708673872927e-06,
1218
+ 4.391510677563931e-06
1219
+ ],
1220
+ "emb_lr": [],
1221
+ "eval_step": [
1222
+ 750,
1223
+ 1532,
1224
+ 2314,
1225
+ 3096,
1226
+ 3878,
1227
+ 4660,
1228
+ 5442,
1229
+ 6224,
1230
+ 7006,
1231
+ 7788
1232
+ ],
1233
+ "eval_accuracy": [
1234
+ 0.0,
1235
+ 0.01,
1236
+ 0.2,
1237
+ 0.5,
1238
+ 0.57,
1239
+ 0.69,
1240
+ 0.74,
1241
+ 0.83,
1242
+ 0.86,
1243
+ 0.9
1244
+ ]
1245
+ },
1246
+ "final_accuracy": 0.8245833333333333,
1247
+ "sft_eval": {
1248
+ "config": {
1249
+ "ops": "add_sub",
1250
+ "K": null,
1251
+ "mode": "sft",
1252
+ "n_digits": 6,
1253
+ "n_per_split": 100
1254
+ },
1255
+ "splits": {
1256
+ "add_S0": {
1257
+ "full_accuracy": 0.18,
1258
+ "n_examples": 100,
1259
+ "per_subtask": {
1260
+ "SA": {
1261
+ "accuracy": 0.8066115702479338,
1262
+ "count": 605
1263
+ },
1264
+ "SS": {
1265
+ "accuracy": 0.8947368421052632,
1266
+ "count": 95
1267
+ }
1268
+ }
1269
+ },
1270
+ "add_S1": {
1271
+ "full_accuracy": 0.22,
1272
+ "n_examples": 100,
1273
+ "per_subtask": {
1274
+ "SA": {
1275
+ "accuracy": 0.8529411764705882,
1276
+ "count": 204
1277
+ },
1278
+ "SC": {
1279
+ "accuracy": 0.8816568047337278,
1280
+ "count": 169
1281
+ },
1282
+ "SS": {
1283
+ "accuracy": 0.9032258064516129,
1284
+ "count": 31
1285
+ },
1286
+ "UC": {
1287
+ "accuracy": 0.7466216216216216,
1288
+ "count": 296
1289
+ }
1290
+ }
1291
+ },
1292
+ "add_S2": {
1293
+ "full_accuracy": 0.17,
1294
+ "n_examples": 100,
1295
+ "per_subtask": {
1296
+ "SA": {
1297
+ "accuracy": 0.8404907975460123,
1298
+ "count": 163
1299
+ },
1300
+ "SC": {
1301
+ "accuracy": 0.9,
1302
+ "count": 130
1303
+ },
1304
+ "SS": {
1305
+ "accuracy": 0.735632183908046,
1306
+ "count": 87
1307
+ },
1308
+ "UC": {
1309
+ "accuracy": 0.6798029556650246,
1310
+ "count": 203
1311
+ },
1312
+ "US": {
1313
+ "accuracy": 0.7948717948717948,
1314
+ "count": 117
1315
+ }
1316
+ }
1317
+ },
1318
+ "add_S3": {
1319
+ "full_accuracy": 0.19,
1320
+ "n_examples": 100,
1321
+ "per_subtask": {
1322
+ "SA": {
1323
+ "accuracy": 0.9090909090909091,
1324
+ "count": 121
1325
+ },
1326
+ "SC": {
1327
+ "accuracy": 0.859504132231405,
1328
+ "count": 121
1329
+ },
1330
+ "SS": {
1331
+ "accuracy": 0.7346938775510204,
1332
+ "count": 49
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.7043010752688172,
1336
+ "count": 186
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 0.7354260089686099,
1340
+ "count": 223
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_S4": {
1345
+ "full_accuracy": 0.32,
1346
+ "n_examples": 100,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 0.9326923076923077,
1350
+ "count": 104
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 0.8867924528301887,
1354
+ "count": 106
1355
+ },
1356
+ "SS": {
1357
+ "accuracy": 0.7391304347826086,
1358
+ "count": 23
1359
+ },
1360
+ "UC": {
1361
+ "accuracy": 0.7625,
1362
+ "count": 160
1363
+ },
1364
+ "US": {
1365
+ "accuracy": 0.6742671009771987,
1366
+ "count": 307
1367
+ }
1368
+ }
1369
+ },
1370
+ "add_S5": {
1371
+ "full_accuracy": 0.3,
1372
+ "n_examples": 100,
1373
+ "per_subtask": {
1374
+ "SA": {
1375
+ "accuracy": 0.86,
1376
+ "count": 100
1377
+ },
1378
+ "SC": {
1379
+ "accuracy": 0.93,
1380
+ "count": 100
1381
+ },
1382
+ "UC": {
1383
+ "accuracy": 0.66,
1384
+ "count": 100
1385
+ },
1386
+ "US": {
1387
+ "accuracy": 0.5175,
1388
+ "count": 400
1389
+ }
1390
+ }
1391
+ },
1392
+ "add_S6": {
1393
+ "full_accuracy": 0.54,
1394
+ "n_examples": 100,
1395
+ "per_subtask": {
1396
+ "SC": {
1397
+ "accuracy": 0.97,
1398
+ "count": 100
1399
+ },
1400
+ "UC": {
1401
+ "accuracy": 0.77,
1402
+ "count": 100
1403
+ },
1404
+ "US": {
1405
+ "accuracy": 0.718,
1406
+ "count": 500
1407
+ }
1408
+ }
1409
+ },
1410
+ "add_random": {
1411
+ "full_accuracy": 0.205,
1412
+ "n_examples": 200,
1413
+ "per_subtask": {
1414
+ "SA": {
1415
+ "accuracy": 0.843400447427293,
1416
+ "count": 447
1417
+ },
1418
+ "SC": {
1419
+ "accuracy": 0.8875,
1420
+ "count": 320
1421
+ },
1422
+ "SS": {
1423
+ "accuracy": 0.8035714285714286,
1424
+ "count": 56
1425
+ },
1426
+ "UC": {
1427
+ "accuracy": 0.7580340264650284,
1428
+ "count": 529
1429
+ },
1430
+ "US": {
1431
+ "accuracy": 0.6458333333333334,
1432
+ "count": 48
1433
+ }
1434
+ }
1435
+ },
1436
+ "add_C3": {
1437
+ "full_accuracy": 0.14,
1438
+ "n_examples": 100,
1439
+ "per_subtask": {
1440
+ "SA": {
1441
+ "accuracy": 0.88,
1442
+ "count": 300
1443
+ },
1444
+ "SC": {
1445
+ "accuracy": 0.93,
1446
+ "count": 100
1447
+ },
1448
+ "UC": {
1449
+ "accuracy": 0.5958549222797928,
1450
+ "count": 193
1451
+ },
1452
+ "US": {
1453
+ "accuracy": 0.6261682242990654,
1454
+ "count": 107
1455
+ }
1456
+ }
1457
+ },
1458
+ "add_C4": {
1459
+ "full_accuracy": 0.16,
1460
+ "n_examples": 100,
1461
+ "per_subtask": {
1462
+ "SA": {
1463
+ "accuracy": 0.92,
1464
+ "count": 200
1465
+ },
1466
+ "SC": {
1467
+ "accuracy": 0.95,
1468
+ "count": 100
1469
+ },
1470
+ "UC": {
1471
+ "accuracy": 0.59765625,
1472
+ "count": 256
1473
+ },
1474
+ "US": {
1475
+ "accuracy": 0.6805555555555556,
1476
+ "count": 144
1477
+ }
1478
+ }
1479
+ },
1480
+ "add_C5": {
1481
+ "full_accuracy": 0.08,
1482
+ "n_examples": 100,
1483
+ "per_subtask": {
1484
+ "SA": {
1485
+ "accuracy": 0.95,
1486
+ "count": 100
1487
+ },
1488
+ "SC": {
1489
+ "accuracy": 0.93,
1490
+ "count": 100
1491
+ },
1492
+ "UC": {
1493
+ "accuracy": 0.6045751633986928,
1494
+ "count": 306
1495
+ },
1496
+ "US": {
1497
+ "accuracy": 0.7268041237113402,
1498
+ "count": 194
1499
+ }
1500
+ }
1501
+ },
1502
+ "add_C6": {
1503
+ "full_accuracy": 0.18,
1504
+ "n_examples": 100,
1505
+ "per_subtask": {
1506
+ "SC": {
1507
+ "accuracy": 0.94,
1508
+ "count": 100
1509
+ },
1510
+ "UC": {
1511
+ "accuracy": 0.73224043715847,
1512
+ "count": 366
1513
+ },
1514
+ "US": {
1515
+ "accuracy": 0.8034188034188035,
1516
+ "count": 234
1517
+ }
1518
+ }
1519
+ },
1520
+ "sub_M0": {
1521
+ "full_accuracy": 0.48,
1522
+ "n_examples": 100,
1523
+ "per_subtask": {
1524
+ "MD": {
1525
+ "accuracy": 0.9068219633943427,
1526
+ "count": 601
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 0.898989898989899,
1530
+ "count": 99
1531
+ }
1532
+ }
1533
+ },
1534
+ "sub_M1": {
1535
+ "full_accuracy": 0.23,
1536
+ "n_examples": 100,
1537
+ "per_subtask": {
1538
+ "MD": {
1539
+ "accuracy": 0.9032258064516129,
1540
+ "count": 279
1541
+ },
1542
+ "MB": {
1543
+ "accuracy": 0.903448275862069,
1544
+ "count": 145
1545
+ },
1546
+ "ME": {
1547
+ "accuracy": 0.9166666666666666,
1548
+ "count": 24
1549
+ },
1550
+ "UB": {
1551
+ "accuracy": 0.7182539682539683,
1552
+ "count": 252
1553
+ }
1554
+ }
1555
+ },
1556
+ "sub_M2": {
1557
+ "full_accuracy": 0.11,
1558
+ "n_examples": 100,
1559
+ "per_subtask": {
1560
+ "MD": {
1561
+ "accuracy": 0.9436619718309859,
1562
+ "count": 213
1563
+ },
1564
+ "MB": {
1565
+ "accuracy": 0.831858407079646,
1566
+ "count": 113
1567
+ },
1568
+ "ME": {
1569
+ "accuracy": 0.9058823529411765,
1570
+ "count": 85
1571
+ },
1572
+ "UB": {
1573
+ "accuracy": 0.5524861878453039,
1574
+ "count": 181
1575
+ },
1576
+ "UD": {
1577
+ "accuracy": 0.6944444444444444,
1578
+ "count": 108
1579
+ }
1580
+ }
1581
+ },
1582
+ "sub_M3": {
1583
+ "full_accuracy": 0.09,
1584
+ "n_examples": 100,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 0.9720670391061452,
1588
+ "count": 179
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 0.8543689320388349,
1592
+ "count": 103
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 0.9464285714285714,
1596
+ "count": 56
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.4697986577181208,
1600
+ "count": 149
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.48826291079812206,
1604
+ "count": 213
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_M4": {
1609
+ "full_accuracy": 0.04,
1610
+ "n_examples": 100,
1611
+ "per_subtask": {
1612
+ "MD": {
1613
+ "accuracy": 0.955,
1614
+ "count": 200
1615
+ },
1616
+ "MB": {
1617
+ "accuracy": 0.87,
1618
+ "count": 100
1619
+ },
1620
+ "UB": {
1621
+ "accuracy": 0.48,
1622
+ "count": 100
1623
+ },
1624
+ "UD": {
1625
+ "accuracy": 0.33666666666666667,
1626
+ "count": 300
1627
+ }
1628
+ }
1629
+ },
1630
+ "sub_M5": {
1631
+ "full_accuracy": 0.13,
1632
+ "n_examples": 100,
1633
+ "per_subtask": {
1634
+ "MD": {
1635
+ "accuracy": 1.0,
1636
+ "count": 100
1637
+ },
1638
+ "MB": {
1639
+ "accuracy": 0.96,
1640
+ "count": 100
1641
+ },
1642
+ "UB": {
1643
+ "accuracy": 0.59,
1644
+ "count": 100
1645
+ },
1646
+ "UD": {
1647
+ "accuracy": 0.3675,
1648
+ "count": 400
1649
+ }
1650
+ }
1651
+ },
1652
+ "sub_random": {
1653
+ "full_accuracy": 0.29,
1654
+ "n_examples": 200,
1655
+ "per_subtask": {
1656
+ "MD": {
1657
+ "accuracy": 0.915,
1658
+ "count": 600
1659
+ },
1660
+ "MB": {
1661
+ "accuracy": 0.8801498127340824,
1662
+ "count": 267
1663
+ },
1664
+ "ME": {
1665
+ "accuracy": 0.9622641509433962,
1666
+ "count": 53
1667
+ },
1668
+ "UB": {
1669
+ "accuracy": 0.6993166287015945,
1670
+ "count": 439
1671
+ },
1672
+ "UD": {
1673
+ "accuracy": 0.8048780487804879,
1674
+ "count": 41
1675
+ }
1676
+ }
1677
+ },
1678
+ "sub_B3": {
1679
+ "full_accuracy": 0.11,
1680
+ "n_examples": 100,
1681
+ "per_subtask": {
1682
+ "MD": {
1683
+ "accuracy": 0.92,
1684
+ "count": 300
1685
+ },
1686
+ "MB": {
1687
+ "accuracy": 0.95,
1688
+ "count": 100
1689
+ },
1690
+ "UB": {
1691
+ "accuracy": 0.5380710659898477,
1692
+ "count": 197
1693
+ },
1694
+ "UD": {
1695
+ "accuracy": 0.5242718446601942,
1696
+ "count": 103
1697
+ }
1698
+ }
1699
+ },
1700
+ "sub_B4": {
1701
+ "full_accuracy": 0.11,
1702
+ "n_examples": 100,
1703
+ "per_subtask": {
1704
+ "MD": {
1705
+ "accuracy": 0.975,
1706
+ "count": 200
1707
+ },
1708
+ "MB": {
1709
+ "accuracy": 0.93,
1710
+ "count": 100
1711
+ },
1712
+ "UB": {
1713
+ "accuracy": 0.5546558704453441,
1714
+ "count": 247
1715
+ },
1716
+ "UD": {
1717
+ "accuracy": 0.5359477124183006,
1718
+ "count": 153
1719
+ }
1720
+ }
1721
+ },
1722
+ "sub_B5": {
1723
+ "full_accuracy": 0.03,
1724
+ "n_examples": 100,
1725
+ "per_subtask": {
1726
+ "MD": {
1727
+ "accuracy": 1.0,
1728
+ "count": 100
1729
+ },
1730
+ "MB": {
1731
+ "accuracy": 0.92,
1732
+ "count": 100
1733
+ },
1734
+ "UB": {
1735
+ "accuracy": 0.5234899328859061,
1736
+ "count": 298
1737
+ },
1738
+ "UD": {
1739
+ "accuracy": 0.504950495049505,
1740
+ "count": 202
1741
+ }
1742
+ }
1743
+ }
1744
+ },
1745
+ "summary": {
1746
+ "overall_accuracy": 0.19958333333333333,
1747
+ "total_examples": 2400,
1748
+ "n_splits": 22
1749
+ }
1750
+ },
1751
+ "sorl_eval": {
1752
+ "config": {
1753
+ "ops": "add_sub",
1754
+ "K": 4,
1755
+ "mode": "sorl",
1756
+ "n_digits": 6,
1757
+ "n_per_split": 100
1758
+ },
1759
+ "splits": {
1760
+ "add_S0": {
1761
+ "full_accuracy": 1.0,
1762
+ "n_examples": 100,
1763
+ "per_subtask": {
1764
+ "SA": {
1765
+ "accuracy": 1.0,
1766
+ "count": 605
1767
+ },
1768
+ "SS": {
1769
+ "accuracy": 1.0,
1770
+ "count": 95
1771
+ }
1772
+ }
1773
+ },
1774
+ "add_S1": {
1775
+ "full_accuracy": 0.98,
1776
+ "n_examples": 100,
1777
+ "per_subtask": {
1778
+ "SA": {
1779
+ "accuracy": 0.9950980392156863,
1780
+ "count": 204
1781
+ },
1782
+ "SC": {
1783
+ "accuracy": 0.9940828402366864,
1784
+ "count": 169
1785
+ },
1786
+ "SS": {
1787
+ "accuracy": 1.0,
1788
+ "count": 31
1789
+ },
1790
+ "UC": {
1791
+ "accuracy": 1.0,
1792
+ "count": 296
1793
+ }
1794
+ }
1795
+ },
1796
+ "add_S2": {
1797
+ "full_accuracy": 0.98,
1798
+ "n_examples": 100,
1799
+ "per_subtask": {
1800
+ "SA": {
1801
+ "accuracy": 0.9938650306748467,
1802
+ "count": 163
1803
+ },
1804
+ "SC": {
1805
+ "accuracy": 0.9923076923076923,
1806
+ "count": 130
1807
+ },
1808
+ "SS": {
1809
+ "accuracy": 1.0,
1810
+ "count": 87
1811
+ },
1812
+ "UC": {
1813
+ "accuracy": 1.0,
1814
+ "count": 203
1815
+ },
1816
+ "US": {
1817
+ "accuracy": 1.0,
1818
+ "count": 117
1819
+ }
1820
+ }
1821
+ },
1822
+ "add_S3": {
1823
+ "full_accuracy": 0.9,
1824
+ "n_examples": 100,
1825
+ "per_subtask": {
1826
+ "SA": {
1827
+ "accuracy": 1.0,
1828
+ "count": 121
1829
+ },
1830
+ "SC": {
1831
+ "accuracy": 1.0,
1832
+ "count": 121
1833
+ },
1834
+ "SS": {
1835
+ "accuracy": 1.0,
1836
+ "count": 49
1837
+ },
1838
+ "UC": {
1839
+ "accuracy": 0.9516129032258065,
1840
+ "count": 186
1841
+ },
1842
+ "US": {
1843
+ "accuracy": 0.9955156950672646,
1844
+ "count": 223
1845
+ }
1846
+ }
1847
+ },
1848
+ "add_S4": {
1849
+ "full_accuracy": 0.77,
1850
+ "n_examples": 100,
1851
+ "per_subtask": {
1852
+ "SA": {
1853
+ "accuracy": 1.0,
1854
+ "count": 104
1855
+ },
1856
+ "SC": {
1857
+ "accuracy": 1.0,
1858
+ "count": 106
1859
+ },
1860
+ "SS": {
1861
+ "accuracy": 1.0,
1862
+ "count": 23
1863
+ },
1864
+ "UC": {
1865
+ "accuracy": 0.86875,
1866
+ "count": 160
1867
+ },
1868
+ "US": {
1869
+ "accuracy": 0.993485342019544,
1870
+ "count": 307
1871
+ }
1872
+ }
1873
+ },
1874
+ "add_S5": {
1875
+ "full_accuracy": 0.68,
1876
+ "n_examples": 100,
1877
+ "per_subtask": {
1878
+ "SA": {
1879
+ "accuracy": 1.0,
1880
+ "count": 100
1881
+ },
1882
+ "SC": {
1883
+ "accuracy": 1.0,
1884
+ "count": 100
1885
+ },
1886
+ "UC": {
1887
+ "accuracy": 0.7,
1888
+ "count": 100
1889
+ },
1890
+ "US": {
1891
+ "accuracy": 0.96,
1892
+ "count": 400
1893
+ }
1894
+ }
1895
+ },
1896
+ "add_S6": {
1897
+ "full_accuracy": 0.89,
1898
+ "n_examples": 100,
1899
+ "per_subtask": {
1900
+ "SC": {
1901
+ "accuracy": 1.0,
1902
+ "count": 100
1903
+ },
1904
+ "UC": {
1905
+ "accuracy": 0.89,
1906
+ "count": 100
1907
+ },
1908
+ "US": {
1909
+ "accuracy": 0.97,
1910
+ "count": 500
1911
+ }
1912
+ }
1913
+ },
1914
+ "add_random": {
1915
+ "full_accuracy": 0.99,
1916
+ "n_examples": 200,
1917
+ "per_subtask": {
1918
+ "SA": {
1919
+ "accuracy": 1.0,
1920
+ "count": 447
1921
+ },
1922
+ "SC": {
1923
+ "accuracy": 1.0,
1924
+ "count": 320
1925
+ },
1926
+ "SS": {
1927
+ "accuracy": 1.0,
1928
+ "count": 56
1929
+ },
1930
+ "UC": {
1931
+ "accuracy": 0.996219281663516,
1932
+ "count": 529
1933
+ },
1934
+ "US": {
1935
+ "accuracy": 1.0,
1936
+ "count": 48
1937
+ }
1938
+ }
1939
+ },
1940
+ "add_C3": {
1941
+ "full_accuracy": 0.93,
1942
+ "n_examples": 100,
1943
+ "per_subtask": {
1944
+ "SA": {
1945
+ "accuracy": 1.0,
1946
+ "count": 300
1947
+ },
1948
+ "SC": {
1949
+ "accuracy": 1.0,
1950
+ "count": 100
1951
+ },
1952
+ "UC": {
1953
+ "accuracy": 0.9637305699481865,
1954
+ "count": 193
1955
+ },
1956
+ "US": {
1957
+ "accuracy": 1.0,
1958
+ "count": 107
1959
+ }
1960
+ }
1961
+ },
1962
+ "add_C4": {
1963
+ "full_accuracy": 0.91,
1964
+ "n_examples": 100,
1965
+ "per_subtask": {
1966
+ "SA": {
1967
+ "accuracy": 1.0,
1968
+ "count": 200
1969
+ },
1970
+ "SC": {
1971
+ "accuracy": 1.0,
1972
+ "count": 100
1973
+ },
1974
+ "UC": {
1975
+ "accuracy": 0.96484375,
1976
+ "count": 256
1977
+ },
1978
+ "US": {
1979
+ "accuracy": 1.0,
1980
+ "count": 144
1981
+ }
1982
+ }
1983
+ },
1984
+ "add_C5": {
1985
+ "full_accuracy": 0.92,
1986
+ "n_examples": 100,
1987
+ "per_subtask": {
1988
+ "SA": {
1989
+ "accuracy": 1.0,
1990
+ "count": 100
1991
+ },
1992
+ "SC": {
1993
+ "accuracy": 1.0,
1994
+ "count": 100
1995
+ },
1996
+ "UC": {
1997
+ "accuracy": 0.9771241830065359,
1998
+ "count": 306
1999
+ },
2000
+ "US": {
2001
+ "accuracy": 0.9948453608247423,
2002
+ "count": 194
2003
+ }
2004
+ }
2005
+ },
2006
+ "add_C6": {
2007
+ "full_accuracy": 0.93,
2008
+ "n_examples": 100,
2009
+ "per_subtask": {
2010
+ "SC": {
2011
+ "accuracy": 1.0,
2012
+ "count": 100
2013
+ },
2014
+ "UC": {
2015
+ "accuracy": 0.9808743169398907,
2016
+ "count": 366
2017
+ },
2018
+ "US": {
2019
+ "accuracy": 1.0,
2020
+ "count": 234
2021
+ }
2022
+ }
2023
+ },
2024
+ "sub_M0": {
2025
+ "full_accuracy": 0.98,
2026
+ "n_examples": 100,
2027
+ "per_subtask": {
2028
+ "MD": {
2029
+ "accuracy": 0.9966722129783694,
2030
+ "count": 601
2031
+ },
2032
+ "ME": {
2033
+ "accuracy": 1.0,
2034
+ "count": 99
2035
+ }
2036
+ }
2037
+ },
2038
+ "sub_M1": {
2039
+ "full_accuracy": 0.98,
2040
+ "n_examples": 100,
2041
+ "per_subtask": {
2042
+ "MD": {
2043
+ "accuracy": 0.996415770609319,
2044
+ "count": 279
2045
+ },
2046
+ "MB": {
2047
+ "accuracy": 0.993103448275862,
2048
+ "count": 145
2049
+ },
2050
+ "ME": {
2051
+ "accuracy": 1.0,
2052
+ "count": 24
2053
+ },
2054
+ "UB": {
2055
+ "accuracy": 1.0,
2056
+ "count": 252
2057
+ }
2058
+ }
2059
+ },
2060
+ "sub_M2": {
2061
+ "full_accuracy": 0.94,
2062
+ "n_examples": 100,
2063
+ "per_subtask": {
2064
+ "MD": {
2065
+ "accuracy": 0.9953051643192489,
2066
+ "count": 213
2067
+ },
2068
+ "MB": {
2069
+ "accuracy": 1.0,
2070
+ "count": 113
2071
+ },
2072
+ "ME": {
2073
+ "accuracy": 1.0,
2074
+ "count": 85
2075
+ },
2076
+ "UB": {
2077
+ "accuracy": 0.9723756906077348,
2078
+ "count": 181
2079
+ },
2080
+ "UD": {
2081
+ "accuracy": 1.0,
2082
+ "count": 108
2083
+ }
2084
+ }
2085
+ },
2086
+ "sub_M3": {
2087
+ "full_accuracy": 0.58,
2088
+ "n_examples": 100,
2089
+ "per_subtask": {
2090
+ "MD": {
2091
+ "accuracy": 1.0,
2092
+ "count": 179
2093
+ },
2094
+ "MB": {
2095
+ "accuracy": 1.0,
2096
+ "count": 103
2097
+ },
2098
+ "ME": {
2099
+ "accuracy": 1.0,
2100
+ "count": 56
2101
+ },
2102
+ "UB": {
2103
+ "accuracy": 0.7315436241610739,
2104
+ "count": 149
2105
+ },
2106
+ "UD": {
2107
+ "accuracy": 0.9765258215962441,
2108
+ "count": 213
2109
+ }
2110
+ }
2111
+ },
2112
+ "sub_M4": {
2113
+ "full_accuracy": 0.27,
2114
+ "n_examples": 100,
2115
+ "per_subtask": {
2116
+ "MD": {
2117
+ "accuracy": 1.0,
2118
+ "count": 200
2119
+ },
2120
+ "MB": {
2121
+ "accuracy": 1.0,
2122
+ "count": 100
2123
+ },
2124
+ "UB": {
2125
+ "accuracy": 0.56,
2126
+ "count": 100
2127
+ },
2128
+ "UD": {
2129
+ "accuracy": 0.7933333333333333,
2130
+ "count": 300
2131
+ }
2132
+ }
2133
+ },
2134
+ "sub_M5": {
2135
+ "full_accuracy": 0.07,
2136
+ "n_examples": 100,
2137
+ "per_subtask": {
2138
+ "MD": {
2139
+ "accuracy": 1.0,
2140
+ "count": 100
2141
+ },
2142
+ "MB": {
2143
+ "accuracy": 1.0,
2144
+ "count": 100
2145
+ },
2146
+ "UB": {
2147
+ "accuracy": 0.53,
2148
+ "count": 100
2149
+ },
2150
+ "UD": {
2151
+ "accuracy": 0.57,
2152
+ "count": 400
2153
+ }
2154
+ }
2155
+ },
2156
+ "sub_random": {
2157
+ "full_accuracy": 0.965,
2158
+ "n_examples": 200,
2159
+ "per_subtask": {
2160
+ "MD": {
2161
+ "accuracy": 0.9933333333333333,
2162
+ "count": 600
2163
+ },
2164
+ "MB": {
2165
+ "accuracy": 1.0,
2166
+ "count": 267
2167
+ },
2168
+ "ME": {
2169
+ "accuracy": 1.0,
2170
+ "count": 53
2171
+ },
2172
+ "UB": {
2173
+ "accuracy": 0.9931662870159453,
2174
+ "count": 439
2175
+ },
2176
+ "UD": {
2177
+ "accuracy": 1.0,
2178
+ "count": 41
2179
+ }
2180
+ }
2181
+ },
2182
+ "sub_B3": {
2183
+ "full_accuracy": 0.8,
2184
+ "n_examples": 100,
2185
+ "per_subtask": {
2186
+ "MD": {
2187
+ "accuracy": 0.99,
2188
+ "count": 300
2189
+ },
2190
+ "MB": {
2191
+ "accuracy": 1.0,
2192
+ "count": 100
2193
+ },
2194
+ "UB": {
2195
+ "accuracy": 0.9187817258883249,
2196
+ "count": 197
2197
+ },
2198
+ "UD": {
2199
+ "accuracy": 0.9805825242718447,
2200
+ "count": 103
2201
+ }
2202
+ }
2203
+ },
2204
+ "sub_B4": {
2205
+ "full_accuracy": 0.73,
2206
+ "n_examples": 100,
2207
+ "per_subtask": {
2208
+ "MD": {
2209
+ "accuracy": 0.985,
2210
+ "count": 200
2211
+ },
2212
+ "MB": {
2213
+ "accuracy": 1.0,
2214
+ "count": 100
2215
+ },
2216
+ "UB": {
2217
+ "accuracy": 0.9230769230769231,
2218
+ "count": 247
2219
+ },
2220
+ "UD": {
2221
+ "accuracy": 0.9281045751633987,
2222
+ "count": 153
2223
+ }
2224
+ }
2225
+ },
2226
+ "sub_B5": {
2227
+ "full_accuracy": 0.65,
2228
+ "n_examples": 100,
2229
+ "per_subtask": {
2230
+ "MD": {
2231
+ "accuracy": 1.0,
2232
+ "count": 100
2233
+ },
2234
+ "MB": {
2235
+ "accuracy": 1.0,
2236
+ "count": 100
2237
+ },
2238
+ "UB": {
2239
+ "accuracy": 0.9026845637583892,
2240
+ "count": 298
2241
+ },
2242
+ "UD": {
2243
+ "accuracy": 0.9108910891089109,
2244
+ "count": 202
2245
+ }
2246
+ }
2247
+ }
2248
+ },
2249
+ "summary": {
2250
+ "overall_accuracy": 0.8245833333333333,
2251
+ "total_examples": 2400,
2252
+ "n_splits": 22
2253
+ }
2254
+ },
2255
+ "sorl_overall_accuracy": 0.8245833333333333,
2256
+ "sft_overall_accuracy": 0.19958333333333333
2257
+ }
add_sub_sorl_v1_abs10_50K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb609285c5151c0d0634d5d2b7a3d5ff7ec57d8acf496116f999081eba9e3749
3
+ size 650303660
add_sub_sorl_v1_abs10_50K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 234,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 781,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K4_50K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 10,
65
+ "dataset_size": 50000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162499262,
71
+ "run_name": "add_sub_sorl_v1_abs10_50K",
72
+ "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
+ "timestamp": "2026-04-12T19:31:11.043883+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "ajnrconh",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/ajnrconh",
81
+ "final_accuracy": 0.8245833333333333,
82
+ "sft_accuracy": 0.19958333333333333,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }