amirali1985 commited on
Commit
faac3ac
·
verified ·
1 Parent(s): dae0fa4

Upload add_sub_sorl_v1_abs5_K1_50K

Browse files
add_sub_sorl_v1_abs5_K1_50K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151649
37
+ }
add_sub_sorl_v1_abs5_K1_50K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs5_K1_50K/metrics.json ADDED
@@ -0,0 +1,2257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 400,
12
+ 450,
13
+ 500,
14
+ 550,
15
+ 600,
16
+ 650,
17
+ 700,
18
+ 750,
19
+ 832,
20
+ 882,
21
+ 932,
22
+ 982,
23
+ 1032,
24
+ 1082,
25
+ 1132,
26
+ 1182,
27
+ 1232,
28
+ 1282,
29
+ 1332,
30
+ 1382,
31
+ 1432,
32
+ 1482,
33
+ 1532,
34
+ 1614,
35
+ 1664,
36
+ 1714,
37
+ 1764,
38
+ 1814,
39
+ 1864,
40
+ 1914,
41
+ 1964,
42
+ 2014,
43
+ 2064,
44
+ 2114,
45
+ 2164,
46
+ 2214,
47
+ 2264,
48
+ 2314,
49
+ 2396,
50
+ 2446,
51
+ 2496,
52
+ 2546,
53
+ 2596,
54
+ 2646,
55
+ 2696,
56
+ 2746,
57
+ 2796,
58
+ 2846,
59
+ 2896,
60
+ 2946,
61
+ 2996,
62
+ 3046,
63
+ 3096,
64
+ 3178,
65
+ 3228,
66
+ 3278,
67
+ 3328,
68
+ 3378,
69
+ 3428,
70
+ 3478,
71
+ 3528,
72
+ 3578,
73
+ 3628,
74
+ 3678,
75
+ 3728,
76
+ 3778,
77
+ 3828,
78
+ 3878,
79
+ 3960,
80
+ 4010,
81
+ 4060,
82
+ 4110,
83
+ 4160,
84
+ 4210,
85
+ 4260,
86
+ 4310,
87
+ 4360,
88
+ 4410,
89
+ 4460,
90
+ 4510,
91
+ 4560,
92
+ 4610,
93
+ 4660,
94
+ 4742,
95
+ 4792,
96
+ 4842,
97
+ 4892,
98
+ 4942,
99
+ 4992,
100
+ 5042,
101
+ 5092,
102
+ 5142,
103
+ 5192,
104
+ 5242,
105
+ 5292,
106
+ 5342,
107
+ 5392,
108
+ 5442,
109
+ 5524,
110
+ 5574,
111
+ 5624,
112
+ 5674,
113
+ 5724,
114
+ 5774,
115
+ 5824,
116
+ 5874,
117
+ 5924,
118
+ 5974,
119
+ 6024,
120
+ 6074,
121
+ 6124,
122
+ 6174,
123
+ 6224,
124
+ 6306,
125
+ 6356,
126
+ 6406,
127
+ 6456,
128
+ 6506,
129
+ 6556,
130
+ 6606,
131
+ 6656,
132
+ 6706,
133
+ 6756,
134
+ 6806,
135
+ 6856,
136
+ 6906,
137
+ 6956,
138
+ 7006,
139
+ 7088,
140
+ 7138,
141
+ 7188,
142
+ 7238,
143
+ 7288,
144
+ 7338,
145
+ 7388,
146
+ 7438,
147
+ 7488,
148
+ 7538,
149
+ 7588,
150
+ 7638,
151
+ 7688,
152
+ 7738,
153
+ 7788
154
+ ],
155
+ "loss": [
156
+ 0.2505636215209961,
157
+ 8.46504020690918,
158
+ 8.983885765075684,
159
+ 5.396197319030762,
160
+ 4.661125183105469,
161
+ 4.198425769805908,
162
+ 3.9200804233551025,
163
+ 4.002684116363525,
164
+ 3.967268705368042,
165
+ 3.5404810905456543,
166
+ 2.8022069931030273,
167
+ 1.8075677156448364,
168
+ -0.4370992183685303,
169
+ -3.252620220184326,
170
+ -6.226922035217285,
171
+ -6.981635093688965,
172
+ -8.204410552978516,
173
+ -8.275205612182617,
174
+ -9.201150894165039,
175
+ -8.603435516357422,
176
+ -9.279670715332031,
177
+ -8.784062385559082,
178
+ -8.871679306030273,
179
+ -6.306247711181641,
180
+ -5.68474006652832,
181
+ -3.962198495864868,
182
+ -3.0238935947418213,
183
+ -2.9744505882263184,
184
+ -2.6373329162597656,
185
+ -2.1695408821105957,
186
+ -1.080020785331726,
187
+ -1.360289454460144,
188
+ -1.0075311660766602,
189
+ -1.0235530138015747,
190
+ -0.46744096279144287,
191
+ -0.3752056360244751,
192
+ 0.04357093572616577,
193
+ -0.09943628311157227,
194
+ -0.4955475330352783,
195
+ -0.2734338641166687,
196
+ -0.3429461717605591,
197
+ -0.0041134655475616455,
198
+ 0.04709044098854065,
199
+ 0.1768924593925476,
200
+ -0.27656251192092896,
201
+ -0.13877591490745544,
202
+ -0.12509974837303162,
203
+ -0.24758702516555786,
204
+ -0.5813546180725098,
205
+ -0.5284724831581116,
206
+ -0.3846120834350586,
207
+ -0.47701379656791687,
208
+ -0.459797739982605,
209
+ -0.1737753003835678,
210
+ -0.3913196623325348,
211
+ -0.46543970704078674,
212
+ -0.4292522370815277,
213
+ -0.5559454560279846,
214
+ -0.21943989396095276,
215
+ -0.34869182109832764,
216
+ -0.40362223982810974,
217
+ -0.2524590492248535,
218
+ -0.2675473988056183,
219
+ -0.1487952321767807,
220
+ -0.307020366191864,
221
+ -0.32443857192993164,
222
+ -0.6750359535217285,
223
+ -0.22530588507652283,
224
+ -0.30566859245300293,
225
+ -0.33527910709381104,
226
+ -0.2065487504005432,
227
+ -0.14366477727890015,
228
+ -0.13279062509536743,
229
+ -0.07906606048345566,
230
+ -0.09167033433914185,
231
+ -0.0976349413394928,
232
+ -0.10151524096727371,
233
+ -0.0636506974697113,
234
+ -0.11694946885108948,
235
+ -0.04894915968179703,
236
+ -0.047724585980176926,
237
+ -0.011612199246883392,
238
+ 0.018235700204968452,
239
+ -0.05753451958298683,
240
+ -0.040516600012779236,
241
+ -0.009241508319973946,
242
+ -0.014481740072369576,
243
+ -0.00770045118406415,
244
+ -0.14257672429084778,
245
+ -0.03606821969151497,
246
+ -0.009480145759880543,
247
+ -0.015739701688289642,
248
+ -0.011609197594225407,
249
+ -0.03947978466749191,
250
+ 0.06050138548016548,
251
+ -0.01320057176053524,
252
+ -0.027511538937687874,
253
+ -0.016208164393901825,
254
+ -0.0333053320646286,
255
+ -0.024525409564375877,
256
+ -0.01065415795892477,
257
+ -0.02119392529129982,
258
+ -0.04003360867500305,
259
+ -0.0028016045689582825,
260
+ 0.000910859671421349,
261
+ -0.13593541085720062,
262
+ 0.001402085181325674,
263
+ -0.005520656704902649,
264
+ -0.046631112694740295,
265
+ -0.009012637659907341,
266
+ -0.06137138605117798,
267
+ -0.00438198447227478,
268
+ -0.05739019811153412,
269
+ 0.0014041318790987134,
270
+ 0.0018914949614554644,
271
+ -0.015167666599154472,
272
+ -0.005372755695134401,
273
+ -0.03327453136444092,
274
+ -0.008505746722221375,
275
+ -0.00026095949579030275,
276
+ -0.00024359801318496466,
277
+ 0.0003202931256964803,
278
+ -0.09214256703853607,
279
+ -0.0035724681802093983,
280
+ -0.0005808689165860415,
281
+ 0.00034972524736076593,
282
+ -0.014495953917503357,
283
+ 0.0004223707364872098,
284
+ -0.02243591845035553,
285
+ -0.01563563011586666,
286
+ -0.015591835603117943,
287
+ 0.00010260124690830708,
288
+ -0.0015123512130230665,
289
+ 0.002046036534011364,
290
+ -0.0009971989784389734,
291
+ -0.027398593723773956,
292
+ 0.00042443181155249476,
293
+ 0.0009379229159094393,
294
+ 0.0023569948971271515,
295
+ 0.00271740835160017,
296
+ -0.004399539437144995,
297
+ 0.00021369947353377938,
298
+ 0.0007292578229680657,
299
+ 0.0008549960912205279,
300
+ 0.000461005256511271,
301
+ 0.0010421416955068707,
302
+ -0.00028817541897296906,
303
+ 0.0004380897735245526,
304
+ 5.740142660215497e-05,
305
+ 0.0007358812727034092
306
+ ],
307
+ "base_loss": [
308
+ 10.585969924926758,
309
+ 7.861696720123291,
310
+ 6.595737457275391,
311
+ 4.664162635803223,
312
+ 2.468219041824341,
313
+ 2.0123753547668457,
314
+ 1.9060745239257812,
315
+ 1.9411739110946655,
316
+ 1.8617933988571167,
317
+ 1.7926157712936401,
318
+ 1.8101921081542969,
319
+ 1.7342417240142822,
320
+ 1.840061902999878,
321
+ 1.817416787147522,
322
+ 1.8616607189178467,
323
+ 1.7341166734695435,
324
+ 1.7626491785049438,
325
+ 1.7099848985671997,
326
+ 1.7449618577957153,
327
+ 1.6113771200180054,
328
+ 1.5788114070892334,
329
+ 1.4948910474777222,
330
+ 1.4506797790527344,
331
+ 1.1060410737991333,
332
+ 1.0262805223464966,
333
+ 0.8133479356765747,
334
+ 0.707499086856842,
335
+ 0.6684346199035645,
336
+ 0.5689376592636108,
337
+ 0.5056790709495544,
338
+ 0.38431841135025024,
339
+ 0.3539868891239166,
340
+ 0.3170618414878845,
341
+ 0.3117201626300812,
342
+ 0.21727757155895233,
343
+ 0.19561822712421417,
344
+ 0.12323100864887238,
345
+ 0.09690731763839722,
346
+ 0.12097124755382538,
347
+ 0.08452977240085602,
348
+ 0.09661463648080826,
349
+ 0.04839548468589783,
350
+ 0.043548472225666046,
351
+ 0.03469983488321304,
352
+ 0.07495895773172379,
353
+ 0.05109835043549538,
354
+ 0.06454946845769882,
355
+ 0.04492994770407677,
356
+ 0.08394144475460052,
357
+ 0.06295685470104218,
358
+ 0.053767867386341095,
359
+ 0.05481096729636192,
360
+ 0.057550448924303055,
361
+ 0.02092977985739708,
362
+ 0.04481356963515282,
363
+ 0.05254203453660011,
364
+ 0.04828425124287605,
365
+ 0.062493324279785156,
366
+ 0.04113643616437912,
367
+ 0.039543312042951584,
368
+ 0.04525873064994812,
369
+ 0.02862536907196045,
370
+ 0.030017653480172157,
371
+ 0.0259102750569582,
372
+ 0.03472105786204338,
373
+ 0.03658299520611763,
374
+ 0.075889952480793,
375
+ 0.025799283757805824,
376
+ 0.03533393517136574,
377
+ 0.03777829185128212,
378
+ 0.023547964170575142,
379
+ 0.01642688177525997,
380
+ 0.015161960385739803,
381
+ 0.008982157334685326,
382
+ 0.010374007746577263,
383
+ 0.011107687838375568,
384
+ 0.011426926590502262,
385
+ 0.00722096860408783,
386
+ 0.01364094763994217,
387
+ 0.005588749889284372,
388
+ 0.005449043121188879,
389
+ 0.0015719280345365405,
390
+ 0.0039480640552937984,
391
+ 0.006539866793900728,
392
+ 0.004745051264762878,
393
+ 0.0011512581259012222,
394
+ 0.0018374177161604166,
395
+ 0.0009692967869341373,
396
+ 0.015946198254823685,
397
+ 0.0041474527679383755,
398
+ 0.0012120535830035806,
399
+ 0.0018659065244719386,
400
+ 0.0014860676601529121,
401
+ 0.004629795905202627,
402
+ 0.005724634509533644,
403
+ 0.0016615678323432803,
404
+ 0.0031864303164184093,
405
+ 0.0019036593148484826,
406
+ 0.0038307576905936003,
407
+ 0.0029207405168563128,
408
+ 0.0015083644539117813,
409
+ 0.0025551484432071447,
410
+ 0.004551176447421312,
411
+ 0.0004310809599701315,
412
+ 9.97485694824718e-05,
413
+ 0.015383963473141193,
414
+ 7.338295836234465e-05,
415
+ 0.0007650703191757202,
416
+ 0.005420635920017958,
417
+ 0.001144574023783207,
418
+ 0.006923999171704054,
419
+ 0.0005839763907715678,
420
+ 0.006508989725261927,
421
+ 4.6668767026858404e-05,
422
+ 0.00011568832269404083,
423
+ 0.0018093654653057456,
424
+ 0.0007959016365930438,
425
+ 0.0038521860260516405,
426
+ 0.001005106489174068,
427
+ 0.00015298707876354456,
428
+ 0.00016213013441301882,
429
+ 8.991890354081988e-05,
430
+ 0.010388972237706184,
431
+ 0.0004740772128570825,
432
+ 0.0001657261891523376,
433
+ 0.0001314091496169567,
434
+ 0.0017237851861864328,
435
+ 0.00014208715583663434,
436
+ 0.0029368281830102205,
437
+ 0.001902392483316362,
438
+ 0.0021551058162003756,
439
+ 8.371058356715366e-05,
440
+ 0.00031782890437170863,
441
+ 7.045956590445712e-05,
442
+ 0.00023993372451514006,
443
+ 0.0033891168422997,
444
+ 3.269490480306558e-05,
445
+ 2.2259604520513676e-05,
446
+ 2.659805068105925e-05,
447
+ 3.853382804663852e-05,
448
+ 0.0007118353387340903,
449
+ 9.246311674360186e-05,
450
+ 4.051219002576545e-05,
451
+ 3.327736703795381e-05,
452
+ 0.00012879837595392019,
453
+ 2.466775185894221e-05,
454
+ 0.00014854212349746376,
455
+ 1.9150504158460535e-05,
456
+ 6.794735963921994e-05,
457
+ 2.4384833523072302e-05
458
+ ],
459
+ "info_loss": [
460
+ -2.183910369873047,
461
+ -0.8172898292541504,
462
+ -0.3265523910522461,
463
+ -0.29645252227783203,
464
+ -0.09593391418457031,
465
+ -0.08809053897857666,
466
+ -0.10290849208831787,
467
+ -0.09725630283355713,
468
+ -0.09241962432861328,
469
+ -0.12810420989990234,
470
+ -0.20332062244415283,
471
+ -0.2951948642730713,
472
+ -0.5304032564163208,
473
+ -0.8100159168243408,
474
+ -1.112187385559082,
475
+ -1.1749544143676758,
476
+ -1.2994019985198975,
477
+ -1.2979850769042969,
478
+ -1.3871701955795288,
479
+ -1.3036789894104004,
480
+ -1.349029302597046,
481
+ -1.2852455377578735,
482
+ -1.2771252393722534,
483
+ -0.9778541326522827,
484
+ -0.8947479724884033,
485
+ -0.69999760389328,
486
+ -0.5798141956329346,
487
+ -0.5599962472915649,
488
+ -0.5034015774726868,
489
+ -0.44325190782546997,
490
+ -0.3099415600299835,
491
+ -0.3180922269821167,
492
+ -0.2747928202152252,
493
+ -0.27571025490760803,
494
+ -0.19737744331359863,
495
+ -0.16460935771465302,
496
+ -0.10986328125,
497
+ -0.09181390702724457,
498
+ -0.1143815740942955,
499
+ -0.07358857244253159,
500
+ -0.08351294696331024,
501
+ -0.041542500257492065,
502
+ -0.03816980496048927,
503
+ -0.0214642696082592,
504
+ -0.07020267099142075,
505
+ -0.049885910004377365,
506
+ -0.0500519797205925,
507
+ -0.044273801147937775,
508
+ -0.06719288229942322,
509
+ -0.05959625542163849,
510
+ -0.044250987470149994,
511
+ -0.05371290445327759,
512
+ -0.052004072815179825,
513
+ -0.020543813705444336,
514
+ -0.04393193870782852,
515
+ -0.05230949446558952,
516
+ -0.048010554164648056,
517
+ -0.0620872937142849,
518
+ -0.0262104831635952,
519
+ -0.03927260637283325,
520
+ -0.045116618275642395,
521
+ -0.028391283005475998,
522
+ -0.029934341087937355,
523
+ -0.01769854500889778,
524
+ -0.03448145464062691,
525
+ -0.036520395427942276,
526
+ -0.07541237026453018,
527
+ -0.025723034515976906,
528
+ -0.03477848321199417,
529
+ -0.037701305001974106,
530
+ -0.023497894406318665,
531
+ -0.016341710463166237,
532
+ -0.01498723216354847,
533
+ -0.008923816494643688,
534
+ -0.010336598381400108,
535
+ -0.011041379533708096,
536
+ -0.011384105309844017,
537
+ -0.007182232104241848,
538
+ -0.013588892295956612,
539
+ -0.005559454672038555,
540
+ -0.005420235451310873,
541
+ -0.0014992294600233436,
542
+ 0.0013668490573763847,
543
+ -0.00651158019900322,
544
+ -0.004717795643955469,
545
+ -0.0011278484016656876,
546
+ -0.00181629101280123,
547
+ -0.0009426028118468821,
548
+ -0.015919113531708717,
549
+ -0.0041302829049527645,
550
+ -0.0011922880075871944,
551
+ -0.0018478219863027334,
552
+ -0.0014635310508310795,
553
+ -0.004584491718560457,
554
+ 0.005315172020345926,
555
+ -0.001626395620405674,
556
+ -0.003172175493091345,
557
+ -0.0018896134570240974,
558
+ -0.0038163100834935904,
559
+ -0.0029021964874118567,
560
+ -0.001496299752034247,
561
+ -0.0025390409864485264,
562
+ -0.004537238739430904,
563
+ -0.00041555240750312805,
564
+ -8.846276614349335e-05,
565
+ -0.015372051857411861,
566
+ -6.181209755595773e-05,
567
+ -0.0007561057573184371,
568
+ -0.0054064528085291386,
569
+ -0.001132069039158523,
570
+ -0.00691541563719511,
571
+ -0.0005742469220422208,
572
+ -0.0064970338717103004,
573
+ -3.8318328734021634e-05,
574
+ -0.00010713449592003599,
575
+ -0.0018018593546003103,
576
+ -0.000784172210842371,
577
+ -0.0038447838742285967,
578
+ -0.0009972280822694302,
579
+ -0.0001449241244699806,
580
+ -0.00015577537124045193,
581
+ -8.330684067914262e-05,
582
+ -0.01038222387433052,
583
+ -0.00046742818085476756,
584
+ -0.00015831156633794308,
585
+ -0.00012537343718577176,
586
+ -0.0017165575409308076,
587
+ -0.00013619274250231683,
588
+ -0.0029318383894860744,
589
+ -0.001897612470202148,
590
+ -0.0021497313864529133,
591
+ -7.87463941378519e-05,
592
+ -0.00031296690576709807,
593
+ -6.56055417493917e-05,
594
+ -0.0002351704315515235,
595
+ -0.003384436247870326,
596
+ -2.8172687962069176e-05,
597
+ -1.711163167783525e-05,
598
+ -2.2289243133855052e-05,
599
+ -3.4640113881323487e-05,
600
+ -0.00070682488149032,
601
+ -8.828522550174966e-05,
602
+ -3.625154204200953e-05,
603
+ -2.956273419840727e-05,
604
+ -0.0001251215289812535,
605
+ -2.0451281670830213e-05,
606
+ -0.0001448101975256577,
607
+ -1.5480572983506136e-05,
608
+ -6.413826486095786e-05,
609
+ -2.0543287973850965e-05
610
+ ],
611
+ "abs_loss": [
612
+ 1.6025371551513672,
613
+ 1.5280265808105469,
614
+ 1.3616584539413452,
615
+ 1.1554263830184937,
616
+ 1.090998888015747,
617
+ 1.0798345804214478,
618
+ 1.0821243524551392,
619
+ 1.0772966146469116,
620
+ 1.077260136604309,
621
+ 1.0835458040237427,
622
+ 1.079797387123108,
623
+ 1.0777150392532349,
624
+ 1.0810126066207886,
625
+ 1.0769461393356323,
626
+ 1.0705296993255615,
627
+ 1.0520821809768677,
628
+ 0.9844305515289307,
629
+ 0.8652377128601074,
630
+ 0.7227235436439514,
631
+ 0.5906375050544739,
632
+ 0.4413999617099762,
633
+ 0.3452411890029907,
634
+ 0.29571032524108887,
635
+ 0.23282186686992645,
636
+ 0.14773382246494293,
637
+ 0.13641048967838287,
638
+ 0.08999554067850113,
639
+ 0.11419766396284103,
640
+ 0.08703526109457016,
641
+ 0.07698491960763931,
642
+ 0.07054328173398972,
643
+ 0.08026841282844543,
644
+ 0.07668456435203552,
645
+ 0.10232362896203995,
646
+ 0.07094237953424454,
647
+ 0.07218757271766663,
648
+ 0.07528065890073776,
649
+ 0.08964914083480835,
650
+ 0.09180616587400436,
651
+ 0.0713563784956932,
652
+ 0.08065492659807205,
653
+ 0.05928153917193413,
654
+ 0.09654293209314346,
655
+ 0.18235540390014648,
656
+ 0.08406329900026321,
657
+ 0.04552678391337395,
658
+ 0.046885158866643906,
659
+ 0.042871445417404175,
660
+ 0.039759550243616104,
661
+ 0.029482612386345863,
662
+ 0.02260918915271759,
663
+ 0.029610687866806984,
664
+ 0.002320162719115615,
665
+ 0.001828500535339117,
666
+ 0.0209819208830595,
667
+ 0.028438260778784752,
668
+ 0.0035261947195976973,
669
+ 0.0012173553695902228,
670
+ 0.0015700069488957524,
671
+ 0.013621640391647816,
672
+ 0.0018642926588654518,
673
+ 0.00222670822404325,
674
+ 0.007209902163594961,
675
+ 0.002658851444721222,
676
+ 0.0030335213523358107,
677
+ 0.02170608937740326,
678
+ 0.001575362985022366,
679
+ 0.006342296022921801,
680
+ 0.004211599472910166,
681
+ 0.0006940102321095765,
682
+ 0.0012646445538848639,
683
+ 0.0021566555369645357,
684
+ 0.0002222745679318905,
685
+ 0.0015051417285576463,
686
+ 0.0026516858488321304,
687
+ 0.0001819822209654376,
688
+ 0.00018783057748805732,
689
+ 0.0017949346220120788,
690
+ 0.001176174613647163,
691
+ 0.00023223921016324311,
692
+ 0.0003606785030569881,
693
+ 0.001950734294950962,
694
+ 0.0009348828461952507,
695
+ 0.0011603906750679016,
696
+ 0.0030520979780703783,
697
+ 0.0013620670652016997,
698
+ 0.0009282492683269083,
699
+ 0.00015765779244247824,
700
+ 0.00037819836870767176,
701
+ 9.389529441250488e-05,
702
+ 0.0002264125068904832,
703
+ 0.0009022634476423264,
704
+ 0.0008199801668524742,
705
+ 0.005860028322786093,
706
+ 0.0004964592517353594,
707
+ 0.0003333363856654614,
708
+ 0.0016712062060832977,
709
+ 0.0011127572506666183,
710
+ 0.0008218744187615812,
711
+ 0.00011704187636496499,
712
+ 3.0456712920567952e-05,
713
+ 4.790263483300805e-05,
714
+ 0.0020059377420693636,
715
+ 0.00022615592752117664,
716
+ 0.0014674357371404767,
717
+ 0.0002781325893010944,
718
+ 4.343148248153739e-05,
719
+ 0.0027780078817158937,
720
+ 0.0004207564052194357,
721
+ 0.0005745245143771172,
722
+ 2.9878183340770192e-05,
723
+ 1.2017285371257458e-05,
724
+ 0.001584974699653685,
725
+ 3.3227872336283326e-05,
726
+ 9.690172009868547e-05,
727
+ 0.00015782308764755726,
728
+ 8.321733184857294e-05,
729
+ 1.623890238988679e-05,
730
+ 1.5769308447488584e-05,
731
+ 1.3366395251068752e-05,
732
+ 1.8728727809502743e-05,
733
+ 2.2210122551769018e-05,
734
+ 1.4690090210933704e-05,
735
+ 0.0001451492280466482,
736
+ 3.520072641549632e-05,
737
+ 0.0012909631477668881,
738
+ 0.0012377469101920724,
739
+ 1.613760832697153e-05,
740
+ 1.3538238818000536e-05,
741
+ 1.547469037177507e-05,
742
+ 0.00024891310022212565,
743
+ 1.7161219147965312e-05,
744
+ 0.003930000588297844,
745
+ 1.9172684915247373e-05,
746
+ 3.660873699118383e-05,
747
+ 1.2629378943529446e-05,
748
+ 0.00029429898131638765,
749
+ 3.262208338128403e-05,
750
+ 6.562576891155913e-05,
751
+ 0.0014089597389101982,
752
+ 0.0005585301551036537,
753
+ 3.131064659100957e-05,
754
+ 3.751121766981669e-05,
755
+ 0.0006812455249018967,
756
+ 4.927303234580904e-05,
757
+ 0.0008097690879367292,
758
+ 3.0214263460948132e-05,
759
+ 0.0005298063042573631,
760
+ 1.2007123586954549e-05,
761
+ 1.4416099475056399e-05
762
+ ],
763
+ "zipf_loss": [
764
+ 11.343443870544434,
765
+ 8.623438835144043,
766
+ 5.5175065994262695,
767
+ 3.58101749420166,
768
+ 3.043145179748535,
769
+ 2.958972215652466,
770
+ 2.934878349304199,
771
+ 2.9263434410095215,
772
+ 2.921945571899414,
773
+ 2.9205527305603027,
774
+ 2.917241334915161,
775
+ 2.9175031185150146,
776
+ 2.9187698364257812,
777
+ 2.922427177429199,
778
+ 2.9262380599975586,
779
+ 2.928584098815918,
780
+ 2.9285175800323486,
781
+ 2.9081363677978516,
782
+ 2.8533177375793457,
783
+ 2.762913465499878,
784
+ 2.5876712799072266,
785
+ 2.5389773845672607,
786
+ 2.419322967529297,
787
+ 2.342971086502075,
788
+ 2.2216851711273193,
789
+ 2.2107889652252197,
790
+ 2.0577499866485596,
791
+ 1.9456571340560913,
792
+ 1.819041132926941,
793
+ 1.7496004104614258,
794
+ 1.6280220746994019,
795
+ 1.4586189985275269,
796
+ 1.4156665802001953,
797
+ 1.4115968942642212,
798
+ 1.2819616794586182,
799
+ 1.0680509805679321,
800
+ 1.0114446878433228,
801
+ 0.7128306031227112,
802
+ 0.5181163549423218,
803
+ 0.37078648805618286,
804
+ 0.38750314712524414,
805
+ 0.3569878935813904,
806
+ 0.37558573484420776,
807
+ 0.3385998010635376,
808
+ 0.3420988917350769,
809
+ 0.3044321537017822,
810
+ 0.3061820864677429,
811
+ 0.1459338665008545,
812
+ 0.0026568169705569744,
813
+ 0.001584875164553523,
814
+ 0.0018690116703510284,
815
+ 0.0023432141169905663,
816
+ 0.0024605486541986465,
817
+ 0.010550200007855892,
818
+ 0.0010879351757466793,
819
+ 0.0022694002836942673,
820
+ 0.0022164294496178627,
821
+ 0.002312395256012678,
822
+ 0.001371507067233324,
823
+ 0.0031287604942917824,
824
+ 0.0020987577736377716,
825
+ 0.002605732064694166,
826
+ 0.0010573845356702805,
827
+ 0.002014062600210309,
828
+ 0.0027697691693902016,
829
+ 0.00201174383983016,
830
+ 0.0030402797274291515,
831
+ 0.005490951240062714,
832
+ 0.0063611241057515144,
833
+ 0.0038862451910972595,
834
+ 0.004755761474370956,
835
+ 0.0031097866594791412,
836
+ 0.0018975045531988144,
837
+ 0.0010394295677542686,
838
+ 0.001056477427482605,
839
+ 0.0016529643908143044,
840
+ 0.0008801072835922241,
841
+ 0.0007711621001362801,
842
+ 0.005180887877941132,
843
+ 0.001033412292599678,
844
+ 0.0009926604107022285,
845
+ 0.0016130937729030848,
846
+ 0.0005256567383185029,
847
+ 0.0009253760799765587,
848
+ 0.0016110949218273163,
849
+ 0.0007495111785829067,
850
+ 0.0017509274184703827,
851
+ 0.0007405146025121212,
852
+ 0.0006303912959992886,
853
+ 0.0010777697898447514,
854
+ 0.0012080390006303787,
855
+ 0.0007823850028216839,
856
+ 0.0014580467250198126,
857
+ 0.001149333082139492,
858
+ 0.0015753833577036858,
859
+ 0.001368482131510973,
860
+ 0.0008566654287278652,
861
+ 0.0006730356253683567,
862
+ 0.0009448234923183918,
863
+ 0.0015641096979379654,
864
+ 0.0027974294498562813,
865
+ 0.0016365437768399715,
866
+ 0.0005870126187801361,
867
+ 0.0009002229198813438,
868
+ 0.0015489952638745308,
869
+ 0.002373320981860161,
870
+ 0.001942479982972145,
871
+ 0.0009975298307836056,
872
+ 0.0019706999883055687,
873
+ 0.001106026116758585,
874
+ 0.000855783000588417,
875
+ 0.0007753064855933189,
876
+ 0.0009126532822847366,
877
+ 0.0017373235896229744,
878
+ 0.002837461419403553,
879
+ 0.0010257791727781296,
880
+ 0.0016647428274154663,
881
+ 0.0013194996863603592,
882
+ 0.000459850300103426,
883
+ 0.0010339580476284027,
884
+ 0.0011501526460051537,
885
+ 0.0010612215846776962,
886
+ 0.0012892335653305054,
887
+ 0.0006132214330136776,
888
+ 0.0008330005221068859,
889
+ 0.0013429541140794754,
890
+ 0.000822061556391418,
891
+ 0.0016405973583459854,
892
+ 0.003944283351302147,
893
+ 0.0014365538954734802,
894
+ 0.0037254802882671356,
895
+ 0.0008046384900808334,
896
+ 0.0009064888581633568,
897
+ 0.0026297150179743767,
898
+ 0.0011109109036624432,
899
+ 0.003055390901863575,
900
+ 0.0006440338911488652,
901
+ 0.0010835174471139908,
902
+ 0.0025467267259955406,
903
+ 0.0028843795880675316,
904
+ 0.0019010212272405624,
905
+ 0.0010009575635194778,
906
+ 0.0010475099552422762,
907
+ 0.0010492214933037758,
908
+ 0.001578494906425476,
909
+ 0.0011410098522901535,
910
+ 0.0010083629749715328,
911
+ 0.000520764384418726,
912
+ 0.0006296359933912754,
913
+ 0.0009154877043329179
914
+ ],
915
+ "denoise_loss": [],
916
+ "ortho_loss": [
917
+ 0.7514292597770691,
918
+ 0.44231683015823364,
919
+ 0.4044279158115387,
920
+ 0.31896743178367615,
921
+ 0.2748689353466034,
922
+ 0.17610643804073334,
923
+ 0.1378389149904251,
924
+ 0.10322701930999756,
925
+ 0.08539271354675293,
926
+ 0.09809615463018417,
927
+ 0.10976400226354599,
928
+ 0.1415826380252838,
929
+ 0.1451120674610138,
930
+ 0.17278920114040375,
931
+ 0.22727791965007782,
932
+ 0.25248804688453674,
933
+ 0.255528062582016,
934
+ 0.27296891808509827,
935
+ 0.3045079708099365,
936
+ 0.317409485578537,
937
+ 0.33449694514274597,
938
+ 0.3505777418613434,
939
+ 0.3575138747692108,
940
+ 0.35384806990623474,
941
+ 0.3518454134464264,
942
+ 0.3580361008644104,
943
+ 0.3517009913921356,
944
+ 0.33349522948265076,
945
+ 0.33289653062820435,
946
+ 0.31936779618263245,
947
+ 0.3168710172176361,
948
+ 0.3141125738620758,
949
+ 0.30932894349098206,
950
+ 0.3887239098548889,
951
+ 0.3971191942691803,
952
+ 0.41927996277809143,
953
+ 0.4253721833229065,
954
+ 0.44365379214286804,
955
+ 0.44678258895874023,
956
+ 0.4558866620063782,
957
+ 0.4441855549812317,
958
+ 0.4194315969944,
959
+ 0.4099842607975006,
960
+ 0.4103780686855316,
961
+ 0.4245625138282776,
962
+ 0.4197509288787842,
963
+ 0.42175063490867615,
964
+ 0.411930650472641,
965
+ 0.41106978058815,
966
+ 0.4170016944408417,
967
+ 0.41532015800476074,
968
+ 0.41429853439331055,
969
+ 0.41899004578590393,
970
+ 0.42344552278518677,
971
+ 0.4204721450805664,
972
+ 0.42217817902565,
973
+ 0.4267289340496063,
974
+ 0.4268219470977783,
975
+ 0.42429524660110474,
976
+ 0.4305848181247711,
977
+ 0.4328194260597229,
978
+ 0.431699275970459,
979
+ 0.4296744465827942,
980
+ 0.4308277666568756,
981
+ 0.4295274317264557,
982
+ 0.4337575137615204,
983
+ 0.4304613173007965,
984
+ 0.43323755264282227,
985
+ 0.42645522952079773,
986
+ 0.42617306113243103,
987
+ 0.4247611463069916,
988
+ 0.42557570338249207,
989
+ 0.427293986082077,
990
+ 0.4365611672401428,
991
+ 0.4403775632381439,
992
+ 0.4325670897960663,
993
+ 0.4502088725566864,
994
+ 0.45125389099121094,
995
+ 0.4531281590461731,
996
+ 0.45476648211479187,
997
+ 0.4559301435947418,
998
+ 0.45802441239356995,
999
+ 0.44424352049827576,
1000
+ 0.44277673959732056,
1001
+ 0.44663745164871216,
1002
+ 0.45184335112571716,
1003
+ 0.44507738947868347,
1004
+ 0.4511640965938568,
1005
+ 0.4508034288883209,
1006
+ 0.4560854136943817,
1007
+ 0.4548608362674713,
1008
+ 0.4576341211795807,
1009
+ 0.4547962248325348,
1010
+ 0.4557437598705292,
1011
+ 0.42322593927383423,
1012
+ 0.3989056348800659,
1013
+ 0.40023231506347656,
1014
+ 0.4040592610836029,
1015
+ 0.40395256876945496,
1016
+ 0.4029274880886078,
1017
+ 0.40079614520072937,
1018
+ 0.3979244828224182,
1019
+ 0.3972989022731781,
1020
+ 0.4015220105648041,
1021
+ 0.3990614116191864,
1022
+ 0.39628490805625916,
1023
+ 0.395735502243042,
1024
+ 0.39462000131607056,
1025
+ 0.3914993703365326,
1026
+ 0.3908904492855072,
1027
+ 0.388724148273468,
1028
+ 0.38905712962150574,
1029
+ 0.3822788596153259,
1030
+ 0.38399258255958557,
1031
+ 0.38412877917289734,
1032
+ 0.3843962252140045,
1033
+ 0.38461238145828247,
1034
+ 0.3833766579627991,
1035
+ 0.38389861583709717,
1036
+ 0.3849593698978424,
1037
+ 0.3849909007549286,
1038
+ 0.386178582906723,
1039
+ 0.3854115903377533,
1040
+ 0.3844901919364929,
1041
+ 0.3846789300441742,
1042
+ 0.38446399569511414,
1043
+ 0.38404810428619385,
1044
+ 0.3841705024242401,
1045
+ 0.38369980454444885,
1046
+ 0.38355860114097595,
1047
+ 0.38261696696281433,
1048
+ 0.3843592703342438,
1049
+ 0.3838559091091156,
1050
+ 0.38580289483070374,
1051
+ 0.3865111172199249,
1052
+ 0.3902479410171509,
1053
+ 0.39057838916778564,
1054
+ 0.390543133020401,
1055
+ 0.39057406783103943,
1056
+ 0.3909136652946472,
1057
+ 0.39078858494758606,
1058
+ 0.39051759243011475,
1059
+ 0.3902330994606018,
1060
+ 0.3898555338382721,
1061
+ 0.389696329832077,
1062
+ 0.3901805877685547,
1063
+ 0.390118807554245,
1064
+ 0.38976243138313293,
1065
+ 0.38940849900245667,
1066
+ 0.38956424593925476
1067
+ ],
1068
+ "lr": [
1069
+ 8.376068376068378e-06,
1070
+ 1.6923076923076924e-05,
1071
+ 2.5470085470085475e-05,
1072
+ 3.401709401709402e-05,
1073
+ 4e-05,
1074
+ 4e-05,
1075
+ 4e-05,
1076
+ 4e-05,
1077
+ 4e-05,
1078
+ 4e-05,
1079
+ 4e-05,
1080
+ 4e-05,
1081
+ 4e-05,
1082
+ 4e-05,
1083
+ 4e-05,
1084
+ 4e-05,
1085
+ 4e-05,
1086
+ 4e-05,
1087
+ 4e-05,
1088
+ 4e-05,
1089
+ 4e-05,
1090
+ 4e-05,
1091
+ 4e-05,
1092
+ 4e-05,
1093
+ 4e-05,
1094
+ 4e-05,
1095
+ 4e-05,
1096
+ 4e-05,
1097
+ 4e-05,
1098
+ 4e-05,
1099
+ 4e-05,
1100
+ 4e-05,
1101
+ 4e-05,
1102
+ 4e-05,
1103
+ 4e-05,
1104
+ 4e-05,
1105
+ 4e-05,
1106
+ 4e-05,
1107
+ 4e-05,
1108
+ 4e-05,
1109
+ 4e-05,
1110
+ 4e-05,
1111
+ 4e-05,
1112
+ 4e-05,
1113
+ 4e-05,
1114
+ 4e-05,
1115
+ 4e-05,
1116
+ 4e-05,
1117
+ 4e-05,
1118
+ 4e-05,
1119
+ 4e-05,
1120
+ 4e-05,
1121
+ 4e-05,
1122
+ 4e-05,
1123
+ 4e-05,
1124
+ 4e-05,
1125
+ 4e-05,
1126
+ 4e-05,
1127
+ 4e-05,
1128
+ 4e-05,
1129
+ 4e-05,
1130
+ 4e-05,
1131
+ 4e-05,
1132
+ 4e-05,
1133
+ 4e-05,
1134
+ 4e-05,
1135
+ 4e-05,
1136
+ 4e-05,
1137
+ 4e-05,
1138
+ 4e-05,
1139
+ 4e-05,
1140
+ 4e-05,
1141
+ 4e-05,
1142
+ 4e-05,
1143
+ 4e-05,
1144
+ 4e-05,
1145
+ 4e-05,
1146
+ 4e-05,
1147
+ 4e-05,
1148
+ 4e-05,
1149
+ 4e-05,
1150
+ 4e-05,
1151
+ 4e-05,
1152
+ 4e-05,
1153
+ 4e-05,
1154
+ 4e-05,
1155
+ 4e-05,
1156
+ 4e-05,
1157
+ 4e-05,
1158
+ 4e-05,
1159
+ 4e-05,
1160
+ 3.993593461639863e-05,
1161
+ 3.934273662008964e-05,
1162
+ 3.874953862378065e-05,
1163
+ 3.815634062747166e-05,
1164
+ 3.7563142631162665e-05,
1165
+ 3.696994463485368e-05,
1166
+ 3.637674663854469e-05,
1167
+ 3.5783548642235693e-05,
1168
+ 3.519035064592671e-05,
1169
+ 3.459715264961771e-05,
1170
+ 3.400395465330873e-05,
1171
+ 3.341075665699973e-05,
1172
+ 3.281755866069075e-05,
1173
+ 3.2224360664381764e-05,
1174
+ 3.1251515950435014e-05,
1175
+ 3.065831795412602e-05,
1176
+ 3.0065119957817037e-05,
1177
+ 2.947192196150804e-05,
1178
+ 2.887872396519905e-05,
1179
+ 2.8285525968890065e-05,
1180
+ 2.769232797258107e-05,
1181
+ 2.709912997627208e-05,
1182
+ 2.6505931979963087e-05,
1183
+ 2.5912733983654104e-05,
1184
+ 2.5319535987345117e-05,
1185
+ 2.472633799103612e-05,
1186
+ 2.4133139994727132e-05,
1187
+ 2.3539941998418135e-05,
1188
+ 2.2946744002109148e-05,
1189
+ 2.1973899288162408e-05,
1190
+ 2.138070129185341e-05,
1191
+ 2.0787503295544424e-05,
1192
+ 2.019430529923544e-05,
1193
+ 1.9601107302926443e-05,
1194
+ 1.900790930661746e-05,
1195
+ 1.8414711310308462e-05,
1196
+ 1.7821513313999475e-05,
1197
+ 1.7228315317690488e-05,
1198
+ 1.663511732138149e-05,
1199
+ 1.6041919325072507e-05,
1200
+ 1.544872132876351e-05,
1201
+ 1.4855523332454524e-05,
1202
+ 1.4262325336145537e-05,
1203
+ 1.366912733983654e-05,
1204
+ 1.2696282625889797e-05,
1205
+ 1.2103084629580812e-05,
1206
+ 1.1509886633271816e-05,
1207
+ 1.0916688636962829e-05,
1208
+ 1.0323490640653833e-05,
1209
+ 9.730292644344846e-06,
1210
+ 9.137094648035861e-06,
1211
+ 8.543896651726864e-06,
1212
+ 7.950698655417878e-06,
1213
+ 7.357500659108881e-06,
1214
+ 6.764302662799895e-06,
1215
+ 6.171104666490909e-06,
1216
+ 5.577906670181913e-06,
1217
+ 4.984708673872927e-06,
1218
+ 4.391510677563931e-06
1219
+ ],
1220
+ "emb_lr": [],
1221
+ "eval_step": [
1222
+ 750,
1223
+ 1532,
1224
+ 2314,
1225
+ 3096,
1226
+ 3878,
1227
+ 4660,
1228
+ 5442,
1229
+ 6224,
1230
+ 7006,
1231
+ 7788
1232
+ ],
1233
+ "eval_accuracy": [
1234
+ 0.0,
1235
+ 0.0,
1236
+ 0.0,
1237
+ 0.0,
1238
+ 0.0,
1239
+ 0.0,
1240
+ 0.0,
1241
+ 0.0,
1242
+ 0.0,
1243
+ 0.0
1244
+ ]
1245
+ },
1246
+ "final_accuracy": 1.0,
1247
+ "sft_eval": {
1248
+ "config": {
1249
+ "ops": "add_sub",
1250
+ "K": null,
1251
+ "mode": "sft",
1252
+ "n_digits": 6,
1253
+ "n_per_split": 100
1254
+ },
1255
+ "splits": {
1256
+ "add_S0": {
1257
+ "full_accuracy": 1.0,
1258
+ "n_examples": 100,
1259
+ "per_subtask": {
1260
+ "SA": {
1261
+ "accuracy": 1.0,
1262
+ "count": 605
1263
+ },
1264
+ "SS": {
1265
+ "accuracy": 1.0,
1266
+ "count": 95
1267
+ }
1268
+ }
1269
+ },
1270
+ "add_S1": {
1271
+ "full_accuracy": 1.0,
1272
+ "n_examples": 100,
1273
+ "per_subtask": {
1274
+ "SA": {
1275
+ "accuracy": 1.0,
1276
+ "count": 204
1277
+ },
1278
+ "SC": {
1279
+ "accuracy": 1.0,
1280
+ "count": 169
1281
+ },
1282
+ "SS": {
1283
+ "accuracy": 1.0,
1284
+ "count": 31
1285
+ },
1286
+ "UC": {
1287
+ "accuracy": 1.0,
1288
+ "count": 296
1289
+ }
1290
+ }
1291
+ },
1292
+ "add_S2": {
1293
+ "full_accuracy": 1.0,
1294
+ "n_examples": 100,
1295
+ "per_subtask": {
1296
+ "SA": {
1297
+ "accuracy": 1.0,
1298
+ "count": 163
1299
+ },
1300
+ "SC": {
1301
+ "accuracy": 1.0,
1302
+ "count": 130
1303
+ },
1304
+ "SS": {
1305
+ "accuracy": 1.0,
1306
+ "count": 87
1307
+ },
1308
+ "UC": {
1309
+ "accuracy": 1.0,
1310
+ "count": 203
1311
+ },
1312
+ "US": {
1313
+ "accuracy": 1.0,
1314
+ "count": 117
1315
+ }
1316
+ }
1317
+ },
1318
+ "add_S3": {
1319
+ "full_accuracy": 1.0,
1320
+ "n_examples": 100,
1321
+ "per_subtask": {
1322
+ "SA": {
1323
+ "accuracy": 1.0,
1324
+ "count": 121
1325
+ },
1326
+ "SC": {
1327
+ "accuracy": 1.0,
1328
+ "count": 121
1329
+ },
1330
+ "SS": {
1331
+ "accuracy": 1.0,
1332
+ "count": 49
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 1.0,
1336
+ "count": 186
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 1.0,
1340
+ "count": 223
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_S4": {
1345
+ "full_accuracy": 1.0,
1346
+ "n_examples": 100,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 104
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 1.0,
1354
+ "count": 106
1355
+ },
1356
+ "SS": {
1357
+ "accuracy": 1.0,
1358
+ "count": 23
1359
+ },
1360
+ "UC": {
1361
+ "accuracy": 1.0,
1362
+ "count": 160
1363
+ },
1364
+ "US": {
1365
+ "accuracy": 1.0,
1366
+ "count": 307
1367
+ }
1368
+ }
1369
+ },
1370
+ "add_S5": {
1371
+ "full_accuracy": 1.0,
1372
+ "n_examples": 100,
1373
+ "per_subtask": {
1374
+ "SA": {
1375
+ "accuracy": 1.0,
1376
+ "count": 100
1377
+ },
1378
+ "SC": {
1379
+ "accuracy": 1.0,
1380
+ "count": 100
1381
+ },
1382
+ "UC": {
1383
+ "accuracy": 1.0,
1384
+ "count": 100
1385
+ },
1386
+ "US": {
1387
+ "accuracy": 1.0,
1388
+ "count": 400
1389
+ }
1390
+ }
1391
+ },
1392
+ "add_S6": {
1393
+ "full_accuracy": 1.0,
1394
+ "n_examples": 100,
1395
+ "per_subtask": {
1396
+ "SC": {
1397
+ "accuracy": 1.0,
1398
+ "count": 100
1399
+ },
1400
+ "UC": {
1401
+ "accuracy": 1.0,
1402
+ "count": 100
1403
+ },
1404
+ "US": {
1405
+ "accuracy": 1.0,
1406
+ "count": 500
1407
+ }
1408
+ }
1409
+ },
1410
+ "add_random": {
1411
+ "full_accuracy": 1.0,
1412
+ "n_examples": 200,
1413
+ "per_subtask": {
1414
+ "SA": {
1415
+ "accuracy": 1.0,
1416
+ "count": 447
1417
+ },
1418
+ "SC": {
1419
+ "accuracy": 1.0,
1420
+ "count": 320
1421
+ },
1422
+ "SS": {
1423
+ "accuracy": 1.0,
1424
+ "count": 56
1425
+ },
1426
+ "UC": {
1427
+ "accuracy": 1.0,
1428
+ "count": 529
1429
+ },
1430
+ "US": {
1431
+ "accuracy": 1.0,
1432
+ "count": 48
1433
+ }
1434
+ }
1435
+ },
1436
+ "add_C3": {
1437
+ "full_accuracy": 1.0,
1438
+ "n_examples": 100,
1439
+ "per_subtask": {
1440
+ "SA": {
1441
+ "accuracy": 1.0,
1442
+ "count": 300
1443
+ },
1444
+ "SC": {
1445
+ "accuracy": 1.0,
1446
+ "count": 100
1447
+ },
1448
+ "UC": {
1449
+ "accuracy": 1.0,
1450
+ "count": 193
1451
+ },
1452
+ "US": {
1453
+ "accuracy": 1.0,
1454
+ "count": 107
1455
+ }
1456
+ }
1457
+ },
1458
+ "add_C4": {
1459
+ "full_accuracy": 1.0,
1460
+ "n_examples": 100,
1461
+ "per_subtask": {
1462
+ "SA": {
1463
+ "accuracy": 1.0,
1464
+ "count": 200
1465
+ },
1466
+ "SC": {
1467
+ "accuracy": 1.0,
1468
+ "count": 100
1469
+ },
1470
+ "UC": {
1471
+ "accuracy": 1.0,
1472
+ "count": 256
1473
+ },
1474
+ "US": {
1475
+ "accuracy": 1.0,
1476
+ "count": 144
1477
+ }
1478
+ }
1479
+ },
1480
+ "add_C5": {
1481
+ "full_accuracy": 1.0,
1482
+ "n_examples": 100,
1483
+ "per_subtask": {
1484
+ "SA": {
1485
+ "accuracy": 1.0,
1486
+ "count": 100
1487
+ },
1488
+ "SC": {
1489
+ "accuracy": 1.0,
1490
+ "count": 100
1491
+ },
1492
+ "UC": {
1493
+ "accuracy": 1.0,
1494
+ "count": 306
1495
+ },
1496
+ "US": {
1497
+ "accuracy": 1.0,
1498
+ "count": 194
1499
+ }
1500
+ }
1501
+ },
1502
+ "add_C6": {
1503
+ "full_accuracy": 1.0,
1504
+ "n_examples": 100,
1505
+ "per_subtask": {
1506
+ "SC": {
1507
+ "accuracy": 1.0,
1508
+ "count": 100
1509
+ },
1510
+ "UC": {
1511
+ "accuracy": 1.0,
1512
+ "count": 366
1513
+ },
1514
+ "US": {
1515
+ "accuracy": 1.0,
1516
+ "count": 234
1517
+ }
1518
+ }
1519
+ },
1520
+ "sub_M0": {
1521
+ "full_accuracy": 1.0,
1522
+ "n_examples": 100,
1523
+ "per_subtask": {
1524
+ "MD": {
1525
+ "accuracy": 1.0,
1526
+ "count": 601
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 1.0,
1530
+ "count": 99
1531
+ }
1532
+ }
1533
+ },
1534
+ "sub_M1": {
1535
+ "full_accuracy": 1.0,
1536
+ "n_examples": 100,
1537
+ "per_subtask": {
1538
+ "MD": {
1539
+ "accuracy": 1.0,
1540
+ "count": 279
1541
+ },
1542
+ "MB": {
1543
+ "accuracy": 1.0,
1544
+ "count": 145
1545
+ },
1546
+ "ME": {
1547
+ "accuracy": 1.0,
1548
+ "count": 24
1549
+ },
1550
+ "UB": {
1551
+ "accuracy": 1.0,
1552
+ "count": 252
1553
+ }
1554
+ }
1555
+ },
1556
+ "sub_M2": {
1557
+ "full_accuracy": 1.0,
1558
+ "n_examples": 100,
1559
+ "per_subtask": {
1560
+ "MD": {
1561
+ "accuracy": 1.0,
1562
+ "count": 213
1563
+ },
1564
+ "MB": {
1565
+ "accuracy": 1.0,
1566
+ "count": 113
1567
+ },
1568
+ "ME": {
1569
+ "accuracy": 1.0,
1570
+ "count": 85
1571
+ },
1572
+ "UB": {
1573
+ "accuracy": 1.0,
1574
+ "count": 181
1575
+ },
1576
+ "UD": {
1577
+ "accuracy": 1.0,
1578
+ "count": 108
1579
+ }
1580
+ }
1581
+ },
1582
+ "sub_M3": {
1583
+ "full_accuracy": 1.0,
1584
+ "n_examples": 100,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 1.0,
1588
+ "count": 179
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 1.0,
1592
+ "count": 103
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 1.0,
1596
+ "count": 56
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 1.0,
1600
+ "count": 149
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 1.0,
1604
+ "count": 213
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_M4": {
1609
+ "full_accuracy": 1.0,
1610
+ "n_examples": 100,
1611
+ "per_subtask": {
1612
+ "MD": {
1613
+ "accuracy": 1.0,
1614
+ "count": 200
1615
+ },
1616
+ "MB": {
1617
+ "accuracy": 1.0,
1618
+ "count": 100
1619
+ },
1620
+ "UB": {
1621
+ "accuracy": 1.0,
1622
+ "count": 100
1623
+ },
1624
+ "UD": {
1625
+ "accuracy": 1.0,
1626
+ "count": 300
1627
+ }
1628
+ }
1629
+ },
1630
+ "sub_M5": {
1631
+ "full_accuracy": 1.0,
1632
+ "n_examples": 100,
1633
+ "per_subtask": {
1634
+ "MD": {
1635
+ "accuracy": 1.0,
1636
+ "count": 100
1637
+ },
1638
+ "MB": {
1639
+ "accuracy": 1.0,
1640
+ "count": 100
1641
+ },
1642
+ "UB": {
1643
+ "accuracy": 1.0,
1644
+ "count": 100
1645
+ },
1646
+ "UD": {
1647
+ "accuracy": 1.0,
1648
+ "count": 400
1649
+ }
1650
+ }
1651
+ },
1652
+ "sub_random": {
1653
+ "full_accuracy": 1.0,
1654
+ "n_examples": 200,
1655
+ "per_subtask": {
1656
+ "MD": {
1657
+ "accuracy": 1.0,
1658
+ "count": 600
1659
+ },
1660
+ "MB": {
1661
+ "accuracy": 1.0,
1662
+ "count": 267
1663
+ },
1664
+ "ME": {
1665
+ "accuracy": 1.0,
1666
+ "count": 53
1667
+ },
1668
+ "UB": {
1669
+ "accuracy": 1.0,
1670
+ "count": 439
1671
+ },
1672
+ "UD": {
1673
+ "accuracy": 1.0,
1674
+ "count": 41
1675
+ }
1676
+ }
1677
+ },
1678
+ "sub_B3": {
1679
+ "full_accuracy": 1.0,
1680
+ "n_examples": 100,
1681
+ "per_subtask": {
1682
+ "MD": {
1683
+ "accuracy": 1.0,
1684
+ "count": 300
1685
+ },
1686
+ "MB": {
1687
+ "accuracy": 1.0,
1688
+ "count": 100
1689
+ },
1690
+ "UB": {
1691
+ "accuracy": 1.0,
1692
+ "count": 197
1693
+ },
1694
+ "UD": {
1695
+ "accuracy": 1.0,
1696
+ "count": 103
1697
+ }
1698
+ }
1699
+ },
1700
+ "sub_B4": {
1701
+ "full_accuracy": 1.0,
1702
+ "n_examples": 100,
1703
+ "per_subtask": {
1704
+ "MD": {
1705
+ "accuracy": 1.0,
1706
+ "count": 200
1707
+ },
1708
+ "MB": {
1709
+ "accuracy": 1.0,
1710
+ "count": 100
1711
+ },
1712
+ "UB": {
1713
+ "accuracy": 1.0,
1714
+ "count": 247
1715
+ },
1716
+ "UD": {
1717
+ "accuracy": 1.0,
1718
+ "count": 153
1719
+ }
1720
+ }
1721
+ },
1722
+ "sub_B5": {
1723
+ "full_accuracy": 1.0,
1724
+ "n_examples": 100,
1725
+ "per_subtask": {
1726
+ "MD": {
1727
+ "accuracy": 1.0,
1728
+ "count": 100
1729
+ },
1730
+ "MB": {
1731
+ "accuracy": 1.0,
1732
+ "count": 100
1733
+ },
1734
+ "UB": {
1735
+ "accuracy": 1.0,
1736
+ "count": 298
1737
+ },
1738
+ "UD": {
1739
+ "accuracy": 1.0,
1740
+ "count": 202
1741
+ }
1742
+ }
1743
+ }
1744
+ },
1745
+ "summary": {
1746
+ "overall_accuracy": 1.0,
1747
+ "total_examples": 2400,
1748
+ "n_splits": 22
1749
+ }
1750
+ },
1751
+ "sorl_eval": {
1752
+ "config": {
1753
+ "ops": "add_sub",
1754
+ "K": 1,
1755
+ "mode": "sorl",
1756
+ "n_digits": 6,
1757
+ "n_per_split": 100
1758
+ },
1759
+ "splits": {
1760
+ "add_S0": {
1761
+ "full_accuracy": 1.0,
1762
+ "n_examples": 100,
1763
+ "per_subtask": {
1764
+ "SA": {
1765
+ "accuracy": 1.0,
1766
+ "count": 605
1767
+ },
1768
+ "SS": {
1769
+ "accuracy": 1.0,
1770
+ "count": 95
1771
+ }
1772
+ }
1773
+ },
1774
+ "add_S1": {
1775
+ "full_accuracy": 1.0,
1776
+ "n_examples": 100,
1777
+ "per_subtask": {
1778
+ "SA": {
1779
+ "accuracy": 1.0,
1780
+ "count": 204
1781
+ },
1782
+ "SC": {
1783
+ "accuracy": 1.0,
1784
+ "count": 169
1785
+ },
1786
+ "SS": {
1787
+ "accuracy": 1.0,
1788
+ "count": 31
1789
+ },
1790
+ "UC": {
1791
+ "accuracy": 1.0,
1792
+ "count": 296
1793
+ }
1794
+ }
1795
+ },
1796
+ "add_S2": {
1797
+ "full_accuracy": 1.0,
1798
+ "n_examples": 100,
1799
+ "per_subtask": {
1800
+ "SA": {
1801
+ "accuracy": 1.0,
1802
+ "count": 163
1803
+ },
1804
+ "SC": {
1805
+ "accuracy": 1.0,
1806
+ "count": 130
1807
+ },
1808
+ "SS": {
1809
+ "accuracy": 1.0,
1810
+ "count": 87
1811
+ },
1812
+ "UC": {
1813
+ "accuracy": 1.0,
1814
+ "count": 203
1815
+ },
1816
+ "US": {
1817
+ "accuracy": 1.0,
1818
+ "count": 117
1819
+ }
1820
+ }
1821
+ },
1822
+ "add_S3": {
1823
+ "full_accuracy": 1.0,
1824
+ "n_examples": 100,
1825
+ "per_subtask": {
1826
+ "SA": {
1827
+ "accuracy": 1.0,
1828
+ "count": 121
1829
+ },
1830
+ "SC": {
1831
+ "accuracy": 1.0,
1832
+ "count": 121
1833
+ },
1834
+ "SS": {
1835
+ "accuracy": 1.0,
1836
+ "count": 49
1837
+ },
1838
+ "UC": {
1839
+ "accuracy": 1.0,
1840
+ "count": 186
1841
+ },
1842
+ "US": {
1843
+ "accuracy": 1.0,
1844
+ "count": 223
1845
+ }
1846
+ }
1847
+ },
1848
+ "add_S4": {
1849
+ "full_accuracy": 1.0,
1850
+ "n_examples": 100,
1851
+ "per_subtask": {
1852
+ "SA": {
1853
+ "accuracy": 1.0,
1854
+ "count": 104
1855
+ },
1856
+ "SC": {
1857
+ "accuracy": 1.0,
1858
+ "count": 106
1859
+ },
1860
+ "SS": {
1861
+ "accuracy": 1.0,
1862
+ "count": 23
1863
+ },
1864
+ "UC": {
1865
+ "accuracy": 1.0,
1866
+ "count": 160
1867
+ },
1868
+ "US": {
1869
+ "accuracy": 1.0,
1870
+ "count": 307
1871
+ }
1872
+ }
1873
+ },
1874
+ "add_S5": {
1875
+ "full_accuracy": 1.0,
1876
+ "n_examples": 100,
1877
+ "per_subtask": {
1878
+ "SA": {
1879
+ "accuracy": 1.0,
1880
+ "count": 100
1881
+ },
1882
+ "SC": {
1883
+ "accuracy": 1.0,
1884
+ "count": 100
1885
+ },
1886
+ "UC": {
1887
+ "accuracy": 1.0,
1888
+ "count": 100
1889
+ },
1890
+ "US": {
1891
+ "accuracy": 1.0,
1892
+ "count": 400
1893
+ }
1894
+ }
1895
+ },
1896
+ "add_S6": {
1897
+ "full_accuracy": 1.0,
1898
+ "n_examples": 100,
1899
+ "per_subtask": {
1900
+ "SC": {
1901
+ "accuracy": 1.0,
1902
+ "count": 100
1903
+ },
1904
+ "UC": {
1905
+ "accuracy": 1.0,
1906
+ "count": 100
1907
+ },
1908
+ "US": {
1909
+ "accuracy": 1.0,
1910
+ "count": 500
1911
+ }
1912
+ }
1913
+ },
1914
+ "add_random": {
1915
+ "full_accuracy": 1.0,
1916
+ "n_examples": 200,
1917
+ "per_subtask": {
1918
+ "SA": {
1919
+ "accuracy": 1.0,
1920
+ "count": 447
1921
+ },
1922
+ "SC": {
1923
+ "accuracy": 1.0,
1924
+ "count": 320
1925
+ },
1926
+ "SS": {
1927
+ "accuracy": 1.0,
1928
+ "count": 56
1929
+ },
1930
+ "UC": {
1931
+ "accuracy": 1.0,
1932
+ "count": 529
1933
+ },
1934
+ "US": {
1935
+ "accuracy": 1.0,
1936
+ "count": 48
1937
+ }
1938
+ }
1939
+ },
1940
+ "add_C3": {
1941
+ "full_accuracy": 1.0,
1942
+ "n_examples": 100,
1943
+ "per_subtask": {
1944
+ "SA": {
1945
+ "accuracy": 1.0,
1946
+ "count": 300
1947
+ },
1948
+ "SC": {
1949
+ "accuracy": 1.0,
1950
+ "count": 100
1951
+ },
1952
+ "UC": {
1953
+ "accuracy": 1.0,
1954
+ "count": 193
1955
+ },
1956
+ "US": {
1957
+ "accuracy": 1.0,
1958
+ "count": 107
1959
+ }
1960
+ }
1961
+ },
1962
+ "add_C4": {
1963
+ "full_accuracy": 1.0,
1964
+ "n_examples": 100,
1965
+ "per_subtask": {
1966
+ "SA": {
1967
+ "accuracy": 1.0,
1968
+ "count": 200
1969
+ },
1970
+ "SC": {
1971
+ "accuracy": 1.0,
1972
+ "count": 100
1973
+ },
1974
+ "UC": {
1975
+ "accuracy": 1.0,
1976
+ "count": 256
1977
+ },
1978
+ "US": {
1979
+ "accuracy": 1.0,
1980
+ "count": 144
1981
+ }
1982
+ }
1983
+ },
1984
+ "add_C5": {
1985
+ "full_accuracy": 1.0,
1986
+ "n_examples": 100,
1987
+ "per_subtask": {
1988
+ "SA": {
1989
+ "accuracy": 1.0,
1990
+ "count": 100
1991
+ },
1992
+ "SC": {
1993
+ "accuracy": 1.0,
1994
+ "count": 100
1995
+ },
1996
+ "UC": {
1997
+ "accuracy": 1.0,
1998
+ "count": 306
1999
+ },
2000
+ "US": {
2001
+ "accuracy": 1.0,
2002
+ "count": 194
2003
+ }
2004
+ }
2005
+ },
2006
+ "add_C6": {
2007
+ "full_accuracy": 1.0,
2008
+ "n_examples": 100,
2009
+ "per_subtask": {
2010
+ "SC": {
2011
+ "accuracy": 1.0,
2012
+ "count": 100
2013
+ },
2014
+ "UC": {
2015
+ "accuracy": 1.0,
2016
+ "count": 366
2017
+ },
2018
+ "US": {
2019
+ "accuracy": 1.0,
2020
+ "count": 234
2021
+ }
2022
+ }
2023
+ },
2024
+ "sub_M0": {
2025
+ "full_accuracy": 1.0,
2026
+ "n_examples": 100,
2027
+ "per_subtask": {
2028
+ "MD": {
2029
+ "accuracy": 1.0,
2030
+ "count": 601
2031
+ },
2032
+ "ME": {
2033
+ "accuracy": 1.0,
2034
+ "count": 99
2035
+ }
2036
+ }
2037
+ },
2038
+ "sub_M1": {
2039
+ "full_accuracy": 1.0,
2040
+ "n_examples": 100,
2041
+ "per_subtask": {
2042
+ "MD": {
2043
+ "accuracy": 1.0,
2044
+ "count": 279
2045
+ },
2046
+ "MB": {
2047
+ "accuracy": 1.0,
2048
+ "count": 145
2049
+ },
2050
+ "ME": {
2051
+ "accuracy": 1.0,
2052
+ "count": 24
2053
+ },
2054
+ "UB": {
2055
+ "accuracy": 1.0,
2056
+ "count": 252
2057
+ }
2058
+ }
2059
+ },
2060
+ "sub_M2": {
2061
+ "full_accuracy": 1.0,
2062
+ "n_examples": 100,
2063
+ "per_subtask": {
2064
+ "MD": {
2065
+ "accuracy": 1.0,
2066
+ "count": 213
2067
+ },
2068
+ "MB": {
2069
+ "accuracy": 1.0,
2070
+ "count": 113
2071
+ },
2072
+ "ME": {
2073
+ "accuracy": 1.0,
2074
+ "count": 85
2075
+ },
2076
+ "UB": {
2077
+ "accuracy": 1.0,
2078
+ "count": 181
2079
+ },
2080
+ "UD": {
2081
+ "accuracy": 1.0,
2082
+ "count": 108
2083
+ }
2084
+ }
2085
+ },
2086
+ "sub_M3": {
2087
+ "full_accuracy": 1.0,
2088
+ "n_examples": 100,
2089
+ "per_subtask": {
2090
+ "MD": {
2091
+ "accuracy": 1.0,
2092
+ "count": 179
2093
+ },
2094
+ "MB": {
2095
+ "accuracy": 1.0,
2096
+ "count": 103
2097
+ },
2098
+ "ME": {
2099
+ "accuracy": 1.0,
2100
+ "count": 56
2101
+ },
2102
+ "UB": {
2103
+ "accuracy": 1.0,
2104
+ "count": 149
2105
+ },
2106
+ "UD": {
2107
+ "accuracy": 1.0,
2108
+ "count": 213
2109
+ }
2110
+ }
2111
+ },
2112
+ "sub_M4": {
2113
+ "full_accuracy": 1.0,
2114
+ "n_examples": 100,
2115
+ "per_subtask": {
2116
+ "MD": {
2117
+ "accuracy": 1.0,
2118
+ "count": 200
2119
+ },
2120
+ "MB": {
2121
+ "accuracy": 1.0,
2122
+ "count": 100
2123
+ },
2124
+ "UB": {
2125
+ "accuracy": 1.0,
2126
+ "count": 100
2127
+ },
2128
+ "UD": {
2129
+ "accuracy": 1.0,
2130
+ "count": 300
2131
+ }
2132
+ }
2133
+ },
2134
+ "sub_M5": {
2135
+ "full_accuracy": 1.0,
2136
+ "n_examples": 100,
2137
+ "per_subtask": {
2138
+ "MD": {
2139
+ "accuracy": 1.0,
2140
+ "count": 100
2141
+ },
2142
+ "MB": {
2143
+ "accuracy": 1.0,
2144
+ "count": 100
2145
+ },
2146
+ "UB": {
2147
+ "accuracy": 1.0,
2148
+ "count": 100
2149
+ },
2150
+ "UD": {
2151
+ "accuracy": 1.0,
2152
+ "count": 400
2153
+ }
2154
+ }
2155
+ },
2156
+ "sub_random": {
2157
+ "full_accuracy": 1.0,
2158
+ "n_examples": 200,
2159
+ "per_subtask": {
2160
+ "MD": {
2161
+ "accuracy": 1.0,
2162
+ "count": 600
2163
+ },
2164
+ "MB": {
2165
+ "accuracy": 1.0,
2166
+ "count": 267
2167
+ },
2168
+ "ME": {
2169
+ "accuracy": 1.0,
2170
+ "count": 53
2171
+ },
2172
+ "UB": {
2173
+ "accuracy": 1.0,
2174
+ "count": 439
2175
+ },
2176
+ "UD": {
2177
+ "accuracy": 1.0,
2178
+ "count": 41
2179
+ }
2180
+ }
2181
+ },
2182
+ "sub_B3": {
2183
+ "full_accuracy": 1.0,
2184
+ "n_examples": 100,
2185
+ "per_subtask": {
2186
+ "MD": {
2187
+ "accuracy": 1.0,
2188
+ "count": 300
2189
+ },
2190
+ "MB": {
2191
+ "accuracy": 1.0,
2192
+ "count": 100
2193
+ },
2194
+ "UB": {
2195
+ "accuracy": 1.0,
2196
+ "count": 197
2197
+ },
2198
+ "UD": {
2199
+ "accuracy": 1.0,
2200
+ "count": 103
2201
+ }
2202
+ }
2203
+ },
2204
+ "sub_B4": {
2205
+ "full_accuracy": 1.0,
2206
+ "n_examples": 100,
2207
+ "per_subtask": {
2208
+ "MD": {
2209
+ "accuracy": 1.0,
2210
+ "count": 200
2211
+ },
2212
+ "MB": {
2213
+ "accuracy": 1.0,
2214
+ "count": 100
2215
+ },
2216
+ "UB": {
2217
+ "accuracy": 1.0,
2218
+ "count": 247
2219
+ },
2220
+ "UD": {
2221
+ "accuracy": 1.0,
2222
+ "count": 153
2223
+ }
2224
+ }
2225
+ },
2226
+ "sub_B5": {
2227
+ "full_accuracy": 1.0,
2228
+ "n_examples": 100,
2229
+ "per_subtask": {
2230
+ "MD": {
2231
+ "accuracy": 1.0,
2232
+ "count": 100
2233
+ },
2234
+ "MB": {
2235
+ "accuracy": 1.0,
2236
+ "count": 100
2237
+ },
2238
+ "UB": {
2239
+ "accuracy": 1.0,
2240
+ "count": 298
2241
+ },
2242
+ "UD": {
2243
+ "accuracy": 1.0,
2244
+ "count": 202
2245
+ }
2246
+ }
2247
+ }
2248
+ },
2249
+ "summary": {
2250
+ "overall_accuracy": 1.0,
2251
+ "total_examples": 2400,
2252
+ "n_splits": 22
2253
+ }
2254
+ },
2255
+ "sorl_overall_accuracy": 1.0,
2256
+ "sft_overall_accuracy": 1.0
2257
+ }
add_sub_sorl_v1_abs5_K1_50K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320da49221baacc16c296960d0855142b5f676072ebecf1bd64b79536ba9ee50
3
+ size 650283250
add_sub_sorl_v1_abs5_K1_50K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 234,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 781,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs5_K1_50K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 5,
65
+ "dataset_size": 50000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162494162,
71
+ "run_name": "add_sub_sorl_v1_abs5_K1_50K",
72
+ "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
+ "timestamp": "2026-04-12T10:05:47.875905+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "iulz6nra",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/iulz6nra",
81
+ "final_accuracy": 1.0,
82
+ "sft_accuracy": 1.0,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }