amirali1985 commited on
Commit
5f7d914
·
verified ·
1 Parent(s): 57ff541

Upload add_sub_sorl_v1_abs10_K1_50K_1L3H510d

Browse files
add_sub_sorl_v1_abs10_K1_50K_1L3H510d/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention"
17
+ ],
18
+ "max_position_embeddings": 128,
19
+ "max_window_layers": 28,
20
+ "model_type": "qwen3",
21
+ "num_attention_heads": 3,
22
+ "num_hidden_layers": 1,
23
+ "num_key_value_heads": 3,
24
+ "pad_token_id": null,
25
+ "rms_norm_eps": 1e-06,
26
+ "rope_parameters": {
27
+ "rope_theta": 10000.0,
28
+ "rope_type": "default"
29
+ },
30
+ "sliding_window": null,
31
+ "tie_word_embeddings": false,
32
+ "transformers_version": "5.5.0",
33
+ "use_cache": true,
34
+ "use_sliding_window": false,
35
+ "vocab_size": 151654
36
+ }
add_sub_sorl_v1_abs10_K1_50K_1L3H510d/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs10_K1_50K_1L3H510d/metrics.json ADDED
@@ -0,0 +1,2257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 400,
12
+ 450,
13
+ 500,
14
+ 550,
15
+ 600,
16
+ 650,
17
+ 700,
18
+ 750,
19
+ 832,
20
+ 882,
21
+ 932,
22
+ 982,
23
+ 1032,
24
+ 1082,
25
+ 1132,
26
+ 1182,
27
+ 1232,
28
+ 1282,
29
+ 1332,
30
+ 1382,
31
+ 1432,
32
+ 1482,
33
+ 1532,
34
+ 1614,
35
+ 1664,
36
+ 1714,
37
+ 1764,
38
+ 1814,
39
+ 1864,
40
+ 1914,
41
+ 1964,
42
+ 2014,
43
+ 2064,
44
+ 2114,
45
+ 2164,
46
+ 2214,
47
+ 2264,
48
+ 2314,
49
+ 2396,
50
+ 2446,
51
+ 2496,
52
+ 2546,
53
+ 2596,
54
+ 2646,
55
+ 2696,
56
+ 2746,
57
+ 2796,
58
+ 2846,
59
+ 2896,
60
+ 2946,
61
+ 2996,
62
+ 3046,
63
+ 3096,
64
+ 3178,
65
+ 3228,
66
+ 3278,
67
+ 3328,
68
+ 3378,
69
+ 3428,
70
+ 3478,
71
+ 3528,
72
+ 3578,
73
+ 3628,
74
+ 3678,
75
+ 3728,
76
+ 3778,
77
+ 3828,
78
+ 3878,
79
+ 3960,
80
+ 4010,
81
+ 4060,
82
+ 4110,
83
+ 4160,
84
+ 4210,
85
+ 4260,
86
+ 4310,
87
+ 4360,
88
+ 4410,
89
+ 4460,
90
+ 4510,
91
+ 4560,
92
+ 4610,
93
+ 4660,
94
+ 4742,
95
+ 4792,
96
+ 4842,
97
+ 4892,
98
+ 4942,
99
+ 4992,
100
+ 5042,
101
+ 5092,
102
+ 5142,
103
+ 5192,
104
+ 5242,
105
+ 5292,
106
+ 5342,
107
+ 5392,
108
+ 5442,
109
+ 5524,
110
+ 5574,
111
+ 5624,
112
+ 5674,
113
+ 5724,
114
+ 5774,
115
+ 5824,
116
+ 5874,
117
+ 5924,
118
+ 5974,
119
+ 6024,
120
+ 6074,
121
+ 6124,
122
+ 6174,
123
+ 6224,
124
+ 6306,
125
+ 6356,
126
+ 6406,
127
+ 6456,
128
+ 6506,
129
+ 6556,
130
+ 6606,
131
+ 6656,
132
+ 6706,
133
+ 6756,
134
+ 6806,
135
+ 6856,
136
+ 6906,
137
+ 6956,
138
+ 7006,
139
+ 7088,
140
+ 7138,
141
+ 7188,
142
+ 7238,
143
+ 7288,
144
+ 7338,
145
+ 7388,
146
+ 7438,
147
+ 7488,
148
+ 7538,
149
+ 7588,
150
+ 7638,
151
+ 7688,
152
+ 7738,
153
+ 7788
154
+ ],
155
+ "loss": [
156
+ 8.47067642211914,
157
+ 3.859321117401123,
158
+ 3.0991883277893066,
159
+ 3.3409392833709717,
160
+ 3.3445615768432617,
161
+ 3.071617603302002,
162
+ 3.0536575317382812,
163
+ 2.8200273513793945,
164
+ 2.7634453773498535,
165
+ 0.6405220031738281,
166
+ -0.9271819591522217,
167
+ -4.679404258728027,
168
+ -5.836901664733887,
169
+ -8.526687622070312,
170
+ -8.978545188903809,
171
+ -8.417521476745605,
172
+ -7.670687198638916,
173
+ -6.564399719238281,
174
+ -5.615384578704834,
175
+ -4.976064682006836,
176
+ -2.216883420944214,
177
+ -1.4136266708374023,
178
+ -1.0798640251159668,
179
+ -0.5408358573913574,
180
+ -0.22238361835479736,
181
+ 0.16258960962295532,
182
+ -0.6259200572967529,
183
+ -0.5599202513694763,
184
+ 0.13963162899017334,
185
+ 0.007780313491821289,
186
+ -0.23957473039627075,
187
+ 0.3784700632095337,
188
+ 0.09624123573303223,
189
+ -0.2628714442253113,
190
+ 0.1245647668838501,
191
+ -0.2984371781349182,
192
+ 0.07317012548446655,
193
+ -0.20061078667640686,
194
+ 0.0016877055168151855,
195
+ -0.08852306008338928,
196
+ 0.007971584796905518,
197
+ -0.005069524049758911,
198
+ -0.1857169270515442,
199
+ 0.12880273163318634,
200
+ -0.15553274750709534,
201
+ -0.31286513805389404,
202
+ 0.004276543855667114,
203
+ 0.043737590312957764,
204
+ -0.20392894744873047,
205
+ -0.06425321102142334,
206
+ 0.042032390832901,
207
+ 0.13489098846912384,
208
+ -0.1695605218410492,
209
+ -0.18983259797096252,
210
+ 0.05254027247428894,
211
+ -0.17216861248016357,
212
+ 0.42946910858154297,
213
+ 0.03497058153152466,
214
+ -0.4959927201271057,
215
+ -0.2946988046169281,
216
+ -0.14054325222969055,
217
+ -0.1606135368347168,
218
+ -0.20758064091205597,
219
+ -0.07599818706512451,
220
+ -0.204355850815773,
221
+ -0.4362576901912689,
222
+ -0.42311590909957886,
223
+ -0.40590041875839233,
224
+ -0.16058385372161865,
225
+ -0.24916203320026398,
226
+ 0.16023975610733032,
227
+ -0.2869209051132202,
228
+ -0.3760267496109009,
229
+ -0.3357478082180023,
230
+ -0.09387531876564026,
231
+ -0.7844890356063843,
232
+ -0.13949094712734222,
233
+ -0.44414588809013367,
234
+ -0.42745646834373474,
235
+ -0.3531284034252167,
236
+ -0.6143267154693604,
237
+ -0.9826579689979553,
238
+ -0.4692939221858978,
239
+ -0.3503918945789337,
240
+ -0.42758840322494507,
241
+ -0.3714824318885803,
242
+ -0.31439173221588135,
243
+ -0.6495652198791504,
244
+ -0.058341529220342636,
245
+ -0.6107015013694763,
246
+ -0.5336044430732727,
247
+ -0.4551495909690857,
248
+ -0.6802051067352295,
249
+ -0.3394930064678192,
250
+ -0.47552600502967834,
251
+ -0.01830144040286541,
252
+ -0.30640774965286255,
253
+ -0.5638172030448914,
254
+ -0.40761300921440125,
255
+ -0.3676009178161621,
256
+ -0.33406054973602295,
257
+ -0.32963287830352783,
258
+ -0.41975510120391846,
259
+ -0.4875797927379608,
260
+ -0.13679492473602295,
261
+ -0.3449482321739197,
262
+ -0.38658419251441956,
263
+ -0.5263659358024597,
264
+ -0.5405909419059753,
265
+ -0.5471492409706116,
266
+ -0.30939704179763794,
267
+ -0.6842730045318604,
268
+ -0.539143443107605,
269
+ -0.4582570791244507,
270
+ -0.4487195312976837,
271
+ -0.5894002318382263,
272
+ -0.07285723090171814,
273
+ -0.749821126461029,
274
+ -0.4640003442764282,
275
+ -0.42504963278770447,
276
+ -0.2539079785346985,
277
+ -0.4926801323890686,
278
+ -0.4174199104309082,
279
+ -0.38921570777893066,
280
+ -0.3867590129375458,
281
+ -0.3553716838359833,
282
+ -0.4733166992664337,
283
+ -0.32238879799842834,
284
+ -0.6644755005836487,
285
+ -0.5403436422348022,
286
+ -0.560452938079834,
287
+ -0.39427629113197327,
288
+ -0.39716440439224243,
289
+ -0.3601585328578949,
290
+ -0.5065312385559082,
291
+ -0.30354976654052734,
292
+ -0.528386652469635,
293
+ -0.4692227244377136,
294
+ -0.33632326126098633,
295
+ -0.4119393825531006,
296
+ -0.3576270639896393,
297
+ -0.704921543598175,
298
+ -0.6248269081115723,
299
+ -0.3194754123687744,
300
+ -0.3632478415966034,
301
+ -0.4672422707080841,
302
+ -0.710575520992279,
303
+ -0.33513766527175903,
304
+ -0.2928919494152069,
305
+ -0.4346057176589966
306
+ ],
307
+ "base_loss": [
308
+ 6.529869079589844,
309
+ 2.3522889614105225,
310
+ 1.9823862314224243,
311
+ 1.8216798305511475,
312
+ 1.8954092264175415,
313
+ 1.863158106803894,
314
+ 1.8727132081985474,
315
+ 1.8258191347122192,
316
+ 1.8073500394821167,
317
+ 1.8966313600540161,
318
+ 1.8717511892318726,
319
+ 1.9207693338394165,
320
+ 1.8693705797195435,
321
+ 1.9036433696746826,
322
+ 1.8812330961227417,
323
+ 1.6870934963226318,
324
+ 1.6692317724227905,
325
+ 1.4732754230499268,
326
+ 1.365601658821106,
327
+ 1.2771196365356445,
328
+ 0.9189826846122742,
329
+ 0.7958611249923706,
330
+ 0.7418918013572693,
331
+ 0.681869387626648,
332
+ 0.6421297192573547,
333
+ 0.5580056309700012,
334
+ 0.6352177858352661,
335
+ 0.6491341590881348,
336
+ 0.5735772848129272,
337
+ 0.5147913098335266,
338
+ 0.533217191696167,
339
+ 0.47872549295425415,
340
+ 0.48013827204704285,
341
+ 0.5564236640930176,
342
+ 0.49468594789505005,
343
+ 0.4692336618900299,
344
+ 0.5063740611076355,
345
+ 0.48869970440864563,
346
+ 0.49136045575141907,
347
+ 0.44452375173568726,
348
+ 0.4514934718608856,
349
+ 0.45978498458862305,
350
+ 0.39197835326194763,
351
+ 0.4396274983882904,
352
+ 0.42984989285469055,
353
+ 0.4332329332828522,
354
+ 0.4062458574771881,
355
+ 0.37813684344291687,
356
+ 0.3858397305011749,
357
+ 0.3725948929786682,
358
+ 0.4058559834957123,
359
+ 0.2884959578514099,
360
+ 0.3678801953792572,
361
+ 0.336038738489151,
362
+ 0.3166891038417816,
363
+ 0.33307892084121704,
364
+ 0.2845018208026886,
365
+ 0.3227593004703522,
366
+ 0.37250086665153503,
367
+ 0.28964105248451233,
368
+ 0.3191569149494171,
369
+ 0.27697229385375977,
370
+ 0.29193589091300964,
371
+ 0.2866390347480774,
372
+ 0.3261076807975769,
373
+ 0.3937642574310303,
374
+ 0.31407326459884644,
375
+ 0.33535775542259216,
376
+ 0.24889574944972992,
377
+ 0.29406002163887024,
378
+ 0.2409432828426361,
379
+ 0.35092687606811523,
380
+ 0.27360251545906067,
381
+ 0.28033000230789185,
382
+ 0.2864038050174713,
383
+ 0.271339476108551,
384
+ 0.2604587972164154,
385
+ 0.30055779218673706,
386
+ 0.27918002009391785,
387
+ 0.22688111662864685,
388
+ 0.270652711391449,
389
+ 0.28921690583229065,
390
+ 0.23527908325195312,
391
+ 0.25206416845321655,
392
+ 0.25411903858184814,
393
+ 0.21951384842395782,
394
+ 0.27349406480789185,
395
+ 0.254742294549942,
396
+ 0.2292661964893341,
397
+ 0.1955949366092682,
398
+ 0.24696041643619537,
399
+ 0.22090299427509308,
400
+ 0.22282291948795319,
401
+ 0.20417501032352448,
402
+ 0.2358027845621109,
403
+ 0.19285988807678223,
404
+ 0.1971234232187271,
405
+ 0.22451014816761017,
406
+ 0.18633778393268585,
407
+ 0.22210939228534698,
408
+ 0.23272477090358734,
409
+ 0.19261686503887177,
410
+ 0.21030089259147644,
411
+ 0.1925947368144989,
412
+ 0.22401663661003113,
413
+ 0.18720892071723938,
414
+ 0.17757317423820496,
415
+ 0.18794147670269012,
416
+ 0.22906138002872467,
417
+ 0.21192224323749542,
418
+ 0.16107645630836487,
419
+ 0.22859220206737518,
420
+ 0.20563936233520508,
421
+ 0.21756412088871002,
422
+ 0.1816309690475464,
423
+ 0.18884612619876862,
424
+ 0.14128300547599792,
425
+ 0.2663905918598175,
426
+ 0.19550541043281555,
427
+ 0.17163562774658203,
428
+ 0.17473351955413818,
429
+ 0.1715536117553711,
430
+ 0.17578543722629547,
431
+ 0.16855619847774506,
432
+ 0.18263652920722961,
433
+ 0.17694807052612305,
434
+ 0.13545209169387817,
435
+ 0.1860579401254654,
436
+ 0.20312993228435516,
437
+ 0.19952358305454254,
438
+ 0.21564070880413055,
439
+ 0.17865024507045746,
440
+ 0.22009076178073883,
441
+ 0.15182125568389893,
442
+ 0.17367124557495117,
443
+ 0.17723344266414642,
444
+ 0.159737691283226,
445
+ 0.1377307027578354,
446
+ 0.1133374497294426,
447
+ 0.1590133011341095,
448
+ 0.13635817170143127,
449
+ 0.2107985019683838,
450
+ 0.20057733356952667,
451
+ 0.12804178893566132,
452
+ 0.1539105921983719,
453
+ 0.15954028069972992,
454
+ 0.1959955245256424,
455
+ 0.10784348100423813,
456
+ 0.1844562292098999,
457
+ 0.14684811234474182
458
+ ],
459
+ "info_loss": [
460
+ -0.2673468589782715,
461
+ -0.04440808296203613,
462
+ -0.07712435722351074,
463
+ -0.03570270538330078,
464
+ -0.04258620738983154,
465
+ -0.06644809246063232,
466
+ -0.06896436214447021,
467
+ -0.08776187896728516,
468
+ -0.09168505668640137,
469
+ -0.3129291534423828,
470
+ -0.46592533588409424,
471
+ -0.8345524072647095,
472
+ -0.9259849190711975,
473
+ -1.1986396312713623,
474
+ -1.2376453876495361,
475
+ -1.1589113473892212,
476
+ -1.0768587589263916,
477
+ -0.9370661377906799,
478
+ -0.8276593685150146,
479
+ -0.7481008768081665,
480
+ -0.4281192123889923,
481
+ -0.32412827014923096,
482
+ -0.2727624773979187,
483
+ -0.20998990535736084,
484
+ -0.16334781050682068,
485
+ -0.11577355861663818,
486
+ -0.19545629620552063,
487
+ -0.19052308797836304,
488
+ -0.10945683717727661,
489
+ -0.10963675379753113,
490
+ -0.13540121912956238,
491
+ -0.0649571418762207,
492
+ -0.09443607926368713,
493
+ -0.13581201434135437,
494
+ -0.09144818782806396,
495
+ -0.12629523873329163,
496
+ -0.09346094727516174,
497
+ -0.1159842312335968,
498
+ -0.09238475561141968,
499
+ -0.09163549542427063,
500
+ -0.0864732563495636,
501
+ -0.09146097302436829,
502
+ -0.1021280288696289,
503
+ -0.06953030824661255,
504
+ -0.09440380334854126,
505
+ -0.10704010725021362,
506
+ -0.07827016711235046,
507
+ -0.06915175914764404,
508
+ -0.09524726867675781,
509
+ -0.08362510800361633,
510
+ -0.07066857814788818,
511
+ -0.04639166593551636,
512
+ -0.08692899346351624,
513
+ -0.08005630970001221,
514
+ -0.05635380744934082,
515
+ -0.07672905921936035,
516
+ -0.019900262355804443,
517
+ -0.054761648178100586,
518
+ -0.11633443832397461,
519
+ -0.08680947124958038,
520
+ -0.0727926641702652,
521
+ -0.06547568738460541,
522
+ -0.07120101153850555,
523
+ -0.055115923285484314,
524
+ -0.0648864209651947,
525
+ -0.09370043873786926,
526
+ -0.08160407841205597,
527
+ -0.08273720741271973,
528
+ -0.048524439334869385,
529
+ -0.06075708568096161,
530
+ -0.015704527497291565,
531
+ -0.06819471716880798,
532
+ -0.06899033486843109,
533
+ -0.06627984344959259,
534
+ -0.042593181133270264,
535
+ -0.10910588502883911,
536
+ -0.046684086322784424,
537
+ -0.07880324125289917,
538
+ -0.07517123222351074,
539
+ -0.06181502342224121,
540
+ -0.09271721541881561,
541
+ -0.13140380382537842,
542
+ -0.07391722500324249,
543
+ -0.0640488713979721,
544
+ -0.07120409607887268,
545
+ -0.0627627968788147,
546
+ -0.06179974973201752,
547
+ -0.09381449222564697,
548
+ -0.03218841552734375,
549
+ -0.08402406424283981,
550
+ -0.0813249945640564,
551
+ -0.07089047133922577,
552
+ -0.09299984574317932,
553
+ -0.05727653205394745,
554
+ -0.07365383207798004,
555
+ -0.024941951036453247,
556
+ -0.05313184857368469,
557
+ -0.08307541906833649,
558
+ -0.06305523216724396,
559
+ -0.06241106986999512,
560
+ -0.06006002426147461,
561
+ -0.05616569519042969,
562
+ -0.0658143013715744,
563
+ -0.07124374061822891,
564
+ -0.03855186700820923,
565
+ -0.05644591152667999,
566
+ -0.05963750183582306,
567
+ -0.07424025982618332,
568
+ -0.07966555655002594,
569
+ -0.07849693298339844,
570
+ -0.05011218041181564,
571
+ -0.09463152289390564,
572
+ -0.07753230631351471,
573
+ -0.07019945979118347,
574
+ -0.06616665422916412,
575
+ -0.0811585858464241,
576
+ -0.02353697270154953,
577
+ -0.10485921800136566,
578
+ -0.0684717446565628,
579
+ -0.061824582517147064,
580
+ -0.045364245772361755,
581
+ -0.06856260448694229,
582
+ -0.06233055889606476,
583
+ -0.058004721999168396,
584
+ -0.05994530767202377,
585
+ -0.05602298676967621,
586
+ -0.06310290843248367,
587
+ -0.05322764813899994,
588
+ -0.08953020721673965,
589
+ -0.07597177475690842,
590
+ -0.07998061180114746,
591
+ -0.05991140753030777,
592
+ -0.06403300166130066,
593
+ -0.05338875204324722,
594
+ -0.07013767212629318,
595
+ -0.050085484981536865,
596
+ -0.07169508934020996,
597
+ -0.0626421645283699,
598
+ -0.04760565608739853,
599
+ -0.05929655581712723,
600
+ -0.051652222871780396,
601
+ -0.09364695101976395,
602
+ -0.08479366451501846,
603
+ -0.04709339886903763,
604
+ -0.053751140832901,
605
+ -0.06505024433135986,
606
+ -0.09360753744840622,
607
+ -0.04723042622208595,
608
+ -0.05039326846599579,
609
+ -0.06148739159107208
610
+ ],
611
+ "abs_loss": [
612
+ 2.0777323246002197,
613
+ 1.8372353315353394,
614
+ 1.8596841096878052,
615
+ 1.8447657823562622,
616
+ 1.866260051727295,
617
+ 1.8622106313705444,
618
+ 1.8444541692733765,
619
+ 1.8505001068115234,
620
+ 1.886319637298584,
621
+ 1.8786344528198242,
622
+ 1.7852767705917358,
623
+ 1.580426573753357,
624
+ 1.4302619695663452,
625
+ 1.4253687858581543,
626
+ 1.3955053091049194,
627
+ 1.238847255706787,
628
+ 1.0613170862197876,
629
+ 0.9975995421409607,
630
+ 1.0350087881088257,
631
+ 0.8302550315856934,
632
+ 0.7335612177848816,
633
+ 0.6067109704017639,
634
+ 0.5130870938301086,
635
+ 0.4377255439758301,
636
+ 0.40763750672340393,
637
+ 0.3429541289806366,
638
+ 0.35336068272590637,
639
+ 0.35311901569366455,
640
+ 0.30836349725723267,
641
+ 0.29843878746032715,
642
+ 0.2701031267642975,
643
+ 0.2620505690574646,
644
+ 0.27157625555992126,
645
+ 0.26693102717399597,
646
+ 0.27656325697898865,
647
+ 0.2799464166164398,
648
+ 0.27106282114982605,
649
+ 0.32633912563323975,
650
+ 0.29606541991233826,
651
+ 0.2582005262374878,
652
+ 0.2775072157382965,
653
+ 0.2881506681442261,
654
+ 0.3060077130794525,
655
+ 0.3220145106315613,
656
+ 0.2445584088563919,
657
+ 0.2725384533405304,
658
+ 0.35818028450012207,
659
+ 0.3085358142852783,
660
+ 0.2886260151863098,
661
+ 0.32749247550964355,
662
+ 0.3170374631881714,
663
+ 0.32663360238075256,
664
+ 0.3656320571899414,
665
+ 0.38276636600494385,
666
+ 0.4017406404018402,
667
+ 0.3261677026748657,
668
+ 0.30932173132896423,
669
+ 0.34228047728538513,
670
+ 0.3847839832305908,
671
+ 0.3398013114929199,
672
+ 0.2953852117061615,
673
+ 0.285983681678772,
674
+ 0.2711770236492157,
675
+ 0.30411458015441895,
676
+ 0.25742125511169434,
677
+ 0.25713059306144714,
678
+ 0.21089105308055878,
679
+ 0.25374868512153625,
680
+ 0.20307719707489014,
681
+ 0.24056212604045868,
682
+ 0.27503547072410583,
683
+ 0.21771258115768433,
684
+ 0.17491371929645538,
685
+ 0.19084621965885162,
686
+ 0.21915553510189056,
687
+ 0.14147557318210602,
688
+ 0.16538940370082855,
689
+ 0.16390661895275116,
690
+ 0.17348627746105194,
691
+ 0.14653095602989197,
692
+ 0.145098015666008,
693
+ 0.16084204614162445,
694
+ 0.13867895305156708,
695
+ 0.11408242583274841,
696
+ 0.10716051608324051,
697
+ 0.11975974589586258,
698
+ 0.11440757662057877,
699
+ 0.1023651733994484,
700
+ 0.13637660443782806,
701
+ 0.10093352198600769,
702
+ 0.09186253696680069,
703
+ 0.11327973753213882,
704
+ 0.12308790534734726,
705
+ 0.1159990057349205,
706
+ 0.10311237722635269,
707
+ 0.11532590538263321,
708
+ 0.09129684418439865,
709
+ 0.09231492877006531,
710
+ 0.11006152629852295,
711
+ 0.09338565915822983,
712
+ 0.10004422813653946,
713
+ 0.09749677032232285,
714
+ 0.0867953896522522,
715
+ 0.09206672757863998,
716
+ 0.07050669193267822,
717
+ 0.07319243252277374,
718
+ 0.09475291520357132,
719
+ 0.09280968457460403,
720
+ 0.07203791290521622,
721
+ 0.07556378096342087,
722
+ 0.0879942774772644,
723
+ 0.07373198121786118,
724
+ 0.09719672799110413,
725
+ 0.08441676944494247,
726
+ 0.08118002861738205,
727
+ 0.06300904601812363,
728
+ 0.06390989571809769,
729
+ 0.08405200392007828,
730
+ 0.06970033794641495,
731
+ 0.06256341934204102,
732
+ 0.0618957094848156,
733
+ 0.05331374332308769,
734
+ 0.07108704745769501,
735
+ 0.050225283950567245,
736
+ 0.07027151435613632,
737
+ 0.06590838730335236,
738
+ 0.050113528966903687,
739
+ 0.06628331542015076,
740
+ 0.06416907906532288,
741
+ 0.06008912995457649,
742
+ 0.08411464840173721,
743
+ 0.05974702164530754,
744
+ 0.05712446570396423,
745
+ 0.04810317978262901,
746
+ 0.05578098073601723,
747
+ 0.04661886766552925,
748
+ 0.06548946350812912,
749
+ 0.05453115701675415,
750
+ 0.06402477622032166,
751
+ 0.047158390283584595,
752
+ 0.05706872418522835,
753
+ 0.051012784242630005,
754
+ 0.05519309639930725,
755
+ 0.05584952235221863,
756
+ 0.045132603496313095,
757
+ 0.056113243103027344,
758
+ 0.05444992706179619,
759
+ 0.05939311161637306,
760
+ 0.058004170656204224,
761
+ 0.03935953974723816
762
+ ],
763
+ "zipf_loss": [
764
+ 4.406503200531006,
765
+ 1.7673894166946411,
766
+ 1.7020772695541382,
767
+ 1.691809892654419,
768
+ 1.688388466835022,
769
+ 1.6867194175720215,
770
+ 1.6861424446105957,
771
+ 1.686776876449585,
772
+ 1.6843140125274658,
773
+ 1.6853187084197998,
774
+ 1.6817922592163086,
775
+ 1.5873081684112549,
776
+ 1.4105513095855713,
777
+ 1.413528323173523,
778
+ 1.3771247863769531,
779
+ 1.3606131076812744,
780
+ 1.3225369453430176,
781
+ 1.2332267761230469,
782
+ 1.1921072006225586,
783
+ 1.1447988748550415,
784
+ 1.071969985961914,
785
+ 0.9711239337921143,
786
+ 0.8545601963996887,
787
+ 0.8334212303161621,
788
+ 0.7282010912895203,
789
+ 0.7280241250991821,
790
+ 0.6580890417098999,
791
+ 0.660864531993866,
792
+ 0.6297863721847534,
793
+ 0.559512734413147,
794
+ 0.5542100071907043,
795
+ 0.5231109261512756,
796
+ 0.5333061218261719,
797
+ 0.5121319890022278,
798
+ 0.5167043805122375,
799
+ 0.4672868847846985,
800
+ 0.47429925203323364,
801
+ 0.4378978908061981,
802
+ 0.40456825494766235,
803
+ 0.35748809576034546,
804
+ 0.3934599459171295,
805
+ 0.4209401607513428,
806
+ 0.4129841923713684,
807
+ 0.35227686166763306,
808
+ 0.33419957756996155,
809
+ 0.2970491647720337,
810
+ 0.3449143171310425,
811
+ 0.32626476883888245,
812
+ 0.3338414430618286,
813
+ 0.3666537404060364,
814
+ 0.3111584484577179,
815
+ 0.27764832973480225,
816
+ 0.29528602957725525,
817
+ 0.23641511797904968,
818
+ 0.25921517610549927,
819
+ 0.2294262945652008,
820
+ 0.3130377531051636,
821
+ 0.22559970617294312,
822
+ 0.2563723921775818,
823
+ 0.2497747242450714,
824
+ 0.2386879324913025,
825
+ 0.1885727047920227,
826
+ 0.18537591397762299,
827
+ 0.1581105887889862,
828
+ 0.09265856444835663,
829
+ 0.08126936107873917,
830
+ 0.05776248127222061,
831
+ 0.06073905527591705,
832
+ 0.055457063019275665,
833
+ 0.04029260575771332,
834
+ 0.04883820563554764,
835
+ 0.02232813462615013,
836
+ 0.022782735526561737,
837
+ 0.027635956183075905,
838
+ 0.023737134411931038,
839
+ 0.02108277752995491,
840
+ 0.05035217106342316,
841
+ 0.02693808451294899,
842
+ 0.02772720716893673,
843
+ 0.023487640544772148,
844
+ 0.02768293395638466,
845
+ 0.026078997179865837,
846
+ 0.02073132060468197,
847
+ 0.02662445418536663,
848
+ 0.019617483019828796,
849
+ 0.024655727669596672,
850
+ 0.01867099106311798,
851
+ 0.023600950837135315,
852
+ 0.02063876762986183,
853
+ 0.02385086938738823,
854
+ 0.02349885366857052,
855
+ 0.021524202078580856,
856
+ 0.014661639928817749,
857
+ 0.01749744638800621,
858
+ 0.014898284338414669,
859
+ 0.026725592091679573,
860
+ 0.01865765079855919,
861
+ 0.03319530561566353,
862
+ 0.025595368817448616,
863
+ 0.025061847642064095,
864
+ 0.02381046488881111,
865
+ 0.029657501727342606,
866
+ 0.019407516345381737,
867
+ 0.02305617742240429,
868
+ 0.017656439915299416,
869
+ 0.02498270571231842,
870
+ 0.02274232730269432,
871
+ 0.018814217299222946,
872
+ 0.019799407571554184,
873
+ 0.018341470509767532,
874
+ 0.0218488872051239,
875
+ 0.026076773181557655,
876
+ 0.020820580422878265,
877
+ 0.017731718719005585,
878
+ 0.023198017850518227,
879
+ 0.02703857235610485,
880
+ 0.014838501811027527,
881
+ 0.023975318297743797,
882
+ 0.018241601064801216,
883
+ 0.01530423853546381,
884
+ 0.018811386078596115,
885
+ 0.01606089621782303,
886
+ 0.022991515696048737,
887
+ 0.017252791672945023,
888
+ 0.02303040400147438,
889
+ 0.0213193129748106,
890
+ 0.017248917371034622,
891
+ 0.01720144785940647,
892
+ 0.02127968519926071,
893
+ 0.013841651380062103,
894
+ 0.015300977975130081,
895
+ 0.020212795585393906,
896
+ 0.01736241951584816,
897
+ 0.017097417265176773,
898
+ 0.015596166253089905,
899
+ 0.01540975272655487,
900
+ 0.02227761410176754,
901
+ 0.014015071094036102,
902
+ 0.0199933722615242,
903
+ 0.01729702018201351,
904
+ 0.016830110922455788,
905
+ 0.01564820483326912,
906
+ 0.0170130655169487,
907
+ 0.01783180795609951,
908
+ 0.015839725732803345,
909
+ 0.018108535557985306,
910
+ 0.024059370160102844,
911
+ 0.023383792489767075,
912
+ 0.020784102380275726,
913
+ 0.029484093189239502
914
+ ],
915
+ "denoise_loss": [],
916
+ "ortho_loss": [
917
+ 0.39830857515335083,
918
+ 0.2433786690235138,
919
+ 0.20988474786281586,
920
+ 0.19384649395942688,
921
+ 0.19618169963359833,
922
+ 0.21874834597110748,
923
+ 0.20772071182727814,
924
+ 0.18882158398628235,
925
+ 0.2314612716436386,
926
+ 0.25052326917648315,
927
+ 0.2812398374080658,
928
+ 0.3191724717617035,
929
+ 0.3201376497745514,
930
+ 0.3000963032245636,
931
+ 0.26885080337524414,
932
+ 0.268261581659317,
933
+ 0.2745248079299927,
934
+ 0.2682482898235321,
935
+ 0.27149900794029236,
936
+ 0.2707764804363251,
937
+ 0.27085885405540466,
938
+ 0.2695680856704712,
939
+ 0.2618236243724823,
940
+ 0.2597876489162445,
941
+ 0.2581340968608856,
942
+ 0.2508399486541748,
943
+ 0.24459116160869598,
944
+ 0.24822774529457092,
945
+ 0.2757481336593628,
946
+ 0.2812578082084656,
947
+ 0.28060707449913025,
948
+ 0.27701568603515625,
949
+ 0.2774236798286438,
950
+ 0.27818116545677185,
951
+ 0.27627792954444885,
952
+ 0.2726292312145233,
953
+ 0.2671484351158142,
954
+ 0.26871195435523987,
955
+ 0.2720329463481903,
956
+ 0.27176108956336975,
957
+ 0.27438652515411377,
958
+ 0.2732361853122711,
959
+ 0.2705092430114746,
960
+ 0.271069198846817,
961
+ 0.27583789825439453,
962
+ 0.2766655385494232,
963
+ 0.27713289856910706,
964
+ 0.27564698457717896,
965
+ 0.2780587077140808,
966
+ 0.278008371591568,
967
+ 0.2802888751029968,
968
+ 0.28178492188453674,
969
+ 0.2823565602302551,
970
+ 0.28541451692581177,
971
+ 0.29519784450531006,
972
+ 0.2944939136505127,
973
+ 0.2898300886154175,
974
+ 0.2950134873390198,
975
+ 0.2994725704193115,
976
+ 0.3095267415046692,
977
+ 0.3123369514942169,
978
+ 0.3123491704463959,
979
+ 0.31109243631362915,
980
+ 0.3140035569667816,
981
+ 0.3158407211303711,
982
+ 0.3229905664920807,
983
+ 0.3247292637825012,
984
+ 0.32620954513549805,
985
+ 0.32591450214385986,
986
+ 0.32379913330078125,
987
+ 0.3245965540409088,
988
+ 0.32910656929016113,
989
+ 0.3264192044734955,
990
+ 0.3264644145965576,
991
+ 0.32559576630592346,
992
+ 0.3237476944923401,
993
+ 0.3267005980014801,
994
+ 0.33368119597435,
995
+ 0.33319106698036194,
996
+ 0.3357834815979004,
997
+ 0.33304426074028015,
998
+ 0.32880592346191406,
999
+ 0.3296840190887451,
1000
+ 0.3254052698612213,
1001
+ 0.31745028495788574,
1002
+ 0.3167537748813629,
1003
+ 0.3174743354320526,
1004
+ 0.3163991868495941,
1005
+ 0.3160884976387024,
1006
+ 0.3137410283088684,
1007
+ 0.31337714195251465,
1008
+ 0.31452980637550354,
1009
+ 0.31249645352363586,
1010
+ 0.3148419260978699,
1011
+ 0.31133177876472473,
1012
+ 0.30948013067245483,
1013
+ 0.31456074118614197,
1014
+ 0.31482842564582825,
1015
+ 0.31413134932518005,
1016
+ 0.3161856532096863,
1017
+ 0.31791284680366516,
1018
+ 0.3175433874130249,
1019
+ 0.3201428949832916,
1020
+ 0.31951627135276794,
1021
+ 0.3191130757331848,
1022
+ 0.32220661640167236,
1023
+ 0.32215243577957153,
1024
+ 0.32431453466415405,
1025
+ 0.3254556953907013,
1026
+ 0.3261330723762512,
1027
+ 0.33019202947616577,
1028
+ 0.33232834935188293,
1029
+ 0.3316073417663574,
1030
+ 0.3316960632801056,
1031
+ 0.3329588770866394,
1032
+ 0.33336374163627625,
1033
+ 0.3353458046913147,
1034
+ 0.3332311511039734,
1035
+ 0.33092865347862244,
1036
+ 0.33475038409233093,
1037
+ 0.33227968215942383,
1038
+ 0.3314722180366516,
1039
+ 0.33147549629211426,
1040
+ 0.32883259654045105,
1041
+ 0.3315986394882202,
1042
+ 0.33407390117645264,
1043
+ 0.33586472272872925,
1044
+ 0.33549898862838745,
1045
+ 0.3368147015571594,
1046
+ 0.3380911648273468,
1047
+ 0.33757898211479187,
1048
+ 0.33975356817245483,
1049
+ 0.34040141105651855,
1050
+ 0.34014108777046204,
1051
+ 0.34206295013427734,
1052
+ 0.34369945526123047,
1053
+ 0.34482741355895996,
1054
+ 0.3449490964412689,
1055
+ 0.3461775779724121,
1056
+ 0.3472226858139038,
1057
+ 0.3477751910686493,
1058
+ 0.3483361303806305,
1059
+ 0.3491489887237549,
1060
+ 0.3505137860774994,
1061
+ 0.35050302743911743,
1062
+ 0.350710391998291,
1063
+ 0.35074424743652344,
1064
+ 0.35159772634506226,
1065
+ 0.35225003957748413,
1066
+ 0.35208776593208313
1067
+ ],
1068
+ "lr": [
1069
+ 7.840000000000001e-05,
1070
+ 8e-05,
1071
+ 8e-05,
1072
+ 8e-05,
1073
+ 8e-05,
1074
+ 8e-05,
1075
+ 8e-05,
1076
+ 8e-05,
1077
+ 8e-05,
1078
+ 8e-05,
1079
+ 8e-05,
1080
+ 8e-05,
1081
+ 8e-05,
1082
+ 8e-05,
1083
+ 8e-05,
1084
+ 8e-05,
1085
+ 8e-05,
1086
+ 8e-05,
1087
+ 8e-05,
1088
+ 8e-05,
1089
+ 8e-05,
1090
+ 8e-05,
1091
+ 8e-05,
1092
+ 8e-05,
1093
+ 8e-05,
1094
+ 8e-05,
1095
+ 8e-05,
1096
+ 8e-05,
1097
+ 8e-05,
1098
+ 8e-05,
1099
+ 8e-05,
1100
+ 8e-05,
1101
+ 8e-05,
1102
+ 8e-05,
1103
+ 8e-05,
1104
+ 8e-05,
1105
+ 8e-05,
1106
+ 8e-05,
1107
+ 8e-05,
1108
+ 8e-05,
1109
+ 8e-05,
1110
+ 8e-05,
1111
+ 8e-05,
1112
+ 8e-05,
1113
+ 8e-05,
1114
+ 8e-05,
1115
+ 8e-05,
1116
+ 8e-05,
1117
+ 8e-05,
1118
+ 8e-05,
1119
+ 8e-05,
1120
+ 8e-05,
1121
+ 8e-05,
1122
+ 8e-05,
1123
+ 8e-05,
1124
+ 8e-05,
1125
+ 8e-05,
1126
+ 8e-05,
1127
+ 8e-05,
1128
+ 8e-05,
1129
+ 8e-05,
1130
+ 8e-05,
1131
+ 8e-05,
1132
+ 8e-05,
1133
+ 8e-05,
1134
+ 8e-05,
1135
+ 8e-05,
1136
+ 8e-05,
1137
+ 8e-05,
1138
+ 8e-05,
1139
+ 8e-05,
1140
+ 8e-05,
1141
+ 8e-05,
1142
+ 8e-05,
1143
+ 8e-05,
1144
+ 8e-05,
1145
+ 8e-05,
1146
+ 8e-05,
1147
+ 8e-05,
1148
+ 8e-05,
1149
+ 8e-05,
1150
+ 8e-05,
1151
+ 8e-05,
1152
+ 8e-05,
1153
+ 8e-05,
1154
+ 8e-05,
1155
+ 8e-05,
1156
+ 8e-05,
1157
+ 8e-05,
1158
+ 8e-05,
1159
+ 7.932818532818534e-05,
1160
+ 7.816988416988418e-05,
1161
+ 7.701158301158302e-05,
1162
+ 7.585328185328185e-05,
1163
+ 7.469498069498071e-05,
1164
+ 7.353667953667954e-05,
1165
+ 7.237837837837838e-05,
1166
+ 7.122007722007721e-05,
1167
+ 7.006177606177606e-05,
1168
+ 6.890347490347492e-05,
1169
+ 6.774517374517375e-05,
1170
+ 6.65868725868726e-05,
1171
+ 6.542857142857144e-05,
1172
+ 6.427027027027027e-05,
1173
+ 6.311196911196911e-05,
1174
+ 6.121235521235521e-05,
1175
+ 6.0054054054054064e-05,
1176
+ 5.8895752895752895e-05,
1177
+ 5.773745173745175e-05,
1178
+ 5.6579150579150584e-05,
1179
+ 5.542084942084943e-05,
1180
+ 5.426254826254825e-05,
1181
+ 5.310424710424711e-05,
1182
+ 5.194594594594594e-05,
1183
+ 5.0787644787644786e-05,
1184
+ 4.9629343629343644e-05,
1185
+ 4.8471042471042475e-05,
1186
+ 4.7312741312741326e-05,
1187
+ 4.615444015444014e-05,
1188
+ 4.4996138996139e-05,
1189
+ 4.309652509652511e-05,
1190
+ 4.1938223938223946e-05,
1191
+ 4.07799227799228e-05,
1192
+ 3.962162162162162e-05,
1193
+ 3.846332046332047e-05,
1194
+ 3.73050193050193e-05,
1195
+ 3.6146718146718155e-05,
1196
+ 3.4988416988416986e-05,
1197
+ 3.383011583011584e-05,
1198
+ 3.267181467181467e-05,
1199
+ 3.151351351351352e-05,
1200
+ 3.0355212355212367e-05,
1201
+ 2.9196911196911198e-05,
1202
+ 2.8038610038610046e-05,
1203
+ 2.6880308880308876e-05,
1204
+ 2.4980694980694983e-05,
1205
+ 2.3822393822393838e-05,
1206
+ 2.266409266409267e-05,
1207
+ 2.1505791505791517e-05,
1208
+ 2.0347490347490348e-05,
1209
+ 1.9189189189189195e-05,
1210
+ 1.8030888030888026e-05,
1211
+ 1.6872586872586878e-05,
1212
+ 1.571428571428571e-05,
1213
+ 1.455598455598456e-05,
1214
+ 1.3397683397683389e-05,
1215
+ 1.223938223938224e-05,
1216
+ 1.1081081081081092e-05,
1217
+ 9.92277992277992e-06,
1218
+ 8.764478764478772e-06
1219
+ ],
1220
+ "emb_lr": [],
1221
+ "eval_step": [
1222
+ 750,
1223
+ 1532,
1224
+ 2314,
1225
+ 3096,
1226
+ 3878,
1227
+ 4660,
1228
+ 5442,
1229
+ 6224,
1230
+ 7006,
1231
+ 7788
1232
+ ],
1233
+ "eval_accuracy": [
1234
+ 0.01,
1235
+ 0.0,
1236
+ 0.0,
1237
+ 0.0,
1238
+ 0.0,
1239
+ 0.0,
1240
+ 0.0,
1241
+ 0.0,
1242
+ 0.0,
1243
+ 0.0
1244
+ ]
1245
+ },
1246
+ "final_accuracy": 0.6078571428571429,
1247
+ "sft_eval": {
1248
+ "config": {
1249
+ "ops": "add_sub",
1250
+ "K": null,
1251
+ "mode": "sft",
1252
+ "n_digits": 6,
1253
+ "n_per_split": 50
1254
+ },
1255
+ "splits": {
1256
+ "add_S0": {
1257
+ "full_accuracy": 0.88,
1258
+ "n_examples": 50,
1259
+ "per_subtask": {
1260
+ "SA": {
1261
+ "accuracy": 0.9796610169491525,
1262
+ "count": 295
1263
+ },
1264
+ "SS": {
1265
+ "accuracy": 1.0,
1266
+ "count": 55
1267
+ }
1268
+ }
1269
+ },
1270
+ "add_S1": {
1271
+ "full_accuracy": 0.76,
1272
+ "n_examples": 50,
1273
+ "per_subtask": {
1274
+ "SA": {
1275
+ "accuracy": 0.9761904761904762,
1276
+ "count": 126
1277
+ },
1278
+ "SC": {
1279
+ "accuracy": 1.0,
1280
+ "count": 79
1281
+ },
1282
+ "SS": {
1283
+ "accuracy": 0.9523809523809523,
1284
+ "count": 21
1285
+ },
1286
+ "UC": {
1287
+ "accuracy": 0.9354838709677419,
1288
+ "count": 124
1289
+ }
1290
+ }
1291
+ },
1292
+ "add_S2": {
1293
+ "full_accuracy": 0.4,
1294
+ "n_examples": 50,
1295
+ "per_subtask": {
1296
+ "SA": {
1297
+ "accuracy": 0.9866666666666667,
1298
+ "count": 75
1299
+ },
1300
+ "SC": {
1301
+ "accuracy": 0.9193548387096774,
1302
+ "count": 62
1303
+ },
1304
+ "SS": {
1305
+ "accuracy": 0.9230769230769231,
1306
+ "count": 39
1307
+ },
1308
+ "UC": {
1309
+ "accuracy": 0.7747747747747747,
1310
+ "count": 111
1311
+ },
1312
+ "US": {
1313
+ "accuracy": 0.9523809523809523,
1314
+ "count": 63
1315
+ }
1316
+ }
1317
+ },
1318
+ "add_S3": {
1319
+ "full_accuracy": 0.3,
1320
+ "n_examples": 50,
1321
+ "per_subtask": {
1322
+ "SA": {
1323
+ "accuracy": 0.9666666666666667,
1324
+ "count": 60
1325
+ },
1326
+ "SC": {
1327
+ "accuracy": 0.9473684210526315,
1328
+ "count": 57
1329
+ },
1330
+ "SS": {
1331
+ "accuracy": 0.9473684210526315,
1332
+ "count": 19
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.7019230769230769,
1336
+ "count": 104
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 0.7909090909090909,
1340
+ "count": 110
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_S4": {
1345
+ "full_accuracy": 0.28,
1346
+ "n_examples": 50,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 1.0,
1350
+ "count": 48
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 0.9807692307692307,
1354
+ "count": 52
1355
+ },
1356
+ "SS": {
1357
+ "accuracy": 1.0,
1358
+ "count": 7
1359
+ },
1360
+ "UC": {
1361
+ "accuracy": 0.6179775280898876,
1362
+ "count": 89
1363
+ },
1364
+ "US": {
1365
+ "accuracy": 0.577922077922078,
1366
+ "count": 154
1367
+ }
1368
+ }
1369
+ },
1370
+ "add_S5": {
1371
+ "full_accuracy": 0.08,
1372
+ "n_examples": 50,
1373
+ "per_subtask": {
1374
+ "SA": {
1375
+ "accuracy": 1.0,
1376
+ "count": 50
1377
+ },
1378
+ "SC": {
1379
+ "accuracy": 1.0,
1380
+ "count": 50
1381
+ },
1382
+ "UC": {
1383
+ "accuracy": 0.26,
1384
+ "count": 50
1385
+ },
1386
+ "US": {
1387
+ "accuracy": 0.255,
1388
+ "count": 200
1389
+ }
1390
+ }
1391
+ },
1392
+ "add_S6": {
1393
+ "full_accuracy": 0.14,
1394
+ "n_examples": 50,
1395
+ "per_subtask": {
1396
+ "SC": {
1397
+ "accuracy": 1.0,
1398
+ "count": 50
1399
+ },
1400
+ "UC": {
1401
+ "accuracy": 0.32,
1402
+ "count": 50
1403
+ },
1404
+ "US": {
1405
+ "accuracy": 0.328,
1406
+ "count": 250
1407
+ }
1408
+ }
1409
+ },
1410
+ "add_random": {
1411
+ "full_accuracy": 0.805,
1412
+ "n_examples": 200,
1413
+ "per_subtask": {
1414
+ "SA": {
1415
+ "accuracy": 0.9837587006960556,
1416
+ "count": 431
1417
+ },
1418
+ "SC": {
1419
+ "accuracy": 0.9841772151898734,
1420
+ "count": 316
1421
+ },
1422
+ "SS": {
1423
+ "accuracy": 1.0,
1424
+ "count": 39
1425
+ },
1426
+ "UC": {
1427
+ "accuracy": 0.9464285714285714,
1428
+ "count": 560
1429
+ },
1430
+ "US": {
1431
+ "accuracy": 0.9259259259259259,
1432
+ "count": 54
1433
+ }
1434
+ }
1435
+ },
1436
+ "add_C3": {
1437
+ "full_accuracy": 0.58,
1438
+ "n_examples": 50,
1439
+ "per_subtask": {
1440
+ "SA": {
1441
+ "accuracy": 1.0,
1442
+ "count": 150
1443
+ },
1444
+ "SC": {
1445
+ "accuracy": 1.0,
1446
+ "count": 50
1447
+ },
1448
+ "UC": {
1449
+ "accuracy": 0.8173076923076923,
1450
+ "count": 104
1451
+ },
1452
+ "US": {
1453
+ "accuracy": 0.8043478260869565,
1454
+ "count": 46
1455
+ }
1456
+ }
1457
+ },
1458
+ "add_C4": {
1459
+ "full_accuracy": 0.4,
1460
+ "n_examples": 50,
1461
+ "per_subtask": {
1462
+ "SA": {
1463
+ "accuracy": 1.0,
1464
+ "count": 100
1465
+ },
1466
+ "SC": {
1467
+ "accuracy": 1.0,
1468
+ "count": 50
1469
+ },
1470
+ "UC": {
1471
+ "accuracy": 0.7560975609756098,
1472
+ "count": 123
1473
+ },
1474
+ "US": {
1475
+ "accuracy": 0.6363636363636364,
1476
+ "count": 77
1477
+ }
1478
+ }
1479
+ },
1480
+ "add_C5": {
1481
+ "full_accuracy": 0.42,
1482
+ "n_examples": 50,
1483
+ "per_subtask": {
1484
+ "SA": {
1485
+ "accuracy": 1.0,
1486
+ "count": 50
1487
+ },
1488
+ "SC": {
1489
+ "accuracy": 1.0,
1490
+ "count": 50
1491
+ },
1492
+ "UC": {
1493
+ "accuracy": 0.7792207792207793,
1494
+ "count": 154
1495
+ },
1496
+ "US": {
1497
+ "accuracy": 0.8229166666666666,
1498
+ "count": 96
1499
+ }
1500
+ }
1501
+ },
1502
+ "add_C6": {
1503
+ "full_accuracy": 0.42,
1504
+ "n_examples": 50,
1505
+ "per_subtask": {
1506
+ "SC": {
1507
+ "accuracy": 1.0,
1508
+ "count": 50
1509
+ },
1510
+ "UC": {
1511
+ "accuracy": 0.8186813186813187,
1512
+ "count": 182
1513
+ },
1514
+ "US": {
1515
+ "accuracy": 0.8559322033898306,
1516
+ "count": 118
1517
+ }
1518
+ }
1519
+ },
1520
+ "sub_M0": {
1521
+ "full_accuracy": 0.84,
1522
+ "n_examples": 50,
1523
+ "per_subtask": {
1524
+ "MD": {
1525
+ "accuracy": 0.9727891156462585,
1526
+ "count": 294
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 1.0,
1530
+ "count": 56
1531
+ }
1532
+ }
1533
+ },
1534
+ "sub_M1": {
1535
+ "full_accuracy": 0.74,
1536
+ "n_examples": 50,
1537
+ "per_subtask": {
1538
+ "MD": {
1539
+ "accuracy": 0.9790209790209791,
1540
+ "count": 143
1541
+ },
1542
+ "MB": {
1543
+ "accuracy": 0.9565217391304348,
1544
+ "count": 69
1545
+ },
1546
+ "ME": {
1547
+ "accuracy": 1.0,
1548
+ "count": 15
1549
+ },
1550
+ "UB": {
1551
+ "accuracy": 0.9349593495934959,
1552
+ "count": 123
1553
+ }
1554
+ }
1555
+ },
1556
+ "sub_M2": {
1557
+ "full_accuracy": 0.48,
1558
+ "n_examples": 50,
1559
+ "per_subtask": {
1560
+ "MD": {
1561
+ "accuracy": 0.9907407407407407,
1562
+ "count": 108
1563
+ },
1564
+ "MB": {
1565
+ "accuracy": 0.9615384615384616,
1566
+ "count": 52
1567
+ },
1568
+ "ME": {
1569
+ "accuracy": 1.0,
1570
+ "count": 52
1571
+ },
1572
+ "UB": {
1573
+ "accuracy": 0.7471264367816092,
1574
+ "count": 87
1575
+ },
1576
+ "UD": {
1577
+ "accuracy": 0.9215686274509803,
1578
+ "count": 51
1579
+ }
1580
+ }
1581
+ },
1582
+ "sub_M3": {
1583
+ "full_accuracy": 0.16,
1584
+ "n_examples": 50,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 1.0,
1588
+ "count": 94
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 0.9607843137254902,
1592
+ "count": 51
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 1.0,
1596
+ "count": 25
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.5256410256410257,
1600
+ "count": 78
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.6470588235294118,
1604
+ "count": 102
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_M4": {
1609
+ "full_accuracy": 0.02,
1610
+ "n_examples": 50,
1611
+ "per_subtask": {
1612
+ "MD": {
1613
+ "accuracy": 1.0,
1614
+ "count": 100
1615
+ },
1616
+ "MB": {
1617
+ "accuracy": 1.0,
1618
+ "count": 50
1619
+ },
1620
+ "UB": {
1621
+ "accuracy": 0.26,
1622
+ "count": 50
1623
+ },
1624
+ "UD": {
1625
+ "accuracy": 0.4066666666666667,
1626
+ "count": 150
1627
+ }
1628
+ }
1629
+ },
1630
+ "sub_M5": {
1631
+ "full_accuracy": 0.04,
1632
+ "n_examples": 50,
1633
+ "per_subtask": {
1634
+ "MD": {
1635
+ "accuracy": 1.0,
1636
+ "count": 50
1637
+ },
1638
+ "MB": {
1639
+ "accuracy": 1.0,
1640
+ "count": 50
1641
+ },
1642
+ "UB": {
1643
+ "accuracy": 0.42,
1644
+ "count": 50
1645
+ },
1646
+ "UD": {
1647
+ "accuracy": 0.285,
1648
+ "count": 200
1649
+ }
1650
+ }
1651
+ },
1652
+ "sub_random": {
1653
+ "full_accuracy": 0.67,
1654
+ "n_examples": 200,
1655
+ "per_subtask": {
1656
+ "MD": {
1657
+ "accuracy": 0.9778911564625851,
1658
+ "count": 588
1659
+ },
1660
+ "MB": {
1661
+ "accuracy": 0.9701492537313433,
1662
+ "count": 268
1663
+ },
1664
+ "ME": {
1665
+ "accuracy": 0.9833333333333333,
1666
+ "count": 60
1667
+ },
1668
+ "UB": {
1669
+ "accuracy": 0.8993288590604027,
1670
+ "count": 447
1671
+ },
1672
+ "UD": {
1673
+ "accuracy": 0.8918918918918919,
1674
+ "count": 37
1675
+ }
1676
+ }
1677
+ },
1678
+ "sub_B3": {
1679
+ "full_accuracy": 0.46,
1680
+ "n_examples": 50,
1681
+ "per_subtask": {
1682
+ "MD": {
1683
+ "accuracy": 1.0,
1684
+ "count": 150
1685
+ },
1686
+ "MB": {
1687
+ "accuracy": 1.0,
1688
+ "count": 50
1689
+ },
1690
+ "UB": {
1691
+ "accuracy": 0.7570093457943925,
1692
+ "count": 107
1693
+ },
1694
+ "UD": {
1695
+ "accuracy": 0.813953488372093,
1696
+ "count": 43
1697
+ }
1698
+ }
1699
+ },
1700
+ "sub_B4": {
1701
+ "full_accuracy": 0.3,
1702
+ "n_examples": 50,
1703
+ "per_subtask": {
1704
+ "MD": {
1705
+ "accuracy": 1.0,
1706
+ "count": 100
1707
+ },
1708
+ "MB": {
1709
+ "accuracy": 1.0,
1710
+ "count": 50
1711
+ },
1712
+ "UB": {
1713
+ "accuracy": 0.6929824561403509,
1714
+ "count": 114
1715
+ },
1716
+ "UD": {
1717
+ "accuracy": 0.6976744186046512,
1718
+ "count": 86
1719
+ }
1720
+ }
1721
+ },
1722
+ "sub_B5": {
1723
+ "full_accuracy": 0.28,
1724
+ "n_examples": 50,
1725
+ "per_subtask": {
1726
+ "MD": {
1727
+ "accuracy": 1.0,
1728
+ "count": 50
1729
+ },
1730
+ "MB": {
1731
+ "accuracy": 1.0,
1732
+ "count": 50
1733
+ },
1734
+ "UB": {
1735
+ "accuracy": 0.6928104575163399,
1736
+ "count": 153
1737
+ },
1738
+ "UD": {
1739
+ "accuracy": 0.5773195876288659,
1740
+ "count": 97
1741
+ }
1742
+ }
1743
+ }
1744
+ },
1745
+ "summary": {
1746
+ "overall_accuracy": 0.495,
1747
+ "total_examples": 1400,
1748
+ "n_splits": 22
1749
+ }
1750
+ },
1751
+ "sorl_eval": {
1752
+ "config": {
1753
+ "ops": "add_sub",
1754
+ "K": 1,
1755
+ "mode": "sorl",
1756
+ "n_digits": 6,
1757
+ "n_per_split": 50
1758
+ },
1759
+ "splits": {
1760
+ "add_S0": {
1761
+ "full_accuracy": 0.9,
1762
+ "n_examples": 50,
1763
+ "per_subtask": {
1764
+ "SA": {
1765
+ "accuracy": 0.9830508474576272,
1766
+ "count": 295
1767
+ },
1768
+ "SS": {
1769
+ "accuracy": 0.9636363636363636,
1770
+ "count": 55
1771
+ }
1772
+ }
1773
+ },
1774
+ "add_S1": {
1775
+ "full_accuracy": 0.88,
1776
+ "n_examples": 50,
1777
+ "per_subtask": {
1778
+ "SA": {
1779
+ "accuracy": 0.9841269841269841,
1780
+ "count": 126
1781
+ },
1782
+ "SC": {
1783
+ "accuracy": 1.0,
1784
+ "count": 79
1785
+ },
1786
+ "SS": {
1787
+ "accuracy": 1.0,
1788
+ "count": 21
1789
+ },
1790
+ "UC": {
1791
+ "accuracy": 0.9596774193548387,
1792
+ "count": 124
1793
+ }
1794
+ }
1795
+ },
1796
+ "add_S2": {
1797
+ "full_accuracy": 0.56,
1798
+ "n_examples": 50,
1799
+ "per_subtask": {
1800
+ "SA": {
1801
+ "accuracy": 0.9733333333333334,
1802
+ "count": 75
1803
+ },
1804
+ "SC": {
1805
+ "accuracy": 0.9838709677419355,
1806
+ "count": 62
1807
+ },
1808
+ "SS": {
1809
+ "accuracy": 0.9743589743589743,
1810
+ "count": 39
1811
+ },
1812
+ "UC": {
1813
+ "accuracy": 0.8288288288288288,
1814
+ "count": 111
1815
+ },
1816
+ "US": {
1817
+ "accuracy": 0.9841269841269841,
1818
+ "count": 63
1819
+ }
1820
+ }
1821
+ },
1822
+ "add_S3": {
1823
+ "full_accuracy": 0.48,
1824
+ "n_examples": 50,
1825
+ "per_subtask": {
1826
+ "SA": {
1827
+ "accuracy": 1.0,
1828
+ "count": 60
1829
+ },
1830
+ "SC": {
1831
+ "accuracy": 0.9649122807017544,
1832
+ "count": 57
1833
+ },
1834
+ "SS": {
1835
+ "accuracy": 1.0,
1836
+ "count": 19
1837
+ },
1838
+ "UC": {
1839
+ "accuracy": 0.7884615384615384,
1840
+ "count": 104
1841
+ },
1842
+ "US": {
1843
+ "accuracy": 0.9090909090909091,
1844
+ "count": 110
1845
+ }
1846
+ }
1847
+ },
1848
+ "add_S4": {
1849
+ "full_accuracy": 0.28,
1850
+ "n_examples": 50,
1851
+ "per_subtask": {
1852
+ "SA": {
1853
+ "accuracy": 1.0,
1854
+ "count": 48
1855
+ },
1856
+ "SC": {
1857
+ "accuracy": 1.0,
1858
+ "count": 52
1859
+ },
1860
+ "SS": {
1861
+ "accuracy": 1.0,
1862
+ "count": 7
1863
+ },
1864
+ "UC": {
1865
+ "accuracy": 0.6067415730337079,
1866
+ "count": 89
1867
+ },
1868
+ "US": {
1869
+ "accuracy": 0.7792207792207793,
1870
+ "count": 154
1871
+ }
1872
+ }
1873
+ },
1874
+ "add_S5": {
1875
+ "full_accuracy": 0.16,
1876
+ "n_examples": 50,
1877
+ "per_subtask": {
1878
+ "SA": {
1879
+ "accuracy": 1.0,
1880
+ "count": 50
1881
+ },
1882
+ "SC": {
1883
+ "accuracy": 1.0,
1884
+ "count": 50
1885
+ },
1886
+ "UC": {
1887
+ "accuracy": 0.36,
1888
+ "count": 50
1889
+ },
1890
+ "US": {
1891
+ "accuracy": 0.48,
1892
+ "count": 200
1893
+ }
1894
+ }
1895
+ },
1896
+ "add_S6": {
1897
+ "full_accuracy": 0.22,
1898
+ "n_examples": 50,
1899
+ "per_subtask": {
1900
+ "SC": {
1901
+ "accuracy": 1.0,
1902
+ "count": 50
1903
+ },
1904
+ "UC": {
1905
+ "accuracy": 0.38,
1906
+ "count": 50
1907
+ },
1908
+ "US": {
1909
+ "accuracy": 0.54,
1910
+ "count": 250
1911
+ }
1912
+ }
1913
+ },
1914
+ "add_random": {
1915
+ "full_accuracy": 0.85,
1916
+ "n_examples": 200,
1917
+ "per_subtask": {
1918
+ "SA": {
1919
+ "accuracy": 0.9837587006960556,
1920
+ "count": 431
1921
+ },
1922
+ "SC": {
1923
+ "accuracy": 0.9968354430379747,
1924
+ "count": 316
1925
+ },
1926
+ "SS": {
1927
+ "accuracy": 1.0,
1928
+ "count": 39
1929
+ },
1930
+ "UC": {
1931
+ "accuracy": 0.9571428571428572,
1932
+ "count": 560
1933
+ },
1934
+ "US": {
1935
+ "accuracy": 0.9814814814814815,
1936
+ "count": 54
1937
+ }
1938
+ }
1939
+ },
1940
+ "add_C3": {
1941
+ "full_accuracy": 0.66,
1942
+ "n_examples": 50,
1943
+ "per_subtask": {
1944
+ "SA": {
1945
+ "accuracy": 0.9933333333333333,
1946
+ "count": 150
1947
+ },
1948
+ "SC": {
1949
+ "accuracy": 1.0,
1950
+ "count": 50
1951
+ },
1952
+ "UC": {
1953
+ "accuracy": 0.8653846153846154,
1954
+ "count": 104
1955
+ },
1956
+ "US": {
1957
+ "accuracy": 0.8260869565217391,
1958
+ "count": 46
1959
+ }
1960
+ }
1961
+ },
1962
+ "add_C4": {
1963
+ "full_accuracy": 0.52,
1964
+ "n_examples": 50,
1965
+ "per_subtask": {
1966
+ "SA": {
1967
+ "accuracy": 0.99,
1968
+ "count": 100
1969
+ },
1970
+ "SC": {
1971
+ "accuracy": 1.0,
1972
+ "count": 50
1973
+ },
1974
+ "UC": {
1975
+ "accuracy": 0.8130081300813008,
1976
+ "count": 123
1977
+ },
1978
+ "US": {
1979
+ "accuracy": 0.7272727272727273,
1980
+ "count": 77
1981
+ }
1982
+ }
1983
+ },
1984
+ "add_C5": {
1985
+ "full_accuracy": 0.48,
1986
+ "n_examples": 50,
1987
+ "per_subtask": {
1988
+ "SA": {
1989
+ "accuracy": 1.0,
1990
+ "count": 50
1991
+ },
1992
+ "SC": {
1993
+ "accuracy": 1.0,
1994
+ "count": 50
1995
+ },
1996
+ "UC": {
1997
+ "accuracy": 0.8051948051948052,
1998
+ "count": 154
1999
+ },
2000
+ "US": {
2001
+ "accuracy": 0.8645833333333334,
2002
+ "count": 96
2003
+ }
2004
+ }
2005
+ },
2006
+ "add_C6": {
2007
+ "full_accuracy": 0.42,
2008
+ "n_examples": 50,
2009
+ "per_subtask": {
2010
+ "SC": {
2011
+ "accuracy": 1.0,
2012
+ "count": 50
2013
+ },
2014
+ "UC": {
2015
+ "accuracy": 0.8296703296703297,
2016
+ "count": 182
2017
+ },
2018
+ "US": {
2019
+ "accuracy": 0.8305084745762712,
2020
+ "count": 118
2021
+ }
2022
+ }
2023
+ },
2024
+ "sub_M0": {
2025
+ "full_accuracy": 0.96,
2026
+ "n_examples": 50,
2027
+ "per_subtask": {
2028
+ "MD": {
2029
+ "accuracy": 0.9931972789115646,
2030
+ "count": 294
2031
+ },
2032
+ "ME": {
2033
+ "accuracy": 1.0,
2034
+ "count": 56
2035
+ }
2036
+ }
2037
+ },
2038
+ "sub_M1": {
2039
+ "full_accuracy": 0.86,
2040
+ "n_examples": 50,
2041
+ "per_subtask": {
2042
+ "MD": {
2043
+ "accuracy": 0.986013986013986,
2044
+ "count": 143
2045
+ },
2046
+ "MB": {
2047
+ "accuracy": 0.9855072463768116,
2048
+ "count": 69
2049
+ },
2050
+ "ME": {
2051
+ "accuracy": 1.0,
2052
+ "count": 15
2053
+ },
2054
+ "UB": {
2055
+ "accuracy": 0.967479674796748,
2056
+ "count": 123
2057
+ }
2058
+ }
2059
+ },
2060
+ "sub_M2": {
2061
+ "full_accuracy": 0.7,
2062
+ "n_examples": 50,
2063
+ "per_subtask": {
2064
+ "MD": {
2065
+ "accuracy": 0.9629629629629629,
2066
+ "count": 108
2067
+ },
2068
+ "MB": {
2069
+ "accuracy": 0.9807692307692307,
2070
+ "count": 52
2071
+ },
2072
+ "ME": {
2073
+ "accuracy": 1.0,
2074
+ "count": 52
2075
+ },
2076
+ "UB": {
2077
+ "accuracy": 0.8735632183908046,
2078
+ "count": 87
2079
+ },
2080
+ "UD": {
2081
+ "accuracy": 0.9607843137254902,
2082
+ "count": 51
2083
+ }
2084
+ }
2085
+ },
2086
+ "sub_M3": {
2087
+ "full_accuracy": 0.34,
2088
+ "n_examples": 50,
2089
+ "per_subtask": {
2090
+ "MD": {
2091
+ "accuracy": 1.0,
2092
+ "count": 94
2093
+ },
2094
+ "MB": {
2095
+ "accuracy": 0.9803921568627451,
2096
+ "count": 51
2097
+ },
2098
+ "ME": {
2099
+ "accuracy": 1.0,
2100
+ "count": 25
2101
+ },
2102
+ "UB": {
2103
+ "accuracy": 0.6282051282051282,
2104
+ "count": 78
2105
+ },
2106
+ "UD": {
2107
+ "accuracy": 0.7843137254901961,
2108
+ "count": 102
2109
+ }
2110
+ }
2111
+ },
2112
+ "sub_M4": {
2113
+ "full_accuracy": 0.2,
2114
+ "n_examples": 50,
2115
+ "per_subtask": {
2116
+ "MD": {
2117
+ "accuracy": 1.0,
2118
+ "count": 100
2119
+ },
2120
+ "MB": {
2121
+ "accuracy": 1.0,
2122
+ "count": 50
2123
+ },
2124
+ "UB": {
2125
+ "accuracy": 0.38,
2126
+ "count": 50
2127
+ },
2128
+ "UD": {
2129
+ "accuracy": 0.6,
2130
+ "count": 150
2131
+ }
2132
+ }
2133
+ },
2134
+ "sub_M5": {
2135
+ "full_accuracy": 0.12,
2136
+ "n_examples": 50,
2137
+ "per_subtask": {
2138
+ "MD": {
2139
+ "accuracy": 1.0,
2140
+ "count": 50
2141
+ },
2142
+ "MB": {
2143
+ "accuracy": 1.0,
2144
+ "count": 50
2145
+ },
2146
+ "UB": {
2147
+ "accuracy": 0.38,
2148
+ "count": 50
2149
+ },
2150
+ "UD": {
2151
+ "accuracy": 0.42,
2152
+ "count": 200
2153
+ }
2154
+ }
2155
+ },
2156
+ "sub_random": {
2157
+ "full_accuracy": 0.795,
2158
+ "n_examples": 200,
2159
+ "per_subtask": {
2160
+ "MD": {
2161
+ "accuracy": 0.9863945578231292,
2162
+ "count": 588
2163
+ },
2164
+ "MB": {
2165
+ "accuracy": 0.9813432835820896,
2166
+ "count": 268
2167
+ },
2168
+ "ME": {
2169
+ "accuracy": 0.9833333333333333,
2170
+ "count": 60
2171
+ },
2172
+ "UB": {
2173
+ "accuracy": 0.9373601789709173,
2174
+ "count": 447
2175
+ },
2176
+ "UD": {
2177
+ "accuracy": 0.9459459459459459,
2178
+ "count": 37
2179
+ }
2180
+ }
2181
+ },
2182
+ "sub_B3": {
2183
+ "full_accuracy": 0.68,
2184
+ "n_examples": 50,
2185
+ "per_subtask": {
2186
+ "MD": {
2187
+ "accuracy": 1.0,
2188
+ "count": 150
2189
+ },
2190
+ "MB": {
2191
+ "accuracy": 1.0,
2192
+ "count": 50
2193
+ },
2194
+ "UB": {
2195
+ "accuracy": 0.8598130841121495,
2196
+ "count": 107
2197
+ },
2198
+ "UD": {
2199
+ "accuracy": 0.8837209302325582,
2200
+ "count": 43
2201
+ }
2202
+ }
2203
+ },
2204
+ "sub_B4": {
2205
+ "full_accuracy": 0.56,
2206
+ "n_examples": 50,
2207
+ "per_subtask": {
2208
+ "MD": {
2209
+ "accuracy": 1.0,
2210
+ "count": 100
2211
+ },
2212
+ "MB": {
2213
+ "accuracy": 1.0,
2214
+ "count": 50
2215
+ },
2216
+ "UB": {
2217
+ "accuracy": 0.8157894736842105,
2218
+ "count": 114
2219
+ },
2220
+ "UD": {
2221
+ "accuracy": 0.8488372093023255,
2222
+ "count": 86
2223
+ }
2224
+ }
2225
+ },
2226
+ "sub_B5": {
2227
+ "full_accuracy": 0.46,
2228
+ "n_examples": 50,
2229
+ "per_subtask": {
2230
+ "MD": {
2231
+ "accuracy": 1.0,
2232
+ "count": 50
2233
+ },
2234
+ "MB": {
2235
+ "accuracy": 1.0,
2236
+ "count": 50
2237
+ },
2238
+ "UB": {
2239
+ "accuracy": 0.8366013071895425,
2240
+ "count": 153
2241
+ },
2242
+ "UD": {
2243
+ "accuracy": 0.711340206185567,
2244
+ "count": 97
2245
+ }
2246
+ }
2247
+ }
2248
+ },
2249
+ "summary": {
2250
+ "overall_accuracy": 0.6078571428571429,
2251
+ "total_examples": 1400,
2252
+ "n_splits": 22
2253
+ }
2254
+ },
2255
+ "sorl_overall_accuracy": 0.6078571428571429,
2256
+ "sft_overall_accuracy": 0.495
2257
+ }
add_sub_sorl_v1_abs10_K1_50K_1L3H510d/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83775b1d2ac97ad283814b3a9af8f2969068a1ff887cab77d2f8f9d3b7db9a1
3
+ size 634679036
add_sub_sorl_v1_abs10_K1_50K_1L3H510d/train_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "sorl",
3
+ "ops": "add_sub",
4
+ "n_digits": 6,
5
+ "n_layer": 1,
6
+ "n_head": 3,
7
+ "n_embd": 510,
8
+ "abs_vocab": 10,
9
+ "K": 1,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "batch_size": 64,
14
+ "num_epochs": 10,
15
+ "dataset_size": 50000,
16
+ "lr": 8e-05,
17
+ "output_dir": "ckpt/sweep/as_sorl_abs10_K1_50K_1L3H510d",
18
+ "device": "cuda",
19
+ "push_to_hub": true,
20
+ "no_wandb": false,
21
+ "n_params": 158593426,
22
+ "run_name": "add_sub_sorl_v1_abs10_K1_50K_1L3H510d",
23
+ "git_commit": "8b149cd78544f600d46584ae2f143c0261d8eeb8",
24
+ "timestamp": "2026-04-12T04:55:37.572226+00:00",
25
+ "tokenizer": "Qwen/Qwen3-0.6B",
26
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
+ "dataset_config": "add_sub_6digit",
28
+ "model_repo": "thoughtworks/arithmetic-sorl",
29
+ "trainer_version": "v1",
30
+ "wandb_run_id": "tw2j1378",
31
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/tw2j1378",
32
+ "final_accuracy": 0.6078571428571429,
33
+ "sft_accuracy": 0.495,
34
+ "eval_method": "ArithmeticEvaluator"
35
+ }