amirali1985 commited on
Commit
0b0bf2f
·
verified ·
1 Parent(s): 459a6e3

Upload add_sub_sorl_v1_abs30_50K

Browse files
add_sub_sorl_v1_abs30_50K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151674
37
+ }
add_sub_sorl_v1_abs30_50K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs30_50K/metrics.json ADDED
@@ -0,0 +1,2257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 400,
12
+ 450,
13
+ 500,
14
+ 550,
15
+ 600,
16
+ 650,
17
+ 700,
18
+ 750,
19
+ 832,
20
+ 882,
21
+ 932,
22
+ 982,
23
+ 1032,
24
+ 1082,
25
+ 1132,
26
+ 1182,
27
+ 1232,
28
+ 1282,
29
+ 1332,
30
+ 1382,
31
+ 1432,
32
+ 1482,
33
+ 1532,
34
+ 1614,
35
+ 1664,
36
+ 1714,
37
+ 1764,
38
+ 1814,
39
+ 1864,
40
+ 1914,
41
+ 1964,
42
+ 2014,
43
+ 2064,
44
+ 2114,
45
+ 2164,
46
+ 2214,
47
+ 2264,
48
+ 2314,
49
+ 2396,
50
+ 2446,
51
+ 2496,
52
+ 2546,
53
+ 2596,
54
+ 2646,
55
+ 2696,
56
+ 2746,
57
+ 2796,
58
+ 2846,
59
+ 2896,
60
+ 2946,
61
+ 2996,
62
+ 3046,
63
+ 3096,
64
+ 3178,
65
+ 3228,
66
+ 3278,
67
+ 3328,
68
+ 3378,
69
+ 3428,
70
+ 3478,
71
+ 3528,
72
+ 3578,
73
+ 3628,
74
+ 3678,
75
+ 3728,
76
+ 3778,
77
+ 3828,
78
+ 3878,
79
+ 3960,
80
+ 4010,
81
+ 4060,
82
+ 4110,
83
+ 4160,
84
+ 4210,
85
+ 4260,
86
+ 4310,
87
+ 4360,
88
+ 4410,
89
+ 4460,
90
+ 4510,
91
+ 4560,
92
+ 4610,
93
+ 4660,
94
+ 4742,
95
+ 4792,
96
+ 4842,
97
+ 4892,
98
+ 4942,
99
+ 4992,
100
+ 5042,
101
+ 5092,
102
+ 5142,
103
+ 5192,
104
+ 5242,
105
+ 5292,
106
+ 5342,
107
+ 5392,
108
+ 5442,
109
+ 5524,
110
+ 5574,
111
+ 5624,
112
+ 5674,
113
+ 5724,
114
+ 5774,
115
+ 5824,
116
+ 5874,
117
+ 5924,
118
+ 5974,
119
+ 6024,
120
+ 6074,
121
+ 6124,
122
+ 6174,
123
+ 6224,
124
+ 6306,
125
+ 6356,
126
+ 6406,
127
+ 6456,
128
+ 6506,
129
+ 6556,
130
+ 6606,
131
+ 6656,
132
+ 6706,
133
+ 6756,
134
+ 6806,
135
+ 6856,
136
+ 6906,
137
+ 6956,
138
+ 7006,
139
+ 7088,
140
+ 7138,
141
+ 7188,
142
+ 7238,
143
+ 7288,
144
+ 7338,
145
+ 7388,
146
+ 7438,
147
+ 7488,
148
+ 7538,
149
+ 7588,
150
+ 7638,
151
+ 7688,
152
+ 7738,
153
+ 7788
154
+ ],
155
+ "loss": [
156
+ 14.925691604614258,
157
+ 10.310871124267578,
158
+ 7.15638542175293,
159
+ 4.645656108856201,
160
+ 2.783916711807251,
161
+ 2.4809165000915527,
162
+ 2.7336008548736572,
163
+ 2.3534011840820312,
164
+ 2.153804302215576,
165
+ 1.4827725887298584,
166
+ 1.6645079851150513,
167
+ 1.3439991474151611,
168
+ 1.3138333559036255,
169
+ 0.969010055065155,
170
+ 0.8032979965209961,
171
+ 0.020356476306915283,
172
+ -1.0327482223510742,
173
+ -2.2422666549682617,
174
+ -4.954840660095215,
175
+ -6.268737316131592,
176
+ -7.6543707847595215,
177
+ -8.756476402282715,
178
+ -10.073406219482422,
179
+ -9.491241455078125,
180
+ -10.407691955566406,
181
+ -10.537861824035645,
182
+ -11.763997077941895,
183
+ -11.57911205291748,
184
+ -12.794604301452637,
185
+ -12.53265380859375,
186
+ -13.207358360290527,
187
+ -13.6425142288208,
188
+ -12.904062271118164,
189
+ -14.24144458770752,
190
+ -13.814043998718262,
191
+ -13.221015930175781,
192
+ -13.979741096496582,
193
+ -13.792145729064941,
194
+ -13.058844566345215,
195
+ -14.35586166381836,
196
+ -14.468194007873535,
197
+ -14.658733367919922,
198
+ -14.305359840393066,
199
+ -13.779980659484863,
200
+ -13.826972007751465,
201
+ -14.544889450073242,
202
+ -14.795659065246582,
203
+ -14.45028018951416,
204
+ -13.597357749938965,
205
+ -13.89336109161377,
206
+ -13.701704025268555,
207
+ -13.72293472290039,
208
+ -13.997247695922852,
209
+ -14.357726097106934,
210
+ -14.675873756408691,
211
+ -14.496241569519043,
212
+ -13.631208419799805,
213
+ -14.704141616821289,
214
+ -14.385679244995117,
215
+ -14.757608413696289,
216
+ -14.189804077148438,
217
+ -14.076844215393066,
218
+ -14.364145278930664,
219
+ -14.118315696716309,
220
+ -14.320837020874023,
221
+ -14.189194679260254,
222
+ -13.755496978759766,
223
+ -14.0785493850708,
224
+ -14.141383171081543,
225
+ -13.994441986083984,
226
+ -14.23997974395752,
227
+ -13.254907608032227,
228
+ -13.587363243103027,
229
+ -13.777580261230469,
230
+ -13.173924446105957,
231
+ -11.22693157196045,
232
+ -10.27431869506836,
233
+ -7.386316776275635,
234
+ -6.493227481842041,
235
+ -5.810792922973633,
236
+ -5.425526142120361,
237
+ -5.403774738311768,
238
+ -5.274623870849609,
239
+ -5.1203083992004395,
240
+ -5.120668411254883,
241
+ -4.660671234130859,
242
+ -4.481213092803955,
243
+ -4.476528644561768,
244
+ -4.4041571617126465,
245
+ -4.332465648651123,
246
+ -4.374258995056152,
247
+ -4.8543548583984375,
248
+ -3.9383552074432373,
249
+ -3.6458797454833984,
250
+ -3.5874781608581543,
251
+ -4.045778274536133,
252
+ -3.5161209106445312,
253
+ -3.6460752487182617,
254
+ -3.4689207077026367,
255
+ -3.659513473510742,
256
+ -3.5809338092803955,
257
+ -3.4906952381134033,
258
+ -3.3742854595184326,
259
+ -3.003173589706421,
260
+ -3.4980430603027344,
261
+ -3.279160499572754,
262
+ -2.6153182983398438,
263
+ -3.0391597747802734,
264
+ -2.8614397048950195,
265
+ -3.2860047817230225,
266
+ -3.176285743713379,
267
+ -2.8159425258636475,
268
+ -2.7563352584838867,
269
+ -2.9784791469573975,
270
+ -2.917531728744507,
271
+ -2.713247537612915,
272
+ -2.519273281097412,
273
+ -2.386897325515747,
274
+ -2.3819081783294678,
275
+ -2.354832649230957,
276
+ -2.129584550857544,
277
+ -2.350475788116455,
278
+ -2.5259969234466553,
279
+ -2.2565855979919434,
280
+ -2.326547384262085,
281
+ -2.5057482719421387,
282
+ -1.9806913137435913,
283
+ -1.929569125175476,
284
+ -2.4524779319763184,
285
+ -2.075390338897705,
286
+ -1.8700034618377686,
287
+ -1.989833116531372,
288
+ -2.270037889480591,
289
+ -1.87515127658844,
290
+ -1.9000284671783447,
291
+ -1.84731924533844,
292
+ -1.946153163909912,
293
+ -1.4869354963302612,
294
+ -1.890562891960144,
295
+ -1.7415295839309692,
296
+ -1.9608829021453857,
297
+ -1.5101802349090576,
298
+ -1.5716921091079712,
299
+ -1.6069391965866089,
300
+ -1.429286241531372,
301
+ -1.6014431715011597,
302
+ -1.6325364112854004,
303
+ -1.3494127988815308,
304
+ -1.460780382156372,
305
+ -1.367804765701294
306
+ ],
307
+ "base_loss": [
308
+ 9.473811149597168,
309
+ 7.116661071777344,
310
+ 6.11044979095459,
311
+ 4.431495189666748,
312
+ 2.3752102851867676,
313
+ 2.003913640975952,
314
+ 1.8888157606124878,
315
+ 1.960724115371704,
316
+ 1.8655742406845093,
317
+ 1.85293710231781,
318
+ 1.8393819332122803,
319
+ 1.7841354608535767,
320
+ 1.8464338779449463,
321
+ 1.8436554670333862,
322
+ 1.8355497121810913,
323
+ 1.8096510171890259,
324
+ 1.8457410335540771,
325
+ 1.8437222242355347,
326
+ 2.0343832969665527,
327
+ 1.8724702596664429,
328
+ 1.8979225158691406,
329
+ 1.8813482522964478,
330
+ 1.9130842685699463,
331
+ 1.6835806369781494,
332
+ 1.7973169088363647,
333
+ 1.8007093667984009,
334
+ 1.7375422716140747,
335
+ 1.7017974853515625,
336
+ 1.7410304546356201,
337
+ 1.7181750535964966,
338
+ 1.7001968622207642,
339
+ 1.7098767757415771,
340
+ 1.6393336057662964,
341
+ 1.7414904832839966,
342
+ 1.6889947652816772,
343
+ 1.6481058597564697,
344
+ 1.6936885118484497,
345
+ 1.6631056070327759,
346
+ 1.587760329246521,
347
+ 1.6887438297271729,
348
+ 1.6885937452316284,
349
+ 1.7139508724212646,
350
+ 1.6895262002944946,
351
+ 1.6100366115570068,
352
+ 1.6152337789535522,
353
+ 1.6766698360443115,
354
+ 1.7149112224578857,
355
+ 1.6795676946640015,
356
+ 1.6007461547851562,
357
+ 1.6156295537948608,
358
+ 1.5643459558486938,
359
+ 1.551654577255249,
360
+ 1.5922502279281616,
361
+ 1.6294152736663818,
362
+ 1.663206934928894,
363
+ 1.6547516584396362,
364
+ 1.560704231262207,
365
+ 1.6592508554458618,
366
+ 1.6300275325775146,
367
+ 1.6693183183670044,
368
+ 1.593511700630188,
369
+ 1.5905581712722778,
370
+ 1.6223725080490112,
371
+ 1.6157256364822388,
372
+ 1.6122468709945679,
373
+ 1.6028401851654053,
374
+ 1.56356942653656,
375
+ 1.5920432806015015,
376
+ 1.5996407270431519,
377
+ 1.5716848373413086,
378
+ 1.5974481105804443,
379
+ 1.5151593685150146,
380
+ 1.5354450941085815,
381
+ 1.5449351072311401,
382
+ 1.4808731079101562,
383
+ 1.261785626411438,
384
+ 1.1568143367767334,
385
+ 0.8340187072753906,
386
+ 0.7368689775466919,
387
+ 0.6682016253471375,
388
+ 0.6186455488204956,
389
+ 0.6124207377433777,
390
+ 0.5980902910232544,
391
+ 0.59001624584198,
392
+ 0.5819413065910339,
393
+ 0.5578277707099915,
394
+ 0.5163649320602417,
395
+ 0.5072833299636841,
396
+ 0.49900832772254944,
397
+ 0.4926478862762451,
398
+ 0.4992688000202179,
399
+ 0.5497917532920837,
400
+ 0.46581941843032837,
401
+ 0.441849023103714,
402
+ 0.42151254415512085,
403
+ 0.476866751909256,
404
+ 0.40012720227241516,
405
+ 0.42673566937446594,
406
+ 0.4007093906402588,
407
+ 0.4211159646511078,
408
+ 0.4143979847431183,
409
+ 0.3965285122394562,
410
+ 0.3838675916194916,
411
+ 0.3422652781009674,
412
+ 0.39702606201171875,
413
+ 0.3723902702331543,
414
+ 0.30194923281669617,
415
+ 0.34926849603652954,
416
+ 0.3268876075744629,
417
+ 0.37237080931663513,
418
+ 0.36258235573768616,
419
+ 0.32173871994018555,
420
+ 0.3123743236064911,
421
+ 0.3381679952144623,
422
+ 0.33103036880493164,
423
+ 0.30924150347709656,
424
+ 0.28604206442832947,
425
+ 0.27141666412353516,
426
+ 0.26933255791664124,
427
+ 0.2693643271923065,
428
+ 0.25377893447875977,
429
+ 0.2691524624824524,
430
+ 0.2878199517726898,
431
+ 0.25663235783576965,
432
+ 0.27092984318733215,
433
+ 0.28533241152763367,
434
+ 0.22697682678699493,
435
+ 0.21974924206733704,
436
+ 0.2805941104888916,
437
+ 0.24086768925189972,
438
+ 0.2150326520204544,
439
+ 0.22676725685596466,
440
+ 0.2578849494457245,
441
+ 0.21510757505893707,
442
+ 0.21743905544281006,
443
+ 0.2111271172761917,
444
+ 0.22255614399909973,
445
+ 0.17357924580574036,
446
+ 0.2218475341796875,
447
+ 0.20064163208007812,
448
+ 0.22746801376342773,
449
+ 0.1737966537475586,
450
+ 0.1820410192012787,
451
+ 0.183633491396904,
452
+ 0.16445359587669373,
453
+ 0.18439149856567383,
454
+ 0.1871500015258789,
455
+ 0.15578648447990417,
456
+ 0.17073509097099304,
457
+ 0.15720374882221222
458
+ ],
459
+ "info_loss": [
460
+ -0.14559650421142578,
461
+ -0.17780828475952148,
462
+ -0.1878204345703125,
463
+ -0.14971208572387695,
464
+ -0.08238768577575684,
465
+ -0.06698238849639893,
466
+ -0.02822732925415039,
467
+ -0.07236886024475098,
468
+ -0.08202946186065674,
469
+ -0.14824891090393066,
470
+ -0.12821662425994873,
471
+ -0.15460705757141113,
472
+ -0.16380441188812256,
473
+ -0.19755160808563232,
474
+ -0.21291589736938477,
475
+ -0.28692495822906494,
476
+ -0.3872305154800415,
477
+ -0.48517298698425293,
478
+ -0.7449564933776855,
479
+ -0.8471189737319946,
480
+ -0.9838017225265503,
481
+ -1.090334177017212,
482
+ -1.2246140241622925,
483
+ -1.1417529582977295,
484
+ -1.2453727722167969,
485
+ -1.256455421447754,
486
+ -1.3754072189331055,
487
+ -1.350670576095581,
488
+ -1.4751745462417603,
489
+ -1.444676399230957,
490
+ -1.5119096040725708,
491
+ -1.5553081035614014,
492
+ -1.4727306365966797,
493
+ -1.6164841651916504,
494
+ -1.5695509910583496,
495
+ -1.505563735961914,
496
+ -1.5860234498977661,
497
+ -1.563294768333435,
498
+ -1.4825890064239502,
499
+ -1.6203558444976807,
500
+ -1.6318106651306152,
501
+ -1.6535658836364746,
502
+ -1.6152052879333496,
503
+ -1.554687738418579,
504
+ -1.558127760887146,
505
+ -1.6352657079696655,
506
+ -1.6648855209350586,
507
+ -1.6253372430801392,
508
+ -1.5322500467300415,
509
+ -1.5633134841918945,
510
+ -1.5416780710220337,
511
+ -1.5396777391433716,
512
+ -1.5700269937515259,
513
+ -1.609108805656433,
514
+ -1.6445403099060059,
515
+ -1.6247903108596802,
516
+ -1.5285240411758423,
517
+ -1.647214651107788,
518
+ -1.6125141382217407,
519
+ -1.653419017791748,
520
+ -1.5891343355178833,
521
+ -1.5781067609786987,
522
+ -1.6078630685806274,
523
+ -1.5827549695968628,
524
+ -1.6012533903121948,
525
+ -1.5873873233795166,
526
+ -1.539661169052124,
527
+ -1.5761311054229736,
528
+ -1.580564260482788,
529
+ -1.5653799772262573,
530
+ -1.5943796634674072,
531
+ -1.4857620000839233,
532
+ -1.5196541547775269,
533
+ -1.5394915342330933,
534
+ -1.472989797592163,
535
+ -1.257468342781067,
536
+ -1.1512645483016968,
537
+ -0.8297007083892822,
538
+ -0.7310910820960999,
539
+ -0.656235933303833,
540
+ -0.6113075017929077,
541
+ -0.607907772064209,
542
+ -0.5933750867843628,
543
+ -0.5776615142822266,
544
+ -0.5772484540939331,
545
+ -0.5271150469779968,
546
+ -0.5060456991195679,
547
+ -0.5048168897628784,
548
+ -0.49672001600265503,
549
+ -0.4906327426433563,
550
+ -0.49443888664245605,
551
+ -0.5481942892074585,
552
+ -0.4475238025188446,
553
+ -0.4167567789554596,
554
+ -0.40784752368927,
555
+ -0.46132850646972656,
556
+ -0.39877843856811523,
557
+ -0.41653740406036377,
558
+ -0.39473432302474976,
559
+ -0.41418391466140747,
560
+ -0.4068829417228699,
561
+ -0.39585503935813904,
562
+ -0.3816283345222473,
563
+ -0.3418673276901245,
564
+ -0.3965541422367096,
565
+ -0.3707251250743866,
566
+ -0.29932451248168945,
567
+ -0.3433356285095215,
568
+ -0.3257765471935272,
569
+ -0.37057408690452576,
570
+ -0.3593398630619049,
571
+ -0.31936100125312805,
572
+ -0.31205815076828003,
573
+ -0.3377631604671478,
574
+ -0.3308146595954895,
575
+ -0.30861154198646545,
576
+ -0.28568384051322937,
577
+ -0.27113717794418335,
578
+ -0.2691645920276642,
579
+ -0.26842546463012695,
580
+ -0.24299602210521698,
581
+ -0.26901406049728394,
582
+ -0.28628090023994446,
583
+ -0.25644367933273315,
584
+ -0.2672256827354431,
585
+ -0.2849532663822174,
586
+ -0.22515301406383514,
587
+ -0.2194351702928543,
588
+ -0.2802630364894867,
589
+ -0.23897716403007507,
590
+ -0.2148922085762024,
591
+ -0.22648395597934723,
592
+ -0.2577228546142578,
593
+ -0.21499043703079224,
594
+ -0.21737247705459595,
595
+ -0.21103811264038086,
596
+ -0.2220824509859085,
597
+ -0.1734149008989334,
598
+ -0.218211829662323,
599
+ -0.20041964948177338,
600
+ -0.2263200283050537,
601
+ -0.17371030151844025,
602
+ -0.18098947405815125,
603
+ -0.18344050645828247,
604
+ -0.16407114267349243,
605
+ -0.18422402441501617,
606
+ -0.18706662952899933,
607
+ -0.15566548705101013,
608
+ -0.1706431806087494,
609
+ -0.15713831782341003
610
+ ],
611
+ "abs_loss": [
612
+ 3.39304256439209,
613
+ 3.3440451622009277,
614
+ 3.2221078872680664,
615
+ 2.962374210357666,
616
+ 2.6000261306762695,
617
+ 2.6146888732910156,
618
+ 2.6088027954101562,
619
+ 2.613365411758423,
620
+ 2.5813589096069336,
621
+ 2.5816922187805176,
622
+ 2.5931968688964844,
623
+ 2.561342716217041,
624
+ 2.560413360595703,
625
+ 2.4853343963623047,
626
+ 2.5109357833862305,
627
+ 2.5940842628479004,
628
+ 2.4048895835876465,
629
+ 2.3294265270233154,
630
+ 1.660865068435669,
631
+ 1.3182013034820557,
632
+ 1.1554183959960938,
633
+ 1.1588599681854248,
634
+ 1.1115765571594238,
635
+ 1.0685930252075195,
636
+ 1.1162045001983643,
637
+ 0.9794039130210876,
638
+ 1.186870813369751,
639
+ 1.0021696090698242,
640
+ 0.9881029725074768,
641
+ 0.7798445224761963,
642
+ 1.0104799270629883,
643
+ 0.9486855268478394,
644
+ 0.876146674156189,
645
+ 0.7982321381568909,
646
+ 0.9442781209945679,
647
+ 0.9166160225868225,
648
+ 1.0005611181259155,
649
+ 0.8304280042648315,
650
+ 0.8332905173301697,
651
+ 0.7923551201820374,
652
+ 0.6501508951187134,
653
+ 0.7929084897041321,
654
+ 0.9291647672653198,
655
+ 0.7792672514915466,
656
+ 0.7903759479522705,
657
+ 0.7803529500961304,
658
+ 0.9942322373390198,
659
+ 0.6684384346008301,
660
+ 0.7799667119979858,
661
+ 0.6845371127128601,
662
+ 0.8857107162475586,
663
+ 0.9151805639266968,
664
+ 0.6339882612228394,
665
+ 0.6938873529434204,
666
+ 0.5363549590110779,
667
+ 0.5950627326965332,
668
+ 0.5747663974761963,
669
+ 0.6981031894683838,
670
+ 0.5535870790481567,
671
+ 0.5422572493553162,
672
+ 0.5722933411598206,
673
+ 0.5791301131248474,
674
+ 0.6247014999389648,
675
+ 0.688178539276123,
676
+ 0.4211939573287964,
677
+ 0.4434513449668884,
678
+ 0.4255868196487427,
679
+ 0.5411297082901001,
680
+ 0.41299617290496826,
681
+ 0.4775952696800232,
682
+ 0.3815616965293884,
683
+ 0.4609728157520294,
684
+ 0.5534173250198364,
685
+ 0.40973585844039917,
686
+ 0.5403074026107788,
687
+ 0.34058454632759094,
688
+ 0.4934236407279968,
689
+ 0.43036895990371704,
690
+ 0.4531347453594208,
691
+ 0.5358093976974487,
692
+ 0.39890944957733154,
693
+ 0.3504709303379059,
694
+ 0.4234974682331085,
695
+ 0.33043530583381653,
696
+ 0.3883510231971741,
697
+ 0.3900966942310333,
698
+ 0.3034008741378784,
699
+ 0.3844878077507019,
700
+ 0.3556361198425293,
701
+ 0.32722166180610657,
702
+ 0.2960817515850067,
703
+ 0.3175443112850189,
704
+ 0.36087870597839355,
705
+ 0.44915831089019775,
706
+ 0.3406302332878113,
707
+ 0.27062511444091797,
708
+ 0.4819590449333191,
709
+ 0.2790176272392273,
710
+ 0.3151218295097351,
711
+ 0.3895567059516907,
712
+ 0.30522969365119934,
713
+ 0.3638022840023041,
714
+ 0.3443005084991455,
715
+ 0.30038052797317505,
716
+ 0.3221493363380432,
717
+ 0.27090904116630554,
718
+ 0.3231700658798218,
719
+ 0.2101253718137741,
720
+ 0.2681146264076233,
721
+ 0.363462895154953,
722
+ 0.2364412546157837,
723
+ 0.2628980576992035,
724
+ 0.29717278480529785,
725
+ 0.19711089134216309,
726
+ 0.23752276599407196,
727
+ 0.30123162269592285,
728
+ 0.21607771515846252,
729
+ 0.24906513094902039,
730
+ 0.2326553612947464,
731
+ 0.2548292279243469,
732
+ 0.19183772802352905,
733
+ 0.33658137917518616,
734
+ 0.2623642385005951,
735
+ 0.2028590589761734,
736
+ 0.17227476835250854,
737
+ 0.30687063932418823,
738
+ 0.2223254144191742,
739
+ 0.28637945652008057,
740
+ 0.30192694067955017,
741
+ 0.31335651874542236,
742
+ 0.2542653977870941,
743
+ 0.24489624798297882,
744
+ 0.2581295967102051,
745
+ 0.2891785502433777,
746
+ 0.22562867403030396,
747
+ 0.2942545413970947,
748
+ 0.2457028329372406,
749
+ 0.23869061470031738,
750
+ 0.23281575739383698,
751
+ 0.2988502085208893,
752
+ 0.2741028070449829,
753
+ 0.25956836342811584,
754
+ 0.2576809525489807,
755
+ 0.21786843240261078,
756
+ 0.21318396925926208,
757
+ 0.27870291471481323,
758
+ 0.26582416892051697,
759
+ 0.20977771282196045,
760
+ 0.2978009581565857,
761
+ 0.24936990439891815
762
+ ],
763
+ "zipf_loss": [
764
+ 6.568541526794434,
765
+ 4.6378889083862305,
766
+ 2.601929187774658,
767
+ 1.4150443077087402,
768
+ 0.97258061170578,
769
+ 0.8853577375411987,
770
+ 0.866178035736084,
771
+ 0.8550291061401367,
772
+ 0.850388765335083,
773
+ 0.8541553020477295,
774
+ 0.8479726314544678,
775
+ 0.8498000502586365,
776
+ 0.8494022488594055,
777
+ 0.8523372411727905,
778
+ 0.8458136916160583,
779
+ 0.820546567440033,
780
+ 0.7533268928527832,
781
+ 0.5327982902526855,
782
+ 0.2942545413970947,
783
+ 0.19816140830516815,
784
+ 0.17018242180347443,
785
+ 0.14963090419769287,
786
+ 0.14849303662776947,
787
+ 0.13584934175014496,
788
+ 0.1370982527732849,
789
+ 0.12804265320301056,
790
+ 0.13384655117988586,
791
+ 0.1255798637866974,
792
+ 0.11729998141527176,
793
+ 0.11795015633106232,
794
+ 0.11049142479896545,
795
+ 0.10582149028778076,
796
+ 0.09629538655281067,
797
+ 0.10208211839199066,
798
+ 0.09804347902536392,
799
+ 0.09485457837581635,
800
+ 0.08674925565719604,
801
+ 0.09465356171131134,
802
+ 0.09595625847578049,
803
+ 0.07971762865781784,
804
+ 0.09630455076694489,
805
+ 0.08368515968322754,
806
+ 0.06425083428621292,
807
+ 0.07893300801515579,
808
+ 0.060034651309251785,
809
+ 0.05306267365813255,
810
+ 0.0388617068529129,
811
+ 0.05668090283870697,
812
+ 0.046399977058172226,
813
+ 0.055690523236989975,
814
+ 0.06215830147266388,
815
+ 0.030669555068016052,
816
+ 0.04737374186515808,
817
+ 0.03455729782581329,
818
+ 0.052687350660562515,
819
+ 0.037404127418994904,
820
+ 0.035850390791893005,
821
+ 0.03894411772489548,
822
+ 0.05407535284757614,
823
+ 0.053038209676742554,
824
+ 0.0507982037961483,
825
+ 0.05575310066342354,
826
+ 0.029641835018992424,
827
+ 0.02469017170369625,
828
+ 0.037330590188503265,
829
+ 0.03749319911003113,
830
+ 0.034987181425094604,
831
+ 0.03660454601049423,
832
+ 0.02331935241818428,
833
+ 0.0399140790104866,
834
+ 0.06821182370185852,
835
+ 0.04145489260554314,
836
+ 0.018391745164990425,
837
+ 0.031426701694726944,
838
+ 0.021070394665002823,
839
+ 0.05190783739089966,
840
+ 0.03217053785920143,
841
+ 0.0336339958012104,
842
+ 0.035501234233379364,
843
+ 0.029784096404910088,
844
+ 0.029012762010097504,
845
+ 0.027835464105010033,
846
+ 0.018687181174755096,
847
+ 0.03324688971042633,
848
+ 0.03104039654135704,
849
+ 0.013641919940710068,
850
+ 0.03253910318017006,
851
+ 0.02590865269303322,
852
+ 0.028471380472183228,
853
+ 0.048491280525922775,
854
+ 0.041252508759498596,
855
+ 0.04604184627532959,
856
+ 0.03497536480426788,
857
+ 0.03492319583892822,
858
+ 0.03542107716202736,
859
+ 0.06357758492231369,
860
+ 0.023340530693531036,
861
+ 0.06466128677129745,
862
+ 0.04620100557804108,
863
+ 0.022254012525081635,
864
+ 0.04297468811273575,
865
+ 0.03494641184806824,
866
+ 0.023700159043073654,
867
+ 0.04319623485207558,
868
+ 0.038257330656051636,
869
+ 0.028609748929739,
870
+ 0.04366075620055199,
871
+ 0.02391563169658184,
872
+ 0.04262681305408478,
873
+ 0.011019270867109299,
874
+ 0.030886277556419373,
875
+ 0.02963891625404358,
876
+ 0.022154763340950012,
877
+ 0.0412735678255558,
878
+ 0.035832080990076065,
879
+ 0.03350311517715454,
880
+ 0.029915394261479378,
881
+ 0.028151482343673706,
882
+ 0.017139362171292305,
883
+ 0.03457484766840935,
884
+ 0.02741282619535923,
885
+ 0.03685455769300461,
886
+ 0.022755559533834457,
887
+ 0.030933089554309845,
888
+ 0.05755234882235527,
889
+ 0.027764728292822838,
890
+ 0.02162948064506054,
891
+ 0.016395458951592445,
892
+ 0.039365850389003754,
893
+ 0.0421781912446022,
894
+ 0.038459327071905136,
895
+ 0.023749586194753647,
896
+ 0.02349282242357731,
897
+ 0.030727511271834373,
898
+ 0.0336943119764328,
899
+ 0.022509198635816574,
900
+ 0.027544958516955376,
901
+ 0.04976517707109451,
902
+ 0.04642646759748459,
903
+ 0.03214017301797867,
904
+ 0.04743911325931549,
905
+ 0.02716921642422676,
906
+ 0.030393626540899277,
907
+ 0.02204555831849575,
908
+ 0.025653110817074776,
909
+ 0.028535209596157074,
910
+ 0.024397503584623337,
911
+ 0.030477900058031082,
912
+ 0.04513617232441902,
913
+ 0.021437697112560272
914
+ ],
915
+ "denoise_loss": [],
916
+ "ortho_loss": [
917
+ 0.7070927023887634,
918
+ 0.4337051808834076,
919
+ 0.20525239408016205,
920
+ 0.12833012640476227,
921
+ 0.08922113478183746,
922
+ 0.07138099521398544,
923
+ 0.06153824180364609,
924
+ 0.050383489578962326,
925
+ 0.048941195011138916,
926
+ 0.05425061658024788,
927
+ 0.056658461689949036,
928
+ 0.05490320920944214,
929
+ 0.058780405670404434,
930
+ 0.06422601640224457,
931
+ 0.0640580803155899,
932
+ 0.07662343978881836,
933
+ 0.07740550488233566,
934
+ 0.07985741645097733,
935
+ 0.08211264759302139,
936
+ 0.08479849249124527,
937
+ 0.08962702006101608,
938
+ 0.09255073219537735,
939
+ 0.09895103424787521,
940
+ 0.10785157978534698,
941
+ 0.11439661681652069,
942
+ 0.11830822378396988,
943
+ 0.12574751675128937,
944
+ 0.13142701983451843,
945
+ 0.13731370866298676,
946
+ 0.14256031811237335,
947
+ 0.14993664622306824,
948
+ 0.15445290505886078,
949
+ 0.1589001566171646,
950
+ 0.1592540740966797,
951
+ 0.16055789589881897,
952
+ 0.16371439397335052,
953
+ 0.16041849553585052,
954
+ 0.166905015707016,
955
+ 0.16415290534496307,
956
+ 0.16529402136802673,
957
+ 0.16753415763378143,
958
+ 0.1604532152414322,
959
+ 0.16251112520694733,
960
+ 0.16150222718715668,
961
+ 0.1627689003944397,
962
+ 0.1577976644039154,
963
+ 0.15638048946857452,
964
+ 0.15296657383441925,
965
+ 0.15357930958271027,
966
+ 0.15450166165828705,
967
+ 0.15814656019210815,
968
+ 0.15624772012233734,
969
+ 0.15832668542861938,
970
+ 0.16225354373455048,
971
+ 0.15898476541042328,
972
+ 0.16120833158493042,
973
+ 0.16103416681289673,
974
+ 0.15684235095977783,
975
+ 0.15743117034435272,
976
+ 0.15934884548187256,
977
+ 0.1600343883037567,
978
+ 0.15916313230991364,
979
+ 0.15786658227443695,
980
+ 0.15931351482868195,
981
+ 0.15841044485569,
982
+ 0.15795432031154633,
983
+ 0.15867257118225098,
984
+ 0.15787459909915924,
985
+ 0.1565372198820114,
986
+ 0.1533852070569992,
987
+ 0.15143153071403503,
988
+ 0.1523621827363968,
989
+ 0.14922958612442017,
990
+ 0.15004687011241913,
991
+ 0.14945951104164124,
992
+ 0.14668743312358856,
993
+ 0.14628423750400543,
994
+ 0.1476394087076187,
995
+ 0.15357595682144165,
996
+ 0.1595272421836853,
997
+ 0.16347573697566986,
998
+ 0.17226624488830566,
999
+ 0.17014096677303314,
1000
+ 0.1705310344696045,
1001
+ 0.17292410135269165,
1002
+ 0.17185914516448975,
1003
+ 0.1698039323091507,
1004
+ 0.17396710813045502,
1005
+ 0.1786484271287918,
1006
+ 0.1821245551109314,
1007
+ 0.17920179665088654,
1008
+ 0.18061326444149017,
1009
+ 0.1835414171218872,
1010
+ 0.18515288829803467,
1011
+ 0.1845807284116745,
1012
+ 0.1872483342885971,
1013
+ 0.18795664608478546,
1014
+ 0.18821750581264496,
1015
+ 0.19346866011619568,
1016
+ 0.19681136310100555,
1017
+ 0.20208954811096191,
1018
+ 0.20705866813659668,
1019
+ 0.20783445239067078,
1020
+ 0.20989425480365753,
1021
+ 0.2125101536512375,
1022
+ 0.21912641823291779,
1023
+ 0.21778839826583862,
1024
+ 0.21937686204910278,
1025
+ 0.2197670340538025,
1026
+ 0.22236476838588715,
1027
+ 0.22307045757770538,
1028
+ 0.22510498762130737,
1029
+ 0.2264418601989746,
1030
+ 0.2309156209230423,
1031
+ 0.23448912799358368,
1032
+ 0.23486173152923584,
1033
+ 0.23727869987487793,
1034
+ 0.23963871598243713,
1035
+ 0.24129126965999603,
1036
+ 0.24589307606220245,
1037
+ 0.24669644236564636,
1038
+ 0.24793024361133575,
1039
+ 0.2496657818555832,
1040
+ 0.25194722414016724,
1041
+ 0.25236889719963074,
1042
+ 0.2532859444618225,
1043
+ 0.25499001145362854,
1044
+ 0.25745660066604614,
1045
+ 0.25832274556159973,
1046
+ 0.2611595094203949,
1047
+ 0.2630329132080078,
1048
+ 0.2670155167579651,
1049
+ 0.2685263454914093,
1050
+ 0.2688233554363251,
1051
+ 0.2699948251247406,
1052
+ 0.2728136479854584,
1053
+ 0.27377867698669434,
1054
+ 0.27549049258232117,
1055
+ 0.27645307779312134,
1056
+ 0.2766518294811249,
1057
+ 0.27672335505485535,
1058
+ 0.2774313986301422,
1059
+ 0.2787529528141022,
1060
+ 0.27945172786712646,
1061
+ 0.2794017493724823,
1062
+ 0.28105705976486206,
1063
+ 0.2820282578468323,
1064
+ 0.2829187214374542,
1065
+ 0.2841479480266571,
1066
+ 0.2846400737762451
1067
+ ],
1068
+ "lr": [
1069
+ 8.376068376068378e-06,
1070
+ 1.6923076923076924e-05,
1071
+ 2.5470085470085475e-05,
1072
+ 3.401709401709402e-05,
1073
+ 4e-05,
1074
+ 4e-05,
1075
+ 4e-05,
1076
+ 4e-05,
1077
+ 4e-05,
1078
+ 4e-05,
1079
+ 4e-05,
1080
+ 4e-05,
1081
+ 4e-05,
1082
+ 4e-05,
1083
+ 4e-05,
1084
+ 4e-05,
1085
+ 4e-05,
1086
+ 4e-05,
1087
+ 4e-05,
1088
+ 4e-05,
1089
+ 4e-05,
1090
+ 4e-05,
1091
+ 4e-05,
1092
+ 4e-05,
1093
+ 4e-05,
1094
+ 4e-05,
1095
+ 4e-05,
1096
+ 4e-05,
1097
+ 4e-05,
1098
+ 4e-05,
1099
+ 4e-05,
1100
+ 4e-05,
1101
+ 4e-05,
1102
+ 4e-05,
1103
+ 4e-05,
1104
+ 4e-05,
1105
+ 4e-05,
1106
+ 4e-05,
1107
+ 4e-05,
1108
+ 4e-05,
1109
+ 4e-05,
1110
+ 4e-05,
1111
+ 4e-05,
1112
+ 4e-05,
1113
+ 4e-05,
1114
+ 4e-05,
1115
+ 4e-05,
1116
+ 4e-05,
1117
+ 4e-05,
1118
+ 4e-05,
1119
+ 4e-05,
1120
+ 4e-05,
1121
+ 4e-05,
1122
+ 4e-05,
1123
+ 4e-05,
1124
+ 4e-05,
1125
+ 4e-05,
1126
+ 4e-05,
1127
+ 4e-05,
1128
+ 4e-05,
1129
+ 4e-05,
1130
+ 4e-05,
1131
+ 4e-05,
1132
+ 4e-05,
1133
+ 4e-05,
1134
+ 4e-05,
1135
+ 4e-05,
1136
+ 4e-05,
1137
+ 4e-05,
1138
+ 4e-05,
1139
+ 4e-05,
1140
+ 4e-05,
1141
+ 4e-05,
1142
+ 4e-05,
1143
+ 4e-05,
1144
+ 4e-05,
1145
+ 4e-05,
1146
+ 4e-05,
1147
+ 4e-05,
1148
+ 4e-05,
1149
+ 4e-05,
1150
+ 4e-05,
1151
+ 4e-05,
1152
+ 4e-05,
1153
+ 4e-05,
1154
+ 4e-05,
1155
+ 4e-05,
1156
+ 4e-05,
1157
+ 4e-05,
1158
+ 4e-05,
1159
+ 4e-05,
1160
+ 3.993593461639863e-05,
1161
+ 3.934273662008964e-05,
1162
+ 3.874953862378065e-05,
1163
+ 3.815634062747166e-05,
1164
+ 3.7563142631162665e-05,
1165
+ 3.696994463485368e-05,
1166
+ 3.637674663854469e-05,
1167
+ 3.5783548642235693e-05,
1168
+ 3.519035064592671e-05,
1169
+ 3.459715264961771e-05,
1170
+ 3.400395465330873e-05,
1171
+ 3.341075665699973e-05,
1172
+ 3.281755866069075e-05,
1173
+ 3.2224360664381764e-05,
1174
+ 3.1251515950435014e-05,
1175
+ 3.065831795412602e-05,
1176
+ 3.0065119957817037e-05,
1177
+ 2.947192196150804e-05,
1178
+ 2.887872396519905e-05,
1179
+ 2.8285525968890065e-05,
1180
+ 2.769232797258107e-05,
1181
+ 2.709912997627208e-05,
1182
+ 2.6505931979963087e-05,
1183
+ 2.5912733983654104e-05,
1184
+ 2.5319535987345117e-05,
1185
+ 2.472633799103612e-05,
1186
+ 2.4133139994727132e-05,
1187
+ 2.3539941998418135e-05,
1188
+ 2.2946744002109148e-05,
1189
+ 2.1973899288162408e-05,
1190
+ 2.138070129185341e-05,
1191
+ 2.0787503295544424e-05,
1192
+ 2.019430529923544e-05,
1193
+ 1.9601107302926443e-05,
1194
+ 1.900790930661746e-05,
1195
+ 1.8414711310308462e-05,
1196
+ 1.7821513313999475e-05,
1197
+ 1.7228315317690488e-05,
1198
+ 1.663511732138149e-05,
1199
+ 1.6041919325072507e-05,
1200
+ 1.544872132876351e-05,
1201
+ 1.4855523332454524e-05,
1202
+ 1.4262325336145537e-05,
1203
+ 1.366912733983654e-05,
1204
+ 1.2696282625889797e-05,
1205
+ 1.2103084629580812e-05,
1206
+ 1.1509886633271816e-05,
1207
+ 1.0916688636962829e-05,
1208
+ 1.0323490640653833e-05,
1209
+ 9.730292644344846e-06,
1210
+ 9.137094648035861e-06,
1211
+ 8.543896651726864e-06,
1212
+ 7.950698655417878e-06,
1213
+ 7.357500659108881e-06,
1214
+ 6.764302662799895e-06,
1215
+ 6.171104666490909e-06,
1216
+ 5.577906670181913e-06,
1217
+ 4.984708673872927e-06,
1218
+ 4.391510677563931e-06
1219
+ ],
1220
+ "emb_lr": [],
1221
+ "eval_step": [
1222
+ 750,
1223
+ 1532,
1224
+ 2314,
1225
+ 3096,
1226
+ 3878,
1227
+ 4660,
1228
+ 5442,
1229
+ 6224,
1230
+ 7006,
1231
+ 7788
1232
+ ],
1233
+ "eval_accuracy": [
1234
+ 0.0,
1235
+ 0.62,
1236
+ 0.84,
1237
+ 0.99,
1238
+ 0.97,
1239
+ 0.98,
1240
+ 0.99,
1241
+ 0.98,
1242
+ 0.99,
1243
+ 0.99
1244
+ ]
1245
+ },
1246
+ "final_accuracy": 0.995,
1247
+ "sft_eval": {
1248
+ "config": {
1249
+ "ops": "add_sub",
1250
+ "K": null,
1251
+ "mode": "sft",
1252
+ "n_digits": 6,
1253
+ "n_per_split": 100
1254
+ },
1255
+ "splits": {
1256
+ "add_S0": {
1257
+ "full_accuracy": 0.92,
1258
+ "n_examples": 100,
1259
+ "per_subtask": {
1260
+ "SA": {
1261
+ "accuracy": 0.9851239669421488,
1262
+ "count": 605
1263
+ },
1264
+ "SS": {
1265
+ "accuracy": 1.0,
1266
+ "count": 95
1267
+ }
1268
+ }
1269
+ },
1270
+ "add_S1": {
1271
+ "full_accuracy": 0.72,
1272
+ "n_examples": 100,
1273
+ "per_subtask": {
1274
+ "SA": {
1275
+ "accuracy": 0.9705882352941176,
1276
+ "count": 204
1277
+ },
1278
+ "SC": {
1279
+ "accuracy": 0.9822485207100592,
1280
+ "count": 169
1281
+ },
1282
+ "SS": {
1283
+ "accuracy": 1.0,
1284
+ "count": 31
1285
+ },
1286
+ "UC": {
1287
+ "accuracy": 0.9222972972972973,
1288
+ "count": 296
1289
+ }
1290
+ }
1291
+ },
1292
+ "add_S2": {
1293
+ "full_accuracy": 0.64,
1294
+ "n_examples": 100,
1295
+ "per_subtask": {
1296
+ "SA": {
1297
+ "accuracy": 0.9815950920245399,
1298
+ "count": 163
1299
+ },
1300
+ "SC": {
1301
+ "accuracy": 0.9307692307692308,
1302
+ "count": 130
1303
+ },
1304
+ "SS": {
1305
+ "accuracy": 0.9425287356321839,
1306
+ "count": 87
1307
+ },
1308
+ "UC": {
1309
+ "accuracy": 0.8768472906403941,
1310
+ "count": 203
1311
+ },
1312
+ "US": {
1313
+ "accuracy": 0.9401709401709402,
1314
+ "count": 117
1315
+ }
1316
+ }
1317
+ },
1318
+ "add_S3": {
1319
+ "full_accuracy": 0.45,
1320
+ "n_examples": 100,
1321
+ "per_subtask": {
1322
+ "SA": {
1323
+ "accuracy": 0.9586776859504132,
1324
+ "count": 121
1325
+ },
1326
+ "SC": {
1327
+ "accuracy": 0.9917355371900827,
1328
+ "count": 121
1329
+ },
1330
+ "SS": {
1331
+ "accuracy": 0.9795918367346939,
1332
+ "count": 49
1333
+ },
1334
+ "UC": {
1335
+ "accuracy": 0.7634408602150538,
1336
+ "count": 186
1337
+ },
1338
+ "US": {
1339
+ "accuracy": 0.8654708520179372,
1340
+ "count": 223
1341
+ }
1342
+ }
1343
+ },
1344
+ "add_S4": {
1345
+ "full_accuracy": 0.34,
1346
+ "n_examples": 100,
1347
+ "per_subtask": {
1348
+ "SA": {
1349
+ "accuracy": 0.9903846153846154,
1350
+ "count": 104
1351
+ },
1352
+ "SC": {
1353
+ "accuracy": 0.9622641509433962,
1354
+ "count": 106
1355
+ },
1356
+ "SS": {
1357
+ "accuracy": 0.9565217391304348,
1358
+ "count": 23
1359
+ },
1360
+ "UC": {
1361
+ "accuracy": 0.70625,
1362
+ "count": 160
1363
+ },
1364
+ "US": {
1365
+ "accuracy": 0.7068403908794788,
1366
+ "count": 307
1367
+ }
1368
+ }
1369
+ },
1370
+ "add_S5": {
1371
+ "full_accuracy": 0.39,
1372
+ "n_examples": 100,
1373
+ "per_subtask": {
1374
+ "SA": {
1375
+ "accuracy": 1.0,
1376
+ "count": 100
1377
+ },
1378
+ "SC": {
1379
+ "accuracy": 0.97,
1380
+ "count": 100
1381
+ },
1382
+ "UC": {
1383
+ "accuracy": 0.66,
1384
+ "count": 100
1385
+ },
1386
+ "US": {
1387
+ "accuracy": 0.585,
1388
+ "count": 400
1389
+ }
1390
+ }
1391
+ },
1392
+ "add_S6": {
1393
+ "full_accuracy": 0.59,
1394
+ "n_examples": 100,
1395
+ "per_subtask": {
1396
+ "SC": {
1397
+ "accuracy": 1.0,
1398
+ "count": 100
1399
+ },
1400
+ "UC": {
1401
+ "accuracy": 0.62,
1402
+ "count": 100
1403
+ },
1404
+ "US": {
1405
+ "accuracy": 0.7,
1406
+ "count": 500
1407
+ }
1408
+ }
1409
+ },
1410
+ "add_random": {
1411
+ "full_accuracy": 0.715,
1412
+ "n_examples": 200,
1413
+ "per_subtask": {
1414
+ "SA": {
1415
+ "accuracy": 0.9552572706935123,
1416
+ "count": 447
1417
+ },
1418
+ "SC": {
1419
+ "accuracy": 0.978125,
1420
+ "count": 320
1421
+ },
1422
+ "SS": {
1423
+ "accuracy": 0.9464285714285714,
1424
+ "count": 56
1425
+ },
1426
+ "UC": {
1427
+ "accuracy": 0.943289224952741,
1428
+ "count": 529
1429
+ },
1430
+ "US": {
1431
+ "accuracy": 0.8958333333333334,
1432
+ "count": 48
1433
+ }
1434
+ }
1435
+ },
1436
+ "add_C3": {
1437
+ "full_accuracy": 0.55,
1438
+ "n_examples": 100,
1439
+ "per_subtask": {
1440
+ "SA": {
1441
+ "accuracy": 1.0,
1442
+ "count": 300
1443
+ },
1444
+ "SC": {
1445
+ "accuracy": 0.99,
1446
+ "count": 100
1447
+ },
1448
+ "UC": {
1449
+ "accuracy": 0.7823834196891192,
1450
+ "count": 193
1451
+ },
1452
+ "US": {
1453
+ "accuracy": 0.7757009345794392,
1454
+ "count": 107
1455
+ }
1456
+ }
1457
+ },
1458
+ "add_C4": {
1459
+ "full_accuracy": 0.55,
1460
+ "n_examples": 100,
1461
+ "per_subtask": {
1462
+ "SA": {
1463
+ "accuracy": 1.0,
1464
+ "count": 200
1465
+ },
1466
+ "SC": {
1467
+ "accuracy": 1.0,
1468
+ "count": 100
1469
+ },
1470
+ "UC": {
1471
+ "accuracy": 0.8203125,
1472
+ "count": 256
1473
+ },
1474
+ "US": {
1475
+ "accuracy": 0.8472222222222222,
1476
+ "count": 144
1477
+ }
1478
+ }
1479
+ },
1480
+ "add_C5": {
1481
+ "full_accuracy": 0.46,
1482
+ "n_examples": 100,
1483
+ "per_subtask": {
1484
+ "SA": {
1485
+ "accuracy": 1.0,
1486
+ "count": 100
1487
+ },
1488
+ "SC": {
1489
+ "accuracy": 0.97,
1490
+ "count": 100
1491
+ },
1492
+ "UC": {
1493
+ "accuracy": 0.8333333333333334,
1494
+ "count": 306
1495
+ },
1496
+ "US": {
1497
+ "accuracy": 0.8298969072164949,
1498
+ "count": 194
1499
+ }
1500
+ }
1501
+ },
1502
+ "add_C6": {
1503
+ "full_accuracy": 0.45,
1504
+ "n_examples": 100,
1505
+ "per_subtask": {
1506
+ "SC": {
1507
+ "accuracy": 1.0,
1508
+ "count": 100
1509
+ },
1510
+ "UC": {
1511
+ "accuracy": 0.8415300546448088,
1512
+ "count": 366
1513
+ },
1514
+ "US": {
1515
+ "accuracy": 0.9273504273504274,
1516
+ "count": 234
1517
+ }
1518
+ }
1519
+ },
1520
+ "sub_M0": {
1521
+ "full_accuracy": 0.92,
1522
+ "n_examples": 100,
1523
+ "per_subtask": {
1524
+ "MD": {
1525
+ "accuracy": 0.9883527454242929,
1526
+ "count": 601
1527
+ },
1528
+ "ME": {
1529
+ "accuracy": 0.9797979797979798,
1530
+ "count": 99
1531
+ }
1532
+ }
1533
+ },
1534
+ "sub_M1": {
1535
+ "full_accuracy": 0.78,
1536
+ "n_examples": 100,
1537
+ "per_subtask": {
1538
+ "MD": {
1539
+ "accuracy": 0.974910394265233,
1540
+ "count": 279
1541
+ },
1542
+ "MB": {
1543
+ "accuracy": 0.993103448275862,
1544
+ "count": 145
1545
+ },
1546
+ "ME": {
1547
+ "accuracy": 0.9166666666666666,
1548
+ "count": 24
1549
+ },
1550
+ "UB": {
1551
+ "accuracy": 0.9444444444444444,
1552
+ "count": 252
1553
+ }
1554
+ }
1555
+ },
1556
+ "sub_M2": {
1557
+ "full_accuracy": 0.57,
1558
+ "n_examples": 100,
1559
+ "per_subtask": {
1560
+ "MD": {
1561
+ "accuracy": 0.9906103286384976,
1562
+ "count": 213
1563
+ },
1564
+ "MB": {
1565
+ "accuracy": 0.9911504424778761,
1566
+ "count": 113
1567
+ },
1568
+ "ME": {
1569
+ "accuracy": 1.0,
1570
+ "count": 85
1571
+ },
1572
+ "UB": {
1573
+ "accuracy": 0.7624309392265194,
1574
+ "count": 181
1575
+ },
1576
+ "UD": {
1577
+ "accuracy": 0.9814814814814815,
1578
+ "count": 108
1579
+ }
1580
+ }
1581
+ },
1582
+ "sub_M3": {
1583
+ "full_accuracy": 0.21,
1584
+ "n_examples": 100,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 0.994413407821229,
1588
+ "count": 179
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 0.970873786407767,
1592
+ "count": 103
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 0.9821428571428571,
1596
+ "count": 56
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.5302013422818792,
1600
+ "count": 149
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 0.7887323943661971,
1604
+ "count": 213
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_M4": {
1609
+ "full_accuracy": 0.03,
1610
+ "n_examples": 100,
1611
+ "per_subtask": {
1612
+ "MD": {
1613
+ "accuracy": 1.0,
1614
+ "count": 200
1615
+ },
1616
+ "MB": {
1617
+ "accuracy": 1.0,
1618
+ "count": 100
1619
+ },
1620
+ "UB": {
1621
+ "accuracy": 0.47,
1622
+ "count": 100
1623
+ },
1624
+ "UD": {
1625
+ "accuracy": 0.39666666666666667,
1626
+ "count": 300
1627
+ }
1628
+ }
1629
+ },
1630
+ "sub_M5": {
1631
+ "full_accuracy": 0.04,
1632
+ "n_examples": 100,
1633
+ "per_subtask": {
1634
+ "MD": {
1635
+ "accuracy": 1.0,
1636
+ "count": 100
1637
+ },
1638
+ "MB": {
1639
+ "accuracy": 1.0,
1640
+ "count": 100
1641
+ },
1642
+ "UB": {
1643
+ "accuracy": 0.43,
1644
+ "count": 100
1645
+ },
1646
+ "UD": {
1647
+ "accuracy": 0.3375,
1648
+ "count": 400
1649
+ }
1650
+ }
1651
+ },
1652
+ "sub_random": {
1653
+ "full_accuracy": 0.77,
1654
+ "n_examples": 200,
1655
+ "per_subtask": {
1656
+ "MD": {
1657
+ "accuracy": 0.98,
1658
+ "count": 600
1659
+ },
1660
+ "MB": {
1661
+ "accuracy": 0.9850187265917603,
1662
+ "count": 267
1663
+ },
1664
+ "ME": {
1665
+ "accuracy": 0.9811320754716981,
1666
+ "count": 53
1667
+ },
1668
+ "UB": {
1669
+ "accuracy": 0.9202733485193622,
1670
+ "count": 439
1671
+ },
1672
+ "UD": {
1673
+ "accuracy": 1.0,
1674
+ "count": 41
1675
+ }
1676
+ }
1677
+ },
1678
+ "sub_B3": {
1679
+ "full_accuracy": 0.5,
1680
+ "n_examples": 100,
1681
+ "per_subtask": {
1682
+ "MD": {
1683
+ "accuracy": 0.9833333333333333,
1684
+ "count": 300
1685
+ },
1686
+ "MB": {
1687
+ "accuracy": 1.0,
1688
+ "count": 100
1689
+ },
1690
+ "UB": {
1691
+ "accuracy": 0.7766497461928934,
1692
+ "count": 197
1693
+ },
1694
+ "UD": {
1695
+ "accuracy": 0.7961165048543689,
1696
+ "count": 103
1697
+ }
1698
+ }
1699
+ },
1700
+ "sub_B4": {
1701
+ "full_accuracy": 0.41,
1702
+ "n_examples": 100,
1703
+ "per_subtask": {
1704
+ "MD": {
1705
+ "accuracy": 1.0,
1706
+ "count": 200
1707
+ },
1708
+ "MB": {
1709
+ "accuracy": 1.0,
1710
+ "count": 100
1711
+ },
1712
+ "UB": {
1713
+ "accuracy": 0.7854251012145749,
1714
+ "count": 247
1715
+ },
1716
+ "UD": {
1717
+ "accuracy": 0.7712418300653595,
1718
+ "count": 153
1719
+ }
1720
+ }
1721
+ },
1722
+ "sub_B5": {
1723
+ "full_accuracy": 0.25,
1724
+ "n_examples": 100,
1725
+ "per_subtask": {
1726
+ "MD": {
1727
+ "accuracy": 1.0,
1728
+ "count": 100
1729
+ },
1730
+ "MB": {
1731
+ "accuracy": 1.0,
1732
+ "count": 100
1733
+ },
1734
+ "UB": {
1735
+ "accuracy": 0.7315436241610739,
1736
+ "count": 298
1737
+ },
1738
+ "UD": {
1739
+ "accuracy": 0.7574257425742574,
1740
+ "count": 202
1741
+ }
1742
+ }
1743
+ }
1744
+ },
1745
+ "summary": {
1746
+ "overall_accuracy": 0.5304166666666666,
1747
+ "total_examples": 2400,
1748
+ "n_splits": 22
1749
+ }
1750
+ },
1751
+ "sorl_eval": {
1752
+ "config": {
1753
+ "ops": "add_sub",
1754
+ "K": 4,
1755
+ "mode": "sorl",
1756
+ "n_digits": 6,
1757
+ "n_per_split": 100
1758
+ },
1759
+ "splits": {
1760
+ "add_S0": {
1761
+ "full_accuracy": 1.0,
1762
+ "n_examples": 100,
1763
+ "per_subtask": {
1764
+ "SA": {
1765
+ "accuracy": 1.0,
1766
+ "count": 605
1767
+ },
1768
+ "SS": {
1769
+ "accuracy": 1.0,
1770
+ "count": 95
1771
+ }
1772
+ }
1773
+ },
1774
+ "add_S1": {
1775
+ "full_accuracy": 1.0,
1776
+ "n_examples": 100,
1777
+ "per_subtask": {
1778
+ "SA": {
1779
+ "accuracy": 1.0,
1780
+ "count": 204
1781
+ },
1782
+ "SC": {
1783
+ "accuracy": 1.0,
1784
+ "count": 169
1785
+ },
1786
+ "SS": {
1787
+ "accuracy": 1.0,
1788
+ "count": 31
1789
+ },
1790
+ "UC": {
1791
+ "accuracy": 1.0,
1792
+ "count": 296
1793
+ }
1794
+ }
1795
+ },
1796
+ "add_S2": {
1797
+ "full_accuracy": 1.0,
1798
+ "n_examples": 100,
1799
+ "per_subtask": {
1800
+ "SA": {
1801
+ "accuracy": 1.0,
1802
+ "count": 163
1803
+ },
1804
+ "SC": {
1805
+ "accuracy": 1.0,
1806
+ "count": 130
1807
+ },
1808
+ "SS": {
1809
+ "accuracy": 1.0,
1810
+ "count": 87
1811
+ },
1812
+ "UC": {
1813
+ "accuracy": 1.0,
1814
+ "count": 203
1815
+ },
1816
+ "US": {
1817
+ "accuracy": 1.0,
1818
+ "count": 117
1819
+ }
1820
+ }
1821
+ },
1822
+ "add_S3": {
1823
+ "full_accuracy": 1.0,
1824
+ "n_examples": 100,
1825
+ "per_subtask": {
1826
+ "SA": {
1827
+ "accuracy": 1.0,
1828
+ "count": 121
1829
+ },
1830
+ "SC": {
1831
+ "accuracy": 1.0,
1832
+ "count": 121
1833
+ },
1834
+ "SS": {
1835
+ "accuracy": 1.0,
1836
+ "count": 49
1837
+ },
1838
+ "UC": {
1839
+ "accuracy": 1.0,
1840
+ "count": 186
1841
+ },
1842
+ "US": {
1843
+ "accuracy": 1.0,
1844
+ "count": 223
1845
+ }
1846
+ }
1847
+ },
1848
+ "add_S4": {
1849
+ "full_accuracy": 0.99,
1850
+ "n_examples": 100,
1851
+ "per_subtask": {
1852
+ "SA": {
1853
+ "accuracy": 1.0,
1854
+ "count": 104
1855
+ },
1856
+ "SC": {
1857
+ "accuracy": 1.0,
1858
+ "count": 106
1859
+ },
1860
+ "SS": {
1861
+ "accuracy": 1.0,
1862
+ "count": 23
1863
+ },
1864
+ "UC": {
1865
+ "accuracy": 0.99375,
1866
+ "count": 160
1867
+ },
1868
+ "US": {
1869
+ "accuracy": 1.0,
1870
+ "count": 307
1871
+ }
1872
+ }
1873
+ },
1874
+ "add_S5": {
1875
+ "full_accuracy": 0.99,
1876
+ "n_examples": 100,
1877
+ "per_subtask": {
1878
+ "SA": {
1879
+ "accuracy": 1.0,
1880
+ "count": 100
1881
+ },
1882
+ "SC": {
1883
+ "accuracy": 1.0,
1884
+ "count": 100
1885
+ },
1886
+ "UC": {
1887
+ "accuracy": 0.99,
1888
+ "count": 100
1889
+ },
1890
+ "US": {
1891
+ "accuracy": 1.0,
1892
+ "count": 400
1893
+ }
1894
+ }
1895
+ },
1896
+ "add_S6": {
1897
+ "full_accuracy": 0.93,
1898
+ "n_examples": 100,
1899
+ "per_subtask": {
1900
+ "SC": {
1901
+ "accuracy": 1.0,
1902
+ "count": 100
1903
+ },
1904
+ "UC": {
1905
+ "accuracy": 0.93,
1906
+ "count": 100
1907
+ },
1908
+ "US": {
1909
+ "accuracy": 0.99,
1910
+ "count": 500
1911
+ }
1912
+ }
1913
+ },
1914
+ "add_random": {
1915
+ "full_accuracy": 1.0,
1916
+ "n_examples": 200,
1917
+ "per_subtask": {
1918
+ "SA": {
1919
+ "accuracy": 1.0,
1920
+ "count": 447
1921
+ },
1922
+ "SC": {
1923
+ "accuracy": 1.0,
1924
+ "count": 320
1925
+ },
1926
+ "SS": {
1927
+ "accuracy": 1.0,
1928
+ "count": 56
1929
+ },
1930
+ "UC": {
1931
+ "accuracy": 1.0,
1932
+ "count": 529
1933
+ },
1934
+ "US": {
1935
+ "accuracy": 1.0,
1936
+ "count": 48
1937
+ }
1938
+ }
1939
+ },
1940
+ "add_C3": {
1941
+ "full_accuracy": 1.0,
1942
+ "n_examples": 100,
1943
+ "per_subtask": {
1944
+ "SA": {
1945
+ "accuracy": 1.0,
1946
+ "count": 300
1947
+ },
1948
+ "SC": {
1949
+ "accuracy": 1.0,
1950
+ "count": 100
1951
+ },
1952
+ "UC": {
1953
+ "accuracy": 1.0,
1954
+ "count": 193
1955
+ },
1956
+ "US": {
1957
+ "accuracy": 1.0,
1958
+ "count": 107
1959
+ }
1960
+ }
1961
+ },
1962
+ "add_C4": {
1963
+ "full_accuracy": 1.0,
1964
+ "n_examples": 100,
1965
+ "per_subtask": {
1966
+ "SA": {
1967
+ "accuracy": 1.0,
1968
+ "count": 200
1969
+ },
1970
+ "SC": {
1971
+ "accuracy": 1.0,
1972
+ "count": 100
1973
+ },
1974
+ "UC": {
1975
+ "accuracy": 1.0,
1976
+ "count": 256
1977
+ },
1978
+ "US": {
1979
+ "accuracy": 1.0,
1980
+ "count": 144
1981
+ }
1982
+ }
1983
+ },
1984
+ "add_C5": {
1985
+ "full_accuracy": 1.0,
1986
+ "n_examples": 100,
1987
+ "per_subtask": {
1988
+ "SA": {
1989
+ "accuracy": 1.0,
1990
+ "count": 100
1991
+ },
1992
+ "SC": {
1993
+ "accuracy": 1.0,
1994
+ "count": 100
1995
+ },
1996
+ "UC": {
1997
+ "accuracy": 1.0,
1998
+ "count": 306
1999
+ },
2000
+ "US": {
2001
+ "accuracy": 1.0,
2002
+ "count": 194
2003
+ }
2004
+ }
2005
+ },
2006
+ "add_C6": {
2007
+ "full_accuracy": 1.0,
2008
+ "n_examples": 100,
2009
+ "per_subtask": {
2010
+ "SC": {
2011
+ "accuracy": 1.0,
2012
+ "count": 100
2013
+ },
2014
+ "UC": {
2015
+ "accuracy": 1.0,
2016
+ "count": 366
2017
+ },
2018
+ "US": {
2019
+ "accuracy": 1.0,
2020
+ "count": 234
2021
+ }
2022
+ }
2023
+ },
2024
+ "sub_M0": {
2025
+ "full_accuracy": 1.0,
2026
+ "n_examples": 100,
2027
+ "per_subtask": {
2028
+ "MD": {
2029
+ "accuracy": 1.0,
2030
+ "count": 601
2031
+ },
2032
+ "ME": {
2033
+ "accuracy": 1.0,
2034
+ "count": 99
2035
+ }
2036
+ }
2037
+ },
2038
+ "sub_M1": {
2039
+ "full_accuracy": 1.0,
2040
+ "n_examples": 100,
2041
+ "per_subtask": {
2042
+ "MD": {
2043
+ "accuracy": 1.0,
2044
+ "count": 279
2045
+ },
2046
+ "MB": {
2047
+ "accuracy": 1.0,
2048
+ "count": 145
2049
+ },
2050
+ "ME": {
2051
+ "accuracy": 1.0,
2052
+ "count": 24
2053
+ },
2054
+ "UB": {
2055
+ "accuracy": 1.0,
2056
+ "count": 252
2057
+ }
2058
+ }
2059
+ },
2060
+ "sub_M2": {
2061
+ "full_accuracy": 1.0,
2062
+ "n_examples": 100,
2063
+ "per_subtask": {
2064
+ "MD": {
2065
+ "accuracy": 1.0,
2066
+ "count": 213
2067
+ },
2068
+ "MB": {
2069
+ "accuracy": 1.0,
2070
+ "count": 113
2071
+ },
2072
+ "ME": {
2073
+ "accuracy": 1.0,
2074
+ "count": 85
2075
+ },
2076
+ "UB": {
2077
+ "accuracy": 1.0,
2078
+ "count": 181
2079
+ },
2080
+ "UD": {
2081
+ "accuracy": 1.0,
2082
+ "count": 108
2083
+ }
2084
+ }
2085
+ },
2086
+ "sub_M3": {
2087
+ "full_accuracy": 1.0,
2088
+ "n_examples": 100,
2089
+ "per_subtask": {
2090
+ "MD": {
2091
+ "accuracy": 1.0,
2092
+ "count": 179
2093
+ },
2094
+ "MB": {
2095
+ "accuracy": 1.0,
2096
+ "count": 103
2097
+ },
2098
+ "ME": {
2099
+ "accuracy": 1.0,
2100
+ "count": 56
2101
+ },
2102
+ "UB": {
2103
+ "accuracy": 1.0,
2104
+ "count": 149
2105
+ },
2106
+ "UD": {
2107
+ "accuracy": 1.0,
2108
+ "count": 213
2109
+ }
2110
+ }
2111
+ },
2112
+ "sub_M4": {
2113
+ "full_accuracy": 1.0,
2114
+ "n_examples": 100,
2115
+ "per_subtask": {
2116
+ "MD": {
2117
+ "accuracy": 1.0,
2118
+ "count": 200
2119
+ },
2120
+ "MB": {
2121
+ "accuracy": 1.0,
2122
+ "count": 100
2123
+ },
2124
+ "UB": {
2125
+ "accuracy": 1.0,
2126
+ "count": 100
2127
+ },
2128
+ "UD": {
2129
+ "accuracy": 1.0,
2130
+ "count": 300
2131
+ }
2132
+ }
2133
+ },
2134
+ "sub_M5": {
2135
+ "full_accuracy": 0.98,
2136
+ "n_examples": 100,
2137
+ "per_subtask": {
2138
+ "MD": {
2139
+ "accuracy": 1.0,
2140
+ "count": 100
2141
+ },
2142
+ "MB": {
2143
+ "accuracy": 1.0,
2144
+ "count": 100
2145
+ },
2146
+ "UB": {
2147
+ "accuracy": 0.98,
2148
+ "count": 100
2149
+ },
2150
+ "UD": {
2151
+ "accuracy": 1.0,
2152
+ "count": 400
2153
+ }
2154
+ }
2155
+ },
2156
+ "sub_random": {
2157
+ "full_accuracy": 1.0,
2158
+ "n_examples": 200,
2159
+ "per_subtask": {
2160
+ "MD": {
2161
+ "accuracy": 1.0,
2162
+ "count": 600
2163
+ },
2164
+ "MB": {
2165
+ "accuracy": 1.0,
2166
+ "count": 267
2167
+ },
2168
+ "ME": {
2169
+ "accuracy": 1.0,
2170
+ "count": 53
2171
+ },
2172
+ "UB": {
2173
+ "accuracy": 1.0,
2174
+ "count": 439
2175
+ },
2176
+ "UD": {
2177
+ "accuracy": 1.0,
2178
+ "count": 41
2179
+ }
2180
+ }
2181
+ },
2182
+ "sub_B3": {
2183
+ "full_accuracy": 0.99,
2184
+ "n_examples": 100,
2185
+ "per_subtask": {
2186
+ "MD": {
2187
+ "accuracy": 1.0,
2188
+ "count": 300
2189
+ },
2190
+ "MB": {
2191
+ "accuracy": 1.0,
2192
+ "count": 100
2193
+ },
2194
+ "UB": {
2195
+ "accuracy": 0.9949238578680203,
2196
+ "count": 197
2197
+ },
2198
+ "UD": {
2199
+ "accuracy": 1.0,
2200
+ "count": 103
2201
+ }
2202
+ }
2203
+ },
2204
+ "sub_B4": {
2205
+ "full_accuracy": 1.0,
2206
+ "n_examples": 100,
2207
+ "per_subtask": {
2208
+ "MD": {
2209
+ "accuracy": 1.0,
2210
+ "count": 200
2211
+ },
2212
+ "MB": {
2213
+ "accuracy": 1.0,
2214
+ "count": 100
2215
+ },
2216
+ "UB": {
2217
+ "accuracy": 1.0,
2218
+ "count": 247
2219
+ },
2220
+ "UD": {
2221
+ "accuracy": 1.0,
2222
+ "count": 153
2223
+ }
2224
+ }
2225
+ },
2226
+ "sub_B5": {
2227
+ "full_accuracy": 1.0,
2228
+ "n_examples": 100,
2229
+ "per_subtask": {
2230
+ "MD": {
2231
+ "accuracy": 1.0,
2232
+ "count": 100
2233
+ },
2234
+ "MB": {
2235
+ "accuracy": 1.0,
2236
+ "count": 100
2237
+ },
2238
+ "UB": {
2239
+ "accuracy": 1.0,
2240
+ "count": 298
2241
+ },
2242
+ "UD": {
2243
+ "accuracy": 1.0,
2244
+ "count": 202
2245
+ }
2246
+ }
2247
+ }
2248
+ },
2249
+ "summary": {
2250
+ "overall_accuracy": 0.995,
2251
+ "total_examples": 2400,
2252
+ "n_splits": 22
2253
+ }
2254
+ },
2255
+ "sorl_overall_accuracy": 0.995,
2256
+ "sft_overall_accuracy": 0.5304166666666666
2257
+ }
add_sub_sorl_v1_abs30_50K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4acff836b99d599e5d5478cb5262bcf1b97b1d27955ef86f39ee768aeca36b25
3
+ size 650385300
add_sub_sorl_v1_abs30_50K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 234,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 10,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 781,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs30_K4_50K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 30,
65
+ "dataset_size": 50000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162519662,
71
+ "run_name": "add_sub_sorl_v1_abs30_50K",
72
+ "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
+ "timestamp": "2026-04-12T17:01:04.959212+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "8ug8xyio",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/8ug8xyio",
81
+ "final_accuracy": 0.995,
82
+ "sft_accuracy": 0.5304166666666666,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }