amirali1985 commited on
Commit
e3cbbc5
·
verified ·
1 Parent(s): 320ab23

Upload add_sub_sorl_v1_abs30_K1_25K

Browse files
add_sub_sorl_v1_abs30_K1_25K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151674
37
+ }
add_sub_sorl_v1_abs30_K1_25K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs30_K1_25K/metrics.json ADDED
@@ -0,0 +1,2197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 441,
12
+ 491,
13
+ 541,
14
+ 591,
15
+ 641,
16
+ 691,
17
+ 741,
18
+ 832,
19
+ 882,
20
+ 932,
21
+ 982,
22
+ 1032,
23
+ 1082,
24
+ 1132,
25
+ 1223,
26
+ 1273,
27
+ 1323,
28
+ 1373,
29
+ 1423,
30
+ 1473,
31
+ 1523,
32
+ 1614,
33
+ 1664,
34
+ 1714,
35
+ 1764,
36
+ 1814,
37
+ 1864,
38
+ 1914,
39
+ 2005,
40
+ 2055,
41
+ 2105,
42
+ 2155,
43
+ 2205,
44
+ 2255,
45
+ 2305,
46
+ 2396,
47
+ 2446,
48
+ 2496,
49
+ 2546,
50
+ 2596,
51
+ 2646,
52
+ 2696,
53
+ 2787,
54
+ 2837,
55
+ 2887,
56
+ 2937,
57
+ 2987,
58
+ 3037,
59
+ 3087,
60
+ 3178,
61
+ 3228,
62
+ 3278,
63
+ 3328,
64
+ 3378,
65
+ 3428,
66
+ 3478,
67
+ 3569,
68
+ 3619,
69
+ 3669,
70
+ 3719,
71
+ 3769,
72
+ 3819,
73
+ 3869,
74
+ 3960,
75
+ 4010,
76
+ 4060,
77
+ 4110,
78
+ 4160,
79
+ 4210,
80
+ 4260,
81
+ 4351,
82
+ 4401,
83
+ 4451,
84
+ 4501,
85
+ 4551,
86
+ 4601,
87
+ 4651,
88
+ 4742,
89
+ 4792,
90
+ 4842,
91
+ 4892,
92
+ 4942,
93
+ 4992,
94
+ 5042,
95
+ 5133,
96
+ 5183,
97
+ 5233,
98
+ 5283,
99
+ 5333,
100
+ 5383,
101
+ 5433,
102
+ 5524,
103
+ 5574,
104
+ 5624,
105
+ 5674,
106
+ 5724,
107
+ 5774,
108
+ 5824,
109
+ 5915,
110
+ 5965,
111
+ 6015,
112
+ 6065,
113
+ 6115,
114
+ 6165,
115
+ 6215,
116
+ 6306,
117
+ 6356,
118
+ 6406,
119
+ 6456,
120
+ 6506,
121
+ 6556,
122
+ 6606,
123
+ 6697,
124
+ 6747,
125
+ 6797,
126
+ 6847,
127
+ 6897,
128
+ 6947,
129
+ 6997,
130
+ 7088,
131
+ 7138,
132
+ 7188,
133
+ 7238,
134
+ 7288,
135
+ 7338,
136
+ 7388,
137
+ 7479,
138
+ 7529,
139
+ 7579,
140
+ 7629,
141
+ 7679,
142
+ 7729,
143
+ 7779
144
+ ],
145
+ "loss": [
146
+ 1.7119760513305664,
147
+ 6.14704704284668,
148
+ 4.015985488891602,
149
+ 2.931351661682129,
150
+ 2.8381450176239014,
151
+ 2.7390925884246826,
152
+ 1.9953590631484985,
153
+ 0.8327422142028809,
154
+ -3.8749427795410156,
155
+ -4.478009223937988,
156
+ -0.9891107082366943,
157
+ -0.2836305499076843,
158
+ -0.561787486076355,
159
+ -1.141134262084961,
160
+ -1.0379124879837036,
161
+ -0.8155723810195923,
162
+ -0.6522830128669739,
163
+ -0.969704270362854,
164
+ -0.6387378573417664,
165
+ -0.42737025022506714,
166
+ -0.7698933482170105,
167
+ -0.8871013522148132,
168
+ -0.723418116569519,
169
+ -1.4676158428192139,
170
+ -1.118342638015747,
171
+ -0.9780423045158386,
172
+ -1.3972547054290771,
173
+ -1.118117332458496,
174
+ -1.2728338241577148,
175
+ -1.3038815259933472,
176
+ -1.1130985021591187,
177
+ -1.2439594268798828,
178
+ -1.6075873374938965,
179
+ -1.2939362525939941,
180
+ -1.123915672302246,
181
+ -0.8386991024017334,
182
+ -1.2796339988708496,
183
+ -0.9666016101837158,
184
+ -1.2052180767059326,
185
+ -0.9123711585998535,
186
+ -0.7841464281082153,
187
+ -1.115072250366211,
188
+ -0.770645260810852,
189
+ -0.7825030088424683,
190
+ -0.9296966195106506,
191
+ -0.7025026679039001,
192
+ -0.9686148762702942,
193
+ -0.6636331677436829,
194
+ -0.8582434058189392,
195
+ -0.8699550628662109,
196
+ -1.1216692924499512,
197
+ -0.5576128363609314,
198
+ -0.5983526706695557,
199
+ -0.5066444873809814,
200
+ -0.5169333219528198,
201
+ -0.5374507308006287,
202
+ -0.5922688245773315,
203
+ -0.29379820823669434,
204
+ -0.7311683297157288,
205
+ -0.22624635696411133,
206
+ -0.3172416388988495,
207
+ -0.32080110907554626,
208
+ -0.4887930750846863,
209
+ -0.25087282061576843,
210
+ -0.5092266798019409,
211
+ -0.7776860594749451,
212
+ -0.15283048152923584,
213
+ -0.3975360095500946,
214
+ -0.3694022297859192,
215
+ -0.40873661637306213,
216
+ -0.33530089259147644,
217
+ -0.20308193564414978,
218
+ -0.2026340812444687,
219
+ -0.37627139687538147,
220
+ -0.2508956789970398,
221
+ -0.2133498638868332,
222
+ -0.1757795363664627,
223
+ -0.22155731916427612,
224
+ -0.12377417832612991,
225
+ -0.15623070299625397,
226
+ -0.15215444564819336,
227
+ -0.1778583824634552,
228
+ -0.3856639266014099,
229
+ -0.19510412216186523,
230
+ -0.45289498567581177,
231
+ -0.07906970381736755,
232
+ -0.20047199726104736,
233
+ -0.24871760606765747,
234
+ -0.15301105380058289,
235
+ -0.20984536409378052,
236
+ -0.15769784152507782,
237
+ -0.18541622161865234,
238
+ -0.0805714875459671,
239
+ -0.07824444025754929,
240
+ -0.06095310300588608,
241
+ -0.08044923841953278,
242
+ 0.003586065024137497,
243
+ 0.10273022204637527,
244
+ -0.10450196266174316,
245
+ -0.1762467324733734,
246
+ -0.08977089822292328,
247
+ -0.06519266963005066,
248
+ -0.041904985904693604,
249
+ -0.13669879734516144,
250
+ -0.018732935190200806,
251
+ -0.13292600214481354,
252
+ -0.11485083401203156,
253
+ -0.002781972289085388,
254
+ 0.011605948209762573,
255
+ -0.0375908724963665,
256
+ -0.0719958245754242,
257
+ -0.009761612862348557,
258
+ -0.036501675844192505,
259
+ -0.12502069771289825,
260
+ -0.0013336818665266037,
261
+ 0.014050843194127083,
262
+ -0.006407836452126503,
263
+ 0.016416456550359726,
264
+ 0.010326145216822624,
265
+ -0.04670792445540428,
266
+ -0.05867944657802582,
267
+ 1.0332092642784119e-05,
268
+ 0.011082771234214306,
269
+ -0.0830128937959671,
270
+ -0.0022481046617031097,
271
+ -0.009278824552893639,
272
+ -0.1630394458770752,
273
+ 0.014726893045008183,
274
+ 0.009142588824033737,
275
+ 0.01728634163737297,
276
+ 0.006687477231025696,
277
+ 0.0021704640239477158,
278
+ 0.012956775724887848,
279
+ 0.016162145882844925,
280
+ 0.012694327160716057,
281
+ 0.015958743169903755,
282
+ 0.01844569854438305,
283
+ 0.01067356951534748,
284
+ 0.010267311707139015,
285
+ 0.01123642735183239
286
+ ],
287
+ "base_loss": [
288
+ 9.25883960723877,
289
+ 6.370208740234375,
290
+ 3.7065303325653076,
291
+ 2.0331058502197266,
292
+ 1.95931077003479,
293
+ 1.8724143505096436,
294
+ 1.8358138799667358,
295
+ 1.7839851379394531,
296
+ 1.8063528537750244,
297
+ 1.4257497787475586,
298
+ 0.8925438523292542,
299
+ 0.7544015049934387,
300
+ 0.6830788850784302,
301
+ 0.6486687064170837,
302
+ 0.49347400665283203,
303
+ 0.3990347981452942,
304
+ 0.41614702343940735,
305
+ 0.41534119844436646,
306
+ 0.3495095670223236,
307
+ 0.2816762924194336,
308
+ 0.3096388876438141,
309
+ 0.2650178074836731,
310
+ 0.23259153962135315,
311
+ 0.26580312848091125,
312
+ 0.21972285211086273,
313
+ 0.25576403737068176,
314
+ 0.2180083841085434,
315
+ 0.19050060212612152,
316
+ 0.1893482506275177,
317
+ 0.18317624926567078,
318
+ 0.15178455412387848,
319
+ 0.1825704127550125,
320
+ 0.20081649720668793,
321
+ 0.16461952030658722,
322
+ 0.14625681936740875,
323
+ 0.11179553717374802,
324
+ 0.15817393362522125,
325
+ 0.12313541024923325,
326
+ 0.15251047909259796,
327
+ 0.11650027334690094,
328
+ 0.10264534503221512,
329
+ 0.1392023265361786,
330
+ 0.09644023329019547,
331
+ 0.10179956257343292,
332
+ 0.11617104709148407,
333
+ 0.08889312297105789,
334
+ 0.1257583051919937,
335
+ 0.10296887904405594,
336
+ 0.11060570180416107,
337
+ 0.11073609441518784,
338
+ 0.1322706639766693,
339
+ 0.0693567544221878,
340
+ 0.07331784814596176,
341
+ 0.06344141066074371,
342
+ 0.06363283097743988,
343
+ 0.06521368771791458,
344
+ 0.07137874513864517,
345
+ 0.03687870875000954,
346
+ 0.0886240154504776,
347
+ 0.0316486619412899,
348
+ 0.04859679192304611,
349
+ 0.04001149162650108,
350
+ 0.0594177171587944,
351
+ 0.03198449686169624,
352
+ 0.05951383709907532,
353
+ 0.0927167758345604,
354
+ 0.020596766844391823,
355
+ 0.04924142360687256,
356
+ 0.04642181843519211,
357
+ 0.04873456805944443,
358
+ 0.041637055575847626,
359
+ 0.02686495892703533,
360
+ 0.026218848302960396,
361
+ 0.04749896004796028,
362
+ 0.031423475593328476,
363
+ 0.026792261749505997,
364
+ 0.02315610647201538,
365
+ 0.027571475133299828,
366
+ 0.01708334870636463,
367
+ 0.019614964723587036,
368
+ 0.02113191783428192,
369
+ 0.02272043190896511,
370
+ 0.04554429650306702,
371
+ 0.02509036846458912,
372
+ 0.05237502604722977,
373
+ 0.011714950203895569,
374
+ 0.024812577292323112,
375
+ 0.03115662932395935,
376
+ 0.01989857666194439,
377
+ 0.024637239053845406,
378
+ 0.020898962393403053,
379
+ 0.023834871128201485,
380
+ 0.014209302142262459,
381
+ 0.011255674995481968,
382
+ 0.009083413518965244,
383
+ 0.011385834775865078,
384
+ 0.0028439622838050127,
385
+ 0.018480319529771805,
386
+ 0.014435835182666779,
387
+ 0.021674534305930138,
388
+ 0.011868111789226532,
389
+ 0.009697256609797478,
390
+ 0.008302876725792885,
391
+ 0.017009999603033066,
392
+ 0.0038561702240258455,
393
+ 0.016001036390662193,
394
+ 0.015324118547141552,
395
+ 0.002748801140114665,
396
+ 0.0010739824501797557,
397
+ 0.006728976033627987,
398
+ 0.00928917434066534,
399
+ 0.0024181229528039694,
400
+ 0.005708904471248388,
401
+ 0.01629350520670414,
402
+ 0.0029613084625452757,
403
+ 0.0003541119222063571,
404
+ 0.002198481233790517,
405
+ 0.0014702372718602419,
406
+ 0.001857394352555275,
407
+ 0.006702867336571217,
408
+ 0.008157839998602867,
409
+ 0.001853235182352364,
410
+ 0.0002782914671115577,
411
+ 0.010882347822189331,
412
+ 0.0027038822881877422,
413
+ 0.0030141728930175304,
414
+ 0.020343586802482605,
415
+ 0.00020745322399307042,
416
+ 9.352602501166984e-05,
417
+ 0.0006757898954674602,
418
+ 0.0005854826304130256,
419
+ 0.0013537565246224403,
420
+ 0.0008722890052013099,
421
+ 0.0004628680762834847,
422
+ 0.0004031405842397362,
423
+ 0.0008272135746665299,
424
+ 0.00024150208628270775,
425
+ 0.0004777032299898565,
426
+ 0.00025579737848602235,
427
+ 0.0005325012025423348
428
+ ],
429
+ "info_loss": [
430
+ -1.3981437683105469,
431
+ -0.35674333572387695,
432
+ -0.1073293685913086,
433
+ -0.02326798439025879,
434
+ -0.023933053016662598,
435
+ -0.025423526763916016,
436
+ -0.09565258026123047,
437
+ -0.20662808418273926,
438
+ -0.6795123815536499,
439
+ -0.7024374008178711,
440
+ -0.29942911863327026,
441
+ -0.21352607011795044,
442
+ -0.23093774914741516,
443
+ -0.28096023201942444,
444
+ -0.24447700381278992,
445
+ -0.2022431492805481,
446
+ -0.18190807104110718,
447
+ -0.21125152707099915,
448
+ -0.16269072890281677,
449
+ -0.13090260326862335,
450
+ -0.1593063622713089,
451
+ -0.16261976957321167,
452
+ -0.1382463276386261,
453
+ -0.2155323028564453,
454
+ -0.17649787664413452,
455
+ -0.16470617055892944,
456
+ -0.19673863053321838,
457
+ -0.16458788514137268,
458
+ -0.17244882881641388,
459
+ -0.1751340925693512,
460
+ -0.14462099969387054,
461
+ -0.15938325226306915,
462
+ -0.19329020380973816,
463
+ -0.1597842425107956,
464
+ -0.14075155556201935,
465
+ -0.10603491216897964,
466
+ -0.15410369634628296,
467
+ -0.1185174509882927,
468
+ -0.1451079249382019,
469
+ -0.1115327849984169,
470
+ -0.09845148026943207,
471
+ -0.13466130197048187,
472
+ -0.09395471960306168,
473
+ -0.0970274806022644,
474
+ -0.11261290311813354,
475
+ -0.08472369611263275,
476
+ -0.1171511560678482,
477
+ -0.08429136872291565,
478
+ -0.10386909544467926,
479
+ -0.10420701652765274,
480
+ -0.1310877948999405,
481
+ -0.06857217848300934,
482
+ -0.07247594743967056,
483
+ -0.06123020499944687,
484
+ -0.062376223504543304,
485
+ -0.06436779350042343,
486
+ -0.07016307860612869,
487
+ -0.03637612238526344,
488
+ -0.0855904370546341,
489
+ -0.03112221509218216,
490
+ -0.040240220725536346,
491
+ -0.03903472423553467,
492
+ -0.05904826149344444,
493
+ -0.03156021237373352,
494
+ -0.05928679183125496,
495
+ -0.09059230238199234,
496
+ -0.020370472222566605,
497
+ -0.04692664369940758,
498
+ -0.04496893659234047,
499
+ -0.04838690906763077,
500
+ -0.0406218096613884,
501
+ -0.02652939409017563,
502
+ -0.025999585166573524,
503
+ -0.04552473500370979,
504
+ -0.031129708513617516,
505
+ -0.025714172050356865,
506
+ -0.02223960869014263,
507
+ -0.027326716110110283,
508
+ -0.01689782179892063,
509
+ -0.019511861726641655,
510
+ -0.020856689661741257,
511
+ -0.02208787202835083,
512
+ -0.04520589858293533,
513
+ -0.024929175153374672,
514
+ -0.05229552835226059,
515
+ -0.011622090823948383,
516
+ -0.02474926970899105,
517
+ -0.030646927654743195,
518
+ -0.01971360854804516,
519
+ -0.02455708757042885,
520
+ -0.02074458636343479,
521
+ -0.02322465553879738,
522
+ -0.013449409045279026,
523
+ -0.011075416579842567,
524
+ -0.009047647938132286,
525
+ -0.011344228871166706,
526
+ -0.002775580622255802,
527
+ 0.006502024829387665,
528
+ -0.014414124190807343,
529
+ -0.02149597369134426,
530
+ -0.011825395748019218,
531
+ -0.009671507403254509,
532
+ -0.008267457596957684,
533
+ -0.016983015462756157,
534
+ -0.0038351379334926605,
535
+ -0.01598292402923107,
536
+ -0.015295109711587429,
537
+ -0.002731711370870471,
538
+ -0.0010551607701927423,
539
+ -0.006713584065437317,
540
+ -0.009263768792152405,
541
+ -0.002401233185082674,
542
+ -0.005693664308637381,
543
+ -0.01627790927886963,
544
+ -0.0029315929859876633,
545
+ -0.0003119634639006108,
546
+ -0.00217966316267848,
547
+ -0.0014510895125567913,
548
+ -0.0018381474073976278,
549
+ -0.006692532449960709,
550
+ -0.008143601939082146,
551
+ -0.0018416679231449962,
552
+ -0.0002660305181052536,
553
+ -0.01086888276040554,
554
+ -0.002692815847694874,
555
+ -0.002999652875587344,
556
+ -0.02033308334648609,
557
+ -0.0001986423449125141,
558
+ -8.62046581460163e-05,
559
+ -0.0006662025698460639,
560
+ -0.0005774407181888819,
561
+ -0.0013461634516716003,
562
+ -0.000865207810420543,
563
+ -0.00045342743396759033,
564
+ -0.00039656684384681284,
565
+ -0.0008164274040609598,
566
+ -0.00023399226483888924,
567
+ -0.0004711582150775939,
568
+ -0.000249892589636147,
569
+ -0.0005257821758277714
570
+ ],
571
+ "abs_loss": [
572
+ 3.3791351318359375,
573
+ 3.226520538330078,
574
+ 2.748342275619507,
575
+ 2.7246789932250977,
576
+ 2.734380006790161,
577
+ 2.763914108276367,
578
+ 2.776501417160034,
579
+ 2.782012939453125,
580
+ 2.7223784923553467,
581
+ 2.7750370502471924,
582
+ 2.712878465652466,
583
+ 2.5715386867523193,
584
+ 2.417691469192505,
585
+ 2.2169277667999268,
586
+ 1.8418618440628052,
587
+ 1.4758011102676392,
588
+ 1.445372462272644,
589
+ 1.4330517053604126,
590
+ 1.1268384456634521,
591
+ 1.0863100290298462,
592
+ 0.946248471736908,
593
+ 0.8062345385551453,
594
+ 0.8195869326591492,
595
+ 0.7489545941352844,
596
+ 0.8105387687683105,
597
+ 0.7428224682807922,
598
+ 0.6433082222938538,
599
+ 0.6014466285705566,
600
+ 0.5794520974159241,
601
+ 0.42335280776023865,
602
+ 0.38783881068229675,
603
+ 0.37080153822898865,
604
+ 0.29639407992362976,
605
+ 0.276778906583786,
606
+ 0.2690867781639099,
607
+ 0.23475337028503418,
608
+ 0.24031853675842285,
609
+ 0.18818873167037964,
610
+ 0.1650061011314392,
611
+ 0.146784707903862,
612
+ 0.15803326666355133,
613
+ 0.17478173971176147,
614
+ 0.14593462646007538,
615
+ 0.11577823758125305,
616
+ 0.1437772810459137,
617
+ 0.1562107652425766,
618
+ 0.11654581874608994,
619
+ 0.09483480453491211,
620
+ 0.10927138477563858,
621
+ 0.1195492148399353,
622
+ 0.09249719232320786,
623
+ 0.11310684680938721,
624
+ 0.10394057631492615,
625
+ 0.08904767036437988,
626
+ 0.11636340618133545,
627
+ 0.07780475169420242,
628
+ 0.05981302261352539,
629
+ 0.06853172928094864,
630
+ 0.05035331845283508,
631
+ 0.0653398260474205,
632
+ 0.06224076822400093,
633
+ 0.0763515830039978,
634
+ 0.05397680774331093,
635
+ 0.03979582339525223,
636
+ 0.0471830852329731,
637
+ 0.04971422627568245,
638
+ 0.04336335137486458,
639
+ 0.04716387018561363,
640
+ 0.04091737046837807,
641
+ 0.04822436347603798,
642
+ 0.03870323672890663,
643
+ 0.05136945843696594,
644
+ 0.04786120355129242,
645
+ 0.04661177471280098,
646
+ 0.037042561918497086,
647
+ 0.03333150967955589,
648
+ 0.038183193653821945,
649
+ 0.029593804851174355,
650
+ 0.036583200097084045,
651
+ 0.030410559847950935,
652
+ 0.03435201197862625,
653
+ 0.023689428344368935,
654
+ 0.023031366989016533,
655
+ 0.03888919949531555,
656
+ 0.0251901987940073,
657
+ 0.019065313041210175,
658
+ 0.024439163506031036,
659
+ 0.03290224075317383,
660
+ 0.02057739906013012,
661
+ 0.01778118684887886,
662
+ 0.03099118359386921,
663
+ 0.016362473368644714,
664
+ 0.015957167372107506,
665
+ 0.020016077905893326,
666
+ 0.018773702904582024,
667
+ 0.021185478195548058,
668
+ 0.01584002375602722,
669
+ 0.01608959771692753,
670
+ 0.012648460455238819,
671
+ 0.02667520008981228,
672
+ 0.01431312132626772,
673
+ 0.01208517700433731,
674
+ 0.009955878369510174,
675
+ 0.015077034942805767,
676
+ 0.01430173497647047,
677
+ 0.011124823242425919,
678
+ 0.0111143933609128,
679
+ 0.017690075561404228,
680
+ 0.012765412218868732,
681
+ 0.014279651455581188,
682
+ 0.016503993421792984,
683
+ 0.008516673929989338,
684
+ 0.01265427004545927,
685
+ 0.011229410767555237,
686
+ 0.009121122770011425,
687
+ 0.008324752561748028,
688
+ 0.009793217293918133,
689
+ 0.01735287718474865,
690
+ 0.011216825805604458,
691
+ 0.01033524889498949,
692
+ 0.008808014914393425,
693
+ 0.009041398763656616,
694
+ 0.009020754136145115,
695
+ 0.008437370881438255,
696
+ 0.007728166412562132,
697
+ 0.00990037526935339,
698
+ 0.004430749919265509,
699
+ 0.002050023293122649,
700
+ 0.0034150404389947653,
701
+ 0.005411065649241209,
702
+ 0.002684989245608449,
703
+ 0.003553754882887006,
704
+ 0.002668159082531929,
705
+ 0.004461775068193674,
706
+ 0.0015429150080308318,
707
+ 0.0036014870274811983,
708
+ 0.0015474098036065698,
709
+ 0.0015115175629034638,
710
+ 0.0032429725397378206,
711
+ 0.0029643059242516756
712
+ ],
713
+ "zipf_loss": [
714
+ 6.096660614013672,
715
+ 3.0216193199157715,
716
+ 1.1079144477844238,
717
+ 0.8584577441215515,
718
+ 0.8447268009185791,
719
+ 0.8445221185684204,
720
+ 0.8384208679199219,
721
+ 0.836836576461792,
722
+ 0.8415907621383667,
723
+ 0.8431114554405212,
724
+ 0.8413488864898682,
725
+ 0.8400747179985046,
726
+ 0.8227418661117554,
727
+ 0.7981064319610596,
728
+ 0.7291973829269409,
729
+ 0.6602441072463989,
730
+ 0.6061134934425354,
731
+ 0.5841646194458008,
732
+ 0.5259760022163391,
733
+ 0.4913484454154968,
734
+ 0.4189065098762512,
735
+ 0.39345496892929077,
736
+ 0.34449493885040283,
737
+ 0.3470086455345154,
738
+ 0.3458594083786011,
739
+ 0.33897310495376587,
740
+ 0.2877923846244812,
741
+ 0.27711623907089233,
742
+ 0.20436091721057892,
743
+ 0.2219478338956833,
744
+ 0.14254306256771088,
745
+ 0.13022243976593018,
746
+ 0.09485892951488495,
747
+ 0.11160877346992493,
748
+ 0.1104343831539154,
749
+ 0.08637912571430206,
750
+ 0.07919713109731674,
751
+ 0.07661852240562439,
752
+ 0.07685010135173798,
753
+ 0.07177796214818954,
754
+ 0.08191970735788345,
755
+ 0.07486019283533096,
756
+ 0.05786818265914917,
757
+ 0.07439438998699188,
758
+ 0.06588367372751236,
759
+ 0.040220167487859726,
760
+ 0.06548376381397247,
761
+ 0.0668281838297844,
762
+ 0.05891464650630951,
763
+ 0.04942414164543152,
764
+ 0.047688212245702744,
765
+ 0.04744149371981621,
766
+ 0.04269491136074066,
767
+ 0.03331145644187927,
768
+ 0.031559787690639496,
769
+ 0.03323303163051605,
770
+ 0.032001838088035583,
771
+ 0.026231151074171066,
772
+ 0.031076692044734955,
773
+ 0.0467931292951107,
774
+ 0.03033972904086113,
775
+ 0.02189948782324791,
776
+ 0.03687412291765213,
777
+ 0.02876521274447441,
778
+ 0.01940906047821045,
779
+ 0.030548758804798126,
780
+ 0.02594112604856491,
781
+ 0.017772624269127846,
782
+ 0.029773566871881485,
783
+ 0.0215754471719265,
784
+ 0.025409821420907974,
785
+ 0.030210105702280998,
786
+ 0.026356801390647888,
787
+ 0.02681579440832138,
788
+ 0.025273678824305534,
789
+ 0.013666437938809395,
790
+ 0.01964213326573372,
791
+ 0.021178971976041794,
792
+ 0.024462364614009857,
793
+ 0.01623188890516758,
794
+ 0.031845346093177795,
795
+ 0.01793096587061882,
796
+ 0.018547624349594116,
797
+ 0.02520833909511566,
798
+ 0.015166297554969788,
799
+ 0.023529723286628723,
800
+ 0.019764218479394913,
801
+ 0.023304790258407593,
802
+ 0.0221687164157629,
803
+ 0.009310144931077957,
804
+ 0.025749940425157547,
805
+ 0.021359212696552277,
806
+ 0.03811759129166603,
807
+ 0.019252443686127663,
808
+ 0.018562600016593933,
809
+ 0.019488658756017685,
810
+ 0.026913905516266823,
811
+ 0.017620697617530823,
812
+ 0.023938598111271858,
813
+ 0.01437095832079649,
814
+ 0.015183642506599426,
815
+ 0.02061663195490837,
816
+ 0.0314711257815361,
817
+ 0.01461365632712841,
818
+ 0.014332099817693233,
819
+ 0.00978973601013422,
820
+ 0.021664708852767944,
821
+ 0.020017333328723907,
822
+ 0.019807031378149986,
823
+ 0.02138802781701088,
824
+ 0.009702291339635849,
825
+ 0.01098092831671238,
826
+ 0.013460641726851463,
827
+ 0.02034195140004158,
828
+ 0.024108827114105225,
829
+ 0.015983890742063522,
830
+ 0.012210993096232414,
831
+ 0.0277218259871006,
832
+ 0.025728542357683182,
833
+ 0.012481008656322956,
834
+ 0.013717934489250183,
835
+ 0.01566963642835617,
836
+ 0.012562709860503674,
837
+ 0.013949854299426079,
838
+ 0.021203354001045227,
839
+ 0.016713494434952736,
840
+ 0.01950472593307495,
841
+ 0.016300860792398453,
842
+ 0.009569604881107807,
843
+ 0.022731471806764603,
844
+ 0.011607903055846691,
845
+ 0.013922966085374355,
846
+ 0.020469749346375465,
847
+ 0.019787374883890152,
848
+ 0.016102563589811325,
849
+ 0.0229356549680233,
850
+ 0.0203893780708313,
851
+ 0.014756296761333942,
852
+ 0.01218614261597395,
853
+ 0.015665316954255104
854
+ ],
855
+ "denoise_loss": [],
856
+ "ortho_loss": [
857
+ 0.639721691608429,
858
+ 0.23384006321430206,
859
+ 0.14489184319972992,
860
+ 0.10792448371648788,
861
+ 0.08456820994615555,
862
+ 0.08123986423015594,
863
+ 0.0778353363275528,
864
+ 0.11002214252948761,
865
+ 0.12268389016389847,
866
+ 0.15539494156837463,
867
+ 0.16700926423072815,
868
+ 0.17942556738853455,
869
+ 0.18666550517082214,
870
+ 0.19445793330669403,
871
+ 0.2032562643289566,
872
+ 0.20760567486286163,
873
+ 0.21392135322093964,
874
+ 0.21670381724834442,
875
+ 0.2180689126253128,
876
+ 0.22063450515270233,
877
+ 0.21883326768875122,
878
+ 0.21863742172718048,
879
+ 0.217630073428154,
880
+ 0.21256931126117706,
881
+ 0.22088980674743652,
882
+ 0.22135354578495026,
883
+ 0.2220180779695511,
884
+ 0.23056040704250336,
885
+ 0.23146554827690125,
886
+ 0.24043810367584229,
887
+ 0.24833738803863525,
888
+ 0.24956285953521729,
889
+ 0.25005948543548584,
890
+ 0.2468118816614151,
891
+ 0.2451467216014862,
892
+ 0.24257886409759521,
893
+ 0.24182473123073578,
894
+ 0.2443898469209671,
895
+ 0.24258306622505188,
896
+ 0.24195465445518494,
897
+ 0.23636561632156372,
898
+ 0.23654253780841827,
899
+ 0.2325715571641922,
900
+ 0.2336747646331787,
901
+ 0.23286280035972595,
902
+ 0.22832824289798737,
903
+ 0.22483302652835846,
904
+ 0.22698204219341278,
905
+ 0.22653689980506897,
906
+ 0.2247782200574875,
907
+ 0.223842054605484,
908
+ 0.22841455042362213,
909
+ 0.22809086740016937,
910
+ 0.23049242794513702,
911
+ 0.22902648150920868,
912
+ 0.2248598039150238,
913
+ 0.22201740741729736,
914
+ 0.2185370773077011,
915
+ 0.22141091525554657,
916
+ 0.2136271744966507,
917
+ 0.22246746718883514,
918
+ 0.2175038456916809,
919
+ 0.22038504481315613,
920
+ 0.21444186568260193,
921
+ 0.2150769978761673,
922
+ 0.21607351303100586,
923
+ 0.2178366482257843,
924
+ 0.22308430075645447,
925
+ 0.22000181674957275,
926
+ 0.2187521755695343,
927
+ 0.21133314073085785,
928
+ 0.21594126522541046,
929
+ 0.22053566575050354,
930
+ 0.22134697437286377,
931
+ 0.21547871828079224,
932
+ 0.20994195342063904,
933
+ 0.20882248878479004,
934
+ 0.2071162313222885,
935
+ 0.20842047035694122,
936
+ 0.20825868844985962,
937
+ 0.2067190557718277,
938
+ 0.2005504071712494,
939
+ 0.19398145377635956,
940
+ 0.19522309303283691,
941
+ 0.19257421791553497,
942
+ 0.19569028913974762,
943
+ 0.19560210406780243,
944
+ 0.20005181431770325,
945
+ 0.20596961677074432,
946
+ 0.2090260088443756,
947
+ 0.20348960161209106,
948
+ 0.21076665818691254,
949
+ 0.21390055119991302,
950
+ 0.21353191137313843,
951
+ 0.2112823873758316,
952
+ 0.21168112754821777,
953
+ 0.2081877738237381,
954
+ 0.2079249769449234,
955
+ 0.20868492126464844,
956
+ 0.20853272080421448,
957
+ 0.21011212468147278,
958
+ 0.21035552024841309,
959
+ 0.21197006106376648,
960
+ 0.21510401368141174,
961
+ 0.21735182404518127,
962
+ 0.21705609560012817,
963
+ 0.21730081737041473,
964
+ 0.2183588296175003,
965
+ 0.22225020825862885,
966
+ 0.22318315505981445,
967
+ 0.22398485243320465,
968
+ 0.22349779307842255,
969
+ 0.2245921492576599,
970
+ 0.22917932271957397,
971
+ 0.23140773177146912,
972
+ 0.23064255714416504,
973
+ 0.23163443803787231,
974
+ 0.23274460434913635,
975
+ 0.2331482321023941,
976
+ 0.2337607890367508,
977
+ 0.23383651673793793,
978
+ 0.23467832803726196,
979
+ 0.23464395105838776,
980
+ 0.235525444149971,
981
+ 0.235772043466568,
982
+ 0.23610159754753113,
983
+ 0.2369203120470047,
984
+ 0.23657864332199097,
985
+ 0.23652523756027222,
986
+ 0.23683376610279083,
987
+ 0.23720180988311768,
988
+ 0.23807238042354584,
989
+ 0.23984350264072418,
990
+ 0.24047333002090454,
991
+ 0.24087047576904297,
992
+ 0.24072767794132233,
993
+ 0.24080558121204376,
994
+ 0.24085009098052979,
995
+ 0.2407883256673813,
996
+ 0.2406560778617859
997
+ ],
998
+ "lr": [
999
+ 1.6752136752136756e-05,
1000
+ 3.384615384615385e-05,
1001
+ 5.094017094017095e-05,
1002
+ 6.803418803418804e-05,
1003
+ 8e-05,
1004
+ 8e-05,
1005
+ 8e-05,
1006
+ 8e-05,
1007
+ 8e-05,
1008
+ 8e-05,
1009
+ 8e-05,
1010
+ 8e-05,
1011
+ 8e-05,
1012
+ 8e-05,
1013
+ 8e-05,
1014
+ 8e-05,
1015
+ 8e-05,
1016
+ 8e-05,
1017
+ 8e-05,
1018
+ 8e-05,
1019
+ 8e-05,
1020
+ 8e-05,
1021
+ 8e-05,
1022
+ 8e-05,
1023
+ 8e-05,
1024
+ 8e-05,
1025
+ 8e-05,
1026
+ 8e-05,
1027
+ 8e-05,
1028
+ 8e-05,
1029
+ 8e-05,
1030
+ 8e-05,
1031
+ 8e-05,
1032
+ 8e-05,
1033
+ 8e-05,
1034
+ 8e-05,
1035
+ 8e-05,
1036
+ 8e-05,
1037
+ 8e-05,
1038
+ 8e-05,
1039
+ 8e-05,
1040
+ 8e-05,
1041
+ 8e-05,
1042
+ 8e-05,
1043
+ 8e-05,
1044
+ 8e-05,
1045
+ 8e-05,
1046
+ 8e-05,
1047
+ 8e-05,
1048
+ 8e-05,
1049
+ 8e-05,
1050
+ 8e-05,
1051
+ 8e-05,
1052
+ 8e-05,
1053
+ 8e-05,
1054
+ 8e-05,
1055
+ 8e-05,
1056
+ 8e-05,
1057
+ 8e-05,
1058
+ 8e-05,
1059
+ 8e-05,
1060
+ 8e-05,
1061
+ 8e-05,
1062
+ 8e-05,
1063
+ 8e-05,
1064
+ 8e-05,
1065
+ 8e-05,
1066
+ 8e-05,
1067
+ 8e-05,
1068
+ 8e-05,
1069
+ 8e-05,
1070
+ 8e-05,
1071
+ 8e-05,
1072
+ 8e-05,
1073
+ 8e-05,
1074
+ 8e-05,
1075
+ 8e-05,
1076
+ 8e-05,
1077
+ 8e-05,
1078
+ 8e-05,
1079
+ 8e-05,
1080
+ 8e-05,
1081
+ 8e-05,
1082
+ 8e-05,
1083
+ 8e-05,
1084
+ 7.987186923279727e-05,
1085
+ 7.868547324017929e-05,
1086
+ 7.74990772475613e-05,
1087
+ 7.631268125494332e-05,
1088
+ 7.512628526232533e-05,
1089
+ 7.393988926970736e-05,
1090
+ 7.178064856314262e-05,
1091
+ 7.059425257052466e-05,
1092
+ 6.940785657790668e-05,
1093
+ 6.822146058528868e-05,
1094
+ 6.703506459267071e-05,
1095
+ 6.584866860005273e-05,
1096
+ 6.466227260743475e-05,
1097
+ 6.250303190087003e-05,
1098
+ 6.131663590825203e-05,
1099
+ 6.0130239915634074e-05,
1100
+ 5.894384392301608e-05,
1101
+ 5.77574479303981e-05,
1102
+ 5.657105193778013e-05,
1103
+ 5.538465594516214e-05,
1104
+ 5.3225415238597426e-05,
1105
+ 5.203901924597944e-05,
1106
+ 5.0852623253361464e-05,
1107
+ 4.966622726074349e-05,
1108
+ 4.8479831268125495e-05,
1109
+ 4.729343527550752e-05,
1110
+ 4.610703928288953e-05,
1111
+ 4.3947798576324816e-05,
1112
+ 4.276140258370682e-05,
1113
+ 4.157500659108885e-05,
1114
+ 4.038861059847088e-05,
1115
+ 3.9202214605852886e-05,
1116
+ 3.801581861323492e-05,
1117
+ 3.6829422620616924e-05,
1118
+ 3.46701819140522e-05,
1119
+ 3.348378592143423e-05,
1120
+ 3.229738992881624e-05,
1121
+ 3.111099393619827e-05,
1122
+ 2.9924597943580276e-05,
1123
+ 2.8738201950962302e-05,
1124
+ 2.7551805958344328e-05,
1125
+ 2.5392565251779594e-05,
1126
+ 2.4206169259161623e-05,
1127
+ 2.3019773266543632e-05,
1128
+ 2.1833377273925658e-05,
1129
+ 2.0646981281307667e-05,
1130
+ 1.9460585288689693e-05,
1131
+ 1.8274189296071722e-05,
1132
+ 1.6114948589506984e-05,
1133
+ 1.4928552596889012e-05,
1134
+ 1.374215660427102e-05,
1135
+ 1.2555760611653047e-05,
1136
+ 1.1369364619035074e-05,
1137
+ 1.0182968626417082e-05,
1138
+ 8.99657263379911e-06
1139
+ ],
1140
+ "emb_lr": [],
1141
+ "eval_step": [
1142
+ 350,
1143
+ 741,
1144
+ 1132,
1145
+ 1523,
1146
+ 1914,
1147
+ 2305,
1148
+ 2696,
1149
+ 3087,
1150
+ 3478,
1151
+ 3869,
1152
+ 4260,
1153
+ 4651,
1154
+ 5042,
1155
+ 5433,
1156
+ 5824,
1157
+ 6215,
1158
+ 6606,
1159
+ 6997,
1160
+ 7388,
1161
+ 7779
1162
+ ],
1163
+ "eval_accuracy": [
1164
+ 0.0,
1165
+ 0.0,
1166
+ 0.0,
1167
+ 0.0,
1168
+ 0.0,
1169
+ 0.0,
1170
+ 0.0,
1171
+ 0.0,
1172
+ 0.0,
1173
+ 0.0,
1174
+ 0.0,
1175
+ 0.0,
1176
+ 0.0,
1177
+ 0.0,
1178
+ 0.0,
1179
+ 0.0,
1180
+ 0.0,
1181
+ 0.0,
1182
+ 0.0,
1183
+ 0.0
1184
+ ]
1185
+ },
1186
+ "final_accuracy": 1.0,
1187
+ "sft_eval": {
1188
+ "config": {
1189
+ "ops": "add_sub",
1190
+ "K": null,
1191
+ "mode": "sft",
1192
+ "n_digits": 6,
1193
+ "n_per_split": 100
1194
+ },
1195
+ "splits": {
1196
+ "add_S0": {
1197
+ "full_accuracy": 1.0,
1198
+ "n_examples": 100,
1199
+ "per_subtask": {
1200
+ "SA": {
1201
+ "accuracy": 1.0,
1202
+ "count": 605
1203
+ },
1204
+ "SS": {
1205
+ "accuracy": 1.0,
1206
+ "count": 95
1207
+ }
1208
+ }
1209
+ },
1210
+ "add_S1": {
1211
+ "full_accuracy": 1.0,
1212
+ "n_examples": 100,
1213
+ "per_subtask": {
1214
+ "SA": {
1215
+ "accuracy": 1.0,
1216
+ "count": 204
1217
+ },
1218
+ "SC": {
1219
+ "accuracy": 1.0,
1220
+ "count": 169
1221
+ },
1222
+ "SS": {
1223
+ "accuracy": 1.0,
1224
+ "count": 31
1225
+ },
1226
+ "UC": {
1227
+ "accuracy": 1.0,
1228
+ "count": 296
1229
+ }
1230
+ }
1231
+ },
1232
+ "add_S2": {
1233
+ "full_accuracy": 1.0,
1234
+ "n_examples": 100,
1235
+ "per_subtask": {
1236
+ "SA": {
1237
+ "accuracy": 1.0,
1238
+ "count": 163
1239
+ },
1240
+ "SC": {
1241
+ "accuracy": 1.0,
1242
+ "count": 130
1243
+ },
1244
+ "SS": {
1245
+ "accuracy": 1.0,
1246
+ "count": 87
1247
+ },
1248
+ "UC": {
1249
+ "accuracy": 1.0,
1250
+ "count": 203
1251
+ },
1252
+ "US": {
1253
+ "accuracy": 1.0,
1254
+ "count": 117
1255
+ }
1256
+ }
1257
+ },
1258
+ "add_S3": {
1259
+ "full_accuracy": 1.0,
1260
+ "n_examples": 100,
1261
+ "per_subtask": {
1262
+ "SA": {
1263
+ "accuracy": 1.0,
1264
+ "count": 121
1265
+ },
1266
+ "SC": {
1267
+ "accuracy": 1.0,
1268
+ "count": 121
1269
+ },
1270
+ "SS": {
1271
+ "accuracy": 1.0,
1272
+ "count": 49
1273
+ },
1274
+ "UC": {
1275
+ "accuracy": 1.0,
1276
+ "count": 186
1277
+ },
1278
+ "US": {
1279
+ "accuracy": 1.0,
1280
+ "count": 223
1281
+ }
1282
+ }
1283
+ },
1284
+ "add_S4": {
1285
+ "full_accuracy": 0.93,
1286
+ "n_examples": 100,
1287
+ "per_subtask": {
1288
+ "SA": {
1289
+ "accuracy": 1.0,
1290
+ "count": 104
1291
+ },
1292
+ "SC": {
1293
+ "accuracy": 1.0,
1294
+ "count": 106
1295
+ },
1296
+ "SS": {
1297
+ "accuracy": 1.0,
1298
+ "count": 23
1299
+ },
1300
+ "UC": {
1301
+ "accuracy": 0.95625,
1302
+ "count": 160
1303
+ },
1304
+ "US": {
1305
+ "accuracy": 1.0,
1306
+ "count": 307
1307
+ }
1308
+ }
1309
+ },
1310
+ "add_S5": {
1311
+ "full_accuracy": 1.0,
1312
+ "n_examples": 100,
1313
+ "per_subtask": {
1314
+ "SA": {
1315
+ "accuracy": 1.0,
1316
+ "count": 100
1317
+ },
1318
+ "SC": {
1319
+ "accuracy": 1.0,
1320
+ "count": 100
1321
+ },
1322
+ "UC": {
1323
+ "accuracy": 1.0,
1324
+ "count": 100
1325
+ },
1326
+ "US": {
1327
+ "accuracy": 1.0,
1328
+ "count": 400
1329
+ }
1330
+ }
1331
+ },
1332
+ "add_S6": {
1333
+ "full_accuracy": 1.0,
1334
+ "n_examples": 100,
1335
+ "per_subtask": {
1336
+ "SC": {
1337
+ "accuracy": 1.0,
1338
+ "count": 100
1339
+ },
1340
+ "UC": {
1341
+ "accuracy": 1.0,
1342
+ "count": 100
1343
+ },
1344
+ "US": {
1345
+ "accuracy": 1.0,
1346
+ "count": 500
1347
+ }
1348
+ }
1349
+ },
1350
+ "add_random": {
1351
+ "full_accuracy": 1.0,
1352
+ "n_examples": 200,
1353
+ "per_subtask": {
1354
+ "SA": {
1355
+ "accuracy": 1.0,
1356
+ "count": 447
1357
+ },
1358
+ "SC": {
1359
+ "accuracy": 1.0,
1360
+ "count": 320
1361
+ },
1362
+ "SS": {
1363
+ "accuracy": 1.0,
1364
+ "count": 56
1365
+ },
1366
+ "UC": {
1367
+ "accuracy": 1.0,
1368
+ "count": 529
1369
+ },
1370
+ "US": {
1371
+ "accuracy": 1.0,
1372
+ "count": 48
1373
+ }
1374
+ }
1375
+ },
1376
+ "add_C3": {
1377
+ "full_accuracy": 1.0,
1378
+ "n_examples": 100,
1379
+ "per_subtask": {
1380
+ "SA": {
1381
+ "accuracy": 1.0,
1382
+ "count": 300
1383
+ },
1384
+ "SC": {
1385
+ "accuracy": 1.0,
1386
+ "count": 100
1387
+ },
1388
+ "UC": {
1389
+ "accuracy": 1.0,
1390
+ "count": 193
1391
+ },
1392
+ "US": {
1393
+ "accuracy": 1.0,
1394
+ "count": 107
1395
+ }
1396
+ }
1397
+ },
1398
+ "add_C4": {
1399
+ "full_accuracy": 0.97,
1400
+ "n_examples": 100,
1401
+ "per_subtask": {
1402
+ "SA": {
1403
+ "accuracy": 1.0,
1404
+ "count": 200
1405
+ },
1406
+ "SC": {
1407
+ "accuracy": 1.0,
1408
+ "count": 100
1409
+ },
1410
+ "UC": {
1411
+ "accuracy": 0.98828125,
1412
+ "count": 256
1413
+ },
1414
+ "US": {
1415
+ "accuracy": 1.0,
1416
+ "count": 144
1417
+ }
1418
+ }
1419
+ },
1420
+ "add_C5": {
1421
+ "full_accuracy": 1.0,
1422
+ "n_examples": 100,
1423
+ "per_subtask": {
1424
+ "SA": {
1425
+ "accuracy": 1.0,
1426
+ "count": 100
1427
+ },
1428
+ "SC": {
1429
+ "accuracy": 1.0,
1430
+ "count": 100
1431
+ },
1432
+ "UC": {
1433
+ "accuracy": 1.0,
1434
+ "count": 306
1435
+ },
1436
+ "US": {
1437
+ "accuracy": 1.0,
1438
+ "count": 194
1439
+ }
1440
+ }
1441
+ },
1442
+ "add_C6": {
1443
+ "full_accuracy": 1.0,
1444
+ "n_examples": 100,
1445
+ "per_subtask": {
1446
+ "SC": {
1447
+ "accuracy": 1.0,
1448
+ "count": 100
1449
+ },
1450
+ "UC": {
1451
+ "accuracy": 1.0,
1452
+ "count": 366
1453
+ },
1454
+ "US": {
1455
+ "accuracy": 1.0,
1456
+ "count": 234
1457
+ }
1458
+ }
1459
+ },
1460
+ "sub_M0": {
1461
+ "full_accuracy": 1.0,
1462
+ "n_examples": 100,
1463
+ "per_subtask": {
1464
+ "MD": {
1465
+ "accuracy": 1.0,
1466
+ "count": 601
1467
+ },
1468
+ "ME": {
1469
+ "accuracy": 1.0,
1470
+ "count": 99
1471
+ }
1472
+ }
1473
+ },
1474
+ "sub_M1": {
1475
+ "full_accuracy": 1.0,
1476
+ "n_examples": 100,
1477
+ "per_subtask": {
1478
+ "MD": {
1479
+ "accuracy": 1.0,
1480
+ "count": 279
1481
+ },
1482
+ "MB": {
1483
+ "accuracy": 1.0,
1484
+ "count": 145
1485
+ },
1486
+ "ME": {
1487
+ "accuracy": 1.0,
1488
+ "count": 24
1489
+ },
1490
+ "UB": {
1491
+ "accuracy": 1.0,
1492
+ "count": 252
1493
+ }
1494
+ }
1495
+ },
1496
+ "sub_M2": {
1497
+ "full_accuracy": 1.0,
1498
+ "n_examples": 100,
1499
+ "per_subtask": {
1500
+ "MD": {
1501
+ "accuracy": 1.0,
1502
+ "count": 213
1503
+ },
1504
+ "MB": {
1505
+ "accuracy": 1.0,
1506
+ "count": 113
1507
+ },
1508
+ "ME": {
1509
+ "accuracy": 1.0,
1510
+ "count": 85
1511
+ },
1512
+ "UB": {
1513
+ "accuracy": 1.0,
1514
+ "count": 181
1515
+ },
1516
+ "UD": {
1517
+ "accuracy": 1.0,
1518
+ "count": 108
1519
+ }
1520
+ }
1521
+ },
1522
+ "sub_M3": {
1523
+ "full_accuracy": 1.0,
1524
+ "n_examples": 100,
1525
+ "per_subtask": {
1526
+ "MD": {
1527
+ "accuracy": 1.0,
1528
+ "count": 179
1529
+ },
1530
+ "MB": {
1531
+ "accuracy": 1.0,
1532
+ "count": 103
1533
+ },
1534
+ "ME": {
1535
+ "accuracy": 1.0,
1536
+ "count": 56
1537
+ },
1538
+ "UB": {
1539
+ "accuracy": 1.0,
1540
+ "count": 149
1541
+ },
1542
+ "UD": {
1543
+ "accuracy": 1.0,
1544
+ "count": 213
1545
+ }
1546
+ }
1547
+ },
1548
+ "sub_M4": {
1549
+ "full_accuracy": 0.85,
1550
+ "n_examples": 100,
1551
+ "per_subtask": {
1552
+ "MD": {
1553
+ "accuracy": 1.0,
1554
+ "count": 200
1555
+ },
1556
+ "MB": {
1557
+ "accuracy": 1.0,
1558
+ "count": 100
1559
+ },
1560
+ "UB": {
1561
+ "accuracy": 0.85,
1562
+ "count": 100
1563
+ },
1564
+ "UD": {
1565
+ "accuracy": 1.0,
1566
+ "count": 300
1567
+ }
1568
+ }
1569
+ },
1570
+ "sub_M5": {
1571
+ "full_accuracy": 0.76,
1572
+ "n_examples": 100,
1573
+ "per_subtask": {
1574
+ "MD": {
1575
+ "accuracy": 1.0,
1576
+ "count": 100
1577
+ },
1578
+ "MB": {
1579
+ "accuracy": 1.0,
1580
+ "count": 100
1581
+ },
1582
+ "UB": {
1583
+ "accuracy": 0.76,
1584
+ "count": 100
1585
+ },
1586
+ "UD": {
1587
+ "accuracy": 1.0,
1588
+ "count": 400
1589
+ }
1590
+ }
1591
+ },
1592
+ "sub_random": {
1593
+ "full_accuracy": 1.0,
1594
+ "n_examples": 200,
1595
+ "per_subtask": {
1596
+ "MD": {
1597
+ "accuracy": 1.0,
1598
+ "count": 600
1599
+ },
1600
+ "MB": {
1601
+ "accuracy": 1.0,
1602
+ "count": 267
1603
+ },
1604
+ "ME": {
1605
+ "accuracy": 1.0,
1606
+ "count": 53
1607
+ },
1608
+ "UB": {
1609
+ "accuracy": 1.0,
1610
+ "count": 439
1611
+ },
1612
+ "UD": {
1613
+ "accuracy": 1.0,
1614
+ "count": 41
1615
+ }
1616
+ }
1617
+ },
1618
+ "sub_B3": {
1619
+ "full_accuracy": 1.0,
1620
+ "n_examples": 100,
1621
+ "per_subtask": {
1622
+ "MD": {
1623
+ "accuracy": 1.0,
1624
+ "count": 300
1625
+ },
1626
+ "MB": {
1627
+ "accuracy": 1.0,
1628
+ "count": 100
1629
+ },
1630
+ "UB": {
1631
+ "accuracy": 1.0,
1632
+ "count": 197
1633
+ },
1634
+ "UD": {
1635
+ "accuracy": 1.0,
1636
+ "count": 103
1637
+ }
1638
+ }
1639
+ },
1640
+ "sub_B4": {
1641
+ "full_accuracy": 0.95,
1642
+ "n_examples": 100,
1643
+ "per_subtask": {
1644
+ "MD": {
1645
+ "accuracy": 1.0,
1646
+ "count": 200
1647
+ },
1648
+ "MB": {
1649
+ "accuracy": 1.0,
1650
+ "count": 100
1651
+ },
1652
+ "UB": {
1653
+ "accuracy": 0.979757085020243,
1654
+ "count": 247
1655
+ },
1656
+ "UD": {
1657
+ "accuracy": 1.0,
1658
+ "count": 153
1659
+ }
1660
+ }
1661
+ },
1662
+ "sub_B5": {
1663
+ "full_accuracy": 0.98,
1664
+ "n_examples": 100,
1665
+ "per_subtask": {
1666
+ "MD": {
1667
+ "accuracy": 1.0,
1668
+ "count": 100
1669
+ },
1670
+ "MB": {
1671
+ "accuracy": 1.0,
1672
+ "count": 100
1673
+ },
1674
+ "UB": {
1675
+ "accuracy": 0.9932885906040269,
1676
+ "count": 298
1677
+ },
1678
+ "UD": {
1679
+ "accuracy": 1.0,
1680
+ "count": 202
1681
+ }
1682
+ }
1683
+ }
1684
+ },
1685
+ "summary": {
1686
+ "overall_accuracy": 0.9766666666666667,
1687
+ "total_examples": 2400,
1688
+ "n_splits": 22
1689
+ }
1690
+ },
1691
+ "sorl_eval": {
1692
+ "config": {
1693
+ "ops": "add_sub",
1694
+ "K": 1,
1695
+ "mode": "sorl",
1696
+ "n_digits": 6,
1697
+ "n_per_split": 100
1698
+ },
1699
+ "splits": {
1700
+ "add_S0": {
1701
+ "full_accuracy": 1.0,
1702
+ "n_examples": 100,
1703
+ "per_subtask": {
1704
+ "SA": {
1705
+ "accuracy": 1.0,
1706
+ "count": 605
1707
+ },
1708
+ "SS": {
1709
+ "accuracy": 1.0,
1710
+ "count": 95
1711
+ }
1712
+ }
1713
+ },
1714
+ "add_S1": {
1715
+ "full_accuracy": 1.0,
1716
+ "n_examples": 100,
1717
+ "per_subtask": {
1718
+ "SA": {
1719
+ "accuracy": 1.0,
1720
+ "count": 204
1721
+ },
1722
+ "SC": {
1723
+ "accuracy": 1.0,
1724
+ "count": 169
1725
+ },
1726
+ "SS": {
1727
+ "accuracy": 1.0,
1728
+ "count": 31
1729
+ },
1730
+ "UC": {
1731
+ "accuracy": 1.0,
1732
+ "count": 296
1733
+ }
1734
+ }
1735
+ },
1736
+ "add_S2": {
1737
+ "full_accuracy": 1.0,
1738
+ "n_examples": 100,
1739
+ "per_subtask": {
1740
+ "SA": {
1741
+ "accuracy": 1.0,
1742
+ "count": 163
1743
+ },
1744
+ "SC": {
1745
+ "accuracy": 1.0,
1746
+ "count": 130
1747
+ },
1748
+ "SS": {
1749
+ "accuracy": 1.0,
1750
+ "count": 87
1751
+ },
1752
+ "UC": {
1753
+ "accuracy": 1.0,
1754
+ "count": 203
1755
+ },
1756
+ "US": {
1757
+ "accuracy": 1.0,
1758
+ "count": 117
1759
+ }
1760
+ }
1761
+ },
1762
+ "add_S3": {
1763
+ "full_accuracy": 1.0,
1764
+ "n_examples": 100,
1765
+ "per_subtask": {
1766
+ "SA": {
1767
+ "accuracy": 1.0,
1768
+ "count": 121
1769
+ },
1770
+ "SC": {
1771
+ "accuracy": 1.0,
1772
+ "count": 121
1773
+ },
1774
+ "SS": {
1775
+ "accuracy": 1.0,
1776
+ "count": 49
1777
+ },
1778
+ "UC": {
1779
+ "accuracy": 1.0,
1780
+ "count": 186
1781
+ },
1782
+ "US": {
1783
+ "accuracy": 1.0,
1784
+ "count": 223
1785
+ }
1786
+ }
1787
+ },
1788
+ "add_S4": {
1789
+ "full_accuracy": 1.0,
1790
+ "n_examples": 100,
1791
+ "per_subtask": {
1792
+ "SA": {
1793
+ "accuracy": 1.0,
1794
+ "count": 104
1795
+ },
1796
+ "SC": {
1797
+ "accuracy": 1.0,
1798
+ "count": 106
1799
+ },
1800
+ "SS": {
1801
+ "accuracy": 1.0,
1802
+ "count": 23
1803
+ },
1804
+ "UC": {
1805
+ "accuracy": 1.0,
1806
+ "count": 160
1807
+ },
1808
+ "US": {
1809
+ "accuracy": 1.0,
1810
+ "count": 307
1811
+ }
1812
+ }
1813
+ },
1814
+ "add_S5": {
1815
+ "full_accuracy": 1.0,
1816
+ "n_examples": 100,
1817
+ "per_subtask": {
1818
+ "SA": {
1819
+ "accuracy": 1.0,
1820
+ "count": 100
1821
+ },
1822
+ "SC": {
1823
+ "accuracy": 1.0,
1824
+ "count": 100
1825
+ },
1826
+ "UC": {
1827
+ "accuracy": 1.0,
1828
+ "count": 100
1829
+ },
1830
+ "US": {
1831
+ "accuracy": 1.0,
1832
+ "count": 400
1833
+ }
1834
+ }
1835
+ },
1836
+ "add_S6": {
1837
+ "full_accuracy": 1.0,
1838
+ "n_examples": 100,
1839
+ "per_subtask": {
1840
+ "SC": {
1841
+ "accuracy": 1.0,
1842
+ "count": 100
1843
+ },
1844
+ "UC": {
1845
+ "accuracy": 1.0,
1846
+ "count": 100
1847
+ },
1848
+ "US": {
1849
+ "accuracy": 1.0,
1850
+ "count": 500
1851
+ }
1852
+ }
1853
+ },
1854
+ "add_random": {
1855
+ "full_accuracy": 1.0,
1856
+ "n_examples": 200,
1857
+ "per_subtask": {
1858
+ "SA": {
1859
+ "accuracy": 1.0,
1860
+ "count": 447
1861
+ },
1862
+ "SC": {
1863
+ "accuracy": 1.0,
1864
+ "count": 320
1865
+ },
1866
+ "SS": {
1867
+ "accuracy": 1.0,
1868
+ "count": 56
1869
+ },
1870
+ "UC": {
1871
+ "accuracy": 1.0,
1872
+ "count": 529
1873
+ },
1874
+ "US": {
1875
+ "accuracy": 1.0,
1876
+ "count": 48
1877
+ }
1878
+ }
1879
+ },
1880
+ "add_C3": {
1881
+ "full_accuracy": 1.0,
1882
+ "n_examples": 100,
1883
+ "per_subtask": {
1884
+ "SA": {
1885
+ "accuracy": 1.0,
1886
+ "count": 300
1887
+ },
1888
+ "SC": {
1889
+ "accuracy": 1.0,
1890
+ "count": 100
1891
+ },
1892
+ "UC": {
1893
+ "accuracy": 1.0,
1894
+ "count": 193
1895
+ },
1896
+ "US": {
1897
+ "accuracy": 1.0,
1898
+ "count": 107
1899
+ }
1900
+ }
1901
+ },
1902
+ "add_C4": {
1903
+ "full_accuracy": 1.0,
1904
+ "n_examples": 100,
1905
+ "per_subtask": {
1906
+ "SA": {
1907
+ "accuracy": 1.0,
1908
+ "count": 200
1909
+ },
1910
+ "SC": {
1911
+ "accuracy": 1.0,
1912
+ "count": 100
1913
+ },
1914
+ "UC": {
1915
+ "accuracy": 1.0,
1916
+ "count": 256
1917
+ },
1918
+ "US": {
1919
+ "accuracy": 1.0,
1920
+ "count": 144
1921
+ }
1922
+ }
1923
+ },
1924
+ "add_C5": {
1925
+ "full_accuracy": 1.0,
1926
+ "n_examples": 100,
1927
+ "per_subtask": {
1928
+ "SA": {
1929
+ "accuracy": 1.0,
1930
+ "count": 100
1931
+ },
1932
+ "SC": {
1933
+ "accuracy": 1.0,
1934
+ "count": 100
1935
+ },
1936
+ "UC": {
1937
+ "accuracy": 1.0,
1938
+ "count": 306
1939
+ },
1940
+ "US": {
1941
+ "accuracy": 1.0,
1942
+ "count": 194
1943
+ }
1944
+ }
1945
+ },
1946
+ "add_C6": {
1947
+ "full_accuracy": 1.0,
1948
+ "n_examples": 100,
1949
+ "per_subtask": {
1950
+ "SC": {
1951
+ "accuracy": 1.0,
1952
+ "count": 100
1953
+ },
1954
+ "UC": {
1955
+ "accuracy": 1.0,
1956
+ "count": 366
1957
+ },
1958
+ "US": {
1959
+ "accuracy": 1.0,
1960
+ "count": 234
1961
+ }
1962
+ }
1963
+ },
1964
+ "sub_M0": {
1965
+ "full_accuracy": 1.0,
1966
+ "n_examples": 100,
1967
+ "per_subtask": {
1968
+ "MD": {
1969
+ "accuracy": 1.0,
1970
+ "count": 601
1971
+ },
1972
+ "ME": {
1973
+ "accuracy": 1.0,
1974
+ "count": 99
1975
+ }
1976
+ }
1977
+ },
1978
+ "sub_M1": {
1979
+ "full_accuracy": 1.0,
1980
+ "n_examples": 100,
1981
+ "per_subtask": {
1982
+ "MD": {
1983
+ "accuracy": 1.0,
1984
+ "count": 279
1985
+ },
1986
+ "MB": {
1987
+ "accuracy": 1.0,
1988
+ "count": 145
1989
+ },
1990
+ "ME": {
1991
+ "accuracy": 1.0,
1992
+ "count": 24
1993
+ },
1994
+ "UB": {
1995
+ "accuracy": 1.0,
1996
+ "count": 252
1997
+ }
1998
+ }
1999
+ },
2000
+ "sub_M2": {
2001
+ "full_accuracy": 1.0,
2002
+ "n_examples": 100,
2003
+ "per_subtask": {
2004
+ "MD": {
2005
+ "accuracy": 1.0,
2006
+ "count": 213
2007
+ },
2008
+ "MB": {
2009
+ "accuracy": 1.0,
2010
+ "count": 113
2011
+ },
2012
+ "ME": {
2013
+ "accuracy": 1.0,
2014
+ "count": 85
2015
+ },
2016
+ "UB": {
2017
+ "accuracy": 1.0,
2018
+ "count": 181
2019
+ },
2020
+ "UD": {
2021
+ "accuracy": 1.0,
2022
+ "count": 108
2023
+ }
2024
+ }
2025
+ },
2026
+ "sub_M3": {
2027
+ "full_accuracy": 1.0,
2028
+ "n_examples": 100,
2029
+ "per_subtask": {
2030
+ "MD": {
2031
+ "accuracy": 1.0,
2032
+ "count": 179
2033
+ },
2034
+ "MB": {
2035
+ "accuracy": 1.0,
2036
+ "count": 103
2037
+ },
2038
+ "ME": {
2039
+ "accuracy": 1.0,
2040
+ "count": 56
2041
+ },
2042
+ "UB": {
2043
+ "accuracy": 1.0,
2044
+ "count": 149
2045
+ },
2046
+ "UD": {
2047
+ "accuracy": 1.0,
2048
+ "count": 213
2049
+ }
2050
+ }
2051
+ },
2052
+ "sub_M4": {
2053
+ "full_accuracy": 1.0,
2054
+ "n_examples": 100,
2055
+ "per_subtask": {
2056
+ "MD": {
2057
+ "accuracy": 1.0,
2058
+ "count": 200
2059
+ },
2060
+ "MB": {
2061
+ "accuracy": 1.0,
2062
+ "count": 100
2063
+ },
2064
+ "UB": {
2065
+ "accuracy": 1.0,
2066
+ "count": 100
2067
+ },
2068
+ "UD": {
2069
+ "accuracy": 1.0,
2070
+ "count": 300
2071
+ }
2072
+ }
2073
+ },
2074
+ "sub_M5": {
2075
+ "full_accuracy": 1.0,
2076
+ "n_examples": 100,
2077
+ "per_subtask": {
2078
+ "MD": {
2079
+ "accuracy": 1.0,
2080
+ "count": 100
2081
+ },
2082
+ "MB": {
2083
+ "accuracy": 1.0,
2084
+ "count": 100
2085
+ },
2086
+ "UB": {
2087
+ "accuracy": 1.0,
2088
+ "count": 100
2089
+ },
2090
+ "UD": {
2091
+ "accuracy": 1.0,
2092
+ "count": 400
2093
+ }
2094
+ }
2095
+ },
2096
+ "sub_random": {
2097
+ "full_accuracy": 1.0,
2098
+ "n_examples": 200,
2099
+ "per_subtask": {
2100
+ "MD": {
2101
+ "accuracy": 1.0,
2102
+ "count": 600
2103
+ },
2104
+ "MB": {
2105
+ "accuracy": 1.0,
2106
+ "count": 267
2107
+ },
2108
+ "ME": {
2109
+ "accuracy": 1.0,
2110
+ "count": 53
2111
+ },
2112
+ "UB": {
2113
+ "accuracy": 1.0,
2114
+ "count": 439
2115
+ },
2116
+ "UD": {
2117
+ "accuracy": 1.0,
2118
+ "count": 41
2119
+ }
2120
+ }
2121
+ },
2122
+ "sub_B3": {
2123
+ "full_accuracy": 1.0,
2124
+ "n_examples": 100,
2125
+ "per_subtask": {
2126
+ "MD": {
2127
+ "accuracy": 1.0,
2128
+ "count": 300
2129
+ },
2130
+ "MB": {
2131
+ "accuracy": 1.0,
2132
+ "count": 100
2133
+ },
2134
+ "UB": {
2135
+ "accuracy": 1.0,
2136
+ "count": 197
2137
+ },
2138
+ "UD": {
2139
+ "accuracy": 1.0,
2140
+ "count": 103
2141
+ }
2142
+ }
2143
+ },
2144
+ "sub_B4": {
2145
+ "full_accuracy": 1.0,
2146
+ "n_examples": 100,
2147
+ "per_subtask": {
2148
+ "MD": {
2149
+ "accuracy": 1.0,
2150
+ "count": 200
2151
+ },
2152
+ "MB": {
2153
+ "accuracy": 1.0,
2154
+ "count": 100
2155
+ },
2156
+ "UB": {
2157
+ "accuracy": 1.0,
2158
+ "count": 247
2159
+ },
2160
+ "UD": {
2161
+ "accuracy": 1.0,
2162
+ "count": 153
2163
+ }
2164
+ }
2165
+ },
2166
+ "sub_B5": {
2167
+ "full_accuracy": 1.0,
2168
+ "n_examples": 100,
2169
+ "per_subtask": {
2170
+ "MD": {
2171
+ "accuracy": 1.0,
2172
+ "count": 100
2173
+ },
2174
+ "MB": {
2175
+ "accuracy": 1.0,
2176
+ "count": 100
2177
+ },
2178
+ "UB": {
2179
+ "accuracy": 1.0,
2180
+ "count": 298
2181
+ },
2182
+ "UD": {
2183
+ "accuracy": 1.0,
2184
+ "count": 202
2185
+ }
2186
+ }
2187
+ }
2188
+ },
2189
+ "summary": {
2190
+ "overall_accuracy": 1.0,
2191
+ "total_examples": 2400,
2192
+ "n_splits": 22
2193
+ }
2194
+ },
2195
+ "sorl_overall_accuracy": 1.0,
2196
+ "sft_overall_accuracy": 0.9766666666666667
2197
+ }
add_sub_sorl_v1_abs30_K1_25K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10a719b33a7daab70d7e4cf7bc8d44d7cef4ff6fbfd976546301e57fbbf8644e
3
+ size 650385300
add_sub_sorl_v1_abs30_K1_25K/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 234,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 390,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs30_K1_25K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 30,
65
+ "dataset_size": 25000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162519662,
71
+ "run_name": "add_sub_sorl_v1_abs30_K1_25K",
72
+ "git_commit": "57deaa28d9c21e39ddac5ef448d6e1be992fba91",
73
+ "timestamp": "2026-04-13T09:57:41.955984+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "v1",
79
+ "wandb_run_id": "cf3zi3ax",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/cf3zi3ax",
81
+ "final_accuracy": 1.0,
82
+ "sft_accuracy": 0.9766666666666667,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }