Shree10 commited on
Commit
22a0251
·
verified ·
1 Parent(s): 49a5b3c

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -60,3 +60,8 @@ vizualisations/expert_usage_over_epochs_token_choice.png filter=lfs diff=lfs mer
60
  vizualisations/routing_comparison.png filter=lfs diff=lfs merge=lfs -text
61
  vizualisations/test_expert_usage_hash.png filter=lfs diff=lfs merge=lfs -text
62
  vizualisations/test_expert_usage_token_choice.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
60
  vizualisations/routing_comparison.png filter=lfs diff=lfs merge=lfs -text
61
  vizualisations/test_expert_usage_hash.png filter=lfs diff=lfs merge=lfs -text
62
  vizualisations/test_expert_usage_token_choice.png filter=lfs diff=lfs merge=lfs -text
63
+ results/gqa_comparison/gqa_vs_mha_comparison.png filter=lfs diff=lfs merge=lfs -text
64
+ results/lb_comparison/expert_balance_comparison.png filter=lfs diff=lfs merge=lfs -text
65
+ results/lb_comparison/loss_comparison.png filter=lfs diff=lfs merge=lfs -text
66
+ results/moe_hash/moe_hash_test_predictions.json filter=lfs diff=lfs merge=lfs -text
67
+ results/moe_hash/training_curves.png filter=lfs diff=lfs merge=lfs -text
results/gqa_comparison/gqa_vs_mha_comparison.png ADDED

Git LFS Details

  • SHA256: 2433071cd08ae1082d12255a6c2d15a58460fcafabf4fc04dd3675a76ae8a916
  • Pointer size: 131 Bytes
  • Size of remote file: 318 kB
results/gqa_moe/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f48872136d77019b1955056e8dcba3500bf1def0e6baaabb33398673f5a17f
3
+ size 701671341
results/lb_comparison/expert_balance_comparison.png ADDED

Git LFS Details

  • SHA256: da69425ce04feec140a2e6a2b618be23f1b40ec9a620d4849a02b0aa8daecaf8
  • Pointer size: 131 Bytes
  • Size of remote file: 960 kB
results/lb_comparison/loss_comparison.png ADDED

Git LFS Details

  • SHA256: 06c72dad075e22c1bb4785570ae9eac545f6621fd17015556972539c3553c81a
  • Pointer size: 131 Bytes
  • Size of remote file: 457 kB
results/lora_comparison/comparison_metrics.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "standard_moe": {
3
+ "total_params": 179944868,
4
+ "trainable_params": 179944868,
5
+ "train_losses": [
6
+ 6.993803797149658,
7
+ 5.954492663574219,
8
+ 5.53921777420044,
9
+ 5.24759983291626,
10
+ 5.0148901168823246
11
+ ],
12
+ "val_losses": [
13
+ 6.212501205444336,
14
+ 5.760108711242676,
15
+ 5.491593559265136,
16
+ 5.284299610137939,
17
+ 5.145402297973633
18
+ ],
19
+ "best_val_loss": 5.145402297973633,
20
+ "training_time": 1290.9204235076904
21
+ },
22
+ "lora_moe": {
23
+ "total_params": 182828452,
24
+ "trainable_params": 2621440,
25
+ "lora_percent": 1.43382497161875,
26
+ "train_losses": [
27
+ 7.029633666229248,
28
+ 5.872530696105957,
29
+ 5.428599870300293,
30
+ 5.109180227661133,
31
+ 4.84611487121582
32
+ ],
33
+ "val_losses": [
34
+ 6.160612037658692,
35
+ 5.704808525085449,
36
+ 5.452941730499267,
37
+ 5.289849403381347,
38
+ 5.177924713134765
39
+ ],
40
+ "best_val_loss": 5.177924713134765,
41
+ "training_time": 18967.75845336914
42
+ }
43
+ }
results/lora_moe/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe830c30e4365e4cc873e86af27dc33b40e32e7967070df6276f1184957d27c7
3
+ size 731567101
results/moe_hash/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611bd48bcdf6619e04c9b8aa64375ba6d7e45b4f45855c82419c743de6c78213
3
+ size 2159269635
results/moe_hash/expert_usage_history.json ADDED
@@ -0,0 +1,427 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 1,
4
+ "usage": {
5
+ "encoder_layer_0": {
6
+ "0": 261455302,
7
+ "1": 262893842,
8
+ "2": 3077719,
9
+ "3": 3319592,
10
+ "4": 3253708,
11
+ "5": 3070038,
12
+ "6": 283558135,
13
+ "7": 282061392
14
+ },
15
+ "encoder_layer_1": {
16
+ "0": 240464181,
17
+ "1": 249133057,
18
+ "2": 19122883,
19
+ "3": 22247029,
20
+ "4": 24482469,
21
+ "5": 25912739,
22
+ "6": 267275331,
23
+ "7": 254052039
24
+ },
25
+ "encoder_layer_2": {
26
+ "0": 207284432,
27
+ "1": 235091343,
28
+ "2": 64242756,
29
+ "3": 74434438,
30
+ "4": 72444627,
31
+ "5": 62671279,
32
+ "6": 207373049,
33
+ "7": 179147804
34
+ },
35
+ "encoder_layer_3": {
36
+ "0": 135826351,
37
+ "1": 181241685,
38
+ "2": 103904554,
39
+ "3": 118421274,
40
+ "4": 122321377,
41
+ "5": 112346323,
42
+ "6": 189292582,
43
+ "7": 139335582
44
+ },
45
+ "decoder_layer_0": {
46
+ "0": 9569968,
47
+ "1": 15139042,
48
+ "2": 10527019,
49
+ "3": 11571099,
50
+ "4": 14076991,
51
+ "5": 12499317,
52
+ "6": 15760636,
53
+ "7": 10725156
54
+ },
55
+ "decoder_layer_1": {
56
+ "0": 12376231,
57
+ "1": 15921848,
58
+ "2": 9631421,
59
+ "3": 14336388,
60
+ "4": 14508117,
61
+ "5": 9245838,
62
+ "6": 13418845,
63
+ "7": 10430540
64
+ },
65
+ "decoder_layer_2": {
66
+ "0": 24368930,
67
+ "1": 24373976,
68
+ "2": 9665,
69
+ "3": 8794,
70
+ "4": 7449,
71
+ "5": 5545,
72
+ "6": 25548570,
73
+ "7": 25546299
74
+ },
75
+ "decoder_layer_3": {
76
+ "0": 13153999,
77
+ "1": 17946588,
78
+ "2": 10940872,
79
+ "3": 11284613,
80
+ "4": 12384130,
81
+ "5": 10910761,
82
+ "6": 13455613,
83
+ "7": 9792652
84
+ }
85
+ }
86
+ },
87
+ {
88
+ "epoch": 2,
89
+ "usage": {
90
+ "encoder_layer_0": {
91
+ "0": 261455302,
92
+ "1": 262893842,
93
+ "2": 3077719,
94
+ "3": 3319592,
95
+ "4": 3253708,
96
+ "5": 3070038,
97
+ "6": 283558135,
98
+ "7": 282061392
99
+ },
100
+ "encoder_layer_1": {
101
+ "0": 240464181,
102
+ "1": 249133057,
103
+ "2": 19122883,
104
+ "3": 22247029,
105
+ "4": 24482469,
106
+ "5": 25912739,
107
+ "6": 267275331,
108
+ "7": 254052039
109
+ },
110
+ "encoder_layer_2": {
111
+ "0": 207284432,
112
+ "1": 235091343,
113
+ "2": 64242756,
114
+ "3": 74434438,
115
+ "4": 72444627,
116
+ "5": 62671279,
117
+ "6": 207373049,
118
+ "7": 179147804
119
+ },
120
+ "encoder_layer_3": {
121
+ "0": 135826351,
122
+ "1": 181241685,
123
+ "2": 103904554,
124
+ "3": 118421274,
125
+ "4": 122321377,
126
+ "5": 112346323,
127
+ "6": 189292582,
128
+ "7": 139335582
129
+ },
130
+ "decoder_layer_0": {
131
+ "0": 9569968,
132
+ "1": 15139042,
133
+ "2": 10527019,
134
+ "3": 11571099,
135
+ "4": 14076991,
136
+ "5": 12499317,
137
+ "6": 15760636,
138
+ "7": 10725156
139
+ },
140
+ "decoder_layer_1": {
141
+ "0": 12376231,
142
+ "1": 15921848,
143
+ "2": 9631421,
144
+ "3": 14336388,
145
+ "4": 14508117,
146
+ "5": 9245838,
147
+ "6": 13418845,
148
+ "7": 10430540
149
+ },
150
+ "decoder_layer_2": {
151
+ "0": 24368930,
152
+ "1": 24373976,
153
+ "2": 9665,
154
+ "3": 8794,
155
+ "4": 7449,
156
+ "5": 5545,
157
+ "6": 25548570,
158
+ "7": 25546299
159
+ },
160
+ "decoder_layer_3": {
161
+ "0": 13153999,
162
+ "1": 17946588,
163
+ "2": 10940872,
164
+ "3": 11284613,
165
+ "4": 12384130,
166
+ "5": 10910761,
167
+ "6": 13455613,
168
+ "7": 9792652
169
+ }
170
+ }
171
+ },
172
+ {
173
+ "epoch": 3,
174
+ "usage": {
175
+ "encoder_layer_0": {
176
+ "0": 261455302,
177
+ "1": 262893842,
178
+ "2": 3077719,
179
+ "3": 3319592,
180
+ "4": 3253708,
181
+ "5": 3070038,
182
+ "6": 283558135,
183
+ "7": 282061392
184
+ },
185
+ "encoder_layer_1": {
186
+ "0": 240464181,
187
+ "1": 249133057,
188
+ "2": 19122883,
189
+ "3": 22247029,
190
+ "4": 24482469,
191
+ "5": 25912739,
192
+ "6": 267275331,
193
+ "7": 254052039
194
+ },
195
+ "encoder_layer_2": {
196
+ "0": 207284432,
197
+ "1": 235091343,
198
+ "2": 64242756,
199
+ "3": 74434438,
200
+ "4": 72444627,
201
+ "5": 62671279,
202
+ "6": 207373049,
203
+ "7": 179147804
204
+ },
205
+ "encoder_layer_3": {
206
+ "0": 135826351,
207
+ "1": 181241685,
208
+ "2": 103904554,
209
+ "3": 118421274,
210
+ "4": 122321377,
211
+ "5": 112346323,
212
+ "6": 189292582,
213
+ "7": 139335582
214
+ },
215
+ "decoder_layer_0": {
216
+ "0": 9569968,
217
+ "1": 15139042,
218
+ "2": 10527019,
219
+ "3": 11571099,
220
+ "4": 14076991,
221
+ "5": 12499317,
222
+ "6": 15760636,
223
+ "7": 10725156
224
+ },
225
+ "decoder_layer_1": {
226
+ "0": 12376231,
227
+ "1": 15921848,
228
+ "2": 9631421,
229
+ "3": 14336388,
230
+ "4": 14508117,
231
+ "5": 9245838,
232
+ "6": 13418845,
233
+ "7": 10430540
234
+ },
235
+ "decoder_layer_2": {
236
+ "0": 24368930,
237
+ "1": 24373976,
238
+ "2": 9665,
239
+ "3": 8794,
240
+ "4": 7449,
241
+ "5": 5545,
242
+ "6": 25548570,
243
+ "7": 25546299
244
+ },
245
+ "decoder_layer_3": {
246
+ "0": 13153999,
247
+ "1": 17946588,
248
+ "2": 10940872,
249
+ "3": 11284613,
250
+ "4": 12384130,
251
+ "5": 10910761,
252
+ "6": 13455613,
253
+ "7": 9792652
254
+ }
255
+ }
256
+ },
257
+ {
258
+ "epoch": 4,
259
+ "usage": {
260
+ "encoder_layer_0": {
261
+ "0": 261455302,
262
+ "1": 262893842,
263
+ "2": 3077719,
264
+ "3": 3319592,
265
+ "4": 3253708,
266
+ "5": 3070038,
267
+ "6": 283558135,
268
+ "7": 282061392
269
+ },
270
+ "encoder_layer_1": {
271
+ "0": 240464181,
272
+ "1": 249133057,
273
+ "2": 19122883,
274
+ "3": 22247029,
275
+ "4": 24482469,
276
+ "5": 25912739,
277
+ "6": 267275331,
278
+ "7": 254052039
279
+ },
280
+ "encoder_layer_2": {
281
+ "0": 207284432,
282
+ "1": 235091343,
283
+ "2": 64242756,
284
+ "3": 74434438,
285
+ "4": 72444627,
286
+ "5": 62671279,
287
+ "6": 207373049,
288
+ "7": 179147804
289
+ },
290
+ "encoder_layer_3": {
291
+ "0": 135826351,
292
+ "1": 181241685,
293
+ "2": 103904554,
294
+ "3": 118421274,
295
+ "4": 122321377,
296
+ "5": 112346323,
297
+ "6": 189292582,
298
+ "7": 139335582
299
+ },
300
+ "decoder_layer_0": {
301
+ "0": 9569968,
302
+ "1": 15139042,
303
+ "2": 10527019,
304
+ "3": 11571099,
305
+ "4": 14076991,
306
+ "5": 12499317,
307
+ "6": 15760636,
308
+ "7": 10725156
309
+ },
310
+ "decoder_layer_1": {
311
+ "0": 12376231,
312
+ "1": 15921848,
313
+ "2": 9631421,
314
+ "3": 14336388,
315
+ "4": 14508117,
316
+ "5": 9245838,
317
+ "6": 13418845,
318
+ "7": 10430540
319
+ },
320
+ "decoder_layer_2": {
321
+ "0": 24368930,
322
+ "1": 24373976,
323
+ "2": 9665,
324
+ "3": 8794,
325
+ "4": 7449,
326
+ "5": 5545,
327
+ "6": 25548570,
328
+ "7": 25546299
329
+ },
330
+ "decoder_layer_3": {
331
+ "0": 13153999,
332
+ "1": 17946588,
333
+ "2": 10940872,
334
+ "3": 11284613,
335
+ "4": 12384130,
336
+ "5": 10910761,
337
+ "6": 13455613,
338
+ "7": 9792652
339
+ }
340
+ }
341
+ },
342
+ {
343
+ "epoch": 5,
344
+ "usage": {
345
+ "encoder_layer_0": {
346
+ "0": 261455302,
347
+ "1": 262893842,
348
+ "2": 3077719,
349
+ "3": 3319592,
350
+ "4": 3253708,
351
+ "5": 3070038,
352
+ "6": 283558135,
353
+ "7": 282061392
354
+ },
355
+ "encoder_layer_1": {
356
+ "0": 240464181,
357
+ "1": 249133057,
358
+ "2": 19122883,
359
+ "3": 22247029,
360
+ "4": 24482469,
361
+ "5": 25912739,
362
+ "6": 267275331,
363
+ "7": 254052039
364
+ },
365
+ "encoder_layer_2": {
366
+ "0": 207284432,
367
+ "1": 235091343,
368
+ "2": 64242756,
369
+ "3": 74434438,
370
+ "4": 72444627,
371
+ "5": 62671279,
372
+ "6": 207373049,
373
+ "7": 179147804
374
+ },
375
+ "encoder_layer_3": {
376
+ "0": 135826351,
377
+ "1": 181241685,
378
+ "2": 103904554,
379
+ "3": 118421274,
380
+ "4": 122321377,
381
+ "5": 112346323,
382
+ "6": 189292582,
383
+ "7": 139335582
384
+ },
385
+ "decoder_layer_0": {
386
+ "0": 9569968,
387
+ "1": 15139042,
388
+ "2": 10527019,
389
+ "3": 11571099,
390
+ "4": 14076991,
391
+ "5": 12499317,
392
+ "6": 15760636,
393
+ "7": 10725156
394
+ },
395
+ "decoder_layer_1": {
396
+ "0": 12376231,
397
+ "1": 15921848,
398
+ "2": 9631421,
399
+ "3": 14336388,
400
+ "4": 14508117,
401
+ "5": 9245838,
402
+ "6": 13418845,
403
+ "7": 10430540
404
+ },
405
+ "decoder_layer_2": {
406
+ "0": 24368930,
407
+ "1": 24373976,
408
+ "2": 9665,
409
+ "3": 8794,
410
+ "4": 7449,
411
+ "5": 5545,
412
+ "6": 25548570,
413
+ "7": 25546299
414
+ },
415
+ "decoder_layer_3": {
416
+ "0": 13153999,
417
+ "1": 17946588,
418
+ "2": 10940872,
419
+ "3": 11284613,
420
+ "4": 12384130,
421
+ "5": 10910761,
422
+ "6": 13455613,
423
+ "7": 9792652
424
+ }
425
+ }
426
+ }
427
+ ]
results/moe_hash/moe_hash_test_predictions.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebe85737a13dc11530e579868d68fa7fd58f6f8d6cebea9fde02914242db6f4
3
+ size 27675266
results/moe_hash/training_curves.png ADDED

Git LFS Details

  • SHA256: fdea766f69fb554f0a1440bcc884800996bc4bc5164028d41f02e2f9f0dcdae7
  • Pointer size: 131 Bytes
  • Size of remote file: 143 kB
results/moe_hash_without_lb/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bb4d7bf522808334d4eb22cc1ef24c434056ebc54f4c4e8f64cf5f16a2ceec
3
+ size 2159271619
results/moe_hash_without_lb/training_metrics.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_losses": [
3
+ 7.003178263092041,
4
+ 5.8915835350036625,
5
+ 5.497518934631348,
6
+ 5.223627810668945,
7
+ 4.998379559326172
8
+ ],
9
+ "train_nll_losses": [
10
+ 7.003178263092041,
11
+ 5.8915835350036625,
12
+ 5.497518934631348,
13
+ 5.223627810668945,
14
+ 4.998379559326172
15
+ ],
16
+ "train_lb_losses": [
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0,
21
+ 0.0
22
+ ],
23
+ "val_losses": [
24
+ 6.157535878438798,
25
+ 5.826862653096517,
26
+ 5.508088460044255,
27
+ 5.584775901976085,
28
+ 5.546320551917667
29
+ ],
30
+ "use_load_balancer": false,
31
+ "expert_balance_history": [
32
+ {
33
+ "epoch": 1,
34
+ "cv_scores": {
35
+ "encoder_layer_0": 0.9752403928100551,
36
+ "encoder_layer_1": 1.1604373739140559,
37
+ "encoder_layer_2": 1.1906502000710109,
38
+ "encoder_layer_3": 1.5189951633226693,
39
+ "decoder_layer_0": 1.6397420562599254,
40
+ "decoder_layer_1": 1.6088734749302642,
41
+ "decoder_layer_2": 1.1485815213057098,
42
+ "decoder_layer_3": 1.031611726454343
43
+ }
44
+ },
45
+ {
46
+ "epoch": 2,
47
+ "cv_scores": {
48
+ "encoder_layer_0": 1.2149520736371027,
49
+ "encoder_layer_1": 0.9475947854990925,
50
+ "encoder_layer_2": 0.973835742113084,
51
+ "encoder_layer_3": 1.5428490392307177,
52
+ "decoder_layer_0": 1.6692692285462098,
53
+ "decoder_layer_1": 1.6462829796890173,
54
+ "decoder_layer_2": 1.26960726561445,
55
+ "decoder_layer_3": 1.2212484174287248
56
+ }
57
+ },
58
+ {
59
+ "epoch": 3,
60
+ "cv_scores": {
61
+ "encoder_layer_0": 1.3859582784712385,
62
+ "encoder_layer_1": 0.774218401617478,
63
+ "encoder_layer_2": 0.7892838172863104,
64
+ "encoder_layer_3": 1.471245050784382,
65
+ "decoder_layer_0": 1.6428237035357693,
66
+ "decoder_layer_1": 1.6575447747063468,
67
+ "decoder_layer_2": 1.3360894947088227,
68
+ "decoder_layer_3": 1.2850289355113476
69
+ }
70
+ },
71
+ {
72
+ "epoch": 4,
73
+ "cv_scores": {
74
+ "encoder_layer_0": 1.470608977224471,
75
+ "encoder_layer_1": 0.8325088809517526,
76
+ "encoder_layer_2": 0.9110868199883019,
77
+ "encoder_layer_3": 1.4852236088974513,
78
+ "decoder_layer_0": 1.6453405313179363,
79
+ "decoder_layer_1": 1.6646367529819495,
80
+ "decoder_layer_2": 1.368985176021636,
81
+ "decoder_layer_3": 1.3377243523199316
82
+ }
83
+ },
84
+ {
85
+ "epoch": 5,
86
+ "cv_scores": {
87
+ "encoder_layer_0": 1.521552219913538,
88
+ "encoder_layer_1": 0.8826608997652614,
89
+ "encoder_layer_2": 1.0384956360485216,
90
+ "encoder_layer_3": 1.4437078323981245,
91
+ "decoder_layer_0": 1.6101629549482483,
92
+ "decoder_layer_1": 1.6548466841970308,
93
+ "decoder_layer_2": 1.3714362330355638,
94
+ "decoder_layer_3": 1.3546301554016826
95
+ }
96
+ }
97
+ ]
98
+ }