mavis-ai commited on
Commit
5f527c6
·
verified ·
1 Parent(s): de631d7

Upload Multilingual-e5-large-Q8

Browse files
Files changed (3) hide show
  1. config.json +4 -2
  2. quantization.json +305 -153
  3. weights.00.safetensors +2 -2
config.json CHANGED
@@ -80,9 +80,11 @@
80
  "use_cache": true,
81
  "classifier_dropout": null,
82
  "revis_quantization": {
83
- "format": "revis-xlm-roberta-e5-q8",
84
  "bits": 8,
85
- "type": "symmetric-per-row",
 
 
86
  "manifest": "quantization.json"
87
  }
88
  }
 
80
  "use_cache": true,
81
  "classifier_dropout": null,
82
  "revis_quantization": {
83
+ "format": "revis-xlm-roberta-e5-mlx-native-q8",
84
  "bits": 8,
85
+ "group_size": 64,
86
+ "mode": "affine",
87
+ "type": "mlx-native-affine",
88
  "manifest": "quantization.json"
89
  }
90
  }
quantization.json CHANGED
@@ -1,15 +1,19 @@
1
  {
2
- "schemaVersion": 1,
3
- "format": "revis-xlm-roberta-e5-q8",
4
  "source": "intfloat/multilingual-e5-large",
5
  "baseWeights": "weights.00.safetensors",
6
  "quantizedWeights": "weights.00.safetensors",
7
  "quantization": {
8
- "type": "symmetric-per-row",
9
  "bits": 8,
 
 
10
  "quantizedTensorSuffix": ".qweight",
11
- "scaleTensorSuffix": ".scale",
12
- "dequantization": "weight = qweight.astype(float32) * scale[:, None]"
 
 
13
  },
14
  "quantized": [
15
  {
@@ -20,7 +24,8 @@
20
  ],
21
  "dtype": "float16",
22
  "qweight": "embeddings.position_embeddings.weight.qweight",
23
- "scale": "embeddings.position_embeddings.weight.scale"
 
24
  },
25
  {
26
  "name": "embeddings.token_type_embeddings.weight",
@@ -30,7 +35,8 @@
30
  ],
31
  "dtype": "float16",
32
  "qweight": "embeddings.token_type_embeddings.weight.qweight",
33
- "scale": "embeddings.token_type_embeddings.weight.scale"
 
34
  },
35
  {
36
  "name": "embeddings.word_embeddings.weight",
@@ -40,7 +46,8 @@
40
  ],
41
  "dtype": "float16",
42
  "qweight": "embeddings.word_embeddings.weight.qweight",
43
- "scale": "embeddings.word_embeddings.weight.scale"
 
44
  },
45
  {
46
  "name": "encoder.layer.0.attention.output.dense.weight",
@@ -50,7 +57,8 @@
50
  ],
51
  "dtype": "float16",
52
  "qweight": "encoder.layer.0.attention.output.dense.weight.qweight",
53
- "scale": "encoder.layer.0.attention.output.dense.weight.scale"
 
54
  },
55
  {
56
  "name": "encoder.layer.0.attention.self.key.weight",
@@ -60,7 +68,8 @@
60
  ],
61
  "dtype": "float16",
62
  "qweight": "encoder.layer.0.attention.self.key.weight.qweight",
63
- "scale": "encoder.layer.0.attention.self.key.weight.scale"
 
64
  },
65
  {
66
  "name": "encoder.layer.0.attention.self.query.weight",
@@ -70,7 +79,8 @@
70
  ],
71
  "dtype": "float16",
72
  "qweight": "encoder.layer.0.attention.self.query.weight.qweight",
73
- "scale": "encoder.layer.0.attention.self.query.weight.scale"
 
74
  },
75
  {
76
  "name": "encoder.layer.0.attention.self.value.weight",
@@ -80,7 +90,8 @@
80
  ],
81
  "dtype": "float16",
82
  "qweight": "encoder.layer.0.attention.self.value.weight.qweight",
83
- "scale": "encoder.layer.0.attention.self.value.weight.scale"
 
84
  },
85
  {
86
  "name": "encoder.layer.0.intermediate.dense.weight",
@@ -90,7 +101,8 @@
90
  ],
91
  "dtype": "float16",
92
  "qweight": "encoder.layer.0.intermediate.dense.weight.qweight",
93
- "scale": "encoder.layer.0.intermediate.dense.weight.scale"
 
94
  },
95
  {
96
  "name": "encoder.layer.0.output.dense.weight",
@@ -100,7 +112,8 @@
100
  ],
101
  "dtype": "float16",
102
  "qweight": "encoder.layer.0.output.dense.weight.qweight",
103
- "scale": "encoder.layer.0.output.dense.weight.scale"
 
104
  },
105
  {
106
  "name": "encoder.layer.1.attention.output.dense.weight",
@@ -110,7 +123,8 @@
110
  ],
111
  "dtype": "float16",
112
  "qweight": "encoder.layer.1.attention.output.dense.weight.qweight",
113
- "scale": "encoder.layer.1.attention.output.dense.weight.scale"
 
114
  },
115
  {
116
  "name": "encoder.layer.1.attention.self.key.weight",
@@ -120,7 +134,8 @@
120
  ],
121
  "dtype": "float16",
122
  "qweight": "encoder.layer.1.attention.self.key.weight.qweight",
123
- "scale": "encoder.layer.1.attention.self.key.weight.scale"
 
124
  },
125
  {
126
  "name": "encoder.layer.1.attention.self.query.weight",
@@ -130,7 +145,8 @@
130
  ],
131
  "dtype": "float16",
132
  "qweight": "encoder.layer.1.attention.self.query.weight.qweight",
133
- "scale": "encoder.layer.1.attention.self.query.weight.scale"
 
134
  },
135
  {
136
  "name": "encoder.layer.1.attention.self.value.weight",
@@ -140,7 +156,8 @@
140
  ],
141
  "dtype": "float16",
142
  "qweight": "encoder.layer.1.attention.self.value.weight.qweight",
143
- "scale": "encoder.layer.1.attention.self.value.weight.scale"
 
144
  },
145
  {
146
  "name": "encoder.layer.1.intermediate.dense.weight",
@@ -150,7 +167,8 @@
150
  ],
151
  "dtype": "float16",
152
  "qweight": "encoder.layer.1.intermediate.dense.weight.qweight",
153
- "scale": "encoder.layer.1.intermediate.dense.weight.scale"
 
154
  },
155
  {
156
  "name": "encoder.layer.1.output.dense.weight",
@@ -160,7 +178,8 @@
160
  ],
161
  "dtype": "float16",
162
  "qweight": "encoder.layer.1.output.dense.weight.qweight",
163
- "scale": "encoder.layer.1.output.dense.weight.scale"
 
164
  },
165
  {
166
  "name": "encoder.layer.10.attention.output.dense.weight",
@@ -170,7 +189,8 @@
170
  ],
171
  "dtype": "float16",
172
  "qweight": "encoder.layer.10.attention.output.dense.weight.qweight",
173
- "scale": "encoder.layer.10.attention.output.dense.weight.scale"
 
174
  },
175
  {
176
  "name": "encoder.layer.10.attention.self.key.weight",
@@ -180,7 +200,8 @@
180
  ],
181
  "dtype": "float16",
182
  "qweight": "encoder.layer.10.attention.self.key.weight.qweight",
183
- "scale": "encoder.layer.10.attention.self.key.weight.scale"
 
184
  },
185
  {
186
  "name": "encoder.layer.10.attention.self.query.weight",
@@ -190,7 +211,8 @@
190
  ],
191
  "dtype": "float16",
192
  "qweight": "encoder.layer.10.attention.self.query.weight.qweight",
193
- "scale": "encoder.layer.10.attention.self.query.weight.scale"
 
194
  },
195
  {
196
  "name": "encoder.layer.10.attention.self.value.weight",
@@ -200,7 +222,8 @@
200
  ],
201
  "dtype": "float16",
202
  "qweight": "encoder.layer.10.attention.self.value.weight.qweight",
203
- "scale": "encoder.layer.10.attention.self.value.weight.scale"
 
204
  },
205
  {
206
  "name": "encoder.layer.10.intermediate.dense.weight",
@@ -210,7 +233,8 @@
210
  ],
211
  "dtype": "float16",
212
  "qweight": "encoder.layer.10.intermediate.dense.weight.qweight",
213
- "scale": "encoder.layer.10.intermediate.dense.weight.scale"
 
214
  },
215
  {
216
  "name": "encoder.layer.10.output.dense.weight",
@@ -220,7 +244,8 @@
220
  ],
221
  "dtype": "float16",
222
  "qweight": "encoder.layer.10.output.dense.weight.qweight",
223
- "scale": "encoder.layer.10.output.dense.weight.scale"
 
224
  },
225
  {
226
  "name": "encoder.layer.11.attention.output.dense.weight",
@@ -230,7 +255,8 @@
230
  ],
231
  "dtype": "float16",
232
  "qweight": "encoder.layer.11.attention.output.dense.weight.qweight",
233
- "scale": "encoder.layer.11.attention.output.dense.weight.scale"
 
234
  },
235
  {
236
  "name": "encoder.layer.11.attention.self.key.weight",
@@ -240,7 +266,8 @@
240
  ],
241
  "dtype": "float16",
242
  "qweight": "encoder.layer.11.attention.self.key.weight.qweight",
243
- "scale": "encoder.layer.11.attention.self.key.weight.scale"
 
244
  },
245
  {
246
  "name": "encoder.layer.11.attention.self.query.weight",
@@ -250,7 +277,8 @@
250
  ],
251
  "dtype": "float16",
252
  "qweight": "encoder.layer.11.attention.self.query.weight.qweight",
253
- "scale": "encoder.layer.11.attention.self.query.weight.scale"
 
254
  },
255
  {
256
  "name": "encoder.layer.11.attention.self.value.weight",
@@ -260,7 +288,8 @@
260
  ],
261
  "dtype": "float16",
262
  "qweight": "encoder.layer.11.attention.self.value.weight.qweight",
263
- "scale": "encoder.layer.11.attention.self.value.weight.scale"
 
264
  },
265
  {
266
  "name": "encoder.layer.11.intermediate.dense.weight",
@@ -270,7 +299,8 @@
270
  ],
271
  "dtype": "float16",
272
  "qweight": "encoder.layer.11.intermediate.dense.weight.qweight",
273
- "scale": "encoder.layer.11.intermediate.dense.weight.scale"
 
274
  },
275
  {
276
  "name": "encoder.layer.11.output.dense.weight",
@@ -280,7 +310,8 @@
280
  ],
281
  "dtype": "float16",
282
  "qweight": "encoder.layer.11.output.dense.weight.qweight",
283
- "scale": "encoder.layer.11.output.dense.weight.scale"
 
284
  },
285
  {
286
  "name": "encoder.layer.12.attention.output.dense.weight",
@@ -290,7 +321,8 @@
290
  ],
291
  "dtype": "float16",
292
  "qweight": "encoder.layer.12.attention.output.dense.weight.qweight",
293
- "scale": "encoder.layer.12.attention.output.dense.weight.scale"
 
294
  },
295
  {
296
  "name": "encoder.layer.12.attention.self.key.weight",
@@ -300,7 +332,8 @@
300
  ],
301
  "dtype": "float16",
302
  "qweight": "encoder.layer.12.attention.self.key.weight.qweight",
303
- "scale": "encoder.layer.12.attention.self.key.weight.scale"
 
304
  },
305
  {
306
  "name": "encoder.layer.12.attention.self.query.weight",
@@ -310,7 +343,8 @@
310
  ],
311
  "dtype": "float16",
312
  "qweight": "encoder.layer.12.attention.self.query.weight.qweight",
313
- "scale": "encoder.layer.12.attention.self.query.weight.scale"
 
314
  },
315
  {
316
  "name": "encoder.layer.12.attention.self.value.weight",
@@ -320,7 +354,8 @@
320
  ],
321
  "dtype": "float16",
322
  "qweight": "encoder.layer.12.attention.self.value.weight.qweight",
323
- "scale": "encoder.layer.12.attention.self.value.weight.scale"
 
324
  },
325
  {
326
  "name": "encoder.layer.12.intermediate.dense.weight",
@@ -330,7 +365,8 @@
330
  ],
331
  "dtype": "float16",
332
  "qweight": "encoder.layer.12.intermediate.dense.weight.qweight",
333
- "scale": "encoder.layer.12.intermediate.dense.weight.scale"
 
334
  },
335
  {
336
  "name": "encoder.layer.12.output.dense.weight",
@@ -340,7 +376,8 @@
340
  ],
341
  "dtype": "float16",
342
  "qweight": "encoder.layer.12.output.dense.weight.qweight",
343
- "scale": "encoder.layer.12.output.dense.weight.scale"
 
344
  },
345
  {
346
  "name": "encoder.layer.13.attention.output.dense.weight",
@@ -350,7 +387,8 @@
350
  ],
351
  "dtype": "float16",
352
  "qweight": "encoder.layer.13.attention.output.dense.weight.qweight",
353
- "scale": "encoder.layer.13.attention.output.dense.weight.scale"
 
354
  },
355
  {
356
  "name": "encoder.layer.13.attention.self.key.weight",
@@ -360,7 +398,8 @@
360
  ],
361
  "dtype": "float16",
362
  "qweight": "encoder.layer.13.attention.self.key.weight.qweight",
363
- "scale": "encoder.layer.13.attention.self.key.weight.scale"
 
364
  },
365
  {
366
  "name": "encoder.layer.13.attention.self.query.weight",
@@ -370,7 +409,8 @@
370
  ],
371
  "dtype": "float16",
372
  "qweight": "encoder.layer.13.attention.self.query.weight.qweight",
373
- "scale": "encoder.layer.13.attention.self.query.weight.scale"
 
374
  },
375
  {
376
  "name": "encoder.layer.13.attention.self.value.weight",
@@ -380,7 +420,8 @@
380
  ],
381
  "dtype": "float16",
382
  "qweight": "encoder.layer.13.attention.self.value.weight.qweight",
383
- "scale": "encoder.layer.13.attention.self.value.weight.scale"
 
384
  },
385
  {
386
  "name": "encoder.layer.13.intermediate.dense.weight",
@@ -390,7 +431,8 @@
390
  ],
391
  "dtype": "float16",
392
  "qweight": "encoder.layer.13.intermediate.dense.weight.qweight",
393
- "scale": "encoder.layer.13.intermediate.dense.weight.scale"
 
394
  },
395
  {
396
  "name": "encoder.layer.13.output.dense.weight",
@@ -400,7 +442,8 @@
400
  ],
401
  "dtype": "float16",
402
  "qweight": "encoder.layer.13.output.dense.weight.qweight",
403
- "scale": "encoder.layer.13.output.dense.weight.scale"
 
404
  },
405
  {
406
  "name": "encoder.layer.14.attention.output.dense.weight",
@@ -410,7 +453,8 @@
410
  ],
411
  "dtype": "float16",
412
  "qweight": "encoder.layer.14.attention.output.dense.weight.qweight",
413
- "scale": "encoder.layer.14.attention.output.dense.weight.scale"
 
414
  },
415
  {
416
  "name": "encoder.layer.14.attention.self.key.weight",
@@ -420,7 +464,8 @@
420
  ],
421
  "dtype": "float16",
422
  "qweight": "encoder.layer.14.attention.self.key.weight.qweight",
423
- "scale": "encoder.layer.14.attention.self.key.weight.scale"
 
424
  },
425
  {
426
  "name": "encoder.layer.14.attention.self.query.weight",
@@ -430,7 +475,8 @@
430
  ],
431
  "dtype": "float16",
432
  "qweight": "encoder.layer.14.attention.self.query.weight.qweight",
433
- "scale": "encoder.layer.14.attention.self.query.weight.scale"
 
434
  },
435
  {
436
  "name": "encoder.layer.14.attention.self.value.weight",
@@ -440,7 +486,8 @@
440
  ],
441
  "dtype": "float16",
442
  "qweight": "encoder.layer.14.attention.self.value.weight.qweight",
443
- "scale": "encoder.layer.14.attention.self.value.weight.scale"
 
444
  },
445
  {
446
  "name": "encoder.layer.14.intermediate.dense.weight",
@@ -450,7 +497,8 @@
450
  ],
451
  "dtype": "float16",
452
  "qweight": "encoder.layer.14.intermediate.dense.weight.qweight",
453
- "scale": "encoder.layer.14.intermediate.dense.weight.scale"
 
454
  },
455
  {
456
  "name": "encoder.layer.14.output.dense.weight",
@@ -460,7 +508,8 @@
460
  ],
461
  "dtype": "float16",
462
  "qweight": "encoder.layer.14.output.dense.weight.qweight",
463
- "scale": "encoder.layer.14.output.dense.weight.scale"
 
464
  },
465
  {
466
  "name": "encoder.layer.15.attention.output.dense.weight",
@@ -470,7 +519,8 @@
470
  ],
471
  "dtype": "float16",
472
  "qweight": "encoder.layer.15.attention.output.dense.weight.qweight",
473
- "scale": "encoder.layer.15.attention.output.dense.weight.scale"
 
474
  },
475
  {
476
  "name": "encoder.layer.15.attention.self.key.weight",
@@ -480,7 +530,8 @@
480
  ],
481
  "dtype": "float16",
482
  "qweight": "encoder.layer.15.attention.self.key.weight.qweight",
483
- "scale": "encoder.layer.15.attention.self.key.weight.scale"
 
484
  },
485
  {
486
  "name": "encoder.layer.15.attention.self.query.weight",
@@ -490,7 +541,8 @@
490
  ],
491
  "dtype": "float16",
492
  "qweight": "encoder.layer.15.attention.self.query.weight.qweight",
493
- "scale": "encoder.layer.15.attention.self.query.weight.scale"
 
494
  },
495
  {
496
  "name": "encoder.layer.15.attention.self.value.weight",
@@ -500,7 +552,8 @@
500
  ],
501
  "dtype": "float16",
502
  "qweight": "encoder.layer.15.attention.self.value.weight.qweight",
503
- "scale": "encoder.layer.15.attention.self.value.weight.scale"
 
504
  },
505
  {
506
  "name": "encoder.layer.15.intermediate.dense.weight",
@@ -510,7 +563,8 @@
510
  ],
511
  "dtype": "float16",
512
  "qweight": "encoder.layer.15.intermediate.dense.weight.qweight",
513
- "scale": "encoder.layer.15.intermediate.dense.weight.scale"
 
514
  },
515
  {
516
  "name": "encoder.layer.15.output.dense.weight",
@@ -520,7 +574,8 @@
520
  ],
521
  "dtype": "float16",
522
  "qweight": "encoder.layer.15.output.dense.weight.qweight",
523
- "scale": "encoder.layer.15.output.dense.weight.scale"
 
524
  },
525
  {
526
  "name": "encoder.layer.16.attention.output.dense.weight",
@@ -530,7 +585,8 @@
530
  ],
531
  "dtype": "float16",
532
  "qweight": "encoder.layer.16.attention.output.dense.weight.qweight",
533
- "scale": "encoder.layer.16.attention.output.dense.weight.scale"
 
534
  },
535
  {
536
  "name": "encoder.layer.16.attention.self.key.weight",
@@ -540,7 +596,8 @@
540
  ],
541
  "dtype": "float16",
542
  "qweight": "encoder.layer.16.attention.self.key.weight.qweight",
543
- "scale": "encoder.layer.16.attention.self.key.weight.scale"
 
544
  },
545
  {
546
  "name": "encoder.layer.16.attention.self.query.weight",
@@ -550,7 +607,8 @@
550
  ],
551
  "dtype": "float16",
552
  "qweight": "encoder.layer.16.attention.self.query.weight.qweight",
553
- "scale": "encoder.layer.16.attention.self.query.weight.scale"
 
554
  },
555
  {
556
  "name": "encoder.layer.16.attention.self.value.weight",
@@ -560,7 +618,8 @@
560
  ],
561
  "dtype": "float16",
562
  "qweight": "encoder.layer.16.attention.self.value.weight.qweight",
563
- "scale": "encoder.layer.16.attention.self.value.weight.scale"
 
564
  },
565
  {
566
  "name": "encoder.layer.16.intermediate.dense.weight",
@@ -570,7 +629,8 @@
570
  ],
571
  "dtype": "float16",
572
  "qweight": "encoder.layer.16.intermediate.dense.weight.qweight",
573
- "scale": "encoder.layer.16.intermediate.dense.weight.scale"
 
574
  },
575
  {
576
  "name": "encoder.layer.16.output.dense.weight",
@@ -580,7 +640,8 @@
580
  ],
581
  "dtype": "float16",
582
  "qweight": "encoder.layer.16.output.dense.weight.qweight",
583
- "scale": "encoder.layer.16.output.dense.weight.scale"
 
584
  },
585
  {
586
  "name": "encoder.layer.17.attention.output.dense.weight",
@@ -590,7 +651,8 @@
590
  ],
591
  "dtype": "float16",
592
  "qweight": "encoder.layer.17.attention.output.dense.weight.qweight",
593
- "scale": "encoder.layer.17.attention.output.dense.weight.scale"
 
594
  },
595
  {
596
  "name": "encoder.layer.17.attention.self.key.weight",
@@ -600,7 +662,8 @@
600
  ],
601
  "dtype": "float16",
602
  "qweight": "encoder.layer.17.attention.self.key.weight.qweight",
603
- "scale": "encoder.layer.17.attention.self.key.weight.scale"
 
604
  },
605
  {
606
  "name": "encoder.layer.17.attention.self.query.weight",
@@ -610,7 +673,8 @@
610
  ],
611
  "dtype": "float16",
612
  "qweight": "encoder.layer.17.attention.self.query.weight.qweight",
613
- "scale": "encoder.layer.17.attention.self.query.weight.scale"
 
614
  },
615
  {
616
  "name": "encoder.layer.17.attention.self.value.weight",
@@ -620,7 +684,8 @@
620
  ],
621
  "dtype": "float16",
622
  "qweight": "encoder.layer.17.attention.self.value.weight.qweight",
623
- "scale": "encoder.layer.17.attention.self.value.weight.scale"
 
624
  },
625
  {
626
  "name": "encoder.layer.17.intermediate.dense.weight",
@@ -630,7 +695,8 @@
630
  ],
631
  "dtype": "float16",
632
  "qweight": "encoder.layer.17.intermediate.dense.weight.qweight",
633
- "scale": "encoder.layer.17.intermediate.dense.weight.scale"
 
634
  },
635
  {
636
  "name": "encoder.layer.17.output.dense.weight",
@@ -640,7 +706,8 @@
640
  ],
641
  "dtype": "float16",
642
  "qweight": "encoder.layer.17.output.dense.weight.qweight",
643
- "scale": "encoder.layer.17.output.dense.weight.scale"
 
644
  },
645
  {
646
  "name": "encoder.layer.18.attention.output.dense.weight",
@@ -650,7 +717,8 @@
650
  ],
651
  "dtype": "float16",
652
  "qweight": "encoder.layer.18.attention.output.dense.weight.qweight",
653
- "scale": "encoder.layer.18.attention.output.dense.weight.scale"
 
654
  },
655
  {
656
  "name": "encoder.layer.18.attention.self.key.weight",
@@ -660,7 +728,8 @@
660
  ],
661
  "dtype": "float16",
662
  "qweight": "encoder.layer.18.attention.self.key.weight.qweight",
663
- "scale": "encoder.layer.18.attention.self.key.weight.scale"
 
664
  },
665
  {
666
  "name": "encoder.layer.18.attention.self.query.weight",
@@ -670,7 +739,8 @@
670
  ],
671
  "dtype": "float16",
672
  "qweight": "encoder.layer.18.attention.self.query.weight.qweight",
673
- "scale": "encoder.layer.18.attention.self.query.weight.scale"
 
674
  },
675
  {
676
  "name": "encoder.layer.18.attention.self.value.weight",
@@ -680,7 +750,8 @@
680
  ],
681
  "dtype": "float16",
682
  "qweight": "encoder.layer.18.attention.self.value.weight.qweight",
683
- "scale": "encoder.layer.18.attention.self.value.weight.scale"
 
684
  },
685
  {
686
  "name": "encoder.layer.18.intermediate.dense.weight",
@@ -690,7 +761,8 @@
690
  ],
691
  "dtype": "float16",
692
  "qweight": "encoder.layer.18.intermediate.dense.weight.qweight",
693
- "scale": "encoder.layer.18.intermediate.dense.weight.scale"
 
694
  },
695
  {
696
  "name": "encoder.layer.18.output.dense.weight",
@@ -700,7 +772,8 @@
700
  ],
701
  "dtype": "float16",
702
  "qweight": "encoder.layer.18.output.dense.weight.qweight",
703
- "scale": "encoder.layer.18.output.dense.weight.scale"
 
704
  },
705
  {
706
  "name": "encoder.layer.19.attention.output.dense.weight",
@@ -710,7 +783,8 @@
710
  ],
711
  "dtype": "float16",
712
  "qweight": "encoder.layer.19.attention.output.dense.weight.qweight",
713
- "scale": "encoder.layer.19.attention.output.dense.weight.scale"
 
714
  },
715
  {
716
  "name": "encoder.layer.19.attention.self.key.weight",
@@ -720,7 +794,8 @@
720
  ],
721
  "dtype": "float16",
722
  "qweight": "encoder.layer.19.attention.self.key.weight.qweight",
723
- "scale": "encoder.layer.19.attention.self.key.weight.scale"
 
724
  },
725
  {
726
  "name": "encoder.layer.19.attention.self.query.weight",
@@ -730,7 +805,8 @@
730
  ],
731
  "dtype": "float16",
732
  "qweight": "encoder.layer.19.attention.self.query.weight.qweight",
733
- "scale": "encoder.layer.19.attention.self.query.weight.scale"
 
734
  },
735
  {
736
  "name": "encoder.layer.19.attention.self.value.weight",
@@ -740,7 +816,8 @@
740
  ],
741
  "dtype": "float16",
742
  "qweight": "encoder.layer.19.attention.self.value.weight.qweight",
743
- "scale": "encoder.layer.19.attention.self.value.weight.scale"
 
744
  },
745
  {
746
  "name": "encoder.layer.19.intermediate.dense.weight",
@@ -750,7 +827,8 @@
750
  ],
751
  "dtype": "float16",
752
  "qweight": "encoder.layer.19.intermediate.dense.weight.qweight",
753
- "scale": "encoder.layer.19.intermediate.dense.weight.scale"
 
754
  },
755
  {
756
  "name": "encoder.layer.19.output.dense.weight",
@@ -760,7 +838,8 @@
760
  ],
761
  "dtype": "float16",
762
  "qweight": "encoder.layer.19.output.dense.weight.qweight",
763
- "scale": "encoder.layer.19.output.dense.weight.scale"
 
764
  },
765
  {
766
  "name": "encoder.layer.2.attention.output.dense.weight",
@@ -770,7 +849,8 @@
770
  ],
771
  "dtype": "float16",
772
  "qweight": "encoder.layer.2.attention.output.dense.weight.qweight",
773
- "scale": "encoder.layer.2.attention.output.dense.weight.scale"
 
774
  },
775
  {
776
  "name": "encoder.layer.2.attention.self.key.weight",
@@ -780,7 +860,8 @@
780
  ],
781
  "dtype": "float16",
782
  "qweight": "encoder.layer.2.attention.self.key.weight.qweight",
783
- "scale": "encoder.layer.2.attention.self.key.weight.scale"
 
784
  },
785
  {
786
  "name": "encoder.layer.2.attention.self.query.weight",
@@ -790,7 +871,8 @@
790
  ],
791
  "dtype": "float16",
792
  "qweight": "encoder.layer.2.attention.self.query.weight.qweight",
793
- "scale": "encoder.layer.2.attention.self.query.weight.scale"
 
794
  },
795
  {
796
  "name": "encoder.layer.2.attention.self.value.weight",
@@ -800,7 +882,8 @@
800
  ],
801
  "dtype": "float16",
802
  "qweight": "encoder.layer.2.attention.self.value.weight.qweight",
803
- "scale": "encoder.layer.2.attention.self.value.weight.scale"
 
804
  },
805
  {
806
  "name": "encoder.layer.2.intermediate.dense.weight",
@@ -810,7 +893,8 @@
810
  ],
811
  "dtype": "float16",
812
  "qweight": "encoder.layer.2.intermediate.dense.weight.qweight",
813
- "scale": "encoder.layer.2.intermediate.dense.weight.scale"
 
814
  },
815
  {
816
  "name": "encoder.layer.2.output.dense.weight",
@@ -820,7 +904,8 @@
820
  ],
821
  "dtype": "float16",
822
  "qweight": "encoder.layer.2.output.dense.weight.qweight",
823
- "scale": "encoder.layer.2.output.dense.weight.scale"
 
824
  },
825
  {
826
  "name": "encoder.layer.20.attention.output.dense.weight",
@@ -830,7 +915,8 @@
830
  ],
831
  "dtype": "float16",
832
  "qweight": "encoder.layer.20.attention.output.dense.weight.qweight",
833
- "scale": "encoder.layer.20.attention.output.dense.weight.scale"
 
834
  },
835
  {
836
  "name": "encoder.layer.20.attention.self.key.weight",
@@ -840,7 +926,8 @@
840
  ],
841
  "dtype": "float16",
842
  "qweight": "encoder.layer.20.attention.self.key.weight.qweight",
843
- "scale": "encoder.layer.20.attention.self.key.weight.scale"
 
844
  },
845
  {
846
  "name": "encoder.layer.20.attention.self.query.weight",
@@ -850,7 +937,8 @@
850
  ],
851
  "dtype": "float16",
852
  "qweight": "encoder.layer.20.attention.self.query.weight.qweight",
853
- "scale": "encoder.layer.20.attention.self.query.weight.scale"
 
854
  },
855
  {
856
  "name": "encoder.layer.20.attention.self.value.weight",
@@ -860,7 +948,8 @@
860
  ],
861
  "dtype": "float16",
862
  "qweight": "encoder.layer.20.attention.self.value.weight.qweight",
863
- "scale": "encoder.layer.20.attention.self.value.weight.scale"
 
864
  },
865
  {
866
  "name": "encoder.layer.20.intermediate.dense.weight",
@@ -870,7 +959,8 @@
870
  ],
871
  "dtype": "float16",
872
  "qweight": "encoder.layer.20.intermediate.dense.weight.qweight",
873
- "scale": "encoder.layer.20.intermediate.dense.weight.scale"
 
874
  },
875
  {
876
  "name": "encoder.layer.20.output.dense.weight",
@@ -880,7 +970,8 @@
880
  ],
881
  "dtype": "float16",
882
  "qweight": "encoder.layer.20.output.dense.weight.qweight",
883
- "scale": "encoder.layer.20.output.dense.weight.scale"
 
884
  },
885
  {
886
  "name": "encoder.layer.21.attention.output.dense.weight",
@@ -890,7 +981,8 @@
890
  ],
891
  "dtype": "float16",
892
  "qweight": "encoder.layer.21.attention.output.dense.weight.qweight",
893
- "scale": "encoder.layer.21.attention.output.dense.weight.scale"
 
894
  },
895
  {
896
  "name": "encoder.layer.21.attention.self.key.weight",
@@ -900,7 +992,8 @@
900
  ],
901
  "dtype": "float16",
902
  "qweight": "encoder.layer.21.attention.self.key.weight.qweight",
903
- "scale": "encoder.layer.21.attention.self.key.weight.scale"
 
904
  },
905
  {
906
  "name": "encoder.layer.21.attention.self.query.weight",
@@ -910,7 +1003,8 @@
910
  ],
911
  "dtype": "float16",
912
  "qweight": "encoder.layer.21.attention.self.query.weight.qweight",
913
- "scale": "encoder.layer.21.attention.self.query.weight.scale"
 
914
  },
915
  {
916
  "name": "encoder.layer.21.attention.self.value.weight",
@@ -920,7 +1014,8 @@
920
  ],
921
  "dtype": "float16",
922
  "qweight": "encoder.layer.21.attention.self.value.weight.qweight",
923
- "scale": "encoder.layer.21.attention.self.value.weight.scale"
 
924
  },
925
  {
926
  "name": "encoder.layer.21.intermediate.dense.weight",
@@ -930,7 +1025,8 @@
930
  ],
931
  "dtype": "float16",
932
  "qweight": "encoder.layer.21.intermediate.dense.weight.qweight",
933
- "scale": "encoder.layer.21.intermediate.dense.weight.scale"
 
934
  },
935
  {
936
  "name": "encoder.layer.21.output.dense.weight",
@@ -940,7 +1036,8 @@
940
  ],
941
  "dtype": "float16",
942
  "qweight": "encoder.layer.21.output.dense.weight.qweight",
943
- "scale": "encoder.layer.21.output.dense.weight.scale"
 
944
  },
945
  {
946
  "name": "encoder.layer.22.attention.output.dense.weight",
@@ -950,7 +1047,8 @@
950
  ],
951
  "dtype": "float16",
952
  "qweight": "encoder.layer.22.attention.output.dense.weight.qweight",
953
- "scale": "encoder.layer.22.attention.output.dense.weight.scale"
 
954
  },
955
  {
956
  "name": "encoder.layer.22.attention.self.key.weight",
@@ -960,7 +1058,8 @@
960
  ],
961
  "dtype": "float16",
962
  "qweight": "encoder.layer.22.attention.self.key.weight.qweight",
963
- "scale": "encoder.layer.22.attention.self.key.weight.scale"
 
964
  },
965
  {
966
  "name": "encoder.layer.22.attention.self.query.weight",
@@ -970,7 +1069,8 @@
970
  ],
971
  "dtype": "float16",
972
  "qweight": "encoder.layer.22.attention.self.query.weight.qweight",
973
- "scale": "encoder.layer.22.attention.self.query.weight.scale"
 
974
  },
975
  {
976
  "name": "encoder.layer.22.attention.self.value.weight",
@@ -980,7 +1080,8 @@
980
  ],
981
  "dtype": "float16",
982
  "qweight": "encoder.layer.22.attention.self.value.weight.qweight",
983
- "scale": "encoder.layer.22.attention.self.value.weight.scale"
 
984
  },
985
  {
986
  "name": "encoder.layer.22.intermediate.dense.weight",
@@ -990,7 +1091,8 @@
990
  ],
991
  "dtype": "float16",
992
  "qweight": "encoder.layer.22.intermediate.dense.weight.qweight",
993
- "scale": "encoder.layer.22.intermediate.dense.weight.scale"
 
994
  },
995
  {
996
  "name": "encoder.layer.22.output.dense.weight",
@@ -1000,7 +1102,8 @@
1000
  ],
1001
  "dtype": "float16",
1002
  "qweight": "encoder.layer.22.output.dense.weight.qweight",
1003
- "scale": "encoder.layer.22.output.dense.weight.scale"
 
1004
  },
1005
  {
1006
  "name": "encoder.layer.23.attention.output.dense.weight",
@@ -1010,7 +1113,8 @@
1010
  ],
1011
  "dtype": "float16",
1012
  "qweight": "encoder.layer.23.attention.output.dense.weight.qweight",
1013
- "scale": "encoder.layer.23.attention.output.dense.weight.scale"
 
1014
  },
1015
  {
1016
  "name": "encoder.layer.23.attention.self.key.weight",
@@ -1020,7 +1124,8 @@
1020
  ],
1021
  "dtype": "float16",
1022
  "qweight": "encoder.layer.23.attention.self.key.weight.qweight",
1023
- "scale": "encoder.layer.23.attention.self.key.weight.scale"
 
1024
  },
1025
  {
1026
  "name": "encoder.layer.23.attention.self.query.weight",
@@ -1030,7 +1135,8 @@
1030
  ],
1031
  "dtype": "float16",
1032
  "qweight": "encoder.layer.23.attention.self.query.weight.qweight",
1033
- "scale": "encoder.layer.23.attention.self.query.weight.scale"
 
1034
  },
1035
  {
1036
  "name": "encoder.layer.23.attention.self.value.weight",
@@ -1040,7 +1146,8 @@
1040
  ],
1041
  "dtype": "float16",
1042
  "qweight": "encoder.layer.23.attention.self.value.weight.qweight",
1043
- "scale": "encoder.layer.23.attention.self.value.weight.scale"
 
1044
  },
1045
  {
1046
  "name": "encoder.layer.23.intermediate.dense.weight",
@@ -1050,7 +1157,8 @@
1050
  ],
1051
  "dtype": "float16",
1052
  "qweight": "encoder.layer.23.intermediate.dense.weight.qweight",
1053
- "scale": "encoder.layer.23.intermediate.dense.weight.scale"
 
1054
  },
1055
  {
1056
  "name": "encoder.layer.23.output.dense.weight",
@@ -1060,7 +1168,8 @@
1060
  ],
1061
  "dtype": "float16",
1062
  "qweight": "encoder.layer.23.output.dense.weight.qweight",
1063
- "scale": "encoder.layer.23.output.dense.weight.scale"
 
1064
  },
1065
  {
1066
  "name": "encoder.layer.3.attention.output.dense.weight",
@@ -1070,7 +1179,8 @@
1070
  ],
1071
  "dtype": "float16",
1072
  "qweight": "encoder.layer.3.attention.output.dense.weight.qweight",
1073
- "scale": "encoder.layer.3.attention.output.dense.weight.scale"
 
1074
  },
1075
  {
1076
  "name": "encoder.layer.3.attention.self.key.weight",
@@ -1080,7 +1190,8 @@
1080
  ],
1081
  "dtype": "float16",
1082
  "qweight": "encoder.layer.3.attention.self.key.weight.qweight",
1083
- "scale": "encoder.layer.3.attention.self.key.weight.scale"
 
1084
  },
1085
  {
1086
  "name": "encoder.layer.3.attention.self.query.weight",
@@ -1090,7 +1201,8 @@
1090
  ],
1091
  "dtype": "float16",
1092
  "qweight": "encoder.layer.3.attention.self.query.weight.qweight",
1093
- "scale": "encoder.layer.3.attention.self.query.weight.scale"
 
1094
  },
1095
  {
1096
  "name": "encoder.layer.3.attention.self.value.weight",
@@ -1100,7 +1212,8 @@
1100
  ],
1101
  "dtype": "float16",
1102
  "qweight": "encoder.layer.3.attention.self.value.weight.qweight",
1103
- "scale": "encoder.layer.3.attention.self.value.weight.scale"
 
1104
  },
1105
  {
1106
  "name": "encoder.layer.3.intermediate.dense.weight",
@@ -1110,7 +1223,8 @@
1110
  ],
1111
  "dtype": "float16",
1112
  "qweight": "encoder.layer.3.intermediate.dense.weight.qweight",
1113
- "scale": "encoder.layer.3.intermediate.dense.weight.scale"
 
1114
  },
1115
  {
1116
  "name": "encoder.layer.3.output.dense.weight",
@@ -1120,7 +1234,8 @@
1120
  ],
1121
  "dtype": "float16",
1122
  "qweight": "encoder.layer.3.output.dense.weight.qweight",
1123
- "scale": "encoder.layer.3.output.dense.weight.scale"
 
1124
  },
1125
  {
1126
  "name": "encoder.layer.4.attention.output.dense.weight",
@@ -1130,7 +1245,8 @@
1130
  ],
1131
  "dtype": "float16",
1132
  "qweight": "encoder.layer.4.attention.output.dense.weight.qweight",
1133
- "scale": "encoder.layer.4.attention.output.dense.weight.scale"
 
1134
  },
1135
  {
1136
  "name": "encoder.layer.4.attention.self.key.weight",
@@ -1140,7 +1256,8 @@
1140
  ],
1141
  "dtype": "float16",
1142
  "qweight": "encoder.layer.4.attention.self.key.weight.qweight",
1143
- "scale": "encoder.layer.4.attention.self.key.weight.scale"
 
1144
  },
1145
  {
1146
  "name": "encoder.layer.4.attention.self.query.weight",
@@ -1150,7 +1267,8 @@
1150
  ],
1151
  "dtype": "float16",
1152
  "qweight": "encoder.layer.4.attention.self.query.weight.qweight",
1153
- "scale": "encoder.layer.4.attention.self.query.weight.scale"
 
1154
  },
1155
  {
1156
  "name": "encoder.layer.4.attention.self.value.weight",
@@ -1160,7 +1278,8 @@
1160
  ],
1161
  "dtype": "float16",
1162
  "qweight": "encoder.layer.4.attention.self.value.weight.qweight",
1163
- "scale": "encoder.layer.4.attention.self.value.weight.scale"
 
1164
  },
1165
  {
1166
  "name": "encoder.layer.4.intermediate.dense.weight",
@@ -1170,7 +1289,8 @@
1170
  ],
1171
  "dtype": "float16",
1172
  "qweight": "encoder.layer.4.intermediate.dense.weight.qweight",
1173
- "scale": "encoder.layer.4.intermediate.dense.weight.scale"
 
1174
  },
1175
  {
1176
  "name": "encoder.layer.4.output.dense.weight",
@@ -1180,7 +1300,8 @@
1180
  ],
1181
  "dtype": "float16",
1182
  "qweight": "encoder.layer.4.output.dense.weight.qweight",
1183
- "scale": "encoder.layer.4.output.dense.weight.scale"
 
1184
  },
1185
  {
1186
  "name": "encoder.layer.5.attention.output.dense.weight",
@@ -1190,7 +1311,8 @@
1190
  ],
1191
  "dtype": "float16",
1192
  "qweight": "encoder.layer.5.attention.output.dense.weight.qweight",
1193
- "scale": "encoder.layer.5.attention.output.dense.weight.scale"
 
1194
  },
1195
  {
1196
  "name": "encoder.layer.5.attention.self.key.weight",
@@ -1200,7 +1322,8 @@
1200
  ],
1201
  "dtype": "float16",
1202
  "qweight": "encoder.layer.5.attention.self.key.weight.qweight",
1203
- "scale": "encoder.layer.5.attention.self.key.weight.scale"
 
1204
  },
1205
  {
1206
  "name": "encoder.layer.5.attention.self.query.weight",
@@ -1210,7 +1333,8 @@
1210
  ],
1211
  "dtype": "float16",
1212
  "qweight": "encoder.layer.5.attention.self.query.weight.qweight",
1213
- "scale": "encoder.layer.5.attention.self.query.weight.scale"
 
1214
  },
1215
  {
1216
  "name": "encoder.layer.5.attention.self.value.weight",
@@ -1220,7 +1344,8 @@
1220
  ],
1221
  "dtype": "float16",
1222
  "qweight": "encoder.layer.5.attention.self.value.weight.qweight",
1223
- "scale": "encoder.layer.5.attention.self.value.weight.scale"
 
1224
  },
1225
  {
1226
  "name": "encoder.layer.5.intermediate.dense.weight",
@@ -1230,7 +1355,8 @@
1230
  ],
1231
  "dtype": "float16",
1232
  "qweight": "encoder.layer.5.intermediate.dense.weight.qweight",
1233
- "scale": "encoder.layer.5.intermediate.dense.weight.scale"
 
1234
  },
1235
  {
1236
  "name": "encoder.layer.5.output.dense.weight",
@@ -1240,7 +1366,8 @@
1240
  ],
1241
  "dtype": "float16",
1242
  "qweight": "encoder.layer.5.output.dense.weight.qweight",
1243
- "scale": "encoder.layer.5.output.dense.weight.scale"
 
1244
  },
1245
  {
1246
  "name": "encoder.layer.6.attention.output.dense.weight",
@@ -1250,7 +1377,8 @@
1250
  ],
1251
  "dtype": "float16",
1252
  "qweight": "encoder.layer.6.attention.output.dense.weight.qweight",
1253
- "scale": "encoder.layer.6.attention.output.dense.weight.scale"
 
1254
  },
1255
  {
1256
  "name": "encoder.layer.6.attention.self.key.weight",
@@ -1260,7 +1388,8 @@
1260
  ],
1261
  "dtype": "float16",
1262
  "qweight": "encoder.layer.6.attention.self.key.weight.qweight",
1263
- "scale": "encoder.layer.6.attention.self.key.weight.scale"
 
1264
  },
1265
  {
1266
  "name": "encoder.layer.6.attention.self.query.weight",
@@ -1270,7 +1399,8 @@
1270
  ],
1271
  "dtype": "float16",
1272
  "qweight": "encoder.layer.6.attention.self.query.weight.qweight",
1273
- "scale": "encoder.layer.6.attention.self.query.weight.scale"
 
1274
  },
1275
  {
1276
  "name": "encoder.layer.6.attention.self.value.weight",
@@ -1280,7 +1410,8 @@
1280
  ],
1281
  "dtype": "float16",
1282
  "qweight": "encoder.layer.6.attention.self.value.weight.qweight",
1283
- "scale": "encoder.layer.6.attention.self.value.weight.scale"
 
1284
  },
1285
  {
1286
  "name": "encoder.layer.6.intermediate.dense.weight",
@@ -1290,7 +1421,8 @@
1290
  ],
1291
  "dtype": "float16",
1292
  "qweight": "encoder.layer.6.intermediate.dense.weight.qweight",
1293
- "scale": "encoder.layer.6.intermediate.dense.weight.scale"
 
1294
  },
1295
  {
1296
  "name": "encoder.layer.6.output.dense.weight",
@@ -1300,7 +1432,8 @@
1300
  ],
1301
  "dtype": "float16",
1302
  "qweight": "encoder.layer.6.output.dense.weight.qweight",
1303
- "scale": "encoder.layer.6.output.dense.weight.scale"
 
1304
  },
1305
  {
1306
  "name": "encoder.layer.7.attention.output.dense.weight",
@@ -1310,7 +1443,8 @@
1310
  ],
1311
  "dtype": "float16",
1312
  "qweight": "encoder.layer.7.attention.output.dense.weight.qweight",
1313
- "scale": "encoder.layer.7.attention.output.dense.weight.scale"
 
1314
  },
1315
  {
1316
  "name": "encoder.layer.7.attention.self.key.weight",
@@ -1320,7 +1454,8 @@
1320
  ],
1321
  "dtype": "float16",
1322
  "qweight": "encoder.layer.7.attention.self.key.weight.qweight",
1323
- "scale": "encoder.layer.7.attention.self.key.weight.scale"
 
1324
  },
1325
  {
1326
  "name": "encoder.layer.7.attention.self.query.weight",
@@ -1330,7 +1465,8 @@
1330
  ],
1331
  "dtype": "float16",
1332
  "qweight": "encoder.layer.7.attention.self.query.weight.qweight",
1333
- "scale": "encoder.layer.7.attention.self.query.weight.scale"
 
1334
  },
1335
  {
1336
  "name": "encoder.layer.7.attention.self.value.weight",
@@ -1340,7 +1476,8 @@
1340
  ],
1341
  "dtype": "float16",
1342
  "qweight": "encoder.layer.7.attention.self.value.weight.qweight",
1343
- "scale": "encoder.layer.7.attention.self.value.weight.scale"
 
1344
  },
1345
  {
1346
  "name": "encoder.layer.7.intermediate.dense.weight",
@@ -1350,7 +1487,8 @@
1350
  ],
1351
  "dtype": "float16",
1352
  "qweight": "encoder.layer.7.intermediate.dense.weight.qweight",
1353
- "scale": "encoder.layer.7.intermediate.dense.weight.scale"
 
1354
  },
1355
  {
1356
  "name": "encoder.layer.7.output.dense.weight",
@@ -1360,7 +1498,8 @@
1360
  ],
1361
  "dtype": "float16",
1362
  "qweight": "encoder.layer.7.output.dense.weight.qweight",
1363
- "scale": "encoder.layer.7.output.dense.weight.scale"
 
1364
  },
1365
  {
1366
  "name": "encoder.layer.8.attention.output.dense.weight",
@@ -1370,7 +1509,8 @@
1370
  ],
1371
  "dtype": "float16",
1372
  "qweight": "encoder.layer.8.attention.output.dense.weight.qweight",
1373
- "scale": "encoder.layer.8.attention.output.dense.weight.scale"
 
1374
  },
1375
  {
1376
  "name": "encoder.layer.8.attention.self.key.weight",
@@ -1380,7 +1520,8 @@
1380
  ],
1381
  "dtype": "float16",
1382
  "qweight": "encoder.layer.8.attention.self.key.weight.qweight",
1383
- "scale": "encoder.layer.8.attention.self.key.weight.scale"
 
1384
  },
1385
  {
1386
  "name": "encoder.layer.8.attention.self.query.weight",
@@ -1390,7 +1531,8 @@
1390
  ],
1391
  "dtype": "float16",
1392
  "qweight": "encoder.layer.8.attention.self.query.weight.qweight",
1393
- "scale": "encoder.layer.8.attention.self.query.weight.scale"
 
1394
  },
1395
  {
1396
  "name": "encoder.layer.8.attention.self.value.weight",
@@ -1400,7 +1542,8 @@
1400
  ],
1401
  "dtype": "float16",
1402
  "qweight": "encoder.layer.8.attention.self.value.weight.qweight",
1403
- "scale": "encoder.layer.8.attention.self.value.weight.scale"
 
1404
  },
1405
  {
1406
  "name": "encoder.layer.8.intermediate.dense.weight",
@@ -1410,7 +1553,8 @@
1410
  ],
1411
  "dtype": "float16",
1412
  "qweight": "encoder.layer.8.intermediate.dense.weight.qweight",
1413
- "scale": "encoder.layer.8.intermediate.dense.weight.scale"
 
1414
  },
1415
  {
1416
  "name": "encoder.layer.8.output.dense.weight",
@@ -1420,7 +1564,8 @@
1420
  ],
1421
  "dtype": "float16",
1422
  "qweight": "encoder.layer.8.output.dense.weight.qweight",
1423
- "scale": "encoder.layer.8.output.dense.weight.scale"
 
1424
  },
1425
  {
1426
  "name": "encoder.layer.9.attention.output.dense.weight",
@@ -1430,7 +1575,8 @@
1430
  ],
1431
  "dtype": "float16",
1432
  "qweight": "encoder.layer.9.attention.output.dense.weight.qweight",
1433
- "scale": "encoder.layer.9.attention.output.dense.weight.scale"
 
1434
  },
1435
  {
1436
  "name": "encoder.layer.9.attention.self.key.weight",
@@ -1440,7 +1586,8 @@
1440
  ],
1441
  "dtype": "float16",
1442
  "qweight": "encoder.layer.9.attention.self.key.weight.qweight",
1443
- "scale": "encoder.layer.9.attention.self.key.weight.scale"
 
1444
  },
1445
  {
1446
  "name": "encoder.layer.9.attention.self.query.weight",
@@ -1450,7 +1597,8 @@
1450
  ],
1451
  "dtype": "float16",
1452
  "qweight": "encoder.layer.9.attention.self.query.weight.qweight",
1453
- "scale": "encoder.layer.9.attention.self.query.weight.scale"
 
1454
  },
1455
  {
1456
  "name": "encoder.layer.9.attention.self.value.weight",
@@ -1460,7 +1608,8 @@
1460
  ],
1461
  "dtype": "float16",
1462
  "qweight": "encoder.layer.9.attention.self.value.weight.qweight",
1463
- "scale": "encoder.layer.9.attention.self.value.weight.scale"
 
1464
  },
1465
  {
1466
  "name": "encoder.layer.9.intermediate.dense.weight",
@@ -1470,7 +1619,8 @@
1470
  ],
1471
  "dtype": "float16",
1472
  "qweight": "encoder.layer.9.intermediate.dense.weight.qweight",
1473
- "scale": "encoder.layer.9.intermediate.dense.weight.scale"
 
1474
  },
1475
  {
1476
  "name": "encoder.layer.9.output.dense.weight",
@@ -1480,7 +1630,8 @@
1480
  ],
1481
  "dtype": "float16",
1482
  "qweight": "encoder.layer.9.output.dense.weight.qweight",
1483
- "scale": "encoder.layer.9.output.dense.weight.scale"
 
1484
  },
1485
  {
1486
  "name": "pooler.dense.weight",
@@ -1490,7 +1641,8 @@
1490
  ],
1491
  "dtype": "float16",
1492
  "qweight": "pooler.dense.weight.qweight",
1493
- "scale": "pooler.dense.weight.scale"
 
1494
  }
1495
  ],
1496
  "kept": [
 
1
  {
2
+ "schemaVersion": 2,
3
+ "format": "revis-xlm-roberta-e5-mlx-native-q8",
4
  "source": "intfloat/multilingual-e5-large",
5
  "baseWeights": "weights.00.safetensors",
6
  "quantizedWeights": "weights.00.safetensors",
7
  "quantization": {
8
+ "type": "mlx-native-affine",
9
  "bits": 8,
10
+ "group_size": 64,
11
+ "mode": "affine",
12
  "quantizedTensorSuffix": ".qweight",
13
+ "scaleTensorSuffix": ".scales",
14
+ "biasTensorSuffix": ".biases",
15
+ "linear": "mx.quantized_matmul(x, qweight, scales=scales, biases=biases, transpose=True, group_size=group_size, bits=bits, mode=mode)",
16
+ "embedding": "mx.dequantize(qweight[ids], scales=scales[ids], biases=biases[ids], group_size=group_size, bits=bits, mode=mode)"
17
  },
18
  "quantized": [
19
  {
 
24
  ],
25
  "dtype": "float16",
26
  "qweight": "embeddings.position_embeddings.weight.qweight",
27
+ "scales": "embeddings.position_embeddings.weight.scales",
28
+ "biases": "embeddings.position_embeddings.weight.biases"
29
  },
30
  {
31
  "name": "embeddings.token_type_embeddings.weight",
 
35
  ],
36
  "dtype": "float16",
37
  "qweight": "embeddings.token_type_embeddings.weight.qweight",
38
+ "scales": "embeddings.token_type_embeddings.weight.scales",
39
+ "biases": "embeddings.token_type_embeddings.weight.biases"
40
  },
41
  {
42
  "name": "embeddings.word_embeddings.weight",
 
46
  ],
47
  "dtype": "float16",
48
  "qweight": "embeddings.word_embeddings.weight.qweight",
49
+ "scales": "embeddings.word_embeddings.weight.scales",
50
+ "biases": "embeddings.word_embeddings.weight.biases"
51
  },
52
  {
53
  "name": "encoder.layer.0.attention.output.dense.weight",
 
57
  ],
58
  "dtype": "float16",
59
  "qweight": "encoder.layer.0.attention.output.dense.weight.qweight",
60
+ "scales": "encoder.layer.0.attention.output.dense.weight.scales",
61
+ "biases": "encoder.layer.0.attention.output.dense.weight.biases"
62
  },
63
  {
64
  "name": "encoder.layer.0.attention.self.key.weight",
 
68
  ],
69
  "dtype": "float16",
70
  "qweight": "encoder.layer.0.attention.self.key.weight.qweight",
71
+ "scales": "encoder.layer.0.attention.self.key.weight.scales",
72
+ "biases": "encoder.layer.0.attention.self.key.weight.biases"
73
  },
74
  {
75
  "name": "encoder.layer.0.attention.self.query.weight",
 
79
  ],
80
  "dtype": "float16",
81
  "qweight": "encoder.layer.0.attention.self.query.weight.qweight",
82
+ "scales": "encoder.layer.0.attention.self.query.weight.scales",
83
+ "biases": "encoder.layer.0.attention.self.query.weight.biases"
84
  },
85
  {
86
  "name": "encoder.layer.0.attention.self.value.weight",
 
90
  ],
91
  "dtype": "float16",
92
  "qweight": "encoder.layer.0.attention.self.value.weight.qweight",
93
+ "scales": "encoder.layer.0.attention.self.value.weight.scales",
94
+ "biases": "encoder.layer.0.attention.self.value.weight.biases"
95
  },
96
  {
97
  "name": "encoder.layer.0.intermediate.dense.weight",
 
101
  ],
102
  "dtype": "float16",
103
  "qweight": "encoder.layer.0.intermediate.dense.weight.qweight",
104
+ "scales": "encoder.layer.0.intermediate.dense.weight.scales",
105
+ "biases": "encoder.layer.0.intermediate.dense.weight.biases"
106
  },
107
  {
108
  "name": "encoder.layer.0.output.dense.weight",
 
112
  ],
113
  "dtype": "float16",
114
  "qweight": "encoder.layer.0.output.dense.weight.qweight",
115
+ "scales": "encoder.layer.0.output.dense.weight.scales",
116
+ "biases": "encoder.layer.0.output.dense.weight.biases"
117
  },
118
  {
119
  "name": "encoder.layer.1.attention.output.dense.weight",
 
123
  ],
124
  "dtype": "float16",
125
  "qweight": "encoder.layer.1.attention.output.dense.weight.qweight",
126
+ "scales": "encoder.layer.1.attention.output.dense.weight.scales",
127
+ "biases": "encoder.layer.1.attention.output.dense.weight.biases"
128
  },
129
  {
130
  "name": "encoder.layer.1.attention.self.key.weight",
 
134
  ],
135
  "dtype": "float16",
136
  "qweight": "encoder.layer.1.attention.self.key.weight.qweight",
137
+ "scales": "encoder.layer.1.attention.self.key.weight.scales",
138
+ "biases": "encoder.layer.1.attention.self.key.weight.biases"
139
  },
140
  {
141
  "name": "encoder.layer.1.attention.self.query.weight",
 
145
  ],
146
  "dtype": "float16",
147
  "qweight": "encoder.layer.1.attention.self.query.weight.qweight",
148
+ "scales": "encoder.layer.1.attention.self.query.weight.scales",
149
+ "biases": "encoder.layer.1.attention.self.query.weight.biases"
150
  },
151
  {
152
  "name": "encoder.layer.1.attention.self.value.weight",
 
156
  ],
157
  "dtype": "float16",
158
  "qweight": "encoder.layer.1.attention.self.value.weight.qweight",
159
+ "scales": "encoder.layer.1.attention.self.value.weight.scales",
160
+ "biases": "encoder.layer.1.attention.self.value.weight.biases"
161
  },
162
  {
163
  "name": "encoder.layer.1.intermediate.dense.weight",
 
167
  ],
168
  "dtype": "float16",
169
  "qweight": "encoder.layer.1.intermediate.dense.weight.qweight",
170
+ "scales": "encoder.layer.1.intermediate.dense.weight.scales",
171
+ "biases": "encoder.layer.1.intermediate.dense.weight.biases"
172
  },
173
  {
174
  "name": "encoder.layer.1.output.dense.weight",
 
178
  ],
179
  "dtype": "float16",
180
  "qweight": "encoder.layer.1.output.dense.weight.qweight",
181
+ "scales": "encoder.layer.1.output.dense.weight.scales",
182
+ "biases": "encoder.layer.1.output.dense.weight.biases"
183
  },
184
  {
185
  "name": "encoder.layer.10.attention.output.dense.weight",
 
189
  ],
190
  "dtype": "float16",
191
  "qweight": "encoder.layer.10.attention.output.dense.weight.qweight",
192
+ "scales": "encoder.layer.10.attention.output.dense.weight.scales",
193
+ "biases": "encoder.layer.10.attention.output.dense.weight.biases"
194
  },
195
  {
196
  "name": "encoder.layer.10.attention.self.key.weight",
 
200
  ],
201
  "dtype": "float16",
202
  "qweight": "encoder.layer.10.attention.self.key.weight.qweight",
203
+ "scales": "encoder.layer.10.attention.self.key.weight.scales",
204
+ "biases": "encoder.layer.10.attention.self.key.weight.biases"
205
  },
206
  {
207
  "name": "encoder.layer.10.attention.self.query.weight",
 
211
  ],
212
  "dtype": "float16",
213
  "qweight": "encoder.layer.10.attention.self.query.weight.qweight",
214
+ "scales": "encoder.layer.10.attention.self.query.weight.scales",
215
+ "biases": "encoder.layer.10.attention.self.query.weight.biases"
216
  },
217
  {
218
  "name": "encoder.layer.10.attention.self.value.weight",
 
222
  ],
223
  "dtype": "float16",
224
  "qweight": "encoder.layer.10.attention.self.value.weight.qweight",
225
+ "scales": "encoder.layer.10.attention.self.value.weight.scales",
226
+ "biases": "encoder.layer.10.attention.self.value.weight.biases"
227
  },
228
  {
229
  "name": "encoder.layer.10.intermediate.dense.weight",
 
233
  ],
234
  "dtype": "float16",
235
  "qweight": "encoder.layer.10.intermediate.dense.weight.qweight",
236
+ "scales": "encoder.layer.10.intermediate.dense.weight.scales",
237
+ "biases": "encoder.layer.10.intermediate.dense.weight.biases"
238
  },
239
  {
240
  "name": "encoder.layer.10.output.dense.weight",
 
244
  ],
245
  "dtype": "float16",
246
  "qweight": "encoder.layer.10.output.dense.weight.qweight",
247
+ "scales": "encoder.layer.10.output.dense.weight.scales",
248
+ "biases": "encoder.layer.10.output.dense.weight.biases"
249
  },
250
  {
251
  "name": "encoder.layer.11.attention.output.dense.weight",
 
255
  ],
256
  "dtype": "float16",
257
  "qweight": "encoder.layer.11.attention.output.dense.weight.qweight",
258
+ "scales": "encoder.layer.11.attention.output.dense.weight.scales",
259
+ "biases": "encoder.layer.11.attention.output.dense.weight.biases"
260
  },
261
  {
262
  "name": "encoder.layer.11.attention.self.key.weight",
 
266
  ],
267
  "dtype": "float16",
268
  "qweight": "encoder.layer.11.attention.self.key.weight.qweight",
269
+ "scales": "encoder.layer.11.attention.self.key.weight.scales",
270
+ "biases": "encoder.layer.11.attention.self.key.weight.biases"
271
  },
272
  {
273
  "name": "encoder.layer.11.attention.self.query.weight",
 
277
  ],
278
  "dtype": "float16",
279
  "qweight": "encoder.layer.11.attention.self.query.weight.qweight",
280
+ "scales": "encoder.layer.11.attention.self.query.weight.scales",
281
+ "biases": "encoder.layer.11.attention.self.query.weight.biases"
282
  },
283
  {
284
  "name": "encoder.layer.11.attention.self.value.weight",
 
288
  ],
289
  "dtype": "float16",
290
  "qweight": "encoder.layer.11.attention.self.value.weight.qweight",
291
+ "scales": "encoder.layer.11.attention.self.value.weight.scales",
292
+ "biases": "encoder.layer.11.attention.self.value.weight.biases"
293
  },
294
  {
295
  "name": "encoder.layer.11.intermediate.dense.weight",
 
299
  ],
300
  "dtype": "float16",
301
  "qweight": "encoder.layer.11.intermediate.dense.weight.qweight",
302
+ "scales": "encoder.layer.11.intermediate.dense.weight.scales",
303
+ "biases": "encoder.layer.11.intermediate.dense.weight.biases"
304
  },
305
  {
306
  "name": "encoder.layer.11.output.dense.weight",
 
310
  ],
311
  "dtype": "float16",
312
  "qweight": "encoder.layer.11.output.dense.weight.qweight",
313
+ "scales": "encoder.layer.11.output.dense.weight.scales",
314
+ "biases": "encoder.layer.11.output.dense.weight.biases"
315
  },
316
  {
317
  "name": "encoder.layer.12.attention.output.dense.weight",
 
321
  ],
322
  "dtype": "float16",
323
  "qweight": "encoder.layer.12.attention.output.dense.weight.qweight",
324
+ "scales": "encoder.layer.12.attention.output.dense.weight.scales",
325
+ "biases": "encoder.layer.12.attention.output.dense.weight.biases"
326
  },
327
  {
328
  "name": "encoder.layer.12.attention.self.key.weight",
 
332
  ],
333
  "dtype": "float16",
334
  "qweight": "encoder.layer.12.attention.self.key.weight.qweight",
335
+ "scales": "encoder.layer.12.attention.self.key.weight.scales",
336
+ "biases": "encoder.layer.12.attention.self.key.weight.biases"
337
  },
338
  {
339
  "name": "encoder.layer.12.attention.self.query.weight",
 
343
  ],
344
  "dtype": "float16",
345
  "qweight": "encoder.layer.12.attention.self.query.weight.qweight",
346
+ "scales": "encoder.layer.12.attention.self.query.weight.scales",
347
+ "biases": "encoder.layer.12.attention.self.query.weight.biases"
348
  },
349
  {
350
  "name": "encoder.layer.12.attention.self.value.weight",
 
354
  ],
355
  "dtype": "float16",
356
  "qweight": "encoder.layer.12.attention.self.value.weight.qweight",
357
+ "scales": "encoder.layer.12.attention.self.value.weight.scales",
358
+ "biases": "encoder.layer.12.attention.self.value.weight.biases"
359
  },
360
  {
361
  "name": "encoder.layer.12.intermediate.dense.weight",
 
365
  ],
366
  "dtype": "float16",
367
  "qweight": "encoder.layer.12.intermediate.dense.weight.qweight",
368
+ "scales": "encoder.layer.12.intermediate.dense.weight.scales",
369
+ "biases": "encoder.layer.12.intermediate.dense.weight.biases"
370
  },
371
  {
372
  "name": "encoder.layer.12.output.dense.weight",
 
376
  ],
377
  "dtype": "float16",
378
  "qweight": "encoder.layer.12.output.dense.weight.qweight",
379
+ "scales": "encoder.layer.12.output.dense.weight.scales",
380
+ "biases": "encoder.layer.12.output.dense.weight.biases"
381
  },
382
  {
383
  "name": "encoder.layer.13.attention.output.dense.weight",
 
387
  ],
388
  "dtype": "float16",
389
  "qweight": "encoder.layer.13.attention.output.dense.weight.qweight",
390
+ "scales": "encoder.layer.13.attention.output.dense.weight.scales",
391
+ "biases": "encoder.layer.13.attention.output.dense.weight.biases"
392
  },
393
  {
394
  "name": "encoder.layer.13.attention.self.key.weight",
 
398
  ],
399
  "dtype": "float16",
400
  "qweight": "encoder.layer.13.attention.self.key.weight.qweight",
401
+ "scales": "encoder.layer.13.attention.self.key.weight.scales",
402
+ "biases": "encoder.layer.13.attention.self.key.weight.biases"
403
  },
404
  {
405
  "name": "encoder.layer.13.attention.self.query.weight",
 
409
  ],
410
  "dtype": "float16",
411
  "qweight": "encoder.layer.13.attention.self.query.weight.qweight",
412
+ "scales": "encoder.layer.13.attention.self.query.weight.scales",
413
+ "biases": "encoder.layer.13.attention.self.query.weight.biases"
414
  },
415
  {
416
  "name": "encoder.layer.13.attention.self.value.weight",
 
420
  ],
421
  "dtype": "float16",
422
  "qweight": "encoder.layer.13.attention.self.value.weight.qweight",
423
+ "scales": "encoder.layer.13.attention.self.value.weight.scales",
424
+ "biases": "encoder.layer.13.attention.self.value.weight.biases"
425
  },
426
  {
427
  "name": "encoder.layer.13.intermediate.dense.weight",
 
431
  ],
432
  "dtype": "float16",
433
  "qweight": "encoder.layer.13.intermediate.dense.weight.qweight",
434
+ "scales": "encoder.layer.13.intermediate.dense.weight.scales",
435
+ "biases": "encoder.layer.13.intermediate.dense.weight.biases"
436
  },
437
  {
438
  "name": "encoder.layer.13.output.dense.weight",
 
442
  ],
443
  "dtype": "float16",
444
  "qweight": "encoder.layer.13.output.dense.weight.qweight",
445
+ "scales": "encoder.layer.13.output.dense.weight.scales",
446
+ "biases": "encoder.layer.13.output.dense.weight.biases"
447
  },
448
  {
449
  "name": "encoder.layer.14.attention.output.dense.weight",
 
453
  ],
454
  "dtype": "float16",
455
  "qweight": "encoder.layer.14.attention.output.dense.weight.qweight",
456
+ "scales": "encoder.layer.14.attention.output.dense.weight.scales",
457
+ "biases": "encoder.layer.14.attention.output.dense.weight.biases"
458
  },
459
  {
460
  "name": "encoder.layer.14.attention.self.key.weight",
 
464
  ],
465
  "dtype": "float16",
466
  "qweight": "encoder.layer.14.attention.self.key.weight.qweight",
467
+ "scales": "encoder.layer.14.attention.self.key.weight.scales",
468
+ "biases": "encoder.layer.14.attention.self.key.weight.biases"
469
  },
470
  {
471
  "name": "encoder.layer.14.attention.self.query.weight",
 
475
  ],
476
  "dtype": "float16",
477
  "qweight": "encoder.layer.14.attention.self.query.weight.qweight",
478
+ "scales": "encoder.layer.14.attention.self.query.weight.scales",
479
+ "biases": "encoder.layer.14.attention.self.query.weight.biases"
480
  },
481
  {
482
  "name": "encoder.layer.14.attention.self.value.weight",
 
486
  ],
487
  "dtype": "float16",
488
  "qweight": "encoder.layer.14.attention.self.value.weight.qweight",
489
+ "scales": "encoder.layer.14.attention.self.value.weight.scales",
490
+ "biases": "encoder.layer.14.attention.self.value.weight.biases"
491
  },
492
  {
493
  "name": "encoder.layer.14.intermediate.dense.weight",
 
497
  ],
498
  "dtype": "float16",
499
  "qweight": "encoder.layer.14.intermediate.dense.weight.qweight",
500
+ "scales": "encoder.layer.14.intermediate.dense.weight.scales",
501
+ "biases": "encoder.layer.14.intermediate.dense.weight.biases"
502
  },
503
  {
504
  "name": "encoder.layer.14.output.dense.weight",
 
508
  ],
509
  "dtype": "float16",
510
  "qweight": "encoder.layer.14.output.dense.weight.qweight",
511
+ "scales": "encoder.layer.14.output.dense.weight.scales",
512
+ "biases": "encoder.layer.14.output.dense.weight.biases"
513
  },
514
  {
515
  "name": "encoder.layer.15.attention.output.dense.weight",
 
519
  ],
520
  "dtype": "float16",
521
  "qweight": "encoder.layer.15.attention.output.dense.weight.qweight",
522
+ "scales": "encoder.layer.15.attention.output.dense.weight.scales",
523
+ "biases": "encoder.layer.15.attention.output.dense.weight.biases"
524
  },
525
  {
526
  "name": "encoder.layer.15.attention.self.key.weight",
 
530
  ],
531
  "dtype": "float16",
532
  "qweight": "encoder.layer.15.attention.self.key.weight.qweight",
533
+ "scales": "encoder.layer.15.attention.self.key.weight.scales",
534
+ "biases": "encoder.layer.15.attention.self.key.weight.biases"
535
  },
536
  {
537
  "name": "encoder.layer.15.attention.self.query.weight",
 
541
  ],
542
  "dtype": "float16",
543
  "qweight": "encoder.layer.15.attention.self.query.weight.qweight",
544
+ "scales": "encoder.layer.15.attention.self.query.weight.scales",
545
+ "biases": "encoder.layer.15.attention.self.query.weight.biases"
546
  },
547
  {
548
  "name": "encoder.layer.15.attention.self.value.weight",
 
552
  ],
553
  "dtype": "float16",
554
  "qweight": "encoder.layer.15.attention.self.value.weight.qweight",
555
+ "scales": "encoder.layer.15.attention.self.value.weight.scales",
556
+ "biases": "encoder.layer.15.attention.self.value.weight.biases"
557
  },
558
  {
559
  "name": "encoder.layer.15.intermediate.dense.weight",
 
563
  ],
564
  "dtype": "float16",
565
  "qweight": "encoder.layer.15.intermediate.dense.weight.qweight",
566
+ "scales": "encoder.layer.15.intermediate.dense.weight.scales",
567
+ "biases": "encoder.layer.15.intermediate.dense.weight.biases"
568
  },
569
  {
570
  "name": "encoder.layer.15.output.dense.weight",
 
574
  ],
575
  "dtype": "float16",
576
  "qweight": "encoder.layer.15.output.dense.weight.qweight",
577
+ "scales": "encoder.layer.15.output.dense.weight.scales",
578
+ "biases": "encoder.layer.15.output.dense.weight.biases"
579
  },
580
  {
581
  "name": "encoder.layer.16.attention.output.dense.weight",
 
585
  ],
586
  "dtype": "float16",
587
  "qweight": "encoder.layer.16.attention.output.dense.weight.qweight",
588
+ "scales": "encoder.layer.16.attention.output.dense.weight.scales",
589
+ "biases": "encoder.layer.16.attention.output.dense.weight.biases"
590
  },
591
  {
592
  "name": "encoder.layer.16.attention.self.key.weight",
 
596
  ],
597
  "dtype": "float16",
598
  "qweight": "encoder.layer.16.attention.self.key.weight.qweight",
599
+ "scales": "encoder.layer.16.attention.self.key.weight.scales",
600
+ "biases": "encoder.layer.16.attention.self.key.weight.biases"
601
  },
602
  {
603
  "name": "encoder.layer.16.attention.self.query.weight",
 
607
  ],
608
  "dtype": "float16",
609
  "qweight": "encoder.layer.16.attention.self.query.weight.qweight",
610
+ "scales": "encoder.layer.16.attention.self.query.weight.scales",
611
+ "biases": "encoder.layer.16.attention.self.query.weight.biases"
612
  },
613
  {
614
  "name": "encoder.layer.16.attention.self.value.weight",
 
618
  ],
619
  "dtype": "float16",
620
  "qweight": "encoder.layer.16.attention.self.value.weight.qweight",
621
+ "scales": "encoder.layer.16.attention.self.value.weight.scales",
622
+ "biases": "encoder.layer.16.attention.self.value.weight.biases"
623
  },
624
  {
625
  "name": "encoder.layer.16.intermediate.dense.weight",
 
629
  ],
630
  "dtype": "float16",
631
  "qweight": "encoder.layer.16.intermediate.dense.weight.qweight",
632
+ "scales": "encoder.layer.16.intermediate.dense.weight.scales",
633
+ "biases": "encoder.layer.16.intermediate.dense.weight.biases"
634
  },
635
  {
636
  "name": "encoder.layer.16.output.dense.weight",
 
640
  ],
641
  "dtype": "float16",
642
  "qweight": "encoder.layer.16.output.dense.weight.qweight",
643
+ "scales": "encoder.layer.16.output.dense.weight.scales",
644
+ "biases": "encoder.layer.16.output.dense.weight.biases"
645
  },
646
  {
647
  "name": "encoder.layer.17.attention.output.dense.weight",
 
651
  ],
652
  "dtype": "float16",
653
  "qweight": "encoder.layer.17.attention.output.dense.weight.qweight",
654
+ "scales": "encoder.layer.17.attention.output.dense.weight.scales",
655
+ "biases": "encoder.layer.17.attention.output.dense.weight.biases"
656
  },
657
  {
658
  "name": "encoder.layer.17.attention.self.key.weight",
 
662
  ],
663
  "dtype": "float16",
664
  "qweight": "encoder.layer.17.attention.self.key.weight.qweight",
665
+ "scales": "encoder.layer.17.attention.self.key.weight.scales",
666
+ "biases": "encoder.layer.17.attention.self.key.weight.biases"
667
  },
668
  {
669
  "name": "encoder.layer.17.attention.self.query.weight",
 
673
  ],
674
  "dtype": "float16",
675
  "qweight": "encoder.layer.17.attention.self.query.weight.qweight",
676
+ "scales": "encoder.layer.17.attention.self.query.weight.scales",
677
+ "biases": "encoder.layer.17.attention.self.query.weight.biases"
678
  },
679
  {
680
  "name": "encoder.layer.17.attention.self.value.weight",
 
684
  ],
685
  "dtype": "float16",
686
  "qweight": "encoder.layer.17.attention.self.value.weight.qweight",
687
+ "scales": "encoder.layer.17.attention.self.value.weight.scales",
688
+ "biases": "encoder.layer.17.attention.self.value.weight.biases"
689
  },
690
  {
691
  "name": "encoder.layer.17.intermediate.dense.weight",
 
695
  ],
696
  "dtype": "float16",
697
  "qweight": "encoder.layer.17.intermediate.dense.weight.qweight",
698
+ "scales": "encoder.layer.17.intermediate.dense.weight.scales",
699
+ "biases": "encoder.layer.17.intermediate.dense.weight.biases"
700
  },
701
  {
702
  "name": "encoder.layer.17.output.dense.weight",
 
706
  ],
707
  "dtype": "float16",
708
  "qweight": "encoder.layer.17.output.dense.weight.qweight",
709
+ "scales": "encoder.layer.17.output.dense.weight.scales",
710
+ "biases": "encoder.layer.17.output.dense.weight.biases"
711
  },
712
  {
713
  "name": "encoder.layer.18.attention.output.dense.weight",
 
717
  ],
718
  "dtype": "float16",
719
  "qweight": "encoder.layer.18.attention.output.dense.weight.qweight",
720
+ "scales": "encoder.layer.18.attention.output.dense.weight.scales",
721
+ "biases": "encoder.layer.18.attention.output.dense.weight.biases"
722
  },
723
  {
724
  "name": "encoder.layer.18.attention.self.key.weight",
 
728
  ],
729
  "dtype": "float16",
730
  "qweight": "encoder.layer.18.attention.self.key.weight.qweight",
731
+ "scales": "encoder.layer.18.attention.self.key.weight.scales",
732
+ "biases": "encoder.layer.18.attention.self.key.weight.biases"
733
  },
734
  {
735
  "name": "encoder.layer.18.attention.self.query.weight",
 
739
  ],
740
  "dtype": "float16",
741
  "qweight": "encoder.layer.18.attention.self.query.weight.qweight",
742
+ "scales": "encoder.layer.18.attention.self.query.weight.scales",
743
+ "biases": "encoder.layer.18.attention.self.query.weight.biases"
744
  },
745
  {
746
  "name": "encoder.layer.18.attention.self.value.weight",
 
750
  ],
751
  "dtype": "float16",
752
  "qweight": "encoder.layer.18.attention.self.value.weight.qweight",
753
+ "scales": "encoder.layer.18.attention.self.value.weight.scales",
754
+ "biases": "encoder.layer.18.attention.self.value.weight.biases"
755
  },
756
  {
757
  "name": "encoder.layer.18.intermediate.dense.weight",
 
761
  ],
762
  "dtype": "float16",
763
  "qweight": "encoder.layer.18.intermediate.dense.weight.qweight",
764
+ "scales": "encoder.layer.18.intermediate.dense.weight.scales",
765
+ "biases": "encoder.layer.18.intermediate.dense.weight.biases"
766
  },
767
  {
768
  "name": "encoder.layer.18.output.dense.weight",
 
772
  ],
773
  "dtype": "float16",
774
  "qweight": "encoder.layer.18.output.dense.weight.qweight",
775
+ "scales": "encoder.layer.18.output.dense.weight.scales",
776
+ "biases": "encoder.layer.18.output.dense.weight.biases"
777
  },
778
  {
779
  "name": "encoder.layer.19.attention.output.dense.weight",
 
783
  ],
784
  "dtype": "float16",
785
  "qweight": "encoder.layer.19.attention.output.dense.weight.qweight",
786
+ "scales": "encoder.layer.19.attention.output.dense.weight.scales",
787
+ "biases": "encoder.layer.19.attention.output.dense.weight.biases"
788
  },
789
  {
790
  "name": "encoder.layer.19.attention.self.key.weight",
 
794
  ],
795
  "dtype": "float16",
796
  "qweight": "encoder.layer.19.attention.self.key.weight.qweight",
797
+ "scales": "encoder.layer.19.attention.self.key.weight.scales",
798
+ "biases": "encoder.layer.19.attention.self.key.weight.biases"
799
  },
800
  {
801
  "name": "encoder.layer.19.attention.self.query.weight",
 
805
  ],
806
  "dtype": "float16",
807
  "qweight": "encoder.layer.19.attention.self.query.weight.qweight",
808
+ "scales": "encoder.layer.19.attention.self.query.weight.scales",
809
+ "biases": "encoder.layer.19.attention.self.query.weight.biases"
810
  },
811
  {
812
  "name": "encoder.layer.19.attention.self.value.weight",
 
816
  ],
817
  "dtype": "float16",
818
  "qweight": "encoder.layer.19.attention.self.value.weight.qweight",
819
+ "scales": "encoder.layer.19.attention.self.value.weight.scales",
820
+ "biases": "encoder.layer.19.attention.self.value.weight.biases"
821
  },
822
  {
823
  "name": "encoder.layer.19.intermediate.dense.weight",
 
827
  ],
828
  "dtype": "float16",
829
  "qweight": "encoder.layer.19.intermediate.dense.weight.qweight",
830
+ "scales": "encoder.layer.19.intermediate.dense.weight.scales",
831
+ "biases": "encoder.layer.19.intermediate.dense.weight.biases"
832
  },
833
  {
834
  "name": "encoder.layer.19.output.dense.weight",
 
838
  ],
839
  "dtype": "float16",
840
  "qweight": "encoder.layer.19.output.dense.weight.qweight",
841
+ "scales": "encoder.layer.19.output.dense.weight.scales",
842
+ "biases": "encoder.layer.19.output.dense.weight.biases"
843
  },
844
  {
845
  "name": "encoder.layer.2.attention.output.dense.weight",
 
849
  ],
850
  "dtype": "float16",
851
  "qweight": "encoder.layer.2.attention.output.dense.weight.qweight",
852
+ "scales": "encoder.layer.2.attention.output.dense.weight.scales",
853
+ "biases": "encoder.layer.2.attention.output.dense.weight.biases"
854
  },
855
  {
856
  "name": "encoder.layer.2.attention.self.key.weight",
 
860
  ],
861
  "dtype": "float16",
862
  "qweight": "encoder.layer.2.attention.self.key.weight.qweight",
863
+ "scales": "encoder.layer.2.attention.self.key.weight.scales",
864
+ "biases": "encoder.layer.2.attention.self.key.weight.biases"
865
  },
866
  {
867
  "name": "encoder.layer.2.attention.self.query.weight",
 
871
  ],
872
  "dtype": "float16",
873
  "qweight": "encoder.layer.2.attention.self.query.weight.qweight",
874
+ "scales": "encoder.layer.2.attention.self.query.weight.scales",
875
+ "biases": "encoder.layer.2.attention.self.query.weight.biases"
876
  },
877
  {
878
  "name": "encoder.layer.2.attention.self.value.weight",
 
882
  ],
883
  "dtype": "float16",
884
  "qweight": "encoder.layer.2.attention.self.value.weight.qweight",
885
+ "scales": "encoder.layer.2.attention.self.value.weight.scales",
886
+ "biases": "encoder.layer.2.attention.self.value.weight.biases"
887
  },
888
  {
889
  "name": "encoder.layer.2.intermediate.dense.weight",
 
893
  ],
894
  "dtype": "float16",
895
  "qweight": "encoder.layer.2.intermediate.dense.weight.qweight",
896
+ "scales": "encoder.layer.2.intermediate.dense.weight.scales",
897
+ "biases": "encoder.layer.2.intermediate.dense.weight.biases"
898
  },
899
  {
900
  "name": "encoder.layer.2.output.dense.weight",
 
904
  ],
905
  "dtype": "float16",
906
  "qweight": "encoder.layer.2.output.dense.weight.qweight",
907
+ "scales": "encoder.layer.2.output.dense.weight.scales",
908
+ "biases": "encoder.layer.2.output.dense.weight.biases"
909
  },
910
  {
911
  "name": "encoder.layer.20.attention.output.dense.weight",
 
915
  ],
916
  "dtype": "float16",
917
  "qweight": "encoder.layer.20.attention.output.dense.weight.qweight",
918
+ "scales": "encoder.layer.20.attention.output.dense.weight.scales",
919
+ "biases": "encoder.layer.20.attention.output.dense.weight.biases"
920
  },
921
  {
922
  "name": "encoder.layer.20.attention.self.key.weight",
 
926
  ],
927
  "dtype": "float16",
928
  "qweight": "encoder.layer.20.attention.self.key.weight.qweight",
929
+ "scales": "encoder.layer.20.attention.self.key.weight.scales",
930
+ "biases": "encoder.layer.20.attention.self.key.weight.biases"
931
  },
932
  {
933
  "name": "encoder.layer.20.attention.self.query.weight",
 
937
  ],
938
  "dtype": "float16",
939
  "qweight": "encoder.layer.20.attention.self.query.weight.qweight",
940
+ "scales": "encoder.layer.20.attention.self.query.weight.scales",
941
+ "biases": "encoder.layer.20.attention.self.query.weight.biases"
942
  },
943
  {
944
  "name": "encoder.layer.20.attention.self.value.weight",
 
948
  ],
949
  "dtype": "float16",
950
  "qweight": "encoder.layer.20.attention.self.value.weight.qweight",
951
+ "scales": "encoder.layer.20.attention.self.value.weight.scales",
952
+ "biases": "encoder.layer.20.attention.self.value.weight.biases"
953
  },
954
  {
955
  "name": "encoder.layer.20.intermediate.dense.weight",
 
959
  ],
960
  "dtype": "float16",
961
  "qweight": "encoder.layer.20.intermediate.dense.weight.qweight",
962
+ "scales": "encoder.layer.20.intermediate.dense.weight.scales",
963
+ "biases": "encoder.layer.20.intermediate.dense.weight.biases"
964
  },
965
  {
966
  "name": "encoder.layer.20.output.dense.weight",
 
970
  ],
971
  "dtype": "float16",
972
  "qweight": "encoder.layer.20.output.dense.weight.qweight",
973
+ "scales": "encoder.layer.20.output.dense.weight.scales",
974
+ "biases": "encoder.layer.20.output.dense.weight.biases"
975
  },
976
  {
977
  "name": "encoder.layer.21.attention.output.dense.weight",
 
981
  ],
982
  "dtype": "float16",
983
  "qweight": "encoder.layer.21.attention.output.dense.weight.qweight",
984
+ "scales": "encoder.layer.21.attention.output.dense.weight.scales",
985
+ "biases": "encoder.layer.21.attention.output.dense.weight.biases"
986
  },
987
  {
988
  "name": "encoder.layer.21.attention.self.key.weight",
 
992
  ],
993
  "dtype": "float16",
994
  "qweight": "encoder.layer.21.attention.self.key.weight.qweight",
995
+ "scales": "encoder.layer.21.attention.self.key.weight.scales",
996
+ "biases": "encoder.layer.21.attention.self.key.weight.biases"
997
  },
998
  {
999
  "name": "encoder.layer.21.attention.self.query.weight",
 
1003
  ],
1004
  "dtype": "float16",
1005
  "qweight": "encoder.layer.21.attention.self.query.weight.qweight",
1006
+ "scales": "encoder.layer.21.attention.self.query.weight.scales",
1007
+ "biases": "encoder.layer.21.attention.self.query.weight.biases"
1008
  },
1009
  {
1010
  "name": "encoder.layer.21.attention.self.value.weight",
 
1014
  ],
1015
  "dtype": "float16",
1016
  "qweight": "encoder.layer.21.attention.self.value.weight.qweight",
1017
+ "scales": "encoder.layer.21.attention.self.value.weight.scales",
1018
+ "biases": "encoder.layer.21.attention.self.value.weight.biases"
1019
  },
1020
  {
1021
  "name": "encoder.layer.21.intermediate.dense.weight",
 
1025
  ],
1026
  "dtype": "float16",
1027
  "qweight": "encoder.layer.21.intermediate.dense.weight.qweight",
1028
+ "scales": "encoder.layer.21.intermediate.dense.weight.scales",
1029
+ "biases": "encoder.layer.21.intermediate.dense.weight.biases"
1030
  },
1031
  {
1032
  "name": "encoder.layer.21.output.dense.weight",
 
1036
  ],
1037
  "dtype": "float16",
1038
  "qweight": "encoder.layer.21.output.dense.weight.qweight",
1039
+ "scales": "encoder.layer.21.output.dense.weight.scales",
1040
+ "biases": "encoder.layer.21.output.dense.weight.biases"
1041
  },
1042
  {
1043
  "name": "encoder.layer.22.attention.output.dense.weight",
 
1047
  ],
1048
  "dtype": "float16",
1049
  "qweight": "encoder.layer.22.attention.output.dense.weight.qweight",
1050
+ "scales": "encoder.layer.22.attention.output.dense.weight.scales",
1051
+ "biases": "encoder.layer.22.attention.output.dense.weight.biases"
1052
  },
1053
  {
1054
  "name": "encoder.layer.22.attention.self.key.weight",
 
1058
  ],
1059
  "dtype": "float16",
1060
  "qweight": "encoder.layer.22.attention.self.key.weight.qweight",
1061
+ "scales": "encoder.layer.22.attention.self.key.weight.scales",
1062
+ "biases": "encoder.layer.22.attention.self.key.weight.biases"
1063
  },
1064
  {
1065
  "name": "encoder.layer.22.attention.self.query.weight",
 
1069
  ],
1070
  "dtype": "float16",
1071
  "qweight": "encoder.layer.22.attention.self.query.weight.qweight",
1072
+ "scales": "encoder.layer.22.attention.self.query.weight.scales",
1073
+ "biases": "encoder.layer.22.attention.self.query.weight.biases"
1074
  },
1075
  {
1076
  "name": "encoder.layer.22.attention.self.value.weight",
 
1080
  ],
1081
  "dtype": "float16",
1082
  "qweight": "encoder.layer.22.attention.self.value.weight.qweight",
1083
+ "scales": "encoder.layer.22.attention.self.value.weight.scales",
1084
+ "biases": "encoder.layer.22.attention.self.value.weight.biases"
1085
  },
1086
  {
1087
  "name": "encoder.layer.22.intermediate.dense.weight",
 
1091
  ],
1092
  "dtype": "float16",
1093
  "qweight": "encoder.layer.22.intermediate.dense.weight.qweight",
1094
+ "scales": "encoder.layer.22.intermediate.dense.weight.scales",
1095
+ "biases": "encoder.layer.22.intermediate.dense.weight.biases"
1096
  },
1097
  {
1098
  "name": "encoder.layer.22.output.dense.weight",
 
1102
  ],
1103
  "dtype": "float16",
1104
  "qweight": "encoder.layer.22.output.dense.weight.qweight",
1105
+ "scales": "encoder.layer.22.output.dense.weight.scales",
1106
+ "biases": "encoder.layer.22.output.dense.weight.biases"
1107
  },
1108
  {
1109
  "name": "encoder.layer.23.attention.output.dense.weight",
 
1113
  ],
1114
  "dtype": "float16",
1115
  "qweight": "encoder.layer.23.attention.output.dense.weight.qweight",
1116
+ "scales": "encoder.layer.23.attention.output.dense.weight.scales",
1117
+ "biases": "encoder.layer.23.attention.output.dense.weight.biases"
1118
  },
1119
  {
1120
  "name": "encoder.layer.23.attention.self.key.weight",
 
1124
  ],
1125
  "dtype": "float16",
1126
  "qweight": "encoder.layer.23.attention.self.key.weight.qweight",
1127
+ "scales": "encoder.layer.23.attention.self.key.weight.scales",
1128
+ "biases": "encoder.layer.23.attention.self.key.weight.biases"
1129
  },
1130
  {
1131
  "name": "encoder.layer.23.attention.self.query.weight",
 
1135
  ],
1136
  "dtype": "float16",
1137
  "qweight": "encoder.layer.23.attention.self.query.weight.qweight",
1138
+ "scales": "encoder.layer.23.attention.self.query.weight.scales",
1139
+ "biases": "encoder.layer.23.attention.self.query.weight.biases"
1140
  },
1141
  {
1142
  "name": "encoder.layer.23.attention.self.value.weight",
 
1146
  ],
1147
  "dtype": "float16",
1148
  "qweight": "encoder.layer.23.attention.self.value.weight.qweight",
1149
+ "scales": "encoder.layer.23.attention.self.value.weight.scales",
1150
+ "biases": "encoder.layer.23.attention.self.value.weight.biases"
1151
  },
1152
  {
1153
  "name": "encoder.layer.23.intermediate.dense.weight",
 
1157
  ],
1158
  "dtype": "float16",
1159
  "qweight": "encoder.layer.23.intermediate.dense.weight.qweight",
1160
+ "scales": "encoder.layer.23.intermediate.dense.weight.scales",
1161
+ "biases": "encoder.layer.23.intermediate.dense.weight.biases"
1162
  },
1163
  {
1164
  "name": "encoder.layer.23.output.dense.weight",
 
1168
  ],
1169
  "dtype": "float16",
1170
  "qweight": "encoder.layer.23.output.dense.weight.qweight",
1171
+ "scales": "encoder.layer.23.output.dense.weight.scales",
1172
+ "biases": "encoder.layer.23.output.dense.weight.biases"
1173
  },
1174
  {
1175
  "name": "encoder.layer.3.attention.output.dense.weight",
 
1179
  ],
1180
  "dtype": "float16",
1181
  "qweight": "encoder.layer.3.attention.output.dense.weight.qweight",
1182
+ "scales": "encoder.layer.3.attention.output.dense.weight.scales",
1183
+ "biases": "encoder.layer.3.attention.output.dense.weight.biases"
1184
  },
1185
  {
1186
  "name": "encoder.layer.3.attention.self.key.weight",
 
1190
  ],
1191
  "dtype": "float16",
1192
  "qweight": "encoder.layer.3.attention.self.key.weight.qweight",
1193
+ "scales": "encoder.layer.3.attention.self.key.weight.scales",
1194
+ "biases": "encoder.layer.3.attention.self.key.weight.biases"
1195
  },
1196
  {
1197
  "name": "encoder.layer.3.attention.self.query.weight",
 
1201
  ],
1202
  "dtype": "float16",
1203
  "qweight": "encoder.layer.3.attention.self.query.weight.qweight",
1204
+ "scales": "encoder.layer.3.attention.self.query.weight.scales",
1205
+ "biases": "encoder.layer.3.attention.self.query.weight.biases"
1206
  },
1207
  {
1208
  "name": "encoder.layer.3.attention.self.value.weight",
 
1212
  ],
1213
  "dtype": "float16",
1214
  "qweight": "encoder.layer.3.attention.self.value.weight.qweight",
1215
+ "scales": "encoder.layer.3.attention.self.value.weight.scales",
1216
+ "biases": "encoder.layer.3.attention.self.value.weight.biases"
1217
  },
1218
  {
1219
  "name": "encoder.layer.3.intermediate.dense.weight",
 
1223
  ],
1224
  "dtype": "float16",
1225
  "qweight": "encoder.layer.3.intermediate.dense.weight.qweight",
1226
+ "scales": "encoder.layer.3.intermediate.dense.weight.scales",
1227
+ "biases": "encoder.layer.3.intermediate.dense.weight.biases"
1228
  },
1229
  {
1230
  "name": "encoder.layer.3.output.dense.weight",
 
1234
  ],
1235
  "dtype": "float16",
1236
  "qweight": "encoder.layer.3.output.dense.weight.qweight",
1237
+ "scales": "encoder.layer.3.output.dense.weight.scales",
1238
+ "biases": "encoder.layer.3.output.dense.weight.biases"
1239
  },
1240
  {
1241
  "name": "encoder.layer.4.attention.output.dense.weight",
 
1245
  ],
1246
  "dtype": "float16",
1247
  "qweight": "encoder.layer.4.attention.output.dense.weight.qweight",
1248
+ "scales": "encoder.layer.4.attention.output.dense.weight.scales",
1249
+ "biases": "encoder.layer.4.attention.output.dense.weight.biases"
1250
  },
1251
  {
1252
  "name": "encoder.layer.4.attention.self.key.weight",
 
1256
  ],
1257
  "dtype": "float16",
1258
  "qweight": "encoder.layer.4.attention.self.key.weight.qweight",
1259
+ "scales": "encoder.layer.4.attention.self.key.weight.scales",
1260
+ "biases": "encoder.layer.4.attention.self.key.weight.biases"
1261
  },
1262
  {
1263
  "name": "encoder.layer.4.attention.self.query.weight",
 
1267
  ],
1268
  "dtype": "float16",
1269
  "qweight": "encoder.layer.4.attention.self.query.weight.qweight",
1270
+ "scales": "encoder.layer.4.attention.self.query.weight.scales",
1271
+ "biases": "encoder.layer.4.attention.self.query.weight.biases"
1272
  },
1273
  {
1274
  "name": "encoder.layer.4.attention.self.value.weight",
 
1278
  ],
1279
  "dtype": "float16",
1280
  "qweight": "encoder.layer.4.attention.self.value.weight.qweight",
1281
+ "scales": "encoder.layer.4.attention.self.value.weight.scales",
1282
+ "biases": "encoder.layer.4.attention.self.value.weight.biases"
1283
  },
1284
  {
1285
  "name": "encoder.layer.4.intermediate.dense.weight",
 
1289
  ],
1290
  "dtype": "float16",
1291
  "qweight": "encoder.layer.4.intermediate.dense.weight.qweight",
1292
+ "scales": "encoder.layer.4.intermediate.dense.weight.scales",
1293
+ "biases": "encoder.layer.4.intermediate.dense.weight.biases"
1294
  },
1295
  {
1296
  "name": "encoder.layer.4.output.dense.weight",
 
1300
  ],
1301
  "dtype": "float16",
1302
  "qweight": "encoder.layer.4.output.dense.weight.qweight",
1303
+ "scales": "encoder.layer.4.output.dense.weight.scales",
1304
+ "biases": "encoder.layer.4.output.dense.weight.biases"
1305
  },
1306
  {
1307
  "name": "encoder.layer.5.attention.output.dense.weight",
 
1311
  ],
1312
  "dtype": "float16",
1313
  "qweight": "encoder.layer.5.attention.output.dense.weight.qweight",
1314
+ "scales": "encoder.layer.5.attention.output.dense.weight.scales",
1315
+ "biases": "encoder.layer.5.attention.output.dense.weight.biases"
1316
  },
1317
  {
1318
  "name": "encoder.layer.5.attention.self.key.weight",
 
1322
  ],
1323
  "dtype": "float16",
1324
  "qweight": "encoder.layer.5.attention.self.key.weight.qweight",
1325
+ "scales": "encoder.layer.5.attention.self.key.weight.scales",
1326
+ "biases": "encoder.layer.5.attention.self.key.weight.biases"
1327
  },
1328
  {
1329
  "name": "encoder.layer.5.attention.self.query.weight",
 
1333
  ],
1334
  "dtype": "float16",
1335
  "qweight": "encoder.layer.5.attention.self.query.weight.qweight",
1336
+ "scales": "encoder.layer.5.attention.self.query.weight.scales",
1337
+ "biases": "encoder.layer.5.attention.self.query.weight.biases"
1338
  },
1339
  {
1340
  "name": "encoder.layer.5.attention.self.value.weight",
 
1344
  ],
1345
  "dtype": "float16",
1346
  "qweight": "encoder.layer.5.attention.self.value.weight.qweight",
1347
+ "scales": "encoder.layer.5.attention.self.value.weight.scales",
1348
+ "biases": "encoder.layer.5.attention.self.value.weight.biases"
1349
  },
1350
  {
1351
  "name": "encoder.layer.5.intermediate.dense.weight",
 
1355
  ],
1356
  "dtype": "float16",
1357
  "qweight": "encoder.layer.5.intermediate.dense.weight.qweight",
1358
+ "scales": "encoder.layer.5.intermediate.dense.weight.scales",
1359
+ "biases": "encoder.layer.5.intermediate.dense.weight.biases"
1360
  },
1361
  {
1362
  "name": "encoder.layer.5.output.dense.weight",
 
1366
  ],
1367
  "dtype": "float16",
1368
  "qweight": "encoder.layer.5.output.dense.weight.qweight",
1369
+ "scales": "encoder.layer.5.output.dense.weight.scales",
1370
+ "biases": "encoder.layer.5.output.dense.weight.biases"
1371
  },
1372
  {
1373
  "name": "encoder.layer.6.attention.output.dense.weight",
 
1377
  ],
1378
  "dtype": "float16",
1379
  "qweight": "encoder.layer.6.attention.output.dense.weight.qweight",
1380
+ "scales": "encoder.layer.6.attention.output.dense.weight.scales",
1381
+ "biases": "encoder.layer.6.attention.output.dense.weight.biases"
1382
  },
1383
  {
1384
  "name": "encoder.layer.6.attention.self.key.weight",
 
1388
  ],
1389
  "dtype": "float16",
1390
  "qweight": "encoder.layer.6.attention.self.key.weight.qweight",
1391
+ "scales": "encoder.layer.6.attention.self.key.weight.scales",
1392
+ "biases": "encoder.layer.6.attention.self.key.weight.biases"
1393
  },
1394
  {
1395
  "name": "encoder.layer.6.attention.self.query.weight",
 
1399
  ],
1400
  "dtype": "float16",
1401
  "qweight": "encoder.layer.6.attention.self.query.weight.qweight",
1402
+ "scales": "encoder.layer.6.attention.self.query.weight.scales",
1403
+ "biases": "encoder.layer.6.attention.self.query.weight.biases"
1404
  },
1405
  {
1406
  "name": "encoder.layer.6.attention.self.value.weight",
 
1410
  ],
1411
  "dtype": "float16",
1412
  "qweight": "encoder.layer.6.attention.self.value.weight.qweight",
1413
+ "scales": "encoder.layer.6.attention.self.value.weight.scales",
1414
+ "biases": "encoder.layer.6.attention.self.value.weight.biases"
1415
  },
1416
  {
1417
  "name": "encoder.layer.6.intermediate.dense.weight",
 
1421
  ],
1422
  "dtype": "float16",
1423
  "qweight": "encoder.layer.6.intermediate.dense.weight.qweight",
1424
+ "scales": "encoder.layer.6.intermediate.dense.weight.scales",
1425
+ "biases": "encoder.layer.6.intermediate.dense.weight.biases"
1426
  },
1427
  {
1428
  "name": "encoder.layer.6.output.dense.weight",
 
1432
  ],
1433
  "dtype": "float16",
1434
  "qweight": "encoder.layer.6.output.dense.weight.qweight",
1435
+ "scales": "encoder.layer.6.output.dense.weight.scales",
1436
+ "biases": "encoder.layer.6.output.dense.weight.biases"
1437
  },
1438
  {
1439
  "name": "encoder.layer.7.attention.output.dense.weight",
 
1443
  ],
1444
  "dtype": "float16",
1445
  "qweight": "encoder.layer.7.attention.output.dense.weight.qweight",
1446
+ "scales": "encoder.layer.7.attention.output.dense.weight.scales",
1447
+ "biases": "encoder.layer.7.attention.output.dense.weight.biases"
1448
  },
1449
  {
1450
  "name": "encoder.layer.7.attention.self.key.weight",
 
1454
  ],
1455
  "dtype": "float16",
1456
  "qweight": "encoder.layer.7.attention.self.key.weight.qweight",
1457
+ "scales": "encoder.layer.7.attention.self.key.weight.scales",
1458
+ "biases": "encoder.layer.7.attention.self.key.weight.biases"
1459
  },
1460
  {
1461
  "name": "encoder.layer.7.attention.self.query.weight",
 
1465
  ],
1466
  "dtype": "float16",
1467
  "qweight": "encoder.layer.7.attention.self.query.weight.qweight",
1468
+ "scales": "encoder.layer.7.attention.self.query.weight.scales",
1469
+ "biases": "encoder.layer.7.attention.self.query.weight.biases"
1470
  },
1471
  {
1472
  "name": "encoder.layer.7.attention.self.value.weight",
 
1476
  ],
1477
  "dtype": "float16",
1478
  "qweight": "encoder.layer.7.attention.self.value.weight.qweight",
1479
+ "scales": "encoder.layer.7.attention.self.value.weight.scales",
1480
+ "biases": "encoder.layer.7.attention.self.value.weight.biases"
1481
  },
1482
  {
1483
  "name": "encoder.layer.7.intermediate.dense.weight",
 
1487
  ],
1488
  "dtype": "float16",
1489
  "qweight": "encoder.layer.7.intermediate.dense.weight.qweight",
1490
+ "scales": "encoder.layer.7.intermediate.dense.weight.scales",
1491
+ "biases": "encoder.layer.7.intermediate.dense.weight.biases"
1492
  },
1493
  {
1494
  "name": "encoder.layer.7.output.dense.weight",
 
1498
  ],
1499
  "dtype": "float16",
1500
  "qweight": "encoder.layer.7.output.dense.weight.qweight",
1501
+ "scales": "encoder.layer.7.output.dense.weight.scales",
1502
+ "biases": "encoder.layer.7.output.dense.weight.biases"
1503
  },
1504
  {
1505
  "name": "encoder.layer.8.attention.output.dense.weight",
 
1509
  ],
1510
  "dtype": "float16",
1511
  "qweight": "encoder.layer.8.attention.output.dense.weight.qweight",
1512
+ "scales": "encoder.layer.8.attention.output.dense.weight.scales",
1513
+ "biases": "encoder.layer.8.attention.output.dense.weight.biases"
1514
  },
1515
  {
1516
  "name": "encoder.layer.8.attention.self.key.weight",
 
1520
  ],
1521
  "dtype": "float16",
1522
  "qweight": "encoder.layer.8.attention.self.key.weight.qweight",
1523
+ "scales": "encoder.layer.8.attention.self.key.weight.scales",
1524
+ "biases": "encoder.layer.8.attention.self.key.weight.biases"
1525
  },
1526
  {
1527
  "name": "encoder.layer.8.attention.self.query.weight",
 
1531
  ],
1532
  "dtype": "float16",
1533
  "qweight": "encoder.layer.8.attention.self.query.weight.qweight",
1534
+ "scales": "encoder.layer.8.attention.self.query.weight.scales",
1535
+ "biases": "encoder.layer.8.attention.self.query.weight.biases"
1536
  },
1537
  {
1538
  "name": "encoder.layer.8.attention.self.value.weight",
 
1542
  ],
1543
  "dtype": "float16",
1544
  "qweight": "encoder.layer.8.attention.self.value.weight.qweight",
1545
+ "scales": "encoder.layer.8.attention.self.value.weight.scales",
1546
+ "biases": "encoder.layer.8.attention.self.value.weight.biases"
1547
  },
1548
  {
1549
  "name": "encoder.layer.8.intermediate.dense.weight",
 
1553
  ],
1554
  "dtype": "float16",
1555
  "qweight": "encoder.layer.8.intermediate.dense.weight.qweight",
1556
+ "scales": "encoder.layer.8.intermediate.dense.weight.scales",
1557
+ "biases": "encoder.layer.8.intermediate.dense.weight.biases"
1558
  },
1559
  {
1560
  "name": "encoder.layer.8.output.dense.weight",
 
1564
  ],
1565
  "dtype": "float16",
1566
  "qweight": "encoder.layer.8.output.dense.weight.qweight",
1567
+ "scales": "encoder.layer.8.output.dense.weight.scales",
1568
+ "biases": "encoder.layer.8.output.dense.weight.biases"
1569
  },
1570
  {
1571
  "name": "encoder.layer.9.attention.output.dense.weight",
 
1575
  ],
1576
  "dtype": "float16",
1577
  "qweight": "encoder.layer.9.attention.output.dense.weight.qweight",
1578
+ "scales": "encoder.layer.9.attention.output.dense.weight.scales",
1579
+ "biases": "encoder.layer.9.attention.output.dense.weight.biases"
1580
  },
1581
  {
1582
  "name": "encoder.layer.9.attention.self.key.weight",
 
1586
  ],
1587
  "dtype": "float16",
1588
  "qweight": "encoder.layer.9.attention.self.key.weight.qweight",
1589
+ "scales": "encoder.layer.9.attention.self.key.weight.scales",
1590
+ "biases": "encoder.layer.9.attention.self.key.weight.biases"
1591
  },
1592
  {
1593
  "name": "encoder.layer.9.attention.self.query.weight",
 
1597
  ],
1598
  "dtype": "float16",
1599
  "qweight": "encoder.layer.9.attention.self.query.weight.qweight",
1600
+ "scales": "encoder.layer.9.attention.self.query.weight.scales",
1601
+ "biases": "encoder.layer.9.attention.self.query.weight.biases"
1602
  },
1603
  {
1604
  "name": "encoder.layer.9.attention.self.value.weight",
 
1608
  ],
1609
  "dtype": "float16",
1610
  "qweight": "encoder.layer.9.attention.self.value.weight.qweight",
1611
+ "scales": "encoder.layer.9.attention.self.value.weight.scales",
1612
+ "biases": "encoder.layer.9.attention.self.value.weight.biases"
1613
  },
1614
  {
1615
  "name": "encoder.layer.9.intermediate.dense.weight",
 
1619
  ],
1620
  "dtype": "float16",
1621
  "qweight": "encoder.layer.9.intermediate.dense.weight.qweight",
1622
+ "scales": "encoder.layer.9.intermediate.dense.weight.scales",
1623
+ "biases": "encoder.layer.9.intermediate.dense.weight.biases"
1624
  },
1625
  {
1626
  "name": "encoder.layer.9.output.dense.weight",
 
1630
  ],
1631
  "dtype": "float16",
1632
  "qweight": "encoder.layer.9.output.dense.weight.qweight",
1633
+ "scales": "encoder.layer.9.output.dense.weight.scales",
1634
+ "biases": "encoder.layer.9.output.dense.weight.biases"
1635
  },
1636
  {
1637
  "name": "pooler.dense.weight",
 
1641
  ],
1642
  "dtype": "float16",
1643
  "qweight": "pooler.dense.weight.qweight",
1644
+ "scales": "pooler.dense.weight.scales",
1645
+ "biases": "pooler.dense.weight.biases"
1646
  }
1647
  ],
1648
  "kept": [
weights.00.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:300cb491160d5d6341397f850e3f1b8bdfb3b7ecf21b1f77eeb5f75c731b99ee
3
- size 561220870
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97aaf1cdb777e1506db17616885f1d9b05a90d3c981022c7a4ee31dc9f1fddc6
3
+ size 595268674