张瀚灵 commited on
Commit
fc30cd3
·
1 Parent(s): 6a5cfa7

init commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
mlc-chat-config.json ADDED
@@ -0,0 +1,759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "asvd_llama",
3
+ "quantization": "q0f16",
4
+ "model_config": {
5
+ "hidden_size": 4096,
6
+ "intermediate_size": 14336,
7
+ "num_attention_heads": 32,
8
+ "num_hidden_layers": 32,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 77209,
11
+ "position_embedding_base": 500000.0,
12
+ "context_window_size": 131072,
13
+ "prefill_chunk_size": 131072,
14
+ "num_key_value_heads": 8,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 1,
17
+ "truncation_ranks": {
18
+ "model.layers.0.mlp.gate_proj": [
19
+ 2007,
20
+ 222
21
+ ],
22
+ "model.layers.0.self_attn.k_proj": [
23
+ 74,
24
+ 8
25
+ ],
26
+ "model.layers.0.self_attn.o_proj": [
27
+ 461,
28
+ 51
29
+ ],
30
+ "model.layers.0.self_attn.q_proj": [
31
+ 185,
32
+ 20
33
+ ],
34
+ "model.layers.0.self_attn.v_proj": [
35
+ 405,
36
+ 45
37
+ ],
38
+ "model.layers.1.mlp.gate_proj": [
39
+ 1576,
40
+ 175
41
+ ],
42
+ "model.layers.1.mlp.up_proj": [
43
+ 2007,
44
+ 222
45
+ ],
46
+ "model.layers.1.self_attn.k_proj": [
47
+ 184,
48
+ 20
49
+ ],
50
+ "model.layers.1.self_attn.o_proj": [
51
+ 738,
52
+ 81
53
+ ],
54
+ "model.layers.1.self_attn.q_proj": [
55
+ 185,
56
+ 20
57
+ ],
58
+ "model.layers.1.self_attn.v_proj": [
59
+ 295,
60
+ 32
61
+ ],
62
+ "model.layers.10.mlp.down_proj": [
63
+ 1146,
64
+ 127
65
+ ],
66
+ "model.layers.10.mlp.gate_proj": [
67
+ 1576,
68
+ 175
69
+ ],
70
+ "model.layers.10.self_attn.k_proj": [
71
+ 184,
72
+ 20
73
+ ],
74
+ "model.layers.10.self_attn.o_proj": [
75
+ 461,
76
+ 51
77
+ ],
78
+ "model.layers.10.self_attn.q_proj": [
79
+ 185,
80
+ 20
81
+ ],
82
+ "model.layers.10.self_attn.v_proj": [
83
+ 295,
84
+ 32
85
+ ],
86
+ "model.layers.11.mlp.down_proj": [
87
+ 1576,
88
+ 175
89
+ ],
90
+ "model.layers.11.self_attn.k_proj": [
91
+ 295,
92
+ 32
93
+ ],
94
+ "model.layers.11.self_attn.o_proj": [
95
+ 1014,
96
+ 112
97
+ ],
98
+ "model.layers.11.self_attn.q_proj": [
99
+ 461,
100
+ 51
101
+ ],
102
+ "model.layers.11.self_attn.v_proj": [
103
+ 405,
104
+ 45
105
+ ],
106
+ "model.layers.12.mlp.down_proj": [
107
+ 1576,
108
+ 175
109
+ ],
110
+ "model.layers.12.mlp.gate_proj": [
111
+ 1146,
112
+ 127
113
+ ],
114
+ "model.layers.12.self_attn.k_proj": [
115
+ 184,
116
+ 20
117
+ ],
118
+ "model.layers.12.self_attn.o_proj": [
119
+ 1014,
120
+ 112
121
+ ],
122
+ "model.layers.12.self_attn.q_proj": [
123
+ 461,
124
+ 51
125
+ ],
126
+ "model.layers.12.self_attn.v_proj": [
127
+ 516,
128
+ 57
129
+ ],
130
+ "model.layers.13.mlp.gate_proj": [
131
+ 1146,
132
+ 127
133
+ ],
134
+ "model.layers.13.mlp.up_proj": [
135
+ 2437,
136
+ 270
137
+ ],
138
+ "model.layers.13.self_attn.o_proj": [
139
+ 461,
140
+ 51
141
+ ],
142
+ "model.layers.13.self_attn.q_proj": [
143
+ 461,
144
+ 51
145
+ ],
146
+ "model.layers.13.self_attn.v_proj": [
147
+ 184,
148
+ 20
149
+ ],
150
+ "model.layers.14.mlp.gate_proj": [
151
+ 717,
152
+ 79
153
+ ],
154
+ "model.layers.14.self_attn.k_proj": [
155
+ 516,
156
+ 57
157
+ ],
158
+ "model.layers.14.self_attn.o_proj": [
159
+ 738,
160
+ 81
161
+ ],
162
+ "model.layers.14.self_attn.q_proj": [
163
+ 461,
164
+ 51
165
+ ],
166
+ "model.layers.14.self_attn.v_proj": [
167
+ 295,
168
+ 32
169
+ ],
170
+ "model.layers.15.mlp.gate_proj": [
171
+ 1146,
172
+ 127
173
+ ],
174
+ "model.layers.15.mlp.up_proj": [
175
+ 1146,
176
+ 127
177
+ ],
178
+ "model.layers.15.self_attn.o_proj": [
179
+ 1014,
180
+ 112
181
+ ],
182
+ "model.layers.15.self_attn.q_proj": [
183
+ 461,
184
+ 51
185
+ ],
186
+ "model.layers.16.mlp.gate_proj": [
187
+ 1576,
188
+ 175
189
+ ],
190
+ "model.layers.16.mlp.up_proj": [
191
+ 1576,
192
+ 175
193
+ ],
194
+ "model.layers.16.self_attn.k_proj": [
195
+ 184,
196
+ 20
197
+ ],
198
+ "model.layers.16.self_attn.o_proj": [
199
+ 1014,
200
+ 112
201
+ ],
202
+ "model.layers.16.self_attn.q_proj": [
203
+ 185,
204
+ 20
205
+ ],
206
+ "model.layers.17.mlp.gate_proj": [
207
+ 1576,
208
+ 175
209
+ ],
210
+ "model.layers.17.mlp.up_proj": [
211
+ 1576,
212
+ 175
213
+ ],
214
+ "model.layers.17.self_attn.k_proj": [
215
+ 74,
216
+ 8
217
+ ],
218
+ "model.layers.17.self_attn.o_proj": [
219
+ 738,
220
+ 81
221
+ ],
222
+ "model.layers.17.self_attn.q_proj": [
223
+ 185,
224
+ 20
225
+ ],
226
+ "model.layers.18.mlp.gate_proj": [
227
+ 1576,
228
+ 175
229
+ ],
230
+ "model.layers.18.mlp.up_proj": [
231
+ 1576,
232
+ 175
233
+ ],
234
+ "model.layers.18.self_attn.k_proj": [
235
+ 184,
236
+ 20
237
+ ],
238
+ "model.layers.18.self_attn.o_proj": [
239
+ 461,
240
+ 51
241
+ ],
242
+ "model.layers.18.self_attn.q_proj": [
243
+ 185,
244
+ 20
245
+ ],
246
+ "model.layers.18.self_attn.v_proj": [
247
+ 295,
248
+ 32
249
+ ],
250
+ "model.layers.19.mlp.gate_proj": [
251
+ 2007,
252
+ 222
253
+ ],
254
+ "model.layers.19.mlp.up_proj": [
255
+ 1576,
256
+ 175
257
+ ],
258
+ "model.layers.19.self_attn.k_proj": [
259
+ 74,
260
+ 8
261
+ ],
262
+ "model.layers.19.self_attn.o_proj": [
263
+ 185,
264
+ 20
265
+ ],
266
+ "model.layers.19.self_attn.q_proj": [
267
+ 185,
268
+ 20
269
+ ],
270
+ "model.layers.19.self_attn.v_proj": [
271
+ 74,
272
+ 8
273
+ ],
274
+ "model.layers.2.self_attn.k_proj": [
275
+ 184,
276
+ 20
277
+ ],
278
+ "model.layers.2.self_attn.o_proj": [
279
+ 461,
280
+ 51
281
+ ],
282
+ "model.layers.2.self_attn.q_proj": [
283
+ 185,
284
+ 20
285
+ ],
286
+ "model.layers.20.mlp.gate_proj": [
287
+ 2007,
288
+ 222
289
+ ],
290
+ "model.layers.20.mlp.up_proj": [
291
+ 2007,
292
+ 222
293
+ ],
294
+ "model.layers.20.self_attn.k_proj": [
295
+ 74,
296
+ 8
297
+ ],
298
+ "model.layers.20.self_attn.o_proj": [
299
+ 185,
300
+ 20
301
+ ],
302
+ "model.layers.20.self_attn.q_proj": [
303
+ 185,
304
+ 20
305
+ ],
306
+ "model.layers.20.self_attn.v_proj": [
307
+ 74,
308
+ 8
309
+ ],
310
+ "model.layers.21.mlp.gate_proj": [
311
+ 2007,
312
+ 222
313
+ ],
314
+ "model.layers.21.mlp.up_proj": [
315
+ 1576,
316
+ 175
317
+ ],
318
+ "model.layers.21.self_attn.k_proj": [
319
+ 184,
320
+ 20
321
+ ],
322
+ "model.layers.21.self_attn.o_proj": [
323
+ 461,
324
+ 51
325
+ ],
326
+ "model.layers.21.self_attn.q_proj": [
327
+ 185,
328
+ 20
329
+ ],
330
+ "model.layers.21.self_attn.v_proj": [
331
+ 295,
332
+ 32
333
+ ],
334
+ "model.layers.22.mlp.gate_proj": [
335
+ 1576,
336
+ 175
337
+ ],
338
+ "model.layers.22.mlp.up_proj": [
339
+ 1576,
340
+ 175
341
+ ],
342
+ "model.layers.22.self_attn.k_proj": [
343
+ 74,
344
+ 8
345
+ ],
346
+ "model.layers.22.self_attn.o_proj": [
347
+ 461,
348
+ 51
349
+ ],
350
+ "model.layers.22.self_attn.q_proj": [
351
+ 185,
352
+ 20
353
+ ],
354
+ "model.layers.22.self_attn.v_proj": [
355
+ 295,
356
+ 32
357
+ ],
358
+ "model.layers.23.mlp.down_proj": [
359
+ 2437,
360
+ 270
361
+ ],
362
+ "model.layers.23.mlp.gate_proj": [
363
+ 1576,
364
+ 175
365
+ ],
366
+ "model.layers.23.mlp.up_proj": [
367
+ 1146,
368
+ 127
369
+ ],
370
+ "model.layers.23.self_attn.k_proj": [
371
+ 74,
372
+ 8
373
+ ],
374
+ "model.layers.23.self_attn.o_proj": [
375
+ 185,
376
+ 20
377
+ ],
378
+ "model.layers.23.self_attn.q_proj": [
379
+ 185,
380
+ 20
381
+ ],
382
+ "model.layers.23.self_attn.v_proj": [
383
+ 184,
384
+ 20
385
+ ],
386
+ "model.layers.24.mlp.down_proj": [
387
+ 1576,
388
+ 175
389
+ ],
390
+ "model.layers.24.mlp.gate_proj": [
391
+ 1576,
392
+ 175
393
+ ],
394
+ "model.layers.24.mlp.up_proj": [
395
+ 1576,
396
+ 175
397
+ ],
398
+ "model.layers.24.self_attn.k_proj": [
399
+ 74,
400
+ 8
401
+ ],
402
+ "model.layers.24.self_attn.o_proj": [
403
+ 185,
404
+ 20
405
+ ],
406
+ "model.layers.24.self_attn.q_proj": [
407
+ 185,
408
+ 20
409
+ ],
410
+ "model.layers.24.self_attn.v_proj": [
411
+ 74,
412
+ 8
413
+ ],
414
+ "model.layers.25.mlp.down_proj": [
415
+ 2437,
416
+ 270
417
+ ],
418
+ "model.layers.25.mlp.gate_proj": [
419
+ 1576,
420
+ 175
421
+ ],
422
+ "model.layers.25.mlp.up_proj": [
423
+ 1576,
424
+ 175
425
+ ],
426
+ "model.layers.25.self_attn.k_proj": [
427
+ 74,
428
+ 8
429
+ ],
430
+ "model.layers.25.self_attn.o_proj": [
431
+ 185,
432
+ 20
433
+ ],
434
+ "model.layers.25.self_attn.q_proj": [
435
+ 185,
436
+ 20
437
+ ],
438
+ "model.layers.25.self_attn.v_proj": [
439
+ 184,
440
+ 20
441
+ ],
442
+ "model.layers.26.mlp.down_proj": [
443
+ 1146,
444
+ 127
445
+ ],
446
+ "model.layers.26.mlp.gate_proj": [
447
+ 1146,
448
+ 127
449
+ ],
450
+ "model.layers.26.mlp.up_proj": [
451
+ 1146,
452
+ 127
453
+ ],
454
+ "model.layers.26.self_attn.k_proj": [
455
+ 74,
456
+ 8
457
+ ],
458
+ "model.layers.26.self_attn.o_proj": [
459
+ 185,
460
+ 20
461
+ ],
462
+ "model.layers.26.self_attn.q_proj": [
463
+ 185,
464
+ 20
465
+ ],
466
+ "model.layers.26.self_attn.v_proj": [
467
+ 74,
468
+ 8
469
+ ],
470
+ "model.layers.27.mlp.down_proj": [
471
+ 1146,
472
+ 127
473
+ ],
474
+ "model.layers.27.mlp.gate_proj": [
475
+ 1146,
476
+ 127
477
+ ],
478
+ "model.layers.27.mlp.up_proj": [
479
+ 1146,
480
+ 127
481
+ ],
482
+ "model.layers.27.self_attn.k_proj": [
483
+ 184,
484
+ 20
485
+ ],
486
+ "model.layers.27.self_attn.o_proj": [
487
+ 461,
488
+ 51
489
+ ],
490
+ "model.layers.27.self_attn.q_proj": [
491
+ 185,
492
+ 20
493
+ ],
494
+ "model.layers.27.self_attn.v_proj": [
495
+ 184,
496
+ 20
497
+ ],
498
+ "model.layers.28.mlp.down_proj": [
499
+ 1146,
500
+ 127
501
+ ],
502
+ "model.layers.28.mlp.gate_proj": [
503
+ 1146,
504
+ 127
505
+ ],
506
+ "model.layers.28.mlp.up_proj": [
507
+ 1146,
508
+ 127
509
+ ],
510
+ "model.layers.28.self_attn.k_proj": [
511
+ 184,
512
+ 20
513
+ ],
514
+ "model.layers.28.self_attn.o_proj": [
515
+ 461,
516
+ 51
517
+ ],
518
+ "model.layers.28.self_attn.q_proj": [
519
+ 185,
520
+ 20
521
+ ],
522
+ "model.layers.28.self_attn.v_proj": [
523
+ 405,
524
+ 45
525
+ ],
526
+ "model.layers.29.mlp.down_proj": [
527
+ 1146,
528
+ 127
529
+ ],
530
+ "model.layers.29.mlp.gate_proj": [
531
+ 1576,
532
+ 175
533
+ ],
534
+ "model.layers.29.mlp.up_proj": [
535
+ 287,
536
+ 31
537
+ ],
538
+ "model.layers.29.self_attn.k_proj": [
539
+ 295,
540
+ 32
541
+ ],
542
+ "model.layers.29.self_attn.o_proj": [
543
+ 461,
544
+ 51
545
+ ],
546
+ "model.layers.29.self_attn.q_proj": [
547
+ 185,
548
+ 20
549
+ ],
550
+ "model.layers.29.self_attn.v_proj": [
551
+ 184,
552
+ 20
553
+ ],
554
+ "model.layers.3.mlp.gate_proj": [
555
+ 2007,
556
+ 222
557
+ ],
558
+ "model.layers.3.self_attn.k_proj": [
559
+ 516,
560
+ 57
561
+ ],
562
+ "model.layers.3.self_attn.o_proj": [
563
+ 1014,
564
+ 112
565
+ ],
566
+ "model.layers.3.self_attn.q_proj": [
567
+ 185,
568
+ 20
569
+ ],
570
+ "model.layers.30.mlp.down_proj": [
571
+ 2007,
572
+ 222
573
+ ],
574
+ "model.layers.30.mlp.gate_proj": [
575
+ 1576,
576
+ 175
577
+ ],
578
+ "model.layers.30.self_attn.k_proj": [
579
+ 184,
580
+ 20
581
+ ],
582
+ "model.layers.30.self_attn.o_proj": [
583
+ 461,
584
+ 51
585
+ ],
586
+ "model.layers.30.self_attn.q_proj": [
587
+ 185,
588
+ 20
589
+ ],
590
+ "model.layers.30.self_attn.v_proj": [
591
+ 405,
592
+ 45
593
+ ],
594
+ "model.layers.31.mlp.down_proj": [
595
+ 1576,
596
+ 175
597
+ ],
598
+ "model.layers.31.mlp.gate_proj": [
599
+ 1576,
600
+ 175
601
+ ],
602
+ "model.layers.31.self_attn.k_proj": [
603
+ 184,
604
+ 20
605
+ ],
606
+ "model.layers.31.self_attn.o_proj": [
607
+ 461,
608
+ 51
609
+ ],
610
+ "model.layers.31.self_attn.q_proj": [
611
+ 185,
612
+ 20
613
+ ],
614
+ "model.layers.4.self_attn.k_proj": [
615
+ 184,
616
+ 20
617
+ ],
618
+ "model.layers.4.self_attn.o_proj": [
619
+ 738,
620
+ 81
621
+ ],
622
+ "model.layers.4.self_attn.q_proj": [
623
+ 185,
624
+ 20
625
+ ],
626
+ "model.layers.5.mlp.down_proj": [
627
+ 1576,
628
+ 175
629
+ ],
630
+ "model.layers.5.mlp.up_proj": [
631
+ 2007,
632
+ 222
633
+ ],
634
+ "model.layers.5.self_attn.k_proj": [
635
+ 295,
636
+ 32
637
+ ],
638
+ "model.layers.5.self_attn.o_proj": [
639
+ 461,
640
+ 51
641
+ ],
642
+ "model.layers.5.self_attn.q_proj": [
643
+ 185,
644
+ 20
645
+ ],
646
+ "model.layers.5.self_attn.v_proj": [
647
+ 184,
648
+ 20
649
+ ],
650
+ "model.layers.6.self_attn.k_proj": [
651
+ 74,
652
+ 8
653
+ ],
654
+ "model.layers.6.self_attn.o_proj": [
655
+ 1014,
656
+ 112
657
+ ],
658
+ "model.layers.6.self_attn.q_proj": [
659
+ 185,
660
+ 20
661
+ ],
662
+ "model.layers.6.self_attn.v_proj": [
663
+ 516,
664
+ 57
665
+ ],
666
+ "model.layers.7.self_attn.o_proj": [
667
+ 1014,
668
+ 112
669
+ ],
670
+ "model.layers.7.self_attn.q_proj": [
671
+ 461,
672
+ 51
673
+ ],
674
+ "model.layers.7.self_attn.v_proj": [
675
+ 405,
676
+ 45
677
+ ],
678
+ "model.layers.8.mlp.down_proj": [
679
+ 717,
680
+ 79
681
+ ],
682
+ "model.layers.8.mlp.gate_proj": [
683
+ 717,
684
+ 79
685
+ ],
686
+ "model.layers.8.mlp.up_proj": [
687
+ 1146,
688
+ 127
689
+ ],
690
+ "model.layers.8.self_attn.k_proj": [
691
+ 74,
692
+ 8
693
+ ],
694
+ "model.layers.8.self_attn.o_proj": [
695
+ 185,
696
+ 20
697
+ ],
698
+ "model.layers.8.self_attn.q_proj": [
699
+ 185,
700
+ 20
701
+ ],
702
+ "model.layers.8.self_attn.v_proj": [
703
+ 74,
704
+ 8
705
+ ],
706
+ "model.layers.9.mlp.down_proj": [
707
+ 2007,
708
+ 222
709
+ ],
710
+ "model.layers.9.mlp.up_proj": [
711
+ 2007,
712
+ 222
713
+ ],
714
+ "model.layers.9.self_attn.k_proj": [
715
+ 184,
716
+ 20
717
+ ],
718
+ "model.layers.9.self_attn.o_proj": [
719
+ 461,
720
+ 51
721
+ ],
722
+ "model.layers.9.self_attn.q_proj": [
723
+ 461,
724
+ 51
725
+ ],
726
+ "model.layers.9.self_attn.v_proj": [
727
+ 516,
728
+ 57
729
+ ]
730
+ },
731
+ "max_batch_size": 1
732
+ },
733
+ "vocab_size": 77209,
734
+ "context_window_size": 131072,
735
+ "sliding_window_size": -1,
736
+ "prefill_chunk_size": 131072,
737
+ "attention_sink_size": -1,
738
+ "tensor_parallel_shards": 1,
739
+ "max_batch_size": 80,
740
+ "mean_gen_len": 128,
741
+ "max_gen_len": 512,
742
+ "shift_fill_factor": 0.3,
743
+ "temperature": 0.6,
744
+ "repetition_penalty": 1.0,
745
+ "top_p": 0.9,
746
+ "conv_template": "LM",
747
+ "pad_token_id": 0,
748
+ "bos_token_id": 128000,
749
+ "eos_token_id": [
750
+ 128001,
751
+ 128008,
752
+ 128009
753
+ ],
754
+ "tokenizer_files": [
755
+ "tokenizer.json",
756
+ "tokenizer_config.json"
757
+ ],
758
+ "version": "0.1.0"
759
+ }
ndarray-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d3233e5b85aaca723a8057cd5c20c6a080300145407df23119ab197343a3cb
3
+ size 632496128
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71ad79ed5e8349e6d35cdb53e00d20c76dd7ab1d3252293ebcda4ccaae118a1f
3
+ size 117440512
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea01e3daa14023bccacec456da4fe7166622a0f240e3d1be087e57d01597dbaf
3
+ size 45187072
params_shard_100.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d439ac469adc2a0f52cba572006d29df63387d7f76f35686b16c589c872d1fdf
3
+ size 33112064
params_shard_101.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dda676a8d317caa73347836420b1e9ff92c51a462a7dfd071acc139943dd7c8
3
+ size 32858112
params_shard_102.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57fdc8b316712f8b703d938cef56d9b794ab2c95d0ce91aea28a4fde26266f27
3
+ size 45187072
params_shard_103.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2183235ce1c921b5597dc15a2fb65fedf7a7ad4cb92ff6d9f96d1369ab9729
3
+ size 26466304
params_shard_104.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac83b75ed0a02322431eefaed728c79a1a28b4371189ddc602a547f541f4fd1
3
+ size 117440512
params_shard_105.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30c426bedc25d5288f7d2800c91aaafcb584963b074d95f5d7b181703c9a6ab
3
+ size 31545344
params_shard_106.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd72d5f62129201a651a227ec6187a0b1f205c017b7fba22d7abb42e6aae0e64
3
+ size 45187072
params_shard_107.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e949ddcf31509f9eb6d6fac9c8b04750cff629c2292dd380af077012e687a5
3
+ size 117440512
params_shard_108.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99ce5815e7258bc824dfe3f29fad7b80291d960f33856506085570772dbb90d
3
+ size 117440512
params_shard_109.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73440539c0aaa3277b459ee85b37606769748c42b682ebda60720d9b2750eea8
3
+ size 31942656
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286b53e709d41d0116ec802081f682e4a5b53c7cb4f2ea287d5d9681d669cbcc
3
+ size 31027200
params_shard_110.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a70f6f03667808e6a3c2cb1b55fe114dc9be40b91701bd6f1a89ea5746a304e
3
+ size 22228992
params_shard_111.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe53099ccb172a305fa1bca72cd6cf7e6a43249853c5f8e6ff4f74f82b576ca5
3
+ size 45187072
params_shard_112.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def4dbda87d6c59c94d7b67e6b0be4b16843bfe5da55f233d65abcac979c7848
3
+ size 32858112
params_shard_113.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c49fd14708fec6c77ce8ec97880b9be754b3734b2a990ffacfb3d665edd26bfb
3
+ size 117440512
params_shard_114.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81100f4a872e26a77bfff715ebff2c7e0be63d1772ede331999ee6f229e0d056
3
+ size 33439744
params_shard_115.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8255e795473e2b6fc69ee7a72ca6b54de56c8c166468f43b84ec452f7a66db
3
+ size 30099456
params_shard_116.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e086f74692dfa8217f6f924001ced29f7a3250bf5da84b5cc5ec0a59ec1653a
3
+ size 117440512
params_shard_117.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a3efccc410f6a398273538e59cb89e4e6a2b5a9c5d5d5f6f6b80c208d3af32
3
+ size 32858112
params_shard_118.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deeddc3d8682f723e5a027c5799d6c9066f58efca0f653325eef491e7c88dfd0
3
+ size 69873664
params_shard_119.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97666ea506d8356f816eef34c4b8ae83f0d43aa58ea302e86398e8de7725a01f
3
+ size 19963904
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ffa4f110581d43f49750c4432eb027ecafd98ca6f7e64c8c9e201eb422d25d4
3
+ size 45187072
params_shard_120.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b8eda758b330c817ff3cb3621da7d196024b4c5bec1ac53f0744f13fe537a2
3
+ size 28733440
params_shard_121.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5261e30244b55835f92b13ba9fddde15b8041a443863cc50a212c4661485b2
3
+ size 33122304
params_shard_122.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ee7689a755ce25b029a42efc4a1d48827bf62fa4266a56dbee79bd515e0587e
3
+ size 117440512
params_shard_123.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3fd01eed5f5a52263d4c6f25201a21e1264b1d410c37e46f57c62a232ddacdf
3
+ size 117440512
params_shard_124.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df80d5e00232afc5c78117c7610db276fb25db6a111776187cc66ec4c22582f2
3
+ size 117440512
params_shard_125.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d3d26c03213aa0666dccbf9217b7c601fe9b30fae28408d2011e506eff5392e
3
+ size 32299008
params_shard_126.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7396186ecd5c8d394e78b65f09ba97cebaec824d4d4617fd27481cfb8524b052
3
+ size 117440512
params_shard_127.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:264898b13aa03ff3d10d8008458cebee115e24a8ca04160b0401b0dc6d17714d
3
+ size 57544704
params_shard_128.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6b3e3910aeeb37e401de92395ea1fd40e4d086c350c44c117c44304bd614b0
3
+ size 117440512
params_shard_129.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dfe4af4aa0f67a51b5b55c118ced79f05df19aa422a32aae1c68d850dd2dcd4
3
+ size 33030144
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bf797cc78e289638733397129c737f447aae0c6192e2643cc6446bc9a19fee
3
+ size 32272384
params_shard_130.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a26991ddf66237510378aa015b608a5fec74be52cd6b05789e1b26f31527a4
3
+ size 27674624
params_shard_131.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62210b28af08648f273fb5790944b78185fb263cf8fc9d999f6a29df04f0b264
3
+ size 117440512
params_shard_132.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:674702c498130c4a565635a3c7af830c666581f72bcd90d1c34610e10f298c84
3
+ size 117440512
params_shard_133.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6874d939b77589c7aff53e8de02b2ad79c50a4c388b4694d55417d1ccbccec16
3
+ size 117440512
params_shard_134.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f18c658b512ecea52ba1cf91a0b0e140c63112fc639714d441c09caec66b37
3
+ size 27271168
params_shard_135.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19dc723fc396bf87cacacc6189eb0781033bee7ece9393bc564b5f89f7beaa54
3
+ size 45187072
params_shard_136.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7265fb9280268170062b39bc0154662e2ac7e48cbea731851ee3f7e19874b99d
3
+ size 117440512
params_shard_137.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dddfec9a7cf205e4c249484030008ec265f90ad2a97a7d29b052a277c327beb7
3
+ size 57544704
params_shard_138.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2701cbc2dddb976dba62deb8af03aa405e85ac473ef82805f9ea6d11f7f27dac
3
+ size 27758592
params_shard_139.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a138ee7c8feefe6690bd3e6f890b4fe730783c06c604c3b3be16140874bfe7e4
3
+ size 32176128
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809eb55a1f07debe1c603e2ad9b3f2d45d43a35bfbf58cb7506b1fe24e272592
3
+ size 25337856
params_shard_140.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0531ac9e5af64e9a3657bc995292c238e13edd1bb580611acb3d6b924d9666ab
3
+ size 117440512