update config.json

#4
Files changed (1) hide show
  1. config.json +780 -2
config.json CHANGED
@@ -39,7 +39,6 @@
39
  "quantization_config": {
40
  "algo_config": null,
41
  "exclude": [
42
- "re:model.layers.61.*",
43
  "model.layers.0.self_attn.q_a_proj",
44
  "model.layers.0.self_attn.q_b_proj",
45
  "model.layers.0.self_attn.kv_a_proj_with_mqa",
@@ -345,6 +344,785 @@
345
  "model.layers.60.self_attn.kv_a_proj_with_mqa",
346
  "model.layers.60.self_attn.kv_b_proj",
347
  "model.layers.60.self_attn.o_proj",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  "lm_head"
349
  ],
350
  "export": {
@@ -396,7 +1174,7 @@
396
  "quant_method": "quark",
397
  "quant_mode": "eager_mode",
398
  "softmax_quant_spec": null,
399
- "version": "0.11"
400
  },
401
  "rms_norm_eps": 1e-06,
402
  "rope_scaling": {
 
39
  "quantization_config": {
40
  "algo_config": null,
41
  "exclude": [
 
42
  "model.layers.0.self_attn.q_a_proj",
43
  "model.layers.0.self_attn.q_b_proj",
44
  "model.layers.0.self_attn.kv_a_proj_with_mqa",
 
344
  "model.layers.60.self_attn.kv_a_proj_with_mqa",
345
  "model.layers.60.self_attn.kv_b_proj",
346
  "model.layers.60.self_attn.o_proj",
347
+ "model.layers.61.embed_tokens",
348
+ "model.layers.61.eh_proj",
349
+ "model.layers.61.self_attn.q_a_proj",
350
+ "model.layers.61.self_attn.q_b_proj",
351
+ "model.layers.61.self_attn.kv_a_proj_with_mqa",
352
+ "model.layers.61.self_attn.kv_b_proj",
353
+ "model.layers.61.self_attn.o_proj",
354
+ "model.layers.61.mlp.experts.0.gate_proj",
355
+ "model.layers.61.mlp.experts.0.up_proj",
356
+ "model.layers.61.mlp.experts.0.down_proj",
357
+ "model.layers.61.mlp.experts.1.gate_proj",
358
+ "model.layers.61.mlp.experts.1.up_proj",
359
+ "model.layers.61.mlp.experts.1.down_proj",
360
+ "model.layers.61.mlp.experts.2.gate_proj",
361
+ "model.layers.61.mlp.experts.2.up_proj",
362
+ "model.layers.61.mlp.experts.2.down_proj",
363
+ "model.layers.61.mlp.experts.3.gate_proj",
364
+ "model.layers.61.mlp.experts.3.up_proj",
365
+ "model.layers.61.mlp.experts.3.down_proj",
366
+ "model.layers.61.mlp.experts.4.gate_proj",
367
+ "model.layers.61.mlp.experts.4.up_proj",
368
+ "model.layers.61.mlp.experts.4.down_proj",
369
+ "model.layers.61.mlp.experts.5.gate_proj",
370
+ "model.layers.61.mlp.experts.5.up_proj",
371
+ "model.layers.61.mlp.experts.5.down_proj",
372
+ "model.layers.61.mlp.experts.6.gate_proj",
373
+ "model.layers.61.mlp.experts.6.up_proj",
374
+ "model.layers.61.mlp.experts.6.down_proj",
375
+ "model.layers.61.mlp.experts.7.gate_proj",
376
+ "model.layers.61.mlp.experts.7.up_proj",
377
+ "model.layers.61.mlp.experts.7.down_proj",
378
+ "model.layers.61.mlp.experts.8.gate_proj",
379
+ "model.layers.61.mlp.experts.8.up_proj",
380
+ "model.layers.61.mlp.experts.8.down_proj",
381
+ "model.layers.61.mlp.experts.9.gate_proj",
382
+ "model.layers.61.mlp.experts.9.up_proj",
383
+ "model.layers.61.mlp.experts.9.down_proj",
384
+ "model.layers.61.mlp.experts.10.gate_proj",
385
+ "model.layers.61.mlp.experts.10.up_proj",
386
+ "model.layers.61.mlp.experts.10.down_proj",
387
+ "model.layers.61.mlp.experts.11.gate_proj",
388
+ "model.layers.61.mlp.experts.11.up_proj",
389
+ "model.layers.61.mlp.experts.11.down_proj",
390
+ "model.layers.61.mlp.experts.12.gate_proj",
391
+ "model.layers.61.mlp.experts.12.up_proj",
392
+ "model.layers.61.mlp.experts.12.down_proj",
393
+ "model.layers.61.mlp.experts.13.gate_proj",
394
+ "model.layers.61.mlp.experts.13.up_proj",
395
+ "model.layers.61.mlp.experts.13.down_proj",
396
+ "model.layers.61.mlp.experts.14.gate_proj",
397
+ "model.layers.61.mlp.experts.14.up_proj",
398
+ "model.layers.61.mlp.experts.14.down_proj",
399
+ "model.layers.61.mlp.experts.15.gate_proj",
400
+ "model.layers.61.mlp.experts.15.up_proj",
401
+ "model.layers.61.mlp.experts.15.down_proj",
402
+ "model.layers.61.mlp.experts.16.gate_proj",
403
+ "model.layers.61.mlp.experts.16.up_proj",
404
+ "model.layers.61.mlp.experts.16.down_proj",
405
+ "model.layers.61.mlp.experts.17.gate_proj",
406
+ "model.layers.61.mlp.experts.17.up_proj",
407
+ "model.layers.61.mlp.experts.17.down_proj",
408
+ "model.layers.61.mlp.experts.18.gate_proj",
409
+ "model.layers.61.mlp.experts.18.up_proj",
410
+ "model.layers.61.mlp.experts.18.down_proj",
411
+ "model.layers.61.mlp.experts.19.gate_proj",
412
+ "model.layers.61.mlp.experts.19.up_proj",
413
+ "model.layers.61.mlp.experts.19.down_proj",
414
+ "model.layers.61.mlp.experts.20.gate_proj",
415
+ "model.layers.61.mlp.experts.20.up_proj",
416
+ "model.layers.61.mlp.experts.20.down_proj",
417
+ "model.layers.61.mlp.experts.21.gate_proj",
418
+ "model.layers.61.mlp.experts.21.up_proj",
419
+ "model.layers.61.mlp.experts.21.down_proj",
420
+ "model.layers.61.mlp.experts.22.gate_proj",
421
+ "model.layers.61.mlp.experts.22.up_proj",
422
+ "model.layers.61.mlp.experts.22.down_proj",
423
+ "model.layers.61.mlp.experts.23.gate_proj",
424
+ "model.layers.61.mlp.experts.23.up_proj",
425
+ "model.layers.61.mlp.experts.23.down_proj",
426
+ "model.layers.61.mlp.experts.24.gate_proj",
427
+ "model.layers.61.mlp.experts.24.up_proj",
428
+ "model.layers.61.mlp.experts.24.down_proj",
429
+ "model.layers.61.mlp.experts.25.gate_proj",
430
+ "model.layers.61.mlp.experts.25.up_proj",
431
+ "model.layers.61.mlp.experts.25.down_proj",
432
+ "model.layers.61.mlp.experts.26.gate_proj",
433
+ "model.layers.61.mlp.experts.26.up_proj",
434
+ "model.layers.61.mlp.experts.26.down_proj",
435
+ "model.layers.61.mlp.experts.27.gate_proj",
436
+ "model.layers.61.mlp.experts.27.up_proj",
437
+ "model.layers.61.mlp.experts.27.down_proj",
438
+ "model.layers.61.mlp.experts.28.gate_proj",
439
+ "model.layers.61.mlp.experts.28.up_proj",
440
+ "model.layers.61.mlp.experts.28.down_proj",
441
+ "model.layers.61.mlp.experts.29.gate_proj",
442
+ "model.layers.61.mlp.experts.29.up_proj",
443
+ "model.layers.61.mlp.experts.29.down_proj",
444
+ "model.layers.61.mlp.experts.30.gate_proj",
445
+ "model.layers.61.mlp.experts.30.up_proj",
446
+ "model.layers.61.mlp.experts.30.down_proj",
447
+ "model.layers.61.mlp.experts.31.gate_proj",
448
+ "model.layers.61.mlp.experts.31.up_proj",
449
+ "model.layers.61.mlp.experts.31.down_proj",
450
+ "model.layers.61.mlp.experts.32.gate_proj",
451
+ "model.layers.61.mlp.experts.32.up_proj",
452
+ "model.layers.61.mlp.experts.32.down_proj",
453
+ "model.layers.61.mlp.experts.33.gate_proj",
454
+ "model.layers.61.mlp.experts.33.up_proj",
455
+ "model.layers.61.mlp.experts.33.down_proj",
456
+ "model.layers.61.mlp.experts.34.gate_proj",
457
+ "model.layers.61.mlp.experts.34.up_proj",
458
+ "model.layers.61.mlp.experts.34.down_proj",
459
+ "model.layers.61.mlp.experts.35.gate_proj",
460
+ "model.layers.61.mlp.experts.35.up_proj",
461
+ "model.layers.61.mlp.experts.35.down_proj",
462
+ "model.layers.61.mlp.experts.36.gate_proj",
463
+ "model.layers.61.mlp.experts.36.up_proj",
464
+ "model.layers.61.mlp.experts.36.down_proj",
465
+ "model.layers.61.mlp.experts.37.gate_proj",
466
+ "model.layers.61.mlp.experts.37.up_proj",
467
+ "model.layers.61.mlp.experts.37.down_proj",
468
+ "model.layers.61.mlp.experts.38.gate_proj",
469
+ "model.layers.61.mlp.experts.38.up_proj",
470
+ "model.layers.61.mlp.experts.38.down_proj",
471
+ "model.layers.61.mlp.experts.39.gate_proj",
472
+ "model.layers.61.mlp.experts.39.up_proj",
473
+ "model.layers.61.mlp.experts.39.down_proj",
474
+ "model.layers.61.mlp.experts.40.gate_proj",
475
+ "model.layers.61.mlp.experts.40.up_proj",
476
+ "model.layers.61.mlp.experts.40.down_proj",
477
+ "model.layers.61.mlp.experts.41.gate_proj",
478
+ "model.layers.61.mlp.experts.41.up_proj",
479
+ "model.layers.61.mlp.experts.41.down_proj",
480
+ "model.layers.61.mlp.experts.42.gate_proj",
481
+ "model.layers.61.mlp.experts.42.up_proj",
482
+ "model.layers.61.mlp.experts.42.down_proj",
483
+ "model.layers.61.mlp.experts.43.gate_proj",
484
+ "model.layers.61.mlp.experts.43.up_proj",
485
+ "model.layers.61.mlp.experts.43.down_proj",
486
+ "model.layers.61.mlp.experts.44.gate_proj",
487
+ "model.layers.61.mlp.experts.44.up_proj",
488
+ "model.layers.61.mlp.experts.44.down_proj",
489
+ "model.layers.61.mlp.experts.45.gate_proj",
490
+ "model.layers.61.mlp.experts.45.up_proj",
491
+ "model.layers.61.mlp.experts.45.down_proj",
492
+ "model.layers.61.mlp.experts.46.gate_proj",
493
+ "model.layers.61.mlp.experts.46.up_proj",
494
+ "model.layers.61.mlp.experts.46.down_proj",
495
+ "model.layers.61.mlp.experts.47.gate_proj",
496
+ "model.layers.61.mlp.experts.47.up_proj",
497
+ "model.layers.61.mlp.experts.47.down_proj",
498
+ "model.layers.61.mlp.experts.48.gate_proj",
499
+ "model.layers.61.mlp.experts.48.up_proj",
500
+ "model.layers.61.mlp.experts.48.down_proj",
501
+ "model.layers.61.mlp.experts.49.gate_proj",
502
+ "model.layers.61.mlp.experts.49.up_proj",
503
+ "model.layers.61.mlp.experts.49.down_proj",
504
+ "model.layers.61.mlp.experts.50.gate_proj",
505
+ "model.layers.61.mlp.experts.50.up_proj",
506
+ "model.layers.61.mlp.experts.50.down_proj",
507
+ "model.layers.61.mlp.experts.51.gate_proj",
508
+ "model.layers.61.mlp.experts.51.up_proj",
509
+ "model.layers.61.mlp.experts.51.down_proj",
510
+ "model.layers.61.mlp.experts.52.gate_proj",
511
+ "model.layers.61.mlp.experts.52.up_proj",
512
+ "model.layers.61.mlp.experts.52.down_proj",
513
+ "model.layers.61.mlp.experts.53.gate_proj",
514
+ "model.layers.61.mlp.experts.53.up_proj",
515
+ "model.layers.61.mlp.experts.53.down_proj",
516
+ "model.layers.61.mlp.experts.54.gate_proj",
517
+ "model.layers.61.mlp.experts.54.up_proj",
518
+ "model.layers.61.mlp.experts.54.down_proj",
519
+ "model.layers.61.mlp.experts.55.gate_proj",
520
+ "model.layers.61.mlp.experts.55.up_proj",
521
+ "model.layers.61.mlp.experts.55.down_proj",
522
+ "model.layers.61.mlp.experts.56.gate_proj",
523
+ "model.layers.61.mlp.experts.56.up_proj",
524
+ "model.layers.61.mlp.experts.56.down_proj",
525
+ "model.layers.61.mlp.experts.57.gate_proj",
526
+ "model.layers.61.mlp.experts.57.up_proj",
527
+ "model.layers.61.mlp.experts.57.down_proj",
528
+ "model.layers.61.mlp.experts.58.gate_proj",
529
+ "model.layers.61.mlp.experts.58.up_proj",
530
+ "model.layers.61.mlp.experts.58.down_proj",
531
+ "model.layers.61.mlp.experts.59.gate_proj",
532
+ "model.layers.61.mlp.experts.59.up_proj",
533
+ "model.layers.61.mlp.experts.59.down_proj",
534
+ "model.layers.61.mlp.experts.60.gate_proj",
535
+ "model.layers.61.mlp.experts.60.up_proj",
536
+ "model.layers.61.mlp.experts.60.down_proj",
537
+ "model.layers.61.mlp.experts.61.gate_proj",
538
+ "model.layers.61.mlp.experts.61.up_proj",
539
+ "model.layers.61.mlp.experts.61.down_proj",
540
+ "model.layers.61.mlp.experts.62.gate_proj",
541
+ "model.layers.61.mlp.experts.62.up_proj",
542
+ "model.layers.61.mlp.experts.62.down_proj",
543
+ "model.layers.61.mlp.experts.63.gate_proj",
544
+ "model.layers.61.mlp.experts.63.up_proj",
545
+ "model.layers.61.mlp.experts.63.down_proj",
546
+ "model.layers.61.mlp.experts.64.gate_proj",
547
+ "model.layers.61.mlp.experts.64.up_proj",
548
+ "model.layers.61.mlp.experts.64.down_proj",
549
+ "model.layers.61.mlp.experts.65.gate_proj",
550
+ "model.layers.61.mlp.experts.65.up_proj",
551
+ "model.layers.61.mlp.experts.65.down_proj",
552
+ "model.layers.61.mlp.experts.66.gate_proj",
553
+ "model.layers.61.mlp.experts.66.up_proj",
554
+ "model.layers.61.mlp.experts.66.down_proj",
555
+ "model.layers.61.mlp.experts.67.gate_proj",
556
+ "model.layers.61.mlp.experts.67.up_proj",
557
+ "model.layers.61.mlp.experts.67.down_proj",
558
+ "model.layers.61.mlp.experts.68.gate_proj",
559
+ "model.layers.61.mlp.experts.68.up_proj",
560
+ "model.layers.61.mlp.experts.68.down_proj",
561
+ "model.layers.61.mlp.experts.69.gate_proj",
562
+ "model.layers.61.mlp.experts.69.up_proj",
563
+ "model.layers.61.mlp.experts.69.down_proj",
564
+ "model.layers.61.mlp.experts.70.gate_proj",
565
+ "model.layers.61.mlp.experts.70.up_proj",
566
+ "model.layers.61.mlp.experts.70.down_proj",
567
+ "model.layers.61.mlp.experts.71.gate_proj",
568
+ "model.layers.61.mlp.experts.71.up_proj",
569
+ "model.layers.61.mlp.experts.71.down_proj",
570
+ "model.layers.61.mlp.experts.72.gate_proj",
571
+ "model.layers.61.mlp.experts.72.up_proj",
572
+ "model.layers.61.mlp.experts.72.down_proj",
573
+ "model.layers.61.mlp.experts.73.gate_proj",
574
+ "model.layers.61.mlp.experts.73.up_proj",
575
+ "model.layers.61.mlp.experts.73.down_proj",
576
+ "model.layers.61.mlp.experts.74.gate_proj",
577
+ "model.layers.61.mlp.experts.74.up_proj",
578
+ "model.layers.61.mlp.experts.74.down_proj",
579
+ "model.layers.61.mlp.experts.75.gate_proj",
580
+ "model.layers.61.mlp.experts.75.up_proj",
581
+ "model.layers.61.mlp.experts.75.down_proj",
582
+ "model.layers.61.mlp.experts.76.gate_proj",
583
+ "model.layers.61.mlp.experts.76.up_proj",
584
+ "model.layers.61.mlp.experts.76.down_proj",
585
+ "model.layers.61.mlp.experts.77.gate_proj",
586
+ "model.layers.61.mlp.experts.77.up_proj",
587
+ "model.layers.61.mlp.experts.77.down_proj",
588
+ "model.layers.61.mlp.experts.78.gate_proj",
589
+ "model.layers.61.mlp.experts.78.up_proj",
590
+ "model.layers.61.mlp.experts.78.down_proj",
591
+ "model.layers.61.mlp.experts.79.gate_proj",
592
+ "model.layers.61.mlp.experts.79.up_proj",
593
+ "model.layers.61.mlp.experts.79.down_proj",
594
+ "model.layers.61.mlp.experts.80.gate_proj",
595
+ "model.layers.61.mlp.experts.80.up_proj",
596
+ "model.layers.61.mlp.experts.80.down_proj",
597
+ "model.layers.61.mlp.experts.81.gate_proj",
598
+ "model.layers.61.mlp.experts.81.up_proj",
599
+ "model.layers.61.mlp.experts.81.down_proj",
600
+ "model.layers.61.mlp.experts.82.gate_proj",
601
+ "model.layers.61.mlp.experts.82.up_proj",
602
+ "model.layers.61.mlp.experts.82.down_proj",
603
+ "model.layers.61.mlp.experts.83.gate_proj",
604
+ "model.layers.61.mlp.experts.83.up_proj",
605
+ "model.layers.61.mlp.experts.83.down_proj",
606
+ "model.layers.61.mlp.experts.84.gate_proj",
607
+ "model.layers.61.mlp.experts.84.up_proj",
608
+ "model.layers.61.mlp.experts.84.down_proj",
609
+ "model.layers.61.mlp.experts.85.gate_proj",
610
+ "model.layers.61.mlp.experts.85.up_proj",
611
+ "model.layers.61.mlp.experts.85.down_proj",
612
+ "model.layers.61.mlp.experts.86.gate_proj",
613
+ "model.layers.61.mlp.experts.86.up_proj",
614
+ "model.layers.61.mlp.experts.86.down_proj",
615
+ "model.layers.61.mlp.experts.87.gate_proj",
616
+ "model.layers.61.mlp.experts.87.up_proj",
617
+ "model.layers.61.mlp.experts.87.down_proj",
618
+ "model.layers.61.mlp.experts.88.gate_proj",
619
+ "model.layers.61.mlp.experts.88.up_proj",
620
+ "model.layers.61.mlp.experts.88.down_proj",
621
+ "model.layers.61.mlp.experts.89.gate_proj",
622
+ "model.layers.61.mlp.experts.89.up_proj",
623
+ "model.layers.61.mlp.experts.89.down_proj",
624
+ "model.layers.61.mlp.experts.90.gate_proj",
625
+ "model.layers.61.mlp.experts.90.up_proj",
626
+ "model.layers.61.mlp.experts.90.down_proj",
627
+ "model.layers.61.mlp.experts.91.gate_proj",
628
+ "model.layers.61.mlp.experts.91.up_proj",
629
+ "model.layers.61.mlp.experts.91.down_proj",
630
+ "model.layers.61.mlp.experts.92.gate_proj",
631
+ "model.layers.61.mlp.experts.92.up_proj",
632
+ "model.layers.61.mlp.experts.92.down_proj",
633
+ "model.layers.61.mlp.experts.93.gate_proj",
634
+ "model.layers.61.mlp.experts.93.up_proj",
635
+ "model.layers.61.mlp.experts.93.down_proj",
636
+ "model.layers.61.mlp.experts.94.gate_proj",
637
+ "model.layers.61.mlp.experts.94.up_proj",
638
+ "model.layers.61.mlp.experts.94.down_proj",
639
+ "model.layers.61.mlp.experts.95.gate_proj",
640
+ "model.layers.61.mlp.experts.95.up_proj",
641
+ "model.layers.61.mlp.experts.95.down_proj",
642
+ "model.layers.61.mlp.experts.96.gate_proj",
643
+ "model.layers.61.mlp.experts.96.up_proj",
644
+ "model.layers.61.mlp.experts.96.down_proj",
645
+ "model.layers.61.mlp.experts.97.gate_proj",
646
+ "model.layers.61.mlp.experts.97.up_proj",
647
+ "model.layers.61.mlp.experts.97.down_proj",
648
+ "model.layers.61.mlp.experts.98.gate_proj",
649
+ "model.layers.61.mlp.experts.98.up_proj",
650
+ "model.layers.61.mlp.experts.98.down_proj",
651
+ "model.layers.61.mlp.experts.99.gate_proj",
652
+ "model.layers.61.mlp.experts.99.up_proj",
653
+ "model.layers.61.mlp.experts.99.down_proj",
654
+ "model.layers.61.mlp.experts.100.gate_proj",
655
+ "model.layers.61.mlp.experts.100.up_proj",
656
+ "model.layers.61.mlp.experts.100.down_proj",
657
+ "model.layers.61.mlp.experts.101.gate_proj",
658
+ "model.layers.61.mlp.experts.101.up_proj",
659
+ "model.layers.61.mlp.experts.101.down_proj",
660
+ "model.layers.61.mlp.experts.102.gate_proj",
661
+ "model.layers.61.mlp.experts.102.up_proj",
662
+ "model.layers.61.mlp.experts.102.down_proj",
663
+ "model.layers.61.mlp.experts.103.gate_proj",
664
+ "model.layers.61.mlp.experts.103.up_proj",
665
+ "model.layers.61.mlp.experts.103.down_proj",
666
+ "model.layers.61.mlp.experts.104.gate_proj",
667
+ "model.layers.61.mlp.experts.104.up_proj",
668
+ "model.layers.61.mlp.experts.104.down_proj",
669
+ "model.layers.61.mlp.experts.105.gate_proj",
670
+ "model.layers.61.mlp.experts.105.up_proj",
671
+ "model.layers.61.mlp.experts.105.down_proj",
672
+ "model.layers.61.mlp.experts.106.gate_proj",
673
+ "model.layers.61.mlp.experts.106.up_proj",
674
+ "model.layers.61.mlp.experts.106.down_proj",
675
+ "model.layers.61.mlp.experts.107.gate_proj",
676
+ "model.layers.61.mlp.experts.107.up_proj",
677
+ "model.layers.61.mlp.experts.107.down_proj",
678
+ "model.layers.61.mlp.experts.108.gate_proj",
679
+ "model.layers.61.mlp.experts.108.up_proj",
680
+ "model.layers.61.mlp.experts.108.down_proj",
681
+ "model.layers.61.mlp.experts.109.gate_proj",
682
+ "model.layers.61.mlp.experts.109.up_proj",
683
+ "model.layers.61.mlp.experts.109.down_proj",
684
+ "model.layers.61.mlp.experts.110.gate_proj",
685
+ "model.layers.61.mlp.experts.110.up_proj",
686
+ "model.layers.61.mlp.experts.110.down_proj",
687
+ "model.layers.61.mlp.experts.111.gate_proj",
688
+ "model.layers.61.mlp.experts.111.up_proj",
689
+ "model.layers.61.mlp.experts.111.down_proj",
690
+ "model.layers.61.mlp.experts.112.gate_proj",
691
+ "model.layers.61.mlp.experts.112.up_proj",
692
+ "model.layers.61.mlp.experts.112.down_proj",
693
+ "model.layers.61.mlp.experts.113.gate_proj",
694
+ "model.layers.61.mlp.experts.113.up_proj",
695
+ "model.layers.61.mlp.experts.113.down_proj",
696
+ "model.layers.61.mlp.experts.114.gate_proj",
697
+ "model.layers.61.mlp.experts.114.up_proj",
698
+ "model.layers.61.mlp.experts.114.down_proj",
699
+ "model.layers.61.mlp.experts.115.gate_proj",
700
+ "model.layers.61.mlp.experts.115.up_proj",
701
+ "model.layers.61.mlp.experts.115.down_proj",
702
+ "model.layers.61.mlp.experts.116.gate_proj",
703
+ "model.layers.61.mlp.experts.116.up_proj",
704
+ "model.layers.61.mlp.experts.116.down_proj",
705
+ "model.layers.61.mlp.experts.117.gate_proj",
706
+ "model.layers.61.mlp.experts.117.up_proj",
707
+ "model.layers.61.mlp.experts.117.down_proj",
708
+ "model.layers.61.mlp.experts.118.gate_proj",
709
+ "model.layers.61.mlp.experts.118.up_proj",
710
+ "model.layers.61.mlp.experts.118.down_proj",
711
+ "model.layers.61.mlp.experts.119.gate_proj",
712
+ "model.layers.61.mlp.experts.119.up_proj",
713
+ "model.layers.61.mlp.experts.119.down_proj",
714
+ "model.layers.61.mlp.experts.120.gate_proj",
715
+ "model.layers.61.mlp.experts.120.up_proj",
716
+ "model.layers.61.mlp.experts.120.down_proj",
717
+ "model.layers.61.mlp.experts.121.gate_proj",
718
+ "model.layers.61.mlp.experts.121.up_proj",
719
+ "model.layers.61.mlp.experts.121.down_proj",
720
+ "model.layers.61.mlp.experts.122.gate_proj",
721
+ "model.layers.61.mlp.experts.122.up_proj",
722
+ "model.layers.61.mlp.experts.122.down_proj",
723
+ "model.layers.61.mlp.experts.123.gate_proj",
724
+ "model.layers.61.mlp.experts.123.up_proj",
725
+ "model.layers.61.mlp.experts.123.down_proj",
726
+ "model.layers.61.mlp.experts.124.gate_proj",
727
+ "model.layers.61.mlp.experts.124.up_proj",
728
+ "model.layers.61.mlp.experts.124.down_proj",
729
+ "model.layers.61.mlp.experts.125.gate_proj",
730
+ "model.layers.61.mlp.experts.125.up_proj",
731
+ "model.layers.61.mlp.experts.125.down_proj",
732
+ "model.layers.61.mlp.experts.126.gate_proj",
733
+ "model.layers.61.mlp.experts.126.up_proj",
734
+ "model.layers.61.mlp.experts.126.down_proj",
735
+ "model.layers.61.mlp.experts.127.gate_proj",
736
+ "model.layers.61.mlp.experts.127.up_proj",
737
+ "model.layers.61.mlp.experts.127.down_proj",
738
+ "model.layers.61.mlp.experts.128.gate_proj",
739
+ "model.layers.61.mlp.experts.128.up_proj",
740
+ "model.layers.61.mlp.experts.128.down_proj",
741
+ "model.layers.61.mlp.experts.129.gate_proj",
742
+ "model.layers.61.mlp.experts.129.up_proj",
743
+ "model.layers.61.mlp.experts.129.down_proj",
744
+ "model.layers.61.mlp.experts.130.gate_proj",
745
+ "model.layers.61.mlp.experts.130.up_proj",
746
+ "model.layers.61.mlp.experts.130.down_proj",
747
+ "model.layers.61.mlp.experts.131.gate_proj",
748
+ "model.layers.61.mlp.experts.131.up_proj",
749
+ "model.layers.61.mlp.experts.131.down_proj",
750
+ "model.layers.61.mlp.experts.132.gate_proj",
751
+ "model.layers.61.mlp.experts.132.up_proj",
752
+ "model.layers.61.mlp.experts.132.down_proj",
753
+ "model.layers.61.mlp.experts.133.gate_proj",
754
+ "model.layers.61.mlp.experts.133.up_proj",
755
+ "model.layers.61.mlp.experts.133.down_proj",
756
+ "model.layers.61.mlp.experts.134.gate_proj",
757
+ "model.layers.61.mlp.experts.134.up_proj",
758
+ "model.layers.61.mlp.experts.134.down_proj",
759
+ "model.layers.61.mlp.experts.135.gate_proj",
760
+ "model.layers.61.mlp.experts.135.up_proj",
761
+ "model.layers.61.mlp.experts.135.down_proj",
762
+ "model.layers.61.mlp.experts.136.gate_proj",
763
+ "model.layers.61.mlp.experts.136.up_proj",
764
+ "model.layers.61.mlp.experts.136.down_proj",
765
+ "model.layers.61.mlp.experts.137.gate_proj",
766
+ "model.layers.61.mlp.experts.137.up_proj",
767
+ "model.layers.61.mlp.experts.137.down_proj",
768
+ "model.layers.61.mlp.experts.138.gate_proj",
769
+ "model.layers.61.mlp.experts.138.up_proj",
770
+ "model.layers.61.mlp.experts.138.down_proj",
771
+ "model.layers.61.mlp.experts.139.gate_proj",
772
+ "model.layers.61.mlp.experts.139.up_proj",
773
+ "model.layers.61.mlp.experts.139.down_proj",
774
+ "model.layers.61.mlp.experts.140.gate_proj",
775
+ "model.layers.61.mlp.experts.140.up_proj",
776
+ "model.layers.61.mlp.experts.140.down_proj",
777
+ "model.layers.61.mlp.experts.141.gate_proj",
778
+ "model.layers.61.mlp.experts.141.up_proj",
779
+ "model.layers.61.mlp.experts.141.down_proj",
780
+ "model.layers.61.mlp.experts.142.gate_proj",
781
+ "model.layers.61.mlp.experts.142.up_proj",
782
+ "model.layers.61.mlp.experts.142.down_proj",
783
+ "model.layers.61.mlp.experts.143.gate_proj",
784
+ "model.layers.61.mlp.experts.143.up_proj",
785
+ "model.layers.61.mlp.experts.143.down_proj",
786
+ "model.layers.61.mlp.experts.144.gate_proj",
787
+ "model.layers.61.mlp.experts.144.up_proj",
788
+ "model.layers.61.mlp.experts.144.down_proj",
789
+ "model.layers.61.mlp.experts.145.gate_proj",
790
+ "model.layers.61.mlp.experts.145.up_proj",
791
+ "model.layers.61.mlp.experts.145.down_proj",
792
+ "model.layers.61.mlp.experts.146.gate_proj",
793
+ "model.layers.61.mlp.experts.146.up_proj",
794
+ "model.layers.61.mlp.experts.146.down_proj",
795
+ "model.layers.61.mlp.experts.147.gate_proj",
796
+ "model.layers.61.mlp.experts.147.up_proj",
797
+ "model.layers.61.mlp.experts.147.down_proj",
798
+ "model.layers.61.mlp.experts.148.gate_proj",
799
+ "model.layers.61.mlp.experts.148.up_proj",
800
+ "model.layers.61.mlp.experts.148.down_proj",
801
+ "model.layers.61.mlp.experts.149.gate_proj",
802
+ "model.layers.61.mlp.experts.149.up_proj",
803
+ "model.layers.61.mlp.experts.149.down_proj",
804
+ "model.layers.61.mlp.experts.150.gate_proj",
805
+ "model.layers.61.mlp.experts.150.up_proj",
806
+ "model.layers.61.mlp.experts.150.down_proj",
807
+ "model.layers.61.mlp.experts.151.gate_proj",
808
+ "model.layers.61.mlp.experts.151.up_proj",
809
+ "model.layers.61.mlp.experts.151.down_proj",
810
+ "model.layers.61.mlp.experts.152.gate_proj",
811
+ "model.layers.61.mlp.experts.152.up_proj",
812
+ "model.layers.61.mlp.experts.152.down_proj",
813
+ "model.layers.61.mlp.experts.153.gate_proj",
814
+ "model.layers.61.mlp.experts.153.up_proj",
815
+ "model.layers.61.mlp.experts.153.down_proj",
816
+ "model.layers.61.mlp.experts.154.gate_proj",
817
+ "model.layers.61.mlp.experts.154.up_proj",
818
+ "model.layers.61.mlp.experts.154.down_proj",
819
+ "model.layers.61.mlp.experts.155.gate_proj",
820
+ "model.layers.61.mlp.experts.155.up_proj",
821
+ "model.layers.61.mlp.experts.155.down_proj",
822
+ "model.layers.61.mlp.experts.156.gate_proj",
823
+ "model.layers.61.mlp.experts.156.up_proj",
824
+ "model.layers.61.mlp.experts.156.down_proj",
825
+ "model.layers.61.mlp.experts.157.gate_proj",
826
+ "model.layers.61.mlp.experts.157.up_proj",
827
+ "model.layers.61.mlp.experts.157.down_proj",
828
+ "model.layers.61.mlp.experts.158.gate_proj",
829
+ "model.layers.61.mlp.experts.158.up_proj",
830
+ "model.layers.61.mlp.experts.158.down_proj",
831
+ "model.layers.61.mlp.experts.159.gate_proj",
832
+ "model.layers.61.mlp.experts.159.up_proj",
833
+ "model.layers.61.mlp.experts.159.down_proj",
834
+ "model.layers.61.mlp.experts.160.gate_proj",
835
+ "model.layers.61.mlp.experts.160.up_proj",
836
+ "model.layers.61.mlp.experts.160.down_proj",
837
+ "model.layers.61.mlp.experts.161.gate_proj",
838
+ "model.layers.61.mlp.experts.161.up_proj",
839
+ "model.layers.61.mlp.experts.161.down_proj",
840
+ "model.layers.61.mlp.experts.162.gate_proj",
841
+ "model.layers.61.mlp.experts.162.up_proj",
842
+ "model.layers.61.mlp.experts.162.down_proj",
843
+ "model.layers.61.mlp.experts.163.gate_proj",
844
+ "model.layers.61.mlp.experts.163.up_proj",
845
+ "model.layers.61.mlp.experts.163.down_proj",
846
+ "model.layers.61.mlp.experts.164.gate_proj",
847
+ "model.layers.61.mlp.experts.164.up_proj",
848
+ "model.layers.61.mlp.experts.164.down_proj",
849
+ "model.layers.61.mlp.experts.165.gate_proj",
850
+ "model.layers.61.mlp.experts.165.up_proj",
851
+ "model.layers.61.mlp.experts.165.down_proj",
852
+ "model.layers.61.mlp.experts.166.gate_proj",
853
+ "model.layers.61.mlp.experts.166.up_proj",
854
+ "model.layers.61.mlp.experts.166.down_proj",
855
+ "model.layers.61.mlp.experts.167.gate_proj",
856
+ "model.layers.61.mlp.experts.167.up_proj",
857
+ "model.layers.61.mlp.experts.167.down_proj",
858
+ "model.layers.61.mlp.experts.168.gate_proj",
859
+ "model.layers.61.mlp.experts.168.up_proj",
860
+ "model.layers.61.mlp.experts.168.down_proj",
861
+ "model.layers.61.mlp.experts.169.gate_proj",
862
+ "model.layers.61.mlp.experts.169.up_proj",
863
+ "model.layers.61.mlp.experts.169.down_proj",
864
+ "model.layers.61.mlp.experts.170.gate_proj",
865
+ "model.layers.61.mlp.experts.170.up_proj",
866
+ "model.layers.61.mlp.experts.170.down_proj",
867
+ "model.layers.61.mlp.experts.171.gate_proj",
868
+ "model.layers.61.mlp.experts.171.up_proj",
869
+ "model.layers.61.mlp.experts.171.down_proj",
870
+ "model.layers.61.mlp.experts.172.gate_proj",
871
+ "model.layers.61.mlp.experts.172.up_proj",
872
+ "model.layers.61.mlp.experts.172.down_proj",
873
+ "model.layers.61.mlp.experts.173.gate_proj",
874
+ "model.layers.61.mlp.experts.173.up_proj",
875
+ "model.layers.61.mlp.experts.173.down_proj",
876
+ "model.layers.61.mlp.experts.174.gate_proj",
877
+ "model.layers.61.mlp.experts.174.up_proj",
878
+ "model.layers.61.mlp.experts.174.down_proj",
879
+ "model.layers.61.mlp.experts.175.gate_proj",
880
+ "model.layers.61.mlp.experts.175.up_proj",
881
+ "model.layers.61.mlp.experts.175.down_proj",
882
+ "model.layers.61.mlp.experts.176.gate_proj",
883
+ "model.layers.61.mlp.experts.176.up_proj",
884
+ "model.layers.61.mlp.experts.176.down_proj",
885
+ "model.layers.61.mlp.experts.177.gate_proj",
886
+ "model.layers.61.mlp.experts.177.up_proj",
887
+ "model.layers.61.mlp.experts.177.down_proj",
888
+ "model.layers.61.mlp.experts.178.gate_proj",
889
+ "model.layers.61.mlp.experts.178.up_proj",
890
+ "model.layers.61.mlp.experts.178.down_proj",
891
+ "model.layers.61.mlp.experts.179.gate_proj",
892
+ "model.layers.61.mlp.experts.179.up_proj",
893
+ "model.layers.61.mlp.experts.179.down_proj",
894
+ "model.layers.61.mlp.experts.180.gate_proj",
895
+ "model.layers.61.mlp.experts.180.up_proj",
896
+ "model.layers.61.mlp.experts.180.down_proj",
897
+ "model.layers.61.mlp.experts.181.gate_proj",
898
+ "model.layers.61.mlp.experts.181.up_proj",
899
+ "model.layers.61.mlp.experts.181.down_proj",
900
+ "model.layers.61.mlp.experts.182.gate_proj",
901
+ "model.layers.61.mlp.experts.182.up_proj",
902
+ "model.layers.61.mlp.experts.182.down_proj",
903
+ "model.layers.61.mlp.experts.183.gate_proj",
904
+ "model.layers.61.mlp.experts.183.up_proj",
905
+ "model.layers.61.mlp.experts.183.down_proj",
906
+ "model.layers.61.mlp.experts.184.gate_proj",
907
+ "model.layers.61.mlp.experts.184.up_proj",
908
+ "model.layers.61.mlp.experts.184.down_proj",
909
+ "model.layers.61.mlp.experts.185.gate_proj",
910
+ "model.layers.61.mlp.experts.185.up_proj",
911
+ "model.layers.61.mlp.experts.185.down_proj",
912
+ "model.layers.61.mlp.experts.186.gate_proj",
913
+ "model.layers.61.mlp.experts.186.up_proj",
914
+ "model.layers.61.mlp.experts.186.down_proj",
915
+ "model.layers.61.mlp.experts.187.gate_proj",
916
+ "model.layers.61.mlp.experts.187.up_proj",
917
+ "model.layers.61.mlp.experts.187.down_proj",
918
+ "model.layers.61.mlp.experts.188.gate_proj",
919
+ "model.layers.61.mlp.experts.188.up_proj",
920
+ "model.layers.61.mlp.experts.188.down_proj",
921
+ "model.layers.61.mlp.experts.189.gate_proj",
922
+ "model.layers.61.mlp.experts.189.up_proj",
923
+ "model.layers.61.mlp.experts.189.down_proj",
924
+ "model.layers.61.mlp.experts.190.gate_proj",
925
+ "model.layers.61.mlp.experts.190.up_proj",
926
+ "model.layers.61.mlp.experts.190.down_proj",
927
+ "model.layers.61.mlp.experts.191.gate_proj",
928
+ "model.layers.61.mlp.experts.191.up_proj",
929
+ "model.layers.61.mlp.experts.191.down_proj",
930
+ "model.layers.61.mlp.experts.192.gate_proj",
931
+ "model.layers.61.mlp.experts.192.up_proj",
932
+ "model.layers.61.mlp.experts.192.down_proj",
933
+ "model.layers.61.mlp.experts.193.gate_proj",
934
+ "model.layers.61.mlp.experts.193.up_proj",
935
+ "model.layers.61.mlp.experts.193.down_proj",
936
+ "model.layers.61.mlp.experts.194.gate_proj",
937
+ "model.layers.61.mlp.experts.194.up_proj",
938
+ "model.layers.61.mlp.experts.194.down_proj",
939
+ "model.layers.61.mlp.experts.195.gate_proj",
940
+ "model.layers.61.mlp.experts.195.up_proj",
941
+ "model.layers.61.mlp.experts.195.down_proj",
942
+ "model.layers.61.mlp.experts.196.gate_proj",
943
+ "model.layers.61.mlp.experts.196.up_proj",
944
+ "model.layers.61.mlp.experts.196.down_proj",
945
+ "model.layers.61.mlp.experts.197.gate_proj",
946
+ "model.layers.61.mlp.experts.197.up_proj",
947
+ "model.layers.61.mlp.experts.197.down_proj",
948
+ "model.layers.61.mlp.experts.198.gate_proj",
949
+ "model.layers.61.mlp.experts.198.up_proj",
950
+ "model.layers.61.mlp.experts.198.down_proj",
951
+ "model.layers.61.mlp.experts.199.gate_proj",
952
+ "model.layers.61.mlp.experts.199.up_proj",
953
+ "model.layers.61.mlp.experts.199.down_proj",
954
+ "model.layers.61.mlp.experts.200.gate_proj",
955
+ "model.layers.61.mlp.experts.200.up_proj",
956
+ "model.layers.61.mlp.experts.200.down_proj",
957
+ "model.layers.61.mlp.experts.201.gate_proj",
958
+ "model.layers.61.mlp.experts.201.up_proj",
959
+ "model.layers.61.mlp.experts.201.down_proj",
960
+ "model.layers.61.mlp.experts.202.gate_proj",
961
+ "model.layers.61.mlp.experts.202.up_proj",
962
+ "model.layers.61.mlp.experts.202.down_proj",
963
+ "model.layers.61.mlp.experts.203.gate_proj",
964
+ "model.layers.61.mlp.experts.203.up_proj",
965
+ "model.layers.61.mlp.experts.203.down_proj",
966
+ "model.layers.61.mlp.experts.204.gate_proj",
967
+ "model.layers.61.mlp.experts.204.up_proj",
968
+ "model.layers.61.mlp.experts.204.down_proj",
969
+ "model.layers.61.mlp.experts.205.gate_proj",
970
+ "model.layers.61.mlp.experts.205.up_proj",
971
+ "model.layers.61.mlp.experts.205.down_proj",
972
+ "model.layers.61.mlp.experts.206.gate_proj",
973
+ "model.layers.61.mlp.experts.206.up_proj",
974
+ "model.layers.61.mlp.experts.206.down_proj",
975
+ "model.layers.61.mlp.experts.207.gate_proj",
976
+ "model.layers.61.mlp.experts.207.up_proj",
977
+ "model.layers.61.mlp.experts.207.down_proj",
978
+ "model.layers.61.mlp.experts.208.gate_proj",
979
+ "model.layers.61.mlp.experts.208.up_proj",
980
+ "model.layers.61.mlp.experts.208.down_proj",
981
+ "model.layers.61.mlp.experts.209.gate_proj",
982
+ "model.layers.61.mlp.experts.209.up_proj",
983
+ "model.layers.61.mlp.experts.209.down_proj",
984
+ "model.layers.61.mlp.experts.210.gate_proj",
985
+ "model.layers.61.mlp.experts.210.up_proj",
986
+ "model.layers.61.mlp.experts.210.down_proj",
987
+ "model.layers.61.mlp.experts.211.gate_proj",
988
+ "model.layers.61.mlp.experts.211.up_proj",
989
+ "model.layers.61.mlp.experts.211.down_proj",
990
+ "model.layers.61.mlp.experts.212.gate_proj",
991
+ "model.layers.61.mlp.experts.212.up_proj",
992
+ "model.layers.61.mlp.experts.212.down_proj",
993
+ "model.layers.61.mlp.experts.213.gate_proj",
994
+ "model.layers.61.mlp.experts.213.up_proj",
995
+ "model.layers.61.mlp.experts.213.down_proj",
996
+ "model.layers.61.mlp.experts.214.gate_proj",
997
+ "model.layers.61.mlp.experts.214.up_proj",
998
+ "model.layers.61.mlp.experts.214.down_proj",
999
+ "model.layers.61.mlp.experts.215.gate_proj",
1000
+ "model.layers.61.mlp.experts.215.up_proj",
1001
+ "model.layers.61.mlp.experts.215.down_proj",
1002
+ "model.layers.61.mlp.experts.216.gate_proj",
1003
+ "model.layers.61.mlp.experts.216.up_proj",
1004
+ "model.layers.61.mlp.experts.216.down_proj",
1005
+ "model.layers.61.mlp.experts.217.gate_proj",
1006
+ "model.layers.61.mlp.experts.217.up_proj",
1007
+ "model.layers.61.mlp.experts.217.down_proj",
1008
+ "model.layers.61.mlp.experts.218.gate_proj",
1009
+ "model.layers.61.mlp.experts.218.up_proj",
1010
+ "model.layers.61.mlp.experts.218.down_proj",
1011
+ "model.layers.61.mlp.experts.219.gate_proj",
1012
+ "model.layers.61.mlp.experts.219.up_proj",
1013
+ "model.layers.61.mlp.experts.219.down_proj",
1014
+ "model.layers.61.mlp.experts.220.gate_proj",
1015
+ "model.layers.61.mlp.experts.220.up_proj",
1016
+ "model.layers.61.mlp.experts.220.down_proj",
1017
+ "model.layers.61.mlp.experts.221.gate_proj",
1018
+ "model.layers.61.mlp.experts.221.up_proj",
1019
+ "model.layers.61.mlp.experts.221.down_proj",
1020
+ "model.layers.61.mlp.experts.222.gate_proj",
1021
+ "model.layers.61.mlp.experts.222.up_proj",
1022
+ "model.layers.61.mlp.experts.222.down_proj",
1023
+ "model.layers.61.mlp.experts.223.gate_proj",
1024
+ "model.layers.61.mlp.experts.223.up_proj",
1025
+ "model.layers.61.mlp.experts.223.down_proj",
1026
+ "model.layers.61.mlp.experts.224.gate_proj",
1027
+ "model.layers.61.mlp.experts.224.up_proj",
1028
+ "model.layers.61.mlp.experts.224.down_proj",
1029
+ "model.layers.61.mlp.experts.225.gate_proj",
1030
+ "model.layers.61.mlp.experts.225.up_proj",
1031
+ "model.layers.61.mlp.experts.225.down_proj",
1032
+ "model.layers.61.mlp.experts.226.gate_proj",
1033
+ "model.layers.61.mlp.experts.226.up_proj",
1034
+ "model.layers.61.mlp.experts.226.down_proj",
1035
+ "model.layers.61.mlp.experts.227.gate_proj",
1036
+ "model.layers.61.mlp.experts.227.up_proj",
1037
+ "model.layers.61.mlp.experts.227.down_proj",
1038
+ "model.layers.61.mlp.experts.228.gate_proj",
1039
+ "model.layers.61.mlp.experts.228.up_proj",
1040
+ "model.layers.61.mlp.experts.228.down_proj",
1041
+ "model.layers.61.mlp.experts.229.gate_proj",
1042
+ "model.layers.61.mlp.experts.229.up_proj",
1043
+ "model.layers.61.mlp.experts.229.down_proj",
1044
+ "model.layers.61.mlp.experts.230.gate_proj",
1045
+ "model.layers.61.mlp.experts.230.up_proj",
1046
+ "model.layers.61.mlp.experts.230.down_proj",
1047
+ "model.layers.61.mlp.experts.231.gate_proj",
1048
+ "model.layers.61.mlp.experts.231.up_proj",
1049
+ "model.layers.61.mlp.experts.231.down_proj",
1050
+ "model.layers.61.mlp.experts.232.gate_proj",
1051
+ "model.layers.61.mlp.experts.232.up_proj",
1052
+ "model.layers.61.mlp.experts.232.down_proj",
1053
+ "model.layers.61.mlp.experts.233.gate_proj",
1054
+ "model.layers.61.mlp.experts.233.up_proj",
1055
+ "model.layers.61.mlp.experts.233.down_proj",
1056
+ "model.layers.61.mlp.experts.234.gate_proj",
1057
+ "model.layers.61.mlp.experts.234.up_proj",
1058
+ "model.layers.61.mlp.experts.234.down_proj",
1059
+ "model.layers.61.mlp.experts.235.gate_proj",
1060
+ "model.layers.61.mlp.experts.235.up_proj",
1061
+ "model.layers.61.mlp.experts.235.down_proj",
1062
+ "model.layers.61.mlp.experts.236.gate_proj",
1063
+ "model.layers.61.mlp.experts.236.up_proj",
1064
+ "model.layers.61.mlp.experts.236.down_proj",
1065
+ "model.layers.61.mlp.experts.237.gate_proj",
1066
+ "model.layers.61.mlp.experts.237.up_proj",
1067
+ "model.layers.61.mlp.experts.237.down_proj",
1068
+ "model.layers.61.mlp.experts.238.gate_proj",
1069
+ "model.layers.61.mlp.experts.238.up_proj",
1070
+ "model.layers.61.mlp.experts.238.down_proj",
1071
+ "model.layers.61.mlp.experts.239.gate_proj",
1072
+ "model.layers.61.mlp.experts.239.up_proj",
1073
+ "model.layers.61.mlp.experts.239.down_proj",
1074
+ "model.layers.61.mlp.experts.240.gate_proj",
1075
+ "model.layers.61.mlp.experts.240.up_proj",
1076
+ "model.layers.61.mlp.experts.240.down_proj",
1077
+ "model.layers.61.mlp.experts.241.gate_proj",
1078
+ "model.layers.61.mlp.experts.241.up_proj",
1079
+ "model.layers.61.mlp.experts.241.down_proj",
1080
+ "model.layers.61.mlp.experts.242.gate_proj",
1081
+ "model.layers.61.mlp.experts.242.up_proj",
1082
+ "model.layers.61.mlp.experts.242.down_proj",
1083
+ "model.layers.61.mlp.experts.243.gate_proj",
1084
+ "model.layers.61.mlp.experts.243.up_proj",
1085
+ "model.layers.61.mlp.experts.243.down_proj",
1086
+ "model.layers.61.mlp.experts.244.gate_proj",
1087
+ "model.layers.61.mlp.experts.244.up_proj",
1088
+ "model.layers.61.mlp.experts.244.down_proj",
1089
+ "model.layers.61.mlp.experts.245.gate_proj",
1090
+ "model.layers.61.mlp.experts.245.up_proj",
1091
+ "model.layers.61.mlp.experts.245.down_proj",
1092
+ "model.layers.61.mlp.experts.246.gate_proj",
1093
+ "model.layers.61.mlp.experts.246.up_proj",
1094
+ "model.layers.61.mlp.experts.246.down_proj",
1095
+ "model.layers.61.mlp.experts.247.gate_proj",
1096
+ "model.layers.61.mlp.experts.247.up_proj",
1097
+ "model.layers.61.mlp.experts.247.down_proj",
1098
+ "model.layers.61.mlp.experts.248.gate_proj",
1099
+ "model.layers.61.mlp.experts.248.up_proj",
1100
+ "model.layers.61.mlp.experts.248.down_proj",
1101
+ "model.layers.61.mlp.experts.249.gate_proj",
1102
+ "model.layers.61.mlp.experts.249.up_proj",
1103
+ "model.layers.61.mlp.experts.249.down_proj",
1104
+ "model.layers.61.mlp.experts.250.gate_proj",
1105
+ "model.layers.61.mlp.experts.250.up_proj",
1106
+ "model.layers.61.mlp.experts.250.down_proj",
1107
+ "model.layers.61.mlp.experts.251.gate_proj",
1108
+ "model.layers.61.mlp.experts.251.up_proj",
1109
+ "model.layers.61.mlp.experts.251.down_proj",
1110
+ "model.layers.61.mlp.experts.252.gate_proj",
1111
+ "model.layers.61.mlp.experts.252.up_proj",
1112
+ "model.layers.61.mlp.experts.252.down_proj",
1113
+ "model.layers.61.mlp.experts.253.gate_proj",
1114
+ "model.layers.61.mlp.experts.253.up_proj",
1115
+ "model.layers.61.mlp.experts.253.down_proj",
1116
+ "model.layers.61.mlp.experts.254.gate_proj",
1117
+ "model.layers.61.mlp.experts.254.up_proj",
1118
+ "model.layers.61.mlp.experts.254.down_proj",
1119
+ "model.layers.61.mlp.experts.255.gate_proj",
1120
+ "model.layers.61.mlp.experts.255.up_proj",
1121
+ "model.layers.61.mlp.experts.255.down_proj",
1122
+ "model.layers.61.mlp.shared_experts.gate_proj",
1123
+ "model.layers.61.mlp.shared_experts.up_proj",
1124
+ "model.layers.61.mlp.shared_experts.down_proj",
1125
+ "model.layers.61.shared_head.head",
1126
  "lm_head"
1127
  ],
1128
  "export": {
 
1174
  "quant_method": "quark",
1175
  "quant_mode": "eager_mode",
1176
  "softmax_quant_spec": null,
1177
+ "version": "0.12+03a839631e"
1178
  },
1179
  "rms_norm_eps": 1e-06,
1180
  "rope_scaling": {