prabhuat committed on
Commit
5da451d
·
verified ·
1 Parent(s): c2878bf

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -17,11 +17,11 @@
17
  "num_key_value_heads": 8,
18
  "pad_token_id": 151665,
19
  "quantization": {
20
- "group_size": 64,
21
  "bits": 4
22
  },
23
  "quantization_config": {
24
- "group_size": 64,
25
  "bits": 4
26
  },
27
  "rms_norm_eps": 1e-06,
 
17
  "num_key_value_heads": 8,
18
  "pad_token_id": 151665,
19
  "quantization": {
20
+ "group_size": 32,
21
  "bits": 4
22
  },
23
  "quantization_config": {
24
+ "group_size": 32,
25
  "bits": 4
26
  },
27
  "rms_norm_eps": 1e-06,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cad5f81f8ea070c325d54df78975113c0fa8be6c80dd69092cbb89be20c3fc57
3
- size 5353840945
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed3e62338f559a062f7c75420601897743f192436c71ddab440254bed31cfaf8
3
+ size 5343896747
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e95817c4056d8be7bd5a92e93cce9daa3687900feaf74a716b08c954218100b
3
- size 2955654333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b28da2093899c954f6efa5375c6d157d11d3502830895a86224d8836fc7a90
3
+ size 3888673587
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 8309352448,
4
  "total_parameters": 14770033664
5
  },
6
  "weight_map": {
@@ -556,17 +556,17 @@
556
  "model.layers.27.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
557
  "model.layers.27.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
558
  "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
559
- "model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
560
- "model.layers.28.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
561
- "model.layers.28.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
562
- "model.layers.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
563
- "model.layers.28.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
564
- "model.layers.28.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
565
- "model.layers.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
566
- "model.layers.28.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
567
- "model.layers.28.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
568
- "model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
569
- "model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
570
  "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
571
  "model.layers.28.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
572
  "model.layers.28.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
@@ -582,32 +582,32 @@
582
  "model.layers.28.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
583
  "model.layers.28.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
584
  "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
585
- "model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
586
- "model.layers.29.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
587
- "model.layers.29.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
588
- "model.layers.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
589
- "model.layers.29.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
590
- "model.layers.29.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
591
- "model.layers.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
592
- "model.layers.29.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
593
- "model.layers.29.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
594
- "model.layers.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
595
- "model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
596
- "model.layers.29.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
597
- "model.layers.29.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
598
- "model.layers.29.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
599
- "model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
600
- "model.layers.29.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
601
- "model.layers.29.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
602
- "model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
603
- "model.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
604
- "model.layers.29.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
605
- "model.layers.29.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
606
- "model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
607
- "model.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
608
- "model.layers.29.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
609
- "model.layers.29.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
610
- "model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
611
  "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
612
  "model.layers.3.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
613
  "model.layers.3.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
@@ -634,58 +634,58 @@
634
  "model.layers.3.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
635
  "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
636
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
637
- "model.layers.30.input_layernorm.weight": "model-00001-of-00002.safetensors",
638
- "model.layers.30.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
639
- "model.layers.30.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
640
- "model.layers.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
641
- "model.layers.30.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
642
- "model.layers.30.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
643
- "model.layers.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
644
- "model.layers.30.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
645
- "model.layers.30.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
646
- "model.layers.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
647
- "model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
648
- "model.layers.30.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
649
- "model.layers.30.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
650
- "model.layers.30.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
651
- "model.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
652
- "model.layers.30.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
653
- "model.layers.30.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
654
- "model.layers.30.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
655
- "model.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
656
- "model.layers.30.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
657
- "model.layers.30.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
658
- "model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
659
- "model.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
660
- "model.layers.30.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
661
- "model.layers.30.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
662
- "model.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
663
  "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
664
- "model.layers.31.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
665
- "model.layers.31.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
666
- "model.layers.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
667
- "model.layers.31.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
668
- "model.layers.31.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
669
- "model.layers.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
670
  "model.layers.31.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
671
  "model.layers.31.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
672
  "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
673
  "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
674
- "model.layers.31.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
675
- "model.layers.31.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
676
- "model.layers.31.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
677
- "model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
678
- "model.layers.31.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
679
- "model.layers.31.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
680
- "model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
681
- "model.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
682
- "model.layers.31.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
683
- "model.layers.31.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
684
- "model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
685
- "model.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
686
- "model.layers.31.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
687
- "model.layers.31.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
688
- "model.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
689
  "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
690
  "model.layers.32.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
691
  "model.layers.32.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 9232427008,
4
  "total_parameters": 14770033664
5
  },
6
  "weight_map": {
 
556
  "model.layers.27.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
557
  "model.layers.27.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
558
  "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
559
+ "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
560
+ "model.layers.28.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
561
+ "model.layers.28.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
562
+ "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
563
+ "model.layers.28.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
564
+ "model.layers.28.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
565
+ "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
566
+ "model.layers.28.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
567
+ "model.layers.28.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
568
+ "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
569
+ "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
570
  "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
571
  "model.layers.28.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
572
  "model.layers.28.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
 
582
  "model.layers.28.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
583
  "model.layers.28.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
584
  "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
585
+ "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
586
+ "model.layers.29.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
587
+ "model.layers.29.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
588
+ "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
589
+ "model.layers.29.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
590
+ "model.layers.29.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
591
+ "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
592
+ "model.layers.29.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
593
+ "model.layers.29.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
594
+ "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
595
+ "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
596
+ "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
597
+ "model.layers.29.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
598
+ "model.layers.29.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
599
+ "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
600
+ "model.layers.29.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
601
+ "model.layers.29.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
602
+ "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
603
+ "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
604
+ "model.layers.29.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
605
+ "model.layers.29.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
606
+ "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
607
+ "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
608
+ "model.layers.29.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
609
+ "model.layers.29.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
610
+ "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
611
  "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
612
  "model.layers.3.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
613
  "model.layers.3.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
 
634
  "model.layers.3.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
635
  "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
636
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
637
+ "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
638
+ "model.layers.30.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
639
+ "model.layers.30.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
640
+ "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
641
+ "model.layers.30.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
642
+ "model.layers.30.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
643
+ "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
644
+ "model.layers.30.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
645
+ "model.layers.30.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
646
+ "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
647
+ "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
648
+ "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
649
+ "model.layers.30.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
650
+ "model.layers.30.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
651
+ "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
652
+ "model.layers.30.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
653
+ "model.layers.30.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
654
+ "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
655
+ "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
656
+ "model.layers.30.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
657
+ "model.layers.30.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
658
+ "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
659
+ "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
660
+ "model.layers.30.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
661
+ "model.layers.30.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
662
+ "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
663
  "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
664
+ "model.layers.31.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
665
+ "model.layers.31.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
666
+ "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
667
+ "model.layers.31.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
668
+ "model.layers.31.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
669
+ "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
670
  "model.layers.31.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
671
  "model.layers.31.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
672
  "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
673
  "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
674
+ "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
675
+ "model.layers.31.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
676
+ "model.layers.31.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
677
+ "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
678
+ "model.layers.31.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
679
+ "model.layers.31.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
680
+ "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
681
+ "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
682
+ "model.layers.31.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
683
+ "model.layers.31.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
684
+ "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
685
+ "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
686
+ "model.layers.31.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
687
+ "model.layers.31.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
688
+ "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
689
  "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
690
  "model.layers.32.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
691
  "model.layers.32.mlp.down_proj.scales": "model-00002-of-00002.safetensors",