rinarina0429 committed on
Commit
8d12173
·
verified ·
1 Parent(s): c9f99b7

Upload ServerlessLLM format model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tensor.data_0 filter=lfs diff=lfs merge=lfs -text
37
+ tensor.data_1 filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "architectures": [
5
+ "LlamaForCausalLM"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 1,
10
+ "eos_token_id": 2,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 11008,
16
+ "max_position_embeddings": 4096,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 32,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 10000.0,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "float16",
28
+ "transformers_version": "4.47.1",
29
+ "use_cache": true,
30
+ "vocab_size": 32000
31
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "do_sample": true,
4
+ "eos_token_id": 2,
5
+ "max_length": 4096,
6
+ "pad_token_id": 0,
7
+ "temperature": 0.6,
8
+ "top_p": 0.9,
9
+ "transformers_version": "4.47.1"
10
+ }
no_split_modules.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model.embed_tokens": 262144000, "model.layers.0": 404766720, "model.layers.1": 404766720, "model.layers.2": 404766720, "model.layers.3": 404766720, "model.layers.4": 404766720, "model.layers.5": 404766720, "model.layers.6": 404766720, "model.layers.7": 404766720, "model.layers.8": 404766720, "model.layers.9": 404766720, "model.layers.10": 404766720, "model.layers.11": 404766720, "model.layers.12": 404766720, "model.layers.13": 404766720, "model.layers.14": 404766720, "model.layers.15": 404766720, "model.layers.16": 404766720, "model.layers.17": 404766720, "model.layers.18": 404766720, "model.layers.19": 404766720, "model.layers.20": 404766720, "model.layers.21": 404766720, "model.layers.22": 404766720, "model.layers.23": 404766720, "model.layers.24": 404766720, "model.layers.25": 404766720, "model.layers.26": 404766720, "model.layers.27": 404766720, "model.layers.28": 404766720, "model.layers.29": 404766720, "model.layers.30": 404766720, "model.layers.31": 404766720, "model.norm": 8192, "model.rotary_emb": 0, "lm_head": 262144000}
tensor.data_0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbe1c7a84a0cf328be628d69db832e3ca25054d59c4c37ab8dc4af2b8748f81
3
+ size 10695884800
tensor.data_1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48d5c714ee0bd2c0e7d3e2c93d38f660c524643cdbd0e94584384484bfceaa4c
3
+ size 2780946432
tensor_index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model.embed_tokens.weight": [0, 262144000, [32000, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.q_proj.weight": [262144000, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.k_proj.weight": [295698432, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.v_proj.weight": [329252864, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.o_proj.weight": [362807296, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.gate_proj.weight": [396361728, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.up_proj.weight": [486539264, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.down_proj.weight": [576716800, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.0.input_layernorm.weight": [666894336, 8192, [4096], [1], "torch.float16"], "model.layers.0.post_attention_layernorm.weight": [666902528, 8192, [4096], [1], "torch.float16"], "model.layers.1.self_attn.q_proj.weight": [666910720, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.k_proj.weight": [700465152, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.v_proj.weight": [734019584, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.o_proj.weight": [767574016, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.gate_proj.weight": [801128448, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.up_proj.weight": [891305984, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.down_proj.weight": [981483520, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.1.input_layernorm.weight": [1071661056, 8192, [4096], [1], "torch.float16"], "model.layers.1.post_attention_layernorm.weight": [1071669248, 8192, [4096], [1], "torch.float16"], "model.layers.2.self_attn.q_proj.weight": 
[1071677440, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.k_proj.weight": [1105231872, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.v_proj.weight": [1138786304, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.o_proj.weight": [1172340736, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.gate_proj.weight": [1205895168, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.up_proj.weight": [1296072704, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.down_proj.weight": [1386250240, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.2.input_layernorm.weight": [1476427776, 8192, [4096], [1], "torch.float16"], "model.layers.2.post_attention_layernorm.weight": [1476435968, 8192, [4096], [1], "torch.float16"], "model.layers.3.self_attn.q_proj.weight": [1476444160, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.k_proj.weight": [1509998592, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.v_proj.weight": [1543553024, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.o_proj.weight": [1577107456, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.gate_proj.weight": [1610661888, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.up_proj.weight": [1700839424, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.down_proj.weight": [1791016960, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.3.input_layernorm.weight": [1881194496, 8192, [4096], [1], "torch.float16"], "model.layers.3.post_attention_layernorm.weight": [1881202688, 8192, [4096], [1], "torch.float16"], "model.layers.4.self_attn.q_proj.weight": [1881210880, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.k_proj.weight": 
[1914765312, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.v_proj.weight": [1948319744, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.o_proj.weight": [1981874176, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.gate_proj.weight": [2015428608, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.up_proj.weight": [2105606144, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.down_proj.weight": [2195783680, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.4.input_layernorm.weight": [2285961216, 8192, [4096], [1], "torch.float16"], "model.layers.4.post_attention_layernorm.weight": [2285969408, 8192, [4096], [1], "torch.float16"], "model.layers.5.self_attn.q_proj.weight": [2285977600, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.k_proj.weight": [2319532032, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.v_proj.weight": [2353086464, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.o_proj.weight": [2386640896, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.gate_proj.weight": [2420195328, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.up_proj.weight": [2510372864, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.down_proj.weight": [2600550400, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.5.input_layernorm.weight": [2690727936, 8192, [4096], [1], "torch.float16"], "model.layers.5.post_attention_layernorm.weight": [2690736128, 8192, [4096], [1], "torch.float16"], "model.layers.6.self_attn.q_proj.weight": [2690744320, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.k_proj.weight": [2724298752, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.v_proj.weight": 
[2757853184, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.o_proj.weight": [2791407616, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.gate_proj.weight": [2824962048, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.up_proj.weight": [2915139584, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.down_proj.weight": [3005317120, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.6.input_layernorm.weight": [3095494656, 8192, [4096], [1], "torch.float16"], "model.layers.6.post_attention_layernorm.weight": [3095502848, 8192, [4096], [1], "torch.float16"], "model.layers.7.self_attn.q_proj.weight": [3095511040, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.k_proj.weight": [3129065472, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.v_proj.weight": [3162619904, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.o_proj.weight": [3196174336, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.gate_proj.weight": [3229728768, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.up_proj.weight": [3319906304, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.down_proj.weight": [3410083840, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.7.input_layernorm.weight": [3500261376, 8192, [4096], [1], "torch.float16"], "model.layers.7.post_attention_layernorm.weight": [3500269568, 8192, [4096], [1], "torch.float16"], "model.layers.8.self_attn.q_proj.weight": [3500277760, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.k_proj.weight": [3533832192, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.v_proj.weight": [3567386624, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.o_proj.weight": 
[3600941056, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.gate_proj.weight": [3634495488, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.up_proj.weight": [3724673024, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.down_proj.weight": [3814850560, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.8.input_layernorm.weight": [3905028096, 8192, [4096], [1], "torch.float16"], "model.layers.8.post_attention_layernorm.weight": [3905036288, 8192, [4096], [1], "torch.float16"], "model.layers.9.self_attn.q_proj.weight": [3905044480, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.k_proj.weight": [3938598912, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.v_proj.weight": [3972153344, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.o_proj.weight": [4005707776, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.gate_proj.weight": [4039262208, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.up_proj.weight": [4129439744, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.down_proj.weight": [4219617280, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.9.input_layernorm.weight": [4309794816, 8192, [4096], [1], "torch.float16"], "model.layers.9.post_attention_layernorm.weight": [4309803008, 8192, [4096], [1], "torch.float16"], "model.layers.10.self_attn.q_proj.weight": [4309811200, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.k_proj.weight": [4343365632, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.v_proj.weight": [4376920064, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.o_proj.weight": [4410474496, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.gate_proj.weight": 
[4444028928, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.up_proj.weight": [4534206464, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.down_proj.weight": [4624384000, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.10.input_layernorm.weight": [4714561536, 8192, [4096], [1], "torch.float16"], "model.layers.10.post_attention_layernorm.weight": [4714569728, 8192, [4096], [1], "torch.float16"], "model.layers.11.self_attn.q_proj.weight": [4714577920, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.k_proj.weight": [4748132352, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.v_proj.weight": [4781686784, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.o_proj.weight": [4815241216, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.gate_proj.weight": [4848795648, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.up_proj.weight": [4938973184, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.down_proj.weight": [5029150720, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.11.input_layernorm.weight": [5119328256, 8192, [4096], [1], "torch.float16"], "model.layers.11.post_attention_layernorm.weight": [5119336448, 8192, [4096], [1], "torch.float16"], "model.layers.12.self_attn.q_proj.weight": [5119344640, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.k_proj.weight": [5152899072, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.v_proj.weight": [5186453504, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.o_proj.weight": [5220007936, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.mlp.gate_proj.weight": [5253562368, 90177536, [11008, 4096], [4096, 1], "torch.float16"], 
"model.layers.12.mlp.up_proj.weight": [5343739904, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.12.mlp.down_proj.weight": [5433917440, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.12.input_layernorm.weight": [5524094976, 8192, [4096], [1], "torch.float16"], "model.layers.12.post_attention_layernorm.weight": [5524103168, 8192, [4096], [1], "torch.float16"], "model.layers.13.self_attn.q_proj.weight": [5524111360, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.k_proj.weight": [5557665792, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.v_proj.weight": [5591220224, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.o_proj.weight": [5624774656, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.gate_proj.weight": [5658329088, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.up_proj.weight": [5748506624, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.down_proj.weight": [5838684160, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.13.input_layernorm.weight": [5928861696, 8192, [4096], [1], "torch.float16"], "model.layers.13.post_attention_layernorm.weight": [5928869888, 8192, [4096], [1], "torch.float16"], "model.layers.14.self_attn.q_proj.weight": [5928878080, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.k_proj.weight": [5962432512, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.v_proj.weight": [5995986944, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.o_proj.weight": [6029541376, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.mlp.gate_proj.weight": [6063095808, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.14.mlp.up_proj.weight": [6153273344, 90177536, [11008, 4096], [4096, 1], 
"torch.float16"], "model.layers.14.mlp.down_proj.weight": [6243450880, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.14.input_layernorm.weight": [6333628416, 8192, [4096], [1], "torch.float16"], "model.layers.14.post_attention_layernorm.weight": [6333636608, 8192, [4096], [1], "torch.float16"], "model.layers.15.self_attn.q_proj.weight": [6333644800, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.k_proj.weight": [6367199232, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.v_proj.weight": [6400753664, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.o_proj.weight": [6434308096, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.gate_proj.weight": [6467862528, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.up_proj.weight": [6558040064, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.down_proj.weight": [6648217600, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.15.input_layernorm.weight": [6738395136, 8192, [4096], [1], "torch.float16"], "model.layers.15.post_attention_layernorm.weight": [6738403328, 8192, [4096], [1], "torch.float16"], "model.layers.16.self_attn.q_proj.weight": [6738411520, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.k_proj.weight": [6771965952, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.v_proj.weight": [6805520384, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.o_proj.weight": [6839074816, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.gate_proj.weight": [6872629248, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.up_proj.weight": [6962806784, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.down_proj.weight": [7052984320, 90177536, [4096, 11008], 
[11008, 1], "torch.float16"], "model.layers.16.input_layernorm.weight": [7143161856, 8192, [4096], [1], "torch.float16"], "model.layers.16.post_attention_layernorm.weight": [7143170048, 8192, [4096], [1], "torch.float16"], "model.layers.17.self_attn.q_proj.weight": [7143178240, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.k_proj.weight": [7176732672, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.v_proj.weight": [7210287104, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.o_proj.weight": [7243841536, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.gate_proj.weight": [7277395968, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.up_proj.weight": [7367573504, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.down_proj.weight": [7457751040, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.17.input_layernorm.weight": [7547928576, 8192, [4096], [1], "torch.float16"], "model.layers.17.post_attention_layernorm.weight": [7547936768, 8192, [4096], [1], "torch.float16"], "model.layers.18.self_attn.q_proj.weight": [7547944960, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.k_proj.weight": [7581499392, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.v_proj.weight": [7615053824, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.o_proj.weight": [7648608256, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.gate_proj.weight": [7682162688, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.up_proj.weight": [7772340224, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.down_proj.weight": [7862517760, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.18.input_layernorm.weight": [7952695296, 8192, [4096], 
[1], "torch.float16"], "model.layers.18.post_attention_layernorm.weight": [7952703488, 8192, [4096], [1], "torch.float16"], "model.layers.19.self_attn.q_proj.weight": [7952711680, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.k_proj.weight": [7986266112, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.v_proj.weight": [8019820544, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.o_proj.weight": [8053374976, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.gate_proj.weight": [8086929408, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.up_proj.weight": [8177106944, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.down_proj.weight": [8267284480, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.19.input_layernorm.weight": [8357462016, 8192, [4096], [1], "torch.float16"], "model.layers.19.post_attention_layernorm.weight": [8357470208, 8192, [4096], [1], "torch.float16"], "model.layers.20.self_attn.q_proj.weight": [8357478400, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.k_proj.weight": [8391032832, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.v_proj.weight": [8424587264, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.o_proj.weight": [8458141696, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.gate_proj.weight": [8491696128, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.up_proj.weight": [8581873664, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.down_proj.weight": [8672051200, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.20.input_layernorm.weight": [8762228736, 8192, [4096], [1], "torch.float16"], "model.layers.20.post_attention_layernorm.weight": [8762236928, 8192, [4096], 
[1], "torch.float16"], "model.layers.21.self_attn.q_proj.weight": [8762245120, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.k_proj.weight": [8795799552, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.v_proj.weight": [8829353984, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.o_proj.weight": [8862908416, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.gate_proj.weight": [8896462848, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.up_proj.weight": [8986640384, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.down_proj.weight": [9076817920, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.21.input_layernorm.weight": [9166995456, 8192, [4096], [1], "torch.float16"], "model.layers.21.post_attention_layernorm.weight": [9167003648, 8192, [4096], [1], "torch.float16"], "model.layers.22.self_attn.q_proj.weight": [9167011840, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.k_proj.weight": [9200566272, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.v_proj.weight": [9234120704, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.o_proj.weight": [9267675136, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.gate_proj.weight": [9301229568, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.up_proj.weight": [9391407104, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.down_proj.weight": [9481584640, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.22.input_layernorm.weight": [9571762176, 8192, [4096], [1], "torch.float16"], "model.layers.22.post_attention_layernorm.weight": [9571770368, 8192, [4096], [1], "torch.float16"], "model.layers.23.self_attn.q_proj.weight": [9571778560, 33554432, [4096, 
4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.k_proj.weight": [9605332992, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.v_proj.weight": [9638887424, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.o_proj.weight": [9672441856, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.gate_proj.weight": [9705996288, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.up_proj.weight": [9796173824, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.down_proj.weight": [9886351360, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.23.input_layernorm.weight": [9976528896, 8192, [4096], [1], "torch.float16"], "model.layers.23.post_attention_layernorm.weight": [9976537088, 8192, [4096], [1], "torch.float16"], "model.layers.24.self_attn.q_proj.weight": [9976545280, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.k_proj.weight": [10010099712, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.v_proj.weight": [10043654144, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.o_proj.weight": [10077208576, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.gate_proj.weight": [10110763008, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.up_proj.weight": [10200940544, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.down_proj.weight": [10291118080, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.24.input_layernorm.weight": [10381295616, 8192, [4096], [1], "torch.float16"], "model.layers.24.post_attention_layernorm.weight": [10381303808, 8192, [4096], [1], "torch.float16"], "model.layers.25.self_attn.q_proj.weight": [10381312000, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.k_proj.weight": [10414866432, 
33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.v_proj.weight": [10448420864, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.o_proj.weight": [10481975296, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.gate_proj.weight": [10515529728, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.up_proj.weight": [10605707264, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.down_proj.weight": [10695884800, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.25.input_layernorm.weight": [10786062336, 8192, [4096], [1], "torch.float16"], "model.layers.25.post_attention_layernorm.weight": [10786070528, 8192, [4096], [1], "torch.float16"], "model.layers.26.self_attn.q_proj.weight": [10786078720, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.k_proj.weight": [10819633152, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.v_proj.weight": [10853187584, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.o_proj.weight": [10886742016, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.gate_proj.weight": [10920296448, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.up_proj.weight": [11010473984, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.down_proj.weight": [11100651520, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.26.input_layernorm.weight": [11190829056, 8192, [4096], [1], "torch.float16"], "model.layers.26.post_attention_layernorm.weight": [11190837248, 8192, [4096], [1], "torch.float16"], "model.layers.27.self_attn.q_proj.weight": [11190845440, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.self_attn.k_proj.weight": [11224399872, 33554432, [4096, 4096], [4096, 1], "torch.float16"], 
"model.layers.27.self_attn.v_proj.weight": [11257954304, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.self_attn.o_proj.weight": [11291508736, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.gate_proj.weight": [11325063168, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.up_proj.weight": [11415240704, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.down_proj.weight": [11505418240, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.27.input_layernorm.weight": [11595595776, 8192, [4096], [1], "torch.float16"], "model.layers.27.post_attention_layernorm.weight": [11595603968, 8192, [4096], [1], "torch.float16"], "model.layers.28.self_attn.q_proj.weight": [11595612160, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.k_proj.weight": [11629166592, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.v_proj.weight": [11662721024, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.o_proj.weight": [11696275456, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.gate_proj.weight": [11729829888, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.up_proj.weight": [11820007424, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.down_proj.weight": [11910184960, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.28.input_layernorm.weight": [12000362496, 8192, [4096], [1], "torch.float16"], "model.layers.28.post_attention_layernorm.weight": [12000370688, 8192, [4096], [1], "torch.float16"], "model.layers.29.self_attn.q_proj.weight": [12000378880, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.self_attn.k_proj.weight": [12033933312, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.self_attn.v_proj.weight": [12067487744, 33554432, [4096, 4096], 
[4096, 1], "torch.float16"], "model.layers.29.self_attn.o_proj.weight": [12101042176, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.gate_proj.weight": [12134596608, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.up_proj.weight": [12224774144, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.down_proj.weight": [12314951680, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.29.input_layernorm.weight": [12405129216, 8192, [4096], [1], "torch.float16"], "model.layers.29.post_attention_layernorm.weight": [12405137408, 8192, [4096], [1], "torch.float16"], "model.layers.30.self_attn.q_proj.weight": [12405145600, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.k_proj.weight": [12438700032, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.v_proj.weight": [12472254464, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.o_proj.weight": [12505808896, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.gate_proj.weight": [12539363328, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.up_proj.weight": [12629540864, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.down_proj.weight": [12719718400, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.30.input_layernorm.weight": [12809895936, 8192, [4096], [1], "torch.float16"], "model.layers.30.post_attention_layernorm.weight": [12809904128, 8192, [4096], [1], "torch.float16"], "model.layers.31.self_attn.q_proj.weight": [12809912320, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.k_proj.weight": [12843466752, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.v_proj.weight": [12877021184, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.o_proj.weight": 
[12910575616, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.gate_proj.weight": [12944130048, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.up_proj.weight": [13034307584, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.down_proj.weight": [13124485120, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.31.input_layernorm.weight": [13214662656, 8192, [4096], [1], "torch.float16"], "model.layers.31.post_attention_layernorm.weight": [13214670848, 8192, [4096], [1], "torch.float16"], "model.norm.weight": [13214679040, 8192, [4096], [1], "torch.float16"], "lm_head.weight": [13214687232, 262144000, [32000, 4096], [4096, 1], "torch.float16"]}
tied_no_split_modules.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []