Upload ServerlessLLM format model
Browse files- .gitattributes +2 -0
- config.json +31 -0
- generation_config.json +10 -0
- no_split_modules.json +1 -0
- tensor.data_0 +3 -0
- tensor.data_1 +3 -0
- tensor_index.json +1 -0
- tied_no_split_modules.json +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tensor.data_0 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tensor.data_1 filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_attn_implementation_autoset": true,
|
| 3 |
+
"_name_or_path": "meta-llama/Llama-2-7b-hf",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"LlamaForCausalLM"
|
| 6 |
+
],
|
| 7 |
+
"attention_bias": false,
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 11008,
|
| 16 |
+
"max_position_embeddings": 4096,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 32,
|
| 20 |
+
"num_hidden_layers": 32,
|
| 21 |
+
"num_key_value_heads": 32,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-05,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 10000.0,
|
| 26 |
+
"tie_word_embeddings": false,
|
| 27 |
+
"torch_dtype": "float16",
|
| 28 |
+
"transformers_version": "4.47.1",
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 32000
|
| 31 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 1,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
+
"max_length": 4096,
|
| 6 |
+
"pad_token_id": 0,
|
| 7 |
+
"temperature": 0.6,
|
| 8 |
+
"top_p": 0.9,
|
| 9 |
+
"transformers_version": "4.47.1"
|
| 10 |
+
}
|
no_split_modules.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model.embed_tokens": 262144000, "model.layers.0": 404766720, "model.layers.1": 404766720, "model.layers.2": 404766720, "model.layers.3": 404766720, "model.layers.4": 404766720, "model.layers.5": 404766720, "model.layers.6": 404766720, "model.layers.7": 404766720, "model.layers.8": 404766720, "model.layers.9": 404766720, "model.layers.10": 404766720, "model.layers.11": 404766720, "model.layers.12": 404766720, "model.layers.13": 404766720, "model.layers.14": 404766720, "model.layers.15": 404766720, "model.layers.16": 404766720, "model.layers.17": 404766720, "model.layers.18": 404766720, "model.layers.19": 404766720, "model.layers.20": 404766720, "model.layers.21": 404766720, "model.layers.22": 404766720, "model.layers.23": 404766720, "model.layers.24": 404766720, "model.layers.25": 404766720, "model.layers.26": 404766720, "model.layers.27": 404766720, "model.layers.28": 404766720, "model.layers.29": 404766720, "model.layers.30": 404766720, "model.layers.31": 404766720, "model.norm": 8192, "model.rotary_emb": 0, "lm_head": 262144000}
|
tensor.data_0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dbe1c7a84a0cf328be628d69db832e3ca25054d59c4c37ab8dc4af2b8748f81
|
| 3 |
+
size 10695884800
|
tensor.data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48d5c714ee0bd2c0e7d3e2c93d38f660c524643cdbd0e94584384484bfceaa4c
|
| 3 |
+
size 2780946432
|
tensor_index.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model.embed_tokens.weight": [0, 262144000, [32000, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.q_proj.weight": [262144000, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.k_proj.weight": [295698432, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.v_proj.weight": [329252864, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.self_attn.o_proj.weight": [362807296, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.gate_proj.weight": [396361728, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.up_proj.weight": [486539264, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.0.mlp.down_proj.weight": [576716800, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.0.input_layernorm.weight": [666894336, 8192, [4096], [1], "torch.float16"], "model.layers.0.post_attention_layernorm.weight": [666902528, 8192, [4096], [1], "torch.float16"], "model.layers.1.self_attn.q_proj.weight": [666910720, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.k_proj.weight": [700465152, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.v_proj.weight": [734019584, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.self_attn.o_proj.weight": [767574016, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.gate_proj.weight": [801128448, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.up_proj.weight": [891305984, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.1.mlp.down_proj.weight": [981483520, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.1.input_layernorm.weight": [1071661056, 8192, [4096], [1], "torch.float16"], "model.layers.1.post_attention_layernorm.weight": [1071669248, 8192, [4096], [1], "torch.float16"], "model.layers.2.self_attn.q_proj.weight": [1071677440, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.k_proj.weight": [1105231872, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.v_proj.weight": [1138786304, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.self_attn.o_proj.weight": [1172340736, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.gate_proj.weight": [1205895168, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.up_proj.weight": [1296072704, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.2.mlp.down_proj.weight": [1386250240, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.2.input_layernorm.weight": [1476427776, 8192, [4096], [1], "torch.float16"], "model.layers.2.post_attention_layernorm.weight": [1476435968, 8192, [4096], [1], "torch.float16"], "model.layers.3.self_attn.q_proj.weight": [1476444160, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.k_proj.weight": [1509998592, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.v_proj.weight": [1543553024, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.self_attn.o_proj.weight": [1577107456, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.gate_proj.weight": [1610661888, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.up_proj.weight": [1700839424, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.3.mlp.down_proj.weight": [1791016960, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.3.input_layernorm.weight": [1881194496, 8192, [4096], [1], "torch.float16"], "model.layers.3.post_attention_layernorm.weight": [1881202688, 8192, [4096], [1], "torch.float16"], "model.layers.4.self_attn.q_proj.weight": [1881210880, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.k_proj.weight": [1914765312, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.v_proj.weight": [1948319744, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.self_attn.o_proj.weight": [1981874176, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.gate_proj.weight": [2015428608, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.up_proj.weight": [2105606144, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.4.mlp.down_proj.weight": [2195783680, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.4.input_layernorm.weight": [2285961216, 8192, [4096], [1], "torch.float16"], "model.layers.4.post_attention_layernorm.weight": [2285969408, 8192, [4096], [1], "torch.float16"], "model.layers.5.self_attn.q_proj.weight": [2285977600, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.k_proj.weight": [2319532032, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.v_proj.weight": [2353086464, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.self_attn.o_proj.weight": [2386640896, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.gate_proj.weight": [2420195328, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.up_proj.weight": [2510372864, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.5.mlp.down_proj.weight": [2600550400, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.5.input_layernorm.weight": [2690727936, 8192, [4096], [1], "torch.float16"], "model.layers.5.post_attention_layernorm.weight": [2690736128, 8192, [4096], [1], "torch.float16"], "model.layers.6.self_attn.q_proj.weight": [2690744320, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.k_proj.weight": [2724298752, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.v_proj.weight": [2757853184, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.self_attn.o_proj.weight": [2791407616, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.gate_proj.weight": [2824962048, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.up_proj.weight": [2915139584, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.6.mlp.down_proj.weight": [3005317120, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.6.input_layernorm.weight": [3095494656, 8192, [4096], [1], "torch.float16"], "model.layers.6.post_attention_layernorm.weight": [3095502848, 8192, [4096], [1], "torch.float16"], "model.layers.7.self_attn.q_proj.weight": [3095511040, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.k_proj.weight": [3129065472, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.v_proj.weight": [3162619904, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.self_attn.o_proj.weight": [3196174336, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.gate_proj.weight": [3229728768, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.up_proj.weight": [3319906304, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.7.mlp.down_proj.weight": [3410083840, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.7.input_layernorm.weight": [3500261376, 8192, [4096], [1], "torch.float16"], "model.layers.7.post_attention_layernorm.weight": [3500269568, 8192, [4096], [1], "torch.float16"], "model.layers.8.self_attn.q_proj.weight": [3500277760, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.k_proj.weight": [3533832192, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.v_proj.weight": [3567386624, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.self_attn.o_proj.weight": [3600941056, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.gate_proj.weight": [3634495488, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.up_proj.weight": [3724673024, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.8.mlp.down_proj.weight": [3814850560, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.8.input_layernorm.weight": [3905028096, 8192, [4096], [1], "torch.float16"], "model.layers.8.post_attention_layernorm.weight": [3905036288, 8192, [4096], [1], "torch.float16"], "model.layers.9.self_attn.q_proj.weight": [3905044480, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.k_proj.weight": [3938598912, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.v_proj.weight": [3972153344, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.self_attn.o_proj.weight": [4005707776, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.gate_proj.weight": [4039262208, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.up_proj.weight": [4129439744, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.9.mlp.down_proj.weight": [4219617280, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.9.input_layernorm.weight": [4309794816, 8192, [4096], [1], "torch.float16"], "model.layers.9.post_attention_layernorm.weight": [4309803008, 8192, [4096], [1], "torch.float16"], "model.layers.10.self_attn.q_proj.weight": [4309811200, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.k_proj.weight": [4343365632, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.v_proj.weight": [4376920064, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.self_attn.o_proj.weight": [4410474496, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.gate_proj.weight": [4444028928, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.up_proj.weight": [4534206464, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.10.mlp.down_proj.weight": [4624384000, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.10.input_layernorm.weight": [4714561536, 8192, [4096], [1], "torch.float16"], "model.layers.10.post_attention_layernorm.weight": [4714569728, 8192, [4096], [1], "torch.float16"], "model.layers.11.self_attn.q_proj.weight": [4714577920, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.k_proj.weight": [4748132352, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.v_proj.weight": [4781686784, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.self_attn.o_proj.weight": [4815241216, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.gate_proj.weight": [4848795648, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.up_proj.weight": [4938973184, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.11.mlp.down_proj.weight": [5029150720, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.11.input_layernorm.weight": [5119328256, 8192, [4096], [1], "torch.float16"], "model.layers.11.post_attention_layernorm.weight": [5119336448, 8192, [4096], [1], "torch.float16"], "model.layers.12.self_attn.q_proj.weight": [5119344640, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.k_proj.weight": [5152899072, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.v_proj.weight": [5186453504, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.self_attn.o_proj.weight": [5220007936, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.12.mlp.gate_proj.weight": [5253562368, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.12.mlp.up_proj.weight": [5343739904, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.12.mlp.down_proj.weight": [5433917440, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.12.input_layernorm.weight": [5524094976, 8192, [4096], [1], "torch.float16"], "model.layers.12.post_attention_layernorm.weight": [5524103168, 8192, [4096], [1], "torch.float16"], "model.layers.13.self_attn.q_proj.weight": [5524111360, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.k_proj.weight": [5557665792, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.v_proj.weight": [5591220224, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.self_attn.o_proj.weight": [5624774656, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.gate_proj.weight": [5658329088, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.up_proj.weight": [5748506624, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.13.mlp.down_proj.weight": [5838684160, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.13.input_layernorm.weight": [5928861696, 8192, [4096], [1], "torch.float16"], "model.layers.13.post_attention_layernorm.weight": [5928869888, 8192, [4096], [1], "torch.float16"], "model.layers.14.self_attn.q_proj.weight": [5928878080, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.k_proj.weight": [5962432512, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.v_proj.weight": [5995986944, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.self_attn.o_proj.weight": [6029541376, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.14.mlp.gate_proj.weight": [6063095808, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.14.mlp.up_proj.weight": [6153273344, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.14.mlp.down_proj.weight": [6243450880, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.14.input_layernorm.weight": [6333628416, 8192, [4096], [1], "torch.float16"], "model.layers.14.post_attention_layernorm.weight": [6333636608, 8192, [4096], [1], "torch.float16"], "model.layers.15.self_attn.q_proj.weight": [6333644800, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.k_proj.weight": [6367199232, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.v_proj.weight": [6400753664, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.self_attn.o_proj.weight": [6434308096, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.gate_proj.weight": [6467862528, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.up_proj.weight": [6558040064, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.15.mlp.down_proj.weight": [6648217600, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.15.input_layernorm.weight": [6738395136, 8192, [4096], [1], "torch.float16"], "model.layers.15.post_attention_layernorm.weight": [6738403328, 8192, [4096], [1], "torch.float16"], "model.layers.16.self_attn.q_proj.weight": [6738411520, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.k_proj.weight": [6771965952, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.v_proj.weight": [6805520384, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.self_attn.o_proj.weight": [6839074816, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.gate_proj.weight": [6872629248, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.up_proj.weight": [6962806784, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.16.mlp.down_proj.weight": [7052984320, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.16.input_layernorm.weight": [7143161856, 8192, [4096], [1], "torch.float16"], "model.layers.16.post_attention_layernorm.weight": [7143170048, 8192, [4096], [1], "torch.float16"], "model.layers.17.self_attn.q_proj.weight": [7143178240, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.k_proj.weight": [7176732672, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.v_proj.weight": [7210287104, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.self_attn.o_proj.weight": [7243841536, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.gate_proj.weight": [7277395968, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.up_proj.weight": [7367573504, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.17.mlp.down_proj.weight": [7457751040, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.17.input_layernorm.weight": [7547928576, 8192, [4096], [1], "torch.float16"], "model.layers.17.post_attention_layernorm.weight": [7547936768, 8192, [4096], [1], "torch.float16"], "model.layers.18.self_attn.q_proj.weight": [7547944960, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.k_proj.weight": [7581499392, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.v_proj.weight": [7615053824, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.self_attn.o_proj.weight": [7648608256, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.gate_proj.weight": [7682162688, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.up_proj.weight": [7772340224, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.18.mlp.down_proj.weight": [7862517760, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.18.input_layernorm.weight": [7952695296, 8192, [4096], [1], "torch.float16"], "model.layers.18.post_attention_layernorm.weight": [7952703488, 8192, [4096], [1], "torch.float16"], "model.layers.19.self_attn.q_proj.weight": [7952711680, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.k_proj.weight": [7986266112, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.v_proj.weight": [8019820544, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.self_attn.o_proj.weight": [8053374976, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.gate_proj.weight": [8086929408, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.up_proj.weight": [8177106944, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.19.mlp.down_proj.weight": [8267284480, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.19.input_layernorm.weight": [8357462016, 8192, [4096], [1], "torch.float16"], "model.layers.19.post_attention_layernorm.weight": [8357470208, 8192, [4096], [1], "torch.float16"], "model.layers.20.self_attn.q_proj.weight": [8357478400, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.k_proj.weight": [8391032832, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.v_proj.weight": [8424587264, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.self_attn.o_proj.weight": [8458141696, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.gate_proj.weight": [8491696128, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.up_proj.weight": [8581873664, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.20.mlp.down_proj.weight": [8672051200, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.20.input_layernorm.weight": [8762228736, 8192, [4096], [1], "torch.float16"], "model.layers.20.post_attention_layernorm.weight": [8762236928, 8192, [4096], [1], "torch.float16"], "model.layers.21.self_attn.q_proj.weight": [8762245120, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.k_proj.weight": [8795799552, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.v_proj.weight": [8829353984, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.self_attn.o_proj.weight": [8862908416, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.gate_proj.weight": [8896462848, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.up_proj.weight": [8986640384, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.21.mlp.down_proj.weight": [9076817920, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.21.input_layernorm.weight": [9166995456, 8192, [4096], [1], "torch.float16"], "model.layers.21.post_attention_layernorm.weight": [9167003648, 8192, [4096], [1], "torch.float16"], "model.layers.22.self_attn.q_proj.weight": [9167011840, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.k_proj.weight": [9200566272, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.v_proj.weight": [9234120704, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.self_attn.o_proj.weight": [9267675136, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.gate_proj.weight": [9301229568, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.up_proj.weight": [9391407104, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.22.mlp.down_proj.weight": [9481584640, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.22.input_layernorm.weight": [9571762176, 8192, [4096], [1], "torch.float16"], "model.layers.22.post_attention_layernorm.weight": [9571770368, 8192, [4096], [1], "torch.float16"], "model.layers.23.self_attn.q_proj.weight": [9571778560, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.k_proj.weight": [9605332992, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.v_proj.weight": [9638887424, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.self_attn.o_proj.weight": [9672441856, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.gate_proj.weight": [9705996288, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.up_proj.weight": [9796173824, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.23.mlp.down_proj.weight": [9886351360, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.23.input_layernorm.weight": [9976528896, 8192, [4096], [1], "torch.float16"], "model.layers.23.post_attention_layernorm.weight": [9976537088, 8192, [4096], [1], "torch.float16"], "model.layers.24.self_attn.q_proj.weight": [9976545280, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.k_proj.weight": [10010099712, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.v_proj.weight": [10043654144, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.self_attn.o_proj.weight": [10077208576, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.gate_proj.weight": [10110763008, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.up_proj.weight": [10200940544, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.24.mlp.down_proj.weight": [10291118080, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.24.input_layernorm.weight": [10381295616, 8192, [4096], [1], "torch.float16"], "model.layers.24.post_attention_layernorm.weight": [10381303808, 8192, [4096], [1], "torch.float16"], "model.layers.25.self_attn.q_proj.weight": [10381312000, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.k_proj.weight": [10414866432, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.v_proj.weight": [10448420864, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.self_attn.o_proj.weight": [10481975296, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.gate_proj.weight": [10515529728, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.up_proj.weight": [10605707264, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.25.mlp.down_proj.weight": [10695884800, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.25.input_layernorm.weight": [10786062336, 8192, [4096], [1], "torch.float16"], "model.layers.25.post_attention_layernorm.weight": [10786070528, 8192, [4096], [1], "torch.float16"], "model.layers.26.self_attn.q_proj.weight": [10786078720, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.k_proj.weight": [10819633152, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.v_proj.weight": [10853187584, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.self_attn.o_proj.weight": [10886742016, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.gate_proj.weight": [10920296448, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.up_proj.weight": [11010473984, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.26.mlp.down_proj.weight": [11100651520, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.26.input_layernorm.weight": [11190829056, 8192, [4096], [1], "torch.float16"], "model.layers.26.post_attention_layernorm.weight": [11190837248, 8192, [4096], [1], "torch.float16"], "model.layers.27.self_attn.q_proj.weight": [11190845440, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.self_attn.k_proj.weight": [11224399872, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.self_attn.v_proj.weight": [11257954304, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.self_attn.o_proj.weight": [11291508736, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.gate_proj.weight": [11325063168, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.up_proj.weight": [11415240704, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.27.mlp.down_proj.weight": [11505418240, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.27.input_layernorm.weight": [11595595776, 8192, [4096], [1], "torch.float16"], "model.layers.27.post_attention_layernorm.weight": [11595603968, 8192, [4096], [1], "torch.float16"], "model.layers.28.self_attn.q_proj.weight": [11595612160, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.k_proj.weight": [11629166592, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.v_proj.weight": [11662721024, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.self_attn.o_proj.weight": [11696275456, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.gate_proj.weight": [11729829888, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.up_proj.weight": [11820007424, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.28.mlp.down_proj.weight": [11910184960, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.28.input_layernorm.weight": [12000362496, 8192, [4096], [1], "torch.float16"], "model.layers.28.post_attention_layernorm.weight": [12000370688, 8192, [4096], [1], "torch.float16"], "model.layers.29.self_attn.q_proj.weight": [12000378880, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.self_attn.k_proj.weight": [12033933312, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.self_attn.v_proj.weight": [12067487744, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.self_attn.o_proj.weight": [12101042176, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.gate_proj.weight": [12134596608, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.up_proj.weight": [12224774144, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.29.mlp.down_proj.weight": [12314951680, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.29.input_layernorm.weight": [12405129216, 8192, [4096], [1], "torch.float16"], "model.layers.29.post_attention_layernorm.weight": [12405137408, 8192, [4096], [1], "torch.float16"], "model.layers.30.self_attn.q_proj.weight": [12405145600, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.k_proj.weight": [12438700032, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.v_proj.weight": [12472254464, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.self_attn.o_proj.weight": [12505808896, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.gate_proj.weight": [12539363328, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.up_proj.weight": [12629540864, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.30.mlp.down_proj.weight": [12719718400, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.30.input_layernorm.weight": [12809895936, 8192, [4096], [1], "torch.float16"], "model.layers.30.post_attention_layernorm.weight": [12809904128, 8192, [4096], [1], "torch.float16"], "model.layers.31.self_attn.q_proj.weight": [12809912320, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.k_proj.weight": [12843466752, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.v_proj.weight": [12877021184, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.self_attn.o_proj.weight": [12910575616, 33554432, [4096, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.gate_proj.weight": [12944130048, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.up_proj.weight": [13034307584, 90177536, [11008, 4096], [4096, 1], "torch.float16"], "model.layers.31.mlp.down_proj.weight": [13124485120, 90177536, [4096, 11008], [11008, 1], "torch.float16"], "model.layers.31.input_layernorm.weight": [13214662656, 8192, [4096], [1], "torch.float16"], "model.layers.31.post_attention_layernorm.weight": [13214670848, 8192, [4096], [1], "torch.float16"], "model.norm.weight": [13214679040, 8192, [4096], [1], "torch.float16"], "lm_head.weight": [13214687232, 262144000, [32000, 4096], [4096, 1], "torch.float16"]}
|
tied_no_split_modules.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|