Frinkles committed (verified)
Commit: 89c89ad
Parent(s): daa0776

Upload Phi3ForCausalLM
config.json CHANGED
@@ -1,13 +1,13 @@
 {
-  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "_name_or_path": "Complete-Gradual-Training/batch_1",
   "architectures": [
     "Phi3ForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "auto_map": {
-    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
-    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
+    "AutoConfig": "configuration_phi3.Phi3Config",
+    "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
   },
   "bos_token_id": 1,
   "embd_pdrop": 0.0,
@@ -30,7 +30,7 @@
   "sliding_window": 2047,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.3",
+  "transformers_version": "4.43.0.dev0",
   "use_cache": true,
-  "vocab_size": 32064
+  "vocab_size": 200000
 }
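The `auto_map` entries now point at bare module names (`configuration_phi3.Phi3Config`, `modeling_phi3.Phi3ForCausalLM`), so the custom Phi-3 code is resolved from this repository itself rather than from `microsoft/Phi-3-mini-4k-instruct`; loading therefore requires `trust_remote_code=True`. A minimal loading sketch (the repo id below is a placeholder, since this commit page does not show it):

```python
# Minimal loading sketch. "Frinkles/<repo>" is a placeholder; the actual
# repository id is not shown on this commit page.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "Frinkles/<repo>"  # hypothetical

# auto_map now names local modules, so trust_remote_code must be enabled
# for transformers to import configuration_phi3.py / modeling_phi3.py
# from the repository itself.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.vocab_size)  # 200000 after this commit (was 32064)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
    trust_remote_code=True,
)
```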
generation_config.json CHANGED
@@ -7,5 +7,5 @@
     32007
   ],
   "pad_token_id": 32000,
-  "transformers_version": "4.42.3"
+  "transformers_version": "4.43.0.dev0"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7492726c01287bf6e13c3d74c65ade3d436d50da1cf5bb6925bc962419d6610
-size 4972489328
+oid sha256:3a284a02d9aa866e1a0b4617fcb36e1429fce5b1963530e57ee44d0d2e5a6c03
+size 4928384336
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f311787aa136e858556caa8543015161edcad85ba81b6a36072443d7fa73c87
-size 2669692552
+oid sha256:f0ae52740f49346db433aa9296bf2d0e91573bd94e75ebe63500f42a9c43bfd2
+size 4777395200
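Both shard entries above are Git LFS pointers: `oid` is the SHA-256 of the file contents and `size` its byte length, so a downloaded shard can be checked offline against the pointer. A minimal verification sketch (the local path is assumed):

```python
# Verify a downloaded shard against its Git LFS pointer (oid = SHA-256 of
# the file contents, size = byte length). The local path is hypothetical.
import hashlib
import os

def verify_shard(path: str, expected_oid: str, expected_size: int) -> bool:
    # Cheap check first: the byte length must match the pointer's size.
    if os.path.getsize(path) != expected_size:
        return False
    # Stream the file in 1 MiB chunks to hash it without loading it all.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid

# Values taken from the first shard's pointer in this commit:
ok = verify_shard(
    "model-00001-of-00002.safetensors",
    "3a284a02d9aa866e1a0b4617fcb36e1429fce5b1963530e57ee44d0d2e5a6c03",
    4928384336,
)
print(ok)
```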
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 7642159104
+    "total_size": 9705756672
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
@@ -53,47 +53,47 @@
   "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
   "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+  "model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.16.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
   "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
+  "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.17.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.17.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.18.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.19.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
   "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
   "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
-  "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors",
+  "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.20.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+  "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+  "model.layers.20.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
   "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+  "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
   "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",