michaelbzhu committed on
Commit
2c15c8b
·
verified ·
1 Parent(s): 9eb4614

Upload model

Browse files
config.json CHANGED
@@ -2,15 +2,12 @@
2
  "architectures": [
3
  "CustomModel"
4
  ],
5
- "auto_map": {
6
- "AutoConfig": "configuration.CustomConfig",
7
- "AutoModelForCausalLM": "modeling.CustomModel",
8
- "AutoModel": "modeling.CustomModel"
9
- },
10
  "d_head": 128,
11
  "d_model": 8192,
 
 
12
  "n_heads": 64,
13
  "n_layers": 3,
14
  "n_vocab": 50257,
15
- "model_type": "custom-mbz-test"
16
- }
 
2
  "architectures": [
3
  "CustomModel"
4
  ],
 
 
 
 
 
5
  "d_head": 128,
6
  "d_model": 8192,
7
+ "dtype": "float32",
8
+ "model_type": "custom-mbz-test",
9
  "n_heads": 64,
10
  "n_layers": 3,
11
  "n_vocab": 50257,
12
+ "transformers_version": "4.56.0"
13
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:507b764f21cc315722ccfdeeacb2929690d18d81bf6a8c3f5ff05474279d9d2d
3
+ size 4868277384
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b087dc7a7f118e97a7e2d69029ce6ca2af2e5c3f66887841295f6988e6007697
3
+ size 4295198168
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2189cfc700dfe1aa6e616001b5cd06882c664c179328a138e487c49b8dce8cdc
3
+ size 3794769116
model.safetensors.index.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 3239560273,
4
+ "total_size": 12958241092
5
+ },
6
+ "weight_map": {
7
+ "model.blocks.0.attn.Wk.weight": "model-00001-of-00003.safetensors",
8
+ "model.blocks.0.attn.Wo.weight": "model-00001-of-00003.safetensors",
9
+ "model.blocks.0.attn.Wq.weight": "model-00001-of-00003.safetensors",
10
+ "model.blocks.0.attn.Wv.weight": "model-00001-of-00003.safetensors",
11
+ "model.blocks.0.mlp.0.bias": "model-00001-of-00003.safetensors",
12
+ "model.blocks.0.mlp.0.weight": "model-00001-of-00003.safetensors",
13
+ "model.blocks.0.mlp.2.bias": "model-00001-of-00003.safetensors",
14
+ "model.blocks.0.mlp.2.weight": "model-00001-of-00003.safetensors",
15
+ "model.blocks.0.norm1.weight": "model-00001-of-00003.safetensors",
16
+ "model.blocks.0.norm2.weight": "model-00001-of-00003.safetensors",
17
+ "model.blocks.1.attn.Wk.weight": "model-00002-of-00003.safetensors",
18
+ "model.blocks.1.attn.Wo.weight": "model-00002-of-00003.safetensors",
19
+ "model.blocks.1.attn.Wq.weight": "model-00002-of-00003.safetensors",
20
+ "model.blocks.1.attn.Wv.weight": "model-00002-of-00003.safetensors",
21
+ "model.blocks.1.mlp.0.bias": "model-00002-of-00003.safetensors",
22
+ "model.blocks.1.mlp.0.weight": "model-00002-of-00003.safetensors",
23
+ "model.blocks.1.mlp.2.bias": "model-00002-of-00003.safetensors",
24
+ "model.blocks.1.mlp.2.weight": "model-00002-of-00003.safetensors",
25
+ "model.blocks.1.norm1.weight": "model-00002-of-00003.safetensors",
26
+ "model.blocks.1.norm2.weight": "model-00002-of-00003.safetensors",
27
+ "model.blocks.2.attn.Wk.weight": "model-00002-of-00003.safetensors",
28
+ "model.blocks.2.attn.Wo.weight": "model-00002-of-00003.safetensors",
29
+ "model.blocks.2.attn.Wq.weight": "model-00002-of-00003.safetensors",
30
+ "model.blocks.2.attn.Wv.weight": "model-00002-of-00003.safetensors",
31
+ "model.blocks.2.mlp.0.bias": "model-00003-of-00003.safetensors",
32
+ "model.blocks.2.mlp.0.weight": "model-00003-of-00003.safetensors",
33
+ "model.blocks.2.mlp.2.bias": "model-00003-of-00003.safetensors",
34
+ "model.blocks.2.mlp.2.weight": "model-00003-of-00003.safetensors",
35
+ "model.blocks.2.norm1.weight": "model-00003-of-00003.safetensors",
36
+ "model.blocks.2.norm2.weight": "model-00003-of-00003.safetensors",
37
+ "model.embed.weight": "model-00001-of-00003.safetensors",
38
+ "model.norm.weight": "model-00003-of-00003.safetensors",
39
+ "model.out_head.bias": "model-00003-of-00003.safetensors",
40
+ "model.out_head.weight": "model-00003-of-00003.safetensors"
41
+ }
42
+ }