gerou161 commited on
Commit
73b7141
·
verified ·
1 Parent(s): c48c77b

Add files using upload-large-folder tool

Browse files
0000001000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000001000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562162e335add833b1f213040f95ab7fe97ab6bf7831da8b154c300b0520fd57
3
+ size 709111698
0000002000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000002000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2e7413280770f533e7e863b9b22c6cc10f43f900d44c394a81fc7706041ecf
3
+ size 709111698
0000003000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000003000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2be48dcbee4ba08f9057c51b5fb1e57938ab3afa35436f9c22901b4d529a707
3
+ size 709111698
0000004000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000004000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50413c1d2ceceba92d31c5f3d45c74102ab701ab30cec92ab2b29b139f6db95
3
+ size 709111698
0000005000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000005000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e026ed04e46571e158f07a61cef80d5044957b4d7fd5ef58492acb8be7c952
3
+ size 709111698
0000006000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000006000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6c3679da9f7c4208aa969a7b6fa30f6458123942d437b6d692d3f020109adb
3
+ size 709111698
0000007000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000007000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f7671a84ddd81a3d0b7662366062a348c48ff834e99a213c30e3e8f6ca649ee
3
+ size 709111698
0000008000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000008000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c4821e269dd8869d60bd4bc0c23100ff514aadd153b19ade39beba317ad657d
3
+ size 709111698
0000009000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000009000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdf8425347c73270d2c0a33c186b4380f87b218433ffd532b5f670fccb3a3fc3
3
+ size 709111698
0000010000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000010000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db6f20aee284badabcf509c3ad3dd12c2513441fec9951c45eb32093adac273
3
+ size 709111698
0000011000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000011000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b12a82c8f43bd8ffb7fd6544bed3958d3b1be93469d1e8ad9c6c06f1f8f33452
3
+ size 709111698
0000012000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000012000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b024dbcc3db216ac95f5835d8a0d208ac08fb419bd1ed99aee7b0660dd888f7
3
+ size 709111698
0000013000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000013000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1414f1084dcf7eef245321daa5582f98c6ed60f59818c6c957cc9af0048ce917
3
+ size 709111698
0000014000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000014000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8b13a6c5a4d53f3d55ba8bd5752f4803a9a08493aeef18cf065f3834161953
3
+ size 709111698
0000015000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000015000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0086fd21ed757dac97ef72d522eb51b4f83cd28a4f044917fc0ac385cf680b3e
3
+ size 709111698
0000016000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000016000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fea0d263680555f64d92578e5bc49a472aaf255a82aa97f6a9f6f5b46e7a758
3
+ size 709111698
0000017000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000017000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf7144795511fa5d99a2357f76f551c43122f93dd84af368a416e40a8a408c61
3
+ size 709111698
0000018000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000018000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cf5e556e041da32cadb7a080d7c59acf753967261408546230b8a0258dda5f
3
+ size 709111698
0000019000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000019000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1baf6886a6d5299bdc5b13e9b89d97b190b4477429f86ff0ff957ab9c69ad55
3
+ size 709111698
0000020000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_bias": false,
3
+ "attention_dropout": 0.0,
4
+ "bos_token_id": 128000,
5
+ "eos_token_id": 128001,
6
+ "head_dim": 64,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 1280,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5120,
11
+ "max_position_embeddings": 2048,
12
+ "mlp_bias": false,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 20,
15
+ "num_hidden_layers": 1,
16
+ "num_key_value_heads": 20,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": true,
25
+ "vocab_size": 128256
26
+ }
0000020000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f093584bc6d6b20fa567d67021f4a0e6e1ccaed744270f909c4c9edf84f07e8
3
+ size 709111698