aladinggit committed on
Commit
4945764
·
verified ·
1 Parent(s): e2fdc84

Upload folder using huggingface_hub

Browse files
Files changed (42) hide show
  1. lm318/model/bio/config.json +24 -0
  2. lm318/model/bio/pytorch_model.bin +3 -0
  3. lm318/model/chem/config.json +24 -0
  4. lm318/model/chem/pytorch_model.bin +3 -0
  5. lm318/model/cnndm/config.json +24 -0
  6. lm318/model/cnndm/pytorch_model.bin +3 -0
  7. lm318/model/math/config.json +24 -0
  8. lm318/model/math/pytorch_model.bin +3 -0
  9. lm318/model/med/config.json +24 -0
  10. lm318/model/med/pytorch_model.bin +3 -0
  11. lm318/model/python/config.json +24 -0
  12. lm318/model/python/pytorch_model.bin +3 -0
  13. lm318/model/sql/config.json +24 -0
  14. lm318/model/sql/pytorch_model.bin +3 -0
  15. qw332/model/bio-32/config.json +33 -0
  16. qw332/model/bio-32/model.safetensors +3 -0
  17. qw332/model/chem-32/config.json +33 -0
  18. qw332/model/chem-32/model.safetensors +3 -0
  19. qw332/model/cnndm-32/config.json +33 -0
  20. qw332/model/cnndm-32/model.safetensors +3 -0
  21. qw332/model/math-32/config.json +33 -0
  22. qw332/model/math-32/model.safetensors +3 -0
  23. qw332/model/med-32/config.json +33 -0
  24. qw332/model/med-32/model.safetensors +3 -0
  25. qw332/model/python-32/config.json +33 -0
  26. qw332/model/python-32/model.safetensors +3 -0
  27. qw332/model/sql-32/config.json +33 -0
  28. qw332/model/sql-32/model.safetensors +3 -0
  29. qw38/model/bio/config.json +31 -0
  30. qw38/model/bio/pytorch_model.bin +3 -0
  31. qw38/model/chem/config.json +31 -0
  32. qw38/model/chem/pytorch_model.bin +3 -0
  33. qw38/model/cnndm/config.json +31 -0
  34. qw38/model/cnndm/pytorch_model.bin +3 -0
  35. qw38/model/math/config.json +31 -0
  36. qw38/model/math/pytorch_model.bin +3 -0
  37. qw38/model/med/config.json +31 -0
  38. qw38/model/med/pytorch_model.bin +3 -0
  39. qw38/model/python/config.json +31 -0
  40. qw38/model/python/pytorch_model.bin +3 -0
  41. qw38/model/sql/config.json +31 -0
  42. qw38/model/sql/pytorch_model.bin +3 -0
lm318/model/bio/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/bio/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed23d1ff7fabd505ac7230ca8f1337b4b4d2e9ac46122af20f89606c788bc36
3
+ size 849795390
lm318/model/chem/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/chem/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b404343ddd103cf115445e4ddc054a09811edc201ac967fb553f450f17efc23b
3
+ size 849795390
lm318/model/cnndm/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/cnndm/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9059d8dad391f7de2d8255cf7881561360c5d5fd590d31114d4d82c1c95b166
3
+ size 849795390
lm318/model/math/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/math/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93973d8d0c0cd1dedc5bffd45ec94ef0a1cec5ea90f4ec2a5d955f962e28d1a7
3
+ size 849795390
lm318/model/med/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/med/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d1176c76d9d43f61aa8d252db6b0ef1b4b4f6e8d5449ae83df98fa955e29b6a
3
+ size 849795390
lm318/model/python/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/python/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e122c91bd679745e531d4cad58abd464ba5988c821370a1d2b3a38ec380a8bd
3
+ size 849795390
lm318/model/sql/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
lm318/model/sql/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee49ed916d45050e2bfa4f59529cfcacce227c2a0cc9850ba8143c4ef8054aa
3
+ size 849795390
qw332/model/bio-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/bio-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7569163b040ed5356e92b04a2833836795128064aa2a7f412306005e214bd4
3
+ size 1455349624
qw332/model/chem-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/chem-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:114b1d55c8ac5c21738b4b4bd720ef6fdc4939f6957fda03f381a906e5b76fcb
3
+ size 1455349624
qw332/model/cnndm-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/cnndm-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbf5677ef704dc40f2405d265055e937b5088f9364391630b43ad33a37194e3
3
+ size 1455349624
qw332/model/math-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/math-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fff6eb79957c1958a959a9783b24b33e31966b850558ade9b22adb5f9530181
3
+ size 1455349624
qw332/model/med-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/med-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da3febd0784215e48ffea5fbd13af4866ae73cf482465ada01829adb3cb491e
3
+ size 1455349624
qw332/model/python-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/python-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68122fb969c00ba469eab91e767751d8fdef8f41aac61ff35f06ec4017cb826
3
+ size 1455349624
qw332/model/sql-32/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "draft_vocab_size": 32000,
9
+ "eos_token_id": 151645,
10
+ "head_dim": 80,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 25600,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 64,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 1,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.55.2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vocab_size": 151936
33
+ }
qw332/model/sql-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1350d385c6925a3aa653d9d07e73d53cfb5d15a532f97a327bd264b40b459e9
3
+ size 1455349624
qw38/model/bio/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/bio/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878fae9e3d8985cad5093a4cabe1128123ce255209200a5950e279e5c9bd7e4a
3
+ size 799493182
qw38/model/chem/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/chem/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:482b539a5919f37e6d7c4eb3cd36d31404c632e0371a3be9d370b90e0fa019ee
3
+ size 799493182
qw38/model/cnndm/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/cnndm/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00bb7891ebfcd764e963613cbfa1c69bd9947a604688c25ecf4861438ce142ba
3
+ size 799493182
qw38/model/math/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/math/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590e6d3c7f0928aacd7ec0ee8bf41bb08e929b48056684066664583d1f2b252a
3
+ size 799493182
qw38/model/med/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/med/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b91bbb9dd4fe3c3413f24be8886704f5ba166d4b2b4a85cd85103d71d01c1c1a
3
+ size 799493182
qw38/model/python/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/python/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d8a3486d877088f55c1707df8811949e8ee92c167fb5aea5ed4348c8f7ba7c
3
+ size 799493182
qw38/model/sql/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
qw38/model/sql/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6016a77ae8426dcb485e098a8567eeaf7b55cc27ddbe5d02808e7ff9f593311f
3
+ size 799493182