Upload folder using huggingface_hub
Browse files- lm318/model/bio/config.json +24 -0
- lm318/model/bio/pytorch_model.bin +3 -0
- lm318/model/chem/config.json +24 -0
- lm318/model/chem/pytorch_model.bin +3 -0
- lm318/model/cnndm/config.json +24 -0
- lm318/model/cnndm/pytorch_model.bin +3 -0
- lm318/model/math/config.json +24 -0
- lm318/model/math/pytorch_model.bin +3 -0
- lm318/model/med/config.json +24 -0
- lm318/model/med/pytorch_model.bin +3 -0
- lm318/model/python/config.json +24 -0
- lm318/model/python/pytorch_model.bin +3 -0
- lm318/model/sql/config.json +24 -0
- lm318/model/sql/pytorch_model.bin +3 -0
- qw332/model/bio-32/config.json +33 -0
- qw332/model/bio-32/model.safetensors +3 -0
- qw332/model/chem-32/config.json +33 -0
- qw332/model/chem-32/model.safetensors +3 -0
- qw332/model/cnndm-32/config.json +33 -0
- qw332/model/cnndm-32/model.safetensors +3 -0
- qw332/model/math-32/config.json +33 -0
- qw332/model/math-32/model.safetensors +3 -0
- qw332/model/med-32/config.json +33 -0
- qw332/model/med-32/model.safetensors +3 -0
- qw332/model/python-32/config.json +33 -0
- qw332/model/python-32/model.safetensors +3 -0
- qw332/model/sql-32/config.json +33 -0
- qw332/model/sql-32/model.safetensors +3 -0
- qw38/model/bio/config.json +31 -0
- qw38/model/bio/pytorch_model.bin +3 -0
- qw38/model/chem/config.json +31 -0
- qw38/model/chem/pytorch_model.bin +3 -0
- qw38/model/cnndm/config.json +31 -0
- qw38/model/cnndm/pytorch_model.bin +3 -0
- qw38/model/math/config.json +31 -0
- qw38/model/math/pytorch_model.bin +3 -0
- qw38/model/med/config.json +31 -0
- qw38/model/med/pytorch_model.bin +3 -0
- qw38/model/python/config.json +31 -0
- qw38/model/python/pytorch_model.bin +3 -0
- qw38/model/sql/config.json +31 -0
- qw38/model/sql/pytorch_model.bin +3 -0
lm318/model/bio/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/bio/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ed23d1ff7fabd505ac7230ca8f1337b4b4d2e9ac46122af20f89606c788bc36
|
| 3 |
+
size 849795390
|
lm318/model/chem/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/chem/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b404343ddd103cf115445e4ddc054a09811edc201ac967fb553f450f17efc23b
|
| 3 |
+
size 849795390
|
lm318/model/cnndm/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/cnndm/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9059d8dad391f7de2d8255cf7881561360c5d5fd590d31114d4d82c1c95b166
|
| 3 |
+
size 849795390
|
lm318/model/math/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/math/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93973d8d0c0cd1dedc5bffd45ec94ef0a1cec5ea90f4ec2a5d955f962e28d1a7
|
| 3 |
+
size 849795390
|
lm318/model/med/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/med/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d1176c76d9d43f61aa8d252db6b0ef1b4b4f6e8d5449ae83df98fa955e29b6a
|
| 3 |
+
size 849795390
|
lm318/model/python/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/python/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e122c91bd679745e531d4cad58abd464ba5988c821370a1d2b3a38ec380a8bd
|
| 3 |
+
size 849795390
|
lm318/model/sql/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"bos_token_id": 128000,
|
| 6 |
+
"eos_token_id": 128001,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 4096,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 14336,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"model_type": "llama",
|
| 13 |
+
"num_attention_heads": 32,
|
| 14 |
+
"num_key_value_heads": 8,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"pad_token_id": 0,
|
| 17 |
+
"rms_norm_eps": 1e-05,
|
| 18 |
+
"tie_word_embeddings": false,
|
| 19 |
+
"torch_dtype": "float16",
|
| 20 |
+
"transformers_version": "4.28.1",
|
| 21 |
+
"use_cache": true,
|
| 22 |
+
"vocab_size": 128256,
|
| 23 |
+
"draft_vocab_size": 32000
|
| 24 |
+
}
|
lm318/model/sql/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ee49ed916d45050e2bfa4f59529cfcacce227c2a0cc9850ba8143c4ef8054aa
|
| 3 |
+
size 849795390
|
qw332/model/bio-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/bio-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd7569163b040ed5356e92b04a2833836795128064aa2a7f412306005e214bd4
|
| 3 |
+
size 1455349624
|
qw332/model/chem-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/chem-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:114b1d55c8ac5c21738b4b4bd720ef6fdc4939f6957fda03f381a906e5b76fcb
|
| 3 |
+
size 1455349624
|
qw332/model/cnndm-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/cnndm-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7dbf5677ef704dc40f2405d265055e937b5088f9364391630b43ad33a37194e3
|
| 3 |
+
size 1455349624
|
qw332/model/math-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/math-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fff6eb79957c1958a959a9783b24b33e31966b850558ade9b22adb5f9530181
|
| 3 |
+
size 1455349624
|
qw332/model/med-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/med-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6da3febd0784215e48ffea5fbd13af4866ae73cf482465ada01829adb3cb491e
|
| 3 |
+
size 1455349624
|
qw332/model/python-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/python-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c68122fb969c00ba469eab91e767751d8fdef8f41aac61ff35f06ec4017cb826
|
| 3 |
+
size 1455349624
|
qw332/model/sql-32/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"draft_vocab_size": 32000,
|
| 9 |
+
"eos_token_id": 151645,
|
| 10 |
+
"head_dim": 80,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 5120,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 25600,
|
| 15 |
+
"max_position_embeddings": 40960,
|
| 16 |
+
"max_window_layers": 64,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"num_attention_heads": 64,
|
| 20 |
+
"num_hidden_layers": 1,
|
| 21 |
+
"num_key_value_heads": 8,
|
| 22 |
+
"pretraining_tp": 1,
|
| 23 |
+
"rms_norm_eps": 1e-06,
|
| 24 |
+
"rope_scaling": null,
|
| 25 |
+
"rope_theta": 1000000,
|
| 26 |
+
"sliding_window": null,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "bfloat16",
|
| 29 |
+
"transformers_version": "4.55.2",
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"use_sliding_window": false,
|
| 32 |
+
"vocab_size": 151936
|
| 33 |
+
}
|
qw332/model/sql-32/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1350d385c6925a3aa653d9d07e73d53cfb5d15a532f97a327bd264b40b459e9
|
| 3 |
+
size 1455349624
|
qw38/model/bio/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/bio/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:878fae9e3d8985cad5093a4cabe1128123ce255209200a5950e279e5c9bd7e4a
|
| 3 |
+
size 799493182
|
qw38/model/chem/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/chem/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:482b539a5919f37e6d7c4eb3cd36d31404c632e0371a3be9d370b90e0fa019ee
|
| 3 |
+
size 799493182
|
qw38/model/cnndm/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/cnndm/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00bb7891ebfcd764e963613cbfa1c69bd9947a604688c25ecf4861438ce142ba
|
| 3 |
+
size 799493182
|
qw38/model/math/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/math/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:590e6d3c7f0928aacd7ec0ee8bf41bb08e929b48056684066664583d1f2b252a
|
| 3 |
+
size 799493182
|
qw38/model/med/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/med/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b91bbb9dd4fe3c3413f24be8886704f5ba166d4b2b4a85cd85103d71d01c1c1a
|
| 3 |
+
size 799493182
|
qw38/model/python/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/python/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4d8a3486d877088f55c1707df8811949e8ee92c167fb5aea5ed4348c8f7ba7c
|
| 3 |
+
size 799493182
|
qw38/model/sql/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLMEagle3"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 36,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 32,
|
| 18 |
+
"num_hidden_layers": 1,
|
| 19 |
+
"num_key_value_heads":8 ,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": false,
|
| 25 |
+
"torch_dtype": "float16",
|
| 26 |
+
"transformers_version": "4.51.0",
|
| 27 |
+
"use_cache": true,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936,
|
| 30 |
+
"draft_vocab_size": 32000
|
| 31 |
+
}
|
qw38/model/sql/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6016a77ae8426dcb485e098a8567eeaf7b55cc27ddbe5d02808e7ff9f593311f
|
| 3 |
+
size 799493182
|