Tongjilibo committed on
Commit
25ff35e
·
1 Parent(s): 2f74c46

批量修改配置文件

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. 01-ai/Yi-1.5-6B-Chat/bert4torch_config.json +7 -1
  2. 01-ai/Yi-1.5-6B/bert4torch_config.json +7 -1
  3. 01-ai/Yi-1.5-9B-32K/bert4torch_config.json +7 -1
  4. 01-ai/Yi-1.5-9B-Chat-16K/bert4torch_config.json +8 -2
  5. 01-ai/Yi-1.5-9B-Chat/bert4torch_config.json +7 -1
  6. 01-ai/Yi-1.5-9B/bert4torch_config.json +7 -1
  7. 01-ai/Yi-6B-200K/bert4torch_config.json +7 -1
  8. 01-ai/Yi-6B/bert4torch_config.json +7 -1
  9. 01-ai/Yi-9B-200K/bert4torch_config.json +7 -1
  10. 01-ai/Yi-9B/bert4torch_config.json +7 -1
  11. BAAI/bge-base-en-v1.5/bert4torch_config.json +2 -2
  12. BAAI/bge-base-zh-v1.5/bert4torch_config.json +2 -2
  13. BAAI/bge-large-en-v1.5/bert4torch_config.json +1 -1
  14. BAAI/bge-large-zh-v1.5/bert4torch_config.json +2 -2
  15. BAAI/bge-small-en-v1.5/bert4torch_config.json +2 -2
  16. BAAI/bge-small-zh-v1.5/bert4torch_config.json +2 -2
  17. BelleGroup/BELLE-LLaMA-7B-2M-enc/bert4torch_config.json +6 -0
  18. ClueAI/ChatYuan-large-v1/bert4torch_config.json +4 -0
  19. ClueAI/ChatYuan-large-v2/bert4torch_config.json +4 -0
  20. ClueAI/PromptCLUE-base-v1-5/bert4torch_config.json +4 -0
  21. ClueAI/PromptCLUE-base/bert4torch_config.json +4 -0
  22. FacebookAI/roberta-base/bert4torch_config.json +1 -1
  23. IDEA-CCNL/Erlangshen-DeBERTa-v2-320M-Chinese/bert4torch_config.json +1 -0
  24. IDEA-CCNL/Erlangshen-DeBERTa-v2-710M-Chinese/bert4torch_config.json +1 -0
  25. IDEA-CCNL/Erlangshen-DeBERTa-v2-97M-Chinese/bert4torch_config.json +1 -0
  26. IDEA-CCNL/Ziya-LLaMA-13B-v1.1/bert4torch_config.json +6 -0
  27. IDEA-CCNL/Ziya-LLaMA-13B-v1/bert4torch_config.json +6 -0
  28. PaddlePaddle/PaddleOCR-VL/bert4torch_config.json +6 -1
  29. Qwen/Qwen-14B-Chat/bert4torch_config.json +7 -0
  30. Qwen/Qwen-14B/bert4torch_config.json +7 -0
  31. Qwen/Qwen-1_8B-Chat/bert4torch_config.json +7 -0
  32. Qwen/Qwen-1_8B/bert4torch_config.json +7 -0
  33. Qwen/Qwen-7B-Chat/bert4torch_config.json +7 -0
  34. Qwen/Qwen-7B/bert4torch_config.json +7 -0
  35. Qwen/Qwen1.5-0.5B-Chat/bert4torch_config.json +7 -0
  36. Qwen/Qwen1.5-0.5B/bert4torch_config.json +7 -0
  37. Qwen/Qwen1.5-1.8B-Chat/bert4torch_config.json +7 -0
  38. Qwen/Qwen1.5-1.8B/bert4torch_config.json +7 -0
  39. Qwen/Qwen1.5-14B-Chat/bert4torch_config.json +7 -0
  40. Qwen/Qwen1.5-14B/bert4torch_config.json +7 -0
  41. Qwen/Qwen1.5-7B-Chat/bert4torch_config.json +7 -0
  42. Qwen/Qwen1.5-7B/bert4torch_config.json +7 -0
  43. Qwen/Qwen2-0.5B-Instruct/bert4torch_config.json +7 -0
  44. Qwen/Qwen2-0.5B/bert4torch_config.json +7 -0
  45. Qwen/Qwen2-1.5B-Instruct/bert4torch_config.json +7 -0
  46. Qwen/Qwen2-1.5B/bert4torch_config.json +7 -0
  47. Qwen/Qwen2-7B-Instruct/bert4torch_config.json +7 -0
  48. Qwen/Qwen2-7B/bert4torch_config.json +7 -0
  49. Qwen/Qwen2-VL-2B-Instruct/bert4torch_config.json +7 -0
  50. Qwen/Qwen2-VL-7B-Instruct/bert4torch_config.json +8 -1
01-ai/Yi-1.5-6B-Chat/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -7,7 +14,6 @@
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
  "model": "llama",
10
- "template": "llama3",
11
  "num_attention_heads": 32,
12
  "num_hidden_layers": 32,
13
  "num_key_value_heads": 4,
 
1
  {
2
+ "pos_emb_type": "rotary",
3
+ "use_bias": false,
4
+ "layer_norm_mode": "rmsnorm",
5
+ "final_layernorm": true,
6
+ "pre_layernorm": true,
7
+ "mlp_type": "LlamaFeedForward",
8
+ "template": "llama3",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
16
  "model": "llama",
 
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 4,
01-ai/Yi-1.5-6B/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 32,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 4,
01-ai/Yi-1.5-9B-32K/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 32768,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 48,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 32768,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 48,
18
  "num_key_value_heads": 4,
01-ai/Yi-1.5-9B-Chat-16K/bert4torch_config.json CHANGED
@@ -1,4 +1,12 @@
1
  {
 
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,8 +14,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 16384,
9
- "model": "llama",
10
- "template": "llama3",
11
  "num_attention_heads": 32,
12
  "num_hidden_layers": 48,
13
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
+ "template": "llama3",
10
  "bos_token_id": 1,
11
  "eos_token_id": 2,
12
  "hidden_act": "silu",
 
14
  "initializer_range": 0.02,
15
  "intermediate_size": 11008,
16
  "max_position_embeddings": 16384,
 
 
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 48,
19
  "num_key_value_heads": 4,
01-ai/Yi-1.5-9B-Chat/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -7,7 +14,6 @@
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
  "model": "llama",
10
- "template": "llama3",
11
  "num_attention_heads": 32,
12
  "num_hidden_layers": 48,
13
  "num_key_value_heads": 4,
 
1
  {
2
+ "pos_emb_type": "rotary",
3
+ "use_bias": false,
4
+ "layer_norm_mode": "rmsnorm",
5
+ "final_layernorm": true,
6
+ "pre_layernorm": true,
7
+ "mlp_type": "LlamaFeedForward",
8
+ "template": "llama3",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
16
  "model": "llama",
 
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 48,
19
  "num_key_value_heads": 4,
01-ai/Yi-1.5-9B/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 48,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 48,
18
  "num_key_value_heads": 4,
01-ai/Yi-6B-200K/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 200000,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 32,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 200000,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 4,
01-ai/Yi-6B/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 32,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 4,
01-ai/Yi-9B-200K/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 262144,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 48,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 262144,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 48,
18
  "num_key_value_heads": 4,
01-ai/Yi-9B/bert4torch_config.json CHANGED
@@ -1,4 +1,11 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
  "hidden_act": "silu",
@@ -6,7 +13,6 @@
6
  "initializer_range": 0.02,
7
  "intermediate_size": 11008,
8
  "max_position_embeddings": 4096,
9
- "model": "llama",
10
  "num_attention_heads": 32,
11
  "num_hidden_layers": 48,
12
  "num_key_value_heads": 4,
 
1
  {
2
+ "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
 
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 4096,
 
16
  "num_attention_heads": 32,
17
  "num_hidden_layers": 48,
18
  "num_key_value_heads": 4,
BAAI/bge-base-en-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 3072,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "bos_token_id": 0,
@@ -20,7 +20,7 @@
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
- "norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 3072,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "bos_token_id": 0,
 
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
+ "layer_norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
BAAI/bge-base-zh-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 3072,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "bos_token_id": 0,
@@ -20,7 +20,7 @@
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
- "norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 3072,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "bos_token_id": 0,
 
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
+ "layer_norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
BAAI/bge-large-en-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 4096,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 16,
12
  "num_hidden_layers": 24,
13
  "pad_token_id": 0,
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 4096,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 16,
12
  "num_hidden_layers": 24,
13
  "pad_token_id": 0,
BAAI/bge-large-zh-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 4096,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 16,
12
  "num_hidden_layers": 24,
13
  "bos_token_id": 0,
@@ -20,7 +20,7 @@
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
- "norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 4096,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 16,
12
  "num_hidden_layers": 24,
13
  "bos_token_id": 0,
 
20
  "pooling": {
21
  "pool_strategy": "cls"
22
  },
23
+ "layer_norm_mode": "torch_buildin",
24
  "mapping": {
25
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
26
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
BAAI/bge-small-en-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 1536,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "pad_token_id": 0,
@@ -18,7 +18,7 @@
18
  "pooling": {
19
  "pool_strategy": "cls"
20
  },
21
- "norm_mode": "torch_buildin",
22
  "mapping": {
23
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
24
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 1536,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 12,
12
  "num_hidden_layers": 12,
13
  "pad_token_id": 0,
 
18
  "pooling": {
19
  "pool_strategy": "cls"
20
  },
21
+ "layer_norm_mode": "torch_buildin",
22
  "mapping": {
23
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
24
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
BAAI/bge-small-zh-v1.5/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
@@ -7,7 +8,6 @@
7
  "intermediate_size": 2048,
8
  "layer_norm_eps": 1e-12,
9
  "max_position_embeddings": 512,
10
- "model": "bert",
11
  "num_attention_heads": 8,
12
  "num_hidden_layers": 4,
13
  "pad_token_id": 0,
@@ -18,7 +18,7 @@
18
  "pooling": {
19
  "pool_strategy": "cls"
20
  },
21
- "norm_mode": "torch_buildin",
22
  "mapping": {
23
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
24
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
 
1
  {
2
+ "model": "bert",
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
8
  "intermediate_size": 2048,
9
  "layer_norm_eps": 1e-12,
10
  "max_position_embeddings": 512,
 
11
  "num_attention_heads": 8,
12
  "num_hidden_layers": 4,
13
  "pad_token_id": 0,
 
18
  "pooling": {
19
  "pool_strategy": "cls"
20
  },
21
+ "layer_norm_mode": "torch_buildin",
22
  "mapping": {
23
  "embeddings.word_embeddings.weight": "embeddings.word_embeddings.weight",
24
  "embeddings.position_embeddings.weight": "embeddings.position_embeddings.weight",
BelleGroup/BELLE-LLaMA-7B-2M-enc/bert4torch_config.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
  "model": "llama",
 
 
 
 
 
 
3
  "template": "belle",
4
  "hidden_size": 4096,
5
  "intermediate_size": 11008,
 
1
  {
2
  "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "template": "belle",
10
  "hidden_size": 4096,
11
  "intermediate_size": 11008,
ClueAI/ChatYuan-large-v1/bert4torch_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "model": "mt5.1.1",
 
 
 
 
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
5
  "hidden_size": 1024,
 
1
  {
2
  "model": "mt5.1.1",
3
+ "pos_emb_type": "t5_relative",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "layer_type": "T5Layer",
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 1024,
ClueAI/ChatYuan-large-v2/bert4torch_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "model": "mt5.1.1",
 
 
 
 
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
5
  "hidden_size": 1024,
 
1
  {
2
  "model": "mt5.1.1",
3
+ "pos_emb_type": "t5_relative",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "layer_type": "T5Layer",
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 1024,
ClueAI/PromptCLUE-base-v1-5/bert4torch_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "model": "mt5.1.1",
 
 
 
 
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
5
  "hidden_size": 768,
 
1
  {
2
  "model": "mt5.1.1",
3
+ "pos_emb_type": "t5_relative",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "layer_type": "T5Layer",
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
ClueAI/PromptCLUE-base/bert4torch_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "model": "mt5.1.1",
 
 
 
 
3
  "hidden_act": "gelu",
4
  "hidden_dropout_prob": 0.1,
5
  "hidden_size": 768,
 
1
  {
2
  "model": "mt5.1.1",
3
+ "pos_emb_type": "t5_relative",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "layer_type": "T5Layer",
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
FacebookAI/roberta-base/bert4torch_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "attention_probs_dropout_prob": 0.1,
3
  "bos_token_id": 0,
4
  "eos_token_id": 2,
@@ -9,7 +10,6 @@
9
  "intermediate_size": 3072,
10
  "layer_norm_eps": 1e-05,
11
  "max_position_embeddings": 514,
12
- "model_type": "roberta",
13
  "num_attention_heads": 12,
14
  "num_hidden_layers": 12,
15
  "pad_token_id": 1,
 
1
  {
2
+ "model": "roberta",
3
  "attention_probs_dropout_prob": 0.1,
4
  "bos_token_id": 0,
5
  "eos_token_id": 2,
 
10
  "intermediate_size": 3072,
11
  "layer_norm_eps": 1e-05,
12
  "max_position_embeddings": 514,
 
13
  "num_attention_heads": 12,
14
  "num_hidden_layers": 12,
15
  "pad_token_id": 1,
IDEA-CCNL/Erlangshen-DeBERTa-v2-320M-Chinese/bert4torch_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "model": "deberta_v2",
 
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
1
  {
2
  "model": "deberta_v2",
3
+ "pos_emb_type": "deberta_v2",
4
  "attention_probs_dropout_prob": 0.1,
5
  "hidden_act": "gelu",
6
  "hidden_dropout_prob": 0.1,
IDEA-CCNL/Erlangshen-DeBERTa-v2-710M-Chinese/bert4torch_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "model": "deberta_v2",
 
3
  "attention_probs_dropout_prob": 0.1,
4
  "attention_head_size": 64,
5
  "hidden_act": "gelu",
 
1
  {
2
  "model": "deberta_v2",
3
+ "pos_emb_type": "deberta_v2",
4
  "attention_probs_dropout_prob": 0.1,
5
  "attention_head_size": 64,
6
  "hidden_act": "gelu",
IDEA-CCNL/Erlangshen-DeBERTa-v2-97M-Chinese/bert4torch_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "model": "deberta_v2",
 
3
  "attention_probs_dropout_prob": 0.1,
4
  "hidden_act": "gelu",
5
  "hidden_dropout_prob": 0.1,
 
1
  {
2
  "model": "deberta_v2",
3
+ "pos_emb_type": "deberta_v2",
4
  "attention_probs_dropout_prob": 0.1,
5
  "hidden_act": "gelu",
6
  "hidden_dropout_prob": 0.1,
IDEA-CCNL/Ziya-LLaMA-13B-v1.1/bert4torch_config.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
  "model": "llama",
 
 
 
 
 
 
3
  "template": "ziya",
4
  "bos_token_id": 1,
5
  "eos_token_id": 2,
 
1
  {
2
  "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "template": "ziya",
10
  "bos_token_id": 1,
11
  "eos_token_id": 2,
IDEA-CCNL/Ziya-LLaMA-13B-v1/bert4torch_config.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
  "model": "llama",
 
 
 
 
 
 
3
  "template": "ziya",
4
  "bos_token_id": 1,
5
  "eos_token_id": 2,
 
1
  {
2
  "model": "llama",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "layer_norm_mode": "rmsnorm",
6
+ "final_layernorm": true,
7
+ "pre_layernorm": true,
8
+ "mlp_type": "LlamaFeedForward",
9
  "template": "ziya",
10
  "bos_token_id": 1,
11
  "eos_token_id": 2,
PaddlePaddle/PaddleOCR-VL/bert4torch_config.json CHANGED
@@ -1,7 +1,12 @@
1
  {
2
  "model_type": "paddleocr_vl",
3
  "template": "paddleocr_vl",
4
- "bias": false,
 
 
 
 
 
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 0,
7
  "eos_token_id": 2,
 
1
  {
2
  "model_type": "paddleocr_vl",
3
  "template": "paddleocr_vl",
4
+ "pos_emb_type": "rotary",
5
+ "use_bias": false,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "attention_dropout": 0.0,
11
  "bos_token_id": 0,
12
  "eos_token_id": 2,
Qwen/Qwen-14B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "intermediate_size": 13696,
5
  "initializer_range": 0.02,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "intermediate_size": 13696,
12
  "initializer_range": 0.02,
Qwen/Qwen-14B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "intermediate_size": 13696,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "intermediate_size": 13696,
Qwen/Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "pad_token_id": 151643,
5
  "eos_token_id": 151643,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "pad_token_id": 151643,
12
  "eos_token_id": 151643,
Qwen/Qwen-1_8B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "pad_token_id": 151643,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "pad_token_id": 151643,
Qwen/Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "pad_token_id": 151643,
5
  "eos_token_id": 151643,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "pad_token_id": 151643,
12
  "eos_token_id": 151643,
Qwen/Qwen-7B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "pad_token_id": 151643,
 
1
  {
2
  "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "pad_token_id": 151643,
Qwen/Qwen1.5-0.5B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen1.5-0.5B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen1.5-1.8B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen1.5-1.8B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen1.5-14B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen1.5-14B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen1.5-7B-Chat/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen1.5-7B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen2-0.5B-Instruct/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen2-0.5B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen2-1.5B-Instruct/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen2-1.5B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen2-7B-Instruct/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "hidden_act": "silu",
4
  "attention_probs_dropout_prob": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "hidden_act": "silu",
11
  "attention_probs_dropout_prob": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen2-7B/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model": "qwen2",
 
 
 
 
 
 
 
3
  "template": "pretrained_text_continuation",
4
  "hidden_act": "silu",
5
  "attention_probs_dropout_prob": 0.0,
 
1
  {
2
  "model": "qwen2",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "pretrained_text_continuation",
11
  "hidden_act": "silu",
12
  "attention_probs_dropout_prob": 0.0,
Qwen/Qwen2-VL-2B-Instruct/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
  "model_type": "qwen2_vl",
 
 
 
 
 
 
 
3
  "template": "qwen2_vl",
4
  "attention_dropout": 0.0,
5
  "bos_token_id": 151643,
 
1
  {
2
  "model_type": "qwen2_vl",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "template": "qwen2_vl",
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 151643,
Qwen/Qwen2-VL-7B-Instruct/bert4torch_config.json CHANGED
@@ -1,5 +1,12 @@
1
  {
2
- "model": "qwen2_vl",
 
 
 
 
 
 
 
3
  "attention_dropout": 0.0,
4
  "bos_token_id": 151643,
5
  "eos_token_id": 151645,
 
1
  {
2
+ "model": "qwen",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
  "attention_dropout": 0.0,
11
  "bos_token_id": 151643,
12
  "eos_token_id": 151645,