nev8r committed
Commit f3303e1 · verified · 1 Parent(s): 2aea6be

Upload config.json

Files changed (1)
  1. config.json +16 -11
config.json CHANGED
@@ -1,29 +1,34 @@
 {
   "architectures": [
-    "LlamaForCausalLM"
+    "VerMindDenseForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 1,
   "dtype": "float16",
-  "eos_token_id": 2,
+  "eos_token_id": 32769,
   "head_dim": 64,
-  "hidden_act": "silu",
+  "hidden_dropout": 0.0,
   "hidden_size": 768,
-  "initializer_range": 0.02,
   "intermediate_size": 2064,
   "max_position_embeddings": 2048,
-  "mlp_bias": false,
-  "model_type": "llama",
+  "model_type": "vermind_dense",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "num_key_value_heads": 4,
-  "pretraining_tp": 1,
+  "pad_token_id": 0,
+  "residual_dropout": 0.0,
   "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
+  "rope_scaling": {
+    "attention_factor": 1.0,
+    "beta_fast": 32.0,
+    "beta_slow": 1.0,
+    "factor": 4.0,
+    "original_max_position_embeddings": 2048,
+    "type": "yarn"
+  },
   "rope_theta": 10000.0,
-  "tie_word_embeddings": true,
   "transformers_version": "4.57.5",
-  "use_cache": true,
+  "use_cache": false,
+  "use_flash_attention": false,
   "vocab_size": 32772
 }
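
The commit swaps the stock "llama" model type for a custom "vermind_dense" architecture and adds a YaRN rope_scaling block; with "factor": 4.0 over an "original_max_position_embeddings" of 2048, the effective context window extends to 2048 × 4 = 8192 positions. Below is a minimal sketch of inspecting the updated config with transformers; the repo id is hypothetical, and it assumes the repo ships custom modeling code (registered via auto_map), since "vermind_dense" is not a built-in model type.

    # Minimal sketch, not the author's documented workflow. The repo id is
    # hypothetical; trust_remote_code=True is assumed to be required because
    # "vermind_dense" is a custom model type, not a built-in transformers one.
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained(
        "nev8r/vermind-dense",   # hypothetical repo id
        trust_remote_code=True,  # pulls in the repo's custom config class
    )

    print(config.model_type)            # vermind_dense
    print(config.rope_scaling["type"])  # yarn
    # Effective context under YaRN: original window x scaling factor
    print(int(config.rope_scaling["original_max_position_embeddings"]
              * config.rope_scaling["factor"]))  # 2048 * 4 = 8192

Note also that "use_cache" is flipped to false and "use_flash_attention" is introduced (disabled), so generation-time KV caching and flash attention would both need to be enabled explicitly if desired.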