Files changed (1)
  1. config.json +21 -37
config.json CHANGED
@@ -5,45 +5,29 @@
   "tie_word_embeddings": false,
   "transformers_version": "5.4.0",
 
-  "text_config": {
-    "model_type": "mistral",
-
-    "vocab_size": 32000,
-    "hidden_size": 4096,
-    "intermediate_size": 14336,
-    "num_hidden_layers": 32,
-    "num_attention_heads": 32,
-    "num_key_value_heads": 8,
-    "head_dim": 128,
-    "hidden_act": "silu",
-    "max_position_embeddings": 8192,
-    "sliding_window": 4096,
-
-    "rms_norm_eps": 1e-05,
-    "initializer_range": 0.02,
-    "attention_bias": false,
-    "attention_dropout": 0.0,
-    "mlp_bias": false,
-
-    "rope_parameters": {
-      "rope_theta": 10000.0,
-      "rope_type": "default"
-    },
-
-    "use_cache": true,
-    "tie_word_embeddings": false,
-    "pretraining_tp": 1,
-
-    "bos_token_id": 1,
-    "eos_token_id": 2,
-    "pad_token_id": 2,
-
-    "do_sample": true,
-    "temperature": 0.7,
-    "top_k": 0,
-    "top_p": 0.9,
-    "repetition_penalty": 1.1,
-    "max_length": 4096,
-    "min_length": 0
-  }
+  "vocab_size": 32000,
+  "hidden_size": 4096,
+  "intermediate_size": 14336,
+  "num_hidden_layers": 32,
+  "num_attention_heads": 32,
+  "num_key_value_heads": 8,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "max_position_embeddings": 8192,
+  "sliding_window": 4096,
+
+  "rms_norm_eps": 1e-05,
+  "initializer_range": 0.02,
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "mlp_bias": false,
+
+  "rope_theta": 10000.0,
+
+  "use_cache": true,
+  "pretraining_tp": 1,
+
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 2
 }
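
In short, this change hoists the nested "text_config" block to the top level of config.json, collapses "rope_parameters" to a bare "rope_theta" (dropping the default "rope_type"), and removes the sampling defaults (do_sample, temperature, top_k, top_p, repetition_penalty, max_length, min_length) along with the duplicated "model_type" and "tie_word_embeddings" keys. For anyone applying the same migration to other checkpoints by hand, below is a minimal Python sketch of that transformation. It is an illustration, not part of any library: flatten_text_config and DROPPED_KEYS are hypothetical names, and the sketch assumes the top-level config already carries "model_type" and "tie_word_embeddings", as the context lines of this diff suggest.

import json

# Keys the flat layout drops outright: "model_type" and "tie_word_embeddings"
# are already present at the top level, and the sampling defaults leave
# config.json entirely. ("rope_parameters" is handled separately below.)
DROPPED_KEYS = {
    "model_type",
    "tie_word_embeddings",
    "do_sample",
    "temperature",
    "top_k",
    "top_p",
    "repetition_penalty",
    "max_length",
    "min_length",
}

def flatten_text_config(path):
    """Rewrite a config.json in place, hoisting "text_config" to the top level."""
    with open(path) as f:
        config = json.load(f)

    text_config = config.pop("text_config", None)
    if text_config is None:
        return  # already flat, nothing to do

    for key, value in text_config.items():
        if key == "rope_parameters":
            # Collapse the nested rope block to a bare "rope_theta",
            # dropping the default "rope_type", as this diff does.
            config["rope_theta"] = value["rope_theta"]
        elif key not in DROPPED_KEYS:
            config[key] = value

    with open(path, "w") as f:
        json.dump(config, f, indent=2)

flatten_text_config("config.json")

Because Python dicts preserve insertion order, the surviving keys land in the same relative order as in the original nested block, which is why "rope_theta" ends up between "mlp_bias" and "use_cache", matching the added lines above.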