xuebi committed
Commit 500a52e · 1 Parent(s): 4d0619f

update: add config.json

Files changed (1):
  1. config.json +113 -0
config.json ADDED
@@ -0,0 +1,113 @@
+ {
+   "architectures": [
+     "MiniMaxM2ForCausalLM"
+   ],
+   "attn_type_list": [
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
+     "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
+   },
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 3072,
+   "intermediate_size": 1536,
+   "max_position_embeddings": 196608,
+   "model_type": "minimax_m2",
+   "mtp_transformer_layers": 1,
+   "num_attention_heads": 48,
+   "num_experts_per_tok": 8,
+   "num_hidden_layers": 62,
+   "num_key_value_heads": 8,
+   "num_local_experts": 256,
+   "num_mtp_modules": 3,
+   "qk_norm_type": "per_layer",
+   "quantization_config": {
+     "activation_scheme": "dynamic",
+     "fmt": "float8_e4m3fn",
+     "quant_method": "fp8",
+     "weight_block_size": [
+       128,
+       128
+     ],
+     "modules_to_not_convert": [
+       "gate",
+       "e_score_correction_bias",
+       "lm_head"
+     ]
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 5000000,
+   "rotary_dim": 64,
+   "scoring_func": "sigmoid",
+   "shared_intermediate_size": 0,
+   "tie_word_embeddings": false,
+   "transformers_version": "4.46.1",
+   "use_cache": true,
+   "use_mtp": true,
+   "use_qk_norm": true,
+   "use_routing_bias": true,
+   "vocab_size": 200064
+ }
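
For reference, a minimal sketch (not part of this commit) of how a config like this might be loaded with Hugging Face transformers. The repository id below is a placeholder, and trust_remote_code=True is assumed to be required because auto_map points at the custom configuration_minimax_m2 / modeling_minimax_m2 modules shipped alongside this file.

    from transformers import AutoConfig

    # Placeholder repo id -- substitute the actual model repository.
    repo_id = "MiniMaxAI/MiniMax-M2"

    # auto_map above references custom config/model classes bundled with the repo,
    # so remote code must be allowed for AutoConfig to resolve MiniMaxM2Config.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

    # Sanity checks against the values in this config.json (attn_type_list is
    # assumed to be exposed as a plain attribute on the custom config class).
    assert config.num_hidden_layers == 62
    assert len(config.attn_type_list) == config.num_hidden_layers
    assert config.num_experts_per_tok == 8 and config.num_local_experts == 256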