fariasultana committed (verified)
Commit 1d1f00b
1 Parent(s): dea197e

feat: Add configuration_minimind.py for AutoModelForCausalLM support

Files changed (1):
  1. configuration_minimind.py +43 -0
configuration_minimind.py ADDED
@@ -0,0 +1,43 @@
"""MiniMind Max2 Configuration"""
from transformers import PretrainedConfig


class MiniMindConfig(PretrainedConfig):
    model_type = "minimind"

    def __init__(
        self,
        vocab_size=102400,
        hidden_size=1024,
        intermediate_size=2816,
        num_hidden_layers=12,
        num_attention_heads=16,
        num_key_value_heads=4,  # grouped-query attention: 16 query heads share 4 KV heads
        max_position_embeddings=32768,
        rms_norm_eps=1e-6,
        rope_theta=10000.0,
        num_experts=8,
        num_experts_per_token=2,  # MoE routing: each token is sent to 2 of the 8 experts
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=True,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.rope_theta = rope_theta
        self.num_experts = num_experts
        self.num_experts_per_token = num_experts_per_token
        # Token ids and embedding tying are handled by the PretrainedConfig base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
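
A minimal usage sketch, not part of this commit: once this file ships in the model repo alongside a matching modeling file and an auto_map entry in config.json, the standard Auto* classes can load it with trust_remote_code. The repo id "fariasultana/minimind-max2" below is a hypothetical placeholder.

from transformers import AutoConfig, AutoModelForCausalLM
from configuration_minimind import MiniMindConfig

# Instantiating the class directly uses the defaults defined above.
config = MiniMindConfig()
print(config.model_type)                                 # "minimind"
print(config.num_experts, config.num_experts_per_token)  # 8 experts, top-2 routing
config.save_pretrained("./minimind")                     # writes config.json

# Loading from the Hub: trust_remote_code=True tells transformers to import
# configuration_minimind.py (and the matching modeling file) from the repo
# instead of looking for a built-in architecture named "minimind".
config = AutoConfig.from_pretrained(
    "fariasultana/minimind-max2", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "fariasultana/minimind-max2", trust_remote_code=True
)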