Trouter-Library committed on
Commit 7cda34a · verified · 1 Parent(s): 1a89da1

Create configuration_nsw1.py

Files changed (1)
  1. configuration_nsw1.py +47 -0
configuration_nsw1.py ADDED
@@ -0,0 +1,47 @@
from transformers import PretrainedConfig


class NSW1Config(PretrainedConfig):
    # Identifier the transformers Auto* machinery uses to map a config.json
    # back to this class.
    model_type = "nsw1"

    def __init__(
        self,
        vocab_size=50257,
        hidden_size=2048,
        num_hidden_layers=24,
        num_attention_heads=16,
        num_key_value_heads=16,  # equal to num_attention_heads, i.e. standard multi-head attention
        intermediate_size=8192,
        hidden_act="silu",
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        attention_dropout=0.0,
        **kwargs,
    ):
        # Core transformer hyperparameters, serialized into config.json
        # by the PretrainedConfig base class.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_dropout = attention_dropout

        # Special-token ids and embedding tying are handled by the base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
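For reference, a minimal usage sketch of the new config (not part of the commit; the checkpoint directory name is illustrative). `AutoConfig.register` ties the `model_type` string "nsw1" to the class so `AutoConfig.from_pretrained` can resolve it:

    from transformers import AutoConfig

    from configuration_nsw1 import NSW1Config

    # Make AutoConfig aware of the custom model_type.
    AutoConfig.register("nsw1", NSW1Config)

    # Instantiate with an overridden hyperparameter and round-trip to disk.
    config = NSW1Config(num_hidden_layers=12)
    config.save_pretrained("./nsw1-checkpoint")  # writes config.json
    reloaded = AutoConfig.from_pretrained("./nsw1-checkpoint")
    assert reloaded.model_type == "nsw1"

Because every hyperparameter is stored as an attribute before `super().__init__` runs, all of them are serialized to config.json and survive the save/load round trip.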