Raj0pro committed on
Commit
50bc547
·
verified ·
1 Parent(s): 404b586

Upload model/config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. model/config.py +31 -0
model/config.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class ModelConfig:
    """Hyperparameters and special-token ids for the model.

    Every field has a default, so ``ModelConfig()`` is a usable configuration.
    Instances are mutable on purpose: the special token ids below are
    placeholders that are overwritten once the tokenizer has been trained.

    Raises:
        ValueError: If ``num_sentinels`` is negative at construction time.
    """

    vocab_size: int = 16000
    d_model: int = 256
    num_heads: int = 4
    d_ff: int = 1024
    num_encoder_layers: int = 4
    num_decoder_layers: int = 4
    max_seq_len: int = 128
    dropout: float = 0.1

    use_copy: bool = True  # pointer-generator copy mechanism

    # Sentinels appended after the BPE vocab for T5-style span corruption.
    # Their ids are [vocab_size, vocab_size + num_sentinels). At zero this is
    # a no-op; at 32 the effective embedding / output projection grows by
    # 32 rows (32 * 256 = 8,192, i.e. ~8K extra params per table at
    # d_model=256).
    num_sentinels: int = 32

    # special token ids — set after tokenizer is trained
    pad_id: int = 0
    unk_id: int = 1
    bos_id: int = 2
    eos_id: int = 3

    def __post_init__(self) -> None:
        """Reject a sentinel count that would shrink the vocabulary."""
        # A negative count would make effective_vocab_size smaller than
        # vocab_size, leaving the id range [vocab_size, vocab_size +
        # num_sentinels) empty and the top BPE ids outside the table.
        if self.num_sentinels < 0:
            raise ValueError(
                f"num_sentinels must be >= 0, got {self.num_sentinels}"
            )

    @property
    def effective_vocab_size(self) -> int:
        """Return the BPE vocab size plus the number of sentinel ids."""
        return self.vocab_size + self.num_sentinels