# File size: 722 Bytes
# commit: 0ed2b3d
# (lines above are listing/extraction artifacts, converted to comments so the module parses)
from transformers import PretrainedConfig
class QMoEConfig(PretrainedConfig):
    """Configuration for a QMoE decoder-only Mixture-of-Experts language model.

    Stores the hyperparameters needed to instantiate the model. Defaults
    describe a GPT-2-small-like decoder (50257-token vocab, 768-dim, 12
    layers) with 8 experts and top-2 routing per token.

    Args:
        vocab_size: Size of the token vocabulary.
        d_model: Hidden size of the transformer. Must be divisible by
            ``num_heads``.
        num_layers: Number of decoder layers.
        num_heads: Number of attention heads per layer.
        max_seq_len: Maximum supported sequence length.
        num_experts: Number of experts in each MoE feed-forward block.
        moe_top_k: Number of experts each token is routed to. Must not
            exceed ``num_experts``.
        ffn_dim: Inner dimension of each expert's feed-forward network.
        **kwargs: Forwarded to :class:`~transformers.PretrainedConfig`.

    Raises:
        ValueError: If ``d_model`` is not divisible by ``num_heads`` or
            ``moe_top_k`` exceeds ``num_experts``.
    """

    model_type = 'qmoe'

    def __init__(
        self,
        vocab_size=50257,
        d_model=768,
        num_layers=12,
        num_heads=16,
        max_seq_len=512,
        num_experts=8,
        moe_top_k=2,
        ffn_dim=2048,
        **kwargs,
    ):
        # Architectural defaults: decoder-only, no cross-attention, no KV
        # cache, untied embeddings. Use setdefault() so an explicit caller
        # kwarg (e.g. use_cache=True) is respected — the previous code
        # hard-assigned these AFTER super().__init__(), silently clobbering
        # any value passed in **kwargs.
        kwargs.setdefault('is_decoder', True)
        kwargs.setdefault('add_cross_attention', False)
        kwargs.setdefault('use_cache', False)
        kwargs.setdefault('tie_word_embeddings', False)
        super().__init__(**kwargs)

        # Fail fast on configurations the model cannot actually build.
        if d_model % num_heads != 0:
            raise ValueError(
                f'd_model ({d_model}) must be divisible by num_heads ({num_heads})'
            )
        if moe_top_k > num_experts:
            raise ValueError(
                f'moe_top_k ({moe_top_k}) cannot exceed num_experts ({num_experts})'
            )

        self.vocab_size = vocab_size
        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.max_seq_len = max_seq_len
        self.num_experts = num_experts
        self.moe_top_k = moe_top_k
        self.ffn_dim = ffn_dim
|