File size: 1,029 Bytes
c55052c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from transformers.models.bert.configuration_bert import BertConfig

class MoeBertConfig(BertConfig):
    """
    Extension of the Bert configuration that stores the ``moebert_*`` settings.

    Every ``moebert_*`` constructor argument is stored unchanged as an
    attribute of the same name; all remaining keyword arguments are forwarded
    to ``BertConfig``.

    Args:
        moebert_expert_num: Defaults to 16. Presumably the number of experts
            per mixture-of-experts layer -- confirm against the model code.
        moebert_route_method: Defaults to ``"gate-token"``. Presumably selects
            the routing strategy; the set of accepted values is not visible
            in this file.
        moebert_expert_dropout: Defaults to 0.1. Presumably a dropout
            probability used inside the experts -- confirm.
        moebert_expert_dim: Defaults to 128. Presumably the hidden size of
            each expert -- confirm.
        moebert_route_hash_list: Defaults to ``None``. Expected type is not
            visible here; presumably only used by hash-based routing.
        moebert_share_importance: Defaults to 0.5. Meaning is not visible in
            this file -- see the code that consumes it.
        moebert_load_importance: Defaults to ``None``. Expected type is not
            visible here -- see the code that consumes it.
        **kwargs: Passed through to ``BertConfig.__init__``.
    """

    model_type = "bert_moe"

    def __init__(
            self,
            moebert_expert_num=16,
            # Must be a string literal: the bare expression `gate-token` is
            # parsed as the subtraction `gate - token` and raises NameError
            # as soon as this `def` statement is executed.
            moebert_route_method="gate-token",
            moebert_expert_dropout=0.1,
            moebert_expert_dim=128,
            moebert_route_hash_list=None,
            moebert_share_importance=0.5,
            moebert_load_importance=None,
            **kwargs
        ):
        # Let the base class consume the standard BERT options first.
        super().__init__(**kwargs)
        self.moebert_expert_num = moebert_expert_num
        self.moebert_route_method = moebert_route_method
        self.moebert_expert_dropout = moebert_expert_dropout
        self.moebert_expert_dim = moebert_expert_dim
        self.moebert_route_hash_list = moebert_route_hash_list
        self.moebert_share_importance = moebert_share_importance
        self.moebert_load_importance = moebert_load_importance