from transformers.models.bert.configuration_bert import BertConfig


class MoeBertConfig(BertConfig):
    """BERT configuration extended with mixture-of-experts (``moebert_*``) settings.

    Every ``moebert_*`` argument is stored unchanged as an instance attribute
    of the same name. All remaining keyword arguments are forwarded to
    ``BertConfig.__init__``.

    Args:
        moebert_expert_num: Stored as ``self.moebert_expert_num``; presumably
            the number of experts per MoE layer -- confirm against the model
            code that consumes this config.
        moebert_route_method: Stored as ``self.moebert_route_method``; a
            string naming the routing strategy. The set of accepted values
            is defined by the consuming model, not by this class.
        moebert_expert_dropout: Stored as ``self.moebert_expert_dropout``;
            presumably a dropout probability used inside the experts
            -- confirm against the model code.
        moebert_expert_dim: Stored as ``self.moebert_expert_dim``; presumably
            the hidden width of each expert -- confirm against the model code.
        moebert_route_hash_list: Stored as ``self.moebert_route_hash_list``;
            ``None`` by default. The expected type is not visible here.
        moebert_share_importance: Stored as ``self.moebert_share_importance``.
        moebert_load_importance: Stored as ``self.moebert_load_importance``;
            ``None`` by default. The expected type is not visible here.
        **kwargs: Passed through to ``BertConfig.__init__``.
    """

    model_type = "bert_moe"

    def __init__(
        self,
        moebert_expert_num: int = 16,
        # Must be a string literal: the bare expression ``gate-token`` is
        # parsed as the subtraction ``gate - token`` and raises NameError
        # as soon as the class body is executed.
        moebert_route_method: str = "gate-token",
        moebert_expert_dropout: float = 0.1,
        moebert_expert_dim: int = 128,
        moebert_route_hash_list=None,
        moebert_share_importance: float = 0.5,
        moebert_load_importance=None,
        **kwargs,
    ):
        # Let the base class consume the standard BERT options first.
        super().__init__(**kwargs)

        self.moebert_expert_num = moebert_expert_num
        self.moebert_route_method = moebert_route_method
        self.moebert_expert_dropout = moebert_expert_dropout
        self.moebert_expert_dim = moebert_expert_dim
        self.moebert_route_hash_list = moebert_route_hash_list
        self.moebert_share_importance = moebert_share_importance
        self.moebert_load_importance = moebert_load_importance