from transformers import PretrainedConfig, AutoConfig class MiniLLaDAConfig(PretrainedConfig): model_type = "mini-llada" def __init__( self, vocab_size=52000, mask_token_id=4, dim=2048, depth=18, head=16, intermediate_size=5632, max_seq_len=2048, **kwargs, ): self.vocab_size = vocab_size self.mask_token_id = mask_token_id self.dim = dim self.depth = depth self.head = head self.intermediate_size = intermediate_size self.max_seq_len = max_seq_len super().__init__(**kwargs)