from transformers import PretrainedConfig


class AveyConfig(PretrainedConfig):
    """Configuration for the Avey model.

    Stores the hyperparameters needed to instantiate an Avey model and
    inherits serialization/loading behavior from
    :class:`transformers.PretrainedConfig`. Unrecognized keyword arguments
    are forwarded to the base class.

    Args:
        vocab_size: Size of the token vocabulary.
        context_len: Maximum sequence length; stored on the instance as
            ``max_position_embeddings`` for compatibility with the
            ``transformers`` ecosystem.
        d_embed: Embedding dimension; also exposed as ``hidden_size``.
        n_layers: Number of layers in the model.
        expansion_factor: Width multiplier (presumably for the feed-forward
            expansion — confirm against the model implementation).
        chunk_size: Chunk size used by the model (semantics depend on the
            model implementation, not evident from this file).
        k: NOTE(review): meaning not evident from this file — likely a
            top-k selection parameter; confirm against the model code.
        context_proportion: Fraction in ``[0, 1]`` (exact meaning depends on
            the model implementation).
        eps: Numerical-stability epsilon (e.g. for normalization layers).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "avey"

    def __init__(
        self,
        vocab_size: int = 50304,
        context_len: int = 512,
        d_embed: int = 768,
        n_layers: int = 26,
        expansion_factor: int = 4,
        chunk_size: int = 128,
        k: int = 3,
        context_proportion: float = 0.5,
        eps: float = 1e-12,  # annotated for consistency with sibling params
        **kwargs,
    ):
        self.vocab_size = vocab_size
        # Stored under the canonical transformers attribute name so
        # downstream tooling can find it. NOTE(review): ``context_len``
        # itself is not persisted; on reload, a saved
        # ``max_position_embeddings`` arrives via **kwargs and must be
        # re-applied by ``super().__init__`` after this default is set —
        # verify the save/load round-trip for non-default values.
        self.max_position_embeddings = context_len
        self.d_embed = d_embed
        # Alias under the canonical transformers attribute name.
        self.hidden_size = d_embed
        self.n_layers = n_layers
        self.expansion_factor = expansion_factor
        self.chunk_size = chunk_size
        self.k = k
        self.context_proportion = context_proportion
        self.eps = eps
        # Base-class init applied last so leftover kwargs (including any
        # serialized attributes) are set on the instance by the base class.
        super().__init__(**kwargs)