from transformers import PretrainedConfig


class GPT2WorkshopConfig(PretrainedConfig):
    """Hyperparameter container for the ``gpt2-workshop`` model type.

    Every constructor argument is stored unchanged as an instance attribute
    of the same name; no validation or derivation is performed here. Any
    additional keyword arguments are forwarded to ``PretrainedConfig``.

    NOTE(review): the parameter descriptions below are inferred from the
    argument names only -- confirm against the model implementation.

    Args:
        vocab_size: Presumably the number of entries in the token vocabulary.
        hidden_dim: Presumably the width of the residual stream.
        context_length: Presumably the maximum sequence length.
        num_heads: Presumably the number of attention heads per layer.
        head_dim: Presumably the per-head dimension. It is stored
            independently and is not checked against
            ``hidden_dim / num_heads``.
        num_layers: Presumably the number of transformer blocks. Also
            reachable as ``num_hidden_layers`` through ``attribute_map``.
        ffn_expansion: Presumably the feed-forward width multiplier.
        dropout: Presumably a dropout probability.
        rope_theta: Presumably the rotary position embedding base.
        logit_soft_cap: Presumably a soft-capping value applied to logits.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    # Identifier for this configuration family.
    model_type = "gpt2-workshop"

    # Alias the standard Hugging Face attribute name onto this class's own
    # attribute, so ``config.num_hidden_layers`` resolves to ``num_layers``.
    attribute_map = {"num_hidden_layers": "num_layers"}

    def __init__(
        self,
        vocab_size=50304,
        hidden_dim=768,
        context_length=1024,
        num_heads=12,
        head_dim=64,
        num_layers=12,
        ffn_expansion=4,
        dropout=0.1,
        rope_theta=10000.0,
        logit_soft_cap=30.0,
        **kwargs,
    ):
        # Vocabulary, width and sequence-length settings.
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.context_length = context_length

        # Attention geometry and depth.
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.num_layers = num_layers

        # Feed-forward sizing and regularisation.
        self.ffn_expansion = ffn_expansion
        self.dropout = dropout

        # Positional-encoding and logit-capping knobs.
        self.rope_theta = rope_theta
        self.logit_soft_cap = logit_soft_cap

        # The base initialiser runs last, after the model-specific attributes
        # above are set, and receives only the leftover keyword arguments.
        super().__init__(**kwargs)