| from transformers import PretrainedConfig | |
class GPT2WorkshopConfig(PretrainedConfig):
    """Configuration object for the ``gpt2-workshop`` model type.

    Each named constructor argument is stored unchanged on the instance under
    the same name. Any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    NOTE(review): this class only stores the values below; the descriptions
    are inferred from the parameter names and should be confirmed against
    the model code that consumes this config.

    Args:
        vocab_size: Presumably the size of the token vocabulary.
        hidden_dim: Presumably the width of the residual stream / embeddings.
        context_length: Presumably the maximum sequence length.
        num_heads: Presumably the number of attention heads per layer.
        head_dim: Presumably the per-head dimension.
        num_layers: Presumably the number of transformer blocks.
        ffn_expansion: Presumably the feed-forward width as a multiple of
            ``hidden_dim``.
        dropout: Presumably the dropout probability.
        rope_theta: Presumably the rotary position embedding base.
        logit_soft_cap: Presumably the soft-capping value applied to logits.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "gpt2-workshop"

    # Expose the standard Hugging Face attribute names as aliases of this
    # config's own field names, so generic library code that reads e.g.
    # ``config.hidden_size`` or ``config.num_attention_heads`` keeps working.
    # Aliases are resolved on get/set by PretrainedConfig; the stored (and
    # serialized) attribute names remain the ones assigned in ``__init__``.
    attribute_map = {
        "num_hidden_layers": "num_layers",
        "hidden_size": "hidden_dim",
        "num_attention_heads": "num_heads",
        "max_position_embeddings": "context_length",
    }

    def __init__(
        self,
        vocab_size: int = 50304,
        hidden_dim: int = 768,
        context_length: int = 1024,
        num_heads: int = 12,
        head_dim: int = 64,
        num_layers: int = 12,
        ffn_expansion: int = 4,
        dropout: float = 0.1,
        rope_theta: float = 10000.0,
        logit_soft_cap: float = 30.0,
        **kwargs,
    ) -> None:
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.context_length = context_length
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.num_layers = num_layers
        self.ffn_expansion = ffn_expansion
        self.dropout = dropout
        self.rope_theta = rope_theta
        self.logit_soft_cap = logit_soft_cap
        # Model-specific fields are assigned before delegating, so the base
        # class handles only the remaining generic keyword arguments.
        super().__init__(**kwargs)