| from transformers import PretrainedConfig | |
class SimpleStories4MConfig(PretrainedConfig):
    """Configuration object for the ``simple_stories_4m`` model type.

    The constructor records six hyperparameters as instance attributes and
    hands every remaining keyword argument to ``PretrainedConfig``. No
    validation is performed here. The per-argument descriptions below are
    inferred from the parameter names only; confirm them against the model
    code that consumes this config.

    Args:
        vocab_size: Presumably the tokenizer vocabulary size.
        block_size: Presumably the maximum sequence length (context window).
        n_embed: Presumably the embedding / hidden width.
        n_heads: Presumably the number of attention heads per layer.
        n_layers: Presumably the number of stacked transformer blocks.
        dropout: Presumably the dropout probability.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "simple_stories_4m"

    def __init__(
        self,
        vocab_size: int = 2048,
        block_size: int = 1080,
        n_embed: int = 256,
        n_heads: int = 2,
        n_layers: int = 4,
        dropout: float = 0.1,
        **kwargs,
    ):
        # Model-specific fields are stored first, in declaration order, so
        # they already exist on the instance when the base initializer runs.
        self.vocab_size, self.block_size = vocab_size, block_size
        self.n_embed, self.n_heads, self.n_layers = n_embed, n_heads, n_layers
        self.dropout = dropout

        # Everything not consumed above is left to the base class.
        super().__init__(**kwargs)