| | from transformers import PretrainedConfig
|
| |
|
# Hugging Face Hub repository id this configuration is associated with
# (used when pushing/pulling the model from the Hub).
repo_name = "BeardedMonster/SabiYarn-125M"
|
| |
|
class GPTJXConfig(PretrainedConfig):
    """Configuration for the nanogpt-j model family.

    Holds the architecture hyperparameters and defers all remaining,
    HF-standard keyword arguments to :class:`transformers.PretrainedConfig`.
    """

    # Identifier transformers uses to map this config to its model class.
    model_type = "nanogpt-j"

    def __init__(
        self,
        block_size: int = 1024,
        vocab_size: int = 52050,
        n_layer: int = 12,
        n_head: int = 12,
        n_embd: int = 768,
        dropout: float = 0.0,
        bias: bool = False,
        **kwargs,
    ):
        """Create a GPTJXConfig.

        Args:
            block_size: presumably the maximum sequence length (context
                window) — confirm against the model implementation.
            vocab_size: size of the token vocabulary.
            n_layer: number of transformer blocks.
            n_head: number of attention heads per block.
            n_embd: embedding / hidden dimension.
            dropout: dropout probability (0.0 disables dropout).
            bias: whether linear/norm layers carry bias terms — assumption
                based on the name; verify against the model code.
            **kwargs: forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        # Record the architecture hyperparameters on the instance.
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.dropout = dropout
        self.bias = bias

        # Invoke the base initializer last, matching the original order, so
        # PretrainedConfig consumes the remaining standard HF kwargs.
        super().__init__(**kwargs)
|
| |
|
| | |