| { | |
| "batch_size": 8, | |
| "bias": true, | |
| "block_size": 2048, | |
| "dropout": 0.1, | |
| "init_type": "load_pretrained", | |
| "local_files_only": true, | |
| "n_embd": 1536, | |
| "n_head": 8, | |
| "n_layer": 6, | |
| "pretrained_model_path": "/home/user/rugpt/ckpt_4000.pt", | |
| "vocab_size": 50257 | |
| } |
| { | |
| "batch_size": 8, | |
| "bias": true, | |
| "block_size": 2048, | |
| "dropout": 0.1, | |
| "init_type": "load_pretrained", | |
| "local_files_only": true, | |
| "n_embd": 1536, | |
| "n_head": 8, | |
| "n_layer": 6, | |
| "pretrained_model_path": "/home/user/rugpt/ckpt_4000.pt", | |
| "vocab_size": 50257 | |
| } |