{
  "model_type": "gpt2",
  "architectures": [
    "ScratchTransformer"
  ],
  "_scratch_model": true,
  "vocab_size": 8000,
  "n_embd": 256,
  "n_layer": 16,
  "n_head": 16,
  "n_inner": 512,
  "n_positions": 4096,
  "resid_pdrop": 0.1,
  "embd_pdrop": 0.1,
  "attn_pdrop": 0.1,
  "d_model": 256,
  "n_layers": 16,
  "n_heads": 16,
  "d_ff": 512,
  "max_seq_len": 4096,
  "dropout": 0.1,
  "name": "scratch-model"
}