broadfield's picture
Create scratch-model - 13.5M parameters
10942ba verified
{
"model_type": "gpt2",
"architectures": [
"ScratchTransformer"
],
"_scratch_model": true,
"vocab_size": 8000,
"n_embd": 256,
"n_layer": 16,
"n_head": 16,
"n_inner": 512,
"n_positions": 4096,
"resid_pdrop": 0.1,
"embd_pdrop": 0.1,
"attn_pdrop": 0.1,
"d_model": 256,
"n_layers": 16,
"n_heads": 16,
"d_ff": 512,
"max_seq_len": 4096,
"dropout": 0.1,
"name": "scratch-model"
}