broadfield's picture
Create scratch-model - 13.5M parameters
5238d08 verified
{
"model_type": "scratch_transformer",
"architectures": [
"ScratchTransformer"
],
"vocab_size": 8000,
"d_model": 256,
"n_layers": 16,
"n_heads": 16,
"d_ff": 512,
"max_seq_len": 4096,
"dropout": 0.1,
"name": "scratch-model"
}