{ "vocab_size": 8000, "d_model": 256, "n_layers": 16, "n_heads": 16, "max_seq_len": 4096, "dropout": 0.1, "model_type": "scratch_transformer", "architectures": [ "ScratchTransformer" ] }