{ "architectures": [ "BertForMaskedLM" ], "model_type": "bert", "block_size": 32, "n_embd": 64, "n_head": 4, "n_layer": 2, "vocab_size": 32000 }