{
  "vocab_size": 50000,
  "context_length": 512,
  "num_layers": 8,
  "num_classes": 2,
  "attn_pdrop": 0.1,
  "d_ff": 2048,
  "d_model": 512,
  "num_heads": 8,
  "residual_pdrop": 0.1
}