{ "d_model": 128, "nhead": 4, "num_layers": 4, "vocab_size": 257, "avg_loss": 0.04948290410004556, "avg_bpb": 0.07138874035391997 }