{ "train_runtime": 33114.4358, "train_samples_per_second": 0.483, "train_steps_per_second": 0.03, "total_flos": 3.673093344067584e+17, "train_loss": 2.038690024614334, "epoch": 0.4, "train_perplexity": 7.680541285926508 }