{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "global_step": 25284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 4.9011232399936727e-05,
      "loss": 3.3688,
      "step": 500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.802246479987344e-05,
      "loss": 3.2716,
      "step": 1000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.703369719981016e-05,
      "loss": 3.2238,
      "step": 1500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.604492959974688e-05,
      "loss": 3.2109,
      "step": 2000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.5056161999683596e-05,
      "loss": 3.1767,
      "step": 2500
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.406739439962031e-05,
      "loss": 3.1522,
      "step": 3000
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.307862679955704e-05,
      "loss": 3.1347,
      "step": 3500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.2089859199493755e-05,
      "loss": 3.129,
      "step": 4000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.110109159943047e-05,
      "loss": 3.0598,
      "step": 4500
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.011232399936719e-05,
      "loss": 3.0041,
      "step": 5000
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.9123556399303914e-05,
      "loss": 3.0206,
      "step": 5500
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.813478879924063e-05,
      "loss": 3.0094,
      "step": 6000
    },
    {
      "epoch": 1.54,
      "learning_rate": 3.714602119917735e-05,
      "loss": 3.0046,
      "step": 6500
    },
    {
      "epoch": 1.66,
      "learning_rate": 3.6157253599114066e-05,
      "loss": 3.0031,
      "step": 7000
    },
    {
      "epoch": 1.78,
      "learning_rate": 3.516848599905079e-05,
      "loss": 2.9904,
      "step": 7500
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.41797183989875e-05,
      "loss": 2.9906,
      "step": 8000
    },
    {
      "epoch": 2.02,
      "learning_rate": 3.3190950798924225e-05,
      "loss": 2.9812,
      "step": 8500
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.220218319886094e-05,
      "loss": 2.8911,
      "step": 9000
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.1213415598797666e-05,
      "loss": 2.9061,
      "step": 9500
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.022464799873438e-05,
      "loss": 2.9192,
      "step": 10000
    },
    {
      "epoch": 2.49,
      "learning_rate": 2.92358803986711e-05,
      "loss": 2.9018,
      "step": 10500
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.8247112798607815e-05,
      "loss": 2.9167,
      "step": 11000
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.725834519854454e-05,
      "loss": 2.923,
      "step": 11500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.6269577598481253e-05,
      "loss": 2.8988,
      "step": 12000
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.5280809998417977e-05,
      "loss": 2.8976,
      "step": 12500
    },
    {
      "epoch": 3.08,
      "learning_rate": 2.429204239835469e-05,
      "loss": 2.8552,
      "step": 13000
    },
    {
      "epoch": 3.2,
      "learning_rate": 2.3303274798291412e-05,
      "loss": 2.8315,
      "step": 13500
    },
    {
      "epoch": 3.32,
      "learning_rate": 2.231450719822813e-05,
      "loss": 2.8407,
      "step": 14000
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.1325739598164847e-05,
      "loss": 2.8406,
      "step": 14500
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.0336971998101567e-05,
      "loss": 2.8386,
      "step": 15000
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.9348204398038285e-05,
      "loss": 2.8402,
      "step": 15500
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.8359436797975006e-05,
      "loss": 2.8409,
      "step": 16000
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.7370669197911723e-05,
      "loss": 2.8446,
      "step": 16500
    },
    {
      "epoch": 4.03,
      "learning_rate": 1.6381901597848444e-05,
      "loss": 2.8165,
      "step": 17000
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.539313399778516e-05,
      "loss": 2.7863,
      "step": 17500
    },
    {
      "epoch": 4.27,
      "learning_rate": 1.440436639772188e-05,
      "loss": 2.7812,
      "step": 18000
    },
    {
      "epoch": 4.39,
      "learning_rate": 1.3415598797658599e-05,
      "loss": 2.7759,
      "step": 18500
    },
    {
      "epoch": 4.51,
      "learning_rate": 1.2426831197595318e-05,
      "loss": 2.7893,
      "step": 19000
    },
    {
      "epoch": 4.63,
      "learning_rate": 1.1438063597532037e-05,
      "loss": 2.8047,
      "step": 19500
    },
    {
      "epoch": 4.75,
      "learning_rate": 1.0449295997468755e-05,
      "loss": 2.7915,
      "step": 20000
    },
    {
      "epoch": 4.86,
      "learning_rate": 9.460528397405474e-06,
      "loss": 2.8009,
      "step": 20500
    },
    {
      "epoch": 4.98,
      "learning_rate": 8.471760797342193e-06,
      "loss": 2.7765,
      "step": 21000
    },
    {
      "epoch": 5.1,
      "learning_rate": 7.482993197278912e-06,
      "loss": 2.7556,
      "step": 21500
    },
    {
      "epoch": 5.22,
      "learning_rate": 6.494225597215631e-06,
      "loss": 2.7601,
      "step": 22000
    },
    {
      "epoch": 5.34,
      "learning_rate": 5.50545799715235e-06,
      "loss": 2.7608,
      "step": 22500
    },
    {
      "epoch": 5.46,
      "learning_rate": 4.516690397089068e-06,
      "loss": 2.7527,
      "step": 23000
    },
    {
      "epoch": 5.58,
      "learning_rate": 3.5279227970257872e-06,
      "loss": 2.7481,
      "step": 23500
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.539155196962506e-06,
      "loss": 2.7657,
      "step": 24000
    },
    {
      "epoch": 5.81,
      "learning_rate": 1.550387596899225e-06,
      "loss": 2.7518,
      "step": 24500
    },
    {
      "epoch": 5.93,
      "learning_rate": 5.616199968359437e-07,
      "loss": 2.7425,
      "step": 25000
    },
    {
      "epoch": 6.0,
      "step": 25284,
      "total_flos": 1.3213015474176e+16,
      "train_loss": 2.915600132591418,
      "train_runtime": 6526.9461,
      "train_samples_per_second": 3.874,
      "train_steps_per_second": 3.874
    }
  ],
  "max_steps": 25284,
  "num_train_epochs": 6,
  "total_flos": 1.3213015474176e+16,
  "trial_name": null,
  "trial_params": null
}