{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.722689075630252, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 9.915966386554623e-05, "loss": 0.5936, "step": 10 }, { "epoch": 0.17, "learning_rate": 9.831932773109243e-05, "loss": 0.4777, "step": 20 }, { "epoch": 0.25, "learning_rate": 9.747899159663865e-05, "loss": 0.451, "step": 30 }, { "epoch": 0.34, "learning_rate": 9.663865546218487e-05, "loss": 0.4346, "step": 40 }, { "epoch": 0.42, "learning_rate": 9.579831932773111e-05, "loss": 0.4295, "step": 50 }, { "epoch": 0.5, "learning_rate": 9.495798319327731e-05, "loss": 0.4145, "step": 60 }, { "epoch": 0.59, "learning_rate": 9.411764705882353e-05, "loss": 0.4026, "step": 70 }, { "epoch": 0.67, "learning_rate": 9.327731092436976e-05, "loss": 0.3883, "step": 80 }, { "epoch": 0.76, "learning_rate": 9.243697478991598e-05, "loss": 0.3932, "step": 90 }, { "epoch": 0.84, "learning_rate": 9.159663865546218e-05, "loss": 0.3789, "step": 100 }, { "epoch": 0.84, "eval_loss": 0.37311611553636664, "eval_runtime": 3.6115, "eval_samples_per_second": 118.51, "eval_steps_per_second": 14.952, "step": 100 }, { "epoch": 0.92, "learning_rate": 9.07563025210084e-05, "loss": 0.3752, "step": 110 }, { "epoch": 1.01, "learning_rate": 8.991596638655462e-05, "loss": 0.3481, "step": 120 }, { "epoch": 1.09, "learning_rate": 8.907563025210084e-05, "loss": 0.3529, "step": 130 }, { "epoch": 1.18, "learning_rate": 8.823529411764706e-05, "loss": 0.3209, "step": 140 }, { "epoch": 1.26, "learning_rate": 8.739495798319329e-05, "loss": 0.3122, "step": 150 }, { "epoch": 1.34, "learning_rate": 8.65546218487395e-05, "loss": 0.3113, "step": 160 }, { "epoch": 1.43, "learning_rate": 8.571428571428571e-05, "loss": 0.3184, "step": 170 }, { "epoch": 1.51, "learning_rate": 8.487394957983193e-05, "loss": 0.3271, "step": 180 }, { "epoch": 1.6, "learning_rate": 8.403361344537815e-05, "loss": 0.3135, "step": 190 }, { "epoch": 1.68, "learning_rate": 8.319327731092437e-05, "loss": 0.3021, "step": 200 }, { "epoch": 1.68, "eval_loss": 0.314563896388651, "eval_runtime": 4.1203, "eval_samples_per_second": 103.876, "eval_steps_per_second": 13.106, "step": 200 }, { "epoch": 1.76, "learning_rate": 8.23529411764706e-05, "loss": 0.304, "step": 210 }, { "epoch": 1.85, "learning_rate": 8.151260504201682e-05, "loss": 0.3037, "step": 220 }, { "epoch": 1.93, "learning_rate": 8.067226890756304e-05, "loss": 0.2985, "step": 230 }, { "epoch": 2.02, "learning_rate": 7.983193277310926e-05, "loss": 0.2855, "step": 240 }, { "epoch": 2.1, "learning_rate": 7.899159663865546e-05, "loss": 0.2632, "step": 250 }, { "epoch": 2.18, "learning_rate": 7.815126050420168e-05, "loss": 0.2544, "step": 260 }, { "epoch": 2.27, "learning_rate": 7.73109243697479e-05, "loss": 0.256, "step": 270 }, { "epoch": 2.35, "learning_rate": 7.647058823529411e-05, "loss": 0.2563, "step": 280 }, { "epoch": 2.44, "learning_rate": 7.563025210084033e-05, "loss": 0.2595, "step": 290 }, { "epoch": 2.52, "learning_rate": 7.478991596638657e-05, "loss": 0.2623, "step": 300 }, { "epoch": 2.52, "eval_loss": 0.28808101955931803, "eval_runtime": 3.6745, "eval_samples_per_second": 116.478, "eval_steps_per_second": 14.696, "step": 300 }, { "epoch": 2.61, "learning_rate": 7.394957983193279e-05, "loss": 0.2556, "step": 310 }, { "epoch": 2.69, "learning_rate": 7.310924369747899e-05, "loss": 0.2405, "step": 320 }, { "epoch": 2.77, "learning_rate": 7.226890756302521e-05, "loss": 0.2423, "step": 330 }, { "epoch": 2.86, "learning_rate": 7.142857142857143e-05, "loss": 0.2425, "step": 340 }, { "epoch": 2.94, "learning_rate": 7.058823529411765e-05, "loss": 0.2483, "step": 350 }, { "epoch": 3.03, "learning_rate": 6.974789915966386e-05, "loss": 0.2377, "step": 360 }, { "epoch": 3.11, "learning_rate": 6.890756302521008e-05, "loss": 0.217, "step": 370 }, { "epoch": 3.19, "learning_rate": 6.80672268907563e-05, "loss": 0.2078, "step": 380 }, { "epoch": 3.28, "learning_rate": 6.722689075630254e-05, "loss": 0.218, "step": 390 }, { "epoch": 3.36, "learning_rate": 6.638655462184874e-05, "loss": 0.2057, "step": 400 }, { "epoch": 3.36, "eval_loss": 0.26705644754910507, "eval_runtime": 3.8185, "eval_samples_per_second": 112.085, "eval_steps_per_second": 14.142, "step": 400 }, { "epoch": 3.45, "learning_rate": 6.554621848739496e-05, "loss": 0.2105, "step": 410 }, { "epoch": 3.53, "learning_rate": 6.470588235294118e-05, "loss": 0.2122, "step": 420 }, { "epoch": 3.61, "learning_rate": 6.386554621848739e-05, "loss": 0.2009, "step": 430 }, { "epoch": 3.7, "learning_rate": 6.302521008403361e-05, "loss": 0.2134, "step": 440 }, { "epoch": 3.78, "learning_rate": 6.218487394957983e-05, "loss": 0.1925, "step": 450 }, { "epoch": 3.87, "learning_rate": 6.134453781512605e-05, "loss": 0.2059, "step": 460 }, { "epoch": 3.95, "learning_rate": 6.0504201680672267e-05, "loss": 0.2044, "step": 470 }, { "epoch": 4.03, "learning_rate": 5.966386554621849e-05, "loss": 0.1894, "step": 480 }, { "epoch": 4.12, "learning_rate": 5.882352941176471e-05, "loss": 0.1803, "step": 490 }, { "epoch": 4.2, "learning_rate": 5.7983193277310935e-05, "loss": 0.1662, "step": 500 }, { "epoch": 4.2, "eval_loss": 0.2607619765164575, "eval_runtime": 3.6275, "eval_samples_per_second": 117.987, "eval_steps_per_second": 14.886, "step": 500 }, { "epoch": 4.29, "learning_rate": 5.714285714285714e-05, "loss": 0.1697, "step": 510 }, { "epoch": 4.37, "learning_rate": 5.630252100840336e-05, "loss": 0.178, "step": 520 }, { "epoch": 4.45, "learning_rate": 5.546218487394958e-05, "loss": 0.1715, "step": 530 }, { "epoch": 4.54, "learning_rate": 5.4621848739495796e-05, "loss": 0.1543, "step": 540 }, { "epoch": 4.62, "learning_rate": 5.378151260504202e-05, "loss": 0.1642, "step": 550 }, { "epoch": 4.71, "learning_rate": 5.294117647058824e-05, "loss": 0.1588, "step": 560 }, { "epoch": 4.79, "learning_rate": 5.210084033613446e-05, "loss": 0.1637, "step": 570 }, { "epoch": 4.87, "learning_rate": 5.126050420168067e-05, "loss": 0.1586, "step": 580 }, { "epoch": 4.96, "learning_rate": 5.042016806722689e-05, "loss": 0.1597, "step": 590 }, { "epoch": 5.04, "learning_rate": 4.957983193277311e-05, "loss": 0.1499, "step": 600 }, { "epoch": 5.04, "eval_loss": 0.2822469831651042, "eval_runtime": 4.0436, "eval_samples_per_second": 105.845, "eval_steps_per_second": 13.354, "step": 600 }, { "epoch": 5.13, "learning_rate": 4.8739495798319326e-05, "loss": 0.13, "step": 610 }, { "epoch": 5.21, "learning_rate": 4.7899159663865554e-05, "loss": 0.1319, "step": 620 }, { "epoch": 5.29, "learning_rate": 4.705882352941177e-05, "loss": 0.1256, "step": 630 }, { "epoch": 5.38, "learning_rate": 4.621848739495799e-05, "loss": 0.1339, "step": 640 }, { "epoch": 5.46, "learning_rate": 4.53781512605042e-05, "loss": 0.1216, "step": 650 }, { "epoch": 5.55, "learning_rate": 4.453781512605042e-05, "loss": 0.1236, "step": 660 }, { "epoch": 5.63, "learning_rate": 4.369747899159664e-05, "loss": 0.1304, "step": 670 }, { "epoch": 5.71, "learning_rate": 4.2857142857142856e-05, "loss": 0.1163, "step": 680 }, { "epoch": 5.8, "learning_rate": 4.201680672268908e-05, "loss": 0.1099, "step": 690 }, { "epoch": 5.88, "learning_rate": 4.11764705882353e-05, "loss": 0.1129, "step": 700 }, { "epoch": 5.88, "eval_loss": 0.258842878695795, "eval_runtime": 3.7037, "eval_samples_per_second": 115.559, "eval_steps_per_second": 14.58, "step": 700 }, { "epoch": 5.97, "learning_rate": 4.033613445378152e-05, "loss": 0.1264, "step": 710 }, { "epoch": 6.05, "learning_rate": 3.949579831932773e-05, "loss": 0.1137, "step": 720 }, { "epoch": 6.13, "learning_rate": 3.865546218487395e-05, "loss": 0.0875, "step": 730 }, { "epoch": 6.22, "learning_rate": 3.7815126050420166e-05, "loss": 0.0894, "step": 740 }, { "epoch": 6.3, "learning_rate": 3.697478991596639e-05, "loss": 0.09, "step": 750 }, { "epoch": 6.39, "learning_rate": 3.613445378151261e-05, "loss": 0.0957, "step": 760 }, { "epoch": 6.47, "learning_rate": 3.529411764705883e-05, "loss": 0.0906, "step": 770 }, { "epoch": 6.55, "learning_rate": 3.445378151260504e-05, "loss": 0.0843, "step": 780 }, { "epoch": 6.64, "learning_rate": 3.361344537815127e-05, "loss": 0.082, "step": 790 }, { "epoch": 6.72, "learning_rate": 3.277310924369748e-05, "loss": 0.0918, "step": 800 }, { "epoch": 6.72, "eval_loss": 0.25530371967056836, "eval_runtime": 4.1287, "eval_samples_per_second": 103.666, "eval_steps_per_second": 13.079, "step": 800 } ], "max_steps": 1190, "num_train_epochs": 10, "total_flos": 6.810484334054769e+17, "trial_name": null, "trial_params": null }