{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.722689075630252,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 9.915966386554623e-05,
      "loss": 0.5936,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.831932773109243e-05,
      "loss": 0.4777,
      "step": 20
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.747899159663865e-05,
      "loss": 0.451,
      "step": 30
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.663865546218487e-05,
      "loss": 0.4346,
      "step": 40
    },
    {
      "epoch": 0.42,
      "learning_rate": 9.579831932773111e-05,
      "loss": 0.4295,
      "step": 50
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.495798319327731e-05,
      "loss": 0.4145,
      "step": 60
    },
    {
      "epoch": 0.59,
      "learning_rate": 9.411764705882353e-05,
      "loss": 0.4026,
      "step": 70
    },
    {
      "epoch": 0.67,
      "learning_rate": 9.327731092436976e-05,
      "loss": 0.3883,
      "step": 80
    },
    {
      "epoch": 0.76,
      "learning_rate": 9.243697478991598e-05,
      "loss": 0.3932,
      "step": 90
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.159663865546218e-05,
      "loss": 0.3789,
      "step": 100
    },
    {
      "epoch": 0.84,
      "eval_loss": 0.37311611553636664,
      "eval_runtime": 3.6115,
      "eval_samples_per_second": 118.51,
      "eval_steps_per_second": 14.952,
      "step": 100
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.07563025210084e-05,
      "loss": 0.3752,
      "step": 110
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.991596638655462e-05,
      "loss": 0.3481,
      "step": 120
    },
    {
      "epoch": 1.09,
      "learning_rate": 8.907563025210084e-05,
      "loss": 0.3529,
      "step": 130
    },
    {
      "epoch": 1.18,
      "learning_rate": 8.823529411764706e-05,
      "loss": 0.3209,
      "step": 140
    },
    {
      "epoch": 1.26,
      "learning_rate": 8.739495798319329e-05,
      "loss": 0.3122,
      "step": 150
    },
    {
      "epoch": 1.34,
      "learning_rate": 8.65546218487395e-05,
      "loss": 0.3113,
      "step": 160
    },
    {
      "epoch": 1.43,
      "learning_rate": 8.571428571428571e-05,
      "loss": 0.3184,
      "step": 170
    },
    {
      "epoch": 1.51,
      "learning_rate": 8.487394957983193e-05,
      "loss": 0.3271,
      "step": 180
    },
    {
      "epoch": 1.6,
      "learning_rate": 8.403361344537815e-05,
      "loss": 0.3135,
      "step": 190
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.319327731092437e-05,
      "loss": 0.3021,
      "step": 200
    },
    {
      "epoch": 1.68,
      "eval_loss": 0.314563896388651,
      "eval_runtime": 4.1203,
      "eval_samples_per_second": 103.876,
      "eval_steps_per_second": 13.106,
      "step": 200
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.23529411764706e-05,
      "loss": 0.304,
      "step": 210
    },
    {
      "epoch": 1.85,
      "learning_rate": 8.151260504201682e-05,
      "loss": 0.3037,
      "step": 220
    },
    {
      "epoch": 1.93,
      "learning_rate": 8.067226890756304e-05,
      "loss": 0.2985,
      "step": 230
    },
    {
      "epoch": 2.02,
      "learning_rate": 7.983193277310926e-05,
      "loss": 0.2855,
      "step": 240
    },
    {
      "epoch": 2.1,
      "learning_rate": 7.899159663865546e-05,
      "loss": 0.2632,
      "step": 250
    },
    {
      "epoch": 2.18,
      "learning_rate": 7.815126050420168e-05,
      "loss": 0.2544,
      "step": 260
    },
    {
      "epoch": 2.27,
      "learning_rate": 7.73109243697479e-05,
      "loss": 0.256,
      "step": 270
    },
    {
      "epoch": 2.35,
      "learning_rate": 7.647058823529411e-05,
      "loss": 0.2563,
      "step": 280
    },
    {
      "epoch": 2.44,
      "learning_rate": 7.563025210084033e-05,
      "loss": 0.2595,
      "step": 290
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.478991596638657e-05,
      "loss": 0.2623,
      "step": 300
    },
    {
      "epoch": 2.52,
      "eval_loss": 0.28808101955931803,
      "eval_runtime": 3.6745,
      "eval_samples_per_second": 116.478,
      "eval_steps_per_second": 14.696,
      "step": 300
    },
    {
      "epoch": 2.61,
      "learning_rate": 7.394957983193279e-05,
      "loss": 0.2556,
      "step": 310
    },
    {
      "epoch": 2.69,
      "learning_rate": 7.310924369747899e-05,
      "loss": 0.2405,
      "step": 320
    },
    {
      "epoch": 2.77,
      "learning_rate": 7.226890756302521e-05,
      "loss": 0.2423,
      "step": 330
    },
    {
      "epoch": 2.86,
      "learning_rate": 7.142857142857143e-05,
      "loss": 0.2425,
      "step": 340
    },
    {
      "epoch": 2.94,
      "learning_rate": 7.058823529411765e-05,
      "loss": 0.2483,
      "step": 350
    },
    {
      "epoch": 3.03,
      "learning_rate": 6.974789915966386e-05,
      "loss": 0.2377,
      "step": 360
    },
    {
      "epoch": 3.11,
      "learning_rate": 6.890756302521008e-05,
      "loss": 0.217,
      "step": 370
    },
    {
      "epoch": 3.19,
      "learning_rate": 6.80672268907563e-05,
      "loss": 0.2078,
      "step": 380
    },
    {
      "epoch": 3.28,
      "learning_rate": 6.722689075630254e-05,
      "loss": 0.218,
      "step": 390
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.638655462184874e-05,
      "loss": 0.2057,
      "step": 400
    },
    {
      "epoch": 3.36,
      "eval_loss": 0.26705644754910507,
      "eval_runtime": 3.8185,
      "eval_samples_per_second": 112.085,
      "eval_steps_per_second": 14.142,
      "step": 400
    },
    {
      "epoch": 3.45,
      "learning_rate": 6.554621848739496e-05,
      "loss": 0.2105,
      "step": 410
    },
    {
      "epoch": 3.53,
      "learning_rate": 6.470588235294118e-05,
      "loss": 0.2122,
      "step": 420
    },
    {
      "epoch": 3.61,
      "learning_rate": 6.386554621848739e-05,
      "loss": 0.2009,
      "step": 430
    },
    {
      "epoch": 3.7,
      "learning_rate": 6.302521008403361e-05,
      "loss": 0.2134,
      "step": 440
    },
    {
      "epoch": 3.78,
      "learning_rate": 6.218487394957983e-05,
      "loss": 0.1925,
      "step": 450
    },
    {
      "epoch": 3.87,
      "learning_rate": 6.134453781512605e-05,
      "loss": 0.2059,
      "step": 460
    },
    {
      "epoch": 3.95,
      "learning_rate": 6.0504201680672267e-05,
      "loss": 0.2044,
      "step": 470
    },
    {
      "epoch": 4.03,
      "learning_rate": 5.966386554621849e-05,
      "loss": 0.1894,
      "step": 480
    },
    {
      "epoch": 4.12,
      "learning_rate": 5.882352941176471e-05,
      "loss": 0.1803,
      "step": 490
    },
    {
      "epoch": 4.2,
      "learning_rate": 5.7983193277310935e-05,
      "loss": 0.1662,
      "step": 500
    },
    {
      "epoch": 4.2,
      "eval_loss": 0.2607619765164575,
      "eval_runtime": 3.6275,
      "eval_samples_per_second": 117.987,
      "eval_steps_per_second": 14.886,
      "step": 500
    },
    {
      "epoch": 4.29,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.1697,
      "step": 510
    },
    {
      "epoch": 4.37,
      "learning_rate": 5.630252100840336e-05,
      "loss": 0.178,
      "step": 520
    },
    {
      "epoch": 4.45,
      "learning_rate": 5.546218487394958e-05,
      "loss": 0.1715,
      "step": 530
    },
    {
      "epoch": 4.54,
      "learning_rate": 5.4621848739495796e-05,
      "loss": 0.1543,
      "step": 540
    },
    {
      "epoch": 4.62,
      "learning_rate": 5.378151260504202e-05,
      "loss": 0.1642,
      "step": 550
    },
    {
      "epoch": 4.71,
      "learning_rate": 5.294117647058824e-05,
      "loss": 0.1588,
      "step": 560
    },
    {
      "epoch": 4.79,
      "learning_rate": 5.210084033613446e-05,
      "loss": 0.1637,
      "step": 570
    },
    {
      "epoch": 4.87,
      "learning_rate": 5.126050420168067e-05,
      "loss": 0.1586,
      "step": 580
    },
    {
      "epoch": 4.96,
      "learning_rate": 5.042016806722689e-05,
      "loss": 0.1597,
      "step": 590
    },
    {
      "epoch": 5.04,
      "learning_rate": 4.957983193277311e-05,
      "loss": 0.1499,
      "step": 600
    },
    {
      "epoch": 5.04,
      "eval_loss": 0.2822469831651042,
      "eval_runtime": 4.0436,
      "eval_samples_per_second": 105.845,
      "eval_steps_per_second": 13.354,
      "step": 600
    },
    {
      "epoch": 5.13,
      "learning_rate": 4.8739495798319326e-05,
      "loss": 0.13,
      "step": 610
    },
    {
      "epoch": 5.21,
      "learning_rate": 4.7899159663865554e-05,
      "loss": 0.1319,
      "step": 620
    },
    {
      "epoch": 5.29,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.1256,
      "step": 630
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.621848739495799e-05,
      "loss": 0.1339,
      "step": 640
    },
    {
      "epoch": 5.46,
      "learning_rate": 4.53781512605042e-05,
      "loss": 0.1216,
      "step": 650
    },
    {
      "epoch": 5.55,
      "learning_rate": 4.453781512605042e-05,
      "loss": 0.1236,
      "step": 660
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.369747899159664e-05,
      "loss": 0.1304,
      "step": 670
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.1163,
      "step": 680
    },
    {
      "epoch": 5.8,
      "learning_rate": 4.201680672268908e-05,
      "loss": 0.1099,
      "step": 690
    },
    {
      "epoch": 5.88,
      "learning_rate": 4.11764705882353e-05,
      "loss": 0.1129,
      "step": 700
    },
    {
      "epoch": 5.88,
      "eval_loss": 0.258842878695795,
      "eval_runtime": 3.7037,
      "eval_samples_per_second": 115.559,
      "eval_steps_per_second": 14.58,
      "step": 700
    },
    {
      "epoch": 5.97,
      "learning_rate": 4.033613445378152e-05,
      "loss": 0.1264,
      "step": 710
    },
    {
      "epoch": 6.05,
      "learning_rate": 3.949579831932773e-05,
      "loss": 0.1137,
      "step": 720
    },
    {
      "epoch": 6.13,
      "learning_rate": 3.865546218487395e-05,
      "loss": 0.0875,
      "step": 730
    },
    {
      "epoch": 6.22,
      "learning_rate": 3.7815126050420166e-05,
      "loss": 0.0894,
      "step": 740
    },
    {
      "epoch": 6.3,
      "learning_rate": 3.697478991596639e-05,
      "loss": 0.09,
      "step": 750
    },
    {
      "epoch": 6.39,
      "learning_rate": 3.613445378151261e-05,
      "loss": 0.0957,
      "step": 760
    },
    {
      "epoch": 6.47,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.0906,
      "step": 770
    },
    {
      "epoch": 6.55,
      "learning_rate": 3.445378151260504e-05,
      "loss": 0.0843,
      "step": 780
    },
    {
      "epoch": 6.64,
      "learning_rate": 3.361344537815127e-05,
      "loss": 0.082,
      "step": 790
    },
    {
      "epoch": 6.72,
      "learning_rate": 3.277310924369748e-05,
      "loss": 0.0918,
      "step": 800
    },
    {
      "epoch": 6.72,
      "eval_loss": 0.25530371967056836,
      "eval_runtime": 4.1287,
      "eval_samples_per_second": 103.666,
      "eval_steps_per_second": 13.079,
      "step": 800
    }
  ],
  "max_steps": 1190,
  "num_train_epochs": 10,
  "total_flos": 6.810484334054769e+17,
  "trial_name": null,
  "trial_params": null
}