| { | |
| "best_metric": 3.029510498046875, | |
| "best_model_checkpoint": "trained_models/microsoftDialoGPTmedium_crd3/checkpoint-9912", | |
| "epoch": 2.957040572792363, | |
| "eval_steps": 168, | |
| "global_step": 9912, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.949880668257757e-05, | |
| "loss": 3.805, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 3.3926753997802734, | |
| "eval_runtime": 142.7844, | |
| "eval_samples_per_second": 29.653, | |
| "eval_steps_per_second": 3.712, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.899761336515513e-05, | |
| "loss": 3.3486, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 3.2977957725524902, | |
| "eval_runtime": 142.7066, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.84964200477327e-05, | |
| "loss": 3.2539, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 3.2512104511260986, | |
| "eval_runtime": 142.7113, | |
| "eval_samples_per_second": 29.668, | |
| "eval_steps_per_second": 3.714, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.7995226730310264e-05, | |
| "loss": 3.2009, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 3.221975564956665, | |
| "eval_runtime": 142.7296, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.749403341288783e-05, | |
| "loss": 3.1685, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 3.197237014770508, | |
| "eval_runtime": 142.735, | |
| "eval_samples_per_second": 29.663, | |
| "eval_steps_per_second": 3.713, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.6992840095465395e-05, | |
| "loss": 3.1332, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 3.1779494285583496, | |
| "eval_runtime": 142.7443, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.649164677804296e-05, | |
| "loss": 3.1231, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 3.163996696472168, | |
| "eval_runtime": 142.7827, | |
| "eval_samples_per_second": 29.653, | |
| "eval_steps_per_second": 3.712, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.5990453460620526e-05, | |
| "loss": 3.1005, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 3.1513357162475586, | |
| "eval_runtime": 142.6836, | |
| "eval_samples_per_second": 29.674, | |
| "eval_steps_per_second": 3.715, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.548926014319809e-05, | |
| "loss": 3.0952, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 3.1407930850982666, | |
| "eval_runtime": 142.6804, | |
| "eval_samples_per_second": 29.675, | |
| "eval_steps_per_second": 3.715, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.498806682577566e-05, | |
| "loss": 3.0903, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 3.1297414302825928, | |
| "eval_runtime": 142.7084, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.448687350835322e-05, | |
| "loss": 3.0737, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_loss": 3.120724678039551, | |
| "eval_runtime": 142.7344, | |
| "eval_samples_per_second": 29.663, | |
| "eval_steps_per_second": 3.713, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.398568019093079e-05, | |
| "loss": 3.0681, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 3.1170568466186523, | |
| "eval_runtime": 142.7306, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.348448687350836e-05, | |
| "loss": 3.034, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 3.1076815128326416, | |
| "eval_runtime": 142.7258, | |
| "eval_samples_per_second": 29.665, | |
| "eval_steps_per_second": 3.713, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.298329355608592e-05, | |
| "loss": 3.0319, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 3.1014297008514404, | |
| "eval_runtime": 142.7172, | |
| "eval_samples_per_second": 29.667, | |
| "eval_steps_per_second": 3.714, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.2482100238663484e-05, | |
| "loss": 3.0075, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 3.0964250564575195, | |
| "eval_runtime": 142.7047, | |
| "eval_samples_per_second": 29.67, | |
| "eval_steps_per_second": 3.714, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.1980906921241056e-05, | |
| "loss": 3.0282, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 3.0913186073303223, | |
| "eval_runtime": 142.7245, | |
| "eval_samples_per_second": 29.666, | |
| "eval_steps_per_second": 3.713, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.1479713603818615e-05, | |
| "loss": 3.0055, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 3.0834176540374756, | |
| "eval_runtime": 142.7795, | |
| "eval_samples_per_second": 29.654, | |
| "eval_steps_per_second": 3.712, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.097852028639618e-05, | |
| "loss": 3.0101, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 3.0793018341064453, | |
| "eval_runtime": 142.7762, | |
| "eval_samples_per_second": 29.655, | |
| "eval_steps_per_second": 3.712, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.047732696897375e-05, | |
| "loss": 3.0017, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 3.075371742248535, | |
| "eval_runtime": 142.7297, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.997613365155131e-05, | |
| "loss": 2.977, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 3.0743861198425293, | |
| "eval_runtime": 142.7454, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.9474940334128877e-05, | |
| "loss": 2.8771, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 3.0757055282592773, | |
| "eval_runtime": 142.7589, | |
| "eval_samples_per_second": 29.658, | |
| "eval_steps_per_second": 3.713, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.897374701670645e-05, | |
| "loss": 2.8633, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 3.0775437355041504, | |
| "eval_runtime": 142.7329, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.8472553699284014e-05, | |
| "loss": 2.8591, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 3.0708892345428467, | |
| "eval_runtime": 142.7479, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.797136038186157e-05, | |
| "loss": 2.8625, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 3.0683975219726562, | |
| "eval_runtime": 142.757, | |
| "eval_samples_per_second": 29.659, | |
| "eval_steps_per_second": 3.713, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.7470167064439145e-05, | |
| "loss": 2.8605, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 3.066983222961426, | |
| "eval_runtime": 142.718, | |
| "eval_samples_per_second": 29.667, | |
| "eval_steps_per_second": 3.714, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.696897374701671e-05, | |
| "loss": 2.8466, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 3.0646440982818604, | |
| "eval_runtime": 142.755, | |
| "eval_samples_per_second": 29.659, | |
| "eval_steps_per_second": 3.713, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.6467780429594276e-05, | |
| "loss": 2.8398, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 3.0618984699249268, | |
| "eval_runtime": 142.7437, | |
| "eval_samples_per_second": 29.662, | |
| "eval_steps_per_second": 3.713, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.596658711217184e-05, | |
| "loss": 2.8502, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 3.059511661529541, | |
| "eval_runtime": 142.746, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.546539379474941e-05, | |
| "loss": 2.8523, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 3.0564985275268555, | |
| "eval_runtime": 142.7041, | |
| "eval_samples_per_second": 29.67, | |
| "eval_steps_per_second": 3.714, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.496420047732697e-05, | |
| "loss": 2.8545, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 3.0538723468780518, | |
| "eval_runtime": 142.7089, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.446300715990454e-05, | |
| "loss": 2.8431, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 3.053504705429077, | |
| "eval_runtime": 142.7279, | |
| "eval_samples_per_second": 29.665, | |
| "eval_steps_per_second": 3.713, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.39618138424821e-05, | |
| "loss": 2.865, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 3.0489370822906494, | |
| "eval_runtime": 142.7484, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.346062052505967e-05, | |
| "loss": 2.8412, | |
| "step": 5544 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 3.048715114593506, | |
| "eval_runtime": 142.7028, | |
| "eval_samples_per_second": 29.67, | |
| "eval_steps_per_second": 3.714, | |
| "step": 5544 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.2959427207637234e-05, | |
| "loss": 2.8382, | |
| "step": 5712 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 3.0453927516937256, | |
| "eval_runtime": 142.6981, | |
| "eval_samples_per_second": 29.671, | |
| "eval_steps_per_second": 3.714, | |
| "step": 5712 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.24582338902148e-05, | |
| "loss": 2.8356, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 3.0460968017578125, | |
| "eval_runtime": 142.6846, | |
| "eval_samples_per_second": 29.674, | |
| "eval_steps_per_second": 3.714, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.1957040572792365e-05, | |
| "loss": 2.8265, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 3.042219400405884, | |
| "eval_runtime": 142.7165, | |
| "eval_samples_per_second": 29.667, | |
| "eval_steps_per_second": 3.714, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.145584725536993e-05, | |
| "loss": 2.8301, | |
| "step": 6216 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 3.0397562980651855, | |
| "eval_runtime": 142.7319, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 6216 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.0954653937947496e-05, | |
| "loss": 2.8286, | |
| "step": 6384 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 3.038638114929199, | |
| "eval_runtime": 142.7562, | |
| "eval_samples_per_second": 29.659, | |
| "eval_steps_per_second": 3.713, | |
| "step": 6384 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.045346062052506e-05, | |
| "loss": 2.8416, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 3.035384178161621, | |
| "eval_runtime": 142.669, | |
| "eval_samples_per_second": 29.677, | |
| "eval_steps_per_second": 3.715, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 2.9952267303102627e-05, | |
| "loss": 2.8179, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 3.040719509124756, | |
| "eval_runtime": 142.723, | |
| "eval_samples_per_second": 29.666, | |
| "eval_steps_per_second": 3.713, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 2.9451073985680195e-05, | |
| "loss": 2.6965, | |
| "step": 6888 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_loss": 3.0478203296661377, | |
| "eval_runtime": 142.7498, | |
| "eval_samples_per_second": 29.66, | |
| "eval_steps_per_second": 3.713, | |
| "step": 6888 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 2.8949880668257757e-05, | |
| "loss": 2.7437, | |
| "step": 7056 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 3.048868179321289, | |
| "eval_runtime": 142.7646, | |
| "eval_samples_per_second": 29.657, | |
| "eval_steps_per_second": 3.712, | |
| "step": 7056 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 2.8448687350835323e-05, | |
| "loss": 2.7252, | |
| "step": 7224 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 3.045444965362549, | |
| "eval_runtime": 142.7306, | |
| "eval_samples_per_second": 29.664, | |
| "eval_steps_per_second": 3.713, | |
| "step": 7224 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 2.794749403341289e-05, | |
| "loss": 2.718, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 3.04612398147583, | |
| "eval_runtime": 142.7393, | |
| "eval_samples_per_second": 29.662, | |
| "eval_steps_per_second": 3.713, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.7446300715990454e-05, | |
| "loss": 2.7246, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 3.0447964668273926, | |
| "eval_runtime": 142.6751, | |
| "eval_samples_per_second": 29.676, | |
| "eval_steps_per_second": 3.715, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.694510739856802e-05, | |
| "loss": 2.7365, | |
| "step": 7728 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_loss": 3.0433876514434814, | |
| "eval_runtime": 142.7111, | |
| "eval_samples_per_second": 29.668, | |
| "eval_steps_per_second": 3.714, | |
| "step": 7728 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.6443914081145588e-05, | |
| "loss": 2.7242, | |
| "step": 7896 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 3.044072151184082, | |
| "eval_runtime": 142.6648, | |
| "eval_samples_per_second": 29.678, | |
| "eval_steps_per_second": 3.715, | |
| "step": 7896 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 2.594272076372315e-05, | |
| "loss": 2.7343, | |
| "step": 8064 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 3.0415401458740234, | |
| "eval_runtime": 142.737, | |
| "eval_samples_per_second": 29.663, | |
| "eval_steps_per_second": 3.713, | |
| "step": 8064 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 2.5441527446300715e-05, | |
| "loss": 2.7312, | |
| "step": 8232 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 3.0406272411346436, | |
| "eval_runtime": 142.7448, | |
| "eval_samples_per_second": 29.661, | |
| "eval_steps_per_second": 3.713, | |
| "step": 8232 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 2.494033412887828e-05, | |
| "loss": 2.7359, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 3.0369086265563965, | |
| "eval_runtime": 142.71, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.443914081145585e-05, | |
| "loss": 2.714, | |
| "step": 8568 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 3.039921522140503, | |
| "eval_runtime": 142.7829, | |
| "eval_samples_per_second": 29.653, | |
| "eval_steps_per_second": 3.712, | |
| "step": 8568 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.3937947494033415e-05, | |
| "loss": 2.7311, | |
| "step": 8736 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 3.0377085208892822, | |
| "eval_runtime": 142.7212, | |
| "eval_samples_per_second": 29.666, | |
| "eval_steps_per_second": 3.714, | |
| "step": 8736 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 2.3436754176610977e-05, | |
| "loss": 2.7191, | |
| "step": 8904 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 3.036844253540039, | |
| "eval_runtime": 142.7078, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 8904 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.2935560859188546e-05, | |
| "loss": 2.7246, | |
| "step": 9072 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 3.0361039638519287, | |
| "eval_runtime": 142.7607, | |
| "eval_samples_per_second": 29.658, | |
| "eval_steps_per_second": 3.713, | |
| "step": 9072 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.243436754176611e-05, | |
| "loss": 2.721, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 3.0351145267486572, | |
| "eval_runtime": 142.7366, | |
| "eval_samples_per_second": 29.663, | |
| "eval_steps_per_second": 3.713, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.1933174224343677e-05, | |
| "loss": 2.71, | |
| "step": 9408 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 3.032681941986084, | |
| "eval_runtime": 142.729, | |
| "eval_samples_per_second": 29.665, | |
| "eval_steps_per_second": 3.713, | |
| "step": 9408 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.1431980906921242e-05, | |
| "loss": 2.7252, | |
| "step": 9576 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_loss": 3.033348798751831, | |
| "eval_runtime": 142.7408, | |
| "eval_samples_per_second": 29.662, | |
| "eval_steps_per_second": 3.713, | |
| "step": 9576 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.0930787589498808e-05, | |
| "loss": 2.7181, | |
| "step": 9744 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 3.0330820083618164, | |
| "eval_runtime": 142.7512, | |
| "eval_samples_per_second": 29.66, | |
| "eval_steps_per_second": 3.713, | |
| "step": 9744 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.0429594272076373e-05, | |
| "loss": 2.7141, | |
| "step": 9912 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 3.029510498046875, | |
| "eval_runtime": 142.7075, | |
| "eval_samples_per_second": 29.669, | |
| "eval_steps_per_second": 3.714, | |
| "step": 9912 | |
| } | |
| ], | |
| "logging_steps": 168, | |
| "max_steps": 16760, | |
| "num_train_epochs": 5, | |
| "save_steps": 168, | |
| "total_flos": 3.221291229039821e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |