| { | |
| "best_metric": 2.5792043209075928, | |
| "best_model_checkpoint": "./clip-roberta-finetuned/checkpoint-48000", | |
| "epoch": 10.0, | |
| "global_step": 68710, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.963760733517683e-05, | |
| "loss": 2.9841, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 3.411221504211426, | |
| "eval_runtime": 218.2214, | |
| "eval_samples_per_second": 447.83, | |
| "eval_steps_per_second": 1.751, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.927521467035366e-05, | |
| "loss": 2.72, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 3.3430113792419434, | |
| "eval_runtime": 214.9656, | |
| "eval_samples_per_second": 454.612, | |
| "eval_steps_per_second": 1.777, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.891209430941639e-05, | |
| "loss": 2.6319, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 3.2295451164245605, | |
| "eval_runtime": 250.7246, | |
| "eval_samples_per_second": 389.774, | |
| "eval_steps_per_second": 1.524, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.854824625236501e-05, | |
| "loss": 2.5781, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 3.1644504070281982, | |
| "eval_runtime": 249.2113, | |
| "eval_samples_per_second": 392.141, | |
| "eval_steps_per_second": 1.533, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.818439819531364e-05, | |
| "loss": 2.5339, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 3.1226284503936768, | |
| "eval_runtime": 249.7319, | |
| "eval_samples_per_second": 391.324, | |
| "eval_steps_per_second": 1.53, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.782055013826226e-05, | |
| "loss": 2.503, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 3.0856029987335205, | |
| "eval_runtime": 260.6356, | |
| "eval_samples_per_second": 374.953, | |
| "eval_steps_per_second": 1.466, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.745670208121089e-05, | |
| "loss": 2.4581, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 3.063863515853882, | |
| "eval_runtime": 246.5877, | |
| "eval_samples_per_second": 396.313, | |
| "eval_steps_per_second": 1.549, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.709285402415951e-05, | |
| "loss": 2.4494, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 3.0415244102478027, | |
| "eval_runtime": 244.6941, | |
| "eval_samples_per_second": 399.38, | |
| "eval_steps_per_second": 1.561, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.6729005967108134e-05, | |
| "loss": 2.4275, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 3.0244903564453125, | |
| "eval_runtime": 210.3742, | |
| "eval_samples_per_second": 464.534, | |
| "eval_steps_per_second": 1.816, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.636515791005676e-05, | |
| "loss": 2.3909, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 2.999117851257324, | |
| "eval_runtime": 210.4024, | |
| "eval_samples_per_second": 464.472, | |
| "eval_steps_per_second": 1.816, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.6001309853005384e-05, | |
| "loss": 2.3902, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 2.9931323528289795, | |
| "eval_runtime": 208.7009, | |
| "eval_samples_per_second": 468.259, | |
| "eval_steps_per_second": 1.83, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.563746179595401e-05, | |
| "loss": 2.3741, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 2.9612369537353516, | |
| "eval_runtime": 212.7001, | |
| "eval_samples_per_second": 459.454, | |
| "eval_steps_per_second": 1.796, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.5273613738902634e-05, | |
| "loss": 2.3536, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 2.9508631229400635, | |
| "eval_runtime": 210.803, | |
| "eval_samples_per_second": 463.589, | |
| "eval_steps_per_second": 1.812, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.490976568185126e-05, | |
| "loss": 2.3392, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 2.9288971424102783, | |
| "eval_runtime": 210.6758, | |
| "eval_samples_per_second": 463.869, | |
| "eval_steps_per_second": 1.813, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.454591762479989e-05, | |
| "loss": 2.3083, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 2.9214062690734863, | |
| "eval_runtime": 211.6271, | |
| "eval_samples_per_second": 461.784, | |
| "eval_steps_per_second": 1.805, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.418206956774851e-05, | |
| "loss": 2.3094, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 2.915283441543579, | |
| "eval_runtime": 210.1146, | |
| "eval_samples_per_second": 465.108, | |
| "eval_steps_per_second": 1.818, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.3818221510697134e-05, | |
| "loss": 2.2864, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 2.903420925140381, | |
| "eval_runtime": 214.0395, | |
| "eval_samples_per_second": 456.579, | |
| "eval_steps_per_second": 1.785, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.3454373453645755e-05, | |
| "loss": 2.2893, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 2.8963093757629395, | |
| "eval_runtime": 218.4194, | |
| "eval_samples_per_second": 447.424, | |
| "eval_steps_per_second": 1.749, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.3090525396594384e-05, | |
| "loss": 2.2697, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 2.884676456451416, | |
| "eval_runtime": 207.7625, | |
| "eval_samples_per_second": 470.374, | |
| "eval_steps_per_second": 1.839, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4.2726677339543005e-05, | |
| "loss": 2.2762, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 2.866511106491089, | |
| "eval_runtime": 207.4714, | |
| "eval_samples_per_second": 471.034, | |
| "eval_steps_per_second": 1.841, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.2363556978605734e-05, | |
| "loss": 2.2667, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 2.853637456893921, | |
| "eval_runtime": 209.4841, | |
| "eval_samples_per_second": 466.508, | |
| "eval_steps_per_second": 1.824, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.1999708921554356e-05, | |
| "loss": 2.2548, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 2.8472321033477783, | |
| "eval_runtime": 207.7935, | |
| "eval_samples_per_second": 470.303, | |
| "eval_steps_per_second": 1.838, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 4.1635860864502984e-05, | |
| "loss": 2.238, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 2.849086284637451, | |
| "eval_runtime": 207.5183, | |
| "eval_samples_per_second": 470.927, | |
| "eval_steps_per_second": 1.841, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 4.127201280745161e-05, | |
| "loss": 2.2423, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 2.825746774673462, | |
| "eval_runtime": 218.1498, | |
| "eval_samples_per_second": 447.977, | |
| "eval_steps_per_second": 1.751, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.0908164750400234e-05, | |
| "loss": 2.2406, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 2.82869029045105, | |
| "eval_runtime": 208.9781, | |
| "eval_samples_per_second": 467.637, | |
| "eval_steps_per_second": 1.828, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 4.054431669334886e-05, | |
| "loss": 2.2248, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 2.81931734085083, | |
| "eval_runtime": 210.3496, | |
| "eval_samples_per_second": 464.588, | |
| "eval_steps_per_second": 1.816, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.0181196332411585e-05, | |
| "loss": 2.223, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 2.810143232345581, | |
| "eval_runtime": 211.5447, | |
| "eval_samples_per_second": 461.964, | |
| "eval_steps_per_second": 1.806, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.981734827536021e-05, | |
| "loss": 2.1995, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 2.802741527557373, | |
| "eval_runtime": 210.9696, | |
| "eval_samples_per_second": 463.223, | |
| "eval_steps_per_second": 1.811, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.9453500218308835e-05, | |
| "loss": 2.1834, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 2.787959098815918, | |
| "eval_runtime": 207.5007, | |
| "eval_samples_per_second": 470.967, | |
| "eval_steps_per_second": 1.841, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.908965216125746e-05, | |
| "loss": 2.1723, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 2.778273582458496, | |
| "eval_runtime": 217.4843, | |
| "eval_samples_per_second": 449.347, | |
| "eval_steps_per_second": 1.756, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.8725804104206085e-05, | |
| "loss": 2.1651, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 2.773916721343994, | |
| "eval_runtime": 211.3325, | |
| "eval_samples_per_second": 462.428, | |
| "eval_steps_per_second": 1.808, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.836195604715471e-05, | |
| "loss": 2.1575, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 2.782458543777466, | |
| "eval_runtime": 214.023, | |
| "eval_samples_per_second": 456.615, | |
| "eval_steps_per_second": 1.785, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.7998107990103335e-05, | |
| "loss": 2.1598, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 2.7659904956817627, | |
| "eval_runtime": 211.2594, | |
| "eval_samples_per_second": 462.588, | |
| "eval_steps_per_second": 1.808, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.7634259933051956e-05, | |
| "loss": 2.1667, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 2.75777530670166, | |
| "eval_runtime": 209.9442, | |
| "eval_samples_per_second": 465.486, | |
| "eval_steps_per_second": 1.82, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.7271139572114685e-05, | |
| "loss": 2.1565, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 2.757976770401001, | |
| "eval_runtime": 211.0178, | |
| "eval_samples_per_second": 463.117, | |
| "eval_steps_per_second": 1.81, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.6907291515063314e-05, | |
| "loss": 2.1558, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 2.7561423778533936, | |
| "eval_runtime": 210.4961, | |
| "eval_samples_per_second": 464.265, | |
| "eval_steps_per_second": 1.815, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.6543443458011935e-05, | |
| "loss": 2.1642, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 2.751215934753418, | |
| "eval_runtime": 210.4158, | |
| "eval_samples_per_second": 464.442, | |
| "eval_steps_per_second": 1.815, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.617959540096056e-05, | |
| "loss": 2.1374, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 2.736060619354248, | |
| "eval_runtime": 214.104, | |
| "eval_samples_per_second": 456.442, | |
| "eval_steps_per_second": 1.784, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 3.5815747343909185e-05, | |
| "loss": 2.1402, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 2.7384564876556396, | |
| "eval_runtime": 210.1981, | |
| "eval_samples_per_second": 464.923, | |
| "eval_steps_per_second": 1.817, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.545189928685781e-05, | |
| "loss": 2.1326, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 2.723484516143799, | |
| "eval_runtime": 210.8719, | |
| "eval_samples_per_second": 463.438, | |
| "eval_steps_per_second": 1.812, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.5088051229806435e-05, | |
| "loss": 2.1272, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 2.7183401584625244, | |
| "eval_runtime": 239.7397, | |
| "eval_samples_per_second": 407.634, | |
| "eval_steps_per_second": 1.593, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 3.4724203172755057e-05, | |
| "loss": 2.0954, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_loss": 2.7156314849853516, | |
| "eval_runtime": 211.1012, | |
| "eval_samples_per_second": 462.934, | |
| "eval_steps_per_second": 1.81, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 3.4360355115703685e-05, | |
| "loss": 2.0842, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "eval_loss": 2.7065327167510986, | |
| "eval_runtime": 210.8515, | |
| "eval_samples_per_second": 463.483, | |
| "eval_steps_per_second": 1.812, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.399650705865231e-05, | |
| "loss": 2.0859, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_loss": 2.7088747024536133, | |
| "eval_runtime": 215.2076, | |
| "eval_samples_per_second": 454.101, | |
| "eval_steps_per_second": 1.775, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.3632659001600935e-05, | |
| "loss": 2.0856, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "eval_loss": 2.6962101459503174, | |
| "eval_runtime": 210.6214, | |
| "eval_samples_per_second": 463.989, | |
| "eval_steps_per_second": 1.814, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 3.3268810944549556e-05, | |
| "loss": 2.0775, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "eval_loss": 2.693091630935669, | |
| "eval_runtime": 210.6882, | |
| "eval_samples_per_second": 463.842, | |
| "eval_steps_per_second": 1.813, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 3.2905690583612286e-05, | |
| "loss": 2.0821, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "eval_loss": 2.693345069885254, | |
| "eval_runtime": 217.6654, | |
| "eval_samples_per_second": 448.973, | |
| "eval_steps_per_second": 1.755, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 3.2541842526560914e-05, | |
| "loss": 2.0706, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "eval_loss": 2.70108699798584, | |
| "eval_runtime": 210.1763, | |
| "eval_samples_per_second": 464.971, | |
| "eval_steps_per_second": 1.818, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 3.2177994469509535e-05, | |
| "loss": 2.0689, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "eval_loss": 2.7009191513061523, | |
| "eval_runtime": 207.9861, | |
| "eval_samples_per_second": 469.868, | |
| "eval_steps_per_second": 1.837, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 3.181414641245816e-05, | |
| "loss": 2.0807, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_loss": 2.682542324066162, | |
| "eval_runtime": 214.5749, | |
| "eval_samples_per_second": 455.44, | |
| "eval_steps_per_second": 1.78, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 3.1450298355406785e-05, | |
| "loss": 2.0639, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "eval_loss": 2.674436330795288, | |
| "eval_runtime": 245.508, | |
| "eval_samples_per_second": 398.056, | |
| "eval_steps_per_second": 1.556, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 3.108645029835541e-05, | |
| "loss": 2.0742, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "eval_loss": 2.677746295928955, | |
| "eval_runtime": 245.3374, | |
| "eval_samples_per_second": 398.333, | |
| "eval_steps_per_second": 1.557, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.0722602241304035e-05, | |
| "loss": 2.0789, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "eval_loss": 2.6688921451568604, | |
| "eval_runtime": 246.4423, | |
| "eval_samples_per_second": 396.547, | |
| "eval_steps_per_second": 1.55, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.0359481880366758e-05, | |
| "loss": 2.0594, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "eval_loss": 2.6566038131713867, | |
| "eval_runtime": 252.2995, | |
| "eval_samples_per_second": 387.341, | |
| "eval_steps_per_second": 1.514, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 2.9995633823315383e-05, | |
| "loss": 2.056, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.667599678039551, | |
| "eval_runtime": 245.2202, | |
| "eval_samples_per_second": 398.523, | |
| "eval_steps_per_second": 1.558, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 2.9631785766264007e-05, | |
| "loss": 2.0223, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_loss": 2.6711361408233643, | |
| "eval_runtime": 245.2433, | |
| "eval_samples_per_second": 398.486, | |
| "eval_steps_per_second": 1.558, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 2.9267937709212632e-05, | |
| "loss": 2.0185, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "eval_loss": 2.65678071975708, | |
| "eval_runtime": 208.848, | |
| "eval_samples_per_second": 467.929, | |
| "eval_steps_per_second": 1.829, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 2.890408965216126e-05, | |
| "loss": 2.018, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "eval_loss": 2.656717538833618, | |
| "eval_runtime": 209.2801, | |
| "eval_samples_per_second": 466.963, | |
| "eval_steps_per_second": 1.825, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 2.8540241595109886e-05, | |
| "loss": 2.0036, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "eval_loss": 2.6545379161834717, | |
| "eval_runtime": 210.0273, | |
| "eval_samples_per_second": 465.301, | |
| "eval_steps_per_second": 1.819, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 2.817639353805851e-05, | |
| "loss": 2.0238, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "eval_loss": 2.6558964252471924, | |
| "eval_runtime": 211.3486, | |
| "eval_samples_per_second": 462.392, | |
| "eval_steps_per_second": 1.807, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 2.781472856934944e-05, | |
| "loss": 2.0091, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "eval_loss": 2.6450281143188477, | |
| "eval_runtime": 208.486, | |
| "eval_samples_per_second": 468.741, | |
| "eval_steps_per_second": 1.832, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 2.7450880512298066e-05, | |
| "loss": 2.0096, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "eval_loss": 2.6388843059539795, | |
| "eval_runtime": 210.8413, | |
| "eval_samples_per_second": 463.505, | |
| "eval_steps_per_second": 1.812, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 2.708703245524669e-05, | |
| "loss": 2.0083, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_loss": 2.6401255130767822, | |
| "eval_runtime": 213.1597, | |
| "eval_samples_per_second": 458.464, | |
| "eval_steps_per_second": 1.792, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 2.6723184398195316e-05, | |
| "loss": 2.0012, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "eval_loss": 2.639948844909668, | |
| "eval_runtime": 234.1271, | |
| "eval_samples_per_second": 417.406, | |
| "eval_steps_per_second": 1.632, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.635933634114394e-05, | |
| "loss": 2.0166, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "eval_loss": 2.628899097442627, | |
| "eval_runtime": 242.1039, | |
| "eval_samples_per_second": 403.653, | |
| "eval_steps_per_second": 1.578, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.5995488284092563e-05, | |
| "loss": 1.9963, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_loss": 2.634817361831665, | |
| "eval_runtime": 275.7387, | |
| "eval_samples_per_second": 354.415, | |
| "eval_steps_per_second": 1.385, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.5631640227041188e-05, | |
| "loss": 1.9943, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "eval_loss": 2.6239511966705322, | |
| "eval_runtime": 223.4038, | |
| "eval_samples_per_second": 437.441, | |
| "eval_steps_per_second": 1.71, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.5267792169989813e-05, | |
| "loss": 2.0099, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_loss": 2.618997812271118, | |
| "eval_runtime": 215.7939, | |
| "eval_samples_per_second": 452.867, | |
| "eval_steps_per_second": 1.77, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 2.4903944112938438e-05, | |
| "loss": 1.9895, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "eval_loss": 2.630808115005493, | |
| "eval_runtime": 235.2775, | |
| "eval_samples_per_second": 415.365, | |
| "eval_steps_per_second": 1.624, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.4540096055887063e-05, | |
| "loss": 1.9581, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "eval_loss": 2.638457775115967, | |
| "eval_runtime": 232.5729, | |
| "eval_samples_per_second": 420.195, | |
| "eval_steps_per_second": 1.642, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.4176247998835687e-05, | |
| "loss": 1.9502, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "eval_loss": 2.6236515045166016, | |
| "eval_runtime": 233.0168, | |
| "eval_samples_per_second": 419.395, | |
| "eval_steps_per_second": 1.639, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.3812399941784312e-05, | |
| "loss": 1.9485, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_loss": 2.624785900115967, | |
| "eval_runtime": 246.6057, | |
| "eval_samples_per_second": 396.284, | |
| "eval_steps_per_second": 1.549, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.3448551884732937e-05, | |
| "loss": 1.9643, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "eval_loss": 2.627931833267212, | |
| "eval_runtime": 212.8412, | |
| "eval_samples_per_second": 459.15, | |
| "eval_steps_per_second": 1.795, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.3084703827681562e-05, | |
| "loss": 1.9535, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "eval_loss": 2.6185333728790283, | |
| "eval_runtime": 210.7296, | |
| "eval_samples_per_second": 463.751, | |
| "eval_steps_per_second": 1.813, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 2.2720855770630187e-05, | |
| "loss": 1.9575, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "eval_loss": 2.614642381668091, | |
| "eval_runtime": 211.3001, | |
| "eval_samples_per_second": 462.499, | |
| "eval_steps_per_second": 1.808, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.235700771357881e-05, | |
| "loss": 1.9475, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "eval_loss": 2.6092729568481445, | |
| "eval_runtime": 212.2513, | |
| "eval_samples_per_second": 460.426, | |
| "eval_steps_per_second": 1.8, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.1993159656527434e-05, | |
| "loss": 1.9434, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_loss": 2.60904598236084, | |
| "eval_runtime": 209.1736, | |
| "eval_samples_per_second": 467.2, | |
| "eval_steps_per_second": 1.826, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.1630039295590163e-05, | |
| "loss": 1.954, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "eval_loss": 2.60274338722229, | |
| "eval_runtime": 223.8437, | |
| "eval_samples_per_second": 436.581, | |
| "eval_steps_per_second": 1.707, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.1266191238538788e-05, | |
| "loss": 1.9509, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "eval_loss": 2.6107161045074463, | |
| "eval_runtime": 213.2878, | |
| "eval_samples_per_second": 458.188, | |
| "eval_steps_per_second": 1.791, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.0902343181487413e-05, | |
| "loss": 1.9454, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "eval_loss": 2.59796142578125, | |
| "eval_runtime": 214.4371, | |
| "eval_samples_per_second": 455.733, | |
| "eval_steps_per_second": 1.781, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.053922282055014e-05, | |
| "loss": 1.9479, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "eval_loss": 2.6016438007354736, | |
| "eval_runtime": 218.6403, | |
| "eval_samples_per_second": 446.972, | |
| "eval_steps_per_second": 1.747, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 2.0175374763498764e-05, | |
| "loss": 1.9539, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "eval_loss": 2.5970652103424072, | |
| "eval_runtime": 214.0818, | |
| "eval_samples_per_second": 456.489, | |
| "eval_steps_per_second": 1.784, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 1.981152670644739e-05, | |
| "loss": 1.9119, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_loss": 2.622750759124756, | |
| "eval_runtime": 211.471, | |
| "eval_samples_per_second": 462.125, | |
| "eval_steps_per_second": 1.806, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 1.9447678649396013e-05, | |
| "loss": 1.8974, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "eval_loss": 2.6169052124023438, | |
| "eval_runtime": 221.6976, | |
| "eval_samples_per_second": 440.808, | |
| "eval_steps_per_second": 1.723, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 1.908383059234464e-05, | |
| "loss": 1.9038, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "eval_loss": 2.6027112007141113, | |
| "eval_runtime": 212.7185, | |
| "eval_samples_per_second": 459.415, | |
| "eval_steps_per_second": 1.796, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 1.8719982535293263e-05, | |
| "loss": 1.9008, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "eval_loss": 2.602651357650757, | |
| "eval_runtime": 212.6929, | |
| "eval_samples_per_second": 459.47, | |
| "eval_steps_per_second": 1.796, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.8356134478241888e-05, | |
| "loss": 1.9142, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "eval_loss": 2.6011383533477783, | |
| "eval_runtime": 217.9682, | |
| "eval_samples_per_second": 448.35, | |
| "eval_steps_per_second": 1.753, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.7992286421190513e-05, | |
| "loss": 1.8783, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_loss": 2.595999002456665, | |
| "eval_runtime": 216.5321, | |
| "eval_samples_per_second": 451.323, | |
| "eval_steps_per_second": 1.764, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 1.7628438364139135e-05, | |
| "loss": 1.8896, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "eval_loss": 2.6111366748809814, | |
| "eval_runtime": 209.5809, | |
| "eval_samples_per_second": 466.293, | |
| "eval_steps_per_second": 1.823, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 1.7265318003201864e-05, | |
| "loss": 1.8975, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_loss": 2.588871955871582, | |
| "eval_runtime": 215.4187, | |
| "eval_samples_per_second": 453.656, | |
| "eval_steps_per_second": 1.773, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.690146994615049e-05, | |
| "loss": 1.9048, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "eval_loss": 2.600691556930542, | |
| "eval_runtime": 225.7312, | |
| "eval_samples_per_second": 432.931, | |
| "eval_steps_per_second": 1.692, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.6537621889099114e-05, | |
| "loss": 1.9049, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "eval_loss": 2.5971837043762207, | |
| "eval_runtime": 213.1258, | |
| "eval_samples_per_second": 458.537, | |
| "eval_steps_per_second": 1.792, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 1.6173773832047735e-05, | |
| "loss": 1.8969, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_loss": 2.605257987976074, | |
| "eval_runtime": 212.2604, | |
| "eval_samples_per_second": 460.406, | |
| "eval_steps_per_second": 1.8, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 1.580992577499636e-05, | |
| "loss": 1.9105, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "eval_loss": 2.589334726333618, | |
| "eval_runtime": 211.6138, | |
| "eval_samples_per_second": 461.813, | |
| "eval_steps_per_second": 1.805, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 1.544680541405909e-05, | |
| "loss": 1.8921, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "eval_loss": 2.5882816314697266, | |
| "eval_runtime": 211.2215, | |
| "eval_samples_per_second": 462.671, | |
| "eval_steps_per_second": 1.809, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 1.5083685053121819e-05, | |
| "loss": 1.8918, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_loss": 2.5792043209075928, | |
| "eval_runtime": 211.5529, | |
| "eval_samples_per_second": 461.946, | |
| "eval_steps_per_second": 1.806, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.471983699607044e-05, | |
| "loss": 1.8671, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_loss": 2.604069232940674, | |
| "eval_runtime": 212.2008, | |
| "eval_samples_per_second": 460.536, | |
| "eval_steps_per_second": 1.8, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 1.4355988939019067e-05, | |
| "loss": 1.8551, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "eval_loss": 2.6070237159729004, | |
| "eval_runtime": 220.205, | |
| "eval_samples_per_second": 443.795, | |
| "eval_steps_per_second": 1.735, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.3992140881967692e-05, | |
| "loss": 1.8555, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_loss": 2.614821434020996, | |
| "eval_runtime": 263.5514, | |
| "eval_samples_per_second": 370.804, | |
| "eval_steps_per_second": 1.449, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.3628292824916317e-05, | |
| "loss": 1.8543, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "eval_loss": 2.607656955718994, | |
| "eval_runtime": 264.5295, | |
| "eval_samples_per_second": 369.433, | |
| "eval_steps_per_second": 1.444, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.326444476786494e-05, | |
| "loss": 1.8485, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "eval_loss": 2.613083839416504, | |
| "eval_runtime": 263.0661, | |
| "eval_samples_per_second": 371.488, | |
| "eval_steps_per_second": 1.452, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.2900596710813565e-05, | |
| "loss": 1.8474, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "eval_loss": 2.603851079940796, | |
| "eval_runtime": 285.6161, | |
| "eval_samples_per_second": 342.159, | |
| "eval_steps_per_second": 1.337, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 1.253674865376219e-05, | |
| "loss": 1.8474, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "eval_loss": 2.5973451137542725, | |
| "eval_runtime": 273.9949, | |
| "eval_samples_per_second": 356.671, | |
| "eval_steps_per_second": 1.394, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 1.2172900596710813e-05, | |
| "loss": 1.8442, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "eval_loss": 2.5946028232574463, | |
| "eval_runtime": 244.7175, | |
| "eval_samples_per_second": 399.342, | |
| "eval_steps_per_second": 1.561, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 1.1809780235773542e-05, | |
| "loss": 1.8329, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "eval_loss": 2.606858253479004, | |
| "eval_runtime": 275.6967, | |
| "eval_samples_per_second": 354.469, | |
| "eval_steps_per_second": 1.386, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.1445932178722165e-05, | |
| "loss": 1.8551, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "eval_loss": 2.592348337173462, | |
| "eval_runtime": 253.7347, | |
| "eval_samples_per_second": 385.15, | |
| "eval_steps_per_second": 1.506, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.108208412167079e-05, | |
| "loss": 1.8433, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "eval_loss": 2.592151641845703, | |
| "eval_runtime": 250.8033, | |
| "eval_samples_per_second": 389.652, | |
| "eval_steps_per_second": 1.523, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.0718236064619415e-05, | |
| "loss": 1.851, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "eval_loss": 2.5993497371673584, | |
| "eval_runtime": 244.9443, | |
| "eval_samples_per_second": 398.972, | |
| "eval_steps_per_second": 1.56, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.035438800756804e-05, | |
| "loss": 1.8313, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "eval_loss": 2.59601092338562, | |
| "eval_runtime": 247.9824, | |
| "eval_samples_per_second": 394.084, | |
| "eval_steps_per_second": 1.54, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 9.991267646630768e-06, | |
| "loss": 1.8298, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.6058406829833984, | |
| "eval_runtime": 248.1822, | |
| "eval_samples_per_second": 393.767, | |
| "eval_steps_per_second": 1.539, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.628147285693495e-06, | |
| "loss": 1.8159, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "eval_loss": 2.6286239624023438, | |
| "eval_runtime": 249.0138, | |
| "eval_samples_per_second": 392.452, | |
| "eval_steps_per_second": 1.534, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 9.264299228642118e-06, | |
| "loss": 1.817, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "eval_loss": 2.634847402572632, | |
| "eval_runtime": 257.4805, | |
| "eval_samples_per_second": 379.547, | |
| "eval_steps_per_second": 1.484, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 8.900451171590745e-06, | |
| "loss": 1.8066, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "eval_loss": 2.6410584449768066, | |
| "eval_runtime": 307.9556, | |
| "eval_samples_per_second": 317.338, | |
| "eval_steps_per_second": 1.24, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 8.536603114539368e-06, | |
| "loss": 1.7935, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "eval_loss": 2.633836269378662, | |
| "eval_runtime": 253.2423, | |
| "eval_samples_per_second": 385.899, | |
| "eval_steps_per_second": 1.508, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 8.172755057487993e-06, | |
| "loss": 1.809, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "eval_loss": 2.629018783569336, | |
| "eval_runtime": 250.9739, | |
| "eval_samples_per_second": 389.387, | |
| "eval_steps_per_second": 1.522, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 7.808907000436618e-06, | |
| "loss": 1.812, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "eval_loss": 2.6257762908935547, | |
| "eval_runtime": 253.0757, | |
| "eval_samples_per_second": 386.153, | |
| "eval_steps_per_second": 1.509, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 7.445058943385242e-06, | |
| "loss": 1.79, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "eval_loss": 2.6320676803588867, | |
| "eval_runtime": 250.9004, | |
| "eval_samples_per_second": 389.501, | |
| "eval_steps_per_second": 1.523, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 7.0812108863338665e-06, | |
| "loss": 1.8046, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "eval_loss": 2.6290555000305176, | |
| "eval_runtime": 248.8573, | |
| "eval_samples_per_second": 392.699, | |
| "eval_steps_per_second": 1.535, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 6.718090525396595e-06, | |
| "loss": 1.7975, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "eval_loss": 2.6282989978790283, | |
| "eval_runtime": 245.3595, | |
| "eval_samples_per_second": 398.297, | |
| "eval_steps_per_second": 1.557, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 6.354970164459321e-06, | |
| "loss": 1.7968, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "eval_loss": 2.628397226333618, | |
| "eval_runtime": 253.0259, | |
| "eval_samples_per_second": 386.229, | |
| "eval_steps_per_second": 1.51, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 5.991122107407947e-06, | |
| "loss": 1.7779, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "eval_loss": 2.625650405883789, | |
| "eval_runtime": 247.6056, | |
| "eval_samples_per_second": 394.684, | |
| "eval_steps_per_second": 1.543, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 5.627274050356571e-06, | |
| "loss": 1.7664, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "eval_loss": 2.623215675354004, | |
| "eval_runtime": 268.5188, | |
| "eval_samples_per_second": 363.945, | |
| "eval_steps_per_second": 1.423, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 5.263425993305196e-06, | |
| "loss": 1.792, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "eval_loss": 2.63053297996521, | |
| "eval_runtime": 248.0445, | |
| "eval_samples_per_second": 393.986, | |
| "eval_steps_per_second": 1.54, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.89957793625382e-06, | |
| "loss": 1.7725, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "eval_loss": 2.6525118350982666, | |
| "eval_runtime": 247.425, | |
| "eval_samples_per_second": 394.972, | |
| "eval_steps_per_second": 1.544, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.535729879202445e-06, | |
| "loss": 1.7563, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "eval_loss": 2.679419755935669, | |
| "eval_runtime": 249.9688, | |
| "eval_samples_per_second": 390.953, | |
| "eval_steps_per_second": 1.528, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 4.17188182215107e-06, | |
| "loss": 1.7606, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "eval_loss": 2.6783671379089355, | |
| "eval_runtime": 248.5877, | |
| "eval_samples_per_second": 393.125, | |
| "eval_steps_per_second": 1.537, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 3.8080337650996943e-06, | |
| "loss": 1.7666, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "eval_loss": 2.679766893386841, | |
| "eval_runtime": 253.2315, | |
| "eval_samples_per_second": 385.916, | |
| "eval_steps_per_second": 1.509, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 3.4449134041624217e-06, | |
| "loss": 1.7551, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "eval_loss": 2.6813337802886963, | |
| "eval_runtime": 248.5037, | |
| "eval_samples_per_second": 393.258, | |
| "eval_steps_per_second": 1.537, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.0810653471110467e-06, | |
| "loss": 1.7578, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "eval_loss": 2.683032751083374, | |
| "eval_runtime": 245.3373, | |
| "eval_samples_per_second": 398.333, | |
| "eval_steps_per_second": 1.557, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 2.717217290059671e-06, | |
| "loss": 1.7483, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "eval_loss": 2.6832828521728516, | |
| "eval_runtime": 266.4744, | |
| "eval_samples_per_second": 366.737, | |
| "eval_steps_per_second": 1.434, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 2.3533692330082957e-06, | |
| "loss": 1.7431, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "eval_loss": 2.6883933544158936, | |
| "eval_runtime": 256.629, | |
| "eval_samples_per_second": 380.807, | |
| "eval_steps_per_second": 1.489, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 1.9895211759569207e-06, | |
| "loss": 1.743, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "eval_loss": 2.6931965351104736, | |
| "eval_runtime": 260.7406, | |
| "eval_samples_per_second": 374.802, | |
| "eval_steps_per_second": 1.465, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 1.6264008150196477e-06, | |
| "loss": 1.7395, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "eval_loss": 2.6927101612091064, | |
| "eval_runtime": 254.3781, | |
| "eval_samples_per_second": 384.176, | |
| "eval_steps_per_second": 1.502, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 1.2625527579682726e-06, | |
| "loss": 1.7473, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "eval_loss": 2.6903834342956543, | |
| "eval_runtime": 221.4662, | |
| "eval_samples_per_second": 441.268, | |
| "eval_steps_per_second": 1.725, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 8.987047009168971e-07, | |
| "loss": 1.7413, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "eval_loss": 2.6892080307006836, | |
| "eval_runtime": 233.1215, | |
| "eval_samples_per_second": 419.206, | |
| "eval_steps_per_second": 1.639, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 5.348566438655218e-07, | |
| "loss": 1.7437, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "eval_loss": 2.6897966861724854, | |
| "eval_runtime": 217.2083, | |
| "eval_samples_per_second": 449.918, | |
| "eval_steps_per_second": 1.759, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 1.7173628292824918e-07, | |
| "loss": 1.7546, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "eval_loss": 2.689425468444824, | |
| "eval_runtime": 214.7197, | |
| "eval_samples_per_second": 455.133, | |
| "eval_steps_per_second": 1.779, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 68710, | |
| "total_flos": 1.1742684555264e+18, | |
| "train_loss": 2.026226943438967, | |
| "train_runtime": 84552.1773, | |
| "train_samples_per_second": 104.022, | |
| "train_steps_per_second": 0.813 | |
| } | |
| ], | |
| "max_steps": 68710, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.1742684555264e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |