| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 120340, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.979225527671597e-05, | |
| "loss": 1.2637, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_f1": 0.2863453369025287, | |
| "eval_loss": 1.204175591468811, | |
| "eval_runtime": 51.6606, | |
| "eval_samples_per_second": 125.473, | |
| "eval_steps_per_second": 7.859, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.958451055343195e-05, | |
| "loss": 1.213, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_f1": 0.274772728922474, | |
| "eval_loss": 1.2542401552200317, | |
| "eval_runtime": 46.4286, | |
| "eval_samples_per_second": 139.612, | |
| "eval_steps_per_second": 8.745, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.937676583014792e-05, | |
| "loss": 1.1852, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_f1": 0.31238188693470137, | |
| "eval_loss": 1.1441909074783325, | |
| "eval_runtime": 46.2394, | |
| "eval_samples_per_second": 140.183, | |
| "eval_steps_per_second": 8.78, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9169021106863886e-05, | |
| "loss": 1.1495, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_f1": 0.3303277428471857, | |
| "eval_loss": 1.19601309299469, | |
| "eval_runtime": 46.3042, | |
| "eval_samples_per_second": 139.987, | |
| "eval_steps_per_second": 8.768, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8961276383579855e-05, | |
| "loss": 1.1406, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_f1": 0.33363478480748904, | |
| "eval_loss": 1.2050482034683228, | |
| "eval_runtime": 46.0365, | |
| "eval_samples_per_second": 140.801, | |
| "eval_steps_per_second": 8.819, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.875353166029583e-05, | |
| "loss": 1.1332, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_f1": 0.35687474820142895, | |
| "eval_loss": 1.1201504468917847, | |
| "eval_runtime": 45.6751, | |
| "eval_samples_per_second": 141.915, | |
| "eval_steps_per_second": 8.889, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.85457869370118e-05, | |
| "loss": 1.1007, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_f1": 0.3605293337176752, | |
| "eval_loss": 1.1953203678131104, | |
| "eval_runtime": 45.2576, | |
| "eval_samples_per_second": 143.225, | |
| "eval_steps_per_second": 8.971, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.833804221372778e-05, | |
| "loss": 1.1157, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_f1": 0.3862175225700333, | |
| "eval_loss": 1.1009345054626465, | |
| "eval_runtime": 46.0236, | |
| "eval_samples_per_second": 140.841, | |
| "eval_steps_per_second": 8.822, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8130297490443746e-05, | |
| "loss": 1.1172, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_f1": 0.3819750143486552, | |
| "eval_loss": 1.126935362815857, | |
| "eval_runtime": 46.0628, | |
| "eval_samples_per_second": 140.721, | |
| "eval_steps_per_second": 8.814, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.7922552767159715e-05, | |
| "loss": 1.1041, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_f1": 0.29887014740598117, | |
| "eval_loss": 1.1668146848678589, | |
| "eval_runtime": 46.4347, | |
| "eval_samples_per_second": 139.594, | |
| "eval_steps_per_second": 8.743, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.7714808043875684e-05, | |
| "loss": 1.102, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_f1": 0.418642853322715, | |
| "eval_loss": 1.1066502332687378, | |
| "eval_runtime": 46.0383, | |
| "eval_samples_per_second": 140.796, | |
| "eval_steps_per_second": 8.819, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.750706332059166e-05, | |
| "loss": 1.0878, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_f1": 0.3200899667689199, | |
| "eval_loss": 1.1729530096054077, | |
| "eval_runtime": 46.8484, | |
| "eval_samples_per_second": 138.361, | |
| "eval_steps_per_second": 8.666, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.729931859730763e-05, | |
| "loss": 1.0866, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_f1": 0.38888205294003, | |
| "eval_loss": 1.108739972114563, | |
| "eval_runtime": 46.5428, | |
| "eval_samples_per_second": 139.27, | |
| "eval_steps_per_second": 8.723, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.7091573874023606e-05, | |
| "loss": 1.0729, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_f1": 0.335409883600229, | |
| "eval_loss": 1.1224578619003296, | |
| "eval_runtime": 46.3948, | |
| "eval_samples_per_second": 139.714, | |
| "eval_steps_per_second": 8.751, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.6883829150739575e-05, | |
| "loss": 1.0684, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_f1": 0.4087602196471012, | |
| "eval_loss": 1.1329175233840942, | |
| "eval_runtime": 46.5357, | |
| "eval_samples_per_second": 139.291, | |
| "eval_steps_per_second": 8.724, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.6676084427455544e-05, | |
| "loss": 1.0633, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_f1": 0.39803057267400827, | |
| "eval_loss": 1.1004011631011963, | |
| "eval_runtime": 46.1396, | |
| "eval_samples_per_second": 140.487, | |
| "eval_steps_per_second": 8.799, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6468339704171513e-05, | |
| "loss": 1.0739, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_f1": 0.39430507046678914, | |
| "eval_loss": 1.090652585029602, | |
| "eval_runtime": 46.3294, | |
| "eval_samples_per_second": 139.911, | |
| "eval_steps_per_second": 8.763, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.626059498088749e-05, | |
| "loss": 1.0646, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_f1": 0.4204523467443143, | |
| "eval_loss": 1.1204614639282227, | |
| "eval_runtime": 46.2687, | |
| "eval_samples_per_second": 140.095, | |
| "eval_steps_per_second": 8.775, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.605285025760346e-05, | |
| "loss": 1.0581, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_f1": 0.3934072615715215, | |
| "eval_loss": 1.100487232208252, | |
| "eval_runtime": 46.7591, | |
| "eval_samples_per_second": 138.625, | |
| "eval_steps_per_second": 8.683, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.5845105534319435e-05, | |
| "loss": 1.0659, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_f1": 0.3959022659523447, | |
| "eval_loss": 1.0948997735977173, | |
| "eval_runtime": 46.4254, | |
| "eval_samples_per_second": 139.622, | |
| "eval_steps_per_second": 8.745, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.5637360811035404e-05, | |
| "loss": 1.0573, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_f1": 0.4038876072579517, | |
| "eval_loss": 1.0948611497879028, | |
| "eval_runtime": 46.9168, | |
| "eval_samples_per_second": 138.159, | |
| "eval_steps_per_second": 8.654, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.542961608775137e-05, | |
| "loss": 1.0725, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_f1": 0.3986235498061616, | |
| "eval_loss": 1.1076061725616455, | |
| "eval_runtime": 46.337, | |
| "eval_samples_per_second": 139.888, | |
| "eval_steps_per_second": 8.762, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.522187136446734e-05, | |
| "loss": 1.0453, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_f1": 0.39752123118599975, | |
| "eval_loss": 1.0838735103607178, | |
| "eval_runtime": 46.3441, | |
| "eval_samples_per_second": 139.867, | |
| "eval_steps_per_second": 8.761, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.501412664118332e-05, | |
| "loss": 1.0594, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.3901284493425606, | |
| "eval_loss": 1.0847594738006592, | |
| "eval_runtime": 45.6188, | |
| "eval_samples_per_second": 142.091, | |
| "eval_steps_per_second": 8.9, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.480638191789929e-05, | |
| "loss": 0.9487, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_f1": 0.43667878762469675, | |
| "eval_loss": 1.1431002616882324, | |
| "eval_runtime": 46.1871, | |
| "eval_samples_per_second": 140.342, | |
| "eval_steps_per_second": 8.79, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.4598637194615264e-05, | |
| "loss": 0.9704, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_f1": 0.39215933654644525, | |
| "eval_loss": 1.1027016639709473, | |
| "eval_runtime": 45.9637, | |
| "eval_samples_per_second": 141.024, | |
| "eval_steps_per_second": 8.833, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.439089247133123e-05, | |
| "loss": 0.9469, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_f1": 0.4076438023703015, | |
| "eval_loss": 1.1772775650024414, | |
| "eval_runtime": 45.9053, | |
| "eval_samples_per_second": 141.204, | |
| "eval_steps_per_second": 8.844, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.41831477480472e-05, | |
| "loss": 0.9325, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_f1": 0.4386204591653561, | |
| "eval_loss": 1.1739530563354492, | |
| "eval_runtime": 45.6601, | |
| "eval_samples_per_second": 141.962, | |
| "eval_steps_per_second": 8.892, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.397540302476317e-05, | |
| "loss": 0.9393, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_f1": 0.4275788122291597, | |
| "eval_loss": 1.1776121854782104, | |
| "eval_runtime": 45.9594, | |
| "eval_samples_per_second": 141.037, | |
| "eval_steps_per_second": 8.834, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.376765830147914e-05, | |
| "loss": 0.9358, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_f1": 0.402493107642323, | |
| "eval_loss": 1.1454391479492188, | |
| "eval_runtime": 46.4333, | |
| "eval_samples_per_second": 139.598, | |
| "eval_steps_per_second": 8.744, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.355991357819512e-05, | |
| "loss": 0.9276, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_f1": 0.4309881707776124, | |
| "eval_loss": 1.1370099782943726, | |
| "eval_runtime": 46.0386, | |
| "eval_samples_per_second": 140.795, | |
| "eval_steps_per_second": 8.819, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.3352168854911086e-05, | |
| "loss": 0.9749, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_f1": 0.42666296909338014, | |
| "eval_loss": 1.147721290588379, | |
| "eval_runtime": 46.5256, | |
| "eval_samples_per_second": 139.321, | |
| "eval_steps_per_second": 8.726, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.314442413162706e-05, | |
| "loss": 0.9584, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_f1": 0.3917665752135426, | |
| "eval_loss": 1.1466563940048218, | |
| "eval_runtime": 45.7559, | |
| "eval_samples_per_second": 141.665, | |
| "eval_steps_per_second": 8.873, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.293667940834303e-05, | |
| "loss": 0.9458, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_f1": 0.38855363975832957, | |
| "eval_loss": 1.1946083307266235, | |
| "eval_runtime": 46.3569, | |
| "eval_samples_per_second": 139.828, | |
| "eval_steps_per_second": 8.758, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.2728934685059e-05, | |
| "loss": 0.9615, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_f1": 0.4006260567973624, | |
| "eval_loss": 1.1700124740600586, | |
| "eval_runtime": 46.2179, | |
| "eval_samples_per_second": 140.249, | |
| "eval_steps_per_second": 8.784, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.252118996177497e-05, | |
| "loss": 0.949, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_f1": 0.39774908999391234, | |
| "eval_loss": 1.1761705875396729, | |
| "eval_runtime": 46.6777, | |
| "eval_samples_per_second": 138.867, | |
| "eval_steps_per_second": 8.698, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.2313445238490946e-05, | |
| "loss": 0.9424, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_f1": 0.4174117585364745, | |
| "eval_loss": 1.165438175201416, | |
| "eval_runtime": 45.4426, | |
| "eval_samples_per_second": 142.641, | |
| "eval_steps_per_second": 8.934, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.2105700515206915e-05, | |
| "loss": 0.947, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_f1": 0.3867006123407769, | |
| "eval_loss": 1.1531673669815063, | |
| "eval_runtime": 46.295, | |
| "eval_samples_per_second": 140.015, | |
| "eval_steps_per_second": 8.77, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 4.189795579192289e-05, | |
| "loss": 0.938, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_f1": 0.44235664423629284, | |
| "eval_loss": 1.1730421781539917, | |
| "eval_runtime": 45.858, | |
| "eval_samples_per_second": 141.349, | |
| "eval_steps_per_second": 8.853, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 4.1690211068638854e-05, | |
| "loss": 0.9486, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_f1": 0.41240105931665383, | |
| "eval_loss": 1.1419258117675781, | |
| "eval_runtime": 46.2869, | |
| "eval_samples_per_second": 140.04, | |
| "eval_steps_per_second": 8.771, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 4.148246634535483e-05, | |
| "loss": 0.9464, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_f1": 0.4092001043387523, | |
| "eval_loss": 1.2019739151000977, | |
| "eval_runtime": 46.6507, | |
| "eval_samples_per_second": 138.948, | |
| "eval_steps_per_second": 8.703, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 4.12747216220708e-05, | |
| "loss": 0.933, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_f1": 0.4176306552081639, | |
| "eval_loss": 1.1400264501571655, | |
| "eval_runtime": 46.9566, | |
| "eval_samples_per_second": 138.042, | |
| "eval_steps_per_second": 8.646, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 4.1066976898786775e-05, | |
| "loss": 0.9544, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_f1": 0.430552427353769, | |
| "eval_loss": 1.1604799032211304, | |
| "eval_runtime": 47.2589, | |
| "eval_samples_per_second": 137.159, | |
| "eval_steps_per_second": 8.591, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.0859232175502744e-05, | |
| "loss": 0.9312, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_f1": 0.39462194466074857, | |
| "eval_loss": 1.1545989513397217, | |
| "eval_runtime": 46.5344, | |
| "eval_samples_per_second": 139.295, | |
| "eval_steps_per_second": 8.725, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.065148745221872e-05, | |
| "loss": 0.9458, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_f1": 0.44503977528303557, | |
| "eval_loss": 1.1579736471176147, | |
| "eval_runtime": 47.0732, | |
| "eval_samples_per_second": 137.7, | |
| "eval_steps_per_second": 8.625, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 4.044374272893468e-05, | |
| "loss": 0.9463, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_f1": 0.41566594457572864, | |
| "eval_loss": 1.1216946840286255, | |
| "eval_runtime": 46.6125, | |
| "eval_samples_per_second": 139.061, | |
| "eval_steps_per_second": 8.71, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 4.023599800565066e-05, | |
| "loss": 0.9292, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_f1": 0.3965530806436371, | |
| "eval_loss": 1.1553888320922852, | |
| "eval_runtime": 46.7596, | |
| "eval_samples_per_second": 138.624, | |
| "eval_steps_per_second": 8.683, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 4.002825328236663e-05, | |
| "loss": 0.9286, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_f1": 0.4010412760625175, | |
| "eval_loss": 1.1262454986572266, | |
| "eval_runtime": 46.3591, | |
| "eval_samples_per_second": 139.822, | |
| "eval_steps_per_second": 8.758, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.9820508559082604e-05, | |
| "loss": 0.7944, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_f1": 0.43825195451845356, | |
| "eval_loss": 1.2961622476577759, | |
| "eval_runtime": 46.8176, | |
| "eval_samples_per_second": 138.452, | |
| "eval_steps_per_second": 8.672, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.961276383579857e-05, | |
| "loss": 0.8003, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_f1": 0.4380376882658572, | |
| "eval_loss": 1.268869161605835, | |
| "eval_runtime": 52.7668, | |
| "eval_samples_per_second": 122.842, | |
| "eval_steps_per_second": 7.694, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.940501911251455e-05, | |
| "loss": 0.7792, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_f1": 0.4432590805872725, | |
| "eval_loss": 1.2123405933380127, | |
| "eval_runtime": 46.7198, | |
| "eval_samples_per_second": 138.742, | |
| "eval_steps_per_second": 8.69, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.919727438923051e-05, | |
| "loss": 0.79, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_f1": 0.44452383932580275, | |
| "eval_loss": 1.2517160177230835, | |
| "eval_runtime": 46.1817, | |
| "eval_samples_per_second": 140.359, | |
| "eval_steps_per_second": 8.791, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.898952966594649e-05, | |
| "loss": 0.7984, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_f1": 0.4221078291607028, | |
| "eval_loss": 1.2184810638427734, | |
| "eval_runtime": 46.3545, | |
| "eval_samples_per_second": 139.835, | |
| "eval_steps_per_second": 8.759, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.878178494266246e-05, | |
| "loss": 0.7952, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_f1": 0.439579966107181, | |
| "eval_loss": 1.2801449298858643, | |
| "eval_runtime": 46.2023, | |
| "eval_samples_per_second": 140.296, | |
| "eval_steps_per_second": 8.787, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.857404021937843e-05, | |
| "loss": 0.8055, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_f1": 0.4544774942196613, | |
| "eval_loss": 1.2639812231063843, | |
| "eval_runtime": 46.0512, | |
| "eval_samples_per_second": 140.757, | |
| "eval_steps_per_second": 8.816, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.83662954960944e-05, | |
| "loss": 0.8084, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_f1": 0.44333487259913823, | |
| "eval_loss": 1.228055477142334, | |
| "eval_runtime": 46.2747, | |
| "eval_samples_per_second": 140.077, | |
| "eval_steps_per_second": 8.774, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.815855077281037e-05, | |
| "loss": 0.7904, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_f1": 0.43992159119888363, | |
| "eval_loss": 1.2493727207183838, | |
| "eval_runtime": 46.321, | |
| "eval_samples_per_second": 139.937, | |
| "eval_steps_per_second": 8.765, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.795080604952634e-05, | |
| "loss": 0.8057, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_f1": 0.4114745835354577, | |
| "eval_loss": 1.2447556257247925, | |
| "eval_runtime": 46.3954, | |
| "eval_samples_per_second": 139.712, | |
| "eval_steps_per_second": 8.751, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.774306132624231e-05, | |
| "loss": 0.8001, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_f1": 0.41230473935117545, | |
| "eval_loss": 1.2784521579742432, | |
| "eval_runtime": 46.222, | |
| "eval_samples_per_second": 140.236, | |
| "eval_steps_per_second": 8.784, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.7535316602958286e-05, | |
| "loss": 0.8293, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_f1": 0.4303954902219652, | |
| "eval_loss": 1.1889426708221436, | |
| "eval_runtime": 45.554, | |
| "eval_samples_per_second": 142.293, | |
| "eval_steps_per_second": 8.913, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.7327571879674255e-05, | |
| "loss": 0.8194, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_f1": 0.4301727119748369, | |
| "eval_loss": 1.2015577554702759, | |
| "eval_runtime": 46.1992, | |
| "eval_samples_per_second": 140.305, | |
| "eval_steps_per_second": 8.788, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.711982715639023e-05, | |
| "loss": 0.8028, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_f1": 0.44964098289977084, | |
| "eval_loss": 1.2026257514953613, | |
| "eval_runtime": 45.6586, | |
| "eval_samples_per_second": 141.967, | |
| "eval_steps_per_second": 8.892, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.69120824331062e-05, | |
| "loss": 0.8123, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_f1": 0.4305026988222712, | |
| "eval_loss": 1.2430651187896729, | |
| "eval_runtime": 45.6789, | |
| "eval_samples_per_second": 141.904, | |
| "eval_steps_per_second": 8.888, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.670433770982217e-05, | |
| "loss": 0.7941, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_f1": 0.4185262770510854, | |
| "eval_loss": 1.2300126552581787, | |
| "eval_runtime": 46.2407, | |
| "eval_samples_per_second": 140.179, | |
| "eval_steps_per_second": 8.78, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.649659298653814e-05, | |
| "loss": 0.7815, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_f1": 0.42807790167507703, | |
| "eval_loss": 1.3011759519577026, | |
| "eval_runtime": 45.2153, | |
| "eval_samples_per_second": 143.358, | |
| "eval_steps_per_second": 8.979, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.6288848263254115e-05, | |
| "loss": 0.8081, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_f1": 0.440687373165412, | |
| "eval_loss": 1.253546953201294, | |
| "eval_runtime": 45.7395, | |
| "eval_samples_per_second": 141.715, | |
| "eval_steps_per_second": 8.876, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6081103539970084e-05, | |
| "loss": 0.8086, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_f1": 0.44928394998593935, | |
| "eval_loss": 1.2568650245666504, | |
| "eval_runtime": 45.7963, | |
| "eval_samples_per_second": 141.54, | |
| "eval_steps_per_second": 8.865, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.587335881668606e-05, | |
| "loss": 0.7858, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_f1": 0.43231367666538134, | |
| "eval_loss": 1.2376387119293213, | |
| "eval_runtime": 46.5395, | |
| "eval_samples_per_second": 139.279, | |
| "eval_steps_per_second": 8.724, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 3.566561409340203e-05, | |
| "loss": 0.8065, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_f1": 0.42028961729158787, | |
| "eval_loss": 1.2222144603729248, | |
| "eval_runtime": 46.2907, | |
| "eval_samples_per_second": 140.028, | |
| "eval_steps_per_second": 8.771, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.5457869370118e-05, | |
| "loss": 0.7991, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_f1": 0.4231707776654273, | |
| "eval_loss": 1.250239372253418, | |
| "eval_runtime": 45.795, | |
| "eval_samples_per_second": 141.544, | |
| "eval_steps_per_second": 8.866, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.525012464683397e-05, | |
| "loss": 0.816, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_f1": 0.4231928313001403, | |
| "eval_loss": 1.2436952590942383, | |
| "eval_runtime": 45.9669, | |
| "eval_samples_per_second": 141.014, | |
| "eval_steps_per_second": 8.832, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.5042379923549944e-05, | |
| "loss": 0.8093, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_f1": 0.40722455645687655, | |
| "eval_loss": 1.1901623010635376, | |
| "eval_runtime": 45.8966, | |
| "eval_samples_per_second": 141.231, | |
| "eval_steps_per_second": 8.846, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 3.483463520026591e-05, | |
| "loss": 0.6567, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "eval_f1": 0.4358392298680451, | |
| "eval_loss": 1.4940779209136963, | |
| "eval_runtime": 46.3646, | |
| "eval_samples_per_second": 139.805, | |
| "eval_steps_per_second": 8.757, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 3.462689047698189e-05, | |
| "loss": 0.6304, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "eval_f1": 0.4248894510967205, | |
| "eval_loss": 1.471817135810852, | |
| "eval_runtime": 46.8512, | |
| "eval_samples_per_second": 138.353, | |
| "eval_steps_per_second": 8.666, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 3.441914575369786e-05, | |
| "loss": 0.6454, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "eval_f1": 0.427949493239817, | |
| "eval_loss": 1.484312891960144, | |
| "eval_runtime": 46.3345, | |
| "eval_samples_per_second": 139.896, | |
| "eval_steps_per_second": 8.762, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.421140103041383e-05, | |
| "loss": 0.6654, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "eval_f1": 0.43504779862979137, | |
| "eval_loss": 1.4933797121047974, | |
| "eval_runtime": 45.9895, | |
| "eval_samples_per_second": 140.945, | |
| "eval_steps_per_second": 8.828, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.40036563071298e-05, | |
| "loss": 0.6478, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_f1": 0.4310701120380386, | |
| "eval_loss": 1.4152840375900269, | |
| "eval_runtime": 46.2675, | |
| "eval_samples_per_second": 140.098, | |
| "eval_steps_per_second": 8.775, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 3.379591158384577e-05, | |
| "loss": 0.637, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "eval_f1": 0.44398856573642115, | |
| "eval_loss": 1.3994076251983643, | |
| "eval_runtime": 45.8114, | |
| "eval_samples_per_second": 141.493, | |
| "eval_steps_per_second": 8.862, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 3.358816686056174e-05, | |
| "loss": 0.6398, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_f1": 0.42669623237681525, | |
| "eval_loss": 1.5294607877731323, | |
| "eval_runtime": 46.4175, | |
| "eval_samples_per_second": 139.646, | |
| "eval_steps_per_second": 8.747, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 3.338042213727772e-05, | |
| "loss": 0.6703, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "eval_f1": 0.4287387139396017, | |
| "eval_loss": 1.3941184282302856, | |
| "eval_runtime": 46.3839, | |
| "eval_samples_per_second": 139.747, | |
| "eval_steps_per_second": 8.753, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 3.317267741399369e-05, | |
| "loss": 0.6442, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "eval_f1": 0.4371709367951578, | |
| "eval_loss": 1.388688564300537, | |
| "eval_runtime": 46.5417, | |
| "eval_samples_per_second": 139.273, | |
| "eval_steps_per_second": 8.723, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 3.296493269070966e-05, | |
| "loss": 0.6784, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "eval_f1": 0.43751727834027077, | |
| "eval_loss": 1.3877114057540894, | |
| "eval_runtime": 45.9117, | |
| "eval_samples_per_second": 141.184, | |
| "eval_steps_per_second": 8.843, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.2757187967425626e-05, | |
| "loss": 0.6614, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_f1": 0.4372541317801921, | |
| "eval_loss": 1.4126884937286377, | |
| "eval_runtime": 46.4825, | |
| "eval_samples_per_second": 139.45, | |
| "eval_steps_per_second": 8.734, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 3.25494432441416e-05, | |
| "loss": 0.6864, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "eval_f1": 0.42774626956090644, | |
| "eval_loss": 1.4882549047470093, | |
| "eval_runtime": 46.0334, | |
| "eval_samples_per_second": 140.811, | |
| "eval_steps_per_second": 8.82, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 3.234169852085757e-05, | |
| "loss": 0.6636, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "eval_f1": 0.4248446501416097, | |
| "eval_loss": 1.3951656818389893, | |
| "eval_runtime": 45.7857, | |
| "eval_samples_per_second": 141.573, | |
| "eval_steps_per_second": 8.867, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 3.213395379757354e-05, | |
| "loss": 0.6801, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "eval_f1": 0.41563433666965854, | |
| "eval_loss": 1.4469736814498901, | |
| "eval_runtime": 46.1733, | |
| "eval_samples_per_second": 140.384, | |
| "eval_steps_per_second": 8.793, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 3.1926209074289517e-05, | |
| "loss": 0.6509, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "eval_f1": 0.42981854634578975, | |
| "eval_loss": 1.3635119199752808, | |
| "eval_runtime": 46.4023, | |
| "eval_samples_per_second": 139.691, | |
| "eval_steps_per_second": 8.75, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.1718464351005486e-05, | |
| "loss": 0.6776, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "eval_f1": 0.4345867863354599, | |
| "eval_loss": 1.3212920427322388, | |
| "eval_runtime": 46.0513, | |
| "eval_samples_per_second": 140.756, | |
| "eval_steps_per_second": 8.816, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.1510719627721455e-05, | |
| "loss": 0.6686, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_f1": 0.4283615179772263, | |
| "eval_loss": 1.3529335260391235, | |
| "eval_runtime": 45.8914, | |
| "eval_samples_per_second": 141.247, | |
| "eval_steps_per_second": 8.847, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.1302974904437424e-05, | |
| "loss": 0.6696, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_f1": 0.42784477364430307, | |
| "eval_loss": 1.3639956712722778, | |
| "eval_runtime": 45.6293, | |
| "eval_samples_per_second": 142.058, | |
| "eval_steps_per_second": 8.898, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 3.10952301811534e-05, | |
| "loss": 0.6624, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "eval_f1": 0.4325377081208613, | |
| "eval_loss": 1.409765601158142, | |
| "eval_runtime": 47.4821, | |
| "eval_samples_per_second": 136.515, | |
| "eval_steps_per_second": 8.551, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 3.088748545786937e-05, | |
| "loss": 0.6876, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "eval_f1": 0.4282077409143604, | |
| "eval_loss": 1.4017492532730103, | |
| "eval_runtime": 46.1534, | |
| "eval_samples_per_second": 140.445, | |
| "eval_steps_per_second": 8.797, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.0679740734585346e-05, | |
| "loss": 0.671, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "eval_f1": 0.4172019162341494, | |
| "eval_loss": 1.3028308153152466, | |
| "eval_runtime": 46.2659, | |
| "eval_samples_per_second": 140.103, | |
| "eval_steps_per_second": 8.775, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 3.047199601130131e-05, | |
| "loss": 0.68, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_f1": 0.43831774420843844, | |
| "eval_loss": 1.3964955806732178, | |
| "eval_runtime": 45.9895, | |
| "eval_samples_per_second": 140.945, | |
| "eval_steps_per_second": 8.828, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 3.0264251288017287e-05, | |
| "loss": 0.6715, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "eval_f1": 0.4301624961844753, | |
| "eval_loss": 1.3572640419006348, | |
| "eval_runtime": 45.9503, | |
| "eval_samples_per_second": 141.065, | |
| "eval_steps_per_second": 8.836, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.0056506564733257e-05, | |
| "loss": 0.697, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_f1": 0.4253265083713992, | |
| "eval_loss": 1.3642019033432007, | |
| "eval_runtime": 46.1149, | |
| "eval_samples_per_second": 140.562, | |
| "eval_steps_per_second": 8.804, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 2.984876184144923e-05, | |
| "loss": 0.5631, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_f1": 0.433265159904986, | |
| "eval_loss": 1.5990760326385498, | |
| "eval_runtime": 45.3111, | |
| "eval_samples_per_second": 143.055, | |
| "eval_steps_per_second": 8.96, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 2.96410171181652e-05, | |
| "loss": 0.5151, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "eval_f1": 0.4295485993747164, | |
| "eval_loss": 1.6384857892990112, | |
| "eval_runtime": 46.4055, | |
| "eval_samples_per_second": 139.682, | |
| "eval_steps_per_second": 8.749, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 2.943327239488117e-05, | |
| "loss": 0.5348, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "eval_f1": 0.4240371984001696, | |
| "eval_loss": 1.5903598070144653, | |
| "eval_runtime": 45.9488, | |
| "eval_samples_per_second": 141.07, | |
| "eval_steps_per_second": 8.836, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 2.922552767159714e-05, | |
| "loss": 0.5288, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "eval_f1": 0.42810904253980414, | |
| "eval_loss": 1.6144169569015503, | |
| "eval_runtime": 46.451, | |
| "eval_samples_per_second": 139.545, | |
| "eval_steps_per_second": 8.74, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 2.9017782948313117e-05, | |
| "loss": 0.5422, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "eval_f1": 0.4302607705379434, | |
| "eval_loss": 1.7097866535186768, | |
| "eval_runtime": 46.1436, | |
| "eval_samples_per_second": 140.475, | |
| "eval_steps_per_second": 8.799, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 2.8810038225029086e-05, | |
| "loss": 0.548, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "eval_f1": 0.4281896068096123, | |
| "eval_loss": 1.573617935180664, | |
| "eval_runtime": 46.7725, | |
| "eval_samples_per_second": 138.586, | |
| "eval_steps_per_second": 8.68, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 2.860229350174506e-05, | |
| "loss": 0.5169, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_f1": 0.4336268684648746, | |
| "eval_loss": 1.5596050024032593, | |
| "eval_runtime": 45.9526, | |
| "eval_samples_per_second": 141.058, | |
| "eval_steps_per_second": 8.835, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 2.8394548778461028e-05, | |
| "loss": 0.5327, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "eval_f1": 0.4223419321700975, | |
| "eval_loss": 1.6166974306106567, | |
| "eval_runtime": 46.03, | |
| "eval_samples_per_second": 140.821, | |
| "eval_steps_per_second": 8.82, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 2.8186804055177e-05, | |
| "loss": 0.5343, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "eval_f1": 0.4428612616903695, | |
| "eval_loss": 1.7605165243148804, | |
| "eval_runtime": 45.6649, | |
| "eval_samples_per_second": 141.947, | |
| "eval_steps_per_second": 8.891, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 2.797905933189297e-05, | |
| "loss": 0.5478, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_f1": 0.44071411652178355, | |
| "eval_loss": 1.6004695892333984, | |
| "eval_runtime": 45.993, | |
| "eval_samples_per_second": 140.935, | |
| "eval_steps_per_second": 8.827, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 2.7771314608608946e-05, | |
| "loss": 0.5489, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "eval_f1": 0.44084931155643786, | |
| "eval_loss": 1.642219066619873, | |
| "eval_runtime": 45.869, | |
| "eval_samples_per_second": 141.316, | |
| "eval_steps_per_second": 8.851, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 2.7563569885324915e-05, | |
| "loss": 0.5388, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_f1": 0.4372812211041268, | |
| "eval_loss": 1.7352898120880127, | |
| "eval_runtime": 46.4558, | |
| "eval_samples_per_second": 139.53, | |
| "eval_steps_per_second": 8.739, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 2.7355825162040887e-05, | |
| "loss": 0.5312, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "eval_f1": 0.42873515159285114, | |
| "eval_loss": 1.6332671642303467, | |
| "eval_runtime": 46.1955, | |
| "eval_samples_per_second": 140.317, | |
| "eval_steps_per_second": 8.789, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 2.7148080438756857e-05, | |
| "loss": 0.5369, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "eval_f1": 0.4392093355525315, | |
| "eval_loss": 1.5618759393692017, | |
| "eval_runtime": 47.5099, | |
| "eval_samples_per_second": 136.435, | |
| "eval_steps_per_second": 8.546, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 2.694033571547283e-05, | |
| "loss": 0.5475, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "eval_f1": 0.42820616668057093, | |
| "eval_loss": 1.583003282546997, | |
| "eval_runtime": 46.2808, | |
| "eval_samples_per_second": 140.058, | |
| "eval_steps_per_second": 8.773, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 2.67325909921888e-05, | |
| "loss": 0.5622, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "eval_f1": 0.4405394714296675, | |
| "eval_loss": 1.5289151668548584, | |
| "eval_runtime": 46.2454, | |
| "eval_samples_per_second": 140.165, | |
| "eval_steps_per_second": 8.779, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 2.6524846268904768e-05, | |
| "loss": 0.5662, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "eval_f1": 0.43493209599396276, | |
| "eval_loss": 1.5689671039581299, | |
| "eval_runtime": 46.2429, | |
| "eval_samples_per_second": 140.173, | |
| "eval_steps_per_second": 8.78, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.6317101545620744e-05, | |
| "loss": 0.5373, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "eval_f1": 0.4302946240864107, | |
| "eval_loss": 1.6275018453598022, | |
| "eval_runtime": 46.0239, | |
| "eval_samples_per_second": 140.84, | |
| "eval_steps_per_second": 8.821, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 2.610935682233671e-05, | |
| "loss": 0.5584, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "eval_f1": 0.43485188110217615, | |
| "eval_loss": 1.7044297456741333, | |
| "eval_runtime": 46.2954, | |
| "eval_samples_per_second": 140.014, | |
| "eval_steps_per_second": 8.77, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.5901612099052686e-05, | |
| "loss": 0.5484, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "eval_f1": 0.43712471779629325, | |
| "eval_loss": 1.6315213441848755, | |
| "eval_runtime": 46.0252, | |
| "eval_samples_per_second": 140.836, | |
| "eval_steps_per_second": 8.821, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.5693867375768655e-05, | |
| "loss": 0.5475, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_f1": 0.446221258616921, | |
| "eval_loss": 1.5129351615905762, | |
| "eval_runtime": 46.4009, | |
| "eval_samples_per_second": 139.696, | |
| "eval_steps_per_second": 8.75, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.5486122652484628e-05, | |
| "loss": 0.5551, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "eval_f1": 0.4409313233994151, | |
| "eval_loss": 1.637054204940796, | |
| "eval_runtime": 46.0061, | |
| "eval_samples_per_second": 140.894, | |
| "eval_steps_per_second": 8.825, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.5278377929200597e-05, | |
| "loss": 0.558, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "eval_f1": 0.4335536795951597, | |
| "eval_loss": 1.5173062086105347, | |
| "eval_runtime": 46.0786, | |
| "eval_samples_per_second": 140.673, | |
| "eval_steps_per_second": 8.811, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.5070633205916573e-05, | |
| "loss": 0.5553, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_f1": 0.4349357425201839, | |
| "eval_loss": 1.564207911491394, | |
| "eval_runtime": 45.533, | |
| "eval_samples_per_second": 142.358, | |
| "eval_steps_per_second": 8.917, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.4862888482632542e-05, | |
| "loss": 0.4491, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "eval_f1": 0.43614659724704335, | |
| "eval_loss": 1.9311244487762451, | |
| "eval_runtime": 46.1968, | |
| "eval_samples_per_second": 140.313, | |
| "eval_steps_per_second": 8.788, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.4655143759348515e-05, | |
| "loss": 0.403, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "eval_f1": 0.4393254273429778, | |
| "eval_loss": 2.0766100883483887, | |
| "eval_runtime": 45.9492, | |
| "eval_samples_per_second": 141.069, | |
| "eval_steps_per_second": 8.836, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.4447399036064487e-05, | |
| "loss": 0.4233, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "eval_f1": 0.43421874747799094, | |
| "eval_loss": 2.0252885818481445, | |
| "eval_runtime": 46.6362, | |
| "eval_samples_per_second": 138.991, | |
| "eval_steps_per_second": 8.706, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.4239654312780457e-05, | |
| "loss": 0.4412, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "eval_f1": 0.43655258613920295, | |
| "eval_loss": 2.0584676265716553, | |
| "eval_runtime": 46.1023, | |
| "eval_samples_per_second": 140.6, | |
| "eval_steps_per_second": 8.807, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.403190958949643e-05, | |
| "loss": 0.4477, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "eval_f1": 0.445008636219538, | |
| "eval_loss": 1.9808226823806763, | |
| "eval_runtime": 46.3962, | |
| "eval_samples_per_second": 139.71, | |
| "eval_steps_per_second": 8.751, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.3824164866212402e-05, | |
| "loss": 0.4497, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_f1": 0.44325755380966075, | |
| "eval_loss": 1.8606414794921875, | |
| "eval_runtime": 46.1573, | |
| "eval_samples_per_second": 140.433, | |
| "eval_steps_per_second": 8.796, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.361642014292837e-05, | |
| "loss": 0.4415, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "eval_f1": 0.44300106609021345, | |
| "eval_loss": 2.060542106628418, | |
| "eval_runtime": 45.615, | |
| "eval_samples_per_second": 142.102, | |
| "eval_steps_per_second": 8.901, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.340867541964434e-05, | |
| "loss": 0.4655, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "eval_f1": 0.4281324149819077, | |
| "eval_loss": 1.7479959726333618, | |
| "eval_runtime": 45.9941, | |
| "eval_samples_per_second": 140.931, | |
| "eval_steps_per_second": 8.827, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 2.3200930696360313e-05, | |
| "loss": 0.4395, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "eval_f1": 0.4315119731593183, | |
| "eval_loss": 1.851706862449646, | |
| "eval_runtime": 45.6959, | |
| "eval_samples_per_second": 141.851, | |
| "eval_steps_per_second": 8.885, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.2993185973076282e-05, | |
| "loss": 0.4738, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "eval_f1": 0.4246487511252701, | |
| "eval_loss": 1.7510011196136475, | |
| "eval_runtime": 46.0796, | |
| "eval_samples_per_second": 140.67, | |
| "eval_steps_per_second": 8.811, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.2785441249792255e-05, | |
| "loss": 0.455, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "eval_f1": 0.42367965460597234, | |
| "eval_loss": 1.7951207160949707, | |
| "eval_runtime": 46.1571, | |
| "eval_samples_per_second": 140.433, | |
| "eval_steps_per_second": 8.796, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.2577696526508228e-05, | |
| "loss": 0.4494, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "eval_f1": 0.4400121441304865, | |
| "eval_loss": 1.8601397275924683, | |
| "eval_runtime": 46.6249, | |
| "eval_samples_per_second": 139.024, | |
| "eval_steps_per_second": 8.708, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.2369951803224197e-05, | |
| "loss": 0.4364, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "eval_f1": 0.4454509847031083, | |
| "eval_loss": 1.9597169160842896, | |
| "eval_runtime": 45.9584, | |
| "eval_samples_per_second": 141.041, | |
| "eval_steps_per_second": 8.834, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.216220707994017e-05, | |
| "loss": 0.4611, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_f1": 0.44059704673003397, | |
| "eval_loss": 1.899012804031372, | |
| "eval_runtime": 47.0101, | |
| "eval_samples_per_second": 137.885, | |
| "eval_steps_per_second": 8.636, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.1954462356656142e-05, | |
| "loss": 0.4434, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "eval_f1": 0.44119414948642377, | |
| "eval_loss": 1.983310341835022, | |
| "eval_runtime": 45.9947, | |
| "eval_samples_per_second": 140.929, | |
| "eval_steps_per_second": 8.827, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.174671763337211e-05, | |
| "loss": 0.45, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_f1": 0.4426699728777116, | |
| "eval_loss": 1.8643006086349487, | |
| "eval_runtime": 46.7241, | |
| "eval_samples_per_second": 138.729, | |
| "eval_steps_per_second": 8.689, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.1538972910088084e-05, | |
| "loss": 0.4657, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "eval_f1": 0.4459768786824306, | |
| "eval_loss": 1.9347878694534302, | |
| "eval_runtime": 46.4171, | |
| "eval_samples_per_second": 139.647, | |
| "eval_steps_per_second": 8.747, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.1331228186804057e-05, | |
| "loss": 0.4536, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "eval_f1": 0.43967737232771253, | |
| "eval_loss": 1.9800372123718262, | |
| "eval_runtime": 46.5899, | |
| "eval_samples_per_second": 139.129, | |
| "eval_steps_per_second": 8.714, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.1123483463520026e-05, | |
| "loss": 0.4665, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "eval_f1": 0.43522687025203594, | |
| "eval_loss": 1.7668453454971313, | |
| "eval_runtime": 45.8529, | |
| "eval_samples_per_second": 141.365, | |
| "eval_steps_per_second": 8.854, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.0915738740236e-05, | |
| "loss": 0.4668, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "eval_f1": 0.43357794081082646, | |
| "eval_loss": 1.8984841108322144, | |
| "eval_runtime": 46.4335, | |
| "eval_samples_per_second": 139.597, | |
| "eval_steps_per_second": 8.744, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.070799401695197e-05, | |
| "loss": 0.4622, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "eval_f1": 0.4323966893074926, | |
| "eval_loss": 1.8252310752868652, | |
| "eval_runtime": 45.8779, | |
| "eval_samples_per_second": 141.288, | |
| "eval_steps_per_second": 8.85, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.050024929366794e-05, | |
| "loss": 0.4766, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "eval_f1": 0.44080051637694073, | |
| "eval_loss": 1.7060314416885376, | |
| "eval_runtime": 46.3123, | |
| "eval_samples_per_second": 139.963, | |
| "eval_steps_per_second": 8.767, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.0292504570383913e-05, | |
| "loss": 0.4476, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "eval_f1": 0.4385964321265162, | |
| "eval_loss": 1.8184629678726196, | |
| "eval_runtime": 45.9872, | |
| "eval_samples_per_second": 140.952, | |
| "eval_steps_per_second": 8.829, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.0084759847099886e-05, | |
| "loss": 0.4602, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_f1": 0.44088418061549356, | |
| "eval_loss": 1.7630596160888672, | |
| "eval_runtime": 46.6035, | |
| "eval_samples_per_second": 139.088, | |
| "eval_steps_per_second": 8.712, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.9877015123815855e-05, | |
| "loss": 0.3679, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "eval_f1": 0.4346630256587338, | |
| "eval_loss": 2.1265206336975098, | |
| "eval_runtime": 46.1031, | |
| "eval_samples_per_second": 140.598, | |
| "eval_steps_per_second": 8.806, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 1.9669270400531827e-05, | |
| "loss": 0.3708, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "eval_f1": 0.4350030580705022, | |
| "eval_loss": 2.233613967895508, | |
| "eval_runtime": 46.3001, | |
| "eval_samples_per_second": 140.0, | |
| "eval_steps_per_second": 8.769, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 1.94615256772478e-05, | |
| "loss": 0.3761, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "eval_f1": 0.4315408453045143, | |
| "eval_loss": 2.161686420440674, | |
| "eval_runtime": 46.0645, | |
| "eval_samples_per_second": 140.716, | |
| "eval_steps_per_second": 8.814, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 1.925378095396377e-05, | |
| "loss": 0.382, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "eval_f1": 0.4300930630733001, | |
| "eval_loss": 2.2093658447265625, | |
| "eval_runtime": 46.5249, | |
| "eval_samples_per_second": 139.323, | |
| "eval_steps_per_second": 8.727, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 1.9046036230679742e-05, | |
| "loss": 0.3606, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "eval_f1": 0.4263466007824531, | |
| "eval_loss": 2.422569751739502, | |
| "eval_runtime": 46.1496, | |
| "eval_samples_per_second": 140.456, | |
| "eval_steps_per_second": 8.797, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 1.8838291507395715e-05, | |
| "loss": 0.3929, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "eval_f1": 0.42870899531401374, | |
| "eval_loss": 2.2339413166046143, | |
| "eval_runtime": 45.9626, | |
| "eval_samples_per_second": 141.028, | |
| "eval_steps_per_second": 8.833, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.8630546784111684e-05, | |
| "loss": 0.3761, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_f1": 0.43084241603610085, | |
| "eval_loss": 2.245156764984131, | |
| "eval_runtime": 45.9689, | |
| "eval_samples_per_second": 141.008, | |
| "eval_steps_per_second": 8.832, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 1.8422802060827657e-05, | |
| "loss": 0.3735, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "eval_f1": 0.42730161492497354, | |
| "eval_loss": 2.375741720199585, | |
| "eval_runtime": 45.5018, | |
| "eval_samples_per_second": 142.456, | |
| "eval_steps_per_second": 8.923, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 1.821505733754363e-05, | |
| "loss": 0.3824, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "eval_f1": 0.42850714365323117, | |
| "eval_loss": 2.4190187454223633, | |
| "eval_runtime": 46.0335, | |
| "eval_samples_per_second": 140.81, | |
| "eval_steps_per_second": 8.82, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.80073126142596e-05, | |
| "loss": 0.4198, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_f1": 0.41957506619020246, | |
| "eval_loss": 2.291776657104492, | |
| "eval_runtime": 45.9135, | |
| "eval_samples_per_second": 141.179, | |
| "eval_steps_per_second": 8.843, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 1.779956789097557e-05, | |
| "loss": 0.3759, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "eval_f1": 0.4248491736870143, | |
| "eval_loss": 2.2740871906280518, | |
| "eval_runtime": 46.8387, | |
| "eval_samples_per_second": 138.39, | |
| "eval_steps_per_second": 8.668, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 1.759182316769154e-05, | |
| "loss": 0.4006, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "eval_f1": 0.43213490469184823, | |
| "eval_loss": 2.3717033863067627, | |
| "eval_runtime": 46.2157, | |
| "eval_samples_per_second": 140.255, | |
| "eval_steps_per_second": 8.785, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 1.7384078444407513e-05, | |
| "loss": 0.374, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "eval_f1": 0.42844878973090866, | |
| "eval_loss": 2.2131588459014893, | |
| "eval_runtime": 46.4565, | |
| "eval_samples_per_second": 139.528, | |
| "eval_steps_per_second": 8.739, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 1.7176333721123482e-05, | |
| "loss": 0.3985, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "eval_f1": 0.430486194952956, | |
| "eval_loss": 2.2677221298217773, | |
| "eval_runtime": 45.8499, | |
| "eval_samples_per_second": 141.374, | |
| "eval_steps_per_second": 8.855, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 1.6968588997839455e-05, | |
| "loss": 0.3892, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "eval_f1": 0.4339078659073283, | |
| "eval_loss": 2.2518913745880127, | |
| "eval_runtime": 46.9488, | |
| "eval_samples_per_second": 138.065, | |
| "eval_steps_per_second": 8.648, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.6760844274555427e-05, | |
| "loss": 0.4071, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "eval_f1": 0.4281750460163451, | |
| "eval_loss": 2.1826319694519043, | |
| "eval_runtime": 46.8418, | |
| "eval_samples_per_second": 138.381, | |
| "eval_steps_per_second": 8.667, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.6553099551271397e-05, | |
| "loss": 0.4054, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "eval_f1": 0.425255645027199, | |
| "eval_loss": 2.1723647117614746, | |
| "eval_runtime": 46.2813, | |
| "eval_samples_per_second": 140.057, | |
| "eval_steps_per_second": 8.772, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.634535482798737e-05, | |
| "loss": 0.374, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_f1": 0.4243673281546822, | |
| "eval_loss": 2.208036184310913, | |
| "eval_runtime": 45.8911, | |
| "eval_samples_per_second": 141.248, | |
| "eval_steps_per_second": 8.847, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 1.6137610104703342e-05, | |
| "loss": 0.4086, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_f1": 0.4345723086799732, | |
| "eval_loss": 2.182802438735962, | |
| "eval_runtime": 45.7981, | |
| "eval_samples_per_second": 141.534, | |
| "eval_steps_per_second": 8.865, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 1.592986538141931e-05, | |
| "loss": 0.3828, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "eval_f1": 0.43379920447502057, | |
| "eval_loss": 2.186859369277954, | |
| "eval_runtime": 45.8903, | |
| "eval_samples_per_second": 141.25, | |
| "eval_steps_per_second": 8.847, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 1.5722120658135284e-05, | |
| "loss": 0.4065, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_f1": 0.43151860579932383, | |
| "eval_loss": 2.138597249984741, | |
| "eval_runtime": 46.0037, | |
| "eval_samples_per_second": 140.902, | |
| "eval_steps_per_second": 8.825, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 1.5514375934851256e-05, | |
| "loss": 0.4058, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "eval_f1": 0.43162515735240276, | |
| "eval_loss": 2.0976452827453613, | |
| "eval_runtime": 46.3993, | |
| "eval_samples_per_second": 139.7, | |
| "eval_steps_per_second": 8.75, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 1.5306631211567226e-05, | |
| "loss": 0.3873, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "eval_f1": 0.4212864273818023, | |
| "eval_loss": 2.4226503372192383, | |
| "eval_runtime": 46.1616, | |
| "eval_samples_per_second": 140.42, | |
| "eval_steps_per_second": 8.795, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 1.5098886488283198e-05, | |
| "loss": 0.389, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_f1": 0.431436402933277, | |
| "eval_loss": 2.1598784923553467, | |
| "eval_runtime": 46.8062, | |
| "eval_samples_per_second": 138.486, | |
| "eval_steps_per_second": 8.674, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.489114176499917e-05, | |
| "loss": 0.3538, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "eval_f1": 0.4350556588831767, | |
| "eval_loss": 2.304293155670166, | |
| "eval_runtime": 46.574, | |
| "eval_samples_per_second": 139.176, | |
| "eval_steps_per_second": 8.717, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.4683397041715142e-05, | |
| "loss": 0.3132, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_f1": 0.4266591713234993, | |
| "eval_loss": 2.650728702545166, | |
| "eval_runtime": 46.4231, | |
| "eval_samples_per_second": 139.629, | |
| "eval_steps_per_second": 8.746, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.4475652318431113e-05, | |
| "loss": 0.3166, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_f1": 0.43607320504273067, | |
| "eval_loss": 2.6059470176696777, | |
| "eval_runtime": 45.9726, | |
| "eval_samples_per_second": 140.997, | |
| "eval_steps_per_second": 8.831, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.4267907595147084e-05, | |
| "loss": 0.3303, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "eval_f1": 0.43326161921977124, | |
| "eval_loss": 2.56927227973938, | |
| "eval_runtime": 46.099, | |
| "eval_samples_per_second": 140.611, | |
| "eval_steps_per_second": 8.807, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 1.4060162871863056e-05, | |
| "loss": 0.3031, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "eval_f1": 0.43263480546826966, | |
| "eval_loss": 2.6123645305633545, | |
| "eval_runtime": 46.1092, | |
| "eval_samples_per_second": 140.579, | |
| "eval_steps_per_second": 8.805, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.3852418148579027e-05, | |
| "loss": 0.3495, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "eval_f1": 0.42935020523616974, | |
| "eval_loss": 2.5714728832244873, | |
| "eval_runtime": 46.4434, | |
| "eval_samples_per_second": 139.568, | |
| "eval_steps_per_second": 8.742, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.3644673425294998e-05, | |
| "loss": 0.3364, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "eval_f1": 0.4343767918185199, | |
| "eval_loss": 2.5482189655303955, | |
| "eval_runtime": 45.6061, | |
| "eval_samples_per_second": 142.13, | |
| "eval_steps_per_second": 8.902, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.3436928702010971e-05, | |
| "loss": 0.3169, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "eval_f1": 0.4326820837375438, | |
| "eval_loss": 2.533505916595459, | |
| "eval_runtime": 45.7021, | |
| "eval_samples_per_second": 141.832, | |
| "eval_steps_per_second": 8.884, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.3229183978726942e-05, | |
| "loss": 0.3306, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "eval_f1": 0.4336259208362092, | |
| "eval_loss": 2.505479097366333, | |
| "eval_runtime": 45.9282, | |
| "eval_samples_per_second": 141.133, | |
| "eval_steps_per_second": 8.84, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.3021439255442913e-05, | |
| "loss": 0.3238, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "eval_f1": 0.4290241175053658, | |
| "eval_loss": 2.64029598236084, | |
| "eval_runtime": 45.7506, | |
| "eval_samples_per_second": 141.681, | |
| "eval_steps_per_second": 8.874, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.2813694532158885e-05, | |
| "loss": 0.3488, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "eval_f1": 0.4318857789484726, | |
| "eval_loss": 2.4507477283477783, | |
| "eval_runtime": 46.4161, | |
| "eval_samples_per_second": 139.65, | |
| "eval_steps_per_second": 8.747, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 1.2605949808874856e-05, | |
| "loss": 0.3423, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "eval_f1": 0.42784593140742677, | |
| "eval_loss": 2.4914281368255615, | |
| "eval_runtime": 46.3911, | |
| "eval_samples_per_second": 139.725, | |
| "eval_steps_per_second": 8.752, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 1.2398205085590826e-05, | |
| "loss": 0.3356, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "eval_f1": 0.44002802787763307, | |
| "eval_loss": 2.723588466644287, | |
| "eval_runtime": 46.564, | |
| "eval_samples_per_second": 139.206, | |
| "eval_steps_per_second": 8.719, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 1.2190460362306798e-05, | |
| "loss": 0.3655, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "eval_f1": 0.4342107835672762, | |
| "eval_loss": 2.516355276107788, | |
| "eval_runtime": 46.0601, | |
| "eval_samples_per_second": 140.729, | |
| "eval_steps_per_second": 8.815, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 1.198271563902277e-05, | |
| "loss": 0.3445, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_f1": 0.4392682142110146, | |
| "eval_loss": 2.4509174823760986, | |
| "eval_runtime": 46.5389, | |
| "eval_samples_per_second": 139.281, | |
| "eval_steps_per_second": 8.724, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 1.177497091573874e-05, | |
| "loss": 0.3123, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "eval_f1": 0.4346012419342983, | |
| "eval_loss": 2.605870008468628, | |
| "eval_runtime": 46.0454, | |
| "eval_samples_per_second": 140.774, | |
| "eval_steps_per_second": 8.817, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 1.1567226192454713e-05, | |
| "loss": 0.3286, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_f1": 0.42900413120162884, | |
| "eval_loss": 2.5746617317199707, | |
| "eval_runtime": 46.6316, | |
| "eval_samples_per_second": 139.005, | |
| "eval_steps_per_second": 8.707, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 1.1359481469170684e-05, | |
| "loss": 0.3308, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "eval_f1": 0.4348325522174462, | |
| "eval_loss": 2.6604156494140625, | |
| "eval_runtime": 46.0552, | |
| "eval_samples_per_second": 140.744, | |
| "eval_steps_per_second": 8.816, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 1.1151736745886655e-05, | |
| "loss": 0.3507, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "eval_f1": 0.4362724679273792, | |
| "eval_loss": 2.6467623710632324, | |
| "eval_runtime": 46.2441, | |
| "eval_samples_per_second": 140.169, | |
| "eval_steps_per_second": 8.78, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 1.0943992022602627e-05, | |
| "loss": 0.3392, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "eval_f1": 0.43161760121865855, | |
| "eval_loss": 2.6293702125549316, | |
| "eval_runtime": 45.6229, | |
| "eval_samples_per_second": 142.078, | |
| "eval_steps_per_second": 8.899, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 1.0736247299318598e-05, | |
| "loss": 0.3885, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "eval_f1": 0.42849041218645784, | |
| "eval_loss": 2.4070699214935303, | |
| "eval_runtime": 45.9402, | |
| "eval_samples_per_second": 141.096, | |
| "eval_steps_per_second": 8.838, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 1.052850257603457e-05, | |
| "loss": 0.3458, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "eval_f1": 0.42803465998306145, | |
| "eval_loss": 2.5539064407348633, | |
| "eval_runtime": 46.1313, | |
| "eval_samples_per_second": 140.512, | |
| "eval_steps_per_second": 8.801, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 1.032075785275054e-05, | |
| "loss": 0.3451, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "eval_f1": 0.4282701100981614, | |
| "eval_loss": 2.4033892154693604, | |
| "eval_runtime": 46.4312, | |
| "eval_samples_per_second": 139.604, | |
| "eval_steps_per_second": 8.744, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 1.0113013129466511e-05, | |
| "loss": 0.3394, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "eval_f1": 0.43486076258951, | |
| "eval_loss": 2.6116716861724854, | |
| "eval_runtime": 45.9663, | |
| "eval_samples_per_second": 141.016, | |
| "eval_steps_per_second": 8.833, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 9.905268406182484e-06, | |
| "loss": 0.3275, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "eval_f1": 0.4322422606334531, | |
| "eval_loss": 2.5991387367248535, | |
| "eval_runtime": 46.5777, | |
| "eval_samples_per_second": 139.165, | |
| "eval_steps_per_second": 8.717, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 9.697523682898455e-06, | |
| "loss": 0.2676, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_f1": 0.42999721877624847, | |
| "eval_loss": 2.772134304046631, | |
| "eval_runtime": 46.0246, | |
| "eval_samples_per_second": 140.838, | |
| "eval_steps_per_second": 8.821, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 9.489778959614426e-06, | |
| "loss": 0.3013, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "eval_f1": 0.43159307099859695, | |
| "eval_loss": 2.7766764163970947, | |
| "eval_runtime": 46.3012, | |
| "eval_samples_per_second": 139.996, | |
| "eval_steps_per_second": 8.769, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.282034236330398e-06, | |
| "loss": 0.283, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "eval_f1": 0.43429947059083457, | |
| "eval_loss": 2.723806142807007, | |
| "eval_runtime": 46.2805, | |
| "eval_samples_per_second": 140.059, | |
| "eval_steps_per_second": 8.773, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 9.07428951304637e-06, | |
| "loss": 0.2924, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "eval_f1": 0.43274102934044933, | |
| "eval_loss": 2.7132375240325928, | |
| "eval_runtime": 46.1859, | |
| "eval_samples_per_second": 140.346, | |
| "eval_steps_per_second": 8.791, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 8.86654478976234e-06, | |
| "loss": 0.2817, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "eval_f1": 0.427616805108728, | |
| "eval_loss": 2.7119312286376953, | |
| "eval_runtime": 56.0523, | |
| "eval_samples_per_second": 115.642, | |
| "eval_steps_per_second": 7.243, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 8.658800066478313e-06, | |
| "loss": 0.2586, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "eval_f1": 0.4289585910640077, | |
| "eval_loss": 2.887305736541748, | |
| "eval_runtime": 46.8426, | |
| "eval_samples_per_second": 138.378, | |
| "eval_steps_per_second": 8.667, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 8.451055343194284e-06, | |
| "loss": 0.3051, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "eval_f1": 0.42582497432500993, | |
| "eval_loss": 2.7572007179260254, | |
| "eval_runtime": 45.6991, | |
| "eval_samples_per_second": 141.841, | |
| "eval_steps_per_second": 8.884, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 8.243310619910255e-06, | |
| "loss": 0.2916, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "eval_f1": 0.43075118593793377, | |
| "eval_loss": 2.814906358718872, | |
| "eval_runtime": 46.2133, | |
| "eval_samples_per_second": 140.263, | |
| "eval_steps_per_second": 8.785, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 8.035565896626227e-06, | |
| "loss": 0.2948, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "eval_f1": 0.4244624620028211, | |
| "eval_loss": 2.6767539978027344, | |
| "eval_runtime": 46.5451, | |
| "eval_samples_per_second": 139.263, | |
| "eval_steps_per_second": 8.723, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 7.827821173342198e-06, | |
| "loss": 0.277, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "eval_f1": 0.4273818288568198, | |
| "eval_loss": 2.727567672729492, | |
| "eval_runtime": 46.8521, | |
| "eval_samples_per_second": 138.35, | |
| "eval_steps_per_second": 8.666, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 7.620076450058169e-06, | |
| "loss": 0.2929, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "eval_f1": 0.4320719535965778, | |
| "eval_loss": 2.926513433456421, | |
| "eval_runtime": 47.1016, | |
| "eval_samples_per_second": 137.617, | |
| "eval_steps_per_second": 8.62, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 7.412331726774139e-06, | |
| "loss": 0.2893, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "eval_f1": 0.4260519729309167, | |
| "eval_loss": 2.877741575241089, | |
| "eval_runtime": 45.6581, | |
| "eval_samples_per_second": 141.968, | |
| "eval_steps_per_second": 8.892, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.204587003490111e-06, | |
| "loss": 0.2808, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "eval_f1": 0.4282218551070931, | |
| "eval_loss": 2.7379603385925293, | |
| "eval_runtime": 45.9855, | |
| "eval_samples_per_second": 140.957, | |
| "eval_steps_per_second": 8.829, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 6.996842280206083e-06, | |
| "loss": 0.2728, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "eval_f1": 0.43170842579550445, | |
| "eval_loss": 2.7965376377105713, | |
| "eval_runtime": 45.7627, | |
| "eval_samples_per_second": 141.644, | |
| "eval_steps_per_second": 8.872, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 6.789097556922054e-06, | |
| "loss": 0.2789, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "eval_f1": 0.4315708387912964, | |
| "eval_loss": 2.875927209854126, | |
| "eval_runtime": 45.9208, | |
| "eval_samples_per_second": 141.156, | |
| "eval_steps_per_second": 8.841, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 6.5813528336380256e-06, | |
| "loss": 0.3072, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "eval_f1": 0.42772350225777217, | |
| "eval_loss": 2.8333616256713867, | |
| "eval_runtime": 49.0657, | |
| "eval_samples_per_second": 132.109, | |
| "eval_steps_per_second": 8.275, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 6.373608110353997e-06, | |
| "loss": 0.2779, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "eval_f1": 0.4306437356669429, | |
| "eval_loss": 2.8720405101776123, | |
| "eval_runtime": 46.0481, | |
| "eval_samples_per_second": 140.766, | |
| "eval_steps_per_second": 8.817, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 6.165863387069968e-06, | |
| "loss": 0.2948, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "eval_f1": 0.42931126691296156, | |
| "eval_loss": 2.82370662689209, | |
| "eval_runtime": 45.6262, | |
| "eval_samples_per_second": 142.067, | |
| "eval_steps_per_second": 8.898, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 5.95811866378594e-06, | |
| "loss": 0.2917, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "eval_f1": 0.42987557908855206, | |
| "eval_loss": 2.785443067550659, | |
| "eval_runtime": 45.7985, | |
| "eval_samples_per_second": 141.533, | |
| "eval_steps_per_second": 8.865, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 5.750373940501912e-06, | |
| "loss": 0.2604, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "eval_f1": 0.4272210968347812, | |
| "eval_loss": 2.9237265586853027, | |
| "eval_runtime": 45.9348, | |
| "eval_samples_per_second": 141.113, | |
| "eval_steps_per_second": 8.839, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5.542629217217883e-06, | |
| "loss": 0.3057, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "eval_f1": 0.4287531329908217, | |
| "eval_loss": 2.8509225845336914, | |
| "eval_runtime": 46.4174, | |
| "eval_samples_per_second": 139.646, | |
| "eval_steps_per_second": 8.747, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5.334884493933855e-06, | |
| "loss": 0.2853, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "eval_f1": 0.4273342046666312, | |
| "eval_loss": 2.7482059001922607, | |
| "eval_runtime": 45.8656, | |
| "eval_samples_per_second": 141.326, | |
| "eval_steps_per_second": 8.852, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 5.1271397706498255e-06, | |
| "loss": 0.2946, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "eval_f1": 0.4272429839187248, | |
| "eval_loss": 2.86079740524292, | |
| "eval_runtime": 46.3563, | |
| "eval_samples_per_second": 139.83, | |
| "eval_steps_per_second": 8.758, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.919395047365797e-06, | |
| "loss": 0.2492, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "eval_f1": 0.4263680579842533, | |
| "eval_loss": 3.0018138885498047, | |
| "eval_runtime": 46.2406, | |
| "eval_samples_per_second": 140.18, | |
| "eval_steps_per_second": 8.78, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 4.711650324081768e-06, | |
| "loss": 0.2471, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "eval_f1": 0.42881222429550997, | |
| "eval_loss": 3.014409303665161, | |
| "eval_runtime": 46.1142, | |
| "eval_samples_per_second": 140.564, | |
| "eval_steps_per_second": 8.804, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.50390560079774e-06, | |
| "loss": 0.2497, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "eval_f1": 0.4299801659722133, | |
| "eval_loss": 3.0282411575317383, | |
| "eval_runtime": 46.4599, | |
| "eval_samples_per_second": 139.518, | |
| "eval_steps_per_second": 8.739, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "learning_rate": 4.296160877513712e-06, | |
| "loss": 0.2632, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "eval_f1": 0.4285290598627509, | |
| "eval_loss": 2.9987945556640625, | |
| "eval_runtime": 46.2719, | |
| "eval_samples_per_second": 140.085, | |
| "eval_steps_per_second": 8.774, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 4.088416154229683e-06, | |
| "loss": 0.2292, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "eval_f1": 0.4295693743332262, | |
| "eval_loss": 2.9812545776367188, | |
| "eval_runtime": 46.0798, | |
| "eval_samples_per_second": 140.669, | |
| "eval_steps_per_second": 8.811, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 3.8806714309456546e-06, | |
| "loss": 0.2467, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "eval_f1": 0.4292138712207353, | |
| "eval_loss": 2.9471848011016846, | |
| "eval_runtime": 46.1858, | |
| "eval_samples_per_second": 140.346, | |
| "eval_steps_per_second": 8.791, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 3.672926707661625e-06, | |
| "loss": 0.2405, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "eval_f1": 0.4283879568475488, | |
| "eval_loss": 3.013239860534668, | |
| "eval_runtime": 46.341, | |
| "eval_samples_per_second": 139.876, | |
| "eval_steps_per_second": 8.761, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 3.465181984377597e-06, | |
| "loss": 0.2479, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "eval_f1": 0.43018803192623334, | |
| "eval_loss": 2.974679946899414, | |
| "eval_runtime": 46.0989, | |
| "eval_samples_per_second": 140.611, | |
| "eval_steps_per_second": 8.807, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 3.2574372610935682e-06, | |
| "loss": 0.2415, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "eval_f1": 0.43026318885711246, | |
| "eval_loss": 2.9601778984069824, | |
| "eval_runtime": 46.5726, | |
| "eval_samples_per_second": 139.18, | |
| "eval_steps_per_second": 8.718, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.0496925378095396e-06, | |
| "loss": 0.2387, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "eval_f1": 0.4279864446994403, | |
| "eval_loss": 2.995856285095215, | |
| "eval_runtime": 45.9971, | |
| "eval_samples_per_second": 140.922, | |
| "eval_steps_per_second": 8.827, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 2.8419478145255114e-06, | |
| "loss": 0.2005, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "eval_f1": 0.4226136111773091, | |
| "eval_loss": 3.061025857925415, | |
| "eval_runtime": 46.3377, | |
| "eval_samples_per_second": 139.886, | |
| "eval_steps_per_second": 8.762, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 2.6342030912414823e-06, | |
| "loss": 0.2479, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "eval_f1": 0.4310947389401135, | |
| "eval_loss": 3.0478320121765137, | |
| "eval_runtime": 46.1598, | |
| "eval_samples_per_second": 140.425, | |
| "eval_steps_per_second": 8.796, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 2.426458367957454e-06, | |
| "loss": 0.2649, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "eval_f1": 0.42937936025434953, | |
| "eval_loss": 2.964843511581421, | |
| "eval_runtime": 45.9242, | |
| "eval_samples_per_second": 141.146, | |
| "eval_steps_per_second": 8.841, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 2.2187136446734255e-06, | |
| "loss": 0.2659, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "eval_f1": 0.42954375020993524, | |
| "eval_loss": 2.9387714862823486, | |
| "eval_runtime": 46.5353, | |
| "eval_samples_per_second": 139.292, | |
| "eval_steps_per_second": 8.725, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 2.010968921389397e-06, | |
| "loss": 0.2723, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "eval_f1": 0.43453017211998707, | |
| "eval_loss": 2.942929744720459, | |
| "eval_runtime": 45.4198, | |
| "eval_samples_per_second": 142.713, | |
| "eval_steps_per_second": 8.939, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 1.8032241981053682e-06, | |
| "loss": 0.2607, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "eval_f1": 0.43214152175935766, | |
| "eval_loss": 2.9575393199920654, | |
| "eval_runtime": 46.6531, | |
| "eval_samples_per_second": 138.94, | |
| "eval_steps_per_second": 8.703, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 1.5954794748213396e-06, | |
| "loss": 0.2284, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "eval_f1": 0.43057572034563696, | |
| "eval_loss": 2.9958791732788086, | |
| "eval_runtime": 45.9052, | |
| "eval_samples_per_second": 141.204, | |
| "eval_steps_per_second": 8.844, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 1.387734751537311e-06, | |
| "loss": 0.2703, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "eval_f1": 0.4305604104069932, | |
| "eval_loss": 2.9565792083740234, | |
| "eval_runtime": 46.6048, | |
| "eval_samples_per_second": 139.084, | |
| "eval_steps_per_second": 8.712, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 1.1799900282532825e-06, | |
| "loss": 0.2358, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "eval_f1": 0.4299059329453134, | |
| "eval_loss": 2.9763364791870117, | |
| "eval_runtime": 45.9929, | |
| "eval_samples_per_second": 140.935, | |
| "eval_steps_per_second": 8.827, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 9.722453049692539e-07, | |
| "loss": 0.2633, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "eval_f1": 0.42872674431828584, | |
| "eval_loss": 2.9550485610961914, | |
| "eval_runtime": 46.191, | |
| "eval_samples_per_second": 140.33, | |
| "eval_steps_per_second": 8.79, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 7.645005816852252e-07, | |
| "loss": 0.2184, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "eval_f1": 0.43016074486231115, | |
| "eval_loss": 2.9980921745300293, | |
| "eval_runtime": 46.1749, | |
| "eval_samples_per_second": 140.379, | |
| "eval_steps_per_second": 8.793, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 5.567558584011966e-07, | |
| "loss": 0.2299, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "eval_f1": 0.4306080177005011, | |
| "eval_loss": 3.0055530071258545, | |
| "eval_runtime": 46.0833, | |
| "eval_samples_per_second": 140.658, | |
| "eval_steps_per_second": 8.81, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 3.4901113511716807e-07, | |
| "loss": 0.2469, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "eval_f1": 0.4310614992045377, | |
| "eval_loss": 2.9983813762664795, | |
| "eval_runtime": 46.3038, | |
| "eval_samples_per_second": 139.989, | |
| "eval_steps_per_second": 8.768, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 1.4126641183313946e-07, | |
| "loss": 0.2741, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "eval_f1": 0.43169668013947665, | |
| "eval_loss": 2.9964375495910645, | |
| "eval_runtime": 46.5054, | |
| "eval_samples_per_second": 139.382, | |
| "eval_steps_per_second": 8.73, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 120340, | |
| "total_flos": 5.252278015131096e+16, | |
| "train_loss": 0.5760954068351747, | |
| "train_runtime": 42398.6115, | |
| "train_samples_per_second": 22.706, | |
| "train_steps_per_second": 2.838 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 120340, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 5.252278015131096e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |