| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 65.0, | |
| "global_step": 6760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.9238165680473374e-05, | |
| "loss": 1.4565, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.5061315988298739, | |
| "eval_loss": 1.226438045501709, | |
| "eval_runtime": 2.9031, | |
| "eval_samples_per_second": 190.487, | |
| "eval_steps_per_second": 12.056, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 4.8483727810650895e-05, | |
| "loss": 1.08, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.5585235698125446, | |
| "eval_loss": 1.1974396705627441, | |
| "eval_runtime": 2.9416, | |
| "eval_samples_per_second": 187.991, | |
| "eval_steps_per_second": 11.898, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.772189349112427e-05, | |
| "loss": 0.8073, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.5539323613883379, | |
| "eval_loss": 1.276489496231079, | |
| "eval_runtime": 2.969, | |
| "eval_samples_per_second": 186.259, | |
| "eval_steps_per_second": 11.789, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 4.696005917159764e-05, | |
| "loss": 0.5577, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.5553847052045314, | |
| "eval_loss": 1.427822232246399, | |
| "eval_runtime": 2.9361, | |
| "eval_samples_per_second": 188.346, | |
| "eval_steps_per_second": 11.921, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 4.619822485207101e-05, | |
| "loss": 0.3941, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.5570342860609194, | |
| "eval_loss": 1.6517128944396973, | |
| "eval_runtime": 2.9022, | |
| "eval_samples_per_second": 190.542, | |
| "eval_steps_per_second": 12.06, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 4.543639053254438e-05, | |
| "loss": 0.2878, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.5619826716090497, | |
| "eval_loss": 1.8180437088012695, | |
| "eval_runtime": 2.855, | |
| "eval_samples_per_second": 193.697, | |
| "eval_steps_per_second": 12.259, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 4.468195266272189e-05, | |
| "loss": 0.2337, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.5674708526030706, | |
| "eval_loss": 1.9061989784240723, | |
| "eval_runtime": 2.8641, | |
| "eval_samples_per_second": 193.077, | |
| "eval_steps_per_second": 12.22, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 4.392011834319526e-05, | |
| "loss": 0.1743, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.5571774381839604, | |
| "eval_loss": 2.166078805923462, | |
| "eval_runtime": 2.8885, | |
| "eval_samples_per_second": 191.449, | |
| "eval_steps_per_second": 12.117, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 4.315828402366864e-05, | |
| "loss": 0.1324, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.5586999359656486, | |
| "eval_loss": 2.1434192657470703, | |
| "eval_runtime": 2.8793, | |
| "eval_samples_per_second": 192.062, | |
| "eval_steps_per_second": 12.156, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 4.239644970414201e-05, | |
| "loss": 0.1051, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.571861247626083, | |
| "eval_loss": 2.2514231204986572, | |
| "eval_runtime": 2.8876, | |
| "eval_samples_per_second": 191.511, | |
| "eval_steps_per_second": 12.121, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "learning_rate": 4.163461538461539e-05, | |
| "loss": 0.1016, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.5608736700927537, | |
| "eval_loss": 2.452277898788452, | |
| "eval_runtime": 2.9662, | |
| "eval_samples_per_second": 186.434, | |
| "eval_steps_per_second": 11.8, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "learning_rate": 4.0872781065088764e-05, | |
| "loss": 0.0814, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.5643677851728315, | |
| "eval_loss": 2.5340888500213623, | |
| "eval_runtime": 2.8457, | |
| "eval_samples_per_second": 194.329, | |
| "eval_steps_per_second": 12.299, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "learning_rate": 4.0110946745562136e-05, | |
| "loss": 0.0673, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.5738915229311208, | |
| "eval_loss": 2.6217703819274902, | |
| "eval_runtime": 2.9035, | |
| "eval_samples_per_second": 190.46, | |
| "eval_steps_per_second": 12.054, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "learning_rate": 3.934911242603551e-05, | |
| "loss": 0.0684, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.5366433281464598, | |
| "eval_loss": 2.9552414417266846, | |
| "eval_runtime": 2.9063, | |
| "eval_samples_per_second": 190.277, | |
| "eval_steps_per_second": 12.043, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "learning_rate": 3.858727810650888e-05, | |
| "loss": 0.0466, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.5787084254032917, | |
| "eval_loss": 2.7240512371063232, | |
| "eval_runtime": 2.8887, | |
| "eval_samples_per_second": 191.438, | |
| "eval_steps_per_second": 12.116, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 15.85, | |
| "learning_rate": 3.782544378698225e-05, | |
| "loss": 0.0577, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.5666557248979172, | |
| "eval_loss": 2.821897506713867, | |
| "eval_runtime": 2.8994, | |
| "eval_samples_per_second": 190.727, | |
| "eval_steps_per_second": 12.071, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 16.84, | |
| "learning_rate": 3.706360946745562e-05, | |
| "loss": 0.042, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.56033452806457, | |
| "eval_loss": 2.9155900478363037, | |
| "eval_runtime": 2.9258, | |
| "eval_samples_per_second": 189.01, | |
| "eval_steps_per_second": 11.963, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "learning_rate": 3.6301775147928995e-05, | |
| "loss": 0.0404, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.5621979513908701, | |
| "eval_loss": 2.893630266189575, | |
| "eval_runtime": 2.9319, | |
| "eval_samples_per_second": 188.613, | |
| "eval_steps_per_second": 11.938, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 18.82, | |
| "learning_rate": 3.553994082840237e-05, | |
| "loss": 0.0426, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.5766503161850353, | |
| "eval_loss": 3.00762939453125, | |
| "eval_runtime": 2.9064, | |
| "eval_samples_per_second": 190.271, | |
| "eval_steps_per_second": 12.042, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 3.477810650887574e-05, | |
| "loss": 0.0361, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.5565194377868121, | |
| "eval_loss": 3.043562173843384, | |
| "eval_runtime": 2.9099, | |
| "eval_samples_per_second": 190.043, | |
| "eval_steps_per_second": 12.028, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "learning_rate": 3.401627218934911e-05, | |
| "loss": 0.039, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.5661283975776907, | |
| "eval_loss": 3.034050226211548, | |
| "eval_runtime": 2.8472, | |
| "eval_samples_per_second": 194.228, | |
| "eval_steps_per_second": 12.293, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 21.79, | |
| "learning_rate": 3.325443786982248e-05, | |
| "loss": 0.0311, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.5698598461896062, | |
| "eval_loss": 3.1546428203582764, | |
| "eval_runtime": 2.8965, | |
| "eval_samples_per_second": 190.921, | |
| "eval_steps_per_second": 12.084, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 22.78, | |
| "learning_rate": 3.2492603550295855e-05, | |
| "loss": 0.0296, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.5584145320343268, | |
| "eval_loss": 3.3160221576690674, | |
| "eval_runtime": 2.9004, | |
| "eval_samples_per_second": 190.664, | |
| "eval_steps_per_second": 12.067, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 23.77, | |
| "learning_rate": 3.1730769230769234e-05, | |
| "loss": 0.03, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.5765799312977243, | |
| "eval_loss": 3.2025678157806396, | |
| "eval_runtime": 2.837, | |
| "eval_samples_per_second": 194.921, | |
| "eval_steps_per_second": 12.337, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "learning_rate": 3.0968934911242606e-05, | |
| "loss": 0.0333, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.5689553713820321, | |
| "eval_loss": 3.211634397506714, | |
| "eval_runtime": 2.9044, | |
| "eval_samples_per_second": 190.402, | |
| "eval_steps_per_second": 12.051, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "learning_rate": 3.0207100591715974e-05, | |
| "loss": 0.0321, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_f1": 0.5756108062994573, | |
| "eval_loss": 3.2678425312042236, | |
| "eval_runtime": 2.8888, | |
| "eval_samples_per_second": 191.428, | |
| "eval_steps_per_second": 12.116, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 26.74, | |
| "learning_rate": 2.944526627218935e-05, | |
| "loss": 0.0263, | |
| "step": 2781 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_f1": 0.5758065273285641, | |
| "eval_loss": 3.2969822883605957, | |
| "eval_runtime": 2.9527, | |
| "eval_samples_per_second": 187.286, | |
| "eval_steps_per_second": 11.854, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 27.73, | |
| "learning_rate": 2.8683431952662725e-05, | |
| "loss": 0.0281, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_f1": 0.5781354966097151, | |
| "eval_loss": 3.3730037212371826, | |
| "eval_runtime": 2.8614, | |
| "eval_samples_per_second": 193.264, | |
| "eval_steps_per_second": 12.232, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 28.72, | |
| "learning_rate": 2.7921597633136097e-05, | |
| "loss": 0.0282, | |
| "step": 2987 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_f1": 0.5741866124789994, | |
| "eval_loss": 3.364117383956909, | |
| "eval_runtime": 2.8696, | |
| "eval_samples_per_second": 192.707, | |
| "eval_steps_per_second": 12.197, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 29.71, | |
| "learning_rate": 2.7159763313609472e-05, | |
| "loss": 0.0296, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_f1": 0.5771762774162508, | |
| "eval_loss": 3.3623032569885254, | |
| "eval_runtime": 2.9567, | |
| "eval_samples_per_second": 187.031, | |
| "eval_steps_per_second": 11.837, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 30.7, | |
| "learning_rate": 2.6397928994082844e-05, | |
| "loss": 0.0308, | |
| "step": 3193 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_f1": 0.578537002980747, | |
| "eval_loss": 3.4039528369903564, | |
| "eval_runtime": 2.8263, | |
| "eval_samples_per_second": 195.66, | |
| "eval_steps_per_second": 12.384, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 31.69, | |
| "learning_rate": 2.5636094674556216e-05, | |
| "loss": 0.0308, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_f1": 0.575919412837488, | |
| "eval_loss": 3.392319679260254, | |
| "eval_runtime": 2.9375, | |
| "eval_samples_per_second": 188.254, | |
| "eval_steps_per_second": 11.915, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 32.68, | |
| "learning_rate": 2.4874260355029588e-05, | |
| "loss": 0.0262, | |
| "step": 3399 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_f1": 0.5563772891428104, | |
| "eval_loss": 3.4757542610168457, | |
| "eval_runtime": 2.865, | |
| "eval_samples_per_second": 193.019, | |
| "eval_steps_per_second": 12.216, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 33.67, | |
| "learning_rate": 2.411242603550296e-05, | |
| "loss": 0.0319, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_f1": 0.5738865992034025, | |
| "eval_loss": 3.425334930419922, | |
| "eval_runtime": 2.9109, | |
| "eval_samples_per_second": 189.974, | |
| "eval_steps_per_second": 12.024, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 34.66, | |
| "learning_rate": 2.3350591715976332e-05, | |
| "loss": 0.0277, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_f1": 0.5785980513801816, | |
| "eval_loss": 3.4686436653137207, | |
| "eval_runtime": 2.93, | |
| "eval_samples_per_second": 188.738, | |
| "eval_steps_per_second": 11.945, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 35.65, | |
| "learning_rate": 2.2588757396449707e-05, | |
| "loss": 0.0289, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_f1": 0.5836924697871717, | |
| "eval_loss": 3.462078094482422, | |
| "eval_runtime": 2.8428, | |
| "eval_samples_per_second": 194.527, | |
| "eval_steps_per_second": 12.312, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 36.64, | |
| "learning_rate": 2.182692307692308e-05, | |
| "loss": 0.0247, | |
| "step": 3811 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_f1": 0.5734707197245945, | |
| "eval_loss": 3.481998920440674, | |
| "eval_runtime": 3.0017, | |
| "eval_samples_per_second": 184.228, | |
| "eval_steps_per_second": 11.66, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 37.63, | |
| "learning_rate": 2.106508875739645e-05, | |
| "loss": 0.0303, | |
| "step": 3914 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_f1": 0.5770262969511715, | |
| "eval_loss": 3.466510772705078, | |
| "eval_runtime": 2.8587, | |
| "eval_samples_per_second": 193.442, | |
| "eval_steps_per_second": 12.243, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 38.62, | |
| "learning_rate": 2.0303254437869823e-05, | |
| "loss": 0.0239, | |
| "step": 4017 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_f1": 0.5666519467364683, | |
| "eval_loss": 3.5593807697296143, | |
| "eval_runtime": 2.8222, | |
| "eval_samples_per_second": 195.946, | |
| "eval_steps_per_second": 12.402, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 39.62, | |
| "learning_rate": 1.9541420118343195e-05, | |
| "loss": 0.0262, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_f1": 0.5808476343157906, | |
| "eval_loss": 3.5302422046661377, | |
| "eval_runtime": 2.8598, | |
| "eval_samples_per_second": 193.368, | |
| "eval_steps_per_second": 12.238, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 40.61, | |
| "learning_rate": 1.8779585798816567e-05, | |
| "loss": 0.0282, | |
| "step": 4223 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_f1": 0.5835890408164021, | |
| "eval_loss": 3.4572339057922363, | |
| "eval_runtime": 2.8566, | |
| "eval_samples_per_second": 193.584, | |
| "eval_steps_per_second": 12.252, | |
| "step": 4264 | |
| }, | |
| { | |
| "epoch": 41.6, | |
| "learning_rate": 1.8025147928994084e-05, | |
| "loss": 0.0469, | |
| "step": 4326 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_f1": 0.5685331156394952, | |
| "eval_loss": 3.609334707260132, | |
| "eval_runtime": 2.8251, | |
| "eval_samples_per_second": 195.747, | |
| "eval_steps_per_second": 12.389, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 42.59, | |
| "learning_rate": 1.7263313609467456e-05, | |
| "loss": 0.0302, | |
| "step": 4429 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_f1": 0.5684067370608473, | |
| "eval_loss": 3.6115400791168213, | |
| "eval_runtime": 2.9194, | |
| "eval_samples_per_second": 189.42, | |
| "eval_steps_per_second": 11.989, | |
| "step": 4472 | |
| }, | |
| { | |
| "epoch": 43.58, | |
| "learning_rate": 1.650147928994083e-05, | |
| "loss": 0.0289, | |
| "step": 4532 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_f1": 0.5757900647671246, | |
| "eval_loss": 3.629568099975586, | |
| "eval_runtime": 2.9036, | |
| "eval_samples_per_second": 190.453, | |
| "eval_steps_per_second": 12.054, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 44.57, | |
| "learning_rate": 1.5739644970414204e-05, | |
| "loss": 0.0254, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_f1": 0.5689505752768721, | |
| "eval_loss": 3.7250843048095703, | |
| "eval_runtime": 2.9726, | |
| "eval_samples_per_second": 186.035, | |
| "eval_steps_per_second": 11.774, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 45.56, | |
| "learning_rate": 1.4977810650887576e-05, | |
| "loss": 0.0283, | |
| "step": 4738 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_f1": 0.5592198654774546, | |
| "eval_loss": 3.726353645324707, | |
| "eval_runtime": 2.9328, | |
| "eval_samples_per_second": 188.559, | |
| "eval_steps_per_second": 11.934, | |
| "step": 4784 | |
| }, | |
| { | |
| "epoch": 46.55, | |
| "learning_rate": 1.4215976331360948e-05, | |
| "loss": 0.0246, | |
| "step": 4841 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_f1": 0.5650157110711802, | |
| "eval_loss": 3.7832093238830566, | |
| "eval_runtime": 2.9067, | |
| "eval_samples_per_second": 190.249, | |
| "eval_steps_per_second": 12.041, | |
| "step": 4888 | |
| }, | |
| { | |
| "epoch": 47.54, | |
| "learning_rate": 1.345414201183432e-05, | |
| "loss": 0.0311, | |
| "step": 4944 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_f1": 0.5681512072556809, | |
| "eval_loss": 3.6964025497436523, | |
| "eval_runtime": 2.9008, | |
| "eval_samples_per_second": 190.634, | |
| "eval_steps_per_second": 12.065, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 48.53, | |
| "learning_rate": 1.2692307692307691e-05, | |
| "loss": 0.0268, | |
| "step": 5047 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_f1": 0.5674808111122996, | |
| "eval_loss": 3.7195167541503906, | |
| "eval_runtime": 2.8604, | |
| "eval_samples_per_second": 193.33, | |
| "eval_steps_per_second": 12.236, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 49.52, | |
| "learning_rate": 1.1930473372781067e-05, | |
| "loss": 0.0293, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_f1": 0.5614419693521525, | |
| "eval_loss": 3.752530097961426, | |
| "eval_runtime": 2.8761, | |
| "eval_samples_per_second": 192.275, | |
| "eval_steps_per_second": 12.169, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 50.51, | |
| "learning_rate": 1.1168639053254439e-05, | |
| "loss": 0.0282, | |
| "step": 5253 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_f1": 0.5655838635083059, | |
| "eval_loss": 3.7514984607696533, | |
| "eval_runtime": 2.8609, | |
| "eval_samples_per_second": 193.296, | |
| "eval_steps_per_second": 12.234, | |
| "step": 5304 | |
| }, | |
| { | |
| "epoch": 51.5, | |
| "learning_rate": 1.040680473372781e-05, | |
| "loss": 0.0248, | |
| "step": 5356 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_f1": 0.5590951084274065, | |
| "eval_loss": 3.7639315128326416, | |
| "eval_runtime": 2.8211, | |
| "eval_samples_per_second": 196.025, | |
| "eval_steps_per_second": 12.407, | |
| "step": 5408 | |
| }, | |
| { | |
| "epoch": 52.49, | |
| "learning_rate": 9.644970414201183e-06, | |
| "loss": 0.0257, | |
| "step": 5459 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_f1": 0.5480134247467852, | |
| "eval_loss": 3.824922800064087, | |
| "eval_runtime": 2.8475, | |
| "eval_samples_per_second": 194.205, | |
| "eval_steps_per_second": 12.291, | |
| "step": 5512 | |
| }, | |
| { | |
| "epoch": 53.48, | |
| "learning_rate": 8.883136094674558e-06, | |
| "loss": 0.0235, | |
| "step": 5562 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_f1": 0.5565796472147394, | |
| "eval_loss": 3.7871253490448, | |
| "eval_runtime": 2.9817, | |
| "eval_samples_per_second": 185.462, | |
| "eval_steps_per_second": 11.738, | |
| "step": 5616 | |
| }, | |
| { | |
| "epoch": 54.47, | |
| "learning_rate": 8.12130177514793e-06, | |
| "loss": 0.0299, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_f1": 0.5574154263000176, | |
| "eval_loss": 3.788760185241699, | |
| "eval_runtime": 2.8852, | |
| "eval_samples_per_second": 191.665, | |
| "eval_steps_per_second": 12.131, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 55.46, | |
| "learning_rate": 7.359467455621302e-06, | |
| "loss": 0.0277, | |
| "step": 5768 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_f1": 0.563024311843682, | |
| "eval_loss": 3.7907044887542725, | |
| "eval_runtime": 2.8658, | |
| "eval_samples_per_second": 192.962, | |
| "eval_steps_per_second": 12.213, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 56.45, | |
| "learning_rate": 6.597633136094675e-06, | |
| "loss": 0.0256, | |
| "step": 5871 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_f1": 0.56153234588093, | |
| "eval_loss": 3.799422264099121, | |
| "eval_runtime": 2.8666, | |
| "eval_samples_per_second": 192.912, | |
| "eval_steps_per_second": 12.21, | |
| "step": 5928 | |
| }, | |
| { | |
| "epoch": 57.44, | |
| "learning_rate": 5.8357988165680474e-06, | |
| "loss": 0.0226, | |
| "step": 5974 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_f1": 0.5555061070073688, | |
| "eval_loss": 3.811858892440796, | |
| "eval_runtime": 2.8683, | |
| "eval_samples_per_second": 192.797, | |
| "eval_steps_per_second": 12.202, | |
| "step": 6032 | |
| }, | |
| { | |
| "epoch": 58.43, | |
| "learning_rate": 5.07396449704142e-06, | |
| "loss": 0.0284, | |
| "step": 6077 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_f1": 0.5597671150511061, | |
| "eval_loss": 3.8192451000213623, | |
| "eval_runtime": 2.8512, | |
| "eval_samples_per_second": 193.951, | |
| "eval_steps_per_second": 12.275, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 59.42, | |
| "learning_rate": 4.312130177514793e-06, | |
| "loss": 0.0233, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_f1": 0.5584681716027172, | |
| "eval_loss": 3.823091983795166, | |
| "eval_runtime": 2.9385, | |
| "eval_samples_per_second": 188.191, | |
| "eval_steps_per_second": 11.911, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 60.41, | |
| "learning_rate": 3.550295857988166e-06, | |
| "loss": 0.0266, | |
| "step": 6283 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_f1": 0.5625000576804086, | |
| "eval_loss": 3.8085415363311768, | |
| "eval_runtime": 2.9015, | |
| "eval_samples_per_second": 190.588, | |
| "eval_steps_per_second": 12.063, | |
| "step": 6344 | |
| }, | |
| { | |
| "epoch": 61.4, | |
| "learning_rate": 2.7958579881656803e-06, | |
| "loss": 0.0267, | |
| "step": 6386 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_f1": 0.5622167257088028, | |
| "eval_loss": 3.80642032623291, | |
| "eval_runtime": 2.8514, | |
| "eval_samples_per_second": 193.94, | |
| "eval_steps_per_second": 12.275, | |
| "step": 6448 | |
| }, | |
| { | |
| "epoch": 62.39, | |
| "learning_rate": 2.034023668639053e-06, | |
| "loss": 0.0281, | |
| "step": 6489 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_f1": 0.564106811375439, | |
| "eval_loss": 3.8057875633239746, | |
| "eval_runtime": 2.8945, | |
| "eval_samples_per_second": 191.055, | |
| "eval_steps_per_second": 12.092, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 63.38, | |
| "learning_rate": 1.2721893491124261e-06, | |
| "loss": 0.025, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_f1": 0.5644375312998279, | |
| "eval_loss": 3.807055950164795, | |
| "eval_runtime": 2.8941, | |
| "eval_samples_per_second": 191.08, | |
| "eval_steps_per_second": 12.094, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 64.38, | |
| "learning_rate": 5.103550295857988e-07, | |
| "loss": 0.0226, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_f1": 0.5644375312998279, | |
| "eval_loss": 3.807528018951416, | |
| "eval_runtime": 2.8626, | |
| "eval_samples_per_second": 193.181, | |
| "eval_steps_per_second": 12.227, | |
| "step": 6760 | |
| } | |
| ], | |
| "max_steps": 6760, | |
| "num_train_epochs": 65, | |
| "total_flos": 1.4286659901696e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |