| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 8.300418853759766, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.4092, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8521303258145363, | |
| "eval_f1": 0.7892045810686176, | |
| "eval_loss": 0.3456897437572479, | |
| "eval_precision": 0.8929804104477612, | |
| "eval_recall": 0.7553646117475905, | |
| "eval_runtime": 1.6637, | |
| "eval_samples_per_second": 239.829, | |
| "eval_steps_per_second": 30.054, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 9.286004066467285, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2282, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8676331036823873, | |
| "eval_loss": 0.258427232503891, | |
| "eval_precision": 0.8749292230261088, | |
| "eval_recall": 0.8612474995453718, | |
| "eval_runtime": 1.6592, | |
| "eval_samples_per_second": 240.478, | |
| "eval_steps_per_second": 30.135, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 13.956122398376465, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.138, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8429546050905274, | |
| "eval_loss": 0.44172462821006775, | |
| "eval_precision": 0.882529902138456, | |
| "eval_recall": 0.8198763411529368, | |
| "eval_runtime": 1.6567, | |
| "eval_samples_per_second": 240.84, | |
| "eval_steps_per_second": 30.18, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.09012622386217117, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0837, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8793019197207679, | |
| "eval_loss": 0.4037090241909027, | |
| "eval_precision": 0.8893184421534936, | |
| "eval_recall": 0.8708401527550463, | |
| "eval_runtime": 1.6737, | |
| "eval_samples_per_second": 238.388, | |
| "eval_steps_per_second": 29.873, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 5.347772121429443, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0426, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9047619047619048, | |
| "eval_f1": 0.8873149414352814, | |
| "eval_loss": 0.5462044477462769, | |
| "eval_precision": 0.8806277372262774, | |
| "eval_recall": 0.8951172940534643, | |
| "eval_runtime": 1.6751, | |
| "eval_samples_per_second": 238.188, | |
| "eval_steps_per_second": 29.848, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.011684279888868332, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0502, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8707140332272888, | |
| "eval_loss": 0.5626452565193176, | |
| "eval_precision": 0.8618432385874246, | |
| "eval_recall": 0.8819785415530097, | |
| "eval_runtime": 1.6815, | |
| "eval_samples_per_second": 237.294, | |
| "eval_steps_per_second": 29.736, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.015737071633338928, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0242, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9072681704260651, | |
| "eval_f1": 0.8848664457009163, | |
| "eval_loss": 0.6241247653961182, | |
| "eval_precision": 0.8977236138837015, | |
| "eval_recall": 0.8743862520458265, | |
| "eval_runtime": 1.6765, | |
| "eval_samples_per_second": 238.002, | |
| "eval_steps_per_second": 29.825, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.004997015465050936, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0217, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8872180451127819, | |
| "eval_f1": 0.8692251105268142, | |
| "eval_loss": 0.7096332907676697, | |
| "eval_precision": 0.8579132638693325, | |
| "eval_recall": 0.885206401163848, | |
| "eval_runtime": 1.6742, | |
| "eval_samples_per_second": 238.324, | |
| "eval_steps_per_second": 29.865, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.004219838418066502, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.0229, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9122807017543859, | |
| "eval_f1": 0.8954723392788977, | |
| "eval_loss": 0.611499547958374, | |
| "eval_precision": 0.8909569746108776, | |
| "eval_recall": 0.9004364429896345, | |
| "eval_runtime": 1.66, | |
| "eval_samples_per_second": 240.365, | |
| "eval_steps_per_second": 30.121, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.006534805987030268, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0109, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8744522298370696, | |
| "eval_loss": 0.7575166821479797, | |
| "eval_precision": 0.8795731707317074, | |
| "eval_recall": 0.8697945080923805, | |
| "eval_runtime": 1.6781, | |
| "eval_samples_per_second": 237.769, | |
| "eval_steps_per_second": 29.796, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.00184684619307518, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.0068, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9072681704260651, | |
| "eval_f1": 0.8861029031685659, | |
| "eval_loss": 0.7536790370941162, | |
| "eval_precision": 0.8937558980811576, | |
| "eval_recall": 0.879387161302055, | |
| "eval_runtime": 1.6764, | |
| "eval_samples_per_second": 238.006, | |
| "eval_steps_per_second": 29.825, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.01189060416072607, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0131, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8775533117267087, | |
| "eval_loss": 0.7247006297111511, | |
| "eval_precision": 0.873246730188791, | |
| "eval_recall": 0.8822967812329514, | |
| "eval_runtime": 1.6752, | |
| "eval_samples_per_second": 238.178, | |
| "eval_steps_per_second": 29.847, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0015333497431129217, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.0101, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8763538792940554, | |
| "eval_loss": 0.7927835583686829, | |
| "eval_precision": 0.8754297605404427, | |
| "eval_recall": 0.877295871976723, | |
| "eval_runtime": 1.6839, | |
| "eval_samples_per_second": 236.944, | |
| "eval_steps_per_second": 29.692, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.0037907068617641926, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0061, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9072681704260651, | |
| "eval_f1": 0.8884169154604891, | |
| "eval_loss": 0.784883975982666, | |
| "eval_precision": 0.8874630556728391, | |
| "eval_recall": 0.8893889798145117, | |
| "eval_runtime": 1.6745, | |
| "eval_samples_per_second": 238.284, | |
| "eval_steps_per_second": 29.86, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.0031544596422463655, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0135, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8731122745782431, | |
| "eval_loss": 0.781574010848999, | |
| "eval_precision": 0.8829705994654449, | |
| "eval_recall": 0.864793598836152, | |
| "eval_runtime": 1.6721, | |
| "eval_samples_per_second": 238.624, | |
| "eval_steps_per_second": 29.903, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0015803646529093385, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0081, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8757339815412664, | |
| "eval_loss": 0.7727145552635193, | |
| "eval_precision": 0.8766906299500427, | |
| "eval_recall": 0.8747954173486088, | |
| "eval_runtime": 1.6771, | |
| "eval_samples_per_second": 237.909, | |
| "eval_steps_per_second": 29.813, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.00223415601067245, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.0027, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8763538792940554, | |
| "eval_loss": 0.81281578540802, | |
| "eval_precision": 0.8754297605404427, | |
| "eval_recall": 0.877295871976723, | |
| "eval_runtime": 1.6614, | |
| "eval_samples_per_second": 240.156, | |
| "eval_steps_per_second": 30.095, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0023393542505800724, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0041, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8817957385392532, | |
| "eval_loss": 0.8081415891647339, | |
| "eval_precision": 0.8827677592299257, | |
| "eval_recall": 0.8808419712675032, | |
| "eval_runtime": 1.6587, | |
| "eval_samples_per_second": 240.553, | |
| "eval_steps_per_second": 30.145, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.0010473760776221752, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0018, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8793019197207679, | |
| "eval_loss": 0.8038576245307922, | |
| "eval_precision": 0.8893184421534936, | |
| "eval_recall": 0.8708401527550463, | |
| "eval_runtime": 1.6566, | |
| "eval_samples_per_second": 240.849, | |
| "eval_steps_per_second": 30.182, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0015798051608726382, | |
| "learning_rate": 0.0, | |
| "loss": 0.0025, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8799463033398397, | |
| "eval_loss": 0.803022563457489, | |
| "eval_precision": 0.8874803397294746, | |
| "eval_recall": 0.8733406073831607, | |
| "eval_runtime": 1.6611, | |
| "eval_samples_per_second": 240.198, | |
| "eval_steps_per_second": 30.1, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7584162436176000.0, | |
| "train_loss": 0.05501617935226589, | |
| "train_runtime": 867.5592, | |
| "train_samples_per_second": 83.867, | |
| "train_steps_per_second": 2.812 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7584162436176000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |