| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 20.136756896972656, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.3889, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8045112781954887, | |
| "eval_f1": 0.7109554944646705, | |
| "eval_loss": 0.4199941158294678, | |
| "eval_precision": 0.8255285412262157, | |
| "eval_recall": 0.6866703036915802, | |
| "eval_runtime": 1.6394, | |
| "eval_samples_per_second": 243.375, | |
| "eval_steps_per_second": 30.498, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 24.683944702148438, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2335, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8739355018846853, | |
| "eval_loss": 0.3136064410209656, | |
| "eval_precision": 0.864426651415499, | |
| "eval_recall": 0.886252045826514, | |
| "eval_runtime": 1.6497, | |
| "eval_samples_per_second": 241.866, | |
| "eval_steps_per_second": 30.309, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 66.46725463867188, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.1411, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8751002084335417, | |
| "eval_loss": 0.35689812898635864, | |
| "eval_precision": 0.8780701754385964, | |
| "eval_recall": 0.8722949627204946, | |
| "eval_runtime": 1.6606, | |
| "eval_samples_per_second": 240.275, | |
| "eval_steps_per_second": 30.11, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 42.06414031982422, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1078, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9147869674185464, | |
| "eval_f1": 0.8991765265473572, | |
| "eval_loss": 0.35370269417762756, | |
| "eval_precision": 0.8922773722627737, | |
| "eval_recall": 0.9072104018912529, | |
| "eval_runtime": 1.653, | |
| "eval_samples_per_second": 241.378, | |
| "eval_steps_per_second": 30.248, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 88.54315185546875, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0822, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8439374185136896, | |
| "eval_loss": 0.5069139003753662, | |
| "eval_precision": 0.8794955044955045, | |
| "eval_recall": 0.822376795781051, | |
| "eval_runtime": 1.6524, | |
| "eval_samples_per_second": 241.466, | |
| "eval_steps_per_second": 30.259, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 114.8245849609375, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0529, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9072681704260651, | |
| "eval_f1": 0.888964101175568, | |
| "eval_loss": 0.42624175548553467, | |
| "eval_precision": 0.8862007168458781, | |
| "eval_recall": 0.8918894344426259, | |
| "eval_runtime": 1.6561, | |
| "eval_samples_per_second": 240.934, | |
| "eval_steps_per_second": 30.192, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.022069375962018967, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0365, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8769602202215754, | |
| "eval_loss": 0.5586097836494446, | |
| "eval_precision": 0.8742831541218639, | |
| "eval_recall": 0.8797963266048372, | |
| "eval_runtime": 1.6532, | |
| "eval_samples_per_second": 241.352, | |
| "eval_steps_per_second": 30.245, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.0406961552798748, | |
| "learning_rate": 3e-05, | |
| "loss": 0.033, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8674628282189181, | |
| "eval_loss": 0.5012311935424805, | |
| "eval_precision": 0.8869858462356303, | |
| "eval_recall": 0.8530187306783051, | |
| "eval_runtime": 1.6551, | |
| "eval_samples_per_second": 241.075, | |
| "eval_steps_per_second": 30.21, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.6461573243141174, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.0248, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8631217838765008, | |
| "eval_loss": 0.583283007144928, | |
| "eval_precision": 0.8872804935927859, | |
| "eval_recall": 0.8462447717766868, | |
| "eval_runtime": 1.6572, | |
| "eval_samples_per_second": 240.772, | |
| "eval_steps_per_second": 30.172, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.12847253680229187, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0123, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8805765113084321, | |
| "eval_loss": 0.6610547304153442, | |
| "eval_precision": 0.8857796167247387, | |
| "eval_recall": 0.8758410620112748, | |
| "eval_runtime": 1.6505, | |
| "eval_samples_per_second": 241.744, | |
| "eval_steps_per_second": 30.294, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.003805552376434207, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.0088, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8682132746146587, | |
| "eval_loss": 0.6935672760009766, | |
| "eval_precision": 0.884741537654159, | |
| "eval_recall": 0.8555191853064193, | |
| "eval_runtime": 1.6547, | |
| "eval_samples_per_second": 241.138, | |
| "eval_steps_per_second": 30.218, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.0037182692904025316, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0074, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8805765113084321, | |
| "eval_loss": 0.6789939403533936, | |
| "eval_precision": 0.8857796167247387, | |
| "eval_recall": 0.8758410620112748, | |
| "eval_runtime": 1.6567, | |
| "eval_samples_per_second": 240.838, | |
| "eval_steps_per_second": 30.18, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0025616472121328115, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.0141, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8731122745782431, | |
| "eval_loss": 0.6981470584869385, | |
| "eval_precision": 0.8829705994654449, | |
| "eval_recall": 0.864793598836152, | |
| "eval_runtime": 1.6639, | |
| "eval_samples_per_second": 239.794, | |
| "eval_steps_per_second": 30.049, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.006673410069197416, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0034, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8751002084335417, | |
| "eval_loss": 0.7144644856452942, | |
| "eval_precision": 0.8780701754385964, | |
| "eval_recall": 0.8722949627204946, | |
| "eval_runtime": 1.6531, | |
| "eval_samples_per_second": 241.366, | |
| "eval_steps_per_second": 30.246, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.0030696168541908264, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0059, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.8758710801393728, | |
| "eval_loss": 0.7303631901741028, | |
| "eval_precision": 0.8870983228779925, | |
| "eval_recall": 0.8665666484815421, | |
| "eval_runtime": 1.6541, | |
| "eval_samples_per_second": 241.215, | |
| "eval_steps_per_second": 30.227, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0017388605047017336, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0056, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.879667048676036, | |
| "eval_loss": 0.7517656683921814, | |
| "eval_precision": 0.8778361344537815, | |
| "eval_recall": 0.8815693762502272, | |
| "eval_runtime": 1.6536, | |
| "eval_samples_per_second": 241.288, | |
| "eval_steps_per_second": 30.237, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.002333118114620447, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.0039, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8793019197207679, | |
| "eval_loss": 0.7390431761741638, | |
| "eval_precision": 0.8893184421534936, | |
| "eval_recall": 0.8708401527550463, | |
| "eval_runtime": 1.655, | |
| "eval_samples_per_second": 241.08, | |
| "eval_steps_per_second": 30.211, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0018157872837036848, | |
| "learning_rate": 5e-06, | |
| "loss": 0.004, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8799463033398397, | |
| "eval_loss": 0.764133095741272, | |
| "eval_precision": 0.8874803397294746, | |
| "eval_recall": 0.8733406073831607, | |
| "eval_runtime": 1.6667, | |
| "eval_samples_per_second": 239.389, | |
| "eval_steps_per_second": 29.999, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.0015570241957902908, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.007, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8799463033398397, | |
| "eval_loss": 0.7847548723220825, | |
| "eval_precision": 0.8874803397294746, | |
| "eval_recall": 0.8733406073831607, | |
| "eval_runtime": 1.664, | |
| "eval_samples_per_second": 239.788, | |
| "eval_steps_per_second": 30.049, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.002853752113878727, | |
| "learning_rate": 0.0, | |
| "loss": 0.0042, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8799463033398397, | |
| "eval_loss": 0.790817379951477, | |
| "eval_precision": 0.8874803397294746, | |
| "eval_recall": 0.8733406073831607, | |
| "eval_runtime": 1.6678, | |
| "eval_samples_per_second": 239.236, | |
| "eval_steps_per_second": 29.979, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7584162436176000.0, | |
| "train_loss": 0.0588726386183598, | |
| "train_runtime": 864.0501, | |
| "train_samples_per_second": 84.208, | |
| "train_steps_per_second": 2.824 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7584162436176000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |