| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 10560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.387133002281189, |
| "learning_rate": 4.75e-05, |
| "loss": 0.713, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8990360727417271, |
| "eval_f1": 0.42582781456953644, |
| "eval_loss": 0.3558189868927002, |
| "eval_precision": 0.49499615088529636, |
| "eval_recall": 0.3736199883788495, |
| "eval_runtime": 4.469, |
| "eval_samples_per_second": 209.217, |
| "eval_steps_per_second": 3.356, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8080163598060608, |
| "learning_rate": 4.5e-05, |
| "loss": 0.2793, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9392328331511478, |
| "eval_f1": 0.7174307174307173, |
| "eval_loss": 0.19311487674713135, |
| "eval_precision": 0.647196261682243, |
| "eval_recall": 0.8047646717024985, |
| "eval_runtime": 4.5691, |
| "eval_samples_per_second": 204.634, |
| "eval_steps_per_second": 3.283, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.451019287109375, |
| "learning_rate": 4.25e-05, |
| "loss": 0.1876, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9461890092417768, |
| "eval_f1": 0.7516120711890638, |
| "eval_loss": 0.161931574344635, |
| "eval_precision": 0.6757884972170687, |
| "eval_recall": 0.8466008134805346, |
| "eval_runtime": 4.5215, |
| "eval_samples_per_second": 206.79, |
| "eval_steps_per_second": 3.317, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.173278570175171, |
| "learning_rate": 4e-05, |
| "loss": 0.1593, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.95553015999205, |
| "eval_f1": 0.7947551511907948, |
| "eval_loss": 0.14164301753044128, |
| "eval_precision": 0.7366071428571429, |
| "eval_recall": 0.862870424171993, |
| "eval_runtime": 4.5447, |
| "eval_samples_per_second": 205.734, |
| "eval_steps_per_second": 3.301, |
| "step": 2112 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.1243175268173218, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.1412, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9559276557686575, |
| "eval_f1": 0.7968959058067969, |
| "eval_loss": 0.13503268361091614, |
| "eval_precision": 0.7385912698412699, |
| "eval_recall": 0.8651946542707728, |
| "eval_runtime": 4.497, |
| "eval_samples_per_second": 207.916, |
| "eval_steps_per_second": 3.336, |
| "step": 2640 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.4806779623031616, |
| "learning_rate": 3.5e-05, |
| "loss": 0.1325, |
| "step": 3168 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.95553015999205, |
| "eval_f1": 0.795219123505976, |
| "eval_loss": 0.13609066605567932, |
| "eval_precision": 0.7323874755381604, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.5221, |
| "eval_samples_per_second": 206.764, |
| "eval_steps_per_second": 3.317, |
| "step": 3168 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 2.750013828277588, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.126, |
| "step": 3696 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9553314121037464, |
| "eval_f1": 0.7943751658264792, |
| "eval_loss": 0.13828907907009125, |
| "eval_precision": 0.73095703125, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.5248, |
| "eval_samples_per_second": 206.637, |
| "eval_steps_per_second": 3.315, |
| "step": 3696 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.7366927862167358, |
| "learning_rate": 3e-05, |
| "loss": 0.1194, |
| "step": 4224 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9583126304283017, |
| "eval_f1": 0.8088274395107684, |
| "eval_loss": 0.13490265607833862, |
| "eval_precision": 0.7455882352941177, |
| "eval_recall": 0.8837884950610111, |
| "eval_runtime": 4.4844, |
| "eval_samples_per_second": 208.499, |
| "eval_steps_per_second": 3.345, |
| "step": 4224 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 1.5051740407943726, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.1137, |
| "step": 4752 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9583126304283017, |
| "eval_f1": 0.8071869133816038, |
| "eval_loss": 0.12994244694709778, |
| "eval_precision": 0.7495019920318725, |
| "eval_recall": 0.8744915746658919, |
| "eval_runtime": 4.8571, |
| "eval_samples_per_second": 192.501, |
| "eval_steps_per_second": 3.088, |
| "step": 4752 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.5836716890335083, |
| "learning_rate": 2.5e-05, |
| "loss": 0.1112, |
| "step": 5280 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9579151346516943, |
| "eval_f1": 0.8028962188254224, |
| "eval_loss": 0.1285104751586914, |
| "eval_precision": 0.7455179282868526, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.5129, |
| "eval_samples_per_second": 207.185, |
| "eval_steps_per_second": 3.324, |
| "step": 5280 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 2.5579893589019775, |
| "learning_rate": 2.25e-05, |
| "loss": 0.1065, |
| "step": 5808 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.958710126204909, |
| "eval_f1": 0.8118811881188118, |
| "eval_loss": 0.13038235902786255, |
| "eval_precision": 0.7524801587301587, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.5151, |
| "eval_samples_per_second": 207.083, |
| "eval_steps_per_second": 3.322, |
| "step": 5808 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 1.483597993850708, |
| "learning_rate": 2e-05, |
| "loss": 0.1044, |
| "step": 6336 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9577163867633907, |
| "eval_f1": 0.8106080900080364, |
| "eval_loss": 0.1328919380903244, |
| "eval_precision": 0.7519880715705766, |
| "eval_recall": 0.8791400348634515, |
| "eval_runtime": 4.5418, |
| "eval_samples_per_second": 205.867, |
| "eval_steps_per_second": 3.303, |
| "step": 6336 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 2.8680760860443115, |
| "learning_rate": 1.75e-05, |
| "loss": 0.1026, |
| "step": 6864 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9585113783166054, |
| "eval_f1": 0.8076405703524348, |
| "eval_loss": 0.1257127821445465, |
| "eval_precision": 0.7520040080160321, |
| "eval_recall": 0.8721673445671121, |
| "eval_runtime": 4.5829, |
| "eval_samples_per_second": 204.019, |
| "eval_steps_per_second": 3.273, |
| "step": 6864 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.6697184443473816, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0989, |
| "step": 7392 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9599026135347312, |
| "eval_f1": 0.8167341430499325, |
| "eval_loss": 0.12645690143108368, |
| "eval_precision": 0.7626008064516129, |
| "eval_recall": 0.8791400348634515, |
| "eval_runtime": 4.5334, |
| "eval_samples_per_second": 206.248, |
| "eval_steps_per_second": 3.309, |
| "step": 7392 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.7948130369186401, |
| "learning_rate": 1.25e-05, |
| "loss": 0.0982, |
| "step": 7920 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9597038656464275, |
| "eval_f1": 0.8180102453491508, |
| "eval_loss": 0.1280616968870163, |
| "eval_precision": 0.7630784708249497, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.5387, |
| "eval_samples_per_second": 206.008, |
| "eval_steps_per_second": 3.305, |
| "step": 7920 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 2.107656717300415, |
| "learning_rate": 1e-05, |
| "loss": 0.0974, |
| "step": 8448 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9597038656464275, |
| "eval_f1": 0.8093322606596943, |
| "eval_loss": 0.12642435729503632, |
| "eval_precision": 0.7514940239043825, |
| "eval_recall": 0.8768158047646717, |
| "eval_runtime": 4.5132, |
| "eval_samples_per_second": 207.172, |
| "eval_steps_per_second": 3.324, |
| "step": 8448 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.980239748954773, |
| "learning_rate": 7.5e-06, |
| "loss": 0.0966, |
| "step": 8976 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9589088740932128, |
| "eval_f1": 0.814021942734814, |
| "eval_loss": 0.12821656465530396, |
| "eval_precision": 0.7544642857142857, |
| "eval_recall": 0.8837884950610111, |
| "eval_runtime": 4.5105, |
| "eval_samples_per_second": 207.293, |
| "eval_steps_per_second": 3.326, |
| "step": 8976 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 1.8313038349151611, |
| "learning_rate": 5e-06, |
| "loss": 0.095, |
| "step": 9504 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9589088740932128, |
| "eval_f1": 0.8144966442953021, |
| "eval_loss": 0.12922193109989166, |
| "eval_precision": 0.7569860279441117, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.5244, |
| "eval_samples_per_second": 206.658, |
| "eval_steps_per_second": 3.315, |
| "step": 9504 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.775888204574585, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0941, |
| "step": 10032 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9595051177581239, |
| "eval_f1": 0.8153722117710293, |
| "eval_loss": 0.12684974074363708, |
| "eval_precision": 0.7585, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.5294, |
| "eval_samples_per_second": 206.429, |
| "eval_steps_per_second": 3.312, |
| "step": 10032 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 2.084799289703369, |
| "learning_rate": 0.0, |
| "loss": 0.0948, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9593063698698201, |
| "eval_f1": 0.8162496637072908, |
| "eval_loss": 0.12777453660964966, |
| "eval_precision": 0.7600200400801603, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.4979, |
| "eval_samples_per_second": 207.876, |
| "eval_steps_per_second": 3.335, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 10560, |
| "total_flos": 4552961808488766.0, |
| "train_loss": 0.15358480504064848, |
| "train_runtime": 1231.8568, |
| "train_samples_per_second": 136.98, |
| "train_steps_per_second": 8.572 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 10560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 4552961808488766.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|