| { | |
| "best_metric": 0.05933361500501633, | |
| "best_model_checkpoint": "runs/roberta-base-500000-samples-512-max-len-64-train-batch-size-8-test-batch-size-3-epochs-1e-05-lr-0.1-warmup-ratio/checkpoint-12000", | |
| "epoch": 2.1331058020477816, | |
| "eval_steps": 1500, | |
| "global_step": 15000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_accuracy": 0.4971, | |
| "eval_f1": 0.6639222657346396, | |
| "eval_loss": 0.6927798390388489, | |
| "eval_precision": 0.4973866025833584, | |
| "eval_recall": 0.9981112361356695, | |
| "eval_runtime": 372.4207, | |
| "eval_samples_per_second": 134.257, | |
| "eval_steps_per_second": 16.782, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.07110352673492605, | |
| "grad_norm": 11.443495750427246, | |
| "learning_rate": 2.369668246445498e-06, | |
| "loss": 0.4665, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1422070534698521, | |
| "grad_norm": 5.395324230194092, | |
| "learning_rate": 4.739336492890996e-06, | |
| "loss": 0.1226, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.21331058020477817, | |
| "grad_norm": 3.713897705078125, | |
| "learning_rate": 7.1090047393364935e-06, | |
| "loss": 0.1013, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.21331058020477817, | |
| "eval_accuracy": 0.96946, | |
| "eval_f1": 0.9694067677759302, | |
| "eval_loss": 0.08184666186571121, | |
| "eval_precision": 0.9665987454552719, | |
| "eval_recall": 0.9722311525478219, | |
| "eval_runtime": 369.1947, | |
| "eval_samples_per_second": 135.43, | |
| "eval_steps_per_second": 16.929, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2844141069397042, | |
| "grad_norm": 22.85436248779297, | |
| "learning_rate": 9.478672985781992e-06, | |
| "loss": 0.0911, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.35551763367463024, | |
| "grad_norm": 2.183819532394409, | |
| "learning_rate": 9.794585484040872e-06, | |
| "loss": 0.0904, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.42662116040955633, | |
| "grad_norm": 3.5085792541503906, | |
| "learning_rate": 9.53123354050353e-06, | |
| "loss": 0.0853, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.42662116040955633, | |
| "eval_accuracy": 0.97452, | |
| "eval_f1": 0.97444332998997, | |
| "eval_loss": 0.06914982199668884, | |
| "eval_precision": 0.9728430665705359, | |
| "eval_recall": 0.9760488667416815, | |
| "eval_runtime": 369.1227, | |
| "eval_samples_per_second": 135.456, | |
| "eval_steps_per_second": 16.932, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.49772468714448237, | |
| "grad_norm": 6.5320234298706055, | |
| "learning_rate": 9.267881596966186e-06, | |
| "loss": 0.0799, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5688282138794084, | |
| "grad_norm": 1.3349543809890747, | |
| "learning_rate": 9.004529653428843e-06, | |
| "loss": 0.0751, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6399317406143344, | |
| "grad_norm": 1.4865925312042236, | |
| "learning_rate": 8.7411777098915e-06, | |
| "loss": 0.0742, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.6399317406143344, | |
| "eval_accuracy": 0.97572, | |
| "eval_f1": 0.9756136756257282, | |
| "eval_loss": 0.06528624147176743, | |
| "eval_precision": 0.9753393846895333, | |
| "eval_recall": 0.9758881208808873, | |
| "eval_runtime": 369.1379, | |
| "eval_samples_per_second": 135.451, | |
| "eval_steps_per_second": 16.931, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7110352673492605, | |
| "grad_norm": 1.9182387590408325, | |
| "learning_rate": 8.477825766354156e-06, | |
| "loss": 0.0761, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.7821387940841866, | |
| "grad_norm": 4.340898036956787, | |
| "learning_rate": 8.214473822816812e-06, | |
| "loss": 0.0742, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8532423208191127, | |
| "grad_norm": 4.503939151763916, | |
| "learning_rate": 7.95112187927947e-06, | |
| "loss": 0.0722, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.8532423208191127, | |
| "eval_accuracy": 0.97488, | |
| "eval_f1": 0.9744964262508122, | |
| "eval_loss": 0.06521258503198624, | |
| "eval_precision": 0.9848957478246594, | |
| "eval_recall": 0.9643144189037133, | |
| "eval_runtime": 369.1106, | |
| "eval_samples_per_second": 135.461, | |
| "eval_steps_per_second": 16.933, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9243458475540387, | |
| "grad_norm": 5.767714977264404, | |
| "learning_rate": 7.687769935742126e-06, | |
| "loss": 0.0664, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.9954493742889647, | |
| "grad_norm": 1.0236719846725464, | |
| "learning_rate": 7.424417992204783e-06, | |
| "loss": 0.0687, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.0665529010238908, | |
| "grad_norm": 5.575131416320801, | |
| "learning_rate": 7.1610660486674395e-06, | |
| "loss": 0.0594, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.0665529010238908, | |
| "eval_accuracy": 0.9734, | |
| "eval_f1": 0.9729013854930725, | |
| "eval_loss": 0.07639238238334656, | |
| "eval_precision": 0.9867333443544387, | |
| "eval_recall": 0.9594518566146921, | |
| "eval_runtime": 369.1258, | |
| "eval_samples_per_second": 135.455, | |
| "eval_steps_per_second": 16.932, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.1376564277588168, | |
| "grad_norm": 1.4480912685394287, | |
| "learning_rate": 6.8977141051300965e-06, | |
| "loss": 0.0613, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.2087599544937428, | |
| "grad_norm": 14.01652717590332, | |
| "learning_rate": 6.6343621615927535e-06, | |
| "loss": 0.0626, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.2798634812286689, | |
| "grad_norm": 2.645029067993164, | |
| "learning_rate": 6.3710102180554104e-06, | |
| "loss": 0.0595, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.2798634812286689, | |
| "eval_accuracy": 0.97674, | |
| "eval_f1": 0.9764427069618586, | |
| "eval_loss": 0.0677267462015152, | |
| "eval_precision": 0.9843986113947315, | |
| "eval_recall": 0.968614370679955, | |
| "eval_runtime": 369.1238, | |
| "eval_samples_per_second": 135.456, | |
| "eval_steps_per_second": 16.932, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.350967007963595, | |
| "grad_norm": 4.5592122077941895, | |
| "learning_rate": 6.1076582745180666e-06, | |
| "loss": 0.0618, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.4220705346985212, | |
| "grad_norm": 5.417106628417969, | |
| "learning_rate": 5.8443063309807235e-06, | |
| "loss": 0.058, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.493174061433447, | |
| "grad_norm": 1.136661171913147, | |
| "learning_rate": 5.5809543874433805e-06, | |
| "loss": 0.0542, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.493174061433447, | |
| "eval_accuracy": 0.97848, | |
| "eval_f1": 0.9783422567529487, | |
| "eval_loss": 0.06500901281833649, | |
| "eval_precision": 0.9800387127994193, | |
| "eval_recall": 0.9766516637196592, | |
| "eval_runtime": 369.2146, | |
| "eval_samples_per_second": 135.423, | |
| "eval_steps_per_second": 16.928, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.5642775881683733, | |
| "grad_norm": 2.5331344604492188, | |
| "learning_rate": 5.317602443906037e-06, | |
| "loss": 0.0623, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.635381114903299, | |
| "grad_norm": 2.5099124908447266, | |
| "learning_rate": 5.054250500368693e-06, | |
| "loss": 0.0617, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.7064846416382253, | |
| "grad_norm": 0.18802767992019653, | |
| "learning_rate": 4.79089855683135e-06, | |
| "loss": 0.0571, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.7064846416382253, | |
| "eval_accuracy": 0.97944, | |
| "eval_f1": 0.9793847511330366, | |
| "eval_loss": 0.05933361500501633, | |
| "eval_precision": 0.9774637739172204, | |
| "eval_recall": 0.9813132936826877, | |
| "eval_runtime": 369.1545, | |
| "eval_samples_per_second": 135.445, | |
| "eval_steps_per_second": 16.931, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.7775881683731511, | |
| "grad_norm": 0.17306402325630188, | |
| "learning_rate": 4.527546613294007e-06, | |
| "loss": 0.0575, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.8486916951080774, | |
| "grad_norm": 2.0170910358428955, | |
| "learning_rate": 4.264194669756664e-06, | |
| "loss": 0.0573, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.9197952218430034, | |
| "grad_norm": 1.0754927396774292, | |
| "learning_rate": 4.00084272621932e-06, | |
| "loss": 0.0562, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.9197952218430034, | |
| "eval_accuracy": 0.9793, | |
| "eval_f1": 0.9792272955343703, | |
| "eval_loss": 0.05992409214377403, | |
| "eval_precision": 0.9781083356721864, | |
| "eval_recall": 0.9803488185179232, | |
| "eval_runtime": 369.2584, | |
| "eval_samples_per_second": 135.407, | |
| "eval_steps_per_second": 16.926, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.9908987485779295, | |
| "grad_norm": 0.5176452398300171, | |
| "learning_rate": 3.7374907826819767e-06, | |
| "loss": 0.0553, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.0620022753128557, | |
| "grad_norm": 3.9174857139587402, | |
| "learning_rate": 3.474138839144633e-06, | |
| "loss": 0.0506, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.1331058020477816, | |
| "grad_norm": 2.6643998622894287, | |
| "learning_rate": 3.21078689560729e-06, | |
| "loss": 0.0463, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.1331058020477816, | |
| "eval_accuracy": 0.97976, | |
| "eval_f1": 0.9796746334605343, | |
| "eval_loss": 0.05948900803923607, | |
| "eval_precision": 0.9792419497309885, | |
| "eval_recall": 0.9801076997267321, | |
| "eval_runtime": 369.2026, | |
| "eval_samples_per_second": 135.427, | |
| "eval_steps_per_second": 16.928, | |
| "step": 15000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 21096, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.5256135448428544e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |