| { |
| "best_metric": 0.7777777777777778, |
| "best_model_checkpoint": "/home/ubuntu/.cache/huggingface/checkpoints/final-nolowercase/allenai/scibert_scivocab_cased-zo_up/checkpoint-952", |
| "epoch": 14.0, |
| "eval_steps": 500, |
| "global_step": 952, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 9.758819580078125, |
| "learning_rate": 1.9e-05, |
| "loss": 2.1465, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6060606060606061, |
| "eval_f1": 0.5926804543443077, |
| "eval_loss": 1.737682580947876, |
| "eval_precision": 0.6542157960050116, |
| "eval_recall": 0.6060606060606061, |
| "eval_runtime": 5.5219, |
| "eval_samples_per_second": 35.858, |
| "eval_steps_per_second": 3.079, |
| "step": 68 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.286689758300781, |
| "learning_rate": 1.8e-05, |
| "loss": 1.3152, |
| "step": 136 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6919191919191919, |
| "eval_f1": 0.6839311008227865, |
| "eval_loss": 1.176934838294983, |
| "eval_precision": 0.6962589879256545, |
| "eval_recall": 0.6919191919191919, |
| "eval_runtime": 5.5816, |
| "eval_samples_per_second": 35.474, |
| "eval_steps_per_second": 3.046, |
| "step": 136 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 14.378315925598145, |
| "learning_rate": 1.7e-05, |
| "loss": 0.7672, |
| "step": 204 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7222222222222222, |
| "eval_f1": 0.7126444337555448, |
| "eval_loss": 0.9430862069129944, |
| "eval_precision": 0.7326162263662264, |
| "eval_recall": 0.7222222222222222, |
| "eval_runtime": 5.5762, |
| "eval_samples_per_second": 35.508, |
| "eval_steps_per_second": 3.049, |
| "step": 204 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 6.346803665161133, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.428, |
| "step": 272 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7424242424242424, |
| "eval_f1": 0.7374161797373577, |
| "eval_loss": 0.8948661684989929, |
| "eval_precision": 0.7697136967970301, |
| "eval_recall": 0.7424242424242424, |
| "eval_runtime": 5.5678, |
| "eval_samples_per_second": 35.561, |
| "eval_steps_per_second": 3.053, |
| "step": 272 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.8745429515838623, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.2365, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7474747474747475, |
| "eval_f1": 0.7421610878166268, |
| "eval_loss": 0.9098976850509644, |
| "eval_precision": 0.7533454200120868, |
| "eval_recall": 0.7474747474747475, |
| "eval_runtime": 5.5605, |
| "eval_samples_per_second": 35.608, |
| "eval_steps_per_second": 3.057, |
| "step": 340 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 40.36902618408203, |
| "learning_rate": 1.4e-05, |
| "loss": 0.1471, |
| "step": 408 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.7575757575757576, |
| "eval_f1": 0.7509465650386367, |
| "eval_loss": 0.9332886338233948, |
| "eval_precision": 0.7571138737805404, |
| "eval_recall": 0.7575757575757576, |
| "eval_runtime": 5.5624, |
| "eval_samples_per_second": 35.596, |
| "eval_steps_per_second": 3.056, |
| "step": 408 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.5145724415779114, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0813, |
| "step": 476 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7626262626262627, |
| "eval_f1": 0.7568973419007069, |
| "eval_loss": 1.031730055809021, |
| "eval_precision": 0.7752156022989356, |
| "eval_recall": 0.7626262626262627, |
| "eval_runtime": 5.5746, |
| "eval_samples_per_second": 35.518, |
| "eval_steps_per_second": 3.05, |
| "step": 476 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.22983932495117188, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0489, |
| "step": 544 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_f1": 0.7666605746845349, |
| "eval_loss": 1.0123027563095093, |
| "eval_precision": 0.7856826316385139, |
| "eval_recall": 0.7727272727272727, |
| "eval_runtime": 5.5743, |
| "eval_samples_per_second": 35.52, |
| "eval_steps_per_second": 3.05, |
| "step": 544 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.1842876672744751, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.032, |
| "step": 612 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_f1": 0.7671998708627763, |
| "eval_loss": 1.100764513015747, |
| "eval_precision": 0.7916376062209396, |
| "eval_recall": 0.7727272727272727, |
| "eval_runtime": 5.5601, |
| "eval_samples_per_second": 35.611, |
| "eval_steps_per_second": 3.057, |
| "step": 612 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.4087672233581543, |
| "learning_rate": 1e-05, |
| "loss": 0.0213, |
| "step": 680 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_f1": 0.7671998708627763, |
| "eval_loss": 1.0900105237960815, |
| "eval_precision": 0.7916376062209396, |
| "eval_recall": 0.7727272727272727, |
| "eval_runtime": 5.5654, |
| "eval_samples_per_second": 35.577, |
| "eval_steps_per_second": 3.055, |
| "step": 680 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.11046537756919861, |
| "learning_rate": 9e-06, |
| "loss": 0.0175, |
| "step": 748 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.7676767676767676, |
| "eval_f1": 0.7623911252744204, |
| "eval_loss": 1.0985959768295288, |
| "eval_precision": 0.7916245791245792, |
| "eval_recall": 0.7676767676767676, |
| "eval_runtime": 5.5637, |
| "eval_samples_per_second": 35.588, |
| "eval_steps_per_second": 3.056, |
| "step": 748 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.5795270204544067, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0105, |
| "step": 816 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.7575757575757576, |
| "eval_f1": 0.753315419105508, |
| "eval_loss": 1.2046688795089722, |
| "eval_precision": 0.7807447182447182, |
| "eval_recall": 0.7575757575757576, |
| "eval_runtime": 5.5698, |
| "eval_samples_per_second": 35.549, |
| "eval_steps_per_second": 3.052, |
| "step": 816 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.07767060399055481, |
| "learning_rate": 7e-06, |
| "loss": 0.0099, |
| "step": 884 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_f1": 0.7671686533256058, |
| "eval_loss": 1.1359951496124268, |
| "eval_precision": 0.7919163438900281, |
| "eval_recall": 0.7727272727272727, |
| "eval_runtime": 5.5591, |
| "eval_samples_per_second": 35.617, |
| "eval_steps_per_second": 3.058, |
| "step": 884 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.0792476162314415, |
| "learning_rate": 6e-06, |
| "loss": 0.0072, |
| "step": 952 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_f1": 0.7736904539650535, |
| "eval_loss": 1.1267083883285522, |
| "eval_precision": 0.797784549758234, |
| "eval_recall": 0.7777777777777778, |
| "eval_runtime": 5.5659, |
| "eval_samples_per_second": 35.574, |
| "eval_steps_per_second": 3.054, |
| "step": 952 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1360, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2984107995832320.0, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|