| { | |
| "best_metric": 29.4578, | |
| "best_model_checkpoint": "/content/gdrive/MyDrive/FYP/t5-base-squad/checkpoint-4000", | |
| "epoch": 0.91324200913242, | |
| "eval_steps": 1000, | |
| "global_step": 9000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.5294313430786133, | |
| "learning_rate": 3.5961440892947744e-05, | |
| "loss": 0.4964, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_gen_len": 14.3475, | |
| "eval_loss": 0.20427940785884857, | |
| "eval_rouge1": 28.3615, | |
| "eval_rouge2": 8.4682, | |
| "eval_rougeL": 25.4968, | |
| "eval_rougeLsum": 25.5017, | |
| "eval_runtime": 1076.2576, | |
| "eval_samples_per_second": 8.139, | |
| "eval_steps_per_second": 1.017, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.3378017246723175, | |
| "learning_rate": 3.19066463723998e-05, | |
| "loss": 0.2181, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 14.0145, | |
| "eval_loss": 0.19709938764572144, | |
| "eval_rouge1": 28.848, | |
| "eval_rouge2": 8.9434, | |
| "eval_rougeL": 25.9934, | |
| "eval_rougeLsum": 25.9925, | |
| "eval_runtime": 1064.9247, | |
| "eval_samples_per_second": 8.226, | |
| "eval_steps_per_second": 1.028, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.4546686112880707, | |
| "learning_rate": 2.786808726534754e-05, | |
| "loss": 0.2113, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_gen_len": 13.3511, | |
| "eval_loss": 0.19329853355884552, | |
| "eval_rouge1": 29.2691, | |
| "eval_rouge2": 9.1353, | |
| "eval_rougeL": 26.4378, | |
| "eval_rougeLsum": 26.4387, | |
| "eval_runtime": 1053.7948, | |
| "eval_samples_per_second": 8.313, | |
| "eval_steps_per_second": 1.039, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.3615570664405823, | |
| "learning_rate": 2.3829528158295285e-05, | |
| "loss": 0.2055, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_gen_len": 14.0945, | |
| "eval_loss": 0.1883835792541504, | |
| "eval_rouge1": 29.4578, | |
| "eval_rouge2": 9.2642, | |
| "eval_rougeL": 26.5988, | |
| "eval_rougeLsum": 26.6249, | |
| "eval_runtime": 1085.8301, | |
| "eval_samples_per_second": 8.068, | |
| "eval_steps_per_second": 1.008, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.3282989263534546, | |
| "learning_rate": 1.9795027904616947e-05, | |
| "loss": 0.2032, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_gen_len": 14.0758, | |
| "eval_loss": 0.18715335428714752, | |
| "eval_rouge1": 29.1722, | |
| "eval_rouge2": 9.0803, | |
| "eval_rougeL": 26.3435, | |
| "eval_rougeLsum": 26.3577, | |
| "eval_runtime": 1079.1365, | |
| "eval_samples_per_second": 8.118, | |
| "eval_steps_per_second": 1.015, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.43038222193717957, | |
| "learning_rate": 1.5752409944190767e-05, | |
| "loss": 0.1997, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_gen_len": 14.4075, | |
| "eval_loss": 0.18170754611492157, | |
| "eval_rouge1": 29.2152, | |
| "eval_rouge2": 9.0871, | |
| "eval_rougeL": 26.2656, | |
| "eval_rougeLsum": 26.2699, | |
| "eval_runtime": 1047.9477, | |
| "eval_samples_per_second": 8.359, | |
| "eval_steps_per_second": 1.045, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.35650455951690674, | |
| "learning_rate": 1.171385083713851e-05, | |
| "loss": 0.201, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_gen_len": 14.4231, | |
| "eval_loss": 0.18083173036575317, | |
| "eval_rouge1": 28.9127, | |
| "eval_rouge2": 8.9743, | |
| "eval_rougeL": 26.0338, | |
| "eval_rougeLsum": 26.0194, | |
| "eval_runtime": 1047.0949, | |
| "eval_samples_per_second": 8.366, | |
| "eval_steps_per_second": 1.046, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.4643958508968353, | |
| "learning_rate": 7.675291730086252e-06, | |
| "loss": 0.1971, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_gen_len": 14.3037, | |
| "eval_loss": 0.18144848942756653, | |
| "eval_rouge1": 29.4071, | |
| "eval_rouge2": 9.4867, | |
| "eval_rougeL": 26.4771, | |
| "eval_rougeLsum": 26.473, | |
| "eval_runtime": 1059.6714, | |
| "eval_samples_per_second": 8.267, | |
| "eval_steps_per_second": 1.033, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.4707474410533905, | |
| "learning_rate": 3.640791476407915e-06, | |
| "loss": 0.1994, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_gen_len": 14.1849, | |
| "eval_loss": 0.18111075460910797, | |
| "eval_rouge1": 29.293, | |
| "eval_rouge2": 9.3682, | |
| "eval_rougeL": 26.3751, | |
| "eval_rougeLsum": 26.3635, | |
| "eval_runtime": 1052.0617, | |
| "eval_samples_per_second": 8.327, | |
| "eval_steps_per_second": 1.041, | |
| "step": 9000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 9855, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "total_flos": 4.389855641468928e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |