| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.32827994984611875, | |
| "eval_steps": 720, | |
| "global_step": 5760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.3646579682826996, | |
| "learning_rate": 9.795674104471232e-05, | |
| "loss": 0.0784, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_bertscore": 0.7196829915046692, | |
| "eval_loss": 0.10667099058628082, | |
| "eval_rouge1": 0.5424204681399662, | |
| "eval_rouge2": 0.3232621307936826, | |
| "eval_rougeL": 0.4168316330799737, | |
| "eval_rougeLsum": 0.41874171810922023, | |
| "eval_runtime": 50.0749, | |
| "eval_samples_per_second": 1.078, | |
| "eval_steps_per_second": 0.28, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": null, | |
| "learning_rate": 9.590778262232482e-05, | |
| "loss": 0.0858, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_bertscore": 0.6401040554046631, | |
| "eval_loss": 0.18695296347141266, | |
| "eval_rouge1": 0.4271646967472444, | |
| "eval_rouge2": 0.1659738534008396, | |
| "eval_rougeL": 0.28939047028045584, | |
| "eval_rougeLsum": 0.28900025002813623, | |
| "eval_runtime": 43.3827, | |
| "eval_samples_per_second": 1.245, | |
| "eval_steps_per_second": 0.323, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.3369450867176056, | |
| "learning_rate": 9.38559744663874e-05, | |
| "loss": 0.0887, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_bertscore": 0.7218716740608215, | |
| "eval_loss": 0.1099499836564064, | |
| "eval_rouge1": 0.5524760977800962, | |
| "eval_rouge2": 0.330958882130141, | |
| "eval_rougeL": 0.4259051117722474, | |
| "eval_rougeLsum": 0.42919968644337714, | |
| "eval_runtime": 47.1009, | |
| "eval_samples_per_second": 1.146, | |
| "eval_steps_per_second": 0.297, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.36330386996269226, | |
| "learning_rate": 9.180416631044998e-05, | |
| "loss": 0.0933, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_bertscore": 0.7138540744781494, | |
| "eval_loss": 0.1120433360338211, | |
| "eval_rouge1": 0.5390251173909333, | |
| "eval_rouge2": 0.31146103356099275, | |
| "eval_rougeL": 0.41387331131584476, | |
| "eval_rougeLsum": 0.4135311998867288, | |
| "eval_runtime": 46.0195, | |
| "eval_samples_per_second": 1.173, | |
| "eval_steps_per_second": 0.304, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.35335826873779297, | |
| "learning_rate": 8.975235815451256e-05, | |
| "loss": 0.0862, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_bertscore": 0.7312328219413757, | |
| "eval_loss": 0.10742145031690598, | |
| "eval_rouge1": 0.5669051469800668, | |
| "eval_rouge2": 0.3520718989284114, | |
| "eval_rougeL": 0.44425293679893696, | |
| "eval_rougeLsum": 0.44695777725182906, | |
| "eval_runtime": 45.6563, | |
| "eval_samples_per_second": 1.183, | |
| "eval_steps_per_second": 0.307, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.37303468585014343, | |
| "learning_rate": 8.770339973212504e-05, | |
| "loss": 0.0911, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_bertscore": 0.7096375226974487, | |
| "eval_loss": 0.11186981201171875, | |
| "eval_rouge1": 0.5333109271513738, | |
| "eval_rouge2": 0.30338933797823264, | |
| "eval_rougeL": 0.4003430978893555, | |
| "eval_rougeLsum": 0.4005552066640774, | |
| "eval_runtime": 43.9874, | |
| "eval_samples_per_second": 1.228, | |
| "eval_steps_per_second": 0.318, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.30176717042922974, | |
| "learning_rate": 8.565159157618762e-05, | |
| "loss": 0.0893, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_bertscore": 0.7181953191757202, | |
| "eval_loss": 0.10768043249845505, | |
| "eval_rouge1": 0.5493961807050101, | |
| "eval_rouge2": 0.3304637891082364, | |
| "eval_rougeL": 0.42184528341938216, | |
| "eval_rougeLsum": 0.4241212110511772, | |
| "eval_runtime": 45.8651, | |
| "eval_samples_per_second": 1.177, | |
| "eval_steps_per_second": 0.305, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.35905396938323975, | |
| "learning_rate": 8.35997834202502e-05, | |
| "loss": 0.0895, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_bertscore": 0.7235485315322876, | |
| "eval_loss": 0.10892420262098312, | |
| "eval_rouge1": 0.5464214086441583, | |
| "eval_rouge2": 0.33055059501726136, | |
| "eval_rougeL": 0.4309761675921166, | |
| "eval_rougeLsum": 0.43243067509302885, | |
| "eval_runtime": 46.6469, | |
| "eval_samples_per_second": 1.158, | |
| "eval_steps_per_second": 0.3, | |
| "step": 5760 | |
| } | |
| ], | |
| "logging_steps": 720, | |
| "max_steps": 35092, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 2880, | |
| "total_flos": 4.993009003266048e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |