| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 4460, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.9311900734901428, | |
| "learning_rate": 0.0008, | |
| "loss": 3.0485, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 1.0, | |
| "eval_loss": 2.009058713912964, | |
| "eval_rouge1": 0.7257, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.7217, | |
| "eval_rougeLsum": 0.7197, | |
| "eval_runtime": 52.1613, | |
| "eval_samples_per_second": 14.359, | |
| "eval_steps_per_second": 0.46, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9934630393981934, | |
| "learning_rate": 0.0006, | |
| "loss": 2.4113, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 1.0, | |
| "eval_loss": 1.6974738836288452, | |
| "eval_rouge1": 0.723, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.7191, | |
| "eval_rougeLsum": 0.7182, | |
| "eval_runtime": 51.4774, | |
| "eval_samples_per_second": 14.55, | |
| "eval_steps_per_second": 0.466, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.2730742692947388, | |
| "learning_rate": 0.0004, | |
| "loss": 2.156, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 1.0, | |
| "eval_loss": 1.5238491296768188, | |
| "eval_rouge1": 0.7284, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.7246, | |
| "eval_rougeLsum": 0.7242, | |
| "eval_runtime": 51.3126, | |
| "eval_samples_per_second": 14.597, | |
| "eval_steps_per_second": 0.468, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.9294344186782837, | |
| "learning_rate": 0.0002, | |
| "loss": 1.969, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 1.0, | |
| "eval_loss": 1.4055782556533813, | |
| "eval_rouge1": 0.6992, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.6997, | |
| "eval_rougeLsum": 0.6977, | |
| "eval_runtime": 52.4927, | |
| "eval_samples_per_second": 14.269, | |
| "eval_steps_per_second": 0.457, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.1058984994888306, | |
| "learning_rate": 0.0, | |
| "loss": 1.8249, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 1.0, | |
| "eval_loss": 1.334100365638733, | |
| "eval_rouge1": 0.7266, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.7247, | |
| "eval_rougeLsum": 0.7239, | |
| "eval_runtime": 52.9997, | |
| "eval_samples_per_second": 14.132, | |
| "eval_steps_per_second": 0.453, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 4460, | |
| "total_flos": 5.530684398501888e+16, | |
| "train_loss": 2.281923631060818, | |
| "train_runtime": 5654.6105, | |
| "train_samples_per_second": 12.612, | |
| "train_steps_per_second": 0.789 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 4460, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 5.530684398501888e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |