| { |
| "best_metric": 40.7039, |
| "best_model_checkpoint": "text_gen/train/checkpoint/t5-base/samsum_42/checkpoint-4605", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 4605, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 22.97432762836186, |
| "eval_loss": 1.6420326232910156, |
| "eval_rouge1": 44.8382, |
| "eval_rouge2": 21.2515, |
| "eval_rougeL": 37.1374, |
| "eval_rougeLsum": 41.3685, |
| "eval_runtime": 612.4457, |
| "eval_samples_per_second": 1.336, |
| "eval_steps_per_second": 0.021, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 3.294757127761841, |
| "learning_rate": 3.914223669923996e-05, |
| "loss": 3.1861, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 23.905867970660147, |
| "eval_loss": 1.5848349332809448, |
| "eval_rouge1": 47.24, |
| "eval_rouge2": 23.0589, |
| "eval_rougeL": 39.234, |
| "eval_rougeLsum": 43.7105, |
| "eval_runtime": 464.203, |
| "eval_samples_per_second": 1.762, |
| "eval_steps_per_second": 0.028, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 2.388230800628662, |
| "learning_rate": 2.8284473398479917e-05, |
| "loss": 2.0725, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 24.424205378973106, |
| "eval_loss": 1.5335135459899902, |
| "eval_rouge1": 48.2277, |
| "eval_rouge2": 23.7578, |
| "eval_rougeL": 40.0662, |
| "eval_rougeLsum": 44.5023, |
| "eval_runtime": 499.025, |
| "eval_samples_per_second": 1.639, |
| "eval_steps_per_second": 0.026, |
| "step": 2763 |
| }, |
| { |
| "epoch": 3.26, |
| "grad_norm": 3.758873462677002, |
| "learning_rate": 1.742671009771987e-05, |
| "loss": 1.9171, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 23.876528117359413, |
| "eval_loss": 1.5173996686935425, |
| "eval_rouge1": 48.4762, |
| "eval_rouge2": 23.9941, |
| "eval_rougeL": 40.2086, |
| "eval_rougeLsum": 44.7825, |
| "eval_runtime": 531.8519, |
| "eval_samples_per_second": 1.538, |
| "eval_steps_per_second": 0.024, |
| "step": 3684 |
| }, |
| { |
| "epoch": 4.34, |
| "grad_norm": 2.367593288421631, |
| "learning_rate": 6.568946796959827e-06, |
| "loss": 1.8449, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 23.97921760391198, |
| "eval_loss": 1.5075008869171143, |
| "eval_rouge1": 49.0229, |
| "eval_rouge2": 24.3221, |
| "eval_rougeL": 40.7039, |
| "eval_rougeLsum": 45.1477, |
| "eval_runtime": 514.5213, |
| "eval_samples_per_second": 1.59, |
| "eval_steps_per_second": 0.025, |
| "step": 4605 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 4605, |
| "total_flos": 3.981748050456883e+16, |
| "train_loss": 2.197719763673996, |
| "train_runtime": 4270.6116, |
| "train_samples_per_second": 17.247, |
| "train_steps_per_second": 1.078 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 4605, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "total_flos": 3.981748050456883e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|