| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.0, | |
| "global_step": 3760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu1_score": 0.0955, | |
| "eval_bleu2_score": 0.04, | |
| "eval_bleu3_score": 0.0219, | |
| "eval_bleu4_score": 0.0133, | |
| "eval_loss": 3.6427664756774902, | |
| "eval_rougeL": 0.0868, | |
| "eval_runtime": 156.5662, | |
| "eval_samples_per_second": 10.666, | |
| "eval_steps_per_second": 0.339, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.5e-05, | |
| "loss": 4.6743, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu1_score": 0.1017, | |
| "eval_bleu2_score": 0.0483, | |
| "eval_bleu3_score": 0.0274, | |
| "eval_bleu4_score": 0.0163, | |
| "eval_loss": 3.4372775554656982, | |
| "eval_rougeL": 0.1073, | |
| "eval_runtime": 155.081, | |
| "eval_samples_per_second": 10.769, | |
| "eval_steps_per_second": 0.342, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 5e-05, | |
| "loss": 3.6621, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu1_score": 0.1142, | |
| "eval_bleu2_score": 0.0589, | |
| "eval_bleu3_score": 0.0354, | |
| "eval_bleu4_score": 0.0222, | |
| "eval_loss": 3.3255724906921387, | |
| "eval_rougeL": 0.1212, | |
| "eval_runtime": 153.5354, | |
| "eval_samples_per_second": 10.877, | |
| "eval_steps_per_second": 0.345, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 4.094202898550725e-05, | |
| "loss": 3.3193, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu1_score": 0.1202, | |
| "eval_bleu2_score": 0.0633, | |
| "eval_bleu3_score": 0.0394, | |
| "eval_bleu4_score": 0.0258, | |
| "eval_loss": 3.302886962890625, | |
| "eval_rougeL": 0.1295, | |
| "eval_runtime": 152.9979, | |
| "eval_samples_per_second": 10.915, | |
| "eval_steps_per_second": 0.346, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.188405797101449e-05, | |
| "loss": 2.9767, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu1_score": 0.1276, | |
| "eval_bleu2_score": 0.0663, | |
| "eval_bleu3_score": 0.0401, | |
| "eval_bleu4_score": 0.0259, | |
| "eval_loss": 3.303429365158081, | |
| "eval_rougeL": 0.131, | |
| "eval_runtime": 152.8426, | |
| "eval_samples_per_second": 10.926, | |
| "eval_steps_per_second": 0.347, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.282608695652174e-05, | |
| "loss": 2.7327, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu1_score": 0.1355, | |
| "eval_bleu2_score": 0.0722, | |
| "eval_bleu3_score": 0.0451, | |
| "eval_bleu4_score": 0.0299, | |
| "eval_loss": 3.336897611618042, | |
| "eval_rougeL": 0.1379, | |
| "eval_runtime": 153.2596, | |
| "eval_samples_per_second": 10.897, | |
| "eval_steps_per_second": 0.346, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 1.3768115942028985e-05, | |
| "loss": 2.5502, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu1_score": 0.1374, | |
| "eval_bleu2_score": 0.0724, | |
| "eval_bleu3_score": 0.0444, | |
| "eval_bleu4_score": 0.029, | |
| "eval_loss": 3.361145496368408, | |
| "eval_rougeL": 0.1394, | |
| "eval_runtime": 153.1486, | |
| "eval_samples_per_second": 10.904, | |
| "eval_steps_per_second": 0.346, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 4.710144927536232e-06, | |
| "loss": 2.4162, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu1_score": 0.1384, | |
| "eval_bleu2_score": 0.0741, | |
| "eval_bleu3_score": 0.0457, | |
| "eval_bleu4_score": 0.0297, | |
| "eval_loss": 3.388550281524658, | |
| "eval_rougeL": 0.1403, | |
| "eval_runtime": 153.1828, | |
| "eval_samples_per_second": 10.902, | |
| "eval_steps_per_second": 0.346, | |
| "step": 3760 | |
| } | |
| ], | |
| "max_steps": 3760, | |
| "num_train_epochs": 8, | |
| "total_flos": 7.147167492096e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |