| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 11.899416255051639, | |
| "global_step": 26500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu 1": 70.3337, | |
| "eval_bleu 2": 60.798, | |
| "eval_bleu 3": 49.7924, | |
| "eval_bleu 4": 33.7923, | |
| "eval_loss": 0.6441967487335205, | |
| "eval_rouge1": 50.4785, | |
| "eval_rouge2": 32.871, | |
| "eval_rougeL": 50.0855, | |
| "eval_rougeLsum": 50.0558, | |
| "eval_runtime": 684.1071, | |
| "eval_samples_per_second": 12.122, | |
| "eval_score": 32.2794, | |
| "eval_sim_cos": 0.7653, | |
| "eval_steps_per_second": 3.032, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu 1": 70.3322, | |
| "eval_bleu 2": 61.1932, | |
| "eval_bleu 3": 50.7058, | |
| "eval_bleu 4": 35.6325, | |
| "eval_loss": 0.5984485745429993, | |
| "eval_rouge1": 51.6586, | |
| "eval_rouge2": 34.8166, | |
| "eval_rougeL": 51.2221, | |
| "eval_rougeLsum": 51.2004, | |
| "eval_runtime": 689.9391, | |
| "eval_samples_per_second": 12.02, | |
| "eval_score": 33.6509, | |
| "eval_sim_cos": 0.7702, | |
| "eval_steps_per_second": 3.006, | |
| "step": 4454 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu 1": 69.8987, | |
| "eval_bleu 2": 60.7039, | |
| "eval_bleu 3": 50.0866, | |
| "eval_bleu 4": 35.1525, | |
| "eval_loss": 0.579205334186554, | |
| "eval_rouge1": 51.7058, | |
| "eval_rouge2": 34.7654, | |
| "eval_rougeL": 51.0676, | |
| "eval_rougeLsum": 51.0349, | |
| "eval_runtime": 700.0697, | |
| "eval_samples_per_second": 11.846, | |
| "eval_score": 34.5869, | |
| "eval_sim_cos": 0.7717, | |
| "eval_steps_per_second": 2.963, | |
| "step": 6681 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu 1": 71.3187, | |
| "eval_bleu 2": 62.1795, | |
| "eval_bleu 3": 51.5483, | |
| "eval_bleu 4": 36.2898, | |
| "eval_loss": 0.5839173793792725, | |
| "eval_rouge1": 52.2274, | |
| "eval_rouge2": 35.3155, | |
| "eval_rougeL": 51.6932, | |
| "eval_rougeLsum": 51.6431, | |
| "eval_runtime": 686.5612, | |
| "eval_samples_per_second": 12.079, | |
| "eval_score": 34.2209, | |
| "eval_sim_cos": 0.7769, | |
| "eval_steps_per_second": 3.021, | |
| "step": 8908 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu 1": 70.0029, | |
| "eval_bleu 2": 60.8847, | |
| "eval_bleu 3": 50.4903, | |
| "eval_bleu 4": 35.8533, | |
| "eval_loss": 0.5703114867210388, | |
| "eval_rouge1": 52.2272, | |
| "eval_rouge2": 35.5618, | |
| "eval_rougeL": 51.643, | |
| "eval_rougeLsum": 51.622, | |
| "eval_runtime": 704.1871, | |
| "eval_samples_per_second": 11.777, | |
| "eval_score": 35.0827, | |
| "eval_sim_cos": 0.7735, | |
| "eval_steps_per_second": 2.945, | |
| "step": 11135 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu 1": 69.3858, | |
| "eval_bleu 2": 60.1128, | |
| "eval_bleu 3": 49.6906, | |
| "eval_bleu 4": 35.2287, | |
| "eval_loss": 0.5710490345954895, | |
| "eval_rouge1": 52.586, | |
| "eval_rouge2": 35.9223, | |
| "eval_rougeL": 51.8411, | |
| "eval_rougeLsum": 51.8356, | |
| "eval_runtime": 711.5517, | |
| "eval_samples_per_second": 11.655, | |
| "eval_score": 35.7035, | |
| "eval_sim_cos": 0.7745, | |
| "eval_steps_per_second": 2.915, | |
| "step": 13362 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu 1": 69.9726, | |
| "eval_bleu 2": 60.7494, | |
| "eval_bleu 3": 50.2446, | |
| "eval_bleu 4": 35.578, | |
| "eval_loss": 0.5707941055297852, | |
| "eval_rouge1": 52.6509, | |
| "eval_rouge2": 36.0061, | |
| "eval_rougeL": 51.9829, | |
| "eval_rougeLsum": 51.9582, | |
| "eval_runtime": 703.3177, | |
| "eval_samples_per_second": 11.791, | |
| "eval_score": 35.3903, | |
| "eval_sim_cos": 0.776, | |
| "eval_steps_per_second": 2.949, | |
| "step": 15589 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.666853764406526e-05, | |
| "loss": 0.6526, | |
| "step": 17815 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu 1": 70.0548, | |
| "eval_bleu 2": 60.8059, | |
| "eval_bleu 3": 50.2906, | |
| "eval_bleu 4": 35.6541, | |
| "eval_loss": 0.5693395137786865, | |
| "eval_rouge1": 53.0005, | |
| "eval_rouge2": 36.3939, | |
| "eval_rougeL": 52.2168, | |
| "eval_rougeLsum": 52.2032, | |
| "eval_runtime": 709.1405, | |
| "eval_samples_per_second": 11.694, | |
| "eval_score": 35.9363, | |
| "eval_sim_cos": 0.778, | |
| "eval_steps_per_second": 2.925, | |
| "step": 17816 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu 1": 69.7601, | |
| "eval_bleu 2": 60.4978, | |
| "eval_bleu 3": 50.0289, | |
| "eval_bleu 4": 35.5284, | |
| "eval_loss": 0.5695525407791138, | |
| "eval_rouge1": 52.9499, | |
| "eval_rouge2": 36.3434, | |
| "eval_rougeL": 52.1838, | |
| "eval_rougeLsum": 52.1727, | |
| "eval_runtime": 713.0996, | |
| "eval_samples_per_second": 11.63, | |
| "eval_score": 36.1266, | |
| "eval_sim_cos": 0.7772, | |
| "eval_steps_per_second": 2.908, | |
| "step": 20043 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu 1": 69.3518, | |
| "eval_bleu 2": 60.0768, | |
| "eval_bleu 3": 49.6026, | |
| "eval_bleu 4": 35.2072, | |
| "eval_loss": 0.5750804543495178, | |
| "eval_rouge1": 52.9485, | |
| "eval_rouge2": 36.4398, | |
| "eval_rougeL": 52.1386, | |
| "eval_rougeLsum": 52.1295, | |
| "eval_runtime": 717.0443, | |
| "eval_samples_per_second": 11.566, | |
| "eval_score": 36.356, | |
| "eval_sim_cos": 0.7769, | |
| "eval_steps_per_second": 2.892, | |
| "step": 22270 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bleu 1": 70.0005, | |
| "eval_bleu 2": 60.7865, | |
| "eval_bleu 3": 50.3355, | |
| "eval_bleu 4": 35.8009, | |
| "eval_loss": 0.5696647763252258, | |
| "eval_rouge1": 53.1415, | |
| "eval_rouge2": 36.5933, | |
| "eval_rougeL": 52.3479, | |
| "eval_rougeLsum": 52.3203, | |
| "eval_runtime": 710.6735, | |
| "eval_samples_per_second": 11.669, | |
| "eval_score": 36.3183, | |
| "eval_sim_cos": 0.779, | |
| "eval_steps_per_second": 2.918, | |
| "step": 24497 | |
| } | |
| ], | |
| "max_steps": 26724, | |
| "num_train_epochs": 12, | |
| "total_flos": 7.298085567419412e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |