| { | |
| "best_metric": 2.6812784671783447, | |
| "best_model_checkpoint": "./models/final_bart/checkpoint-2000", | |
| "epoch": 5.0, | |
| "global_step": 3340, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.982035928143711e-06, | |
| "loss": 5.6146, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.7964071856287423e-05, | |
| "loss": 2.8024, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.6946107784431136e-05, | |
| "loss": 2.5446, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.934131736526946e-05, | |
| "loss": 2.4198, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.8343313373253494e-05, | |
| "loss": 2.3636, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.7345309381237524e-05, | |
| "loss": 2.3251, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.6347305389221558e-05, | |
| "loss": 2.2522, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.534930139720559e-05, | |
| "loss": 2.1959, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.4351297405189622e-05, | |
| "loss": 2.1588, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.3353293413173656e-05, | |
| "loss": 2.1542, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_bleu1": 27.9938, | |
| "eval_bleu2": 15.5354, | |
| "eval_bleu3": 8.2494, | |
| "eval_bleu4": 4.42, | |
| "eval_gen_len": 50.08, | |
| "eval_loss": 2.749131202697754, | |
| "eval_rdass": 0.6093000173568726, | |
| "eval_rouge1": 33.5554, | |
| "eval_rouge2": 11.2371, | |
| "eval_rougeL": 22.006, | |
| "eval_runtime": 24.7242, | |
| "eval_samples_per_second": 4.045, | |
| "eval_steps_per_second": 0.04, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2355289421157686e-05, | |
| "loss": 2.1473, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.135728542914172e-05, | |
| "loss": 2.1459, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 2.0359281437125747e-05, | |
| "loss": 2.1324, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.936127744510978e-05, | |
| "loss": 2.0676, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.836327345309381e-05, | |
| "loss": 2.0206, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.7365269461077845e-05, | |
| "loss": 2.0198, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.6367265469061875e-05, | |
| "loss": 2.0177, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.536926147704591e-05, | |
| "loss": 2.0143, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 1.437125748502994e-05, | |
| "loss": 2.0075, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.3373253493013973e-05, | |
| "loss": 2.0071, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_bleu1": 29.6866, | |
| "eval_bleu2": 17.1396, | |
| "eval_bleu3": 9.7016, | |
| "eval_bleu4": 5.3559, | |
| "eval_gen_len": 54.04, | |
| "eval_loss": 2.6812784671783447, | |
| "eval_rdass": 0.6154999732971191, | |
| "eval_rouge1": 35.0501, | |
| "eval_rouge2": 12.2759, | |
| "eval_rougeL": 22.6669, | |
| "eval_runtime": 20.0572, | |
| "eval_samples_per_second": 4.986, | |
| "eval_steps_per_second": 0.05, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 1.2375249500998005e-05, | |
| "loss": 1.9318, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.1377245508982035e-05, | |
| "loss": 1.9389, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 1.0379241516966067e-05, | |
| "loss": 1.9214, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 9.3812375249501e-06, | |
| "loss": 1.9144, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 8.383233532934131e-06, | |
| "loss": 1.9404, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 7.385229540918164e-06, | |
| "loss": 1.9189, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 6.3872255489021955e-06, | |
| "loss": 1.9155, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 5.3892215568862275e-06, | |
| "loss": 1.874, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 4.39121756487026e-06, | |
| "loss": 1.8951, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 3.3932135728542917e-06, | |
| "loss": 1.8694, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_bleu1": 30.5261, | |
| "eval_bleu2": 17.6264, | |
| "eval_bleu3": 10.3974, | |
| "eval_bleu4": 5.4348, | |
| "eval_gen_len": 53.47, | |
| "eval_loss": 2.684814453125, | |
| "eval_rdass": 0.6248000264167786, | |
| "eval_rouge1": 35.7722, | |
| "eval_rouge2": 12.5127, | |
| "eval_rougeL": 23.3002, | |
| "eval_runtime": 17.6216, | |
| "eval_samples_per_second": 5.675, | |
| "eval_steps_per_second": 0.057, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 2.3952095808383233e-06, | |
| "loss": 1.8708, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.3972055888223554e-06, | |
| "loss": 1.8628, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 3.992015968063872e-07, | |
| "loss": 1.8613, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 3340, | |
| "total_flos": 5.5473275160576e+16, | |
| "train_loss": 2.181800748630912, | |
| "train_runtime": 3900.3613, | |
| "train_samples_per_second": 54.733, | |
| "train_steps_per_second": 0.856 | |
| } | |
| ], | |
| "max_steps": 3340, | |
| "num_train_epochs": 5, | |
| "total_flos": 5.5473275160576e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |