Invalid JSON:
Unexpected token 'I', ..."ad_norm": Infinity,
"... is not valid JSON
| { | |
| "best_global_step": 7000, | |
| "best_metric": 0.4164, | |
| "best_model_checkpoint": "models/facebook/bart-large-cnn_30_5e-05/checkpoint-7000", | |
| "epoch": 28.225806451612904, | |
| "eval_steps": 500, | |
| "global_step": 7000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.0161290322580645, | |
| "grad_norm": 2.271665573120117, | |
| "learning_rate": 4.911573288058857e-05, | |
| "loss": 0.6372, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0161290322580645, | |
| "eval_gen_len": 86.6301, | |
| "eval_loss": 0.6856603622436523, | |
| "eval_rouge1": 0.3918, | |
| "eval_rouge2": 0.3151, | |
| "eval_rougeL": 0.3653, | |
| "eval_rougeLsum": 0.3829, | |
| "eval_runtime": 470.8787, | |
| "eval_samples_per_second": 2.4, | |
| "eval_steps_per_second": 0.076, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.032258064516129, | |
| "grad_norm": 1.920154333114624, | |
| "learning_rate": 4.557866440294284e-05, | |
| "loss": 0.3093, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.032258064516129, | |
| "eval_gen_len": 86.4681, | |
| "eval_loss": 0.8830690383911133, | |
| "eval_rouge1": 0.4027, | |
| "eval_rouge2": 0.328, | |
| "eval_rougeL": 0.3763, | |
| "eval_rougeLsum": 0.3948, | |
| "eval_runtime": 463.6435, | |
| "eval_samples_per_second": 2.437, | |
| "eval_steps_per_second": 0.078, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.048387096774194, | |
| "grad_norm": 2.545297622680664, | |
| "learning_rate": 4.204867006225241e-05, | |
| "loss": 0.1443, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.048387096774194, | |
| "eval_gen_len": 85.0513, | |
| "eval_loss": 1.0781320333480835, | |
| "eval_rouge1": 0.4129, | |
| "eval_rouge2": 0.3395, | |
| "eval_rougeL": 0.3857, | |
| "eval_rougeLsum": 0.4057, | |
| "eval_runtime": 467.3383, | |
| "eval_samples_per_second": 2.418, | |
| "eval_steps_per_second": 0.077, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.064516129032258, | |
| "grad_norm": 1.484631061553955, | |
| "learning_rate": 3.8511601584606685e-05, | |
| "loss": 0.076, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.064516129032258, | |
| "eval_gen_len": 77.6938, | |
| "eval_loss": 1.1164271831512451, | |
| "eval_rouge1": 0.4342, | |
| "eval_rouge2": 0.3594, | |
| "eval_rougeL": 0.4087, | |
| "eval_rougeLsum": 0.4259, | |
| "eval_runtime": 433.3086, | |
| "eval_samples_per_second": 2.608, | |
| "eval_steps_per_second": 0.083, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.080645161290322, | |
| "grad_norm": 0.9293203949928284, | |
| "learning_rate": 3.4974533106960955e-05, | |
| "loss": 0.046, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.080645161290322, | |
| "eval_gen_len": 79.4717, | |
| "eval_loss": 1.4387677907943726, | |
| "eval_rouge1": 0.4304, | |
| "eval_rouge2": 0.3557, | |
| "eval_rougeL": 0.406, | |
| "eval_rougeLsum": 0.423, | |
| "eval_runtime": 431.9944, | |
| "eval_samples_per_second": 2.616, | |
| "eval_steps_per_second": 0.083, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 12.096774193548388, | |
| "grad_norm": 1.3663330078125, | |
| "learning_rate": 3.143746462931523e-05, | |
| "loss": 0.0322, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 12.096774193548388, | |
| "eval_gen_len": 78.7823, | |
| "eval_loss": 1.2941193580627441, | |
| "eval_rouge1": 0.4297, | |
| "eval_rouge2": 0.3536, | |
| "eval_rougeL": 0.4034, | |
| "eval_rougeLsum": 0.4216, | |
| "eval_runtime": 429.3366, | |
| "eval_samples_per_second": 2.632, | |
| "eval_steps_per_second": 0.084, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 14.112903225806452, | |
| "grad_norm": 3.2132232189178467, | |
| "learning_rate": 2.79003961516695e-05, | |
| "loss": 0.0231, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 14.112903225806452, | |
| "eval_gen_len": 80.5938, | |
| "eval_loss": 1.4391534328460693, | |
| "eval_rouge1": 0.4162, | |
| "eval_rouge2": 0.342, | |
| "eval_rougeL": 0.3912, | |
| "eval_rougeLsum": 0.4083, | |
| "eval_runtime": 429.1979, | |
| "eval_samples_per_second": 2.633, | |
| "eval_steps_per_second": 0.084, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 16.129032258064516, | |
| "grad_norm": 0.9062560796737671, | |
| "learning_rate": 2.436332767402377e-05, | |
| "loss": 0.0173, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 16.129032258064516, | |
| "eval_gen_len": 81.5655, | |
| "eval_loss": 1.5033551454544067, | |
| "eval_rouge1": 0.4273, | |
| "eval_rouge2": 0.3537, | |
| "eval_rougeL": 0.4024, | |
| "eval_rougeLsum": 0.4199, | |
| "eval_runtime": 427.5276, | |
| "eval_samples_per_second": 2.643, | |
| "eval_steps_per_second": 0.084, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 18.14516129032258, | |
| "grad_norm": null, | |
| "learning_rate": 2.082625919637804e-05, | |
| "loss": 0.0126, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 18.14516129032258, | |
| "eval_gen_len": 81.192, | |
| "eval_loss": 1.4981940984725952, | |
| "eval_rouge1": 0.4252, | |
| "eval_rouge2": 0.3528, | |
| "eval_rougeL": 0.4013, | |
| "eval_rougeLsum": 0.4175, | |
| "eval_runtime": 437.7384, | |
| "eval_samples_per_second": 2.581, | |
| "eval_steps_per_second": 0.082, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 20.161290322580644, | |
| "grad_norm": 0.3979549705982208, | |
| "learning_rate": 1.729626485568761e-05, | |
| "loss": 0.009, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 20.161290322580644, | |
| "eval_gen_len": 81.9372, | |
| "eval_loss": 1.5800154209136963, | |
| "eval_rouge1": 0.4257, | |
| "eval_rouge2": 0.3534, | |
| "eval_rougeL": 0.401, | |
| "eval_rougeLsum": 0.418, | |
| "eval_runtime": 434.4426, | |
| "eval_samples_per_second": 2.601, | |
| "eval_steps_per_second": 0.083, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 22.177419354838708, | |
| "grad_norm": 1.6955211162567139, | |
| "learning_rate": 1.375919637804188e-05, | |
| "loss": 0.0063, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 22.177419354838708, | |
| "eval_gen_len": 79.0938, | |
| "eval_loss": 1.576202154159546, | |
| "eval_rouge1": 0.4323, | |
| "eval_rouge2": 0.3582, | |
| "eval_rougeL": 0.4074, | |
| "eval_rougeLsum": 0.4245, | |
| "eval_runtime": 428.2115, | |
| "eval_samples_per_second": 2.639, | |
| "eval_steps_per_second": 0.084, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 24.193548387096776, | |
| "grad_norm": 0.2606065273284912, | |
| "learning_rate": 1.0222127900396152e-05, | |
| "loss": 0.0046, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 24.193548387096776, | |
| "eval_gen_len": 80.6867, | |
| "eval_loss": 1.5904020071029663, | |
| "eval_rouge1": 0.4385, | |
| "eval_rouge2": 0.3675, | |
| "eval_rougeL": 0.4145, | |
| "eval_rougeLsum": 0.4313, | |
| "eval_runtime": 427.4723, | |
| "eval_samples_per_second": 2.643, | |
| "eval_steps_per_second": 0.084, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 26.20967741935484, | |
| "grad_norm": 0.12859472632408142, | |
| "learning_rate": 6.685059422750426e-06, | |
| "loss": 0.0033, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 26.20967741935484, | |
| "eval_gen_len": 79.9637, | |
| "eval_loss": 1.5917092561721802, | |
| "eval_rouge1": 0.4393, | |
| "eval_rouge2": 0.3686, | |
| "eval_rougeL": 0.415, | |
| "eval_rougeLsum": 0.4312, | |
| "eval_runtime": 433.2177, | |
| "eval_samples_per_second": 2.608, | |
| "eval_steps_per_second": 0.083, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 28.225806451612904, | |
| "grad_norm": 0.3688865005970001, | |
| "learning_rate": 3.1479909451046973e-06, | |
| "loss": 0.002, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 28.225806451612904, | |
| "eval_gen_len": 80.7027, | |
| "eval_loss": 1.6072598695755005, | |
| "eval_rouge1": 0.4404, | |
| "eval_rouge2": 0.3687, | |
| "eval_rougeL": 0.4164, | |
| "eval_rougeLsum": 0.4327, | |
| "eval_runtime": 441.5366, | |
| "eval_samples_per_second": 2.559, | |
| "eval_steps_per_second": 0.082, | |
| "step": 7000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 7440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.704962073115853e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |