| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 2639, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 117.37, | |
| "eval_loss": 1.6928337812423706, | |
| "eval_rouge1": 0.42757910674263067, | |
| "eval_rouge2": 0.15720196581442164, | |
| "eval_rougeL": 0.256418241790792, | |
| "eval_rougeLsum": 0.35382355041095964, | |
| "eval_runtime": 1351.6766, | |
| "eval_samples_per_second": 0.74, | |
| "eval_steps_per_second": 0.047, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.3262599469496021, | |
| "grad_norm": 3.7333056926727295, | |
| "learning_rate": 2.8741879956059633e-06, | |
| "loss": 1.7717, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 116.608, | |
| "eval_loss": 1.6410094499588013, | |
| "eval_rouge1": 0.4286897823447751, | |
| "eval_rouge2": 0.15765392626100322, | |
| "eval_rougeL": 0.25895063496135456, | |
| "eval_rougeLsum": 0.3538857213392972, | |
| "eval_runtime": 1170.2376, | |
| "eval_samples_per_second": 0.855, | |
| "eval_steps_per_second": 0.054, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.6525198938992043, | |
| "grad_norm": 4.453255653381348, | |
| "learning_rate": 2.514178628889199e-06, | |
| "loss": 1.5895, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 117.178, | |
| "eval_loss": 1.6089894771575928, | |
| "eval_rouge1": 0.42709156909402823, | |
| "eval_rouge2": 0.1553778143480612, | |
| "eval_rougeL": 0.25488136514137727, | |
| "eval_rougeLsum": 0.3525718986787997, | |
| "eval_runtime": 1384.4242, | |
| "eval_samples_per_second": 0.722, | |
| "eval_steps_per_second": 0.046, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 3.9787798408488064, | |
| "grad_norm": 3.2527501583099365, | |
| "learning_rate": 1.981320706041849e-06, | |
| "loss": 1.5182, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 118.94, | |
| "eval_loss": 1.597896933555603, | |
| "eval_rouge1": 0.4321139550542189, | |
| "eval_rouge2": 0.16019449900934246, | |
| "eval_rougeL": 0.2577516929717559, | |
| "eval_rougeLsum": 0.3566245729299692, | |
| "eval_runtime": 1616.3945, | |
| "eval_samples_per_second": 0.619, | |
| "eval_steps_per_second": 0.039, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 116.603, | |
| "eval_loss": 1.583080530166626, | |
| "eval_rouge1": 0.43043600066735005, | |
| "eval_rouge2": 0.16036196231543565, | |
| "eval_rougeL": 0.2578722844279166, | |
| "eval_rougeLsum": 0.35735455171087505, | |
| "eval_runtime": 1754.8754, | |
| "eval_samples_per_second": 0.57, | |
| "eval_steps_per_second": 0.036, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 5.305039787798409, | |
| "grad_norm": 3.344116687774658, | |
| "learning_rate": 1.3651853437341204e-06, | |
| "loss": 1.4547, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 116.67, | |
| "eval_loss": 1.5650923252105713, | |
| "eval_rouge1": 0.4348875537307479, | |
| "eval_rouge2": 0.16312989568941516, | |
| "eval_rougeL": 0.26082852541259915, | |
| "eval_rougeLsum": 0.358369227586099, | |
| "eval_runtime": 1733.5816, | |
| "eval_samples_per_second": 0.577, | |
| "eval_steps_per_second": 0.036, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 6.63129973474801, | |
| "grad_norm": 3.3940606117248535, | |
| "learning_rate": 7.721175674180988e-07, | |
| "loss": 1.4188, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 117.259, | |
| "eval_loss": 1.5626862049102783, | |
| "eval_rouge1": 0.43666460369707594, | |
| "eval_rouge2": 0.16469845252758708, | |
| "eval_rougeL": 0.26181956536662193, | |
| "eval_rougeLsum": 0.3620078400661802, | |
| "eval_runtime": 1745.3989, | |
| "eval_samples_per_second": 0.573, | |
| "eval_steps_per_second": 0.036, | |
| "step": 2639 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 3770, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0123914662589235e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |