LED_Layman / trainer_state.json
harsh580g's picture
Upload folder using huggingface_hub
8b9fd30 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 2640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 130.01,
"eval_loss": 1.9146157503128052,
"eval_rouge1": 0.4589875139531387,
"eval_rouge2": 0.21878510498585943,
"eval_rougeL": 0.3016127842816204,
"eval_rougeLsum": 0.4009407199247955,
"eval_runtime": 66.3133,
"eval_samples_per_second": 1.508,
"eval_steps_per_second": 0.106,
"step": 440
},
{
"epoch": 1.1363636363636362,
"grad_norm": 2.9900436401367188,
"learning_rate": 4.844240313802481e-06,
"loss": 1.826,
"step": 500
},
{
"epoch": 2.0,
"eval_gen_len": 121.92,
"eval_loss": 1.8549950122833252,
"eval_rouge1": 0.4619905413631098,
"eval_rouge2": 0.2162940062464438,
"eval_rougeL": 0.30137015640361065,
"eval_rougeLsum": 0.4029678217620495,
"eval_runtime": 313.1936,
"eval_samples_per_second": 0.319,
"eval_steps_per_second": 0.022,
"step": 880
},
{
"epoch": 2.2727272727272725,
"grad_norm": 5.034411907196045,
"learning_rate": 4.394041918714914e-06,
"loss": 1.6317,
"step": 1000
},
{
"epoch": 3.0,
"eval_gen_len": 120.8,
"eval_loss": 1.8090400695800781,
"eval_rouge1": 0.4670357799617887,
"eval_rouge2": 0.22161111145750578,
"eval_rougeL": 0.3101602334681084,
"eval_rougeLsum": 0.40916586051494386,
"eval_runtime": 272.6557,
"eval_samples_per_second": 0.367,
"eval_steps_per_second": 0.026,
"step": 1320
},
{
"epoch": 3.409090909090909,
"grad_norm": 2.994469404220581,
"learning_rate": 3.7059480818938033e-06,
"loss": 1.541,
"step": 1500
},
{
"epoch": 4.0,
"eval_gen_len": 122.31,
"eval_loss": 1.7940858602523804,
"eval_rouge1": 0.47786994937778915,
"eval_rouge2": 0.23261031528055848,
"eval_rougeL": 0.3195968141082743,
"eval_rougeLsum": 0.4178774735087476,
"eval_runtime": 267.101,
"eval_samples_per_second": 0.374,
"eval_steps_per_second": 0.026,
"step": 1760
},
{
"epoch": 4.545454545454545,
"grad_norm": 3.3205864429473877,
"learning_rate": 2.86638476131148e-06,
"loss": 1.4761,
"step": 2000
},
{
"epoch": 5.0,
"eval_gen_len": 120.21,
"eval_loss": 1.7847797870635986,
"eval_rouge1": 0.46872849142787104,
"eval_rouge2": 0.22607984634691033,
"eval_rougeL": 0.31102687015492186,
"eval_rougeLsum": 0.4124971038488223,
"eval_runtime": 255.0208,
"eval_samples_per_second": 0.392,
"eval_steps_per_second": 0.027,
"step": 2200
},
{
"epoch": 5.681818181818182,
"grad_norm": 3.04300594329834,
"learning_rate": 1.980802784132701e-06,
"loss": 1.426,
"step": 2500
},
{
"epoch": 6.0,
"eval_gen_len": 116.53,
"eval_loss": 1.7648776769638062,
"eval_rouge1": 0.4707629824198739,
"eval_rouge2": 0.2250141092622865,
"eval_rougeL": 0.3104222446400876,
"eval_rougeLsum": 0.4100478178262512,
"eval_runtime": 240.46,
"eval_samples_per_second": 0.416,
"eval_steps_per_second": 0.029,
"step": 2640
}
],
"logging_steps": 500,
"max_steps": 4400,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0120075847663616e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}