galkhesnet / trainer_state.json
shoowadoo's picture
Upload folder using huggingface_hub
4e0e469 verified
{
"best_global_step": 18496,
"best_metric": 0.914106447425357,
"best_model_checkpoint": "./results/checkpoint-18496",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 18496,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 35730.43359375,
"learning_rate": 9.666955017301039e-05,
"loss": 0.0899,
"step": 1156
},
{
"epoch": 1.0,
"eval_exact_match": 0.8427087840761575,
"eval_loss": 0.05666187405586243,
"eval_runtime": 178.4282,
"eval_samples_per_second": 25.904,
"eval_steps_per_second": 0.813,
"step": 1156
},
{
"epoch": 2.0,
"grad_norm": 61059.6328125,
"learning_rate": 9.333621683967705e-05,
"loss": 0.0609,
"step": 2312
},
{
"epoch": 2.0,
"eval_exact_match": 0.8658589355257464,
"eval_loss": 0.0455557182431221,
"eval_runtime": 178.7826,
"eval_samples_per_second": 25.853,
"eval_steps_per_second": 0.811,
"step": 2312
},
{
"epoch": 3.0,
"grad_norm": 17563.07421875,
"learning_rate": 9.000288350634371e-05,
"loss": 0.0469,
"step": 3468
},
{
"epoch": 3.0,
"eval_exact_match": 0.8764604067503245,
"eval_loss": 0.042877476662397385,
"eval_runtime": 181.9993,
"eval_samples_per_second": 25.396,
"eval_steps_per_second": 0.797,
"step": 3468
},
{
"epoch": 4.0,
"grad_norm": 87660.9375,
"learning_rate": 8.666955017301039e-05,
"loss": 0.0385,
"step": 4624
},
{
"epoch": 4.0,
"eval_exact_match": 0.8855473820856772,
"eval_loss": 0.04085667058825493,
"eval_runtime": 178.1605,
"eval_samples_per_second": 25.943,
"eval_steps_per_second": 0.814,
"step": 4624
},
{
"epoch": 5.0,
"grad_norm": 39212.90625,
"learning_rate": 8.333621683967705e-05,
"loss": 0.0323,
"step": 5780
},
{
"epoch": 5.0,
"eval_exact_match": 0.8942016443098226,
"eval_loss": 0.037444353103637695,
"eval_runtime": 179.3565,
"eval_samples_per_second": 25.77,
"eval_steps_per_second": 0.808,
"step": 5780
},
{
"epoch": 6.0,
"grad_norm": 11288.9541015625,
"learning_rate": 8.000288350634371e-05,
"loss": 0.0273,
"step": 6936
},
{
"epoch": 6.0,
"eval_exact_match": 0.8976633491994808,
"eval_loss": 0.0365648977458477,
"eval_runtime": 179.1368,
"eval_samples_per_second": 25.802,
"eval_steps_per_second": 0.809,
"step": 6936
},
{
"epoch": 7.0,
"grad_norm": 190008.28125,
"learning_rate": 7.666955017301039e-05,
"loss": 0.0243,
"step": 8092
},
{
"epoch": 7.0,
"eval_exact_match": 0.8993942016443098,
"eval_loss": 0.03789495304226875,
"eval_runtime": 180.1506,
"eval_samples_per_second": 25.656,
"eval_steps_per_second": 0.805,
"step": 8092
},
{
"epoch": 8.0,
"grad_norm": 55407.90234375,
"learning_rate": 7.333621683967705e-05,
"loss": 0.021,
"step": 9248
},
{
"epoch": 8.0,
"eval_exact_match": 0.9056685417568152,
"eval_loss": 0.03875722736120224,
"eval_runtime": 178.8401,
"eval_samples_per_second": 25.844,
"eval_steps_per_second": 0.811,
"step": 9248
},
{
"epoch": 9.0,
"grad_norm": 20677.572265625,
"learning_rate": 7.000288350634371e-05,
"loss": 0.0189,
"step": 10404
},
{
"epoch": 9.0,
"eval_exact_match": 0.9078321073128516,
"eval_loss": 0.03903103992342949,
"eval_runtime": 181.7261,
"eval_samples_per_second": 25.434,
"eval_steps_per_second": 0.798,
"step": 10404
},
{
"epoch": 10.0,
"grad_norm": 33437.46484375,
"learning_rate": 6.666955017301039e-05,
"loss": 0.0166,
"step": 11560
},
{
"epoch": 10.0,
"eval_exact_match": 0.9071830376460407,
"eval_loss": 0.04076966270804405,
"eval_runtime": 177.5683,
"eval_samples_per_second": 26.029,
"eval_steps_per_second": 0.817,
"step": 11560
},
{
"epoch": 11.0,
"grad_norm": 23686.2890625,
"learning_rate": 6.333621683967705e-05,
"loss": 0.0151,
"step": 12716
},
{
"epoch": 11.0,
"eval_exact_match": 0.9102120294244915,
"eval_loss": 0.04429745301604271,
"eval_runtime": 174.7412,
"eval_samples_per_second": 26.451,
"eval_steps_per_second": 0.83,
"step": 12716
},
{
"epoch": 12.0,
"grad_norm": 8527.7021484375,
"learning_rate": 6.000288350634372e-05,
"loss": 0.0133,
"step": 13872
},
{
"epoch": 12.0,
"eval_exact_match": 0.9110774556469061,
"eval_loss": 0.04824285954236984,
"eval_runtime": 184.9616,
"eval_samples_per_second": 24.989,
"eval_steps_per_second": 0.784,
"step": 13872
},
{
"epoch": 13.0,
"grad_norm": 5270.71826171875,
"learning_rate": 5.666955017301039e-05,
"loss": 0.0123,
"step": 15028
},
{
"epoch": 13.0,
"eval_exact_match": 0.9073993942016443,
"eval_loss": 0.04638659209012985,
"eval_runtime": 176.014,
"eval_samples_per_second": 26.259,
"eval_steps_per_second": 0.824,
"step": 15028
},
{
"epoch": 14.0,
"grad_norm": 15770.6142578125,
"learning_rate": 5.3336216839677045e-05,
"loss": 0.0112,
"step": 16184
},
{
"epoch": 14.0,
"eval_exact_match": 0.9067503245348334,
"eval_loss": 0.05095385015010834,
"eval_runtime": 176.9305,
"eval_samples_per_second": 26.123,
"eval_steps_per_second": 0.82,
"step": 16184
},
{
"epoch": 15.0,
"grad_norm": 2677.674560546875,
"learning_rate": 5.0002883506343714e-05,
"loss": 0.0106,
"step": 17340
},
{
"epoch": 15.0,
"eval_exact_match": 0.9119428818693206,
"eval_loss": 0.050192590802907944,
"eval_runtime": 175.5562,
"eval_samples_per_second": 26.328,
"eval_steps_per_second": 0.826,
"step": 17340
},
{
"epoch": 16.0,
"grad_norm": 109526.6953125,
"learning_rate": 4.6669550173010384e-05,
"loss": 0.0097,
"step": 18496
},
{
"epoch": 16.0,
"eval_exact_match": 0.914106447425357,
"eval_loss": 0.05421430617570877,
"eval_runtime": 175.3249,
"eval_samples_per_second": 26.362,
"eval_steps_per_second": 0.827,
"step": 18496
}
],
"logging_steps": 500,
"max_steps": 34680,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.634418345880166e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}