AroundA / trainer_state.json
leexiaohua's picture
Upload folder using huggingface_hub
a2230bf verified
{
"best_metric": 0.7644568347735216,
"best_model_checkpoint": "nucleotide-transformer-finetuned/checkpoint-10500",
"epoch": 2.7230290456431536,
"eval_steps": 500,
"global_step": 10500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12966804979253113,
"grad_norm": 2.4697630405426025,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.9082,
"step": 500
},
{
"epoch": 0.12966804979253113,
"eval_accuracy": 0.8507642200272479,
"eval_f1_score": 0.6717309860785999,
"eval_loss": 0.44925403594970703,
"eval_runtime": 27.2642,
"eval_samples_per_second": 3450.677,
"eval_steps_per_second": 13.498,
"step": 500
},
{
"epoch": 0.25933609958506226,
"grad_norm": 1.604984164237976,
"learning_rate": 5.333333333333333e-05,
"loss": 0.41,
"step": 1000
},
{
"epoch": 0.25933609958506226,
"eval_accuracy": 0.8567779291553134,
"eval_f1_score": 0.6951178092124136,
"eval_loss": 0.42545419931411743,
"eval_runtime": 27.28,
"eval_samples_per_second": 3448.682,
"eval_steps_per_second": 13.49,
"step": 1000
},
{
"epoch": 0.38900414937759337,
"grad_norm": 1.0481213331222534,
"learning_rate": 8e-05,
"loss": 0.354,
"step": 1500
},
{
"epoch": 0.38900414937759337,
"eval_accuracy": 0.8549897820163488,
"eval_f1_score": 0.7070964269415976,
"eval_loss": 0.42259615659713745,
"eval_runtime": 27.2867,
"eval_samples_per_second": 3447.839,
"eval_steps_per_second": 13.486,
"step": 1500
},
{
"epoch": 0.5186721991701245,
"grad_norm": 1.2174954414367676,
"learning_rate": 7.993926065733265e-05,
"loss": 0.3341,
"step": 2000
},
{
"epoch": 0.5186721991701245,
"eval_accuracy": 0.8659954019073569,
"eval_f1_score": 0.7420034809783201,
"eval_loss": 0.39591285586357117,
"eval_runtime": 27.2541,
"eval_samples_per_second": 3451.961,
"eval_steps_per_second": 13.503,
"step": 2000
},
{
"epoch": 0.6483402489626556,
"grad_norm": 0.8332775831222534,
"learning_rate": 7.975722709271799e-05,
"loss": 0.3016,
"step": 2500
},
{
"epoch": 0.6483402489626556,
"eval_accuracy": 0.8649097411444142,
"eval_f1_score": 0.7200074103536797,
"eval_loss": 0.39967137575149536,
"eval_runtime": 27.2593,
"eval_samples_per_second": 3451.294,
"eval_steps_per_second": 13.5,
"step": 2500
},
{
"epoch": 0.7780082987551867,
"grad_norm": 1.4946839809417725,
"learning_rate": 7.94544521361089e-05,
"loss": 0.2963,
"step": 3000
},
{
"epoch": 0.7780082987551867,
"eval_accuracy": 0.8722113419618529,
"eval_f1_score": 0.762706973392787,
"eval_loss": 0.3748593032360077,
"eval_runtime": 27.2556,
"eval_samples_per_second": 3451.767,
"eval_steps_per_second": 13.502,
"step": 3000
},
{
"epoch": 0.9076763485477178,
"grad_norm": 0.8928861618041992,
"learning_rate": 7.903185530509743e-05,
"loss": 0.2824,
"step": 3500
},
{
"epoch": 0.9076763485477178,
"eval_accuracy": 0.8655164339237057,
"eval_f1_score": 0.7593168456181596,
"eval_loss": 0.3944181501865387,
"eval_runtime": 27.2652,
"eval_samples_per_second": 3450.546,
"eval_steps_per_second": 13.497,
"step": 3500
},
{
"epoch": 1.037344398340249,
"grad_norm": 0.7256332635879517,
"learning_rate": 7.849072001237001e-05,
"loss": 0.2672,
"step": 4000
},
{
"epoch": 1.037344398340249,
"eval_accuracy": 0.8731692779291553,
"eval_f1_score": 0.760049724716224,
"eval_loss": 0.3828999400138855,
"eval_runtime": 27.2571,
"eval_samples_per_second": 3451.579,
"eval_steps_per_second": 13.501,
"step": 4000
},
{
"epoch": 1.1670124481327802,
"grad_norm": 0.8104374408721924,
"learning_rate": 7.783268966802539e-05,
"loss": 0.2383,
"step": 4500
},
{
"epoch": 1.1670124481327802,
"eval_accuracy": 0.8741165701634878,
"eval_f1_score": 0.750188219325254,
"eval_loss": 0.3871263861656189,
"eval_runtime": 27.2708,
"eval_samples_per_second": 3449.845,
"eval_steps_per_second": 13.494,
"step": 4500
},
{
"epoch": 1.2966804979253113,
"grad_norm": 1.0798064470291138,
"learning_rate": 7.705976268859207e-05,
"loss": 0.2371,
"step": 5000
},
{
"epoch": 1.2966804979253113,
"eval_accuracy": 0.872264560626703,
"eval_f1_score": 0.7557877951536082,
"eval_loss": 0.38182297348976135,
"eval_runtime": 27.2826,
"eval_samples_per_second": 3448.353,
"eval_steps_per_second": 13.488,
"step": 5000
},
{
"epoch": 1.4263485477178424,
"grad_norm": 0.905769407749176,
"learning_rate": 7.61742864279031e-05,
"loss": 0.2362,
"step": 5500
},
{
"epoch": 1.4263485477178424,
"eval_accuracy": 0.8758514986376021,
"eval_f1_score": 0.7580900780338423,
"eval_loss": 0.38698655366897583,
"eval_runtime": 27.2696,
"eval_samples_per_second": 3449.99,
"eval_steps_per_second": 13.495,
"step": 5500
},
{
"epoch": 1.5560165975103735,
"grad_norm": 1.2751230001449585,
"learning_rate": 7.517895004825956e-05,
"loss": 0.2368,
"step": 6000
},
{
"epoch": 1.5560165975103735,
"eval_accuracy": 0.8758834298365122,
"eval_f1_score": 0.7618952550392576,
"eval_loss": 0.38447433710098267,
"eval_runtime": 27.2481,
"eval_samples_per_second": 3452.724,
"eval_steps_per_second": 13.506,
"step": 6000
},
{
"epoch": 1.6856846473029046,
"grad_norm": 0.914685070514679,
"learning_rate": 7.407677635353308e-05,
"loss": 0.2363,
"step": 6500
},
{
"epoch": 1.6856846473029046,
"eval_accuracy": 0.8790126873297003,
"eval_f1_score": 0.7640527203630835,
"eval_loss": 0.373322993516922,
"eval_runtime": 27.2729,
"eval_samples_per_second": 3449.582,
"eval_steps_per_second": 13.493,
"step": 6500
},
{
"epoch": 1.8153526970954357,
"grad_norm": 1.1023831367492676,
"learning_rate": 7.28711126090098e-05,
"loss": 0.2315,
"step": 7000
},
{
"epoch": 1.8153526970954357,
"eval_accuracy": 0.8774054836512262,
"eval_f1_score": 0.7444152176417628,
"eval_loss": 0.38009563088417053,
"eval_runtime": 27.2782,
"eval_samples_per_second": 3448.913,
"eval_steps_per_second": 13.491,
"step": 7000
},
{
"epoch": 1.9450207468879668,
"grad_norm": 0.9494823813438416,
"learning_rate": 7.156562037585576e-05,
"loss": 0.2313,
"step": 7500
},
{
"epoch": 1.9450207468879668,
"eval_accuracy": 0.8732544277929155,
"eval_f1_score": 0.7243437098127102,
"eval_loss": 0.39090830087661743,
"eval_runtime": 27.2888,
"eval_samples_per_second": 3447.563,
"eval_steps_per_second": 13.485,
"step": 7500
},
{
"epoch": 2.074688796680498,
"grad_norm": 0.9810757637023926,
"learning_rate": 7.016426439107586e-05,
"loss": 0.1833,
"step": 8000
},
{
"epoch": 2.074688796680498,
"eval_accuracy": 0.8750957935967303,
"eval_f1_score": 0.7539908472094665,
"eval_loss": 0.4634763300418854,
"eval_runtime": 27.2517,
"eval_samples_per_second": 3452.266,
"eval_steps_per_second": 13.504,
"step": 8000
},
{
"epoch": 2.204356846473029,
"grad_norm": 2.2647929191589355,
"learning_rate": 6.867130052673806e-05,
"loss": 0.1435,
"step": 8500
},
{
"epoch": 2.204356846473029,
"eval_accuracy": 0.8713066246594006,
"eval_f1_score": 0.75353223953074,
"eval_loss": 0.4573169946670532,
"eval_runtime": 27.2786,
"eval_samples_per_second": 3448.858,
"eval_steps_per_second": 13.49,
"step": 8500
},
{
"epoch": 2.3340248962655603,
"grad_norm": 1.596372365951538,
"learning_rate": 6.709126286502965e-05,
"loss": 0.1444,
"step": 9000
},
{
"epoch": 2.3340248962655603,
"eval_accuracy": 0.876926515667575,
"eval_f1_score": 0.7590948191097212,
"eval_loss": 0.4492976665496826,
"eval_runtime": 27.2687,
"eval_samples_per_second": 3450.111,
"eval_steps_per_second": 13.495,
"step": 9000
},
{
"epoch": 2.4636929460580914,
"grad_norm": 1.2077136039733887,
"learning_rate": 6.542894992839873e-05,
"loss": 0.1432,
"step": 9500
},
{
"epoch": 2.4636929460580914,
"eval_accuracy": 0.8714556369209809,
"eval_f1_score": 0.7461849778780044,
"eval_loss": 0.4478127658367157,
"eval_runtime": 27.271,
"eval_samples_per_second": 3449.821,
"eval_steps_per_second": 13.494,
"step": 9500
},
{
"epoch": 2.5933609958506225,
"grad_norm": 0.8201944828033447,
"learning_rate": 6.368941010659921e-05,
"loss": 0.1475,
"step": 10000
},
{
"epoch": 2.5933609958506225,
"eval_accuracy": 0.8705083446866485,
"eval_f1_score": 0.7433809202400957,
"eval_loss": 0.44828951358795166,
"eval_runtime": 27.2671,
"eval_samples_per_second": 3450.317,
"eval_steps_per_second": 13.496,
"step": 10000
},
{
"epoch": 2.7230290456431536,
"grad_norm": 0.8232116103172302,
"learning_rate": 6.18779263248971e-05,
"loss": 0.1432,
"step": 10500
},
{
"epoch": 2.7230290456431536,
"eval_accuracy": 0.8751809434604905,
"eval_f1_score": 0.7644568347735216,
"eval_loss": 0.4298805892467499,
"eval_runtime": 27.2878,
"eval_samples_per_second": 3447.699,
"eval_steps_per_second": 13.486,
"step": 10500
}
],
"logging_steps": 500,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.654584853376e+16,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}