{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 594,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.050505050505050504,
"grad_norm": 15.79202210209821,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.8701,
"step": 10
},
{
"epoch": 0.10101010101010101,
"grad_norm": 3.0942332223409834,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.5733,
"step": 20
},
{
"epoch": 0.15151515151515152,
"grad_norm": 4.298646784667104,
"learning_rate": 5e-06,
"loss": 0.6136,
"step": 30
},
{
"epoch": 0.20202020202020202,
"grad_norm": 2.703078685427566,
"learning_rate": 6.666666666666667e-06,
"loss": 0.5472,
"step": 40
},
{
"epoch": 0.25252525252525254,
"grad_norm": 29.98527759901386,
"learning_rate": 8.333333333333334e-06,
"loss": 0.5259,
"step": 50
},
{
"epoch": 0.30303030303030304,
"grad_norm": 8.787895364111028,
"learning_rate": 1e-05,
"loss": 0.5159,
"step": 60
},
{
"epoch": 0.35353535353535354,
"grad_norm": 2.714725648499293,
"learning_rate": 9.991349683972435e-06,
"loss": 0.5982,
"step": 70
},
{
"epoch": 0.40404040404040403,
"grad_norm": 2.393261010163645,
"learning_rate": 9.965428667076687e-06,
"loss": 0.6628,
"step": 80
},
{
"epoch": 0.45454545454545453,
"grad_norm": 15.838479639161053,
"learning_rate": 9.922326639307918e-06,
"loss": 0.5424,
"step": 90
},
{
"epoch": 0.5050505050505051,
"grad_norm": 2.726593770710591,
"learning_rate": 9.86219273913078e-06,
"loss": 0.6873,
"step": 100
},
{
"epoch": 0.5555555555555556,
"grad_norm": 3.6530539945981584,
"learning_rate": 9.785235037441473e-06,
"loss": 0.6324,
"step": 110
},
{
"epoch": 0.6060606060606061,
"grad_norm": 2.2910687177923896,
"learning_rate": 9.691719817616148e-06,
"loss": 0.4908,
"step": 120
},
{
"epoch": 0.6565656565656566,
"grad_norm": 2.749982467389248,
"learning_rate": 9.581970654136752e-06,
"loss": 0.577,
"step": 130
},
{
"epoch": 0.7070707070707071,
"grad_norm": 2.621644636283292,
"learning_rate": 9.45636729298243e-06,
"loss": 0.5327,
"step": 140
},
{
"epoch": 0.7575757575757576,
"grad_norm": 2.1198799176542376,
"learning_rate": 9.315344337660422e-06,
"loss": 0.6347,
"step": 150
},
{
"epoch": 0.8080808080808081,
"grad_norm": 1.7972487002464925,
"learning_rate": 9.159389745423003e-06,
"loss": 0.6343,
"step": 160
},
{
"epoch": 0.8585858585858586,
"grad_norm": 2.5406708998399763,
"learning_rate": 8.98904313887369e-06,
"loss": 0.6402,
"step": 170
},
{
"epoch": 0.9090909090909091,
"grad_norm": 7.266719521619973,
"learning_rate": 8.804893938804839e-06,
"loss": 0.5157,
"step": 180
},
{
"epoch": 0.9595959595959596,
"grad_norm": 13.316489295838588,
"learning_rate": 8.607579324727175e-06,
"loss": 0.5619,
"step": 190
},
{
"epoch": 1.0101010101010102,
"grad_norm": 1.8872961310512126,
"learning_rate": 8.397782030148147e-06,
"loss": 0.4112,
"step": 200
},
{
"epoch": 1.0606060606060606,
"grad_norm": 1.676003362944885,
"learning_rate": 8.176227980227693e-06,
"loss": 0.488,
"step": 210
},
{
"epoch": 1.1111111111111112,
"grad_norm": 2.282289630564045,
"learning_rate": 7.943683779985412e-06,
"loss": 0.4915,
"step": 220
},
{
"epoch": 1.1616161616161615,
"grad_norm": 2.4871416093603376,
"learning_rate": 7.700954061750295e-06,
"loss": 0.4777,
"step": 230
},
{
"epoch": 1.2121212121212122,
"grad_norm": 2.5839494868810826,
"learning_rate": 7.4488787010311425e-06,
"loss": 0.6122,
"step": 240
},
{
"epoch": 1.2626262626262625,
"grad_norm": 10.641920140721124,
"learning_rate": 7.188329910441154e-06,
"loss": 0.4543,
"step": 250
},
{
"epoch": 1.3131313131313131,
"grad_norm": 3.0307123950276402,
"learning_rate": 6.920209221732007e-06,
"loss": 0.4843,
"step": 260
},
{
"epoch": 1.3636363636363638,
"grad_norm": 2.122645461889907,
"learning_rate": 6.64544436638005e-06,
"loss": 0.4867,
"step": 270
},
{
"epoch": 1.4141414141414141,
"grad_norm": 2.631666798742427,
"learning_rate": 6.364986065518106e-06,
"loss": 0.4209,
"step": 280
},
{
"epoch": 1.4646464646464645,
"grad_norm": 2.9904146392503024,
"learning_rate": 6.079804740320181e-06,
"loss": 0.4111,
"step": 290
},
{
"epoch": 1.5151515151515151,
"grad_norm": 3.8938710402714247,
"learning_rate": 5.790887154221521e-06,
"loss": 0.4576,
"step": 300
},
{
"epoch": 1.5656565656565657,
"grad_norm": 1.4401476185203614,
"learning_rate": 5.499232998592399e-06,
"loss": 0.4169,
"step": 310
},
{
"epoch": 1.6161616161616161,
"grad_norm": 1.7931080573819276,
"learning_rate": 5.20585143367959e-06,
"loss": 0.4786,
"step": 320
},
{
"epoch": 1.6666666666666665,
"grad_norm": 2.6304765740012592,
"learning_rate": 4.911757596784358e-06,
"loss": 0.4335,
"step": 330
},
{
"epoch": 1.7171717171717171,
"grad_norm": 1.7093124594050306,
"learning_rate": 4.617969089759066e-06,
"loss": 0.4032,
"step": 340
},
{
"epoch": 1.7676767676767677,
"grad_norm": 1.7751425049397478,
"learning_rate": 4.325502457976126e-06,
"loss": 0.4026,
"step": 350
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.5604623454463842,
"learning_rate": 4.035369672952516e-06,
"loss": 0.4396,
"step": 360
},
{
"epoch": 1.8686868686868687,
"grad_norm": 4.974012035659653,
"learning_rate": 3.7485746308004013e-06,
"loss": 0.4729,
"step": 370
},
{
"epoch": 1.9191919191919191,
"grad_norm": 1.6364473729173084,
"learning_rate": 3.466109678619681e-06,
"loss": 0.4446,
"step": 380
},
{
"epoch": 1.9696969696969697,
"grad_norm": 1.9395028093472382,
"learning_rate": 3.1889521808515888e-06,
"loss": 0.3533,
"step": 390
},
{
"epoch": 2.0202020202020203,
"grad_norm": 1.8373408967141356,
"learning_rate": 2.9180611374741623e-06,
"loss": 0.3406,
"step": 400
},
{
"epoch": 2.0707070707070705,
"grad_norm": 3.399858195905016,
"learning_rate": 2.6543738657411033e-06,
"loss": 0.2985,
"step": 410
},
{
"epoch": 2.121212121212121,
"grad_norm": 3.2351800580212973,
"learning_rate": 2.3988027569455895e-06,
"loss": 0.3004,
"step": 420
},
{
"epoch": 2.1717171717171717,
"grad_norm": 4.35904988804887,
"learning_rate": 2.1522321194310577e-06,
"loss": 0.3149,
"step": 430
},
{
"epoch": 2.2222222222222223,
"grad_norm": 3.1884018644318024,
"learning_rate": 1.915515118772555e-06,
"loss": 0.2804,
"step": 440
},
{
"epoch": 2.2727272727272725,
"grad_norm": 2.0429924860959217,
"learning_rate": 1.689470825715998e-06,
"loss": 0.2729,
"step": 450
},
{
"epoch": 2.323232323232323,
"grad_norm": 2.648622548442594,
"learning_rate": 1.4748813820898554e-06,
"loss": 0.3002,
"step": 460
},
{
"epoch": 2.3737373737373737,
"grad_norm": 1.3508677209033961,
"learning_rate": 1.272489294495548e-06,
"loss": 0.2667,
"step": 470
},
{
"epoch": 2.4242424242424243,
"grad_norm": 3.366120177530913,
"learning_rate": 1.0829948651407374e-06,
"loss": 0.3297,
"step": 480
},
{
"epoch": 2.474747474747475,
"grad_norm": 1.9426057690340581,
"learning_rate": 9.070537687051817e-07,
"loss": 0.2862,
"step": 490
},
{
"epoch": 2.525252525252525,
"grad_norm": 2.997630345355421,
"learning_rate": 7.452747836234392e-07,
"loss": 0.2782,
"step": 500
},
{
"epoch": 2.5757575757575757,
"grad_norm": 1.9235000947632515,
"learning_rate": 5.982176856345445e-07,
"loss": 0.2541,
"step": 510
},
{
"epoch": 2.6262626262626263,
"grad_norm": 2.042417538987798,
"learning_rate": 4.663913108871726e-07,
"loss": 0.2649,
"step": 520
},
{
"epoch": 2.676767676767677,
"grad_norm": 2.096088188603427,
"learning_rate": 3.5025179530225995e-07,
"loss": 0.2737,
"step": 530
},
{
"epoch": 2.7272727272727275,
"grad_norm": 2.0861722071465194,
"learning_rate": 2.5020099628504603e-07,
"loss": 0.2369,
"step": 540
},
{
"epoch": 2.7777777777777777,
"grad_norm": 2.5284934932886927,
"learning_rate": 1.6658510224765333e-07,
"loss": 0.3142,
"step": 550
},
{
"epoch": 2.8282828282828283,
"grad_norm": 1.653966938488298,
"learning_rate": 9.969343475342285e-08,
"loss": 0.3054,
"step": 560
},
{
"epoch": 2.878787878787879,
"grad_norm": 1.9790588803483495,
"learning_rate": 4.975744742772848e-08,
"loss": 0.3217,
"step": 570
},
{
"epoch": 2.929292929292929,
"grad_norm": 2.220544003340401,
"learning_rate": 1.69499250991767e-08,
"loss": 0.2967,
"step": 580
},
{
"epoch": 2.9797979797979797,
"grad_norm": 2.3170461682627947,
"learning_rate": 1.3843859422574269e-09,
"loss": 0.2458,
"step": 590
},
{
"epoch": 3.0,
"step": 594,
"total_flos": 31949362233344.0,
"train_loss": 0.4421948113224723,
"train_runtime": 1063.9965,
"train_samples_per_second": 4.466,
"train_steps_per_second": 0.558
}
],
"logging_steps": 10,
"max_steps": 594,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 31949362233344.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}