{
"best_global_step": 30000,
"best_metric": 0.8292354941368103,
"best_model_checkpoint": "bert_base_code_uml/checkpoint-30000",
"epoch": 25.0,
"eval_steps": 10000,
"global_step": 31850,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3924646781789639,
"grad_norm": 2.2605140209198,
"learning_rate": 4.9900000000000005e-06,
"loss": 8.1534,
"step": 500
},
{
"epoch": 0.7849293563579278,
"grad_norm": 1.4277119636535645,
"learning_rate": 9.990000000000001e-06,
"loss": 5.4981,
"step": 1000
},
{
"epoch": 1.1773940345368916,
"grad_norm": 1.1594833135604858,
"learning_rate": 1.499e-05,
"loss": 4.6668,
"step": 1500
},
{
"epoch": 1.5698587127158556,
"grad_norm": 1.3609659671783447,
"learning_rate": 1.999e-05,
"loss": 4.4569,
"step": 2000
},
{
"epoch": 1.9623233908948194,
"grad_norm": 1.4516750574111938,
"learning_rate": 2.4990000000000003e-05,
"loss": 4.3236,
"step": 2500
},
{
"epoch": 2.3547880690737832,
"grad_norm": 1.307254672050476,
"learning_rate": 2.9990000000000003e-05,
"loss": 4.2234,
"step": 3000
},
{
"epoch": 2.7472527472527473,
"grad_norm": 1.1777299642562866,
"learning_rate": 3.499e-05,
"loss": 4.1369,
"step": 3500
},
{
"epoch": 3.1397174254317113,
"grad_norm": 1.277431607246399,
"learning_rate": 3.999e-05,
"loss": 4.0883,
"step": 4000
},
{
"epoch": 3.5321821036106753,
"grad_norm": 1.136020302772522,
"learning_rate": 4.499e-05,
"loss": 4.0251,
"step": 4500
},
{
"epoch": 3.924646781789639,
"grad_norm": 1.5430645942687988,
"learning_rate": 4.999e-05,
"loss": 3.7435,
"step": 5000
},
{
"epoch": 4.3171114599686025,
"grad_norm": 1.1859745979309082,
"learning_rate": 5.499000000000001e-05,
"loss": 3.5562,
"step": 5500
},
{
"epoch": 4.7095761381475665,
"grad_norm": 1.1602009534835815,
"learning_rate": 5.999e-05,
"loss": 3.4409,
"step": 6000
},
{
"epoch": 5.1020408163265305,
"grad_norm": 1.5617371797561646,
"learning_rate": 6.499000000000001e-05,
"loss": 3.3426,
"step": 6500
},
{
"epoch": 5.4945054945054945,
"grad_norm": 1.3554491996765137,
"learning_rate": 6.999e-05,
"loss": 3.2194,
"step": 7000
},
{
"epoch": 5.8869701726844585,
"grad_norm": 2.1539087295532227,
"learning_rate": 7.499e-05,
"loss": 3.1264,
"step": 7500
},
{
"epoch": 6.279434850863423,
"grad_norm": 1.4375736713409424,
"learning_rate": 7.999000000000001e-05,
"loss": 3.0421,
"step": 8000
},
{
"epoch": 6.671899529042387,
"grad_norm": 1.8041514158248901,
"learning_rate": 8.499e-05,
"loss": 2.9334,
"step": 8500
},
{
"epoch": 7.06436420722135,
"grad_norm": 2.089439868927002,
"learning_rate": 8.999000000000001e-05,
"loss": 2.8356,
"step": 9000
},
{
"epoch": 7.456828885400314,
"grad_norm": 1.8236392736434937,
"learning_rate": 9.499e-05,
"loss": 2.6914,
"step": 9500
},
{
"epoch": 7.849293563579278,
"grad_norm": 1.8073580265045166,
"learning_rate": 9.999000000000001e-05,
"loss": 2.4929,
"step": 10000
},
{
"epoch": 7.849293563579278,
"eval_accuracy": 0.5692341405099076,
"eval_loss": 2.151398181915283,
"eval_runtime": 38.2798,
"eval_samples_per_second": 160.685,
"eval_steps_per_second": 1.698,
"step": 10000
},
{
"epoch": 8.241758241758241,
"grad_norm": 1.849391222000122,
"learning_rate": 9.77162471395881e-05,
"loss": 2.1576,
"step": 10500
},
{
"epoch": 8.634222919937205,
"grad_norm": 1.2290756702423096,
"learning_rate": 9.542791762013731e-05,
"loss": 1.7337,
"step": 11000
},
{
"epoch": 9.026687598116169,
"grad_norm": 1.1669484376907349,
"learning_rate": 9.313958810068651e-05,
"loss": 1.4375,
"step": 11500
},
{
"epoch": 9.419152276295133,
"grad_norm": 1.0519758462905884,
"learning_rate": 9.08512585812357e-05,
"loss": 1.3162,
"step": 12000
},
{
"epoch": 9.811616954474097,
"grad_norm": 1.0862187147140503,
"learning_rate": 8.85629290617849e-05,
"loss": 1.2368,
"step": 12500
},
{
"epoch": 10.204081632653061,
"grad_norm": 0.9377219676971436,
"learning_rate": 8.62745995423341e-05,
"loss": 1.1784,
"step": 13000
},
{
"epoch": 10.596546310832025,
"grad_norm": 0.9312331676483154,
"learning_rate": 8.398627002288329e-05,
"loss": 1.1388,
"step": 13500
},
{
"epoch": 10.989010989010989,
"grad_norm": 0.9040568470954895,
"learning_rate": 8.16979405034325e-05,
"loss": 1.1097,
"step": 14000
},
{
"epoch": 11.381475667189953,
"grad_norm": 0.8583242297172546,
"learning_rate": 7.94096109839817e-05,
"loss": 1.0736,
"step": 14500
},
{
"epoch": 11.773940345368917,
"grad_norm": 0.8321512937545776,
"learning_rate": 7.712128146453089e-05,
"loss": 1.0626,
"step": 15000
},
{
"epoch": 12.166405023547881,
"grad_norm": 0.9143489003181458,
"learning_rate": 7.48329519450801e-05,
"loss": 1.0358,
"step": 15500
},
{
"epoch": 12.558869701726845,
"grad_norm": 0.8196631669998169,
"learning_rate": 7.25446224256293e-05,
"loss": 1.0207,
"step": 16000
},
{
"epoch": 12.95133437990581,
"grad_norm": 0.7631738781929016,
"learning_rate": 7.025629290617849e-05,
"loss": 1.004,
"step": 16500
},
{
"epoch": 13.343799058084773,
"grad_norm": 0.8194634914398193,
"learning_rate": 6.79679633867277e-05,
"loss": 0.9921,
"step": 17000
},
{
"epoch": 13.736263736263737,
"grad_norm": 0.7670016884803772,
"learning_rate": 6.56796338672769e-05,
"loss": 0.9779,
"step": 17500
},
{
"epoch": 14.1287284144427,
"grad_norm": 0.7673987746238708,
"learning_rate": 6.339130434782609e-05,
"loss": 0.9608,
"step": 18000
},
{
"epoch": 14.521193092621663,
"grad_norm": 0.7936846613883972,
"learning_rate": 6.110297482837529e-05,
"loss": 0.9558,
"step": 18500
},
{
"epoch": 14.913657770800627,
"grad_norm": 0.7623568177223206,
"learning_rate": 5.881464530892449e-05,
"loss": 0.9505,
"step": 19000
},
{
"epoch": 15.306122448979592,
"grad_norm": 0.7214558720588684,
"learning_rate": 5.652631578947368e-05,
"loss": 0.9402,
"step": 19500
},
{
"epoch": 15.698587127158556,
"grad_norm": 0.827078640460968,
"learning_rate": 5.423798627002289e-05,
"loss": 0.9263,
"step": 20000
},
{
"epoch": 15.698587127158556,
"eval_accuracy": 0.8142541052951258,
"eval_loss": 0.9068173170089722,
"eval_runtime": 31.5271,
"eval_samples_per_second": 195.102,
"eval_steps_per_second": 2.062,
"step": 20000
},
{
"epoch": 16.09105180533752,
"grad_norm": 0.7756440043449402,
"learning_rate": 5.1949656750572084e-05,
"loss": 0.9185,
"step": 20500
},
{
"epoch": 16.483516483516482,
"grad_norm": 0.7866923809051514,
"learning_rate": 4.966132723112129e-05,
"loss": 0.9115,
"step": 21000
},
{
"epoch": 16.875981161695446,
"grad_norm": 0.7449353337287903,
"learning_rate": 4.737299771167048e-05,
"loss": 0.9021,
"step": 21500
},
{
"epoch": 17.26844583987441,
"grad_norm": 0.7738542556762695,
"learning_rate": 4.508466819221968e-05,
"loss": 0.9021,
"step": 22000
},
{
"epoch": 17.660910518053374,
"grad_norm": 0.7117587924003601,
"learning_rate": 4.279633867276888e-05,
"loss": 0.8932,
"step": 22500
},
{
"epoch": 18.053375196232338,
"grad_norm": 0.6952142715454102,
"learning_rate": 4.0508009153318077e-05,
"loss": 0.8866,
"step": 23000
},
{
"epoch": 18.445839874411302,
"grad_norm": 0.6748417615890503,
"learning_rate": 3.821967963386728e-05,
"loss": 0.8831,
"step": 23500
},
{
"epoch": 18.838304552590266,
"grad_norm": 0.7013327479362488,
"learning_rate": 3.593135011441648e-05,
"loss": 0.8714,
"step": 24000
},
{
"epoch": 19.23076923076923,
"grad_norm": 0.629546046257019,
"learning_rate": 3.364302059496568e-05,
"loss": 0.8684,
"step": 24500
},
{
"epoch": 19.623233908948194,
"grad_norm": 0.6739959120750427,
"learning_rate": 3.135469107551487e-05,
"loss": 0.8664,
"step": 25000
},
{
"epoch": 20.015698587127158,
"grad_norm": 0.6923867464065552,
"learning_rate": 2.9066361556064075e-05,
"loss": 0.8613,
"step": 25500
},
{
"epoch": 20.408163265306122,
"grad_norm": 0.7043192386627197,
"learning_rate": 2.677803203661327e-05,
"loss": 0.8541,
"step": 26000
},
{
"epoch": 20.800627943485086,
"grad_norm": 0.6633190512657166,
"learning_rate": 2.448970251716247e-05,
"loss": 0.8558,
"step": 26500
},
{
"epoch": 21.19309262166405,
"grad_norm": 0.6382936239242554,
"learning_rate": 2.2201372997711673e-05,
"loss": 0.8486,
"step": 27000
},
{
"epoch": 21.585557299843014,
"grad_norm": 0.7126407623291016,
"learning_rate": 1.9913043478260872e-05,
"loss": 0.8455,
"step": 27500
},
{
"epoch": 21.978021978021978,
"grad_norm": 0.6809006929397583,
"learning_rate": 1.7624713958810068e-05,
"loss": 0.8382,
"step": 28000
},
{
"epoch": 22.370486656200942,
"grad_norm": 0.6693772077560425,
"learning_rate": 1.533638443935927e-05,
"loss": 0.8377,
"step": 28500
},
{
"epoch": 22.762951334379906,
"grad_norm": 0.7368608117103577,
"learning_rate": 1.3048054919908468e-05,
"loss": 0.8346,
"step": 29000
},
{
"epoch": 23.15541601255887,
"grad_norm": 0.6541247963905334,
"learning_rate": 1.0759725400457667e-05,
"loss": 0.8298,
"step": 29500
},
{
"epoch": 23.547880690737834,
"grad_norm": 0.6780161261558533,
"learning_rate": 8.471395881006864e-06,
"loss": 0.8293,
"step": 30000
},
{
"epoch": 23.547880690737834,
"eval_accuracy": 0.8285928159953133,
"eval_loss": 0.8292354941368103,
"eval_runtime": 31.6198,
"eval_samples_per_second": 194.53,
"eval_steps_per_second": 2.056,
"step": 30000
},
{
"epoch": 23.940345368916798,
"grad_norm": 0.6645314693450928,
"learning_rate": 6.183066361556064e-06,
"loss": 0.8306,
"step": 30500
},
{
"epoch": 24.332810047095762,
"grad_norm": 0.7622667551040649,
"learning_rate": 3.894736842105264e-06,
"loss": 0.8225,
"step": 31000
},
{
"epoch": 24.725274725274726,
"grad_norm": 0.6563706398010254,
"learning_rate": 1.6064073226544622e-06,
"loss": 0.8275,
"step": 31500
},
{
"epoch": 25.0,
"step": 31850,
"total_flos": 8.0444602960128e+17,
"train_loss": 1.908989332549426,
"train_runtime": 23433.9018,
"train_samples_per_second": 130.424,
"train_steps_per_second": 1.359
}
],
"logging_steps": 500,
"max_steps": 31850,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.0444602960128e+17,
"train_batch_size": 96,
"trial_name": null,
"trial_params": null
}