lettuce_pos_de_mono / trainer_state.json
pranaydeeps's picture
Upload folder using huggingface_hub
70df694 verified
{
"best_metric": 0.9895929814239887,
"best_model_checkpoint": "models/pos_final_mono_de/checkpoint-4224",
"epoch": 39.994174757281556,
"global_step": 5120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"eval_accuracy": 0.9475099341812547,
"eval_f1": 0.9428053278974075,
"eval_loss": 0.235727921128273,
"eval_precision": 0.9442734211134948,
"eval_recall": 0.941341792581462,
"eval_runtime": 18.99,
"eval_samples_per_second": 771.406,
"eval_steps_per_second": 3.054,
"step": 128
},
{
"epoch": 1.99,
"eval_accuracy": 0.9852943432700717,
"eval_f1": 0.9842332493182053,
"eval_loss": 0.05128009244799614,
"eval_precision": 0.9842997713944935,
"eval_recall": 0.9841667362328519,
"eval_runtime": 19.2922,
"eval_samples_per_second": 759.323,
"eval_steps_per_second": 3.006,
"step": 256
},
{
"epoch": 2.99,
"eval_accuracy": 0.9875228217677473,
"eval_f1": 0.9867158568898448,
"eval_loss": 0.04063262417912483,
"eval_precision": 0.9867884320258268,
"eval_recall": 0.9866432924284164,
"eval_runtime": 19.7655,
"eval_samples_per_second": 741.14,
"eval_steps_per_second": 2.934,
"step": 384
},
{
"epoch": 3.9,
"learning_rate": 5e-05,
"loss": 0.6822,
"step": 500
},
{
"epoch": 3.99,
"eval_accuracy": 0.9884893907546909,
"eval_f1": 0.9876851402812782,
"eval_loss": 0.036450713872909546,
"eval_precision": 0.9876576580157648,
"eval_recall": 0.9877126240762605,
"eval_runtime": 18.3474,
"eval_samples_per_second": 798.424,
"eval_steps_per_second": 3.161,
"step": 512
},
{
"epoch": 4.99,
"eval_accuracy": 0.9889918532042529,
"eval_f1": 0.9881764176274528,
"eval_loss": 0.03515882417559624,
"eval_precision": 0.9881194651573207,
"eval_recall": 0.9882333766631287,
"eval_runtime": 19.0555,
"eval_samples_per_second": 768.756,
"eval_steps_per_second": 3.044,
"step": 640
},
{
"epoch": 5.99,
"eval_accuracy": 0.9894521241504165,
"eval_f1": 0.9887225068869429,
"eval_loss": 0.0344870425760746,
"eval_precision": 0.9887067858661908,
"eval_recall": 0.9887382284076499,
"eval_runtime": 18.7765,
"eval_samples_per_second": 780.176,
"eval_steps_per_second": 3.089,
"step": 768
},
{
"epoch": 6.99,
"eval_accuracy": 0.9896017122079197,
"eval_f1": 0.9887940595397575,
"eval_loss": 0.03525426983833313,
"eval_precision": 0.9887783373812941,
"eval_recall": 0.9888097821982119,
"eval_runtime": 18.226,
"eval_samples_per_second": 803.744,
"eval_steps_per_second": 3.182,
"step": 896
},
{
"epoch": 7.81,
"learning_rate": 4.458874458874459e-05,
"loss": 0.024,
"step": 1000
},
{
"epoch": 7.99,
"eval_accuracy": 0.9894866444713788,
"eval_f1": 0.9887030802192603,
"eval_loss": 0.037094976752996445,
"eval_precision": 0.9886480621017779,
"eval_recall": 0.9887581044605838,
"eval_runtime": 18.998,
"eval_samples_per_second": 771.08,
"eval_steps_per_second": 3.053,
"step": 1024
},
{
"epoch": 8.99,
"eval_accuracy": 0.9895556851133034,
"eval_f1": 0.988807816838561,
"eval_loss": 0.03866518661379814,
"eval_precision": 0.9888058514867228,
"eval_recall": 0.9888097821982119,
"eval_runtime": 19.63,
"eval_samples_per_second": 746.255,
"eval_steps_per_second": 2.955,
"step": 1152
},
{
"epoch": 9.99,
"eval_accuracy": 0.9897513002654229,
"eval_f1": 0.9889666056460926,
"eval_loss": 0.04022372514009476,
"eval_precision": 0.9889842973563904,
"eval_recall": 0.9889489145687492,
"eval_runtime": 17.8198,
"eval_samples_per_second": 822.064,
"eval_steps_per_second": 3.255,
"step": 1280
},
{
"epoch": 10.99,
"eval_accuracy": 0.9897282867181147,
"eval_f1": 0.9889296106084937,
"eval_loss": 0.04293292760848999,
"eval_precision": 0.9888785102450464,
"eval_recall": 0.9889807162534435,
"eval_runtime": 18.5105,
"eval_samples_per_second": 791.389,
"eval_steps_per_second": 3.133,
"step": 1408
},
{
"epoch": 11.71,
"learning_rate": 3.917748917748918e-05,
"loss": 0.0128,
"step": 1500
},
{
"epoch": 11.99,
"eval_accuracy": 0.989628561346446,
"eval_f1": 0.9888770954828604,
"eval_loss": 0.045427996665239334,
"eval_precision": 0.9889006825762183,
"eval_recall": 0.9888535095146666,
"eval_runtime": 18.3742,
"eval_samples_per_second": 797.257,
"eval_steps_per_second": 3.157,
"step": 1536
},
{
"epoch": 12.99,
"eval_accuracy": 0.9896899308059345,
"eval_f1": 0.9889251953792704,
"eval_loss": 0.04608777165412903,
"eval_precision": 0.9889134021028363,
"eval_recall": 0.9889369889369889,
"eval_runtime": 18.3253,
"eval_samples_per_second": 799.387,
"eval_steps_per_second": 3.165,
"step": 1664
},
{
"epoch": 13.99,
"eval_accuracy": 0.989889381549272,
"eval_f1": 0.9891329626839416,
"eval_loss": 0.04769197106361389,
"eval_precision": 0.9892057156034064,
"eval_recall": 0.9890602204651792,
"eval_runtime": 18.8558,
"eval_samples_per_second": 776.896,
"eval_steps_per_second": 3.076,
"step": 1792
},
{
"epoch": 14.99,
"eval_accuracy": 0.9897743138127311,
"eval_f1": 0.9890247489724366,
"eval_loss": 0.0506986528635025,
"eval_precision": 0.9889972294324113,
"eval_recall": 0.9890522700440055,
"eval_runtime": 17.9307,
"eval_samples_per_second": 816.978,
"eval_steps_per_second": 3.235,
"step": 1920
},
{
"epoch": 15.62,
"learning_rate": 3.376623376623377e-05,
"loss": 0.0069,
"step": 2000
},
{
"epoch": 15.99,
"eval_accuracy": 0.9900581475628654,
"eval_f1": 0.9893163454944793,
"eval_loss": 0.05137912556529045,
"eval_precision": 0.9893419096308429,
"eval_recall": 0.9892907826792124,
"eval_runtime": 18.9861,
"eval_samples_per_second": 771.564,
"eval_steps_per_second": 3.055,
"step": 2048
},
{
"epoch": 16.99,
"eval_accuracy": 0.989889381549272,
"eval_f1": 0.989197257872486,
"eval_loss": 0.053016748279333115,
"eval_precision": 0.9892070887364145,
"eval_recall": 0.9891874272039561,
"eval_runtime": 18.1987,
"eval_samples_per_second": 804.946,
"eval_steps_per_second": 3.187,
"step": 2176
},
{
"epoch": 17.99,
"eval_accuracy": 0.9898203409073475,
"eval_f1": 0.9890543664272952,
"eval_loss": 0.05524001270532608,
"eval_precision": 0.9890445373741871,
"eval_recall": 0.989064195675766,
"eval_runtime": 18.53,
"eval_samples_per_second": 790.554,
"eval_steps_per_second": 3.13,
"step": 2304
},
{
"epoch": 18.99,
"eval_accuracy": 0.9898395188634376,
"eval_f1": 0.9891659296212747,
"eval_loss": 0.0566512756049633,
"eval_precision": 0.9891325086653735,
"eval_recall": 0.9891993528357165,
"eval_runtime": 19.0959,
"eval_samples_per_second": 767.129,
"eval_steps_per_second": 3.037,
"step": 2432
},
{
"epoch": 19.53,
"learning_rate": 2.8354978354978357e-05,
"loss": 0.0037,
"step": 2500
},
{
"epoch": 19.99,
"eval_accuracy": 0.9899507510087605,
"eval_f1": 0.989249406222982,
"eval_loss": 0.057712409645318985,
"eval_precision": 0.9892159824466563,
"eval_recall": 0.9892828322580389,
"eval_runtime": 18.1495,
"eval_samples_per_second": 807.13,
"eval_steps_per_second": 3.196,
"step": 2560
},
{
"epoch": 20.99,
"eval_accuracy": 0.989897052731708,
"eval_f1": 0.9892537230374182,
"eval_loss": 0.05920035019516945,
"eval_precision": 0.9891888454322872,
"eval_recall": 0.9893186091533199,
"eval_runtime": 18.5483,
"eval_samples_per_second": 789.775,
"eval_steps_per_second": 3.127,
"step": 2688
},
{
"epoch": 21.99,
"eval_accuracy": 0.9899584221911966,
"eval_f1": 0.9892630842496084,
"eval_loss": 0.06059529632329941,
"eval_precision": 0.9892512869437322,
"eval_recall": 0.9892748818368653,
"eval_runtime": 18.2219,
"eval_samples_per_second": 803.923,
"eval_steps_per_second": 3.183,
"step": 2816
},
{
"epoch": 22.99,
"eval_accuracy": 0.9899699289648506,
"eval_f1": 0.9892710345759693,
"eval_loss": 0.06275586783885956,
"eval_precision": 0.9892592371752827,
"eval_recall": 0.9892828322580389,
"eval_runtime": 18.6724,
"eval_samples_per_second": 784.529,
"eval_steps_per_second": 3.106,
"step": 2944
},
{
"epoch": 23.43,
"learning_rate": 2.2943722943722946e-05,
"loss": 0.0023,
"step": 3000
},
{
"epoch": 23.99,
"eval_accuracy": 0.9899162306877982,
"eval_f1": 0.9891494254701287,
"eval_loss": 0.06293565034866333,
"eval_precision": 0.9891710528408098,
"eval_recall": 0.9891277990451545,
"eval_runtime": 18.198,
"eval_samples_per_second": 804.98,
"eval_steps_per_second": 3.187,
"step": 3072
},
{
"epoch": 24.99,
"eval_accuracy": 0.9899776001472868,
"eval_f1": 0.9892692816043408,
"eval_loss": 0.06246413290500641,
"eval_precision": 0.9892358571564855,
"eval_recall": 0.9893027083109728,
"eval_runtime": 18.2292,
"eval_samples_per_second": 803.601,
"eval_steps_per_second": 3.182,
"step": 3200
},
{
"epoch": 25.99,
"eval_accuracy": 0.990008284877031,
"eval_f1": 0.9893007845031315,
"eval_loss": 0.06362640857696533,
"eval_precision": 0.9892948855550521,
"eval_recall": 0.9893066835215596,
"eval_runtime": 19.3067,
"eval_samples_per_second": 758.751,
"eval_steps_per_second": 3.004,
"step": 3328
},
{
"epoch": 26.99,
"eval_accuracy": 0.9900926678838277,
"eval_f1": 0.9893981976538494,
"eval_loss": 0.0649913027882576,
"eval_precision": 0.9893903316465458,
"eval_recall": 0.9894060637862291,
"eval_runtime": 18.4146,
"eval_samples_per_second": 795.511,
"eval_steps_per_second": 3.15,
"step": 3456
},
{
"epoch": 27.34,
"learning_rate": 1.7532467532467535e-05,
"loss": 0.0017,
"step": 3500
},
{
"epoch": 27.99,
"eval_accuracy": 0.9901003390662637,
"eval_f1": 0.989384347826087,
"eval_loss": 0.0644073411822319,
"eval_precision": 0.9893705826701542,
"eval_recall": 0.9893981133650556,
"eval_runtime": 18.6787,
"eval_samples_per_second": 784.263,
"eval_steps_per_second": 3.105,
"step": 3584
},
{
"epoch": 28.99,
"eval_accuracy": 0.9901425305696621,
"eval_f1": 0.9894557748763214,
"eval_loss": 0.06558605283498764,
"eval_precision": 0.9894538082366036,
"eval_recall": 0.9894577415238572,
"eval_runtime": 18.1086,
"eval_samples_per_second": 808.954,
"eval_steps_per_second": 3.203,
"step": 3712
},
{
"epoch": 29.99,
"eval_accuracy": 0.9901502017520981,
"eval_f1": 0.9894956104173334,
"eval_loss": 0.0667632669210434,
"eval_precision": 0.989485776979218,
"eval_recall": 0.9895054440508986,
"eval_runtime": 18.5261,
"eval_samples_per_second": 790.723,
"eval_steps_per_second": 3.131,
"step": 3840
},
{
"epoch": 30.99,
"eval_accuracy": 0.9901003390662637,
"eval_f1": 0.9894474469341146,
"eval_loss": 0.06663960218429565,
"eval_precision": 0.9894808819203155,
"eval_recall": 0.9894140142074026,
"eval_runtime": 18.0695,
"eval_samples_per_second": 810.702,
"eval_steps_per_second": 3.21,
"step": 3968
},
{
"epoch": 31.25,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.0011,
"step": 4000
},
{
"epoch": 31.99,
"eval_accuracy": 0.9900466407892112,
"eval_f1": 0.9893740508996081,
"eval_loss": 0.06780469417572021,
"eval_precision": 0.9893937165323654,
"eval_recall": 0.9893543860486009,
"eval_runtime": 18.1642,
"eval_samples_per_second": 806.478,
"eval_steps_per_second": 3.193,
"step": 4096
},
{
"epoch": 32.99,
"eval_accuracy": 0.9902230779852407,
"eval_f1": 0.9895929814239887,
"eval_loss": 0.06849976629018784,
"eval_precision": 0.9895851138680967,
"eval_recall": 0.9896008491049814,
"eval_runtime": 18.9151,
"eval_samples_per_second": 774.46,
"eval_steps_per_second": 3.066,
"step": 4224
},
{
"epoch": 33.99,
"eval_accuracy": 0.99014636616088,
"eval_f1": 0.9894398320867711,
"eval_loss": 0.06920044124126434,
"eval_precision": 0.9894417987104366,
"eval_recall": 0.9894378654709233,
"eval_runtime": 18.3423,
"eval_samples_per_second": 798.645,
"eval_steps_per_second": 3.162,
"step": 4352
},
{
"epoch": 34.99,
"eval_accuracy": 0.9902000644379325,
"eval_f1": 0.9895391709648887,
"eval_loss": 0.06976373493671417,
"eval_precision": 0.9895450714751387,
"eval_recall": 0.9895332705250061,
"eval_runtime": 18.8004,
"eval_samples_per_second": 779.185,
"eval_steps_per_second": 3.085,
"step": 4480
},
{
"epoch": 35.16,
"learning_rate": 6.709956709956711e-06,
"loss": 0.0009,
"step": 4500
},
{
"epoch": 35.99,
"eval_accuracy": 0.9900658187453014,
"eval_f1": 0.9893825501754999,
"eval_loss": 0.06981877237558365,
"eval_precision": 0.9893510881446884,
"eval_recall": 0.9894140142074026,
"eval_runtime": 18.1896,
"eval_samples_per_second": 805.351,
"eval_steps_per_second": 3.189,
"step": 4608
},
{
"epoch": 36.99,
"eval_accuracy": 0.9902039000291505,
"eval_f1": 0.9894797097330076,
"eval_loss": 0.0695314109325409,
"eval_precision": 0.9894698764529106,
"eval_recall": 0.9894895432085514,
"eval_runtime": 18.7061,
"eval_samples_per_second": 783.113,
"eval_steps_per_second": 3.101,
"step": 4736
},
{
"epoch": 37.99,
"eval_accuracy": 0.9901732152994063,
"eval_f1": 0.9894400419774727,
"eval_loss": 0.06961216777563095,
"eval_precision": 0.9894223430643007,
"eval_recall": 0.9894577415238572,
"eval_runtime": 18.6705,
"eval_samples_per_second": 784.607,
"eval_steps_per_second": 3.107,
"step": 4864
},
{
"epoch": 38.99,
"eval_accuracy": 0.9901962288467144,
"eval_f1": 0.9894779103694458,
"eval_loss": 0.06985215842723846,
"eval_precision": 0.9894503782202383,
"eval_recall": 0.9895054440508986,
"eval_runtime": 18.2919,
"eval_samples_per_second": 800.846,
"eval_steps_per_second": 3.171,
"step": 4992
},
{
"epoch": 39.06,
"learning_rate": 1.2987012987012988e-06,
"loss": 0.0007,
"step": 5000
},
{
"epoch": 39.99,
"eval_accuracy": 0.990138694978444,
"eval_f1": 0.9894261920378432,
"eval_loss": 0.06969785690307617,
"eval_precision": 0.9894025940986839,
"eval_recall": 0.9894497911026837,
"eval_runtime": 18.675,
"eval_samples_per_second": 784.419,
"eval_steps_per_second": 3.106,
"step": 5120
},
{
"epoch": 39.99,
"step": 5120,
"total_flos": 2.72643266432467e+17,
"train_loss": 0.07192220802244265,
"train_runtime": 4057.7347,
"train_samples_per_second": 1299.572,
"train_steps_per_second": 1.262
}
],
"max_steps": 5120,
"num_train_epochs": 40,
"total_flos": 2.72643266432467e+17,
"trial_name": null,
"trial_params": null
}