bert-soccer-qa / checkpoint-2500 /trainer_state.json
leomaurodesenv's picture
feat: add tokenizer
f77d768 verified
{
"best_global_step": 2500,
"best_metric": 79.95807844595133,
"best_model_checkpoint": "bert-soccer-qa/checkpoint-2500",
"epoch": 0.3884400248601616,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007768800497203232,
"grad_norm": 9.272425651550293,
"learning_rate": 9.99925419515227e-06,
"loss": 1.1446,
"step": 50
},
{
"epoch": 0.015537600994406464,
"grad_norm": 12.237289428710938,
"learning_rate": 9.99847731510255e-06,
"loss": 0.9455,
"step": 100
},
{
"epoch": 0.015537600994406464,
"eval_HasAns_exact": 69.85208252238225,
"eval_HasAns_f1": 77.13603651720207,
"eval_HasAns_total": 25690,
"eval_best_exact": 69.85208252238225,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 77.13603651720207,
"eval_best_f1_thresh": 0.0,
"eval_exact": 69.85208252238225,
"eval_f1": 77.13603651720207,
"eval_loss": 0.8127343058586121,
"eval_runtime": 202.0338,
"eval_samples_per_second": 127.157,
"eval_steps_per_second": 7.949,
"eval_total": 25690,
"step": 100
},
{
"epoch": 0.023306401491609695,
"grad_norm": 5.626998424530029,
"learning_rate": 9.997700435052828e-06,
"loss": 0.8129,
"step": 150
},
{
"epoch": 0.031075201988812928,
"grad_norm": 6.87526798248291,
"learning_rate": 9.996923555003108e-06,
"loss": 0.8743,
"step": 200
},
{
"epoch": 0.031075201988812928,
"eval_HasAns_exact": 70.21409108602569,
"eval_HasAns_f1": 77.38207884007946,
"eval_HasAns_total": 25690,
"eval_best_exact": 70.21409108602569,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 77.38207884007946,
"eval_best_f1_thresh": 0.0,
"eval_exact": 70.21409108602569,
"eval_f1": 77.38207884007946,
"eval_loss": 0.7383215427398682,
"eval_runtime": 202.5157,
"eval_samples_per_second": 126.854,
"eval_steps_per_second": 7.93,
"eval_total": 25690,
"step": 200
},
{
"epoch": 0.03884400248601616,
"grad_norm": 11.461019515991211,
"learning_rate": 9.996146674953388e-06,
"loss": 0.7825,
"step": 250
},
{
"epoch": 0.04661280298321939,
"grad_norm": 9.630631446838379,
"learning_rate": 9.995369794903668e-06,
"loss": 0.7189,
"step": 300
},
{
"epoch": 0.04661280298321939,
"eval_HasAns_exact": 71.03931490852472,
"eval_HasAns_f1": 78.08897109574,
"eval_HasAns_total": 25690,
"eval_best_exact": 71.03931490852472,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.08897109574,
"eval_best_f1_thresh": 0.0,
"eval_exact": 71.03931490852472,
"eval_f1": 78.08897109574,
"eval_loss": 0.7194859981536865,
"eval_runtime": 202.3679,
"eval_samples_per_second": 126.947,
"eval_steps_per_second": 7.936,
"eval_total": 25690,
"step": 300
},
{
"epoch": 0.054381603480422626,
"grad_norm": 11.377346992492676,
"learning_rate": 9.994592914853948e-06,
"loss": 0.7889,
"step": 350
},
{
"epoch": 0.062150403977625855,
"grad_norm": 7.450821399688721,
"learning_rate": 9.993816034804228e-06,
"loss": 0.7367,
"step": 400
},
{
"epoch": 0.062150403977625855,
"eval_HasAns_exact": 71.08991825613079,
"eval_HasAns_f1": 78.00619214091435,
"eval_HasAns_total": 25690,
"eval_best_exact": 71.08991825613079,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.00619214091435,
"eval_best_f1_thresh": 0.0,
"eval_exact": 71.08991825613079,
"eval_f1": 78.00619214091435,
"eval_loss": 0.683600902557373,
"eval_runtime": 202.4728,
"eval_samples_per_second": 126.881,
"eval_steps_per_second": 7.932,
"eval_total": 25690,
"step": 400
},
{
"epoch": 0.06991920447482909,
"grad_norm": 14.92029857635498,
"learning_rate": 9.993039154754508e-06,
"loss": 0.7838,
"step": 450
},
{
"epoch": 0.07768800497203232,
"grad_norm": 9.030844688415527,
"learning_rate": 9.992262274704786e-06,
"loss": 0.6469,
"step": 500
},
{
"epoch": 0.07768800497203232,
"eval_HasAns_exact": 71.42467886337096,
"eval_HasAns_f1": 78.23334576784337,
"eval_HasAns_total": 25690,
"eval_best_exact": 71.42467886337096,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.23334576784337,
"eval_best_f1_thresh": 0.0,
"eval_exact": 71.42467886337096,
"eval_f1": 78.23334576784337,
"eval_loss": 0.6646179556846619,
"eval_runtime": 202.6333,
"eval_samples_per_second": 126.781,
"eval_steps_per_second": 7.926,
"eval_total": 25690,
"step": 500
},
{
"epoch": 0.08545680546923555,
"grad_norm": 10.103166580200195,
"learning_rate": 9.991485394655066e-06,
"loss": 0.725,
"step": 550
},
{
"epoch": 0.09322560596643878,
"grad_norm": 6.986274242401123,
"learning_rate": 9.990708514605346e-06,
"loss": 0.6657,
"step": 600
},
{
"epoch": 0.09322560596643878,
"eval_HasAns_exact": 70.2841572596341,
"eval_HasAns_f1": 77.1247889416155,
"eval_HasAns_total": 25690,
"eval_best_exact": 70.2841572596341,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 77.1247889416155,
"eval_best_f1_thresh": 0.0,
"eval_exact": 70.2841572596341,
"eval_f1": 77.1247889416155,
"eval_loss": 0.6492825150489807,
"eval_runtime": 203.3854,
"eval_samples_per_second": 126.312,
"eval_steps_per_second": 7.896,
"eval_total": 25690,
"step": 600
},
{
"epoch": 0.10099440646364201,
"grad_norm": 10.636602401733398,
"learning_rate": 9.989931634555626e-06,
"loss": 0.7337,
"step": 650
},
{
"epoch": 0.10876320696084525,
"grad_norm": 8.252824783325195,
"learning_rate": 9.989154754505906e-06,
"loss": 0.662,
"step": 700
},
{
"epoch": 0.10876320696084525,
"eval_HasAns_exact": 72.22654729466718,
"eval_HasAns_f1": 79.0143805614001,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.22654729466718,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.0143805614001,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.22654729466718,
"eval_f1": 79.0143805614001,
"eval_loss": 0.6340453028678894,
"eval_runtime": 202.8216,
"eval_samples_per_second": 126.663,
"eval_steps_per_second": 7.918,
"eval_total": 25690,
"step": 700
},
{
"epoch": 0.11653200745804848,
"grad_norm": 8.463785171508789,
"learning_rate": 9.988377874456184e-06,
"loss": 0.7265,
"step": 750
},
{
"epoch": 0.12430080795525171,
"grad_norm": 9.748174667358398,
"learning_rate": 9.987600994406464e-06,
"loss": 0.6969,
"step": 800
},
{
"epoch": 0.12430080795525171,
"eval_HasAns_exact": 72.1292331646555,
"eval_HasAns_f1": 78.83892105701732,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.1292331646555,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.83892105701732,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.1292331646555,
"eval_f1": 78.83892105701732,
"eval_loss": 0.6085864901542664,
"eval_runtime": 202.3194,
"eval_samples_per_second": 126.977,
"eval_steps_per_second": 7.938,
"eval_total": 25690,
"step": 800
},
{
"epoch": 0.13206960845245494,
"grad_norm": 9.786517143249512,
"learning_rate": 9.986824114356744e-06,
"loss": 0.7111,
"step": 850
},
{
"epoch": 0.13983840894965818,
"grad_norm": 8.391840934753418,
"learning_rate": 9.986047234307024e-06,
"loss": 0.669,
"step": 900
},
{
"epoch": 0.13983840894965818,
"eval_HasAns_exact": 71.89567925262749,
"eval_HasAns_f1": 78.61371443349637,
"eval_HasAns_total": 25690,
"eval_best_exact": 71.89567925262749,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.61371443349637,
"eval_best_f1_thresh": 0.0,
"eval_exact": 71.89567925262749,
"eval_f1": 78.61371443349637,
"eval_loss": 0.5937665104866028,
"eval_runtime": 202.8476,
"eval_samples_per_second": 126.647,
"eval_steps_per_second": 7.917,
"eval_total": 25690,
"step": 900
},
{
"epoch": 0.1476072094468614,
"grad_norm": 11.602773666381836,
"learning_rate": 9.985270354257304e-06,
"loss": 0.7253,
"step": 950
},
{
"epoch": 0.15537600994406464,
"grad_norm": 9.150772094726562,
"learning_rate": 9.984493474207582e-06,
"loss": 0.6676,
"step": 1000
},
{
"epoch": 0.15537600994406464,
"eval_HasAns_exact": 72.28493577267419,
"eval_HasAns_f1": 78.8876551339714,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.28493577267419,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.8876551339714,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.28493577267419,
"eval_f1": 78.8876551339714,
"eval_loss": 0.5816648602485657,
"eval_runtime": 202.8184,
"eval_samples_per_second": 126.665,
"eval_steps_per_second": 7.918,
"eval_total": 25690,
"step": 1000
},
{
"epoch": 0.16314481044126786,
"grad_norm": 10.605375289916992,
"learning_rate": 9.983716594157864e-06,
"loss": 0.7131,
"step": 1050
},
{
"epoch": 0.1709136109384711,
"grad_norm": 13.075970649719238,
"learning_rate": 9.982939714108144e-06,
"loss": 0.6664,
"step": 1100
},
{
"epoch": 0.1709136109384711,
"eval_HasAns_exact": 71.95406773063449,
"eval_HasAns_f1": 78.6814885378308,
"eval_HasAns_total": 25690,
"eval_best_exact": 71.95406773063449,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.6814885378308,
"eval_best_f1_thresh": 0.0,
"eval_exact": 71.95406773063449,
"eval_f1": 78.6814885378308,
"eval_loss": 0.5695982575416565,
"eval_runtime": 202.5931,
"eval_samples_per_second": 126.806,
"eval_steps_per_second": 7.927,
"eval_total": 25690,
"step": 1100
},
{
"epoch": 0.17868241143567434,
"grad_norm": 12.333536148071289,
"learning_rate": 9.982162834058422e-06,
"loss": 0.6016,
"step": 1150
},
{
"epoch": 0.18645121193287756,
"grad_norm": 9.748809814453125,
"learning_rate": 9.981385954008702e-06,
"loss": 0.6006,
"step": 1200
},
{
"epoch": 0.18645121193287756,
"eval_HasAns_exact": 72.06305955624757,
"eval_HasAns_f1": 78.75337714295185,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.06305955624757,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.75337714295185,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.06305955624757,
"eval_f1": 78.75337714295185,
"eval_loss": 0.5660755038261414,
"eval_runtime": 202.8834,
"eval_samples_per_second": 126.624,
"eval_steps_per_second": 7.916,
"eval_total": 25690,
"step": 1200
},
{
"epoch": 0.1942200124300808,
"grad_norm": 9.964409828186035,
"learning_rate": 9.980609073958982e-06,
"loss": 0.6268,
"step": 1250
},
{
"epoch": 0.20198881292728402,
"grad_norm": 6.387030124664307,
"learning_rate": 9.979832193909262e-06,
"loss": 0.6111,
"step": 1300
},
{
"epoch": 0.20198881292728402,
"eval_HasAns_exact": 72.65862203191904,
"eval_HasAns_f1": 79.23512118883761,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.65862203191904,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.23512118883761,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.65862203191904,
"eval_f1": 79.23512118883761,
"eval_loss": 0.5586764216423035,
"eval_runtime": 203.0862,
"eval_samples_per_second": 126.498,
"eval_steps_per_second": 7.908,
"eval_total": 25690,
"step": 1300
},
{
"epoch": 0.20975761342448726,
"grad_norm": 9.103731155395508,
"learning_rate": 9.979055313859542e-06,
"loss": 0.566,
"step": 1350
},
{
"epoch": 0.2175264139216905,
"grad_norm": 13.135197639465332,
"learning_rate": 9.97827843380982e-06,
"loss": 0.5793,
"step": 1400
},
{
"epoch": 0.2175264139216905,
"eval_HasAns_exact": 72.39392759828728,
"eval_HasAns_f1": 79.00515970901382,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.39392759828728,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.00515970901382,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.39392759828728,
"eval_f1": 79.00515970901382,
"eval_loss": 0.5600055456161499,
"eval_runtime": 202.5804,
"eval_samples_per_second": 126.814,
"eval_steps_per_second": 7.928,
"eval_total": 25690,
"step": 1400
},
{
"epoch": 0.22529521441889372,
"grad_norm": 9.040102005004883,
"learning_rate": 9.9775015537601e-06,
"loss": 0.6309,
"step": 1450
},
{
"epoch": 0.23306401491609696,
"grad_norm": 11.526878356933594,
"learning_rate": 9.97672467371038e-06,
"loss": 0.6064,
"step": 1500
},
{
"epoch": 0.23306401491609696,
"eval_HasAns_exact": 72.74425846632931,
"eval_HasAns_f1": 79.36155045560372,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.74425846632931,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.36155045560372,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.74425846632931,
"eval_f1": 79.36155045560372,
"eval_loss": 0.5500572323799133,
"eval_runtime": 202.9048,
"eval_samples_per_second": 126.611,
"eval_steps_per_second": 7.915,
"eval_total": 25690,
"step": 1500
},
{
"epoch": 0.24083281541330018,
"grad_norm": 4.359344482421875,
"learning_rate": 9.97594779366066e-06,
"loss": 0.6823,
"step": 1550
},
{
"epoch": 0.24860161591050342,
"grad_norm": 9.813569068908691,
"learning_rate": 9.97517091361094e-06,
"loss": 0.6314,
"step": 1600
},
{
"epoch": 0.24860161591050342,
"eval_HasAns_exact": 72.27715064227326,
"eval_HasAns_f1": 78.8175772466993,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.27715064227326,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.8175772466993,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.27715064227326,
"eval_f1": 78.8175772466993,
"eval_loss": 0.5353918075561523,
"eval_runtime": 202.6516,
"eval_samples_per_second": 126.769,
"eval_steps_per_second": 7.925,
"eval_total": 25690,
"step": 1600
},
{
"epoch": 0.25637041640770664,
"grad_norm": 7.802361965179443,
"learning_rate": 9.974394033561218e-06,
"loss": 0.6026,
"step": 1650
},
{
"epoch": 0.2641392169049099,
"grad_norm": 15.137731552124023,
"learning_rate": 9.973617153511498e-06,
"loss": 0.6741,
"step": 1700
},
{
"epoch": 0.2641392169049099,
"eval_HasAns_exact": 72.1058777734527,
"eval_HasAns_f1": 78.67773271200794,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.1058777734527,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.67773271200794,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.1058777734527,
"eval_f1": 78.67773271200794,
"eval_loss": 0.5329614877700806,
"eval_runtime": 202.7192,
"eval_samples_per_second": 126.727,
"eval_steps_per_second": 7.922,
"eval_total": 25690,
"step": 1700
},
{
"epoch": 0.2719080174021131,
"grad_norm": 11.30902099609375,
"learning_rate": 9.972840273461778e-06,
"loss": 0.6069,
"step": 1750
},
{
"epoch": 0.27967681789931637,
"grad_norm": 9.311911582946777,
"learning_rate": 9.972063393412058e-06,
"loss": 0.5912,
"step": 1800
},
{
"epoch": 0.27967681789931637,
"eval_HasAns_exact": 72.24990268586998,
"eval_HasAns_f1": 78.7891482246884,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.24990268586998,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 78.7891482246884,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.24990268586998,
"eval_f1": 78.7891482246884,
"eval_loss": 0.5291240811347961,
"eval_runtime": 202.8091,
"eval_samples_per_second": 126.671,
"eval_steps_per_second": 7.919,
"eval_total": 25690,
"step": 1800
},
{
"epoch": 0.28744561839651955,
"grad_norm": 9.725021362304688,
"learning_rate": 9.971286513362338e-06,
"loss": 0.5943,
"step": 1850
},
{
"epoch": 0.2952144188937228,
"grad_norm": 8.321113586425781,
"learning_rate": 9.970509633312618e-06,
"loss": 0.584,
"step": 1900
},
{
"epoch": 0.2952144188937228,
"eval_HasAns_exact": 72.56909303230829,
"eval_HasAns_f1": 79.12955371544794,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.56909303230829,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.12955371544794,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.56909303230829,
"eval_f1": 79.12955371544794,
"eval_loss": 0.5198299288749695,
"eval_runtime": 202.5651,
"eval_samples_per_second": 126.823,
"eval_steps_per_second": 7.928,
"eval_total": 25690,
"step": 1900
},
{
"epoch": 0.30298321939092604,
"grad_norm": 11.973769187927246,
"learning_rate": 9.969732753262898e-06,
"loss": 0.5895,
"step": 1950
},
{
"epoch": 0.3107520198881293,
"grad_norm": 9.40645980834961,
"learning_rate": 9.968955873213176e-06,
"loss": 0.64,
"step": 2000
},
{
"epoch": 0.3107520198881293,
"eval_HasAns_exact": 72.79096924873491,
"eval_HasAns_f1": 79.28733698122942,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.79096924873491,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.28733698122942,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.79096924873491,
"eval_f1": 79.28733698122942,
"eval_loss": 0.5117060542106628,
"eval_runtime": 203.1079,
"eval_samples_per_second": 126.485,
"eval_steps_per_second": 7.907,
"eval_total": 25690,
"step": 2000
},
{
"epoch": 0.3185208203853325,
"grad_norm": 14.14991569519043,
"learning_rate": 9.968178993163456e-06,
"loss": 0.5559,
"step": 2050
},
{
"epoch": 0.3262896208825357,
"grad_norm": 10.641693115234375,
"learning_rate": 9.967402113113736e-06,
"loss": 0.5361,
"step": 2100
},
{
"epoch": 0.3262896208825357,
"eval_HasAns_exact": 73.13351498637603,
"eval_HasAns_f1": 79.60268173326928,
"eval_HasAns_total": 25690,
"eval_best_exact": 73.13351498637603,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.60268173326928,
"eval_best_f1_thresh": 0.0,
"eval_exact": 73.13351498637603,
"eval_f1": 79.60268173326928,
"eval_loss": 0.5079160928726196,
"eval_runtime": 202.7995,
"eval_samples_per_second": 126.677,
"eval_steps_per_second": 7.919,
"eval_total": 25690,
"step": 2100
},
{
"epoch": 0.33405842137973896,
"grad_norm": 10.088223457336426,
"learning_rate": 9.966625233064016e-06,
"loss": 0.6151,
"step": 2150
},
{
"epoch": 0.3418272218769422,
"grad_norm": 10.863611221313477,
"learning_rate": 9.965848353014296e-06,
"loss": 0.5935,
"step": 2200
},
{
"epoch": 0.3418272218769422,
"eval_HasAns_exact": 72.9349941611522,
"eval_HasAns_f1": 79.49240914807372,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.9349941611522,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.49240914807372,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.9349941611522,
"eval_f1": 79.49240914807372,
"eval_loss": 0.5025383234024048,
"eval_runtime": 203.3956,
"eval_samples_per_second": 126.306,
"eval_steps_per_second": 7.896,
"eval_total": 25690,
"step": 2200
},
{
"epoch": 0.34959602237414544,
"grad_norm": 12.565064430236816,
"learning_rate": 9.965071472964574e-06,
"loss": 0.5667,
"step": 2250
},
{
"epoch": 0.3573648228713487,
"grad_norm": 5.066905975341797,
"learning_rate": 9.964294592914854e-06,
"loss": 0.5198,
"step": 2300
},
{
"epoch": 0.3573648228713487,
"eval_HasAns_exact": 72.6975476839237,
"eval_HasAns_f1": 79.25327470362464,
"eval_HasAns_total": 25690,
"eval_best_exact": 72.6975476839237,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.25327470362464,
"eval_best_f1_thresh": 0.0,
"eval_exact": 72.6975476839237,
"eval_f1": 79.25327470362464,
"eval_loss": 0.4996239244937897,
"eval_runtime": 204.0149,
"eval_samples_per_second": 125.922,
"eval_steps_per_second": 7.872,
"eval_total": 25690,
"step": 2300
},
{
"epoch": 0.3651336233685519,
"grad_norm": 10.777655601501465,
"learning_rate": 9.963517712865134e-06,
"loss": 0.4983,
"step": 2350
},
{
"epoch": 0.3729024238657551,
"grad_norm": 6.770049095153809,
"learning_rate": 9.962740832815414e-06,
"loss": 0.5474,
"step": 2400
},
{
"epoch": 0.3729024238657551,
"eval_HasAns_exact": 73.29700272479565,
"eval_HasAns_f1": 79.7561576366954,
"eval_HasAns_total": 25690,
"eval_best_exact": 73.29700272479565,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.7561576366954,
"eval_best_f1_thresh": 0.0,
"eval_exact": 73.29700272479565,
"eval_f1": 79.7561576366954,
"eval_loss": 0.4912045896053314,
"eval_runtime": 202.5387,
"eval_samples_per_second": 126.84,
"eval_steps_per_second": 7.929,
"eval_total": 25690,
"step": 2400
},
{
"epoch": 0.38067122436295836,
"grad_norm": 11.886155128479004,
"learning_rate": 9.961963952765694e-06,
"loss": 0.5038,
"step": 2450
},
{
"epoch": 0.3884400248601616,
"grad_norm": 15.60519027709961,
"learning_rate": 9.961187072715973e-06,
"loss": 0.5655,
"step": 2500
},
{
"epoch": 0.3884400248601616,
"eval_HasAns_exact": 73.46049046321527,
"eval_HasAns_f1": 79.95807844595133,
"eval_HasAns_total": 25690,
"eval_best_exact": 73.46049046321527,
"eval_best_exact_thresh": 0.0,
"eval_best_f1": 79.95807844595133,
"eval_best_f1_thresh": 0.0,
"eval_exact": 73.46049046321527,
"eval_f1": 79.95807844595133,
"eval_loss": 0.48474493622779846,
"eval_runtime": 202.6752,
"eval_samples_per_second": 126.755,
"eval_steps_per_second": 7.924,
"eval_total": 25690,
"step": 2500
}
],
"logging_steps": 50,
"max_steps": 643600,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.090374053888e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}