BanglaHealthNER-Model / trainer_state.json
EsferSami's picture
Bangla-English HealthNER model upload
96f1d29 verified
{
"best_global_step": 6360,
"best_metric": 0.5847528623289584,
"best_model_checkpoint": "BanglaHealthNER-Model/checkpoint-6360",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 6360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.031446540880503145,
"grad_norm": 7.333010673522949,
"learning_rate": 1.9845911949685537e-05,
"loss": 1.2505,
"step": 50
},
{
"epoch": 0.06289308176100629,
"grad_norm": 3.140421152114868,
"learning_rate": 1.968867924528302e-05,
"loss": 0.6551,
"step": 100
},
{
"epoch": 0.09433962264150944,
"grad_norm": 6.619497299194336,
"learning_rate": 1.9531446540880505e-05,
"loss": 0.5465,
"step": 150
},
{
"epoch": 0.12578616352201258,
"grad_norm": 11.99181079864502,
"learning_rate": 1.937421383647799e-05,
"loss": 0.4357,
"step": 200
},
{
"epoch": 0.15723270440251572,
"grad_norm": 5.166473388671875,
"learning_rate": 1.9216981132075473e-05,
"loss": 0.3967,
"step": 250
},
{
"epoch": 0.18867924528301888,
"grad_norm": 4.55715274810791,
"learning_rate": 1.9059748427672957e-05,
"loss": 0.3922,
"step": 300
},
{
"epoch": 0.22012578616352202,
"grad_norm": 4.138736724853516,
"learning_rate": 1.890251572327044e-05,
"loss": 0.3632,
"step": 350
},
{
"epoch": 0.25157232704402516,
"grad_norm": 3.7361745834350586,
"learning_rate": 1.8745283018867925e-05,
"loss": 0.3747,
"step": 400
},
{
"epoch": 0.2830188679245283,
"grad_norm": 3.6701931953430176,
"learning_rate": 1.8588050314465412e-05,
"loss": 0.3318,
"step": 450
},
{
"epoch": 0.31446540880503143,
"grad_norm": 3.9308807849884033,
"learning_rate": 1.8430817610062893e-05,
"loss": 0.3175,
"step": 500
},
{
"epoch": 0.34591194968553457,
"grad_norm": 4.826620578765869,
"learning_rate": 1.827358490566038e-05,
"loss": 0.3267,
"step": 550
},
{
"epoch": 0.37735849056603776,
"grad_norm": 3.1970913410186768,
"learning_rate": 1.8116352201257864e-05,
"loss": 0.3339,
"step": 600
},
{
"epoch": 0.4088050314465409,
"grad_norm": 2.922396183013916,
"learning_rate": 1.795911949685535e-05,
"loss": 0.3243,
"step": 650
},
{
"epoch": 0.44025157232704404,
"grad_norm": 6.193075180053711,
"learning_rate": 1.7801886792452832e-05,
"loss": 0.3117,
"step": 700
},
{
"epoch": 0.4716981132075472,
"grad_norm": 2.791144371032715,
"learning_rate": 1.7644654088050316e-05,
"loss": 0.3056,
"step": 750
},
{
"epoch": 0.5031446540880503,
"grad_norm": 2.0920867919921875,
"learning_rate": 1.74874213836478e-05,
"loss": 0.326,
"step": 800
},
{
"epoch": 0.5345911949685535,
"grad_norm": 3.349987030029297,
"learning_rate": 1.7330188679245284e-05,
"loss": 0.3226,
"step": 850
},
{
"epoch": 0.5660377358490566,
"grad_norm": 3.94093918800354,
"learning_rate": 1.717295597484277e-05,
"loss": 0.2889,
"step": 900
},
{
"epoch": 0.5974842767295597,
"grad_norm": 1.9453665018081665,
"learning_rate": 1.7015723270440252e-05,
"loss": 0.2952,
"step": 950
},
{
"epoch": 0.6289308176100629,
"grad_norm": 3.939730167388916,
"learning_rate": 1.6858490566037736e-05,
"loss": 0.3022,
"step": 1000
},
{
"epoch": 0.660377358490566,
"grad_norm": 2.5297317504882812,
"learning_rate": 1.670125786163522e-05,
"loss": 0.3055,
"step": 1050
},
{
"epoch": 0.6918238993710691,
"grad_norm": 3.0068368911743164,
"learning_rate": 1.6544025157232705e-05,
"loss": 0.2766,
"step": 1100
},
{
"epoch": 0.7232704402515723,
"grad_norm": 2.318912982940674,
"learning_rate": 1.638679245283019e-05,
"loss": 0.2959,
"step": 1150
},
{
"epoch": 0.7547169811320755,
"grad_norm": 2.4681830406188965,
"learning_rate": 1.6229559748427676e-05,
"loss": 0.2887,
"step": 1200
},
{
"epoch": 0.7861635220125787,
"grad_norm": 3.823657989501953,
"learning_rate": 1.6072327044025157e-05,
"loss": 0.3165,
"step": 1250
},
{
"epoch": 0.8176100628930818,
"grad_norm": 3.6450536251068115,
"learning_rate": 1.5915094339622644e-05,
"loss": 0.2979,
"step": 1300
},
{
"epoch": 0.8490566037735849,
"grad_norm": 2.409196615219116,
"learning_rate": 1.5757861635220128e-05,
"loss": 0.2967,
"step": 1350
},
{
"epoch": 0.8805031446540881,
"grad_norm": 5.697852611541748,
"learning_rate": 1.5600628930817612e-05,
"loss": 0.2787,
"step": 1400
},
{
"epoch": 0.9119496855345912,
"grad_norm": 3.4634220600128174,
"learning_rate": 1.5443396226415096e-05,
"loss": 0.287,
"step": 1450
},
{
"epoch": 0.9433962264150944,
"grad_norm": 2.042687177658081,
"learning_rate": 1.528616352201258e-05,
"loss": 0.2791,
"step": 1500
},
{
"epoch": 0.9748427672955975,
"grad_norm": 5.6213531494140625,
"learning_rate": 1.5128930817610064e-05,
"loss": 0.2916,
"step": 1550
},
{
"epoch": 1.0,
"eval_accuracy": 0.8889652412929265,
"eval_f1": 0.532340215783997,
"eval_loss": 0.2980094850063324,
"eval_precision": 0.49460614695705274,
"eval_recall": 0.576307363927428,
"eval_runtime": 33.5758,
"eval_samples_per_second": 94.651,
"eval_steps_per_second": 5.927,
"step": 1590
},
{
"epoch": 1.0062893081761006,
"grad_norm": 4.562012672424316,
"learning_rate": 1.497169811320755e-05,
"loss": 0.2942,
"step": 1600
},
{
"epoch": 1.0377358490566038,
"grad_norm": 3.757510185241699,
"learning_rate": 1.4814465408805032e-05,
"loss": 0.2638,
"step": 1650
},
{
"epoch": 1.069182389937107,
"grad_norm": 3.9764132499694824,
"learning_rate": 1.4657232704402518e-05,
"loss": 0.2729,
"step": 1700
},
{
"epoch": 1.10062893081761,
"grad_norm": 4.585367202758789,
"learning_rate": 1.45e-05,
"loss": 0.2608,
"step": 1750
},
{
"epoch": 1.1320754716981132,
"grad_norm": 3.6955642700195312,
"learning_rate": 1.4342767295597486e-05,
"loss": 0.2573,
"step": 1800
},
{
"epoch": 1.1635220125786163,
"grad_norm": 5.6667256355285645,
"learning_rate": 1.418553459119497e-05,
"loss": 0.2507,
"step": 1850
},
{
"epoch": 1.1949685534591195,
"grad_norm": 4.68058967590332,
"learning_rate": 1.4028301886792456e-05,
"loss": 0.2969,
"step": 1900
},
{
"epoch": 1.2264150943396226,
"grad_norm": 3.523763656616211,
"learning_rate": 1.3871069182389938e-05,
"loss": 0.2654,
"step": 1950
},
{
"epoch": 1.2578616352201257,
"grad_norm": 4.139145374298096,
"learning_rate": 1.3713836477987424e-05,
"loss": 0.2576,
"step": 2000
},
{
"epoch": 1.2893081761006289,
"grad_norm": 3.196833610534668,
"learning_rate": 1.3556603773584906e-05,
"loss": 0.2864,
"step": 2050
},
{
"epoch": 1.320754716981132,
"grad_norm": 2.8964767456054688,
"learning_rate": 1.3399371069182392e-05,
"loss": 0.286,
"step": 2100
},
{
"epoch": 1.3522012578616351,
"grad_norm": 2.7218921184539795,
"learning_rate": 1.3242138364779876e-05,
"loss": 0.2761,
"step": 2150
},
{
"epoch": 1.3836477987421385,
"grad_norm": 4.021376132965088,
"learning_rate": 1.3084905660377361e-05,
"loss": 0.2669,
"step": 2200
},
{
"epoch": 1.4150943396226414,
"grad_norm": 6.181784629821777,
"learning_rate": 1.2927672955974844e-05,
"loss": 0.2599,
"step": 2250
},
{
"epoch": 1.4465408805031448,
"grad_norm": 4.6100077629089355,
"learning_rate": 1.277044025157233e-05,
"loss": 0.2586,
"step": 2300
},
{
"epoch": 1.4779874213836477,
"grad_norm": 3.9046823978424072,
"learning_rate": 1.2613207547169812e-05,
"loss": 0.2437,
"step": 2350
},
{
"epoch": 1.509433962264151,
"grad_norm": 7.715628147125244,
"learning_rate": 1.2455974842767296e-05,
"loss": 0.2562,
"step": 2400
},
{
"epoch": 1.540880503144654,
"grad_norm": 1.5335407257080078,
"learning_rate": 1.2298742138364781e-05,
"loss": 0.2639,
"step": 2450
},
{
"epoch": 1.5723270440251573,
"grad_norm": 3.2147579193115234,
"learning_rate": 1.2141509433962264e-05,
"loss": 0.2572,
"step": 2500
},
{
"epoch": 1.6037735849056602,
"grad_norm": 1.934866189956665,
"learning_rate": 1.198427672955975e-05,
"loss": 0.2563,
"step": 2550
},
{
"epoch": 1.6352201257861636,
"grad_norm": 3.8376920223236084,
"learning_rate": 1.1827044025157233e-05,
"loss": 0.2351,
"step": 2600
},
{
"epoch": 1.6666666666666665,
"grad_norm": 5.230978965759277,
"learning_rate": 1.1669811320754717e-05,
"loss": 0.2649,
"step": 2650
},
{
"epoch": 1.6981132075471699,
"grad_norm": 3.404048442840576,
"learning_rate": 1.1512578616352201e-05,
"loss": 0.2759,
"step": 2700
},
{
"epoch": 1.7295597484276728,
"grad_norm": 6.088818073272705,
"learning_rate": 1.1355345911949687e-05,
"loss": 0.2668,
"step": 2750
},
{
"epoch": 1.7610062893081762,
"grad_norm": 3.810774803161621,
"learning_rate": 1.119811320754717e-05,
"loss": 0.2632,
"step": 2800
},
{
"epoch": 1.7924528301886793,
"grad_norm": 2.0126891136169434,
"learning_rate": 1.1040880503144655e-05,
"loss": 0.2347,
"step": 2850
},
{
"epoch": 1.8238993710691824,
"grad_norm": 3.071716547012329,
"learning_rate": 1.088364779874214e-05,
"loss": 0.2484,
"step": 2900
},
{
"epoch": 1.8553459119496856,
"grad_norm": 3.1930902004241943,
"learning_rate": 1.0726415094339623e-05,
"loss": 0.2736,
"step": 2950
},
{
"epoch": 1.8867924528301887,
"grad_norm": 4.1462907791137695,
"learning_rate": 1.0569182389937107e-05,
"loss": 0.2551,
"step": 3000
},
{
"epoch": 1.9182389937106918,
"grad_norm": 1.993411898612976,
"learning_rate": 1.0411949685534593e-05,
"loss": 0.2657,
"step": 3050
},
{
"epoch": 1.949685534591195,
"grad_norm": 2.755627393722534,
"learning_rate": 1.0254716981132075e-05,
"loss": 0.2556,
"step": 3100
},
{
"epoch": 1.9811320754716981,
"grad_norm": 3.132187604904175,
"learning_rate": 1.0097484276729561e-05,
"loss": 0.256,
"step": 3150
},
{
"epoch": 2.0,
"eval_accuracy": 0.8973939668070005,
"eval_f1": 0.56300452281445,
"eval_loss": 0.28329119086265564,
"eval_precision": 0.5442771751162275,
"eval_recall": 0.5830665243685521,
"eval_runtime": 33.3333,
"eval_samples_per_second": 95.34,
"eval_steps_per_second": 5.97,
"step": 3180
},
{
"epoch": 2.0125786163522013,
"grad_norm": 2.6640384197235107,
"learning_rate": 9.940251572327045e-06,
"loss": 0.2306,
"step": 3200
},
{
"epoch": 2.0440251572327046,
"grad_norm": 1.7074990272521973,
"learning_rate": 9.783018867924529e-06,
"loss": 0.2408,
"step": 3250
},
{
"epoch": 2.0754716981132075,
"grad_norm": 2.832401990890503,
"learning_rate": 9.625786163522013e-06,
"loss": 0.229,
"step": 3300
},
{
"epoch": 2.106918238993711,
"grad_norm": 3.8568339347839355,
"learning_rate": 9.468553459119497e-06,
"loss": 0.2372,
"step": 3350
},
{
"epoch": 2.138364779874214,
"grad_norm": 2.2703866958618164,
"learning_rate": 9.311320754716981e-06,
"loss": 0.2354,
"step": 3400
},
{
"epoch": 2.169811320754717,
"grad_norm": 2.2550501823425293,
"learning_rate": 9.154088050314465e-06,
"loss": 0.235,
"step": 3450
},
{
"epoch": 2.20125786163522,
"grad_norm": 3.6433000564575195,
"learning_rate": 8.99685534591195e-06,
"loss": 0.2307,
"step": 3500
},
{
"epoch": 2.2327044025157234,
"grad_norm": 3.5409624576568604,
"learning_rate": 8.839622641509435e-06,
"loss": 0.2453,
"step": 3550
},
{
"epoch": 2.2641509433962264,
"grad_norm": 2.7766001224517822,
"learning_rate": 8.682389937106919e-06,
"loss": 0.2345,
"step": 3600
},
{
"epoch": 2.2955974842767297,
"grad_norm": 2.28254771232605,
"learning_rate": 8.525157232704403e-06,
"loss": 0.2312,
"step": 3650
},
{
"epoch": 2.3270440251572326,
"grad_norm": 2.1515822410583496,
"learning_rate": 8.367924528301887e-06,
"loss": 0.2309,
"step": 3700
},
{
"epoch": 2.358490566037736,
"grad_norm": 2.217221975326538,
"learning_rate": 8.21069182389937e-06,
"loss": 0.2418,
"step": 3750
},
{
"epoch": 2.389937106918239,
"grad_norm": 3.9846627712249756,
"learning_rate": 8.053459119496856e-06,
"loss": 0.2292,
"step": 3800
},
{
"epoch": 2.4213836477987423,
"grad_norm": 2.434777021408081,
"learning_rate": 7.89622641509434e-06,
"loss": 0.2298,
"step": 3850
},
{
"epoch": 2.452830188679245,
"grad_norm": 2.4619274139404297,
"learning_rate": 7.738993710691825e-06,
"loss": 0.2571,
"step": 3900
},
{
"epoch": 2.4842767295597485,
"grad_norm": 1.396600604057312,
"learning_rate": 7.5817610062893085e-06,
"loss": 0.2346,
"step": 3950
},
{
"epoch": 2.5157232704402515,
"grad_norm": 2.274308919906616,
"learning_rate": 7.424528301886793e-06,
"loss": 0.2319,
"step": 4000
},
{
"epoch": 2.547169811320755,
"grad_norm": 7.168504238128662,
"learning_rate": 7.267295597484277e-06,
"loss": 0.2211,
"step": 4050
},
{
"epoch": 2.5786163522012577,
"grad_norm": 3.706829786300659,
"learning_rate": 7.1100628930817614e-06,
"loss": 0.2502,
"step": 4100
},
{
"epoch": 2.610062893081761,
"grad_norm": 4.938648223876953,
"learning_rate": 6.952830188679246e-06,
"loss": 0.2345,
"step": 4150
},
{
"epoch": 2.641509433962264,
"grad_norm": 2.617217540740967,
"learning_rate": 6.79559748427673e-06,
"loss": 0.2175,
"step": 4200
},
{
"epoch": 2.6729559748427674,
"grad_norm": 9.610097885131836,
"learning_rate": 6.638364779874214e-06,
"loss": 0.2222,
"step": 4250
},
{
"epoch": 2.7044025157232703,
"grad_norm": 3.3741917610168457,
"learning_rate": 6.481132075471699e-06,
"loss": 0.2308,
"step": 4300
},
{
"epoch": 2.7358490566037736,
"grad_norm": 2.843111515045166,
"learning_rate": 6.323899371069183e-06,
"loss": 0.2228,
"step": 4350
},
{
"epoch": 2.767295597484277,
"grad_norm": 2.9446794986724854,
"learning_rate": 6.166666666666667e-06,
"loss": 0.2301,
"step": 4400
},
{
"epoch": 2.79874213836478,
"grad_norm": 3.3162307739257812,
"learning_rate": 6.009433962264152e-06,
"loss": 0.2223,
"step": 4450
},
{
"epoch": 2.830188679245283,
"grad_norm": 2.9730913639068604,
"learning_rate": 5.852201257861636e-06,
"loss": 0.2299,
"step": 4500
},
{
"epoch": 2.861635220125786,
"grad_norm": 2.486496686935425,
"learning_rate": 5.69496855345912e-06,
"loss": 0.2362,
"step": 4550
},
{
"epoch": 2.8930817610062896,
"grad_norm": 3.007472038269043,
"learning_rate": 5.537735849056605e-06,
"loss": 0.2436,
"step": 4600
},
{
"epoch": 2.9245283018867925,
"grad_norm": 2.3737916946411133,
"learning_rate": 5.380503144654089e-06,
"loss": 0.2263,
"step": 4650
},
{
"epoch": 2.9559748427672954,
"grad_norm": 1.6521756649017334,
"learning_rate": 5.223270440251573e-06,
"loss": 0.2304,
"step": 4700
},
{
"epoch": 2.9874213836477987,
"grad_norm": 5.5335774421691895,
"learning_rate": 5.066037735849058e-06,
"loss": 0.2332,
"step": 4750
},
{
"epoch": 3.0,
"eval_accuracy": 0.8990553564594134,
"eval_f1": 0.5799224001311546,
"eval_loss": 0.2767968773841858,
"eval_precision": 0.537806608554632,
"eval_recall": 0.6291948298351714,
"eval_runtime": 33.2823,
"eval_samples_per_second": 95.486,
"eval_steps_per_second": 5.979,
"step": 4770
},
{
"epoch": 3.018867924528302,
"grad_norm": 3.7346746921539307,
"learning_rate": 4.908805031446541e-06,
"loss": 0.2288,
"step": 4800
},
{
"epoch": 3.050314465408805,
"grad_norm": 2.5639073848724365,
"learning_rate": 4.751572327044026e-06,
"loss": 0.2213,
"step": 4850
},
{
"epoch": 3.0817610062893084,
"grad_norm": 3.113640308380127,
"learning_rate": 4.59433962264151e-06,
"loss": 0.2324,
"step": 4900
},
{
"epoch": 3.1132075471698113,
"grad_norm": 3.0239310264587402,
"learning_rate": 4.437106918238994e-06,
"loss": 0.2111,
"step": 4950
},
{
"epoch": 3.1446540880503147,
"grad_norm": 1.2867438793182373,
"learning_rate": 4.279874213836479e-06,
"loss": 0.224,
"step": 5000
},
{
"epoch": 3.1761006289308176,
"grad_norm": 3.4221584796905518,
"learning_rate": 4.122641509433963e-06,
"loss": 0.2126,
"step": 5050
},
{
"epoch": 3.207547169811321,
"grad_norm": 2.2448008060455322,
"learning_rate": 3.965408805031447e-06,
"loss": 0.2298,
"step": 5100
},
{
"epoch": 3.238993710691824,
"grad_norm": 2.793227434158325,
"learning_rate": 3.8081761006289312e-06,
"loss": 0.2169,
"step": 5150
},
{
"epoch": 3.270440251572327,
"grad_norm": 2.6487598419189453,
"learning_rate": 3.6509433962264152e-06,
"loss": 0.2272,
"step": 5200
},
{
"epoch": 3.30188679245283,
"grad_norm": 3.785799503326416,
"learning_rate": 3.4937106918238992e-06,
"loss": 0.2276,
"step": 5250
},
{
"epoch": 3.3333333333333335,
"grad_norm": 3.5206942558288574,
"learning_rate": 3.3364779874213837e-06,
"loss": 0.205,
"step": 5300
},
{
"epoch": 3.3647798742138364,
"grad_norm": 2.6426963806152344,
"learning_rate": 3.179245283018868e-06,
"loss": 0.19,
"step": 5350
},
{
"epoch": 3.3962264150943398,
"grad_norm": 3.5427286624908447,
"learning_rate": 3.022012578616352e-06,
"loss": 0.2051,
"step": 5400
},
{
"epoch": 3.4276729559748427,
"grad_norm": 2.811741828918457,
"learning_rate": 2.8647798742138366e-06,
"loss": 0.198,
"step": 5450
},
{
"epoch": 3.459119496855346,
"grad_norm": 2.434566020965576,
"learning_rate": 2.707547169811321e-06,
"loss": 0.214,
"step": 5500
},
{
"epoch": 3.490566037735849,
"grad_norm": 3.7360141277313232,
"learning_rate": 2.5503144654088054e-06,
"loss": 0.2038,
"step": 5550
},
{
"epoch": 3.5220125786163523,
"grad_norm": 1.8584225177764893,
"learning_rate": 2.3930817610062895e-06,
"loss": 0.2347,
"step": 5600
},
{
"epoch": 3.5534591194968552,
"grad_norm": 3.2380409240722656,
"learning_rate": 2.235849056603774e-06,
"loss": 0.1995,
"step": 5650
},
{
"epoch": 3.5849056603773586,
"grad_norm": 2.471127510070801,
"learning_rate": 2.0786163522012583e-06,
"loss": 0.2063,
"step": 5700
},
{
"epoch": 3.6163522012578615,
"grad_norm": 2.688815116882324,
"learning_rate": 1.9213836477987423e-06,
"loss": 0.2057,
"step": 5750
},
{
"epoch": 3.647798742138365,
"grad_norm": 3.2768843173980713,
"learning_rate": 1.7641509433962264e-06,
"loss": 0.1999,
"step": 5800
},
{
"epoch": 3.6792452830188678,
"grad_norm": 4.351276397705078,
"learning_rate": 1.6069182389937108e-06,
"loss": 0.2317,
"step": 5850
},
{
"epoch": 3.710691823899371,
"grad_norm": 5.953555107116699,
"learning_rate": 1.449685534591195e-06,
"loss": 0.2038,
"step": 5900
},
{
"epoch": 3.742138364779874,
"grad_norm": 2.9146125316619873,
"learning_rate": 1.2924528301886792e-06,
"loss": 0.193,
"step": 5950
},
{
"epoch": 3.7735849056603774,
"grad_norm": 3.5067131519317627,
"learning_rate": 1.1352201257861637e-06,
"loss": 0.2199,
"step": 6000
},
{
"epoch": 3.8050314465408803,
"grad_norm": 3.824366807937622,
"learning_rate": 9.77987421383648e-07,
"loss": 0.1936,
"step": 6050
},
{
"epoch": 3.8364779874213837,
"grad_norm": 3.798210620880127,
"learning_rate": 8.207547169811321e-07,
"loss": 0.2189,
"step": 6100
},
{
"epoch": 3.867924528301887,
"grad_norm": 2.7663450241088867,
"learning_rate": 6.635220125786164e-07,
"loss": 0.2285,
"step": 6150
},
{
"epoch": 3.89937106918239,
"grad_norm": 2.560297966003418,
"learning_rate": 5.062893081761007e-07,
"loss": 0.2027,
"step": 6200
},
{
"epoch": 3.930817610062893,
"grad_norm": 2.668306589126587,
"learning_rate": 3.490566037735849e-07,
"loss": 0.217,
"step": 6250
},
{
"epoch": 3.9622641509433962,
"grad_norm": 3.003211259841919,
"learning_rate": 1.918238993710692e-07,
"loss": 0.2076,
"step": 6300
},
{
"epoch": 3.9937106918238996,
"grad_norm": 2.258948802947998,
"learning_rate": 3.4591194968553466e-08,
"loss": 0.1965,
"step": 6350
},
{
"epoch": 4.0,
"eval_accuracy": 0.9005079851083817,
"eval_f1": 0.5847528623289584,
"eval_loss": 0.2796945869922638,
"eval_precision": 0.5526815878378378,
"eval_recall": 0.6207755247242974,
"eval_runtime": 33.5926,
"eval_samples_per_second": 94.604,
"eval_steps_per_second": 5.924,
"step": 6360
},
{
"epoch": 4.0,
"step": 6360,
"total_flos": 7533320592942900.0,
"train_loss": 0.2684423640464087,
"train_runtime": 3411.3354,
"train_samples_per_second": 29.814,
"train_steps_per_second": 1.864
}
],
"logging_steps": 50,
"max_steps": 6360,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7533320592942900.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}