{ "best_global_step": 6360, "best_metric": 0.5847528623289584, "best_model_checkpoint": "BanglaHealthNER-Model/checkpoint-6360", "epoch": 4.0, "eval_steps": 500, "global_step": 6360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.031446540880503145, "grad_norm": 7.333010673522949, "learning_rate": 1.9845911949685537e-05, "loss": 1.2505, "step": 50 }, { "epoch": 0.06289308176100629, "grad_norm": 3.140421152114868, "learning_rate": 1.968867924528302e-05, "loss": 0.6551, "step": 100 }, { "epoch": 0.09433962264150944, "grad_norm": 6.619497299194336, "learning_rate": 1.9531446540880505e-05, "loss": 0.5465, "step": 150 }, { "epoch": 0.12578616352201258, "grad_norm": 11.99181079864502, "learning_rate": 1.937421383647799e-05, "loss": 0.4357, "step": 200 }, { "epoch": 0.15723270440251572, "grad_norm": 5.166473388671875, "learning_rate": 1.9216981132075473e-05, "loss": 0.3967, "step": 250 }, { "epoch": 0.18867924528301888, "grad_norm": 4.55715274810791, "learning_rate": 1.9059748427672957e-05, "loss": 0.3922, "step": 300 }, { "epoch": 0.22012578616352202, "grad_norm": 4.138736724853516, "learning_rate": 1.890251572327044e-05, "loss": 0.3632, "step": 350 }, { "epoch": 0.25157232704402516, "grad_norm": 3.7361745834350586, "learning_rate": 1.8745283018867925e-05, "loss": 0.3747, "step": 400 }, { "epoch": 0.2830188679245283, "grad_norm": 3.6701931953430176, "learning_rate": 1.8588050314465412e-05, "loss": 0.3318, "step": 450 }, { "epoch": 0.31446540880503143, "grad_norm": 3.9308807849884033, "learning_rate": 1.8430817610062893e-05, "loss": 0.3175, "step": 500 }, { "epoch": 0.34591194968553457, "grad_norm": 4.826620578765869, "learning_rate": 1.827358490566038e-05, "loss": 0.3267, "step": 550 }, { "epoch": 0.37735849056603776, "grad_norm": 3.1970913410186768, "learning_rate": 1.8116352201257864e-05, "loss": 0.3339, "step": 600 }, { "epoch": 0.4088050314465409, "grad_norm": 2.922396183013916, "learning_rate": 1.795911949685535e-05, "loss": 0.3243, "step": 650 }, { "epoch": 0.44025157232704404, "grad_norm": 6.193075180053711, "learning_rate": 1.7801886792452832e-05, "loss": 0.3117, "step": 700 }, { "epoch": 0.4716981132075472, "grad_norm": 2.791144371032715, "learning_rate": 1.7644654088050316e-05, "loss": 0.3056, "step": 750 }, { "epoch": 0.5031446540880503, "grad_norm": 2.0920867919921875, "learning_rate": 1.74874213836478e-05, "loss": 0.326, "step": 800 }, { "epoch": 0.5345911949685535, "grad_norm": 3.349987030029297, "learning_rate": 1.7330188679245284e-05, "loss": 0.3226, "step": 850 }, { "epoch": 0.5660377358490566, "grad_norm": 3.94093918800354, "learning_rate": 1.717295597484277e-05, "loss": 0.2889, "step": 900 }, { "epoch": 0.5974842767295597, "grad_norm": 1.9453665018081665, "learning_rate": 1.7015723270440252e-05, "loss": 0.2952, "step": 950 }, { "epoch": 0.6289308176100629, "grad_norm": 3.939730167388916, "learning_rate": 1.6858490566037736e-05, "loss": 0.3022, "step": 1000 }, { "epoch": 0.660377358490566, "grad_norm": 2.5297317504882812, "learning_rate": 1.670125786163522e-05, "loss": 0.3055, "step": 1050 }, { "epoch": 0.6918238993710691, "grad_norm": 3.0068368911743164, "learning_rate": 1.6544025157232705e-05, "loss": 0.2766, "step": 1100 }, { "epoch": 0.7232704402515723, "grad_norm": 2.318912982940674, "learning_rate": 1.638679245283019e-05, "loss": 0.2959, "step": 1150 }, { "epoch": 0.7547169811320755, "grad_norm": 2.4681830406188965, "learning_rate": 1.6229559748427676e-05, "loss": 0.2887, "step": 1200 }, { "epoch": 0.7861635220125787, "grad_norm": 3.823657989501953, "learning_rate": 1.6072327044025157e-05, "loss": 0.3165, "step": 1250 }, { "epoch": 0.8176100628930818, "grad_norm": 3.6450536251068115, "learning_rate": 1.5915094339622644e-05, "loss": 0.2979, "step": 1300 }, { "epoch": 0.8490566037735849, "grad_norm": 2.409196615219116, "learning_rate": 1.5757861635220128e-05, "loss": 0.2967, "step": 1350 }, { "epoch": 0.8805031446540881, "grad_norm": 5.697852611541748, "learning_rate": 1.5600628930817612e-05, "loss": 0.2787, "step": 1400 }, { "epoch": 0.9119496855345912, "grad_norm": 3.4634220600128174, "learning_rate": 1.5443396226415096e-05, "loss": 0.287, "step": 1450 }, { "epoch": 0.9433962264150944, "grad_norm": 2.042687177658081, "learning_rate": 1.528616352201258e-05, "loss": 0.2791, "step": 1500 }, { "epoch": 0.9748427672955975, "grad_norm": 5.6213531494140625, "learning_rate": 1.5128930817610064e-05, "loss": 0.2916, "step": 1550 }, { "epoch": 1.0, "eval_accuracy": 0.8889652412929265, "eval_f1": 0.532340215783997, "eval_loss": 0.2980094850063324, "eval_precision": 0.49460614695705274, "eval_recall": 0.576307363927428, "eval_runtime": 33.5758, "eval_samples_per_second": 94.651, "eval_steps_per_second": 5.927, "step": 1590 }, { "epoch": 1.0062893081761006, "grad_norm": 4.562012672424316, "learning_rate": 1.497169811320755e-05, "loss": 0.2942, "step": 1600 }, { "epoch": 1.0377358490566038, "grad_norm": 3.757510185241699, "learning_rate": 1.4814465408805032e-05, "loss": 0.2638, "step": 1650 }, { "epoch": 1.069182389937107, "grad_norm": 3.9764132499694824, "learning_rate": 1.4657232704402518e-05, "loss": 0.2729, "step": 1700 }, { "epoch": 1.10062893081761, "grad_norm": 4.585367202758789, "learning_rate": 1.45e-05, "loss": 0.2608, "step": 1750 }, { "epoch": 1.1320754716981132, "grad_norm": 3.6955642700195312, "learning_rate": 1.4342767295597486e-05, "loss": 0.2573, "step": 1800 }, { "epoch": 1.1635220125786163, "grad_norm": 5.6667256355285645, "learning_rate": 1.418553459119497e-05, "loss": 0.2507, "step": 1850 }, { "epoch": 1.1949685534591195, "grad_norm": 4.68058967590332, "learning_rate": 1.4028301886792456e-05, "loss": 0.2969, "step": 1900 }, { "epoch": 1.2264150943396226, "grad_norm": 3.523763656616211, "learning_rate": 1.3871069182389938e-05, "loss": 0.2654, "step": 1950 }, { "epoch": 1.2578616352201257, "grad_norm": 4.139145374298096, "learning_rate": 1.3713836477987424e-05, "loss": 0.2576, "step": 2000 }, { "epoch": 1.2893081761006289, "grad_norm": 3.196833610534668, "learning_rate": 1.3556603773584906e-05, "loss": 0.2864, "step": 2050 }, { "epoch": 1.320754716981132, "grad_norm": 2.8964767456054688, "learning_rate": 1.3399371069182392e-05, "loss": 0.286, "step": 2100 }, { "epoch": 1.3522012578616351, "grad_norm": 2.7218921184539795, "learning_rate": 1.3242138364779876e-05, "loss": 0.2761, "step": 2150 }, { "epoch": 1.3836477987421385, "grad_norm": 4.021376132965088, "learning_rate": 1.3084905660377361e-05, "loss": 0.2669, "step": 2200 }, { "epoch": 1.4150943396226414, "grad_norm": 6.181784629821777, "learning_rate": 1.2927672955974844e-05, "loss": 0.2599, "step": 2250 }, { "epoch": 1.4465408805031448, "grad_norm": 4.6100077629089355, "learning_rate": 1.277044025157233e-05, "loss": 0.2586, "step": 2300 }, { "epoch": 1.4779874213836477, "grad_norm": 3.9046823978424072, "learning_rate": 1.2613207547169812e-05, "loss": 0.2437, "step": 2350 }, { "epoch": 1.509433962264151, "grad_norm": 7.715628147125244, "learning_rate": 1.2455974842767296e-05, "loss": 0.2562, "step": 2400 }, { "epoch": 1.540880503144654, "grad_norm": 1.5335407257080078, "learning_rate": 1.2298742138364781e-05, "loss": 0.2639, "step": 2450 }, { "epoch": 1.5723270440251573, "grad_norm": 3.2147579193115234, "learning_rate": 1.2141509433962264e-05, "loss": 0.2572, "step": 2500 }, { "epoch": 1.6037735849056602, "grad_norm": 1.934866189956665, "learning_rate": 1.198427672955975e-05, "loss": 0.2563, "step": 2550 }, { "epoch": 1.6352201257861636, "grad_norm": 3.8376920223236084, "learning_rate": 1.1827044025157233e-05, "loss": 0.2351, "step": 2600 }, { "epoch": 1.6666666666666665, "grad_norm": 5.230978965759277, "learning_rate": 1.1669811320754717e-05, "loss": 0.2649, "step": 2650 }, { "epoch": 1.6981132075471699, "grad_norm": 3.404048442840576, "learning_rate": 1.1512578616352201e-05, "loss": 0.2759, "step": 2700 }, { "epoch": 1.7295597484276728, "grad_norm": 6.088818073272705, "learning_rate": 1.1355345911949687e-05, "loss": 0.2668, "step": 2750 }, { "epoch": 1.7610062893081762, "grad_norm": 3.810774803161621, "learning_rate": 1.119811320754717e-05, "loss": 0.2632, "step": 2800 }, { "epoch": 1.7924528301886793, "grad_norm": 2.0126891136169434, "learning_rate": 1.1040880503144655e-05, "loss": 0.2347, "step": 2850 }, { "epoch": 1.8238993710691824, "grad_norm": 3.071716547012329, "learning_rate": 1.088364779874214e-05, "loss": 0.2484, "step": 2900 }, { "epoch": 1.8553459119496856, "grad_norm": 3.1930902004241943, "learning_rate": 1.0726415094339623e-05, "loss": 0.2736, "step": 2950 }, { "epoch": 1.8867924528301887, "grad_norm": 4.1462907791137695, "learning_rate": 1.0569182389937107e-05, "loss": 0.2551, "step": 3000 }, { "epoch": 1.9182389937106918, "grad_norm": 1.993411898612976, "learning_rate": 1.0411949685534593e-05, "loss": 0.2657, "step": 3050 }, { "epoch": 1.949685534591195, "grad_norm": 2.755627393722534, "learning_rate": 1.0254716981132075e-05, "loss": 0.2556, "step": 3100 }, { "epoch": 1.9811320754716981, "grad_norm": 3.132187604904175, "learning_rate": 1.0097484276729561e-05, "loss": 0.256, "step": 3150 }, { "epoch": 2.0, "eval_accuracy": 0.8973939668070005, "eval_f1": 0.56300452281445, "eval_loss": 0.28329119086265564, "eval_precision": 0.5442771751162275, "eval_recall": 0.5830665243685521, "eval_runtime": 33.3333, "eval_samples_per_second": 95.34, "eval_steps_per_second": 5.97, "step": 3180 }, { "epoch": 2.0125786163522013, "grad_norm": 2.6640384197235107, "learning_rate": 9.940251572327045e-06, "loss": 0.2306, "step": 3200 }, { "epoch": 2.0440251572327046, "grad_norm": 1.7074990272521973, "learning_rate": 9.783018867924529e-06, "loss": 0.2408, "step": 3250 }, { "epoch": 2.0754716981132075, "grad_norm": 2.832401990890503, "learning_rate": 9.625786163522013e-06, "loss": 0.229, "step": 3300 }, { "epoch": 2.106918238993711, "grad_norm": 3.8568339347839355, "learning_rate": 9.468553459119497e-06, "loss": 0.2372, "step": 3350 }, { "epoch": 2.138364779874214, "grad_norm": 2.2703866958618164, "learning_rate": 9.311320754716981e-06, "loss": 0.2354, "step": 3400 }, { "epoch": 2.169811320754717, "grad_norm": 2.2550501823425293, "learning_rate": 9.154088050314465e-06, "loss": 0.235, "step": 3450 }, { "epoch": 2.20125786163522, "grad_norm": 3.6433000564575195, "learning_rate": 8.99685534591195e-06, "loss": 0.2307, "step": 3500 }, { "epoch": 2.2327044025157234, "grad_norm": 3.5409624576568604, "learning_rate": 8.839622641509435e-06, "loss": 0.2453, "step": 3550 }, { "epoch": 2.2641509433962264, "grad_norm": 2.7766001224517822, "learning_rate": 8.682389937106919e-06, "loss": 0.2345, "step": 3600 }, { "epoch": 2.2955974842767297, "grad_norm": 2.28254771232605, "learning_rate": 8.525157232704403e-06, "loss": 0.2312, "step": 3650 }, { "epoch": 2.3270440251572326, "grad_norm": 2.1515822410583496, "learning_rate": 8.367924528301887e-06, "loss": 0.2309, "step": 3700 }, { "epoch": 2.358490566037736, "grad_norm": 2.217221975326538, "learning_rate": 8.21069182389937e-06, "loss": 0.2418, "step": 3750 }, { "epoch": 2.389937106918239, "grad_norm": 3.9846627712249756, "learning_rate": 8.053459119496856e-06, "loss": 0.2292, "step": 3800 }, { "epoch": 2.4213836477987423, "grad_norm": 2.434777021408081, "learning_rate": 7.89622641509434e-06, "loss": 0.2298, "step": 3850 }, { "epoch": 2.452830188679245, "grad_norm": 2.4619274139404297, "learning_rate": 7.738993710691825e-06, "loss": 0.2571, "step": 3900 }, { "epoch": 2.4842767295597485, "grad_norm": 1.396600604057312, "learning_rate": 7.5817610062893085e-06, "loss": 0.2346, "step": 3950 }, { "epoch": 2.5157232704402515, "grad_norm": 2.274308919906616, "learning_rate": 7.424528301886793e-06, "loss": 0.2319, "step": 4000 }, { "epoch": 2.547169811320755, "grad_norm": 7.168504238128662, "learning_rate": 7.267295597484277e-06, "loss": 0.2211, "step": 4050 }, { "epoch": 2.5786163522012577, "grad_norm": 3.706829786300659, "learning_rate": 7.1100628930817614e-06, "loss": 0.2502, "step": 4100 }, { "epoch": 2.610062893081761, "grad_norm": 4.938648223876953, "learning_rate": 6.952830188679246e-06, "loss": 0.2345, "step": 4150 }, { "epoch": 2.641509433962264, "grad_norm": 2.617217540740967, "learning_rate": 6.79559748427673e-06, "loss": 0.2175, "step": 4200 }, { "epoch": 2.6729559748427674, "grad_norm": 9.610097885131836, "learning_rate": 6.638364779874214e-06, "loss": 0.2222, "step": 4250 }, { "epoch": 2.7044025157232703, "grad_norm": 3.3741917610168457, "learning_rate": 6.481132075471699e-06, "loss": 0.2308, "step": 4300 }, { "epoch": 2.7358490566037736, "grad_norm": 2.843111515045166, "learning_rate": 6.323899371069183e-06, "loss": 0.2228, "step": 4350 }, { "epoch": 2.767295597484277, "grad_norm": 2.9446794986724854, "learning_rate": 6.166666666666667e-06, "loss": 0.2301, "step": 4400 }, { "epoch": 2.79874213836478, "grad_norm": 3.3162307739257812, "learning_rate": 6.009433962264152e-06, "loss": 0.2223, "step": 4450 }, { "epoch": 2.830188679245283, "grad_norm": 2.9730913639068604, "learning_rate": 5.852201257861636e-06, "loss": 0.2299, "step": 4500 }, { "epoch": 2.861635220125786, "grad_norm": 2.486496686935425, "learning_rate": 5.69496855345912e-06, "loss": 0.2362, "step": 4550 }, { "epoch": 2.8930817610062896, "grad_norm": 3.007472038269043, "learning_rate": 5.537735849056605e-06, "loss": 0.2436, "step": 4600 }, { "epoch": 2.9245283018867925, "grad_norm": 2.3737916946411133, "learning_rate": 5.380503144654089e-06, "loss": 0.2263, "step": 4650 }, { "epoch": 2.9559748427672954, "grad_norm": 1.6521756649017334, "learning_rate": 5.223270440251573e-06, "loss": 0.2304, "step": 4700 }, { "epoch": 2.9874213836477987, "grad_norm": 5.5335774421691895, "learning_rate": 5.066037735849058e-06, "loss": 0.2332, "step": 4750 }, { "epoch": 3.0, "eval_accuracy": 0.8990553564594134, "eval_f1": 0.5799224001311546, "eval_loss": 0.2767968773841858, "eval_precision": 0.537806608554632, "eval_recall": 0.6291948298351714, "eval_runtime": 33.2823, "eval_samples_per_second": 95.486, "eval_steps_per_second": 5.979, "step": 4770 }, { "epoch": 3.018867924528302, "grad_norm": 3.7346746921539307, "learning_rate": 4.908805031446541e-06, "loss": 0.2288, "step": 4800 }, { "epoch": 3.050314465408805, "grad_norm": 2.5639073848724365, "learning_rate": 4.751572327044026e-06, "loss": 0.2213, "step": 4850 }, { "epoch": 3.0817610062893084, "grad_norm": 3.113640308380127, "learning_rate": 4.59433962264151e-06, "loss": 0.2324, "step": 4900 }, { "epoch": 3.1132075471698113, "grad_norm": 3.0239310264587402, "learning_rate": 4.437106918238994e-06, "loss": 0.2111, "step": 4950 }, { "epoch": 3.1446540880503147, "grad_norm": 1.2867438793182373, "learning_rate": 4.279874213836479e-06, "loss": 0.224, "step": 5000 }, { "epoch": 3.1761006289308176, "grad_norm": 3.4221584796905518, "learning_rate": 4.122641509433963e-06, "loss": 0.2126, "step": 5050 }, { "epoch": 3.207547169811321, "grad_norm": 2.2448008060455322, "learning_rate": 3.965408805031447e-06, "loss": 0.2298, "step": 5100 }, { "epoch": 3.238993710691824, "grad_norm": 2.793227434158325, "learning_rate": 3.8081761006289312e-06, "loss": 0.2169, "step": 5150 }, { "epoch": 3.270440251572327, "grad_norm": 2.6487598419189453, "learning_rate": 3.6509433962264152e-06, "loss": 0.2272, "step": 5200 }, { "epoch": 3.30188679245283, "grad_norm": 3.785799503326416, "learning_rate": 3.4937106918238992e-06, "loss": 0.2276, "step": 5250 }, { "epoch": 3.3333333333333335, "grad_norm": 3.5206942558288574, "learning_rate": 3.3364779874213837e-06, "loss": 0.205, "step": 5300 }, { "epoch": 3.3647798742138364, "grad_norm": 2.6426963806152344, "learning_rate": 3.179245283018868e-06, "loss": 0.19, "step": 5350 }, { "epoch": 3.3962264150943398, "grad_norm": 3.5427286624908447, "learning_rate": 3.022012578616352e-06, "loss": 0.2051, "step": 5400 }, { "epoch": 3.4276729559748427, "grad_norm": 2.811741828918457, "learning_rate": 2.8647798742138366e-06, "loss": 0.198, "step": 5450 }, { "epoch": 3.459119496855346, "grad_norm": 2.434566020965576, "learning_rate": 2.707547169811321e-06, "loss": 0.214, "step": 5500 }, { "epoch": 3.490566037735849, "grad_norm": 3.7360141277313232, "learning_rate": 2.5503144654088054e-06, "loss": 0.2038, "step": 5550 }, { "epoch": 3.5220125786163523, "grad_norm": 1.8584225177764893, "learning_rate": 2.3930817610062895e-06, "loss": 0.2347, "step": 5600 }, { "epoch": 3.5534591194968552, "grad_norm": 3.2380409240722656, "learning_rate": 2.235849056603774e-06, "loss": 0.1995, "step": 5650 }, { "epoch": 3.5849056603773586, "grad_norm": 2.471127510070801, "learning_rate": 2.0786163522012583e-06, "loss": 0.2063, "step": 5700 }, { "epoch": 3.6163522012578615, "grad_norm": 2.688815116882324, "learning_rate": 1.9213836477987423e-06, "loss": 0.2057, "step": 5750 }, { "epoch": 3.647798742138365, "grad_norm": 3.2768843173980713, "learning_rate": 1.7641509433962264e-06, "loss": 0.1999, "step": 5800 }, { "epoch": 3.6792452830188678, "grad_norm": 4.351276397705078, "learning_rate": 1.6069182389937108e-06, "loss": 0.2317, "step": 5850 }, { "epoch": 3.710691823899371, "grad_norm": 5.953555107116699, "learning_rate": 1.449685534591195e-06, "loss": 0.2038, "step": 5900 }, { "epoch": 3.742138364779874, "grad_norm": 2.9146125316619873, "learning_rate": 1.2924528301886792e-06, "loss": 0.193, "step": 5950 }, { "epoch": 3.7735849056603774, "grad_norm": 3.5067131519317627, "learning_rate": 1.1352201257861637e-06, "loss": 0.2199, "step": 6000 }, { "epoch": 3.8050314465408803, "grad_norm": 3.824366807937622, "learning_rate": 9.77987421383648e-07, "loss": 0.1936, "step": 6050 }, { "epoch": 3.8364779874213837, "grad_norm": 3.798210620880127, "learning_rate": 8.207547169811321e-07, "loss": 0.2189, "step": 6100 }, { "epoch": 3.867924528301887, "grad_norm": 2.7663450241088867, "learning_rate": 6.635220125786164e-07, "loss": 0.2285, "step": 6150 }, { "epoch": 3.89937106918239, "grad_norm": 2.560297966003418, "learning_rate": 5.062893081761007e-07, "loss": 0.2027, "step": 6200 }, { "epoch": 3.930817610062893, "grad_norm": 2.668306589126587, "learning_rate": 3.490566037735849e-07, "loss": 0.217, "step": 6250 }, { "epoch": 3.9622641509433962, "grad_norm": 3.003211259841919, "learning_rate": 1.918238993710692e-07, "loss": 0.2076, "step": 6300 }, { "epoch": 3.9937106918238996, "grad_norm": 2.258948802947998, "learning_rate": 3.4591194968553466e-08, "loss": 0.1965, "step": 6350 }, { "epoch": 4.0, "eval_accuracy": 0.9005079851083817, "eval_f1": 0.5847528623289584, "eval_loss": 0.2796945869922638, "eval_precision": 0.5526815878378378, "eval_recall": 0.6207755247242974, "eval_runtime": 33.5926, "eval_samples_per_second": 94.604, "eval_steps_per_second": 5.924, "step": 6360 }, { "epoch": 4.0, "step": 6360, "total_flos": 7533320592942900.0, "train_loss": 0.2684423640464087, "train_runtime": 3411.3354, "train_samples_per_second": 29.814, "train_steps_per_second": 1.864 } ], "logging_steps": 50, "max_steps": 6360, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7533320592942900.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }