{
  "best_global_step": 6360,
  "best_metric": 0.5847528623289584,
  "best_model_checkpoint": "BanglaHealthNER-Model/checkpoint-6360",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 6360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.031446540880503145,
      "grad_norm": 7.333010673522949,
      "learning_rate": 1.9845911949685537e-05,
      "loss": 1.2505,
      "step": 50
    },
    {
      "epoch": 0.06289308176100629,
      "grad_norm": 3.140421152114868,
      "learning_rate": 1.968867924528302e-05,
      "loss": 0.6551,
      "step": 100
    },
    {
      "epoch": 0.09433962264150944,
      "grad_norm": 6.619497299194336,
      "learning_rate": 1.9531446540880505e-05,
      "loss": 0.5465,
      "step": 150
    },
    {
      "epoch": 0.12578616352201258,
      "grad_norm": 11.99181079864502,
      "learning_rate": 1.937421383647799e-05,
      "loss": 0.4357,
      "step": 200
    },
    {
      "epoch": 0.15723270440251572,
      "grad_norm": 5.166473388671875,
      "learning_rate": 1.9216981132075473e-05,
      "loss": 0.3967,
      "step": 250
    },
    {
      "epoch": 0.18867924528301888,
      "grad_norm": 4.55715274810791,
      "learning_rate": 1.9059748427672957e-05,
      "loss": 0.3922,
      "step": 300
    },
    {
      "epoch": 0.22012578616352202,
      "grad_norm": 4.138736724853516,
      "learning_rate": 1.890251572327044e-05,
      "loss": 0.3632,
      "step": 350
    },
    {
      "epoch": 0.25157232704402516,
      "grad_norm": 3.7361745834350586,
      "learning_rate": 1.8745283018867925e-05,
      "loss": 0.3747,
      "step": 400
    },
    {
      "epoch": 0.2830188679245283,
      "grad_norm": 3.6701931953430176,
      "learning_rate": 1.8588050314465412e-05,
      "loss": 0.3318,
      "step": 450
    },
    {
      "epoch": 0.31446540880503143,
      "grad_norm": 3.9308807849884033,
      "learning_rate": 1.8430817610062893e-05,
      "loss": 0.3175,
      "step": 500
    },
    {
      "epoch": 0.34591194968553457,
      "grad_norm": 4.826620578765869,
      "learning_rate": 1.827358490566038e-05,
      "loss": 0.3267,
      "step": 550
    },
    {
      "epoch": 0.37735849056603776,
      "grad_norm": 3.1970913410186768,
      "learning_rate": 1.8116352201257864e-05,
      "loss": 0.3339,
      "step": 600
    },
    {
      "epoch": 0.4088050314465409,
      "grad_norm": 2.922396183013916,
      "learning_rate": 1.795911949685535e-05,
      "loss": 0.3243,
      "step": 650
    },
    {
      "epoch": 0.44025157232704404,
      "grad_norm": 6.193075180053711,
      "learning_rate": 1.7801886792452832e-05,
      "loss": 0.3117,
      "step": 700
    },
    {
      "epoch": 0.4716981132075472,
      "grad_norm": 2.791144371032715,
      "learning_rate": 1.7644654088050316e-05,
      "loss": 0.3056,
      "step": 750
    },
    {
      "epoch": 0.5031446540880503,
      "grad_norm": 2.0920867919921875,
      "learning_rate": 1.74874213836478e-05,
      "loss": 0.326,
      "step": 800
    },
    {
      "epoch": 0.5345911949685535,
      "grad_norm": 3.349987030029297,
      "learning_rate": 1.7330188679245284e-05,
      "loss": 0.3226,
      "step": 850
    },
    {
      "epoch": 0.5660377358490566,
      "grad_norm": 3.94093918800354,
      "learning_rate": 1.717295597484277e-05,
      "loss": 0.2889,
      "step": 900
    },
    {
      "epoch": 0.5974842767295597,
      "grad_norm": 1.9453665018081665,
      "learning_rate": 1.7015723270440252e-05,
      "loss": 0.2952,
      "step": 950
    },
    {
      "epoch": 0.6289308176100629,
      "grad_norm": 3.939730167388916,
      "learning_rate": 1.6858490566037736e-05,
      "loss": 0.3022,
      "step": 1000
    },
    {
      "epoch": 0.660377358490566,
      "grad_norm": 2.5297317504882812,
      "learning_rate": 1.670125786163522e-05,
      "loss": 0.3055,
      "step": 1050
    },
    {
      "epoch": 0.6918238993710691,
      "grad_norm": 3.0068368911743164,
      "learning_rate": 1.6544025157232705e-05,
      "loss": 0.2766,
      "step": 1100
    },
    {
      "epoch": 0.7232704402515723,
      "grad_norm": 2.318912982940674,
      "learning_rate": 1.638679245283019e-05,
      "loss": 0.2959,
      "step": 1150
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 2.4681830406188965,
      "learning_rate": 1.6229559748427676e-05,
      "loss": 0.2887,
      "step": 1200
    },
    {
      "epoch": 0.7861635220125787,
      "grad_norm": 3.823657989501953,
      "learning_rate": 1.6072327044025157e-05,
      "loss": 0.3165,
      "step": 1250
    },
    {
      "epoch": 0.8176100628930818,
      "grad_norm": 3.6450536251068115,
      "learning_rate": 1.5915094339622644e-05,
      "loss": 0.2979,
      "step": 1300
    },
    {
      "epoch": 0.8490566037735849,
      "grad_norm": 2.409196615219116,
      "learning_rate": 1.5757861635220128e-05,
      "loss": 0.2967,
      "step": 1350
    },
    {
      "epoch": 0.8805031446540881,
      "grad_norm": 5.697852611541748,
      "learning_rate": 1.5600628930817612e-05,
      "loss": 0.2787,
      "step": 1400
    },
    {
      "epoch": 0.9119496855345912,
      "grad_norm": 3.4634220600128174,
      "learning_rate": 1.5443396226415096e-05,
      "loss": 0.287,
      "step": 1450
    },
    {
      "epoch": 0.9433962264150944,
      "grad_norm": 2.042687177658081,
      "learning_rate": 1.528616352201258e-05,
      "loss": 0.2791,
      "step": 1500
    },
    {
      "epoch": 0.9748427672955975,
      "grad_norm": 5.6213531494140625,
      "learning_rate": 1.5128930817610064e-05,
      "loss": 0.2916,
      "step": 1550
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8889652412929265,
      "eval_f1": 0.532340215783997,
      "eval_loss": 0.2980094850063324,
      "eval_precision": 0.49460614695705274,
      "eval_recall": 0.576307363927428,
      "eval_runtime": 33.5758,
      "eval_samples_per_second": 94.651,
      "eval_steps_per_second": 5.927,
      "step": 1590
    },
    {
      "epoch": 1.0062893081761006,
      "grad_norm": 4.562012672424316,
      "learning_rate": 1.497169811320755e-05,
      "loss": 0.2942,
      "step": 1600
    },
    {
      "epoch": 1.0377358490566038,
      "grad_norm": 3.757510185241699,
      "learning_rate": 1.4814465408805032e-05,
      "loss": 0.2638,
      "step": 1650
    },
    {
      "epoch": 1.069182389937107,
      "grad_norm": 3.9764132499694824,
      "learning_rate": 1.4657232704402518e-05,
      "loss": 0.2729,
      "step": 1700
    },
    {
      "epoch": 1.10062893081761,
      "grad_norm": 4.585367202758789,
      "learning_rate": 1.45e-05,
      "loss": 0.2608,
      "step": 1750
    },
    {
      "epoch": 1.1320754716981132,
      "grad_norm": 3.6955642700195312,
      "learning_rate": 1.4342767295597486e-05,
      "loss": 0.2573,
      "step": 1800
    },
    {
      "epoch": 1.1635220125786163,
      "grad_norm": 5.6667256355285645,
      "learning_rate": 1.418553459119497e-05,
      "loss": 0.2507,
      "step": 1850
    },
    {
      "epoch": 1.1949685534591195,
      "grad_norm": 4.68058967590332,
      "learning_rate": 1.4028301886792456e-05,
      "loss": 0.2969,
      "step": 1900
    },
    {
      "epoch": 1.2264150943396226,
      "grad_norm": 3.523763656616211,
      "learning_rate": 1.3871069182389938e-05,
      "loss": 0.2654,
      "step": 1950
    },
    {
      "epoch": 1.2578616352201257,
      "grad_norm": 4.139145374298096,
      "learning_rate": 1.3713836477987424e-05,
      "loss": 0.2576,
      "step": 2000
    },
    {
      "epoch": 1.2893081761006289,
      "grad_norm": 3.196833610534668,
      "learning_rate": 1.3556603773584906e-05,
      "loss": 0.2864,
      "step": 2050
    },
    {
      "epoch": 1.320754716981132,
      "grad_norm": 2.8964767456054688,
      "learning_rate": 1.3399371069182392e-05,
      "loss": 0.286,
      "step": 2100
    },
    {
      "epoch": 1.3522012578616351,
      "grad_norm": 2.7218921184539795,
      "learning_rate": 1.3242138364779876e-05,
      "loss": 0.2761,
      "step": 2150
    },
    {
      "epoch": 1.3836477987421385,
      "grad_norm": 4.021376132965088,
      "learning_rate": 1.3084905660377361e-05,
      "loss": 0.2669,
      "step": 2200
    },
    {
      "epoch": 1.4150943396226414,
      "grad_norm": 6.181784629821777,
      "learning_rate": 1.2927672955974844e-05,
      "loss": 0.2599,
      "step": 2250
    },
    {
      "epoch": 1.4465408805031448,
      "grad_norm": 4.6100077629089355,
      "learning_rate": 1.277044025157233e-05,
      "loss": 0.2586,
      "step": 2300
    },
    {
      "epoch": 1.4779874213836477,
      "grad_norm": 3.9046823978424072,
      "learning_rate": 1.2613207547169812e-05,
      "loss": 0.2437,
      "step": 2350
    },
    {
      "epoch": 1.509433962264151,
      "grad_norm": 7.715628147125244,
      "learning_rate": 1.2455974842767296e-05,
      "loss": 0.2562,
      "step": 2400
    },
    {
      "epoch": 1.540880503144654,
      "grad_norm": 1.5335407257080078,
      "learning_rate": 1.2298742138364781e-05,
      "loss": 0.2639,
      "step": 2450
    },
    {
      "epoch": 1.5723270440251573,
      "grad_norm": 3.2147579193115234,
      "learning_rate": 1.2141509433962264e-05,
      "loss": 0.2572,
      "step": 2500
    },
    {
      "epoch": 1.6037735849056602,
      "grad_norm": 1.934866189956665,
      "learning_rate": 1.198427672955975e-05,
      "loss": 0.2563,
      "step": 2550
    },
    {
      "epoch": 1.6352201257861636,
      "grad_norm": 3.8376920223236084,
      "learning_rate": 1.1827044025157233e-05,
      "loss": 0.2351,
      "step": 2600
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 5.230978965759277,
      "learning_rate": 1.1669811320754717e-05,
      "loss": 0.2649,
      "step": 2650
    },
    {
      "epoch": 1.6981132075471699,
      "grad_norm": 3.404048442840576,
      "learning_rate": 1.1512578616352201e-05,
      "loss": 0.2759,
      "step": 2700
    },
    {
      "epoch": 1.7295597484276728,
      "grad_norm": 6.088818073272705,
      "learning_rate": 1.1355345911949687e-05,
      "loss": 0.2668,
      "step": 2750
    },
    {
      "epoch": 1.7610062893081762,
      "grad_norm": 3.810774803161621,
      "learning_rate": 1.119811320754717e-05,
      "loss": 0.2632,
      "step": 2800
    },
    {
      "epoch": 1.7924528301886793,
      "grad_norm": 2.0126891136169434,
      "learning_rate": 1.1040880503144655e-05,
      "loss": 0.2347,
      "step": 2850
    },
    {
      "epoch": 1.8238993710691824,
      "grad_norm": 3.071716547012329,
      "learning_rate": 1.088364779874214e-05,
      "loss": 0.2484,
      "step": 2900
    },
    {
      "epoch": 1.8553459119496856,
      "grad_norm": 3.1930902004241943,
      "learning_rate": 1.0726415094339623e-05,
      "loss": 0.2736,
      "step": 2950
    },
    {
      "epoch": 1.8867924528301887,
      "grad_norm": 4.1462907791137695,
      "learning_rate": 1.0569182389937107e-05,
      "loss": 0.2551,
      "step": 3000
    },
    {
      "epoch": 1.9182389937106918,
      "grad_norm": 1.993411898612976,
      "learning_rate": 1.0411949685534593e-05,
      "loss": 0.2657,
      "step": 3050
    },
    {
      "epoch": 1.949685534591195,
      "grad_norm": 2.755627393722534,
      "learning_rate": 1.0254716981132075e-05,
      "loss": 0.2556,
      "step": 3100
    },
    {
      "epoch": 1.9811320754716981,
      "grad_norm": 3.132187604904175,
      "learning_rate": 1.0097484276729561e-05,
      "loss": 0.256,
      "step": 3150
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8973939668070005,
      "eval_f1": 0.56300452281445,
      "eval_loss": 0.28329119086265564,
      "eval_precision": 0.5442771751162275,
      "eval_recall": 0.5830665243685521,
      "eval_runtime": 33.3333,
      "eval_samples_per_second": 95.34,
      "eval_steps_per_second": 5.97,
      "step": 3180
    },
    {
      "epoch": 2.0125786163522013,
      "grad_norm": 2.6640384197235107,
      "learning_rate": 9.940251572327045e-06,
      "loss": 0.2306,
      "step": 3200
    },
    {
      "epoch": 2.0440251572327046,
      "grad_norm": 1.7074990272521973,
      "learning_rate": 9.783018867924529e-06,
      "loss": 0.2408,
      "step": 3250
    },
    {
      "epoch": 2.0754716981132075,
      "grad_norm": 2.832401990890503,
      "learning_rate": 9.625786163522013e-06,
      "loss": 0.229,
      "step": 3300
    },
    {
      "epoch": 2.106918238993711,
      "grad_norm": 3.8568339347839355,
      "learning_rate": 9.468553459119497e-06,
      "loss": 0.2372,
      "step": 3350
    },
    {
      "epoch": 2.138364779874214,
      "grad_norm": 2.2703866958618164,
      "learning_rate": 9.311320754716981e-06,
      "loss": 0.2354,
      "step": 3400
    },
    {
      "epoch": 2.169811320754717,
      "grad_norm": 2.2550501823425293,
      "learning_rate": 9.154088050314465e-06,
      "loss": 0.235,
      "step": 3450
    },
    {
      "epoch": 2.20125786163522,
      "grad_norm": 3.6433000564575195,
      "learning_rate": 8.99685534591195e-06,
      "loss": 0.2307,
      "step": 3500
    },
    {
      "epoch": 2.2327044025157234,
      "grad_norm": 3.5409624576568604,
      "learning_rate": 8.839622641509435e-06,
      "loss": 0.2453,
      "step": 3550
    },
    {
      "epoch": 2.2641509433962264,
      "grad_norm": 2.7766001224517822,
      "learning_rate": 8.682389937106919e-06,
      "loss": 0.2345,
      "step": 3600
    },
    {
      "epoch": 2.2955974842767297,
      "grad_norm": 2.28254771232605,
      "learning_rate": 8.525157232704403e-06,
      "loss": 0.2312,
      "step": 3650
    },
    {
      "epoch": 2.3270440251572326,
      "grad_norm": 2.1515822410583496,
      "learning_rate": 8.367924528301887e-06,
      "loss": 0.2309,
      "step": 3700
    },
    {
      "epoch": 2.358490566037736,
      "grad_norm": 2.217221975326538,
      "learning_rate": 8.21069182389937e-06,
      "loss": 0.2418,
      "step": 3750
    },
    {
      "epoch": 2.389937106918239,
      "grad_norm": 3.9846627712249756,
      "learning_rate": 8.053459119496856e-06,
      "loss": 0.2292,
      "step": 3800
    },
    {
      "epoch": 2.4213836477987423,
      "grad_norm": 2.434777021408081,
      "learning_rate": 7.89622641509434e-06,
      "loss": 0.2298,
      "step": 3850
    },
    {
      "epoch": 2.452830188679245,
      "grad_norm": 2.4619274139404297,
      "learning_rate": 7.738993710691825e-06,
      "loss": 0.2571,
      "step": 3900
    },
    {
      "epoch": 2.4842767295597485,
      "grad_norm": 1.396600604057312,
      "learning_rate": 7.5817610062893085e-06,
      "loss": 0.2346,
      "step": 3950
    },
    {
      "epoch": 2.5157232704402515,
      "grad_norm": 2.274308919906616,
      "learning_rate": 7.424528301886793e-06,
      "loss": 0.2319,
      "step": 4000
    },
    {
      "epoch": 2.547169811320755,
      "grad_norm": 7.168504238128662,
      "learning_rate": 7.267295597484277e-06,
      "loss": 0.2211,
      "step": 4050
    },
    {
      "epoch": 2.5786163522012577,
      "grad_norm": 3.706829786300659,
      "learning_rate": 7.1100628930817614e-06,
      "loss": 0.2502,
      "step": 4100
    },
    {
      "epoch": 2.610062893081761,
      "grad_norm": 4.938648223876953,
      "learning_rate": 6.952830188679246e-06,
      "loss": 0.2345,
      "step": 4150
    },
    {
      "epoch": 2.641509433962264,
      "grad_norm": 2.617217540740967,
      "learning_rate": 6.79559748427673e-06,
      "loss": 0.2175,
      "step": 4200
    },
    {
      "epoch": 2.6729559748427674,
      "grad_norm": 9.610097885131836,
      "learning_rate": 6.638364779874214e-06,
      "loss": 0.2222,
      "step": 4250
    },
    {
      "epoch": 2.7044025157232703,
      "grad_norm": 3.3741917610168457,
      "learning_rate": 6.481132075471699e-06,
      "loss": 0.2308,
      "step": 4300
    },
    {
      "epoch": 2.7358490566037736,
      "grad_norm": 2.843111515045166,
      "learning_rate": 6.323899371069183e-06,
      "loss": 0.2228,
      "step": 4350
    },
    {
      "epoch": 2.767295597484277,
      "grad_norm": 2.9446794986724854,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.2301,
      "step": 4400
    },
    {
      "epoch": 2.79874213836478,
      "grad_norm": 3.3162307739257812,
      "learning_rate": 6.009433962264152e-06,
      "loss": 0.2223,
      "step": 4450
    },
    {
      "epoch": 2.830188679245283,
      "grad_norm": 2.9730913639068604,
      "learning_rate": 5.852201257861636e-06,
      "loss": 0.2299,
      "step": 4500
    },
    {
      "epoch": 2.861635220125786,
      "grad_norm": 2.486496686935425,
      "learning_rate": 5.69496855345912e-06,
      "loss": 0.2362,
      "step": 4550
    },
    {
      "epoch": 2.8930817610062896,
      "grad_norm": 3.007472038269043,
      "learning_rate": 5.537735849056605e-06,
      "loss": 0.2436,
      "step": 4600
    },
    {
      "epoch": 2.9245283018867925,
      "grad_norm": 2.3737916946411133,
      "learning_rate": 5.380503144654089e-06,
      "loss": 0.2263,
      "step": 4650
    },
    {
      "epoch": 2.9559748427672954,
      "grad_norm": 1.6521756649017334,
      "learning_rate": 5.223270440251573e-06,
      "loss": 0.2304,
      "step": 4700
    },
    {
      "epoch": 2.9874213836477987,
      "grad_norm": 5.5335774421691895,
      "learning_rate": 5.066037735849058e-06,
      "loss": 0.2332,
      "step": 4750
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8990553564594134,
      "eval_f1": 0.5799224001311546,
      "eval_loss": 0.2767968773841858,
      "eval_precision": 0.537806608554632,
      "eval_recall": 0.6291948298351714,
      "eval_runtime": 33.2823,
      "eval_samples_per_second": 95.486,
      "eval_steps_per_second": 5.979,
      "step": 4770
    },
    {
      "epoch": 3.018867924528302,
      "grad_norm": 3.7346746921539307,
      "learning_rate": 4.908805031446541e-06,
      "loss": 0.2288,
      "step": 4800
    },
    {
      "epoch": 3.050314465408805,
      "grad_norm": 2.5639073848724365,
      "learning_rate": 4.751572327044026e-06,
      "loss": 0.2213,
      "step": 4850
    },
    {
      "epoch": 3.0817610062893084,
      "grad_norm": 3.113640308380127,
      "learning_rate": 4.59433962264151e-06,
      "loss": 0.2324,
      "step": 4900
    },
    {
      "epoch": 3.1132075471698113,
      "grad_norm": 3.0239310264587402,
      "learning_rate": 4.437106918238994e-06,
      "loss": 0.2111,
      "step": 4950
    },
    {
      "epoch": 3.1446540880503147,
      "grad_norm": 1.2867438793182373,
      "learning_rate": 4.279874213836479e-06,
      "loss": 0.224,
      "step": 5000
    },
    {
      "epoch": 3.1761006289308176,
      "grad_norm": 3.4221584796905518,
      "learning_rate": 4.122641509433963e-06,
      "loss": 0.2126,
      "step": 5050
    },
    {
      "epoch": 3.207547169811321,
      "grad_norm": 2.2448008060455322,
      "learning_rate": 3.965408805031447e-06,
      "loss": 0.2298,
      "step": 5100
    },
    {
      "epoch": 3.238993710691824,
      "grad_norm": 2.793227434158325,
      "learning_rate": 3.8081761006289312e-06,
      "loss": 0.2169,
      "step": 5150
    },
    {
      "epoch": 3.270440251572327,
      "grad_norm": 2.6487598419189453,
      "learning_rate": 3.6509433962264152e-06,
      "loss": 0.2272,
      "step": 5200
    },
    {
      "epoch": 3.30188679245283,
      "grad_norm": 3.785799503326416,
      "learning_rate": 3.4937106918238992e-06,
      "loss": 0.2276,
      "step": 5250
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 3.5206942558288574,
      "learning_rate": 3.3364779874213837e-06,
      "loss": 0.205,
      "step": 5300
    },
    {
      "epoch": 3.3647798742138364,
      "grad_norm": 2.6426963806152344,
      "learning_rate": 3.179245283018868e-06,
      "loss": 0.19,
      "step": 5350
    },
    {
      "epoch": 3.3962264150943398,
      "grad_norm": 3.5427286624908447,
      "learning_rate": 3.022012578616352e-06,
      "loss": 0.2051,
      "step": 5400
    },
    {
      "epoch": 3.4276729559748427,
      "grad_norm": 2.811741828918457,
      "learning_rate": 2.8647798742138366e-06,
      "loss": 0.198,
      "step": 5450
    },
    {
      "epoch": 3.459119496855346,
      "grad_norm": 2.434566020965576,
      "learning_rate": 2.707547169811321e-06,
      "loss": 0.214,
      "step": 5500
    },
    {
      "epoch": 3.490566037735849,
      "grad_norm": 3.7360141277313232,
      "learning_rate": 2.5503144654088054e-06,
      "loss": 0.2038,
      "step": 5550
    },
    {
      "epoch": 3.5220125786163523,
      "grad_norm": 1.8584225177764893,
      "learning_rate": 2.3930817610062895e-06,
      "loss": 0.2347,
      "step": 5600
    },
    {
      "epoch": 3.5534591194968552,
      "grad_norm": 3.2380409240722656,
      "learning_rate": 2.235849056603774e-06,
      "loss": 0.1995,
      "step": 5650
    },
    {
      "epoch": 3.5849056603773586,
      "grad_norm": 2.471127510070801,
      "learning_rate": 2.0786163522012583e-06,
      "loss": 0.2063,
      "step": 5700
    },
    {
      "epoch": 3.6163522012578615,
      "grad_norm": 2.688815116882324,
      "learning_rate": 1.9213836477987423e-06,
      "loss": 0.2057,
      "step": 5750
    },
    {
      "epoch": 3.647798742138365,
      "grad_norm": 3.2768843173980713,
      "learning_rate": 1.7641509433962264e-06,
      "loss": 0.1999,
      "step": 5800
    },
    {
      "epoch": 3.6792452830188678,
      "grad_norm": 4.351276397705078,
      "learning_rate": 1.6069182389937108e-06,
      "loss": 0.2317,
      "step": 5850
    },
    {
      "epoch": 3.710691823899371,
      "grad_norm": 5.953555107116699,
      "learning_rate": 1.449685534591195e-06,
      "loss": 0.2038,
      "step": 5900
    },
    {
      "epoch": 3.742138364779874,
      "grad_norm": 2.9146125316619873,
      "learning_rate": 1.2924528301886792e-06,
      "loss": 0.193,
      "step": 5950
    },
    {
      "epoch": 3.7735849056603774,
      "grad_norm": 3.5067131519317627,
      "learning_rate": 1.1352201257861637e-06,
      "loss": 0.2199,
      "step": 6000
    },
    {
      "epoch": 3.8050314465408803,
      "grad_norm": 3.824366807937622,
      "learning_rate": 9.77987421383648e-07,
      "loss": 0.1936,
      "step": 6050
    },
    {
      "epoch": 3.8364779874213837,
      "grad_norm": 3.798210620880127,
      "learning_rate": 8.207547169811321e-07,
      "loss": 0.2189,
      "step": 6100
    },
    {
      "epoch": 3.867924528301887,
      "grad_norm": 2.7663450241088867,
      "learning_rate": 6.635220125786164e-07,
      "loss": 0.2285,
      "step": 6150
    },
    {
      "epoch": 3.89937106918239,
      "grad_norm": 2.560297966003418,
      "learning_rate": 5.062893081761007e-07,
      "loss": 0.2027,
      "step": 6200
    },
    {
      "epoch": 3.930817610062893,
      "grad_norm": 2.668306589126587,
      "learning_rate": 3.490566037735849e-07,
      "loss": 0.217,
      "step": 6250
    },
    {
      "epoch": 3.9622641509433962,
      "grad_norm": 3.003211259841919,
      "learning_rate": 1.918238993710692e-07,
      "loss": 0.2076,
      "step": 6300
    },
    {
      "epoch": 3.9937106918238996,
      "grad_norm": 2.258948802947998,
      "learning_rate": 3.4591194968553466e-08,
      "loss": 0.1965,
      "step": 6350
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9005079851083817,
      "eval_f1": 0.5847528623289584,
      "eval_loss": 0.2796945869922638,
      "eval_precision": 0.5526815878378378,
      "eval_recall": 0.6207755247242974,
      "eval_runtime": 33.5926,
      "eval_samples_per_second": 94.604,
      "eval_steps_per_second": 5.924,
      "step": 6360
    },
    {
      "epoch": 4.0,
      "step": 6360,
      "total_flos": 7533320592942900.0,
      "train_loss": 0.2684423640464087,
      "train_runtime": 3411.3354,
      "train_samples_per_second": 29.814,
      "train_steps_per_second": 1.864
    }
  ],
  "logging_steps": 50,
  "max_steps": 6360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7533320592942900.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}