{
  "best_metric": 0.955865295904617,
  "best_model_checkpoint": "output_classification_768/hazard/checkpoint-5714",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 5714,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01400070003500175,
      "grad_norm": 18.827600479125977,
      "learning_rate": 8.403361344537815e-08,
      "loss": 1.9841,
      "step": 10
    },
    {
      "epoch": 0.0280014000700035,
      "grad_norm": 18.72251319885254,
      "learning_rate": 1.680672268907563e-07,
      "loss": 2.0361,
      "step": 20
    },
    {
      "epoch": 0.04200210010500525,
      "grad_norm": 19.22981071472168,
      "learning_rate": 2.6143790849673207e-07,
      "loss": 2.0152,
      "step": 30
    },
    {
      "epoch": 0.056002800140007,
      "grad_norm": 29.57445526123047,
      "learning_rate": 3.548085901027078e-07,
      "loss": 1.9341,
      "step": 40
    },
    {
      "epoch": 0.07000350017500875,
      "grad_norm": 22.95323944091797,
      "learning_rate": 4.481792717086835e-07,
      "loss": 1.886,
      "step": 50
    },
    {
      "epoch": 0.0840042002100105,
      "grad_norm": 19.979074478149414,
      "learning_rate": 5.415499533146593e-07,
      "loss": 1.956,
      "step": 60
    },
    {
      "epoch": 0.09800490024501225,
      "grad_norm": 20.88674545288086,
      "learning_rate": 6.255835667600374e-07,
      "loss": 1.8046,
      "step": 70
    },
    {
      "epoch": 0.112005600280014,
      "grad_norm": 29.573392868041992,
      "learning_rate": 7.189542483660131e-07,
      "loss": 1.6156,
      "step": 80
    },
    {
      "epoch": 0.12600630031501575,
      "grad_norm": 51.34833908081055,
      "learning_rate": 8.123249299719889e-07,
      "loss": 1.4696,
      "step": 90
    },
    {
      "epoch": 0.1400070003500175,
      "grad_norm": 25.478782653808594,
      "learning_rate": 9.056956115779646e-07,
      "loss": 1.3458,
      "step": 100
    },
    {
      "epoch": 0.15400770038501926,
      "grad_norm": 26.403230667114258,
      "learning_rate": 9.897292250233428e-07,
      "loss": 1.2287,
      "step": 110
    },
    {
      "epoch": 0.168008400420021,
      "grad_norm": 32.914329528808594,
      "learning_rate": 1.0830999066293185e-06,
      "loss": 1.373,
      "step": 120
    },
    {
      "epoch": 0.18200910045502275,
      "grad_norm": 27.074459075927734,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 1.1883,
      "step": 130
    },
    {
      "epoch": 0.1960098004900245,
      "grad_norm": 30.801422119140625,
      "learning_rate": 1.26984126984127e-06,
      "loss": 1.1013,
      "step": 140
    },
    {
      "epoch": 0.21001050052502626,
      "grad_norm": 28.34015655517578,
      "learning_rate": 1.3632119514472457e-06,
      "loss": 1.2526,
      "step": 150
    },
    {
      "epoch": 0.224011200560028,
      "grad_norm": 25.3271541595459,
      "learning_rate": 1.4565826330532216e-06,
      "loss": 1.1301,
      "step": 160
    },
    {
      "epoch": 0.23801190059502975,
      "grad_norm": 39.82203674316406,
      "learning_rate": 1.5499533146591973e-06,
      "loss": 1.257,
      "step": 170
    },
    {
      "epoch": 0.2520126006300315,
      "grad_norm": 30.389141082763672,
      "learning_rate": 1.6433239962651728e-06,
      "loss": 1.1949,
      "step": 180
    },
    {
      "epoch": 0.26601330066503326,
      "grad_norm": Infinity,
      "learning_rate": 1.727357609710551e-06,
      "loss": 1.1192,
      "step": 190
    },
    {
      "epoch": 0.280014000700035,
      "grad_norm": 20.167753219604492,
      "learning_rate": 1.8207282913165267e-06,
      "loss": 1.1671,
      "step": 200
    },
    {
      "epoch": 0.29401470073503677,
      "grad_norm": 35.408851623535156,
      "learning_rate": 1.914098972922503e-06,
      "loss": 1.0633,
      "step": 210
    },
    {
      "epoch": 0.3080154007700385,
      "grad_norm": 27.517133712768555,
      "learning_rate": 2.0074696545284783e-06,
      "loss": 1.2556,
      "step": 220
    },
    {
      "epoch": 0.32201610080504023,
      "grad_norm": 28.972402572631836,
      "learning_rate": 2.100840336134454e-06,
      "loss": 0.9785,
      "step": 230
    },
    {
      "epoch": 0.336016800840042,
      "grad_norm": 27.440292358398438,
      "learning_rate": 2.1942110177404298e-06,
      "loss": 1.0198,
      "step": 240
    },
    {
      "epoch": 0.35001750087504374,
      "grad_norm": 48.26941680908203,
      "learning_rate": 2.278244631185808e-06,
      "loss": 1.2617,
      "step": 250
    },
    {
      "epoch": 0.3640182009100455,
      "grad_norm": 24.725189208984375,
      "learning_rate": 2.3716153127917835e-06,
      "loss": 1.0211,
      "step": 260
    },
    {
      "epoch": 0.37801890094504725,
      "grad_norm": 23.73585319519043,
      "learning_rate": 2.4649859943977594e-06,
      "loss": 1.0714,
      "step": 270
    },
    {
      "epoch": 0.392019600980049,
      "grad_norm": 26.64818000793457,
      "learning_rate": 2.558356676003735e-06,
      "loss": 0.7989,
      "step": 280
    },
    {
      "epoch": 0.40602030101505077,
      "grad_norm": 14.705005645751953,
      "learning_rate": 2.651727357609711e-06,
      "loss": 0.7878,
      "step": 290
    },
    {
      "epoch": 0.4200210010500525,
      "grad_norm": 14.422958374023438,
      "learning_rate": 2.7450980392156867e-06,
      "loss": 0.8429,
      "step": 300
    },
    {
      "epoch": 0.4340217010850543,
      "grad_norm": 33.29634475708008,
      "learning_rate": 2.8384687208216622e-06,
      "loss": 0.7312,
      "step": 310
    },
    {
      "epoch": 0.448022401120056,
      "grad_norm": 33.81440353393555,
      "learning_rate": 2.931839402427638e-06,
      "loss": 0.6694,
      "step": 320
    },
    {
      "epoch": 0.46202310115505774,
      "grad_norm": 24.78529167175293,
      "learning_rate": 3.0252100840336137e-06,
      "loss": 0.4384,
      "step": 330
    },
    {
      "epoch": 0.4760238011900595,
      "grad_norm": 12.186172485351562,
      "learning_rate": 3.1185807656395896e-06,
      "loss": 0.6493,
      "step": 340
    },
    {
      "epoch": 0.49002450122506125,
      "grad_norm": 21.81092071533203,
      "learning_rate": 3.2119514472455655e-06,
      "loss": 0.6262,
      "step": 350
    },
    {
      "epoch": 0.504025201260063,
      "grad_norm": 32.27071762084961,
      "learning_rate": 3.305322128851541e-06,
      "loss": 0.5861,
      "step": 360
    },
    {
      "epoch": 0.5180259012950648,
      "grad_norm": 25.360836029052734,
      "learning_rate": 3.398692810457517e-06,
      "loss": 0.4615,
      "step": 370
    },
    {
      "epoch": 0.5320266013300665,
      "grad_norm": 4.028536796569824,
      "learning_rate": 3.492063492063492e-06,
      "loss": 0.3756,
      "step": 380
    },
    {
      "epoch": 0.5460273013650683,
      "grad_norm": 23.051633834838867,
      "learning_rate": 3.585434173669468e-06,
      "loss": 0.4682,
      "step": 390
    },
    {
      "epoch": 0.56002800140007,
      "grad_norm": 16.828521728515625,
      "learning_rate": 3.678804855275444e-06,
      "loss": 0.3575,
      "step": 400
    },
    {
      "epoch": 0.5740287014350718,
      "grad_norm": 14.486180305480957,
      "learning_rate": 3.7721755368814194e-06,
      "loss": 0.3074,
      "step": 410
    },
    {
      "epoch": 0.5880294014700735,
      "grad_norm": 20.30696678161621,
      "learning_rate": 3.865546218487396e-06,
      "loss": 0.3504,
      "step": 420
    },
    {
      "epoch": 0.6020301015050753,
      "grad_norm": 11.418947219848633,
      "learning_rate": 3.958916900093371e-06,
      "loss": 0.421,
      "step": 430
    },
    {
      "epoch": 0.616030801540077,
      "grad_norm": 22.60586929321289,
      "learning_rate": 4.052287581699347e-06,
      "loss": 0.4654,
      "step": 440
    },
    {
      "epoch": 0.6300315015750787,
      "grad_norm": 11.261972427368164,
      "learning_rate": 4.145658263305323e-06,
      "loss": 0.3997,
      "step": 450
    },
    {
      "epoch": 0.6440322016100805,
      "grad_norm": 28.590770721435547,
      "learning_rate": 4.239028944911298e-06,
      "loss": 0.41,
      "step": 460
    },
    {
      "epoch": 0.6580329016450822,
      "grad_norm": 19.348529815673828,
      "learning_rate": 4.332399626517274e-06,
      "loss": 0.3127,
      "step": 470
    },
    {
      "epoch": 0.672033601680084,
      "grad_norm": 11.520312309265137,
      "learning_rate": 4.4257703081232496e-06,
      "loss": 0.2414,
      "step": 480
    },
    {
      "epoch": 0.6860343017150857,
      "grad_norm": 12.15475082397461,
      "learning_rate": 4.519140989729225e-06,
      "loss": 0.3494,
      "step": 490
    },
    {
      "epoch": 0.7000350017500875,
      "grad_norm": 25.96927833557129,
      "learning_rate": 4.612511671335201e-06,
      "loss": 0.3013,
      "step": 500
    },
    {
      "epoch": 0.7140357017850892,
      "grad_norm": 6.085643291473389,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.2011,
      "step": 510
    },
    {
      "epoch": 0.728036401820091,
      "grad_norm": 44.394142150878906,
      "learning_rate": 4.799253034547152e-06,
      "loss": 0.3666,
      "step": 520
    },
    {
      "epoch": 0.7420371018550928,
      "grad_norm": 17.268661499023438,
      "learning_rate": 4.892623716153128e-06,
      "loss": 0.4868,
      "step": 530
    },
    {
      "epoch": 0.7560378018900945,
      "grad_norm": 4.370459079742432,
      "learning_rate": 4.985994397759104e-06,
      "loss": 0.2099,
      "step": 540
    },
    {
      "epoch": 0.7700385019250963,
      "grad_norm": 18.326570510864258,
      "learning_rate": 5.07936507936508e-06,
      "loss": 0.1579,
      "step": 550
    },
    {
      "epoch": 0.784039201960098,
      "grad_norm": 15.142720222473145,
      "learning_rate": 5.172735760971056e-06,
      "loss": 0.366,
      "step": 560
    },
    {
      "epoch": 0.7980399019950998,
      "grad_norm": 18.712690353393555,
      "learning_rate": 5.266106442577032e-06,
      "loss": 0.2947,
      "step": 570
    },
    {
      "epoch": 0.8120406020301015,
      "grad_norm": 17.141977310180664,
      "learning_rate": 5.359477124183007e-06,
      "loss": 0.3521,
      "step": 580
    },
    {
      "epoch": 0.8260413020651033,
      "grad_norm": 25.074716567993164,
      "learning_rate": 5.452847805788983e-06,
      "loss": 0.3515,
      "step": 590
    },
    {
      "epoch": 0.840042002100105,
      "grad_norm": 39.856689453125,
      "learning_rate": 5.546218487394959e-06,
      "loss": 0.2407,
      "step": 600
    },
    {
      "epoch": 0.8540427021351068,
      "grad_norm": 27.31299591064453,
      "learning_rate": 5.6395891690009344e-06,
      "loss": 0.3806,
      "step": 610
    },
    {
      "epoch": 0.8680434021701086,
      "grad_norm": 23.94294548034668,
      "learning_rate": 5.732959850606909e-06,
      "loss": 0.247,
      "step": 620
    },
    {
      "epoch": 0.8820441022051102,
      "grad_norm": 18.762691497802734,
      "learning_rate": 5.826330532212886e-06,
      "loss": 0.227,
      "step": 630
    },
    {
      "epoch": 0.896044802240112,
      "grad_norm": 1.3951778411865234,
      "learning_rate": 5.919701213818862e-06,
      "loss": 0.2068,
      "step": 640
    },
    {
      "epoch": 0.9100455022751137,
      "grad_norm": 0.5448408126831055,
      "learning_rate": 6.0130718954248365e-06,
      "loss": 0.1872,
      "step": 650
    },
    {
      "epoch": 0.9240462023101155,
      "grad_norm": 15.129274368286133,
      "learning_rate": 6.106442577030814e-06,
      "loss": 0.2704,
      "step": 660
    },
    {
      "epoch": 0.9380469023451172,
      "grad_norm": 14.669654846191406,
      "learning_rate": 6.199813258636789e-06,
      "loss": 0.2402,
      "step": 670
    },
    {
      "epoch": 0.952047602380119,
      "grad_norm": 12.849870681762695,
      "learning_rate": 6.293183940242764e-06,
      "loss": 0.3798,
      "step": 680
    },
    {
      "epoch": 0.9660483024151207,
      "grad_norm": 3.6425814628601074,
      "learning_rate": 6.386554621848739e-06,
      "loss": 0.1084,
      "step": 690
    },
    {
      "epoch": 0.9800490024501225,
      "grad_norm": 22.52290153503418,
      "learning_rate": 6.479925303454716e-06,
      "loss": 0.2555,
      "step": 700
    },
    {
      "epoch": 0.9940497024851243,
      "grad_norm": 21.555450439453125,
      "learning_rate": 6.573295985060691e-06,
      "loss": 0.0919,
      "step": 710
    },
    {
      "epoch": 0.999649982499125,
      "eval_f1": 0.8955208020599833,
      "eval_loss": 0.22013917565345764,
      "eval_precision": 0.8902907982202399,
      "eval_recall": 0.8957312806158153,
      "eval_runtime": 85.7223,
      "eval_samples_per_second": 16.67,
      "eval_steps_per_second": 8.341,
      "step": 714
    },
    {
      "epoch": 1.008050402520126,
      "grad_norm": 50.78132247924805,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.1955,
      "step": 720
    },
    {
      "epoch": 1.0220511025551278,
      "grad_norm": 10.807790756225586,
      "learning_rate": 6.760037348272643e-06,
      "loss": 0.2093,
      "step": 730
    },
    {
      "epoch": 1.0360518025901295,
      "grad_norm": 13.804763793945312,
      "learning_rate": 6.8534080298786185e-06,
      "loss": 0.1498,
      "step": 740
    },
    {
      "epoch": 1.0500525026251313,
      "grad_norm": 177.36477661132812,
      "learning_rate": 6.946778711484594e-06,
      "loss": 0.0677,
      "step": 750
    },
    {
      "epoch": 1.064053202660133,
      "grad_norm": 33.29927062988281,
      "learning_rate": 7.04014939309057e-06,
      "loss": 0.1319,
      "step": 760
    },
    {
      "epoch": 1.0780539026951348,
      "grad_norm": 50.21918869018555,
      "learning_rate": 7.133520074696546e-06,
      "loss": 0.1975,
      "step": 770
    },
    {
      "epoch": 1.0920546027301365,
      "grad_norm": 20.914047241210938,
      "learning_rate": 7.226890756302521e-06,
      "loss": 0.1184,
      "step": 780
    },
    {
      "epoch": 1.1060553027651383,
      "grad_norm": 25.241579055786133,
      "learning_rate": 7.320261437908497e-06,
      "loss": 0.2089,
      "step": 790
    },
    {
      "epoch": 1.12005600280014,
      "grad_norm": 30.187185287475586,
      "learning_rate": 7.413632119514473e-06,
      "loss": 0.1561,
      "step": 800
    },
    {
      "epoch": 1.1340567028351418,
      "grad_norm": 57.18680953979492,
      "learning_rate": 7.507002801120449e-06,
      "loss": 0.1638,
      "step": 810
    },
    {
      "epoch": 1.1480574028701436,
      "grad_norm": 27.371370315551758,
      "learning_rate": 7.600373482726424e-06,
      "loss": 0.1526,
      "step": 820
    },
    {
      "epoch": 1.1620581029051453,
      "grad_norm": 3.447115659713745,
      "learning_rate": 7.6937441643324e-06,
      "loss": 0.1566,
      "step": 830
    },
    {
      "epoch": 1.176058802940147,
      "grad_norm": 47.86556625366211,
      "learning_rate": 7.787114845938376e-06,
      "loss": 0.0942,
      "step": 840
    },
    {
      "epoch": 1.1900595029751488,
      "grad_norm": 1.0057041645050049,
      "learning_rate": 7.880485527544352e-06,
      "loss": 0.1284,
      "step": 850
    },
    {
      "epoch": 1.2040602030101506,
      "grad_norm": 1.5863021612167358,
      "learning_rate": 7.973856209150329e-06,
      "loss": 0.0242,
      "step": 860
    },
    {
      "epoch": 1.2180609030451524,
      "grad_norm": 37.52120590209961,
      "learning_rate": 8.067226890756303e-06,
      "loss": 0.1856,
      "step": 870
    },
    {
      "epoch": 1.232061603080154,
      "grad_norm": 0.7147373557090759,
      "learning_rate": 8.151260504201681e-06,
      "loss": 0.1503,
      "step": 880
    },
    {
      "epoch": 1.2460623031151559,
      "grad_norm": 15.735259056091309,
      "learning_rate": 8.244631185807657e-06,
      "loss": 0.1443,
      "step": 890
    },
    {
      "epoch": 1.2600630031501576,
      "grad_norm": 28.31800651550293,
      "learning_rate": 8.338001867413634e-06,
      "loss": 0.2344,
      "step": 900
    },
    {
      "epoch": 1.2740637031851594,
      "grad_norm": 29.48625946044922,
      "learning_rate": 8.43137254901961e-06,
      "loss": 0.0729,
      "step": 910
    },
    {
      "epoch": 1.2880644032201611,
      "grad_norm": 18.03072166442871,
      "learning_rate": 8.524743230625583e-06,
      "loss": 0.2843,
      "step": 920
    },
    {
      "epoch": 1.302065103255163,
      "grad_norm": 7.900409698486328,
      "learning_rate": 8.61811391223156e-06,
      "loss": 0.1405,
      "step": 930
    },
    {
      "epoch": 1.3160658032901644,
      "grad_norm": 9.55482006072998,
      "learning_rate": 8.711484593837536e-06,
      "loss": 0.3472,
      "step": 940
    },
    {
      "epoch": 1.3300665033251662,
      "grad_norm": 1.0517637729644775,
      "learning_rate": 8.804855275443511e-06,
      "loss": 0.1811,
      "step": 950
    },
    {
      "epoch": 1.344067203360168,
      "grad_norm": 21.067020416259766,
      "learning_rate": 8.898225957049487e-06,
      "loss": 0.101,
      "step": 960
    },
    {
      "epoch": 1.3580679033951697,
      "grad_norm": 2.4087302684783936,
      "learning_rate": 8.991596638655462e-06,
      "loss": 0.1637,
      "step": 970
    },
    {
      "epoch": 1.3720686034301715,
      "grad_norm": 0.5226989388465881,
      "learning_rate": 9.084967320261438e-06,
      "loss": 0.1412,
      "step": 980
    },
    {
      "epoch": 1.3860693034651732,
      "grad_norm": 1.7105305194854736,
      "learning_rate": 9.178338001867413e-06,
      "loss": 0.0549,
      "step": 990
    },
    {
      "epoch": 1.400070003500175,
      "grad_norm": 1.5454177856445312,
      "learning_rate": 9.27170868347339e-06,
      "loss": 0.1198,
      "step": 1000
    },
    {
      "epoch": 1.4140707035351767,
      "grad_norm": 0.15765634179115295,
      "learning_rate": 9.365079365079366e-06,
      "loss": 0.1376,
      "step": 1010
    },
    {
      "epoch": 1.4280714035701785,
      "grad_norm": 1.46312415599823,
      "learning_rate": 9.458450046685342e-06,
      "loss": 0.0745,
      "step": 1020
    },
    {
      "epoch": 1.4420721036051802,
      "grad_norm": 5.383272647857666,
      "learning_rate": 9.551820728291317e-06,
      "loss": 0.1296,
      "step": 1030
    },
    {
      "epoch": 1.456072803640182,
      "grad_norm": 27.422487258911133,
      "learning_rate": 9.645191409897293e-06,
      "loss": 0.2348,
      "step": 1040
    },
    {
      "epoch": 1.4700735036751837,
      "grad_norm": 4.522994041442871,
      "learning_rate": 9.738562091503268e-06,
      "loss": 0.2962,
      "step": 1050
    },
    {
      "epoch": 1.4840742037101855,
      "grad_norm": 1.4947142601013184,
      "learning_rate": 9.831932773109244e-06,
      "loss": 0.2838,
      "step": 1060
    },
    {
      "epoch": 1.4980749037451873,
      "grad_norm": 5.711818695068359,
      "learning_rate": 9.92530345471522e-06,
      "loss": 0.0891,
      "step": 1070
    },
    {
      "epoch": 1.512075603780189,
      "grad_norm": 49.88716125488281,
      "learning_rate": 9.999998937727915e-06,
      "loss": 0.1245,
      "step": 1080
    },
    {
      "epoch": 1.5260763038151908,
      "grad_norm": 31.99099349975586,
      "learning_rate": 9.999961758252293e-06,
      "loss": 0.0667,
      "step": 1090
    },
    {
      "epoch": 1.5400770038501925,
      "grad_norm": 0.10951868444681168,
      "learning_rate": 9.999871465623728e-06,
      "loss": 0.2052,
      "step": 1100
    },
    {
      "epoch": 1.5540777038851943,
      "grad_norm": 44.73365783691406,
      "learning_rate": 9.999728060801377e-06,
      "loss": 0.2205,
      "step": 1110
    },
    {
      "epoch": 1.568078403920196,
      "grad_norm": 4.195182800292969,
      "learning_rate": 9.999531545308584e-06,
      "loss": 0.1455,
      "step": 1120
    },
    {
      "epoch": 1.5820791039551978,
      "grad_norm": 16.288776397705078,
      "learning_rate": 9.99928192123288e-06,
      "loss": 0.1029,
      "step": 1130
    },
    {
      "epoch": 1.5960798039901996,
      "grad_norm": 2.2979979515075684,
      "learning_rate": 9.998979191225946e-06,
      "loss": 0.3084,
      "step": 1140
    },
    {
      "epoch": 1.6100805040252013,
      "grad_norm": 6.856667995452881,
      "learning_rate": 9.998623358503599e-06,
      "loss": 0.1179,
      "step": 1150
    },
    {
      "epoch": 1.624081204060203,
      "grad_norm": 0.1702626645565033,
      "learning_rate": 9.998214426845745e-06,
      "loss": 0.0635,
      "step": 1160
    },
    {
      "epoch": 1.6380819040952046,
      "grad_norm": 3.6995737552642822,
      "learning_rate": 9.997752400596348e-06,
      "loss": 0.3362,
      "step": 1170
    },
    {
      "epoch": 1.6520826041302064,
      "grad_norm": 49.20168685913086,
      "learning_rate": 9.99723728466338e-06,
      "loss": 0.193,
      "step": 1180
    },
    {
      "epoch": 1.6660833041652081,
      "grad_norm": 0.09634213149547577,
      "learning_rate": 9.996669084518767e-06,
      "loss": 0.1294,
      "step": 1190
    },
    {
      "epoch": 1.6800840042002099,
      "grad_norm": 31.4400577545166,
      "learning_rate": 9.996047806198338e-06,
      "loss": 0.1319,
      "step": 1200
    },
    {
      "epoch": 1.6940847042352116,
      "grad_norm": 28.559484481811523,
      "learning_rate": 9.995373456301755e-06,
      "loss": 0.1419,
      "step": 1210
    },
    {
      "epoch": 1.7080854042702134,
      "grad_norm": 20.74497413635254,
      "learning_rate": 9.99464604199244e-06,
      "loss": 0.0486,
      "step": 1220
    },
    {
      "epoch": 1.7220861043052151,
      "grad_norm": 1.08584725856781,
      "learning_rate": 9.993865570997504e-06,
      "loss": 0.106,
      "step": 1230
    },
    {
      "epoch": 1.736086804340217,
      "grad_norm": 28.463891983032227,
      "learning_rate": 9.99303205160767e-06,
      "loss": 0.2637,
      "step": 1240
    },
    {
      "epoch": 1.7500875043752186,
      "grad_norm": 0.8130584359169006,
      "learning_rate": 9.99214549267717e-06,
      "loss": 0.0446,
      "step": 1250
    },
    {
      "epoch": 1.7640882044102204,
      "grad_norm": 1.0180484056472778,
      "learning_rate": 9.991205903623666e-06,
      "loss": 0.0494,
      "step": 1260
    },
    {
      "epoch": 1.7780889044452222,
      "grad_norm": 45.445831298828125,
      "learning_rate": 9.990213294428141e-06,
      "loss": 0.228,
      "step": 1270
    },
    {
      "epoch": 1.792089604480224,
      "grad_norm": 50.22725296020508,
      "learning_rate": 9.9891676756348e-06,
      "loss": 0.1444,
      "step": 1280
    },
    {
      "epoch": 1.8060903045152257,
      "grad_norm": 78.8548583984375,
      "learning_rate": 9.988069058350945e-06,
      "loss": 0.2021,
      "step": 1290
    },
    {
      "epoch": 1.8200910045502274,
      "grad_norm": 58.68122482299805,
      "learning_rate": 9.986917454246872e-06,
      "loss": 0.2497,
      "step": 1300
    },
    {
      "epoch": 1.8340917045852292,
      "grad_norm": 0.021190036088228226,
      "learning_rate": 9.985712875555743e-06,
      "loss": 0.1506,
      "step": 1310
    },
    {
      "epoch": 1.848092404620231,
      "grad_norm": 63.01392364501953,
      "learning_rate": 9.984455335073445e-06,
      "loss": 0.1217,
      "step": 1320
    },
    {
      "epoch": 1.8620931046552327,
      "grad_norm": 19.447858810424805,
      "learning_rate": 9.983144846158472e-06,
      "loss": 0.2588,
      "step": 1330
    },
    {
      "epoch": 1.8760938046902345,
      "grad_norm": 15.400856018066406,
      "learning_rate": 9.981781422731772e-06,
      "loss": 0.1024,
      "step": 1340
    },
    {
      "epoch": 1.8900945047252362,
      "grad_norm": 1.073379397392273,
      "learning_rate": 9.980365079276593e-06,
      "loss": 0.1605,
      "step": 1350
    },
    {
      "epoch": 1.904095204760238,
      "grad_norm": 31.710058212280273,
      "learning_rate": 9.978895830838348e-06,
      "loss": 0.0748,
      "step": 1360
    },
    {
      "epoch": 1.9180959047952397,
      "grad_norm": 0.05758415535092354,
      "learning_rate": 9.977373693024437e-06,
      "loss": 0.1769,
      "step": 1370
    },
    {
      "epoch": 1.9320966048302415,
      "grad_norm": 13.29843521118164,
      "learning_rate": 9.975798682004095e-06,
      "loss": 0.0806,
      "step": 1380
    },
    {
      "epoch": 1.9460973048652432,
      "grad_norm": 0.0567200742661953,
      "learning_rate": 9.974170814508207e-06,
      "loss": 0.1365,
      "step": 1390
    },
    {
      "epoch": 1.960098004900245,
      "grad_norm": 28.02842140197754,
      "learning_rate": 9.972490107829142e-06,
      "loss": 0.2761,
      "step": 1400
    },
    {
      "epoch": 1.9740987049352468,
      "grad_norm": 2.56071400642395,
      "learning_rate": 9.970756579820562e-06,
      "loss": 0.1046,
      "step": 1410
    },
    {
      "epoch": 1.9880994049702485,
      "grad_norm": 0.15937501192092896,
      "learning_rate": 9.968970248897233e-06,
      "loss": 0.1141,
      "step": 1420
    },
    {
      "epoch": 1.99929996499825,
      "eval_f1": 0.9405169889195438,
      "eval_loss": 0.16149015724658966,
      "eval_precision": 0.9404955962490559,
      "eval_recall": 0.940517844646606,
      "eval_runtime": 85.7404,
      "eval_samples_per_second": 16.667,
      "eval_steps_per_second": 8.339,
      "step": 1428
    },
    {
      "epoch": 2.0021001050052503,
      "grad_norm": 0.22882509231567383,
      "learning_rate": 9.96713113403484e-06,
      "loss": 0.0945,
      "step": 1430
    },
    {
      "epoch": 2.016100805040252,
      "grad_norm": 9.80128288269043,
      "learning_rate": 9.965239254769764e-06,
      "loss": 0.0908,
      "step": 1440
    },
    {
      "epoch": 2.0301015050752538,
      "grad_norm": 4.904554843902588,
      "learning_rate": 9.963294631198897e-06,
      "loss": 0.0313,
      "step": 1450
    },
    {
      "epoch": 2.0441022051102555,
      "grad_norm": 30.783597946166992,
      "learning_rate": 9.961297283979413e-06,
      "loss": 0.1126,
      "step": 1460
    },
    {
      "epoch": 2.0581029051452573,
      "grad_norm": 17.089374542236328,
      "learning_rate": 9.959247234328556e-06,
      "loss": 0.1442,
      "step": 1470
    },
    {
      "epoch": 2.072103605180259,
      "grad_norm": 0.05348501354455948,
      "learning_rate": 9.957144504023413e-06,
      "loss": 0.0121,
      "step": 1480
    },
    {
      "epoch": 2.086104305215261,
      "grad_norm": 2.584078550338745,
      "learning_rate": 9.954989115400684e-06,
      "loss": 0.1215,
      "step": 1490
    },
    {
      "epoch": 2.1001050052502626,
      "grad_norm": 0.12374909967184067,
      "learning_rate": 9.952781091356439e-06,
      "loss": 0.0125,
      "step": 1500
    },
    {
      "epoch": 2.1141057052852643,
      "grad_norm": 73.67182922363281,
      "learning_rate": 9.95052045534588e-06,
      "loss": 0.2536,
      "step": 1510
    },
    {
      "epoch": 2.128106405320266,
      "grad_norm": 27.475414276123047,
      "learning_rate": 9.948207231383094e-06,
      "loss": 0.3477,
      "step": 1520
    },
    {
      "epoch": 2.142107105355268,
      "grad_norm": 0.2636147141456604,
      "learning_rate": 9.945841444040793e-06,
      "loss": 0.041,
      "step": 1530
    },
    {
      "epoch": 2.1561078053902696,
      "grad_norm": 35.20915985107422,
      "learning_rate": 9.943423118450051e-06,
      "loss": 0.0535,
      "step": 1540
    },
    {
      "epoch": 2.1701085054252713,
      "grad_norm": 0.16824166476726532,
      "learning_rate": 9.940952280300049e-06,
      "loss": 0.0332,
      "step": 1550
    },
    {
      "epoch": 2.184109205460273,
      "grad_norm": 0.11545979231595993,
      "learning_rate": 9.938428955837785e-06,
      "loss": 0.0857,
      "step": 1560
    },
    {
      "epoch": 2.198109905495275,
      "grad_norm": 0.8021575808525085,
      "learning_rate": 9.935853171867807e-06,
      "loss": 0.0974,
      "step": 1570
    },
    {
      "epoch": 2.2121106055302766,
      "grad_norm": 2.349532127380371,
      "learning_rate": 9.933224955751929e-06,
      "loss": 0.0452,
      "step": 1580
    },
    {
      "epoch": 2.2261113055652784,
      "grad_norm": 0.620644211769104,
      "learning_rate": 9.930544335408931e-06,
      "loss": 0.0929,
      "step": 1590
    },
    {
      "epoch": 2.24011200560028,
      "grad_norm": 0.02595665119588375,
      "learning_rate": 9.927811339314272e-06,
      "loss": 0.1078,
      "step": 1600
    },
    {
      "epoch": 2.254112705635282,
      "grad_norm": 31.513547897338867,
      "learning_rate": 9.92502599649978e-06,
      "loss": 0.336,
      "step": 1610
    },
    {
      "epoch": 2.2681134056702836,
      "grad_norm": 8.418824195861816,
      "learning_rate": 9.922188336553351e-06,
      "loss": 0.1468,
      "step": 1620
    },
    {
      "epoch": 2.2821141057052854,
      "grad_norm": 0.19196270406246185,
      "learning_rate": 9.919298389618628e-06,
      "loss": 0.0518,
      "step": 1630
    },
    {
      "epoch": 2.296114805740287,
      "grad_norm": 86.77196502685547,
      "learning_rate": 9.916356186394684e-06,
      "loss": 0.0622,
      "step": 1640
    },
    {
      "epoch": 2.310115505775289,
      "grad_norm": 12.014430046081543,
      "learning_rate": 9.913361758135697e-06,
      "loss": 0.186,
      "step": 1650
    },
    {
      "epoch": 2.3241162058102907,
      "grad_norm": 4.975588321685791,
      "learning_rate": 9.910315136650614e-06,
      "loss": 0.1345,
      "step": 1660
    },
    {
      "epoch": 2.3381169058452924,
      "grad_norm": 0.15552039444446564,
      "learning_rate": 9.90721635430282e-06,
      "loss": 0.0215,
      "step": 1670
    },
    {
      "epoch": 2.352117605880294,
      "grad_norm": 24.961191177368164,
      "learning_rate": 9.904065444009785e-06,
      "loss": 0.1216,
      "step": 1680
    },
    {
      "epoch": 2.366118305915296,
      "grad_norm": 4.592883110046387,
      "learning_rate": 9.900862439242719e-06,
      "loss": 0.1366,
      "step": 1690
    },
    {
      "epoch": 2.3801190059502977,
      "grad_norm": 0.4605049192905426,
      "learning_rate": 9.897607374026223e-06,
      "loss": 0.0946,
      "step": 1700
    },
    {
      "epoch": 2.3941197059852994,
      "grad_norm": 1.254657506942749,
      "learning_rate": 9.894300282937915e-06,
      "loss": 0.042,
      "step": 1710
    },
    {
      "epoch": 2.408120406020301,
      "grad_norm": 0.14538034796714783,
      "learning_rate": 9.890941201108069e-06,
      "loss": 0.0559,
      "step": 1720
    },
    {
      "epoch": 2.422121106055303,
      "grad_norm": 3.5109119415283203,
      "learning_rate": 9.887530164219246e-06,
      "loss": 0.1131,
      "step": 1730
    },
    {
      "epoch": 2.4361218060903047,
      "grad_norm": 3.36604380607605,
      "learning_rate": 9.884067208505905e-06,
      "loss": 0.0274,
      "step": 1740
    },
    {
      "epoch": 2.4501225061253065,
      "grad_norm": 37.22677993774414,
      "learning_rate": 9.88055237075403e-06,
      "loss": 0.0846,
      "step": 1750
    },
    {
      "epoch": 2.464123206160308,
      "grad_norm": 4.4809651374816895,
      "learning_rate": 9.876985688300724e-06,
      "loss": 0.0349,
      "step": 1760
    },
    {
      "epoch": 2.47812390619531,
      "grad_norm": 85.08074951171875,
      "learning_rate": 9.873367199033834e-06,
      "loss": 0.1417,
      "step": 1770
    },
    {
      "epoch": 2.4921246062303117,
      "grad_norm": 16.589052200317383,
      "learning_rate": 9.869696941391525e-06,
      "loss": 0.1312,
      "step": 1780
    },
    {
      "epoch": 2.5061253062653135,
      "grad_norm": 15.452710151672363,
      "learning_rate": 9.865974954361885e-06,
      "loss": 0.0609,
      "step": 1790
    },
    {
      "epoch": 2.5201260063003152,
      "grad_norm": 0.2092599719762802,
      "learning_rate": 9.862201277482513e-06,
      "loss": 0.0776,
      "step": 1800
    },
    {
      "epoch": 2.534126706335317,
      "grad_norm": 0.01015313621610403,
      "learning_rate": 9.85837595084009e-06,
      "loss": 0.0674,
      "step": 1810
    },
    {
      "epoch": 2.5481274063703188,
      "grad_norm": 0.03218379244208336,
      "learning_rate": 9.854499015069955e-06,
      "loss": 0.037,
      "step": 1820
    },
    {
      "epoch": 2.5621281064053205,
      "grad_norm": 13.874772071838379,
      "learning_rate": 9.850570511355686e-06,
      "loss": 0.0864,
      "step": 1830
    },
    {
      "epoch": 2.5761288064403223,
      "grad_norm": 0.4755847752094269,
      "learning_rate": 9.846590481428639e-06,
      "loss": 0.0984,
      "step": 1840
    },
    {
      "epoch": 2.590129506475324,
      "grad_norm": 0.3224013149738312,
      "learning_rate": 9.842558967567531e-06,
      "loss": 0.0818,
      "step": 1850
    },
    {
      "epoch": 2.604130206510326,
      "grad_norm": 0.0649426281452179,
      "learning_rate": 9.838476012597969e-06,
      "loss": 0.0538,
      "step": 1860
    },
    {
      "epoch": 2.6181309065453275,
      "grad_norm": 14.650222778320312,
      "learning_rate": 9.834341659892007e-06,
      "loss": 0.1298,
      "step": 1870
    },
    {
      "epoch": 2.632131606580329,
      "grad_norm": 0.07487118244171143,
      "learning_rate": 9.830155953367684e-06,
      "loss": 0.0178,
      "step": 1880
    },
    {
      "epoch": 2.6461323066153306,
      "grad_norm": 0.08998718857765198,
      "learning_rate": 9.82591893748855e-06,
      "loss": 0.1297,
      "step": 1890
    },
    {
      "epoch": 2.6601330066503324,
      "grad_norm": 1.421534538269043,
      "learning_rate": 9.821630657263211e-06,
      "loss": 0.1277,
      "step": 1900
    },
    {
      "epoch": 2.674133706685334,
      "grad_norm": 0.007493593730032444,
      "learning_rate": 9.817291158244825e-06,
      "loss": 0.0734,
      "step": 1910
    },
    {
      "epoch": 2.688134406720336,
      "grad_norm": 7.475185394287109,
      "learning_rate": 9.812900486530643e-06,
      "loss": 0.0623,
      "step": 1920
    },
    {
      "epoch": 2.7021351067553376,
      "grad_norm": 23.813947677612305,
      "learning_rate": 9.808458688761507e-06,
      "loss": 0.1739,
      "step": 1930
    },
    {
      "epoch": 2.7161358067903394,
      "grad_norm": 0.08445608615875244,
      "learning_rate": 9.80396581212135e-06,
      "loss": 0.0465,
      "step": 1940
    },
    {
      "epoch": 2.730136506825341,
      "grad_norm": 0.16602131724357605,
      "learning_rate": 9.799421904336709e-06,
      "loss": 0.1728,
      "step": 1950
    },
    {
      "epoch": 2.744137206860343,
      "grad_norm": 5.605527877807617,
      "learning_rate": 9.794827013676206e-06,
      "loss": 0.0888,
      "step": 1960
    },
    {
      "epoch": 2.7581379068953447,
      "grad_norm": 0.016257012262940407,
      "learning_rate": 9.790181188950043e-06,
      "loss": 0.021,
      "step": 1970
    },
    {
      "epoch": 2.7721386069303464,
      "grad_norm": 0.015474709682166576,
      "learning_rate": 9.785484479509472e-06,
      "loss": 0.1041,
      "step": 1980
    },
    {
      "epoch": 2.786139306965348,
      "grad_norm": 0.18694552779197693,
      "learning_rate": 9.780736935246292e-06,
      "loss": 0.2925,
      "step": 1990
    },
    {
      "epoch": 2.80014000700035,
      "grad_norm": 0.2058991640806198,
      "learning_rate": 9.775938606592287e-06,
      "loss": 0.09,
      "step": 2000
    },
    {
      "epoch": 2.8141407070353517,
      "grad_norm": 0.6892361640930176,
      "learning_rate": 9.771089544518729e-06,
      "loss": 0.0446,
      "step": 2010
    },
    {
      "epoch": 2.8281414070703534,
      "grad_norm": 0.04991542920470238,
      "learning_rate": 9.766189800535803e-06,
      "loss": 0.089,
      "step": 2020
    },
    {
      "epoch": 2.842142107105355,
      "grad_norm": 3.87736439704895,
      "learning_rate": 9.761239426692077e-06,
      "loss": 0.0798,
      "step": 2030
    },
    {
      "epoch": 2.856142807140357,
      "grad_norm": 0.036107420921325684,
      "learning_rate": 9.756238475573947e-06,
      "loss": 0.0641,
      "step": 2040
    },
    {
      "epoch": 2.8701435071753587,
      "grad_norm": 0.6231482625007629,
      "learning_rate": 9.751187000305076e-06,
      "loss": 0.071,
      "step": 2050
    },
    {
      "epoch": 2.8841442072103605,
      "grad_norm": 0.014469561167061329,
      "learning_rate": 9.746085054545829e-06,
      "loss": 0.0645,
      "step": 2060
    },
    {
      "epoch": 2.8981449072453622,
      "grad_norm": 0.029857950285077095,
      "learning_rate": 9.740932692492705e-06,
      "loss": 0.0221,
      "step": 2070
    },
    {
      "epoch": 2.912145607280364,
      "grad_norm": 0.03372916206717491,
      "learning_rate": 9.735729968877763e-06,
      "loss": 0.099,
      "step": 2080
    },
    {
      "epoch": 2.9261463073153657,
      "grad_norm": 40.68704605102539,
      "learning_rate": 9.730476938968037e-06,
      "loss": 0.0539,
      "step": 2090
    },
    {
      "epoch": 2.9401470073503675,
      "grad_norm": 0.22137188911437988,
      "learning_rate": 9.725173658564948e-06,
      "loss": 0.0521,
      "step": 2100
    },
    {
      "epoch": 2.9541477073853692,
      "grad_norm": 0.17224650084972382,
      "learning_rate": 9.719820184003716e-06,
      "loss": 0.0558,
      "step": 2110
    },
    {
      "epoch": 2.968148407420371,
      "grad_norm": 0.216918483376503,
      "learning_rate": 9.714416572152758e-06,
      "loss": 0.0414,
      "step": 2120
    },
    {
      "epoch": 2.9821491074553728,
      "grad_norm": 6.288044452667236,
      "learning_rate": 9.708962880413088e-06,
      "loss": 0.0984,
      "step": 2130
    },
    {
      "epoch": 2.9961498074903745,
      "grad_norm": 0.015979696065187454,
      "learning_rate": 9.703459166717701e-06,
      "loss": 0.0483,
      "step": 2140
    },
    {
      "epoch": 2.998949947497375,
      "eval_f1": 0.9439567208197156,
      "eval_loss": 0.16459082067012787,
      "eval_precision": 0.9424573482270827,
      "eval_recall": 0.9440167949615116,
      "eval_runtime": 85.5476,
      "eval_samples_per_second": 16.704,
      "eval_steps_per_second": 8.358,
      "step": 2142
    },
    {
      "epoch": 3.0101505075253763,
      "grad_norm": 0.030147364363074303,
      "learning_rate": 9.697905489530962e-06,
      "loss": 0.0517,
      "step": 2150
    },
    {
      "epoch": 3.024151207560378,
      "grad_norm": 0.12278366088867188,
      "learning_rate": 9.692301907847981e-06,
      "loss": 0.0017,
      "step": 2160
    },
    {
      "epoch": 3.03815190759538,
      "grad_norm": 1.394048810005188,
      "learning_rate": 9.686648481193994e-06,
      "loss": 0.0195,
      "step": 2170
    },
    {
      "epoch": 3.0521526076303815,
      "grad_norm": 0.3854627013206482,
      "learning_rate": 9.68094526962372e-06,
      "loss": 0.0932,
      "step": 2180
    },
    {
      "epoch": 3.0661533076653833,
      "grad_norm": 0.019702697172760963,
      "learning_rate": 9.675192333720735e-06,
      "loss": 0.1094,
      "step": 2190
    },
    {
      "epoch": 3.080154007700385,
      "grad_norm": 0.032759979367256165,
      "learning_rate": 9.669389734596819e-06,
      "loss": 0.1292,
      "step": 2200
    },
    {
      "epoch": 3.094154707735387,
      "grad_norm": 73.04602813720703,
      "learning_rate": 9.66353753389131e-06,
      "loss": 0.0889,
      "step": 2210
    },
    {
      "epoch": 3.1081554077703886,
      "grad_norm": 1.0851781368255615,
      "learning_rate": 9.65763579377045e-06,
      "loss": 0.0102,
      "step": 2220
    },
    {
      "epoch": 3.1221561078053903,
      "grad_norm": 0.02348620630800724,
      "learning_rate": 9.651684576926721e-06,
      "loss": 0.0526,
      "step": 2230
    },
    {
      "epoch": 3.136156807840392,
      "grad_norm": 0.0017571039497852325,
      "learning_rate": 9.645683946578189e-06,
      "loss": 0.0792,
      "step": 2240
    },
    {
      "epoch": 3.150157507875394,
      "grad_norm": 0.04782587289810181,
      "learning_rate": 9.639633966467817e-06,
      "loss": 0.0505,
      "step": 2250
    },
    {
      "epoch": 3.1641582079103956,
      "grad_norm": 0.854295551776886,
      "learning_rate": 9.633534700862804e-06,
      "loss": 0.011,
      "step": 2260
    },
    {
      "epoch": 3.1781589079453973,
      "grad_norm": 29.275421142578125,
      "learning_rate": 9.627386214553886e-06,
      "loss": 0.0132,
      "step": 2270
    },
    {
      "epoch": 3.192159607980399,
      "grad_norm": 7.652329444885254,
      "learning_rate": 9.621188572854668e-06,
      "loss": 0.0817,
      "step": 2280
    },
    {
      "epoch": 3.206160308015401,
      "grad_norm": 0.05205431953072548,
      "learning_rate": 9.614941841600905e-06,
      "loss": 0.064,
      "step": 2290
    },
    {
      "epoch": 3.2201610080504026,
      "grad_norm": 0.006069015711545944,
      "learning_rate": 9.608646087149826e-06,
      "loss": 0.1078,
      "step": 2300
    },
    {
      "epoch": 3.2341617080854044,
      "grad_norm": 0.04892052710056305,
      "learning_rate": 9.60230137637942e-06,
      "loss": 0.0643,
      "step": 2310
    },
    {
      "epoch": 3.248162408120406,
      "grad_norm": 7.19106388092041,
      "learning_rate": 9.595907776687715e-06,
      "loss": 0.0014,
      "step": 2320
    },
    {
      "epoch": 3.262163108155408,
      "grad_norm": 10.295150756835938,
      "learning_rate": 9.58946535599208e-06,
      "loss": 0.0847,
      "step": 2330
    },
    {
      "epoch": 3.2761638081904096,
      "grad_norm": 111.84056854248047,
      "learning_rate": 9.582974182728497e-06,
      "loss": 0.1459,
      "step": 2340
    },
    {
      "epoch": 3.2901645082254114,
      "grad_norm": 4.704950332641602,
      "learning_rate": 9.576434325850824e-06,
      "loss": 0.0405,
      "step": 2350
    },
    {
      "epoch": 3.304165208260413,
      "grad_norm": 0.4798479676246643,
      "learning_rate": 9.56984585483008e-06,
      "loss": 0.0535,
      "step": 2360
    },
    {
      "epoch": 3.318165908295415,
      "grad_norm": 0.06830032169818878,
      "learning_rate": 9.56320883965369e-06,
      "loss": 0.1739,
      "step": 2370
    },
    {
      "epoch": 3.3321666083304167,
      "grad_norm": 1.7860515117645264,
      "learning_rate": 9.556523350824759e-06,
      "loss": 0.0327,
      "step": 2380
    },
    {
      "epoch": 3.3461673083654184,
      "grad_norm": 0.03282421454787254,
      "learning_rate": 9.549789459361303e-06,
      "loss": 0.0985,
      "step": 2390
    },
    {
      "epoch": 3.36016800840042,
      "grad_norm": 0.0977889746427536,
      "learning_rate": 9.543007236795513e-06,
      "loss": 0.0634,
      "step": 2400
    },
    {
      "epoch": 3.374168708435422,
      "grad_norm": 0.7682515382766724,
      "learning_rate": 9.536176755172988e-06,
      "loss": 0.1193,
      "step": 2410
    },
    {
      "epoch": 3.3881694084704237,
      "grad_norm": 0.010290628299117088,
      "learning_rate": 9.52929808705196e-06,
      "loss": 0.0047,
      "step": 2420
    },
    {
      "epoch": 3.4021701085054254,
      "grad_norm": 0.04401390254497528,
      "learning_rate": 9.522371305502542e-06,
      "loss": 0.0266,
      "step": 2430
    },
    {
      "epoch": 3.416170808540427,
      "grad_norm": 18.24448013305664,
      "learning_rate": 9.515396484105938e-06,
      "loss": 0.2168,
      "step": 2440
    },
    {
      "epoch": 3.430171508575429,
      "grad_norm": 3.006004571914673,
      "learning_rate": 9.508373696953664e-06,
      "loss": 0.0812,
      "step": 2450
    },
    {
      "epoch": 3.4441722086104303,
      "grad_norm": 0.4662695527076721,
      "learning_rate": 9.501303018646766e-06,
      "loss": 0.001,
      "step": 2460
    },
    {
      "epoch": 3.458172908645432,
      "grad_norm": 0.026914849877357483,
      "learning_rate": 9.494184524295023e-06,
      "loss": 0.0834,
      "step": 2470
    },
    {
      "epoch": 3.472173608680434,
      "grad_norm": 31.83331298828125,
      "learning_rate": 9.487018289516146e-06,
      "loss": 0.0091,
      "step": 2480
    },
    {
      "epoch": 3.4861743087154355,
      "grad_norm": 0.003149577882140875,
      "learning_rate": 9.479804390434983e-06,
      "loss": 0.0109,
      "step": 2490
    },
    {
      "epoch": 3.5001750087504373,
      "grad_norm": 24.41321563720703,
      "learning_rate": 9.472542903682708e-06,
      "loss": 0.0085,
      "step": 2500
    },
    {
      "epoch": 3.514175708785439,
      "grad_norm": 0.015754742547869682,
      "learning_rate": 9.465233906395998e-06,
      "loss": 0.0667,
      "step": 2510
    },
    {
      "epoch": 3.528176408820441,
      "grad_norm": 0.012419297359883785,
      "learning_rate": 9.457877476216228e-06,
      "loss": 0.0599,
      "step": 2520
    },
    {
      "epoch": 3.5421771088554426,
      "grad_norm": 65.708984375,
      "learning_rate": 9.450473691288637e-06,
      "loss": 0.0873,
      "step": 2530
    },
    {
      "epoch": 3.5561778088904443,
      "grad_norm": 0.09007327258586884,
      "learning_rate": 9.443022630261495e-06,
      "loss": 0.0203,
      "step": 2540
    },
    {
      "epoch": 3.570178508925446,
      "grad_norm": 1.3261775970458984,
      "learning_rate": 9.435524372285279e-06,
      "loss": 0.0507,
      "step": 2550
    },
    {
      "epoch": 3.584179208960448,
      "grad_norm": 0.07640054076910019,
      "learning_rate": 9.42797899701182e-06,
      "loss": 0.0875,
      "step": 2560
    },
    {
      "epoch": 3.5981799089954496,
      "grad_norm": 0.007880746386945248,
      "learning_rate": 9.420386584593469e-06,
      "loss": 0.0329,
      "step": 2570
    },
    {
      "epoch": 3.6121806090304514,
      "grad_norm": 0.0786275640130043,
      "learning_rate": 9.412747215682231e-06,
      "loss": 0.089,
      "step": 2580
    },
    {
      "epoch": 3.626181309065453,
      "grad_norm": 0.0043255346827209,
      "learning_rate": 9.405060971428924e-06,
      "loss": 0.0878,
      "step": 2590
    },
    {
      "epoch": 3.640182009100455,
      "grad_norm": 0.3558666408061981,
      "learning_rate": 9.397327933482303e-06,
      "loss": 0.0578,
      "step": 2600
    },
    {
      "epoch": 3.6541827091354566,
      "grad_norm": 13.108118057250977,
      "learning_rate": 9.389548183988204e-06,
      "loss": 0.0592,
      "step": 2610
    },
    {
      "epoch": 3.6681834091704584,
      "grad_norm": 0.004170840140432119,
      "learning_rate": 9.381721805588663e-06,
      "loss": 0.0787,
      "step": 2620
    },
    {
      "epoch": 3.68218410920546,
      "grad_norm": 3.8345985412597656,
      "learning_rate": 9.373848881421045e-06,
      "loss": 0.0259,
      "step": 2630
    },
    {
      "epoch": 3.696184809240462,
      "grad_norm": 0.04699714854359627,
      "learning_rate": 9.36592949511715e-06,
      "loss": 0.0474,
      "step": 2640
    },
    {
      "epoch": 3.7101855092754636,
      "grad_norm": 0.024606017395853996,
      "learning_rate": 9.35796373080234e-06,
      "loss": 0.0043,
      "step": 2650
    },
    {
      "epoch": 3.7241862093104654,
      "grad_norm": 0.39088472723960876,
      "learning_rate": 9.349951673094633e-06,
      "loss": 0.0471,
      "step": 2660
    },
    {
      "epoch": 3.738186909345467,
      "grad_norm": 0.029532933607697487,
      "learning_rate": 9.341893407103808e-06,
      "loss": 0.061,
      "step": 2670
    },
    {
      "epoch": 3.752187609380469,
      "grad_norm": 6.01865816116333,
      "learning_rate": 9.333789018430505e-06,
      "loss": 0.038,
      "step": 2680
    },
    {
      "epoch": 3.7661883094154707,
      "grad_norm": 0.9192459583282471,
      "learning_rate": 9.325638593165308e-06,
      "loss": 0.0417,
      "step": 2690
    },
    {
      "epoch": 3.7801890094504724,
      "grad_norm": 0.4111453890800476,
      "learning_rate": 9.317442217887835e-06,
      "loss": 0.0358,
      "step": 2700
    },
    {
      "epoch": 3.794189709485474,
      "grad_norm": 0.17047695815563202,
      "learning_rate": 9.309199979665821e-06,
      "loss": 0.0528,
      "step": 2710
    },
    {
      "epoch": 3.808190409520476,
      "grad_norm": 0.09648007899522781,
      "learning_rate": 9.300911966054184e-06,
      "loss": 0.0862,
      "step": 2720
    },
    {
      "epoch": 3.8221911095554777,
      "grad_norm": 61.79411315917969,
      "learning_rate": 9.292578265094109e-06,
      "loss": 0.0682,
      "step": 2730
    },
    {
      "epoch": 3.8361918095904795,
      "grad_norm": 0.0917210653424263,
      "learning_rate": 9.284198965312096e-06,
      "loss": 0.0035,
      "step": 2740
    },
    {
      "epoch": 3.850192509625481,
      "grad_norm": 4.736217498779297,
      "learning_rate": 9.275774155719032e-06,
      "loss": 0.0184,
      "step": 2750
    },
    {
      "epoch": 3.864193209660483,
      "grad_norm": 0.051652390509843826,
      "learning_rate": 9.267303925809246e-06,
      "loss": 0.0562,
      "step": 2760
    },
    {
      "epoch": 3.8781939096954847,
      "grad_norm": 4.532809257507324,
      "learning_rate": 9.258788365559543e-06,
      "loss": 0.0089,
      "step": 2770
    },
    {
      "epoch": 3.8921946097304865,
      "grad_norm": 0.05398925766348839,
      "learning_rate": 9.25022756542827e-06,
      "loss": 0.0032,
      "step": 2780
    },
    {
      "epoch": 3.9061953097654882,
      "grad_norm": 49.33939743041992,
      "learning_rate": 9.24162161635434e-06,
      "loss": 0.1271,
      "step": 2790
    },
    {
      "epoch": 3.92019600980049,
      "grad_norm": 0.09320775419473648,
      "learning_rate": 9.232970609756267e-06,
      "loss": 0.1672,
      "step": 2800
    },
    {
      "epoch": 3.9341967098354917,
      "grad_norm": 109.32254791259766,
      "learning_rate": 9.224274637531204e-06,
      "loss": 0.1793,
      "step": 2810
    },
    {
      "epoch": 3.9481974098704935,
      "grad_norm": 0.11961990594863892,
      "learning_rate": 9.215533792053957e-06,
      "loss": 0.0838,
      "step": 2820
    },
    {
      "epoch": 3.9621981099054953,
      "grad_norm": 2.1533560752868652,
      "learning_rate": 9.20674816617601e-06,
      "loss": 0.0522,
      "step": 2830
    },
    {
      "epoch": 3.976198809940497,
      "grad_norm": 0.02157438173890114,
      "learning_rate": 9.197917853224531e-06,
      "loss": 0.0747,
      "step": 2840
    },
    {
      "epoch": 3.9901995099754988,
      "grad_norm": 0.005027266219258308,
      "learning_rate": 9.189042947001395e-06,
      "loss": 0.056,
      "step": 2850
    },
    {
      "epoch": 4.0,
      "eval_f1": 0.9502738893213575,
      "eval_loss": 0.1555524319410324,
      "eval_precision": 0.9492496335924279,
      "eval_recall": 0.9503149055283415,
      "eval_runtime": 85.5789,
      "eval_samples_per_second": 16.698,
      "eval_steps_per_second": 8.355,
      "step": 2857
    },
    {
      "epoch": 4.0042002100105005,
      "grad_norm": 2.6012561321258545,
      "learning_rate": 9.180123541782172e-06,
      "loss": 0.0626,
      "step": 2860
    },
    {
      "epoch": 4.018200910045502,
      "grad_norm": 0.03805302456021309,
      "learning_rate": 9.171159732315129e-06,
      "loss": 0.0306,
      "step": 2870
    },
    {
      "epoch": 4.032201610080504,
      "grad_norm": 0.02099209651350975,
      "learning_rate": 9.162151613820236e-06,
      "loss": 0.005,
      "step": 2880
    },
    {
      "epoch": 4.046202310115506,
      "grad_norm": 6.755284786224365,
      "learning_rate": 9.153099281988138e-06,
      "loss": 0.0276,
      "step": 2890
    },
    {
      "epoch": 4.0602030101505076,
      "grad_norm": 0.10712507367134094,
      "learning_rate": 9.144002832979149e-06,
      "loss": 0.0385,
      "step": 2900
    },
    {
      "epoch": 4.074203710185509,
      "grad_norm": 0.017589423805475235,
      "learning_rate": 9.134862363422223e-06,
      "loss": 0.0196,
      "step": 2910
    },
    {
      "epoch": 4.088204410220511,
      "grad_norm": 1.701547384262085,
      "learning_rate": 9.125677970413935e-06,
      "loss": 0.0195,
      "step": 2920
    },
    {
      "epoch": 4.102205110255513,
      "grad_norm": 0.546120285987854,
      "learning_rate": 9.116449751517448e-06,
      "loss": 0.029,
      "step": 2930
    },
    {
      "epoch": 4.116205810290515,
      "grad_norm": 0.008883952163159847,
      "learning_rate": 9.107177804761468e-06,
      "loss": 0.0012,
      "step": 2940
    },
    {
      "epoch": 4.130206510325516,
      "grad_norm": 0.0017636914271861315,
      "learning_rate": 9.097862228639216e-06,
      "loss": 0.0006,
      "step": 2950
    },
    {
      "epoch": 4.144207210360518,
      "grad_norm": 0.03372401371598244,
      "learning_rate": 9.088503122107371e-06,
      "loss": 0.0002,
      "step": 2960
    },
    {
      "epoch": 4.15820791039552,
      "grad_norm": 0.03106401488184929,
      "learning_rate": 9.079100584585027e-06,
      "loss": 0.0606,
      "step": 2970
    },
    {
      "epoch": 4.172208610430522,
      "grad_norm": 6.889786720275879,
      "learning_rate": 9.06965471595263e-06,
      "loss": 0.0372,
      "step": 2980
    },
    {
      "epoch": 4.186209310465523,
      "grad_norm": 0.10271060466766357,
      "learning_rate": 9.060165616550918e-06,
      "loss": 0.0517,
      "step": 2990
    },
    {
      "epoch": 4.200210010500525,
      "grad_norm": 0.1456887125968933,
      "learning_rate": 9.050633387179861e-06,
      "loss": 0.0289,
      "step": 3000
    },
    {
      "epoch": 4.214210710535527,
      "grad_norm": 1.0142897367477417,
      "learning_rate": 9.041058129097586e-06,
      "loss": 0.0866,
      "step": 3010
    },
    {
      "epoch": 4.228211410570529,
      "grad_norm": 0.005523073021322489,
      "learning_rate": 9.031439944019302e-06,
      "loss": 0.1116,
      "step": 3020
    },
    {
      "epoch": 4.24221211060553,
      "grad_norm": 36.47334289550781,
      "learning_rate": 9.021778934116212e-06,
      "loss": 0.0188,
      "step": 3030
    },
    {
      "epoch": 4.256212810640532,
      "grad_norm": 0.0005946714081801474,
      "learning_rate": 9.012075202014444e-06,
      "loss": 0.0291,
      "step": 3040
    },
    {
      "epoch": 4.270213510675534,
      "grad_norm": 0.0017927681328728795,
      "learning_rate": 9.002328850793946e-06,
      "loss": 0.0002,
      "step": 3050
    },
    {
      "epoch": 4.284214210710536,
      "grad_norm": 0.015693560242652893,
      "learning_rate": 8.992539983987401e-06,
      "loss": 0.0528,
      "step": 3060
    },
    {
      "epoch": 4.298214910745537,
      "grad_norm": 0.028800832107663155,
      "learning_rate": 8.982708705579119e-06,
      "loss": 0.0598,
      "step": 3070
    },
    {
      "epoch": 4.312215610780539,
      "grad_norm": 0.012344347313046455,
      "learning_rate": 8.972835120003936e-06,
      "loss": 0.0687,
      "step": 3080
    },
    {
      "epoch": 4.326216310815541,
      "grad_norm": 0.6642976403236389,
      "learning_rate": 8.962919332146107e-06,
      "loss": 0.0349,
      "step": 3090
    },
    {
      "epoch": 4.340217010850543,
      "grad_norm": 0.03467980772256851,
      "learning_rate": 8.952961447338192e-06,
      "loss": 0.0012,
      "step": 3100
    },
    {
      "epoch": 4.354217710885544,
      "grad_norm": 0.023241745308041573,
      "learning_rate": 8.942961571359927e-06,
      "loss": 0.0905,
      "step": 3110
    },
    {
      "epoch": 4.368218410920546,
      "grad_norm": 65.47942352294922,
      "learning_rate": 8.932919810437117e-06,
      "loss": 0.1127,
      "step": 3120
    },
    {
      "epoch": 4.382219110955548,
      "grad_norm": 0.06475795060396194,
      "learning_rate": 8.92283627124049e-06,
      "loss": 0.0241,
      "step": 3130
    },
    {
      "epoch": 4.39621981099055,
      "grad_norm": 0.0579422190785408,
      "learning_rate": 8.912711060884585e-06,
      "loss": 0.0002,
      "step": 3140
    },
    {
      "epoch": 4.4102205110255515,
      "grad_norm": 0.030872151255607605,
      "learning_rate": 8.902544286926585e-06,
      "loss": 0.0002,
      "step": 3150
    },
    {
      "epoch": 4.424221211060553,
      "grad_norm": 0.020232750102877617,
      "learning_rate": 8.892336057365204e-06,
      "loss": 0.0917,
      "step": 3160
    },
    {
      "epoch": 4.438221911095555,
      "grad_norm": 0.010859241709113121,
      "learning_rate": 8.882086480639526e-06,
      "loss": 0.1093,
      "step": 3170
    },
    {
      "epoch": 4.452222611130557,
      "grad_norm": 0.030299527570605278,
      "learning_rate": 8.871795665627845e-06,
      "loss": 0.0634,
      "step": 3180
    },
    {
      "epoch": 4.4662233111655585,
      "grad_norm": 1.2195743322372437,
      "learning_rate": 8.861463721646528e-06,
      "loss": 0.0005,
      "step": 3190
    },
    {
      "epoch": 4.48022401120056,
      "grad_norm": 0.06468135863542557,
      "learning_rate": 8.851090758448836e-06,
      "loss": 0.0717,
      "step": 3200
    },
    {
      "epoch": 4.494224711235562,
      "grad_norm": 0.1307636797428131,
      "learning_rate": 8.840676886223768e-06,
      "loss": 0.0716,
      "step": 3210
    },
    {
      "epoch": 4.508225411270564,
      "grad_norm": 0.06059357896447182,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.0375,
      "step": 3220
    },
    {
      "epoch": 4.5222261113055655,
      "grad_norm": 0.013207100331783295,
      "learning_rate": 8.819726857619156e-06,
      "loss": 0.0492,
      "step": 3230
    },
    {
      "epoch": 4.536226811340567,
      "grad_norm": 0.020494362339377403,
      "learning_rate": 8.809190923785724e-06,
      "loss": 0.0175,
      "step": 3240
    },
    {
      "epoch": 4.550227511375569,
      "grad_norm": 0.04863560572266579,
      "learning_rate": 8.798614526014786e-06,
      "loss": 0.1044,
      "step": 3250
    },
    {
      "epoch": 4.564228211410571,
      "grad_norm": 0.000641553255263716,
      "learning_rate": 8.78799777665637e-06,
      "loss": 0.0706,
      "step": 3260
    },
    {
      "epoch": 4.5782289114455725,
      "grad_norm": 0.1729791760444641,
      "learning_rate": 8.777340788489145e-06,
      "loss": 0.0544,
      "step": 3270
    },
    {
      "epoch": 4.592229611480574,
      "grad_norm": 0.32355839014053345,
      "learning_rate": 8.76664367471922e-06,
      "loss": 0.0785,
      "step": 3280
    },
    {
      "epoch": 4.606230311515576,
      "grad_norm": 1.423351526260376,
      "learning_rate": 8.755906548978957e-06,
      "loss": 0.0126,
      "step": 3290
    },
    {
      "epoch": 4.620231011550578,
      "grad_norm": 15.19318962097168,
      "learning_rate": 8.745129525325746e-06,
      "loss": 0.0809,
      "step": 3300
    },
    {
      "epoch": 4.63423171158558,
      "grad_norm": 24.36188316345215,
      "learning_rate": 8.734312718240807e-06,
      "loss": 0.0907,
      "step": 3310
    },
    {
      "epoch": 4.648232411620581,
      "grad_norm": 0.013152498751878738,
| "learning_rate": 8.723456242627961e-06, | |
| "loss": 0.0869, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 4.662233111655583, | |
| "grad_norm": 0.017324671149253845, | |
| "learning_rate": 8.712560213812421e-06, | |
| "loss": 0.0513, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 4.676233811690585, | |
| "grad_norm": 0.044183555990457535, | |
| "learning_rate": 8.701624747539563e-06, | |
| "loss": 0.0209, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 4.690234511725587, | |
| "grad_norm": 0.001150390598922968, | |
| "learning_rate": 8.690649959973693e-06, | |
| "loss": 0.0266, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 4.704235211760588, | |
| "grad_norm": 0.005500374361872673, | |
| "learning_rate": 8.679635967696815e-06, | |
| "loss": 0.0332, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 4.71823591179559, | |
| "grad_norm": 0.07724996656179428, | |
| "learning_rate": 8.6685828877074e-06, | |
| "loss": 0.0403, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 4.732236611830592, | |
| "grad_norm": 0.20006419718265533, | |
| "learning_rate": 8.657490837419124e-06, | |
| "loss": 0.0808, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 4.746237311865594, | |
| "grad_norm": 1.35878324508667, | |
| "learning_rate": 8.646359934659648e-06, | |
| "loss": 0.0544, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 4.760238011900595, | |
| "grad_norm": 4.547935962677002, | |
| "learning_rate": 8.63519029766934e-06, | |
| "loss": 0.0756, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.774238711935597, | |
| "grad_norm": 0.7724612951278687, | |
| "learning_rate": 8.623982045100037e-06, | |
| "loss": 0.0283, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 4.788239411970599, | |
| "grad_norm": 0.037291232496500015, | |
| "learning_rate": 8.612735296013777e-06, | |
| "loss": 0.0903, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 4.802240112005601, | |
| "grad_norm": 4.7362565994262695, | |
| "learning_rate": 8.601450169881533e-06, | |
| "loss": 0.0344, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 4.816240812040602, | |
| "grad_norm": 3.896909713745117, | |
| "learning_rate": 8.590126786581948e-06, | |
| "loss": 0.0117, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 4.830241512075604, | |
| "grad_norm": 0.0022337951231747866, | |
| "learning_rate": 8.57876526640006e-06, | |
| "loss": 0.1031, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 4.844242212110606, | |
| "grad_norm": 11.048283576965332, | |
| "learning_rate": 8.567365730026025e-06, | |
| "loss": 0.0687, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 4.858242912145608, | |
| "grad_norm": 0.5430986285209656, | |
| "learning_rate": 8.55592829855383e-06, | |
| "loss": 0.0284, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 4.872243612180609, | |
| "grad_norm": 10.159993171691895, | |
| "learning_rate": 8.544453093480017e-06, | |
| "loss": 0.0261, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 4.886244312215611, | |
| "grad_norm": 0.057509321719408035, | |
| "learning_rate": 8.53294023670238e-06, | |
| "loss": 0.0031, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 4.900245012250613, | |
| "grad_norm": 28.648855209350586, | |
| "learning_rate": 8.521389850518682e-06, | |
| "loss": 0.0544, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.914245712285615, | |
| "grad_norm": 3.066316604614258, | |
| "learning_rate": 8.509802057625345e-06, | |
| "loss": 0.0143, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 4.928246412320616, | |
| "grad_norm": 0.005794500466436148, | |
| "learning_rate": 8.498176981116152e-06, | |
| "loss": 0.0044, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 4.942247112355618, | |
| "grad_norm": 33.897499084472656, | |
| "learning_rate": 8.486514744480946e-06, | |
| "loss": 0.103, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 4.95624781239062, | |
| "grad_norm": 18.55446434020996, | |
| "learning_rate": 8.474815471604303e-06, | |
| "loss": 0.0672, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 4.970248512425622, | |
| "grad_norm": 0.018390638753771782, | |
| "learning_rate": 8.463079286764224e-06, | |
| "loss": 0.0698, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 4.9842492124606235, | |
| "grad_norm": 14.878451347351074, | |
| "learning_rate": 8.451306314630825e-06, | |
| "loss": 0.0149, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 4.998249912495625, | |
| "grad_norm": 0.10592308640480042, | |
| "learning_rate": 8.439496680264993e-06, | |
| "loss": 0.0257, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 4.999649982499125, | |
| "eval_f1": 0.9530601608778744, | |
| "eval_loss": 0.1417933851480484, | |
| "eval_precision": 0.9517145171612903, | |
| "eval_recall": 0.9531140657802659, | |
| "eval_runtime": 85.6287, | |
| "eval_samples_per_second": 16.688, | |
| "eval_steps_per_second": 8.35, | |
| "step": 3571 | |
| }, | |
| { | |
| "epoch": 5.012250612530626, | |
| "grad_norm": 0.3304142653942108, | |
| "learning_rate": 8.42765050911707e-06, | |
| "loss": 0.002, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 5.026251312565628, | |
| "grad_norm": 0.0006598730105906725, | |
| "learning_rate": 8.41576792702552e-06, | |
| "loss": 0.0491, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 5.04025201260063, | |
| "grad_norm": 5.164649486541748, | |
| "learning_rate": 8.403849060215587e-06, | |
| "loss": 0.0065, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.054252712635631, | |
| "grad_norm": 0.1107124462723732, | |
| "learning_rate": 8.391894035297962e-06, | |
| "loss": 0.0118, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 5.068253412670633, | |
| "grad_norm": 0.0056930468417704105, | |
| "learning_rate": 8.379902979267424e-06, | |
| "loss": 0.0151, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 5.082254112705635, | |
| "grad_norm": 0.004092092160135508, | |
| "learning_rate": 8.367876019501512e-06, | |
| "loss": 0.0005, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 5.096254812740637, | |
| "grad_norm": 0.008662878535687923, | |
| "learning_rate": 8.35581328375915e-06, | |
| "loss": 0.001, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 5.110255512775638, | |
| "grad_norm": 0.002947417553514242, | |
| "learning_rate": 8.343714900179304e-06, | |
| "loss": 0.0244, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.12425621281064, | |
| "grad_norm": 32.711063385009766, | |
| "learning_rate": 8.331580997279616e-06, | |
| "loss": 0.0394, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 5.138256912845642, | |
| "grad_norm": 21.720970153808594, | |
| "learning_rate": 8.319411703955042e-06, | |
| "loss": 0.0487, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 5.152257612880644, | |
| "grad_norm": 6.737855434417725, | |
| "learning_rate": 8.307207149476478e-06, | |
| "loss": 0.0527, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 5.166258312915645, | |
| "grad_norm": 0.01580864191055298, | |
| "learning_rate": 8.294967463489387e-06, | |
| "loss": 0.029, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 5.180259012950647, | |
| "grad_norm": 0.054978061467409134, | |
| "learning_rate": 8.282692776012429e-06, | |
| "loss": 0.0261, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.194259712985649, | |
| "grad_norm": 0.001470154500566423, | |
| "learning_rate": 8.27038321743607e-06, | |
| "loss": 0.0204, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 5.208260413020651, | |
| "grad_norm": 0.005747316405177116, | |
| "learning_rate": 8.258038918521203e-06, | |
| "loss": 0.0466, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 5.222261113055652, | |
| "grad_norm": 0.10529675334692001, | |
| "learning_rate": 8.24566001039776e-06, | |
| "loss": 0.0019, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 5.236261813090654, | |
| "grad_norm": 3.1363236904144287, | |
| "learning_rate": 8.233246624563315e-06, | |
| "loss": 0.0235, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 5.250262513125656, | |
| "grad_norm": 0.280477911233902, | |
| "learning_rate": 8.220798892881686e-06, | |
| "loss": 0.054, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.264263213160658, | |
| "grad_norm": 35.494441986083984, | |
| "learning_rate": 8.208316947581543e-06, | |
| "loss": 0.0547, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 5.2782639131956595, | |
| "grad_norm": 2.8686635494232178, | |
| "learning_rate": 8.19580092125499e-06, | |
| "loss": 0.0051, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 5.292264613230661, | |
| "grad_norm": 0.020721808075904846, | |
| "learning_rate": 8.183250946856173e-06, | |
| "loss": 0.0408, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 5.306265313265663, | |
| "grad_norm": 9.166502952575684, | |
| "learning_rate": 8.17066715769985e-06, | |
| "loss": 0.0636, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 5.320266013300665, | |
| "grad_norm": 28.179536819458008, | |
| "learning_rate": 8.158049687459986e-06, | |
| "loss": 0.044, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.3342667133356665, | |
| "grad_norm": 0.19162550568580627, | |
| "learning_rate": 8.145398670168336e-06, | |
| "loss": 0.0283, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 5.348267413370668, | |
| "grad_norm": 0.9183430671691895, | |
| "learning_rate": 8.132714240213009e-06, | |
| "loss": 0.0002, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 5.36226811340567, | |
| "grad_norm": 0.1281927078962326, | |
| "learning_rate": 8.119996532337047e-06, | |
| "loss": 0.0229, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 5.376268813440672, | |
| "grad_norm": 0.011076876893639565, | |
| "learning_rate": 8.107245681636997e-06, | |
| "loss": 0.0252, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 5.3902695134756735, | |
| "grad_norm": 4.825412750244141, | |
| "learning_rate": 8.094461823561473e-06, | |
| "loss": 0.0624, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 5.404270213510675, | |
| "grad_norm": 0.024655018001794815, | |
| "learning_rate": 8.081645093909715e-06, | |
| "loss": 0.0536, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 5.418270913545677, | |
| "grad_norm": 0.002414155751466751, | |
| "learning_rate": 8.068795628830148e-06, | |
| "loss": 0.0163, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 5.432271613580679, | |
| "grad_norm": 0.4145294725894928, | |
| "learning_rate": 8.055913564818938e-06, | |
| "loss": 0.0282, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 5.4462723136156805, | |
| "grad_norm": 82.75165557861328, | |
| "learning_rate": 8.042999038718538e-06, | |
| "loss": 0.0065, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 5.460273013650682, | |
| "grad_norm": 0.1588965505361557, | |
| "learning_rate": 8.030052187716238e-06, | |
| "loss": 0.0546, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 5.474273713685684, | |
| "grad_norm": 0.04659806191921234, | |
| "learning_rate": 8.017073149342703e-06, | |
| "loss": 0.0289, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 5.488274413720686, | |
| "grad_norm": 0.0054337154142558575, | |
| "learning_rate": 8.004062061470519e-06, | |
| "loss": 0.0267, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 5.502275113755688, | |
| "grad_norm": 0.01362488605082035, | |
| "learning_rate": 7.991019062312723e-06, | |
| "loss": 0.0156, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 5.516275813790689, | |
| "grad_norm": 0.01387427095323801, | |
| "learning_rate": 7.97794429042134e-06, | |
| "loss": 0.0756, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 5.530276513825691, | |
| "grad_norm": 0.1007440835237503, | |
| "learning_rate": 7.9648378846859e-06, | |
| "loss": 0.0348, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 5.544277213860693, | |
| "grad_norm": 0.1274271011352539, | |
| "learning_rate": 7.951699984331973e-06, | |
| "loss": 0.0352, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 5.558277913895695, | |
| "grad_norm": 0.13830313086509705, | |
| "learning_rate": 7.93853072891969e-06, | |
| "loss": 0.1726, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 5.572278613930696, | |
| "grad_norm": 0.11423526704311371, | |
| "learning_rate": 7.925330258342261e-06, | |
| "loss": 0.0695, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 5.586279313965698, | |
| "grad_norm": 0.07872703671455383, | |
| "learning_rate": 7.912098712824474e-06, | |
| "loss": 0.0003, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 5.6002800140007, | |
| "grad_norm": 0.15258124470710754, | |
| "learning_rate": 7.89883623292123e-06, | |
| "loss": 0.0314, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.614280714035702, | |
| "grad_norm": 0.14406810700893402, | |
| "learning_rate": 7.885542959516027e-06, | |
| "loss": 0.0538, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 5.628281414070703, | |
| "grad_norm": 12.04726505279541, | |
| "learning_rate": 7.872219033819479e-06, | |
| "loss": 0.1207, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 5.642282114105705, | |
| "grad_norm": 0.028468133881688118, | |
| "learning_rate": 7.85886459736781e-06, | |
| "loss": 0.0025, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 5.656282814140707, | |
| "grad_norm": 0.06600043177604675, | |
| "learning_rate": 7.84547979202135e-06, | |
| "loss": 0.0302, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 5.670283514175709, | |
| "grad_norm": 6.3559160232543945, | |
| "learning_rate": 7.832064759963028e-06, | |
| "loss": 0.0434, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 5.68428421421071, | |
| "grad_norm": 64.54084014892578, | |
| "learning_rate": 7.818619643696863e-06, | |
| "loss": 0.0091, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 5.698284914245712, | |
| "grad_norm": 0.014282717369496822, | |
| "learning_rate": 7.805144586046454e-06, | |
| "loss": 0.0004, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 5.712285614280714, | |
| "grad_norm": 0.034424930810928345, | |
| "learning_rate": 7.791639730153453e-06, | |
| "loss": 0.0048, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 5.726286314315716, | |
| "grad_norm": 0.039621662348508835, | |
| "learning_rate": 7.778105219476053e-06, | |
| "loss": 0.1063, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 5.740287014350717, | |
| "grad_norm": 0.03727049380540848, | |
| "learning_rate": 7.764541197787462e-06, | |
| "loss": 0.0783, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 5.754287714385719, | |
| "grad_norm": 0.0016835929127410054, | |
| "learning_rate": 7.750947809174372e-06, | |
| "loss": 0.0315, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 5.768288414420721, | |
| "grad_norm": 0.0010269766207784414, | |
| "learning_rate": 7.737325198035435e-06, | |
| "loss": 0.0002, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 5.782289114455723, | |
| "grad_norm": 0.18126137554645538, | |
| "learning_rate": 7.723673509079718e-06, | |
| "loss": 0.0325, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 5.7962898144907244, | |
| "grad_norm": 7.939001560211182, | |
| "learning_rate": 7.709992887325187e-06, | |
| "loss": 0.0374, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 5.810290514525726, | |
| "grad_norm": 0.024497386068105698, | |
| "learning_rate": 7.69628347809714e-06, | |
| "loss": 0.0008, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 5.824291214560728, | |
| "grad_norm": 3.9139621257781982, | |
| "learning_rate": 7.68254542702668e-06, | |
| "loss": 0.0015, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 5.83829191459573, | |
| "grad_norm": 0.0018668597331270576, | |
| "learning_rate": 7.668778880049167e-06, | |
| "loss": 0.0245, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 5.8522926146307315, | |
| "grad_norm": 15.285141944885254, | |
| "learning_rate": 7.654983983402662e-06, | |
| "loss": 0.0633, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 5.866293314665733, | |
| "grad_norm": 0.036636680364608765, | |
| "learning_rate": 7.641160883626374e-06, | |
| "loss": 0.0015, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 5.880294014700735, | |
| "grad_norm": 0.025609202682971954, | |
| "learning_rate": 7.627309727559114e-06, | |
| "loss": 0.019, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.894294714735737, | |
| "grad_norm": 7.280523777008057, | |
| "learning_rate": 7.613430662337715e-06, | |
| "loss": 0.0796, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 5.9082954147707385, | |
| "grad_norm": 0.04838598519563675, | |
| "learning_rate": 7.599523835395493e-06, | |
| "loss": 0.0288, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 5.92229611480574, | |
| "grad_norm": 0.0003251319285482168, | |
| "learning_rate": 7.585589394460661e-06, | |
| "loss": 0.0007, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 5.936296814840742, | |
| "grad_norm": 0.007616756483912468, | |
| "learning_rate": 7.571627487554769e-06, | |
| "loss": 0.075, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 5.950297514875744, | |
| "grad_norm": 0.12283679097890854, | |
| "learning_rate": 7.5576382629911306e-06, | |
| "loss": 0.0505, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 5.9642982149107455, | |
| "grad_norm": 0.017637204378843307, | |
| "learning_rate": 7.543621869373249e-06, | |
| "loss": 0.0872, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 5.978298914945747, | |
| "grad_norm": 0.00048005758435465395, | |
| "learning_rate": 7.529578455593232e-06, | |
| "loss": 0.0234, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 5.992299614980749, | |
| "grad_norm": 0.5085453391075134, | |
| "learning_rate": 7.515508170830221e-06, | |
| "loss": 0.1398, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 5.99929996499825, | |
| "eval_f1": 0.9488872841236063, | |
| "eval_loss": 0.16063901782035828, | |
| "eval_precision": 0.948186790375482, | |
| "eval_recall": 0.9489153254023793, | |
| "eval_runtime": 85.1915, | |
| "eval_samples_per_second": 16.774, | |
| "eval_steps_per_second": 8.393, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 6.006300315015751, | |
| "grad_norm": 0.041102755814790726, | |
| "learning_rate": 7.501411164548792e-06, | |
| "loss": 0.0006, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 6.0203010150507525, | |
| "grad_norm": 5.220290660858154, | |
| "learning_rate": 7.487287586497384e-06, | |
| "loss": 0.001, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.034301715085754, | |
| "grad_norm": 10.654687881469727, | |
| "learning_rate": 7.473137586706693e-06, | |
| "loss": 0.0228, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 6.048302415120756, | |
| "grad_norm": 0.5803616046905518, | |
| "learning_rate": 7.458961315488095e-06, | |
| "loss": 0.0472, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 6.062303115155758, | |
| "grad_norm": 0.04721968621015549, | |
| "learning_rate": 7.444758923432028e-06, | |
| "loss": 0.0109, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 6.07630381519076, | |
| "grad_norm": 0.033405423164367676, | |
| "learning_rate": 7.4305305614064145e-06, | |
| "loss": 0.0006, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 6.090304515225761, | |
| "grad_norm": 0.0014901540707796812, | |
| "learning_rate": 7.416276380555041e-06, | |
| "loss": 0.0093, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.104305215260763, | |
| "grad_norm": 0.026062890887260437, | |
| "learning_rate": 7.401996532295965e-06, | |
| "loss": 0.0022, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 6.118305915295765, | |
| "grad_norm": 0.26311179995536804, | |
| "learning_rate": 7.3876911683198995e-06, | |
| "loss": 0.0345, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 6.132306615330767, | |
| "grad_norm": 0.013148613274097443, | |
| "learning_rate": 7.373360440588604e-06, | |
| "loss": 0.0011, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 6.146307315365768, | |
| "grad_norm": 1.30440354347229, | |
| "learning_rate": 7.359004501333267e-06, | |
| "loss": 0.0012, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 6.16030801540077, | |
| "grad_norm": 0.0015283781103789806, | |
| "learning_rate": 7.344623503052898e-06, | |
| "loss": 0.0096, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.174308715435772, | |
| "grad_norm": 2.4098916053771973, | |
| "learning_rate": 7.330217598512696e-06, | |
| "loss": 0.0698, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 6.188309415470774, | |
| "grad_norm": 0.06899107992649078, | |
| "learning_rate": 7.315786940742432e-06, | |
| "loss": 0.0917, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 6.202310115505775, | |
| "grad_norm": 22.4434757232666, | |
| "learning_rate": 7.301331683034827e-06, | |
| "loss": 0.0104, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 6.216310815540777, | |
| "grad_norm": 0.0014025595737621188, | |
| "learning_rate": 7.286851978943919e-06, | |
| "loss": 0.054, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 6.230311515575779, | |
| "grad_norm": 0.36593034863471985, | |
| "learning_rate": 7.2723479822834295e-06, | |
| "loss": 0.0003, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.244312215610781, | |
| "grad_norm": 5.852354049682617, | |
| "learning_rate": 7.257819847125136e-06, | |
| "loss": 0.0498, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 6.258312915645782, | |
| "grad_norm": 0.009837552905082703, | |
| "learning_rate": 7.243267727797235e-06, | |
| "loss": 0.0148, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 6.272313615680784, | |
| "grad_norm": 0.712465226650238, | |
| "learning_rate": 7.2286917788826926e-06, | |
| "loss": 0.0009, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 6.286314315715786, | |
| "grad_norm": 0.007876602001488209, | |
| "learning_rate": 7.214092155217614e-06, | |
| "loss": 0.0129, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 6.300315015750788, | |
| "grad_norm": 0.06980904191732407, | |
| "learning_rate": 7.199469011889598e-06, | |
| "loss": 0.0611, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.314315715785789, | |
| "grad_norm": 0.0010226389858871698, | |
| "learning_rate": 7.18482250423608e-06, | |
| "loss": 0.001, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 6.328316415820791, | |
| "grad_norm": 0.016090035438537598, | |
| "learning_rate": 7.170152787842689e-06, | |
| "loss": 0.0081, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 6.342317115855793, | |
| "grad_norm": 0.0010334812104701996, | |
| "learning_rate": 7.155460018541597e-06, | |
| "loss": 0.0258, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 6.356317815890795, | |
| "grad_norm": 0.0009966572979465127, | |
| "learning_rate": 7.140744352409856e-06, | |
| "loss": 0.0243, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 6.3703185159257965, | |
| "grad_norm": 27.122793197631836, | |
| "learning_rate": 7.12600594576775e-06, | |
| "loss": 0.0202, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 6.384319215960798, | |
| "grad_norm": 8.17154598236084, | |
| "learning_rate": 7.1112449551771225e-06, | |
| "loss": 0.0069, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 6.3983199159958, | |
| "grad_norm": 0.0030006226152181625, | |
| "learning_rate": 7.096461537439725e-06, | |
| "loss": 0.0005, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 6.412320616030802, | |
| "grad_norm": 28.31182289123535, | |
| "learning_rate": 7.0816558495955435e-06, | |
| "loss": 0.0367, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 6.4263213160658035, | |
| "grad_norm": 0.0193657074123621, | |
| "learning_rate": 7.066828048921133e-06, | |
| "loss": 0.0076, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 6.440322016100805, | |
| "grad_norm": 0.0019821927417069674, | |
| "learning_rate": 7.051978292927947e-06, | |
| "loss": 0.0011, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.454322716135807, | |
| "grad_norm": 0.003565513761714101, | |
| "learning_rate": 7.0371067393606665e-06, | |
| "loss": 0.0118, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 6.468323416170809, | |
| "grad_norm": 0.7763420939445496, | |
| "learning_rate": 7.022213546195516e-06, | |
| "loss": 0.0565, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 6.4823241162058105, | |
| "grad_norm": 0.005166476126760244, | |
| "learning_rate": 7.007298871638597e-06, | |
| "loss": 0.0081, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 6.496324816240812, | |
| "grad_norm": 0.0027893453370779753, | |
| "learning_rate": 6.9923628741242e-06, | |
| "loss": 0.0197, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 6.510325516275814, | |
| "grad_norm": 0.010828179307281971, | |
| "learning_rate": 6.97740571231312e-06, | |
| "loss": 0.0002, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 6.524326216310816, | |
| "grad_norm": 1.695597767829895, | |
| "learning_rate": 6.96242754509098e-06, | |
| "loss": 0.0102, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 6.5383269163458175, | |
| "grad_norm": 0.03126674145460129, | |
| "learning_rate": 6.947428531566531e-06, | |
| "loss": 0.0252, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 6.552327616380819, | |
| "grad_norm": 2.710430145263672, | |
| "learning_rate": 6.9324088310699745e-06, | |
| "loss": 0.0297, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 6.566328316415821, | |
| "grad_norm": 0.0005242613842710853, | |
| "learning_rate": 6.9173686031512595e-06, | |
| "loss": 0.0127, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 6.580329016450823, | |
| "grad_norm": 0.12270841002464294, | |
| "learning_rate": 6.902308007578392e-06, | |
| "loss": 0.0048, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 6.5943297164858246, | |
| "grad_norm": 0.5776433944702148, | |
| "learning_rate": 6.887227204335739e-06, | |
| "loss": 0.0012, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 6.608330416520826, | |
| "grad_norm": 0.004722943529486656, | |
| "learning_rate": 6.8721263536223295e-06, | |
| "loss": 0.0388, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 6.622331116555828, | |
| "grad_norm": 27.21036148071289, | |
| "learning_rate": 6.857005615850148e-06, | |
| "loss": 0.0208, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 6.63633181659083, | |
| "grad_norm": 0.00167378771584481, | |
| "learning_rate": 6.841865151642434e-06, | |
| "loss": 0.0056, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 6.650332516625832, | |
| "grad_norm": 54.31971740722656, | |
| "learning_rate": 6.8267051218319766e-06, | |
| "loss": 0.0165, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 6.664333216660833, | |
| "grad_norm": 0.0031642315443605185, | |
| "learning_rate": 6.8115256874594015e-06, | |
| "loss": 0.0066, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 6.678333916695835, | |
| "grad_norm": 0.0017562637804076076, | |
| "learning_rate": 6.7963270097714705e-06, | |
| "loss": 0.0004, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 6.692334616730837, | |
| "grad_norm": 0.0005346941761672497, | |
| "learning_rate": 6.781109250219353e-06, | |
| "loss": 0.0021, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 6.706335316765839, | |
| "grad_norm": 0.001608767081052065, | |
| "learning_rate": 6.765872570456926e-06, | |
| "loss": 0.0147, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 6.72033601680084, | |
| "grad_norm": 0.000481385737657547, | |
| "learning_rate": 6.750617132339045e-06, | |
| "loss": 0.0535, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.734336716835842, | |
| "grad_norm": 0.3291676938533783, | |
| "learning_rate": 6.735343097919838e-06, | |
| "loss": 0.092, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 6.748337416870844, | |
| "grad_norm": 18.21904754638672, | |
| "learning_rate": 6.720050629450963e-06, | |
| "loss": 0.0278, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 6.762338116905846, | |
| "grad_norm": 15.501587867736816, | |
| "learning_rate": 6.704739889379914e-06, | |
| "loss": 0.0919, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 6.776338816940847, | |
| "grad_norm": 0.0905318409204483, | |
| "learning_rate": 6.689411040348267e-06, | |
| "loss": 0.0201, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 6.790339516975849, | |
| "grad_norm": 0.034947771579027176, | |
| "learning_rate": 6.674064245189969e-06, | |
| "loss": 0.0616, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 6.804340217010851, | |
| "grad_norm": 0.007598557509481907, | |
| "learning_rate": 6.6586996669296014e-06, | |
| "loss": 0.0272, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 6.818340917045853, | |
| "grad_norm": 38.44027328491211, | |
| "learning_rate": 6.6433174687806525e-06, | |
| "loss": 0.0331, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 6.832341617080854, | |
| "grad_norm": 0.016838544979691505, | |
| "learning_rate": 6.62791781414378e-06, | |
| "loss": 0.0036, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 6.846342317115856, | |
| "grad_norm": 0.11778023093938828, | |
| "learning_rate": 6.612500866605078e-06, | |
| "loss": 0.059, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 6.860343017150858, | |
| "grad_norm": 0.002434186404570937, | |
| "learning_rate": 6.597066789934336e-06, | |
| "loss": 0.0687, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 6.87434371718586, | |
| "grad_norm": 1.2733291387557983, | |
| "learning_rate": 6.581615748083306e-06, | |
| "loss": 0.0171, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 6.8883444172208605, | |
| "grad_norm": 0.05777943879365921, | |
| "learning_rate": 6.56614790518395e-06, | |
| "loss": 0.0092, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 6.902345117255862, | |
| "grad_norm": 0.00835686456412077, | |
| "learning_rate": 6.5506634255467085e-06, | |
| "loss": 0.0003, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 6.916345817290864, | |
| "grad_norm": 0.5090736746788025, | |
| "learning_rate": 6.5351624736587446e-06, | |
| "loss": 0.0296, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 6.930346517325866, | |
| "grad_norm": 0.007250604685395956, | |
| "learning_rate": 6.5196452141822045e-06, | |
| "loss": 0.002, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 6.944347217360868, | |
| "grad_norm": 0.0013252092758193612, | |
| "learning_rate": 6.504111811952463e-06, | |
| "loss": 0.0005, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 6.958347917395869, | |
| "grad_norm": 9.083790064323694e-05, | |
| "learning_rate": 6.488562431976376e-06, | |
| "loss": 0.0491, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 6.972348617430871, | |
| "grad_norm": 0.534872829914093, | |
| "learning_rate": 6.472997239430529e-06, | |
| "loss": 0.0572, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 6.986349317465873, | |
| "grad_norm": 0.11519961804151535, | |
| "learning_rate": 6.457416399659472e-06, | |
| "loss": 0.0593, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 6.998949947497374, | |
| "eval_f1": 0.9509583006631868, | |
| "eval_loss": 0.16305629909038544, | |
| "eval_precision": 0.9495505979751435, | |
| "eval_recall": 0.9510146955913226, | |
| "eval_runtime": 85.3445, | |
| "eval_samples_per_second": 16.744, | |
| "eval_steps_per_second": 8.378, | |
| "step": 4999 | |
| }, | |
| { | |
| "epoch": 7.000350017500875, | |
| "grad_norm": 0.2292632907629013, | |
| "learning_rate": 6.441820078173979e-06, | |
| "loss": 0.0009, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.014350717535876, | |
| "grad_norm": 0.05339179188013077, | |
| "learning_rate": 6.426208440649278e-06, | |
| "loss": 0.0393, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 7.028351417570878, | |
| "grad_norm": 0.36590057611465454, | |
| "learning_rate": 6.410581652923298e-06, | |
| "loss": 0.0127, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 7.04235211760588, | |
| "grad_norm": 0.01245095580816269, | |
| "learning_rate": 6.394939880994899e-06, | |
| "loss": 0.0001, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 7.056352817640882, | |
| "grad_norm": 0.010753228329122066, | |
| "learning_rate": 6.379283291022118e-06, | |
| "loss": 0.0011, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 7.070353517675883, | |
| "grad_norm": 0.0007269814377650619, | |
| "learning_rate": 6.363612049320398e-06, | |
| "loss": 0.0282, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 7.084354217710885, | |
| "grad_norm": 0.004665072076022625, | |
| "learning_rate": 6.347926322360825e-06, | |
| "loss": 0.0003, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 7.098354917745887, | |
| "grad_norm": 0.06511039286851883, | |
| "learning_rate": 6.3322262767683564e-06, | |
| "loss": 0.0009, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 7.112355617780889, | |
| "grad_norm": 0.17725107073783875, | |
| "learning_rate": 6.31651207932005e-06, | |
| "loss": 0.0152, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 7.12635631781589, | |
| "grad_norm": 0.026116928085684776, | |
| "learning_rate": 6.300783896943299e-06, | |
| "loss": 0.0037, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 7.140357017850892, | |
| "grad_norm": 0.0030039288103580475, | |
| "learning_rate": 6.285041896714052e-06, | |
| "loss": 0.0203, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 7.154357717885894, | |
| "grad_norm": 0.0004382748156785965, | |
| "learning_rate": 6.269286245855039e-06, | |
| "loss": 0.009, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 7.168358417920896, | |
| "grad_norm": 0.12300131469964981, | |
| "learning_rate": 6.253517111734004e-06, | |
| "loss": 0.0014, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 7.182359117955897, | |
| "grad_norm": 0.01677461341023445, | |
| "learning_rate": 6.237734661861909e-06, | |
| "loss": 0.0416, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 7.196359817990899, | |
| "grad_norm": 0.264602929353714, | |
| "learning_rate": 6.221939063891176e-06, | |
| "loss": 0.0007, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 7.210360518025901, | |
| "grad_norm": 5.225741188041866e-05, | |
| "learning_rate": 6.206130485613887e-06, | |
| "loss": 0.0588, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 7.224361218060903, | |
| "grad_norm": 0.0019231714541092515, | |
| "learning_rate": 6.1903090949600144e-06, | |
| "loss": 0.0081, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 7.2383619180959045, | |
| "grad_norm": 0.33081698417663574, | |
| "learning_rate": 6.1744750599956315e-06, | |
| "loss": 0.0002, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 7.252362618130906, | |
| "grad_norm": 0.04602145031094551, | |
| "learning_rate": 6.158628548921132e-06, | |
| "loss": 0.0376, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 7.266363318165908, | |
| "grad_norm": 0.006785963661968708, | |
| "learning_rate": 6.142769730069431e-06, | |
| "loss": 0.0003, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 7.28036401820091, | |
| "grad_norm": 0.051581576466560364, | |
| "learning_rate": 6.126898771904197e-06, | |
| "loss": 0.0003, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 7.2943647182359115, | |
| "grad_norm": 0.0003680915688164532, | |
| "learning_rate": 6.11101584301804e-06, | |
| "loss": 0.017, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 7.308365418270913, | |
| "grad_norm": 0.26525017619132996, | |
| "learning_rate": 6.095121112130742e-06, | |
| "loss": 0.0282, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 7.322366118305915, | |
| "grad_norm": 0.0712617039680481, | |
| "learning_rate": 6.079214748087444e-06, | |
| "loss": 0.0004, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 7.336366818340917, | |
| "grad_norm": 0.022503864020109177, | |
| "learning_rate": 6.063296919856872e-06, | |
| "loss": 0.0437, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 7.3503675183759185, | |
| "grad_norm": 0.005461391061544418, | |
| "learning_rate": 6.047367796529523e-06, | |
| "loss": 0.0008, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 7.36436821841092, | |
| "grad_norm": 0.013845465146005154, | |
| "learning_rate": 6.031427547315889e-06, | |
| "loss": 0.0241, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 7.378368918445922, | |
| "grad_norm": 0.0006676681223325431, | |
| "learning_rate": 6.0154763415446395e-06, | |
| "loss": 0.0478, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 7.392369618480924, | |
| "grad_norm": 0.001390443299897015, | |
| "learning_rate": 5.9995143486608406e-06, | |
| "loss": 0.0007, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 7.4063703185159255, | |
| "grad_norm": 0.0034175976179540157, | |
| "learning_rate": 5.983541738224141e-06, | |
| "loss": 0.0005, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 7.420371018550927, | |
| "grad_norm": 0.08463763445615768, | |
| "learning_rate": 5.967558679906981e-06, | |
| "loss": 0.0008, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 7.434371718585929, | |
| "grad_norm": 0.00045262128696776927, | |
| "learning_rate": 5.951565343492779e-06, | |
| "loss": 0.0001, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 7.448372418620931, | |
| "grad_norm": 35.275489807128906, | |
| "learning_rate": 5.935561898874142e-06, | |
| "loss": 0.0477, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 7.462373118655933, | |
| "grad_norm": 0.0014428936410695314, | |
| "learning_rate": 5.91954851605105e-06, | |
| "loss": 0.0001, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 7.476373818690934, | |
| "grad_norm": 0.002006105612963438, | |
| "learning_rate": 5.9035253651290555e-06, | |
| "loss": 0.0003, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 7.490374518725936, | |
| "grad_norm": 0.08378235995769501, | |
| "learning_rate": 5.887492616317471e-06, | |
| "loss": 0.0001, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 7.504375218760938, | |
| "grad_norm": 0.006655500736087561, | |
| "learning_rate": 5.87145043992757e-06, | |
| "loss": 0.0023, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 7.51837591879594, | |
| "grad_norm": 0.0004303493769839406, | |
| "learning_rate": 5.855399006370766e-06, | |
| "loss": 0.0001, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 7.532376618830941, | |
| "grad_norm": 0.029720665886998177, | |
| "learning_rate": 5.839338486156812e-06, | |
| "loss": 0.0127, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 7.546377318865943, | |
| "grad_norm": 0.0001170751202153042, | |
| "learning_rate": 5.8232690498919906e-06, | |
| "loss": 0.0003, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 7.560378018900945, | |
| "grad_norm": 0.0037085022777318954, | |
| "learning_rate": 5.80719086827729e-06, | |
| "loss": 0.0004, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 7.574378718935947, | |
| "grad_norm": 0.003970707766711712, | |
| "learning_rate": 5.7911041121066e-06, | |
| "loss": 0.0065, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 7.588379418970948, | |
| "grad_norm": 0.013814202509820461, | |
| "learning_rate": 5.775008952264897e-06, | |
| "loss": 0.0712, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 7.60238011900595, | |
| "grad_norm": 0.003518168581649661, | |
| "learning_rate": 5.7589055597264235e-06, | |
| "loss": 0.0026, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 7.616380819040952, | |
| "grad_norm": 0.07790637761354446, | |
| "learning_rate": 5.742794105552879e-06, | |
| "loss": 0.0389, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 7.630381519075954, | |
| "grad_norm": 0.0029561452101916075, | |
| "learning_rate": 5.726674760891599e-06, | |
| "loss": 0.0371, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 7.644382219110955, | |
| "grad_norm": 0.006337731145322323, | |
| "learning_rate": 5.71054769697374e-06, | |
| "loss": 0.0302, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 7.658382919145957, | |
| "grad_norm": 0.3749936521053314, | |
| "learning_rate": 5.694413085112448e-06, | |
| "loss": 0.0032, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 7.672383619180959, | |
| "grad_norm": 0.03626039996743202, | |
| "learning_rate": 5.678271096701059e-06, | |
| "loss": 0.0646, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 7.686384319215961, | |
| "grad_norm": 0.014318865723907948, | |
| "learning_rate": 5.662121903211265e-06, | |
| "loss": 0.0007, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 7.700385019250962, | |
| "grad_norm": 0.005021099466830492, | |
| "learning_rate": 5.645965676191294e-06, | |
| "loss": 0.0003, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 7.714385719285964, | |
| "grad_norm": 0.004436223302036524, | |
| "learning_rate": 5.62980258726409e-06, | |
| "loss": 0.0004, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 7.728386419320966, | |
| "grad_norm": 0.000860211905092001, | |
| "learning_rate": 5.6136328081254874e-06, | |
| "loss": 0.0, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 7.742387119355968, | |
| "grad_norm": 0.0007738994318060577, | |
| "learning_rate": 5.597456510542395e-06, | |
| "loss": 0.0266, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 7.756387819390969, | |
| "grad_norm": 0.003178725950419903, | |
| "learning_rate": 5.581273866350955e-06, | |
| "loss": 0.0001, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 7.770388519425971, | |
| "grad_norm": 0.0775187537074089, | |
| "learning_rate": 5.565085047454737e-06, | |
| "loss": 0.0298, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 7.784389219460973, | |
| "grad_norm": 0.07989465445280075, | |
| "learning_rate": 5.548890225822896e-06, | |
| "loss": 0.0162, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 7.798389919495975, | |
| "grad_norm": 1.2074592113494873, | |
| "learning_rate": 5.53268957348836e-06, | |
| "loss": 0.0013, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 7.8123906195309765, | |
| "grad_norm": 0.0009719234658405185, | |
| "learning_rate": 5.5164832625459865e-06, | |
| "loss": 0.0187, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 7.826391319565978, | |
| "grad_norm": 0.0023037681821733713, | |
| "learning_rate": 5.500271465150748e-06, | |
| "loss": 0.009, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 7.84039201960098, | |
| "grad_norm": 9.558547973632812, | |
| "learning_rate": 5.484054353515896e-06, | |
| "loss": 0.1276, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 7.854392719635982, | |
| "grad_norm": 20.879596710205078, | |
| "learning_rate": 5.467832099911135e-06, | |
| "loss": 0.0327, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 7.8683934196709835, | |
| "grad_norm": 0.022236233577132225, | |
| "learning_rate": 5.451604876660787e-06, | |
| "loss": 0.0006, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 7.882394119705985, | |
| "grad_norm": 0.005190831143409014, | |
| "learning_rate": 5.435372856141975e-06, | |
| "loss": 0.0117, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 7.896394819740987, | |
| "grad_norm": 24.173221588134766, | |
| "learning_rate": 5.4191362107827704e-06, | |
| "loss": 0.003, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 7.910395519775989, | |
| "grad_norm": 0.0021344611886888742, | |
| "learning_rate": 5.402895113060379e-06, | |
| "loss": 0.035, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 7.9243962198109905, | |
| "grad_norm": 0.045800287276506424, | |
| "learning_rate": 5.3866497354993e-06, | |
| "loss": 0.0004, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 7.938396919845992, | |
| "grad_norm": 0.05233141407370567, | |
| "learning_rate": 5.370400250669504e-06, | |
| "loss": 0.0445, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 7.952397619880994, | |
| "grad_norm": 0.005411333404481411, | |
| "learning_rate": 5.354146831184579e-06, | |
| "loss": 0.0006, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 7.966398319915996, | |
| "grad_norm": 0.002824575873091817, | |
| "learning_rate": 5.337889649699921e-06, | |
| "loss": 0.0006, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 7.9803990199509975, | |
| "grad_norm": 0.0004603438137564808, | |
| "learning_rate": 5.3216288789108805e-06, | |
| "loss": 0.0, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 7.994399719985999, | |
| "grad_norm": 0.17578007280826569, | |
| "learning_rate": 5.305364691550944e-06, | |
| "loss": 0.001, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.955865295904617, | |
| "eval_loss": 0.17154428362846375, | |
| "eval_precision": 0.9546686028711965, | |
| "eval_recall": 0.9559132260321903, | |
| "eval_runtime": 85.2061, | |
| "eval_samples_per_second": 16.771, | |
| "eval_steps_per_second": 8.391, | |
| "step": 5714 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10710, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.602886153473369e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
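
The trajectory above stops at global step 5714, the end of epoch 8 of the 15 configured training epochs (max_steps 10710), with a checkpoint saved every 500 steps, training metrics logged every 10 steps, and one eval entry per epoch.

Below is a minimal sketch of reading a state file like this one programmatically. It assumes the log is saved under the standard name `trainer_state.json` that the Hugging Face `Trainer` writes into each checkpoint directory; the filename and path are assumptions, not taken from this file. One caveat worth knowing: `grad_norm` is occasionally logged as the bare token `Infinity`, which strict JSON parsers (for example JavaScript's `JSON.parse`) reject, while Python's `json` module accepts `Infinity`/`NaN` by default and maps them to `float("inf")`/`float("nan")`.

```python
# Minimal sketch, assuming the log above is saved as trainer_state.json,
# the file the Hugging Face Trainer writes into each checkpoint directory.
# Python's json module tolerates the non-standard Infinity/NaN tokens by
# default, so a grad_norm logged as Infinity does not break parsing here,
# unlike in stricter parsers such as JavaScript's JSON.parse.
import json
import math

with open("trainer_state.json") as f:  # path/filename is an assumption
    state = json.load(f)

# log_history mixes per-step training entries ("loss", "grad_norm", ...)
# with per-epoch eval entries ("eval_f1", "eval_loss", ...).
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_f1" in e]

# Steps where the gradient norm was logged as infinite, if any.
blowups = [e["step"] for e in train_logs if math.isinf(e.get("grad_norm", 0.0))]
print("steps with infinite grad_norm:", blowups)

# One eval row per epoch, then the checkpoint the trainer marked as best.
for e in eval_logs:
    print(f"epoch {e['epoch']:6.3f}  step {e['step']:>5}  eval_f1 {e['eval_f1']:.4f}")
print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])
```

Run against a file with the contents above, this prints one summary line per completed epoch and the checkpoint the trainer recorded as best, which is a quick way to decide whether training past epoch 8 is still improving the eval F1.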