| { |
| "best_metric": 0.8328847781417223, |
| "best_model_checkpoint": "output_classification_768/product/checkpoint-9285", |
| "epoch": 12.999649982499125, |
| "eval_steps": 500, |
| "global_step": 9285, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01400070003500175, |
| "grad_norm": 21.103994369506836, |
| "learning_rate": 8.403361344537815e-08, |
| "loss": 2.901, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0280014000700035, |
| "grad_norm": 24.394515991210938, |
| "learning_rate": 1.774042950513539e-07, |
| "loss": 2.8333, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04200210010500525, |
| "grad_norm": 17.568700790405273, |
| "learning_rate": 2.7077497665732963e-07, |
| "loss": 2.8567, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.056002800140007, |
| "grad_norm": 24.28147315979004, |
| "learning_rate": 3.454715219421102e-07, |
| "loss": 2.8378, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07000350017500875, |
| "grad_norm": 19.30731201171875, |
| "learning_rate": 4.388422035480859e-07, |
| "loss": 2.8394, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0840042002100105, |
| "grad_norm": 22.653026580810547, |
| "learning_rate": 5.322128851540616e-07, |
| "loss": 2.7691, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09800490024501225, |
| "grad_norm": 18.282873153686523, |
| "learning_rate": 6.255835667600374e-07, |
| "loss": 2.7781, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.112005600280014, |
| "grad_norm": 23.786239624023438, |
| "learning_rate": 7.096171802054156e-07, |
| "loss": 2.748, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12600630031501575, |
| "grad_norm": 49.12445831298828, |
| "learning_rate": 7.936507936507937e-07, |
| "loss": 2.5834, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1400070003500175, |
| "grad_norm": 47.40871047973633, |
| "learning_rate": 8.870214752567695e-07, |
| "loss": 2.5132, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15400770038501926, |
| "grad_norm": 37.690696716308594, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 2.4696, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.168008400420021, |
| "grad_norm": 53.0093879699707, |
| "learning_rate": 1.0737628384687208e-06, |
| "loss": 2.3196, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.18200910045502275, |
| "grad_norm": 27.630456924438477, |
| "learning_rate": 1.1671335200746967e-06, |
| "loss": 2.4033, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1960098004900245, |
| "grad_norm": 28.57124137878418, |
| "learning_rate": 1.2605042016806724e-06, |
| "loss": 2.2114, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.21001050052502626, |
| "grad_norm": 32.31087875366211, |
| "learning_rate": 1.353874883286648e-06, |
| "loss": 2.1544, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.224011200560028, |
| "grad_norm": 29.275854110717773, |
| "learning_rate": 1.447245564892624e-06, |
| "loss": 2.4179, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.23801190059502975, |
| "grad_norm": 28.587217330932617, |
| "learning_rate": 1.5406162464985996e-06, |
| "loss": 2.2672, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2520126006300315, |
| "grad_norm": 63.85499954223633, |
| "learning_rate": 1.6339869281045753e-06, |
| "loss": 2.1925, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.26601330066503326, |
| "grad_norm": 38.323062896728516, |
| "learning_rate": 1.727357609710551e-06, |
| "loss": 2.2907, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.280014000700035, |
| "grad_norm": 36.11345672607422, |
| "learning_rate": 1.8207282913165267e-06, |
| "loss": 2.0923, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.29401470073503677, |
| "grad_norm": 31.201120376586914, |
| "learning_rate": 1.914098972922503e-06, |
| "loss": 2.2186, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3080154007700385, |
| "grad_norm": 34.21009063720703, |
| "learning_rate": 2.0074696545284783e-06, |
| "loss": 2.326, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.32201610080504023, |
| "grad_norm": 32.68191909790039, |
| "learning_rate": 2.100840336134454e-06, |
| "loss": 2.1694, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.336016800840042, |
| "grad_norm": 27.508405685424805, |
| "learning_rate": 2.1942110177404298e-06, |
| "loss": 2.028, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.35001750087504374, |
| "grad_norm": 24.13580894470215, |
| "learning_rate": 2.2875816993464053e-06, |
| "loss": 2.2815, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3640182009100455, |
| "grad_norm": 26.199440002441406, |
| "learning_rate": 2.380952380952381e-06, |
| "loss": 2.3411, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.37801890094504725, |
| "grad_norm": 36.189361572265625, |
| "learning_rate": 2.474323062558357e-06, |
| "loss": 2.2379, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.392019600980049, |
| "grad_norm": 22.63069725036621, |
| "learning_rate": 2.567693744164332e-06, |
| "loss": 2.1743, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.40602030101505077, |
| "grad_norm": 28.03191566467285, |
| "learning_rate": 2.6610644257703085e-06, |
| "loss": 2.2767, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4200210010500525, |
| "grad_norm": 32.591068267822266, |
| "learning_rate": 2.7544351073762845e-06, |
| "loss": 2.1676, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4340217010850543, |
| "grad_norm": 23.519025802612305, |
| "learning_rate": 2.8478057889822595e-06, |
| "loss": 2.3134, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.448022401120056, |
| "grad_norm": 26.249027252197266, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 2.3326, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.46202310115505774, |
| "grad_norm": 24.713272094726562, |
| "learning_rate": 3.034547152194211e-06, |
| "loss": 2.2709, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4760238011900595, |
| "grad_norm": 26.74544334411621, |
| "learning_rate": 3.127917833800187e-06, |
| "loss": 2.2261, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.49002450122506125, |
| "grad_norm": 32.99632263183594, |
| "learning_rate": 3.221288515406163e-06, |
| "loss": 2.0507, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.504025201260063, |
| "grad_norm": 44.55859375, |
| "learning_rate": 3.3146591970121383e-06, |
| "loss": 2.1637, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5180259012950648, |
| "grad_norm": 26.943199157714844, |
| "learning_rate": 3.4080298786181142e-06, |
| "loss": 2.2462, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5320266013300665, |
| "grad_norm": 21.44816780090332, |
| "learning_rate": 3.5014005602240897e-06, |
| "loss": 2.1052, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5460273013650683, |
| "grad_norm": 27.31263542175293, |
| "learning_rate": 3.5947712418300657e-06, |
| "loss": 2.1083, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.56002800140007, |
| "grad_norm": 22.418066024780273, |
| "learning_rate": 3.6881419234360416e-06, |
| "loss": 1.7936, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5740287014350718, |
| "grad_norm": 34.41685104370117, |
| "learning_rate": 3.781512605042017e-06, |
| "loss": 1.8335, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5880294014700735, |
| "grad_norm": 27.243928909301758, |
| "learning_rate": 3.874883286647993e-06, |
| "loss": 1.8911, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6020301015050753, |
| "grad_norm": 24.380064010620117, |
| "learning_rate": 3.968253968253968e-06, |
| "loss": 1.8737, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.616030801540077, |
| "grad_norm": 26.450056076049805, |
| "learning_rate": 4.0616246498599444e-06, |
| "loss": 1.6773, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6300315015750787, |
| "grad_norm": 28.065898895263672, |
| "learning_rate": 4.15499533146592e-06, |
| "loss": 1.675, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6440322016100805, |
| "grad_norm": 26.523653030395508, |
| "learning_rate": 4.2483660130718954e-06, |
| "loss": 1.6823, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6580329016450822, |
| "grad_norm": 26.737417221069336, |
| "learning_rate": 4.341736694677872e-06, |
| "loss": 1.414, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.672033601680084, |
| "grad_norm": 21.968425750732422, |
| "learning_rate": 4.435107376283847e-06, |
| "loss": 1.4583, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6860343017150857, |
| "grad_norm": 23.044065475463867, |
| "learning_rate": 4.528478057889823e-06, |
| "loss": 1.6017, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7000350017500875, |
| "grad_norm": 35.77534866333008, |
| "learning_rate": 4.621848739495799e-06, |
| "loss": 1.5726, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7140357017850892, |
| "grad_norm": 35.47792053222656, |
| "learning_rate": 4.715219421101775e-06, |
| "loss": 1.4581, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.728036401820091, |
| "grad_norm": 38.72248077392578, |
| "learning_rate": 4.80859010270775e-06, |
| "loss": 1.2325, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7420371018550928, |
| "grad_norm": 29.20247459411621, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 1.5076, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7560378018900945, |
| "grad_norm": 29.444625854492188, |
| "learning_rate": 4.995331465919702e-06, |
| "loss": 1.2232, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7700385019250963, |
| "grad_norm": 27.85977554321289, |
| "learning_rate": 5.088702147525677e-06, |
| "loss": 1.0447, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.784039201960098, |
| "grad_norm": 29.185314178466797, |
| "learning_rate": 5.182072829131654e-06, |
| "loss": 1.0293, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7980399019950998, |
| "grad_norm": 17.427093505859375, |
| "learning_rate": 5.275443510737629e-06, |
| "loss": 1.1785, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8120406020301015, |
| "grad_norm": 36.874305725097656, |
| "learning_rate": 5.368814192343604e-06, |
| "loss": 1.1096, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8260413020651033, |
| "grad_norm": 25.060104370117188, |
| "learning_rate": 5.4621848739495795e-06, |
| "loss": 0.8118, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.840042002100105, |
| "grad_norm": 41.775665283203125, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.1449, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8540427021351068, |
| "grad_norm": 28.053466796875, |
| "learning_rate": 5.648926237161531e-06, |
| "loss": 1.0124, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8680434021701086, |
| "grad_norm": 18.944034576416016, |
| "learning_rate": 5.742296918767507e-06, |
| "loss": 0.6939, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8820441022051102, |
| "grad_norm": 35.93363952636719, |
| "learning_rate": 5.835667600373483e-06, |
| "loss": 0.9968, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.896044802240112, |
| "grad_norm": 31.607877731323242, |
| "learning_rate": 5.929038281979459e-06, |
| "loss": 0.9672, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9100455022751137, |
| "grad_norm": 24.163589477539062, |
| "learning_rate": 6.022408963585434e-06, |
| "loss": 1.1201, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9240462023101155, |
| "grad_norm": 32.112239837646484, |
| "learning_rate": 6.1157796451914105e-06, |
| "loss": 0.9118, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9380469023451172, |
| "grad_norm": 23.415752410888672, |
| "learning_rate": 6.209150326797386e-06, |
| "loss": 0.8296, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.952047602380119, |
| "grad_norm": 16.017988204956055, |
| "learning_rate": 6.3025210084033615e-06, |
| "loss": 0.8337, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9660483024151207, |
| "grad_norm": 20.97634506225586, |
| "learning_rate": 6.395891690009337e-06, |
| "loss": 0.6947, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9800490024501225, |
| "grad_norm": 21.921537399291992, |
| "learning_rate": 6.489262371615313e-06, |
| "loss": 0.7419, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9940497024851243, |
| "grad_norm": 23.830955505371094, |
| "learning_rate": 6.582633053221289e-06, |
| "loss": 0.6673, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.999649982499125, |
| "eval_f1": 0.7134617992065114, |
| "eval_loss": 0.7731016874313354, |
| "eval_precision": 0.7414465080254933, |
| "eval_recall": 0.7123862841147656, |
| "eval_runtime": 85.6445, |
| "eval_samples_per_second": 16.685, |
| "eval_steps_per_second": 8.348, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.008050402520126, |
| "grad_norm": 41.797821044921875, |
| "learning_rate": 6.676003734827264e-06, |
| "loss": 0.9137, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0220511025551278, |
| "grad_norm": 22.665904998779297, |
| "learning_rate": 6.769374416433241e-06, |
| "loss": 0.7504, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.0360518025901295, |
| "grad_norm": 24.145736694335938, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 0.7768, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0500525026251313, |
| "grad_norm": 34.773475646972656, |
| "learning_rate": 6.956115779645192e-06, |
| "loss": 0.7251, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.064053202660133, |
| "grad_norm": 52.297454833984375, |
| "learning_rate": 7.049486461251168e-06, |
| "loss": 0.9331, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.0780539026951348, |
| "grad_norm": 24.283205032348633, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.8233, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.0920546027301365, |
| "grad_norm": 50.00333786010742, |
| "learning_rate": 7.236227824463119e-06, |
| "loss": 0.8773, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1060553027651383, |
| "grad_norm": 15.482346534729004, |
| "learning_rate": 7.3295985060690946e-06, |
| "loss": 0.6958, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.12005600280014, |
| "grad_norm": 22.290390014648438, |
| "learning_rate": 7.422969187675071e-06, |
| "loss": 0.4332, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1340567028351418, |
| "grad_norm": 31.88663673400879, |
| "learning_rate": 7.516339869281046e-06, |
| "loss": 0.6877, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1480574028701436, |
| "grad_norm": 35.20397186279297, |
| "learning_rate": 7.609710550887022e-06, |
| "loss": 0.6279, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.1620581029051453, |
| "grad_norm": 18.677602767944336, |
| "learning_rate": 7.703081232492997e-06, |
| "loss": 0.7173, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.176058802940147, |
| "grad_norm": 19.016010284423828, |
| "learning_rate": 7.796451914098973e-06, |
| "loss": 0.4821, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.1900595029751488, |
| "grad_norm": 12.054780006408691, |
| "learning_rate": 7.889822595704948e-06, |
| "loss": 0.5104, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.2040602030101506, |
| "grad_norm": 22.70314598083496, |
| "learning_rate": 7.983193277310926e-06, |
| "loss": 0.5903, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.2180609030451524, |
| "grad_norm": 70.0879135131836, |
| "learning_rate": 8.076563958916901e-06, |
| "loss": 0.7494, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.232061603080154, |
| "grad_norm": 23.438417434692383, |
| "learning_rate": 8.169934640522877e-06, |
| "loss": 0.5092, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2460623031151559, |
| "grad_norm": 23.705568313598633, |
| "learning_rate": 8.263305322128852e-06, |
| "loss": 0.4623, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.2600630031501576, |
| "grad_norm": 54.969390869140625, |
| "learning_rate": 8.356676003734828e-06, |
| "loss": 0.6911, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.2740637031851594, |
| "grad_norm": 36.40383529663086, |
| "learning_rate": 8.450046685340803e-06, |
| "loss": 0.6824, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.2880644032201611, |
| "grad_norm": 29.539731979370117, |
| "learning_rate": 8.543417366946779e-06, |
| "loss": 0.8295, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.302065103255163, |
| "grad_norm": 31.277971267700195, |
| "learning_rate": 8.636788048552756e-06, |
| "loss": 0.7891, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.3160658032901644, |
| "grad_norm": 52.39353561401367, |
| "learning_rate": 8.730158730158731e-06, |
| "loss": 0.7638, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3300665033251662, |
| "grad_norm": 25.218196868896484, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 0.6171, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.344067203360168, |
| "grad_norm": 41.754459381103516, |
| "learning_rate": 8.916900093370682e-06, |
| "loss": 0.6163, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.3580679033951697, |
| "grad_norm": 25.492141723632812, |
| "learning_rate": 9.010270774976658e-06, |
| "loss": 0.5825, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.3720686034301715, |
| "grad_norm": 46.11489486694336, |
| "learning_rate": 9.103641456582633e-06, |
| "loss": 0.8171, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.3860693034651732, |
| "grad_norm": 42.723350524902344, |
| "learning_rate": 9.197012138188609e-06, |
| "loss": 0.5439, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.400070003500175, |
| "grad_norm": 28.26615333557129, |
| "learning_rate": 9.281045751633987e-06, |
| "loss": 0.7337, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4140707035351767, |
| "grad_norm": 23.162817001342773, |
| "learning_rate": 9.374416433239963e-06, |
| "loss": 0.7653, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4280714035701785, |
| "grad_norm": 38.257198333740234, |
| "learning_rate": 9.467787114845938e-06, |
| "loss": 0.5201, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.4420721036051802, |
| "grad_norm": 31.445514678955078, |
| "learning_rate": 9.561157796451916e-06, |
| "loss": 0.5803, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.456072803640182, |
| "grad_norm": 64.94223022460938, |
| "learning_rate": 9.654528478057891e-06, |
| "loss": 0.5172, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.4700735036751837, |
| "grad_norm": 36.16607666015625, |
| "learning_rate": 9.747899159663867e-06, |
| "loss": 0.4673, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.4840742037101855, |
| "grad_norm": 13.556483268737793, |
| "learning_rate": 9.841269841269842e-06, |
| "loss": 0.7821, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.4980749037451873, |
| "grad_norm": 31.29158592224121, |
| "learning_rate": 9.934640522875818e-06, |
| "loss": 0.5207, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.512075603780189, |
| "grad_norm": 13.835955619812012, |
| "learning_rate": 9.999997609887913e-06, |
| "loss": 0.5909, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5260763038151908, |
| "grad_norm": 64.40877532958984, |
| "learning_rate": 9.999955119069914e-06, |
| "loss": 0.6024, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.5400770038501925, |
| "grad_norm": 20.61574363708496, |
| "learning_rate": 9.999859515169501e-06, |
| "loss": 0.5886, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.5540777038851943, |
| "grad_norm": 71.72521209716797, |
| "learning_rate": 9.999710799202244e-06, |
| "loss": 0.7722, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.568078403920196, |
| "grad_norm": 49.93646240234375, |
| "learning_rate": 9.999508972747914e-06, |
| "loss": 0.5671, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.5820791039551978, |
| "grad_norm": 42.75546646118164, |
| "learning_rate": 9.999254037950452e-06, |
| "loss": 0.6359, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.5960798039901996, |
| "grad_norm": 33.113807678222656, |
| "learning_rate": 9.998945997517957e-06, |
| "loss": 0.5196, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.6100805040252013, |
| "grad_norm": 37.63560104370117, |
| "learning_rate": 9.998584854722655e-06, |
| "loss": 0.6019, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.624081204060203, |
| "grad_norm": 16.140146255493164, |
| "learning_rate": 9.998170613400862e-06, |
| "loss": 0.4827, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.6380819040952046, |
| "grad_norm": 47.94279861450195, |
| "learning_rate": 9.997703277952944e-06, |
| "loss": 0.6199, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.6520826041302064, |
| "grad_norm": 3.9183614253997803, |
| "learning_rate": 9.99718285334327e-06, |
| "loss": 0.5419, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.6660833041652081, |
| "grad_norm": 9.321074485778809, |
| "learning_rate": 9.99660934510016e-06, |
| "loss": 0.3374, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.6800840042002099, |
| "grad_norm": 37.75178909301758, |
| "learning_rate": 9.99598275931583e-06, |
| "loss": 0.5618, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.6940847042352116, |
| "grad_norm": 22.888437271118164, |
| "learning_rate": 9.995303102646316e-06, |
| "loss": 0.4591, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.7080854042702134, |
| "grad_norm": 32.00013732910156, |
| "learning_rate": 9.99457038231142e-06, |
| "loss": 0.4901, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.7220861043052151, |
| "grad_norm": 33.478763580322266, |
| "learning_rate": 9.993784606094612e-06, |
| "loss": 0.3999, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.736086804340217, |
| "grad_norm": 16.17192840576172, |
| "learning_rate": 9.992945782342973e-06, |
| "loss": 0.5772, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.7500875043752186, |
| "grad_norm": 13.883872985839844, |
| "learning_rate": 9.992053919967084e-06, |
| "loss": 0.481, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.7640882044102204, |
| "grad_norm": 45.30279541015625, |
| "learning_rate": 9.99110902844094e-06, |
| "loss": 0.5014, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.7780889044452222, |
| "grad_norm": 22.155324935913086, |
| "learning_rate": 9.990111117801852e-06, |
| "loss": 0.7349, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.792089604480224, |
| "grad_norm": 44.108768463134766, |
| "learning_rate": 9.989060198650337e-06, |
| "loss": 0.5384, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.8060903045152257, |
| "grad_norm": 35.5286865234375, |
| "learning_rate": 9.987956282150012e-06, |
| "loss": 0.6392, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.8200910045502274, |
| "grad_norm": 25.29277229309082, |
| "learning_rate": 9.986799380027454e-06, |
| "loss": 0.4703, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.8340917045852292, |
| "grad_norm": 50.539161682128906, |
| "learning_rate": 9.985589504572109e-06, |
| "loss": 0.598, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.848092404620231, |
| "grad_norm": 37.761070251464844, |
| "learning_rate": 9.984326668636131e-06, |
| "loss": 0.6036, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.8620931046552327, |
| "grad_norm": 28.276853561401367, |
| "learning_rate": 9.983010885634263e-06, |
| "loss": 0.7649, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.8760938046902345, |
| "grad_norm": 46.96693801879883, |
| "learning_rate": 9.981642169543691e-06, |
| "loss": 0.7382, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.8900945047252362, |
| "grad_norm": 32.133148193359375, |
| "learning_rate": 9.980220534903889e-06, |
| "loss": 0.774, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.904095204760238, |
| "grad_norm": 30.42446517944336, |
| "learning_rate": 9.978745996816473e-06, |
| "loss": 0.554, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.9180959047952397, |
| "grad_norm": 37.114498138427734, |
| "learning_rate": 9.977218570945036e-06, |
| "loss": 0.5324, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.9320966048302415, |
| "grad_norm": 24.40557098388672, |
| "learning_rate": 9.975638273514981e-06, |
| "loss": 0.4989, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.9460973048652432, |
| "grad_norm": 41.14036560058594, |
| "learning_rate": 9.974005121313356e-06, |
| "loss": 0.5818, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.960098004900245, |
| "grad_norm": 48.84564971923828, |
| "learning_rate": 9.972319131688666e-06, |
| "loss": 0.8088, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.9740987049352468, |
| "grad_norm": 43.53080368041992, |
| "learning_rate": 9.97058032255069e-06, |
| "loss": 0.3958, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.9880994049702485, |
| "grad_norm": 13.485721588134766, |
| "learning_rate": 9.968788712370296e-06, |
| "loss": 0.5372, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.99929996499825, |
| "eval_f1": 0.7711498702842724, |
| "eval_loss": 0.5365688800811768, |
| "eval_precision": 0.7706806893257392, |
| "eval_recall": 0.7711686494051785, |
| "eval_runtime": 85.6938, |
| "eval_samples_per_second": 16.676, |
| "eval_steps_per_second": 8.344, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.0021001050052503, |
| "grad_norm": 10.337446212768555, |
| "learning_rate": 9.966944320179247e-06, |
| "loss": 0.6646, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.016100805040252, |
| "grad_norm": 14.425756454467773, |
| "learning_rate": 9.965047165569985e-06, |
| "loss": 0.4885, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.0301015050752538, |
| "grad_norm": 42.77712631225586, |
| "learning_rate": 9.96309726869544e-06, |
| "loss": 0.3497, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.0441022051102555, |
| "grad_norm": 26.410226821899414, |
| "learning_rate": 9.961094650268803e-06, |
| "loss": 0.3965, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.0581029051452573, |
| "grad_norm": 27.177295684814453, |
| "learning_rate": 9.959039331563315e-06, |
| "loss": 0.5012, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.072103605180259, |
| "grad_norm": 13.967148780822754, |
| "learning_rate": 9.95693133441203e-06, |
| "loss": 0.4312, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.086104305215261, |
| "grad_norm": 38.66386032104492, |
| "learning_rate": 9.954770681207597e-06, |
| "loss": 0.3715, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.1001050052502626, |
| "grad_norm": 1.9330135583877563, |
| "learning_rate": 9.952557394902013e-06, |
| "loss": 0.366, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.1141057052852643, |
| "grad_norm": 22.95920753479004, |
| "learning_rate": 9.95029149900638e-06, |
| "loss": 0.2283, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.128106405320266, |
| "grad_norm": 25.927152633666992, |
| "learning_rate": 9.947973017590655e-06, |
| "loss": 0.3684, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.142107105355268, |
| "grad_norm": 9.549031257629395, |
| "learning_rate": 9.9456019752834e-06, |
| "loss": 0.3353, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.1561078053902696, |
| "grad_norm": 16.495037078857422, |
| "learning_rate": 9.943178397271513e-06, |
| "loss": 0.4955, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.1701085054252713, |
| "grad_norm": 9.032498359680176, |
| "learning_rate": 9.940702309299968e-06, |
| "loss": 0.234, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.184109205460273, |
| "grad_norm": 39.27081298828125, |
| "learning_rate": 9.938173737671531e-06, |
| "loss": 0.6408, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.198109905495275, |
| "grad_norm": 22.681249618530273, |
| "learning_rate": 9.935592709246489e-06, |
| "loss": 0.3353, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.2121106055302766, |
| "grad_norm": 35.17754364013672, |
| "learning_rate": 9.932959251442366e-06, |
| "loss": 0.4544, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.2261113055652784, |
| "grad_norm": 31.855209350585938, |
| "learning_rate": 9.930273392233624e-06, |
| "loss": 0.4868, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.24011200560028, |
| "grad_norm": 24.157930374145508, |
| "learning_rate": 9.92753516015137e-06, |
| "loss": 0.3609, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.254112705635282, |
| "grad_norm": 22.68416404724121, |
| "learning_rate": 9.92474458428306e-06, |
| "loss": 0.3278, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.2681134056702836, |
| "grad_norm": 4.546386241912842, |
| "learning_rate": 9.92190169427217e-06, |
| "loss": 0.3899, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.2821141057052854, |
| "grad_norm": 14.179669380187988, |
| "learning_rate": 9.919006520317903e-06, |
| "loss": 0.1604, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.296114805740287, |
| "grad_norm": 32.7485237121582, |
| "learning_rate": 9.916059093174862e-06, |
| "loss": 0.3212, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.310115505775289, |
| "grad_norm": 31.538394927978516, |
| "learning_rate": 9.913059444152711e-06, |
| "loss": 0.2861, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.3241162058102907, |
| "grad_norm": 49.36025619506836, |
| "learning_rate": 9.910007605115861e-06, |
| "loss": 0.4726, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.3381169058452924, |
| "grad_norm": 32.57158660888672, |
| "learning_rate": 9.906903608483116e-06, |
| "loss": 0.3142, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.352117605880294, |
| "grad_norm": 56.48556900024414, |
| "learning_rate": 9.903747487227339e-06, |
| "loss": 0.389, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.366118305915296, |
| "grad_norm": 20.73760414123535, |
| "learning_rate": 9.900539274875098e-06, |
| "loss": 0.2501, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.3801190059502977, |
| "grad_norm": 24.18535614013672, |
| "learning_rate": 9.897279005506306e-06, |
| "loss": 0.348, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.3941197059852994, |
| "grad_norm": 31.34202766418457, |
| "learning_rate": 9.893966713753864e-06, |
| "loss": 0.2551, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.408120406020301, |
| "grad_norm": 36.77047348022461, |
| "learning_rate": 9.890602434803296e-06, |
| "loss": 0.3296, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.422121106055303, |
| "grad_norm": 30.424179077148438, |
| "learning_rate": 9.887186204392368e-06, |
| "loss": 0.4624, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.4361218060903047, |
| "grad_norm": 37.91468811035156, |
| "learning_rate": 9.883718058810708e-06, |
| "loss": 0.605, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.4501225061253065, |
| "grad_norm": 20.114219665527344, |
| "learning_rate": 9.880198034899428e-06, |
| "loss": 0.5256, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.464123206160308, |
| "grad_norm": 32.55492401123047, |
| "learning_rate": 9.87662617005073e-06, |
| "loss": 0.4726, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.47812390619531, |
| "grad_norm": 24.271276473999023, |
| "learning_rate": 9.873002502207502e-06, |
| "loss": 0.2098, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.4921246062303117, |
| "grad_norm": 10.173416137695312, |
| "learning_rate": 9.869327069862924e-06, |
| "loss": 0.4108, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.5061253062653135, |
| "grad_norm": 22.572994232177734, |
| "learning_rate": 9.865599912060058e-06, |
| "loss": 0.3146, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.5201260063003152, |
| "grad_norm": 34.47822952270508, |
| "learning_rate": 9.861821068391424e-06, |
| "loss": 0.2967, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.534126706335317, |
| "grad_norm": 51.81456756591797, |
| "learning_rate": 9.857990578998589e-06, |
| "loss": 0.4418, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.5481274063703188, |
| "grad_norm": 29.156999588012695, |
| "learning_rate": 9.85410848457174e-06, |
| "loss": 0.2793, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.5621281064053205, |
| "grad_norm": 39.23643493652344, |
| "learning_rate": 9.850174826349246e-06, |
| "loss": 0.44, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.5761288064403223, |
| "grad_norm": 17.66337013244629, |
| "learning_rate": 9.846189646117224e-06, |
| "loss": 0.2916, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.590129506475324, |
| "grad_norm": 4.114482879638672, |
| "learning_rate": 9.842152986209098e-06, |
| "loss": 0.2428, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.604130206510326, |
| "grad_norm": 31.08041763305664, |
| "learning_rate": 9.83806488950514e-06, |
| "loss": 0.3134, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.6181309065453275, |
| "grad_norm": 34.51068115234375, |
| "learning_rate": 9.833925399432026e-06, |
| "loss": 0.3659, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.632131606580329, |
| "grad_norm": 6.492781639099121, |
| "learning_rate": 9.829734559962365e-06, |
| "loss": 0.4468, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.6461323066153306, |
| "grad_norm": 10.110315322875977, |
| "learning_rate": 9.825492415614235e-06, |
| "loss": 0.2452, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.6601330066503324, |
| "grad_norm": 27.26378631591797, |
| "learning_rate": 9.821199011450717e-06, |
| "loss": 0.1747, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.674133706685334, |
| "grad_norm": 1.9608994722366333, |
| "learning_rate": 9.816854393079402e-06, |
| "loss": 0.3288, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.688134406720336, |
| "grad_norm": 36.11700439453125, |
| "learning_rate": 9.812458606651922e-06, |
| "loss": 0.3384, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.7021351067553376, |
| "grad_norm": 11.049883842468262, |
| "learning_rate": 9.808011698863449e-06, |
| "loss": 0.5968, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.7161358067903394, |
| "grad_norm": 20.612592697143555, |
| "learning_rate": 9.803513716952203e-06, |
| "loss": 0.3655, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.730136506825341, |
| "grad_norm": 4.569857597351074, |
| "learning_rate": 9.798964708698947e-06, |
| "loss": 0.1961, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.744137206860343, |
| "grad_norm": 19.752294540405273, |
| "learning_rate": 9.794364722426488e-06, |
| "loss": 0.328, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.7581379068953447, |
| "grad_norm": 29.28534507751465, |
| "learning_rate": 9.789713806999154e-06, |
| "loss": 0.4217, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.7721386069303464, |
| "grad_norm": 38.84592056274414, |
| "learning_rate": 9.78501201182228e-06, |
| "loss": 0.3804, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.786139306965348, |
| "grad_norm": 30.860523223876953, |
| "learning_rate": 9.780259386841678e-06, |
| "loss": 0.3784, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.80014000700035, |
| "grad_norm": 0.2881031632423401, |
| "learning_rate": 9.775455982543116e-06, |
| "loss": 0.2319, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.8141407070353517, |
| "grad_norm": 9.092655181884766, |
| "learning_rate": 9.770601849951776e-06, |
| "loss": 0.2303, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.8281414070703534, |
| "grad_norm": 21.23202896118164, |
| "learning_rate": 9.765697040631703e-06, |
| "loss": 0.3774, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.842142107105355, |
| "grad_norm": 15.877593994140625, |
| "learning_rate": 9.760741606685282e-06, |
| "loss": 0.4424, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.856142807140357, |
| "grad_norm": 24.96131134033203, |
| "learning_rate": 9.755735600752652e-06, |
| "loss": 0.415, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.8701435071753587, |
| "grad_norm": 46.10403060913086, |
| "learning_rate": 9.750679076011175e-06, |
| "loss": 0.3205, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.8841442072103605, |
| "grad_norm": 49.756649017333984, |
| "learning_rate": 9.745572086174857e-06, |
| "loss": 0.4478, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.8981449072453622, |
| "grad_norm": 42.11957931518555, |
| "learning_rate": 9.740414685493777e-06, |
| "loss": 0.4191, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.912145607280364, |
| "grad_norm": 31.03801918029785, |
| "learning_rate": 9.735206928753518e-06, |
| "loss": 0.2388, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.9261463073153657, |
| "grad_norm": 47.63282775878906, |
| "learning_rate": 9.729948871274579e-06, |
| "loss": 0.6498, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.9401470073503675, |
| "grad_norm": 28.83293342590332, |
| "learning_rate": 9.724640568911788e-06, |
| "loss": 0.3742, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.9541477073853692, |
| "grad_norm": 5.075782775878906, |
| "learning_rate": 9.719282078053713e-06, |
| "loss": 0.361, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.968148407420371, |
| "grad_norm": 15.41346263885498, |
| "learning_rate": 9.713873455622058e-06, |
| "loss": 0.268, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.9821491074553728, |
| "grad_norm": 22.061138153076172, |
| "learning_rate": 9.70841475907106e-06, |
| "loss": 0.5841, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.9961498074903745, |
| "grad_norm": 29.209182739257812, |
| "learning_rate": 9.702906046386878e-06, |
| "loss": 0.285, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.998949947497375, |
| "eval_f1": 0.7860625313331145, |
| "eval_loss": 0.5379942655563354, |
| "eval_precision": 0.7910525243809028, |
| "eval_recall": 0.7858642407277817, |
| "eval_runtime": 85.7771, |
| "eval_samples_per_second": 16.659, |
| "eval_steps_per_second": 8.336, |
| "step": 2142 |
| }, |
| { |
| "epoch": 3.0101505075253763, |
| "grad_norm": 25.932525634765625, |
| "learning_rate": 9.69734737608698e-06, |
| "loss": 0.3992, |
| "step": 2150 |
| }, |
| { |
| "epoch": 3.024151207560378, |
| "grad_norm": 8.238982200622559, |
| "learning_rate": 9.692301907847981e-06, |
| "loss": 0.2451, |
| "step": 2160 |
| }, |
| { |
| "epoch": 3.03815190759538, |
| "grad_norm": 20.88953971862793, |
| "learning_rate": 9.686648481193994e-06, |
| "loss": 0.2381, |
| "step": 2170 |
| }, |
| { |
| "epoch": 3.0521526076303815, |
| "grad_norm": 52.287017822265625, |
| "learning_rate": 9.68094526962372e-06, |
| "loss": 0.2175, |
| "step": 2180 |
| }, |
| { |
| "epoch": 3.0661533076653833, |
| "grad_norm": 14.914299011230469, |
| "learning_rate": 9.675192333720735e-06, |
| "loss": 0.2471, |
| "step": 2190 |
| }, |
| { |
| "epoch": 3.080154007700385, |
| "grad_norm": 27.594337463378906, |
| "learning_rate": 9.669389734596819e-06, |
| "loss": 0.3744, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.094154707735387, |
| "grad_norm": 37.43633270263672, |
| "learning_rate": 9.66353753389131e-06, |
| "loss": 0.1572, |
| "step": 2210 |
| }, |
| { |
| "epoch": 3.1081554077703886, |
| "grad_norm": 39.02608871459961, |
| "learning_rate": 9.65763579377045e-06, |
| "loss": 0.1469, |
| "step": 2220 |
| }, |
| { |
| "epoch": 3.1221561078053903, |
| "grad_norm": 1.7573050260543823, |
| "learning_rate": 9.651684576926721e-06, |
| "loss": 0.2838, |
| "step": 2230 |
| }, |
| { |
| "epoch": 3.136156807840392, |
| "grad_norm": 19.469436645507812, |
| "learning_rate": 9.645683946578189e-06, |
| "loss": 0.3036, |
| "step": 2240 |
| }, |
| { |
| "epoch": 3.150157507875394, |
| "grad_norm": 19.4732723236084, |
| "learning_rate": 9.639633966467817e-06, |
| "loss": 0.2174, |
| "step": 2250 |
| }, |
| { |
| "epoch": 3.1641582079103956, |
| "grad_norm": 36.7767448425293, |
| "learning_rate": 9.633534700862804e-06, |
| "loss": 0.2017, |
| "step": 2260 |
| }, |
| { |
| "epoch": 3.1781589079453973, |
| "grad_norm": 27.999431610107422, |
| "learning_rate": 9.627386214553886e-06, |
| "loss": 0.2658, |
| "step": 2270 |
| }, |
| { |
| "epoch": 3.192159607980399, |
| "grad_norm": 29.238040924072266, |
| "learning_rate": 9.621188572854668e-06, |
| "loss": 0.1646, |
| "step": 2280 |
| }, |
| { |
| "epoch": 3.206160308015401, |
| "grad_norm": 22.505971908569336, |
| "learning_rate": 9.614941841600905e-06, |
| "loss": 0.1813, |
| "step": 2290 |
| }, |
| { |
| "epoch": 3.2201610080504026, |
| "grad_norm": 3.6057894229888916, |
| "learning_rate": 9.608646087149826e-06, |
| "loss": 0.101, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.2341617080854044, |
| "grad_norm": 30.187728881835938, |
| "learning_rate": 9.60230137637942e-06, |
| "loss": 0.2003, |
| "step": 2310 |
| }, |
| { |
| "epoch": 3.248162408120406, |
| "grad_norm": 39.583683013916016, |
| "learning_rate": 9.595907776687715e-06, |
| "loss": 0.2168, |
| "step": 2320 |
| }, |
| { |
| "epoch": 3.262163108155408, |
| "grad_norm": 8.797574996948242, |
| "learning_rate": 9.58946535599208e-06, |
| "loss": 0.243, |
| "step": 2330 |
| }, |
| { |
| "epoch": 3.2761638081904096, |
| "grad_norm": 42.47231674194336, |
| "learning_rate": 9.582974182728497e-06, |
| "loss": 0.269, |
| "step": 2340 |
| }, |
| { |
| "epoch": 3.2901645082254114, |
| "grad_norm": 38.17387008666992, |
| "learning_rate": 9.576434325850824e-06, |
| "loss": 0.3031, |
| "step": 2350 |
| }, |
| { |
| "epoch": 3.304165208260413, |
| "grad_norm": 1.5917739868164062, |
| "learning_rate": 9.56984585483008e-06, |
| "loss": 0.3918, |
| "step": 2360 |
| }, |
| { |
| "epoch": 3.318165908295415, |
| "grad_norm": 27.6890869140625, |
| "learning_rate": 9.56320883965369e-06, |
| "loss": 0.2479, |
| "step": 2370 |
| }, |
| { |
| "epoch": 3.3321666083304167, |
| "grad_norm": 21.0488224029541, |
| "learning_rate": 9.556523350824759e-06, |
| "loss": 0.186, |
| "step": 2380 |
| }, |
| { |
| "epoch": 3.3461673083654184, |
| "grad_norm": 1.7114077806472778, |
| "learning_rate": 9.549789459361303e-06, |
| "loss": 0.418, |
| "step": 2390 |
| }, |
| { |
| "epoch": 3.36016800840042, |
| "grad_norm": 23.43485450744629, |
| "learning_rate": 9.543007236795513e-06, |
| "loss": 0.1727, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.374168708435422, |
| "grad_norm": 37.15725326538086, |
| "learning_rate": 9.536176755172988e-06, |
| "loss": 0.1121, |
| "step": 2410 |
| }, |
| { |
| "epoch": 3.3881694084704237, |
| "grad_norm": 11.059165954589844, |
| "learning_rate": 9.52929808705196e-06, |
| "loss": 0.2375, |
| "step": 2420 |
| }, |
| { |
| "epoch": 3.4021701085054254, |
| "grad_norm": 72.36428833007812, |
| "learning_rate": 9.522371305502542e-06, |
| "loss": 0.2272, |
| "step": 2430 |
| }, |
| { |
| "epoch": 3.416170808540427, |
| "grad_norm": 0.8613013029098511, |
| "learning_rate": 9.515396484105938e-06, |
| "loss": 0.0909, |
| "step": 2440 |
| }, |
| { |
| "epoch": 3.430171508575429, |
| "grad_norm": 0.07998291403055191, |
| "learning_rate": 9.508373696953664e-06, |
| "loss": 0.1642, |
| "step": 2450 |
| }, |
| { |
| "epoch": 3.4441722086104303, |
| "grad_norm": 5.251307010650635, |
| "learning_rate": 9.501303018646766e-06, |
| "loss": 0.0964, |
| "step": 2460 |
| }, |
| { |
| "epoch": 3.458172908645432, |
| "grad_norm": 0.03508025407791138, |
| "learning_rate": 9.494184524295023e-06, |
| "loss": 0.3244, |
| "step": 2470 |
| }, |
| { |
| "epoch": 3.472173608680434, |
| "grad_norm": 22.236034393310547, |
| "learning_rate": 9.487018289516146e-06, |
| "loss": 0.2749, |
| "step": 2480 |
| }, |
| { |
| "epoch": 3.4861743087154355, |
| "grad_norm": 5.1189703941345215, |
| "learning_rate": 9.479804390434983e-06, |
| "loss": 0.2798, |
| "step": 2490 |
| }, |
| { |
| "epoch": 3.5001750087504373, |
| "grad_norm": 0.6922470331192017, |
| "learning_rate": 9.472542903682708e-06, |
| "loss": 0.3294, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.514175708785439, |
| "grad_norm": 1.1609469652175903, |
| "learning_rate": 9.465233906395998e-06, |
| "loss": 0.3309, |
| "step": 2510 |
| }, |
| { |
| "epoch": 3.528176408820441, |
| "grad_norm": 1.238772988319397, |
| "learning_rate": 9.457877476216228e-06, |
| "loss": 0.1799, |
| "step": 2520 |
| }, |
| { |
| "epoch": 3.5421771088554426, |
| "grad_norm": 5.87846040725708, |
| "learning_rate": 9.450473691288637e-06, |
| "loss": 0.2434, |
| "step": 2530 |
| }, |
| { |
| "epoch": 3.5561778088904443, |
| "grad_norm": 3.064596176147461, |
| "learning_rate": 9.443022630261495e-06, |
| "loss": 0.1289, |
| "step": 2540 |
| }, |
| { |
| "epoch": 3.570178508925446, |
| "grad_norm": 12.347147941589355, |
| "learning_rate": 9.435524372285279e-06, |
| "loss": 0.2065, |
| "step": 2550 |
| }, |
| { |
| "epoch": 3.584179208960448, |
| "grad_norm": 42.32487487792969, |
| "learning_rate": 9.42797899701182e-06, |
| "loss": 0.2232, |
| "step": 2560 |
| }, |
| { |
| "epoch": 3.5981799089954496, |
| "grad_norm": 21.179861068725586, |
| "learning_rate": 9.420386584593469e-06, |
| "loss": 0.1303, |
| "step": 2570 |
| }, |
| { |
| "epoch": 3.6121806090304514, |
| "grad_norm": 13.074398040771484, |
| "learning_rate": 9.412747215682231e-06, |
| "loss": 0.3421, |
| "step": 2580 |
| }, |
| { |
| "epoch": 3.626181309065453, |
| "grad_norm": 7.620582103729248, |
| "learning_rate": 9.405060971428924e-06, |
| "loss": 0.1988, |
| "step": 2590 |
| }, |
| { |
| "epoch": 3.640182009100455, |
| "grad_norm": 0.4482537806034088, |
| "learning_rate": 9.397327933482303e-06, |
| "loss": 0.183, |
| "step": 2600 |
| }, |
| { |
| "epoch": 3.6541827091354566, |
| "grad_norm": 3.92740535736084, |
| "learning_rate": 9.389548183988204e-06, |
| "loss": 0.1766, |
| "step": 2610 |
| }, |
| { |
| "epoch": 3.6681834091704584, |
| "grad_norm": 18.18442153930664, |
| "learning_rate": 9.381721805588663e-06, |
| "loss": 0.2622, |
| "step": 2620 |
| }, |
| { |
| "epoch": 3.68218410920546, |
| "grad_norm": 3.5551092624664307, |
| "learning_rate": 9.373848881421045e-06, |
| "loss": 0.1895, |
| "step": 2630 |
| }, |
| { |
| "epoch": 3.696184809240462, |
| "grad_norm": 0.5254050493240356, |
| "learning_rate": 9.36592949511715e-06, |
| "loss": 0.1418, |
| "step": 2640 |
| }, |
| { |
| "epoch": 3.7101855092754636, |
| "grad_norm": 44.7007942199707, |
| "learning_rate": 9.35796373080234e-06, |
| "loss": 0.3657, |
| "step": 2650 |
| }, |
| { |
| "epoch": 3.7241862093104654, |
| "grad_norm": 14.073615074157715, |
| "learning_rate": 9.349951673094633e-06, |
| "loss": 0.4366, |
| "step": 2660 |
| }, |
| { |
| "epoch": 3.738186909345467, |
| "grad_norm": 30.277467727661133, |
| "learning_rate": 9.341893407103808e-06, |
| "loss": 0.1471, |
| "step": 2670 |
| }, |
| { |
| "epoch": 3.752187609380469, |
| "grad_norm": 41.96673583984375, |
| "learning_rate": 9.333789018430505e-06, |
| "loss": 0.3122, |
| "step": 2680 |
| }, |
| { |
| "epoch": 3.7661883094154707, |
| "grad_norm": 22.88213539123535, |
| "learning_rate": 9.325638593165308e-06, |
| "loss": 0.2792, |
| "step": 2690 |
| }, |
| { |
| "epoch": 3.7801890094504724, |
| "grad_norm": 44.49440383911133, |
| "learning_rate": 9.317442217887835e-06, |
| "loss": 0.286, |
| "step": 2700 |
| }, |
| { |
| "epoch": 3.794189709485474, |
| "grad_norm": 70.30220794677734, |
| "learning_rate": 9.309199979665821e-06, |
| "loss": 0.3806, |
| "step": 2710 |
| }, |
| { |
| "epoch": 3.808190409520476, |
| "grad_norm": 21.966384887695312, |
| "learning_rate": 9.300911966054184e-06, |
| "loss": 0.2567, |
| "step": 2720 |
| }, |
| { |
| "epoch": 3.8221911095554777, |
| "grad_norm": 1.360113501548767, |
| "learning_rate": 9.292578265094109e-06, |
| "loss": 0.2901, |
| "step": 2730 |
| }, |
| { |
| "epoch": 3.8361918095904795, |
| "grad_norm": 0.4982340931892395, |
| "learning_rate": 9.284198965312096e-06, |
| "loss": 0.261, |
| "step": 2740 |
| }, |
| { |
| "epoch": 3.850192509625481, |
| "grad_norm": 24.56475067138672, |
| "learning_rate": 9.275774155719032e-06, |
| "loss": 0.2213, |
| "step": 2750 |
| }, |
| { |
| "epoch": 3.864193209660483, |
| "grad_norm": 36.820167541503906, |
| "learning_rate": 9.267303925809246e-06, |
| "loss": 0.1971, |
| "step": 2760 |
| }, |
| { |
| "epoch": 3.8781939096954847, |
| "grad_norm": 12.52115535736084, |
| "learning_rate": 9.258788365559543e-06, |
| "loss": 0.247, |
| "step": 2770 |
| }, |
| { |
| "epoch": 3.8921946097304865, |
| "grad_norm": 14.991462707519531, |
| "learning_rate": 9.25022756542827e-06, |
| "loss": 0.109, |
| "step": 2780 |
| }, |
| { |
| "epoch": 3.9061953097654882, |
| "grad_norm": 5.062511920928955, |
| "learning_rate": 9.24162161635434e-06, |
| "loss": 0.2759, |
| "step": 2790 |
| }, |
| { |
| "epoch": 3.92019600980049, |
| "grad_norm": 1.6984614133834839, |
| "learning_rate": 9.232970609756267e-06, |
| "loss": 0.2059, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.9341967098354917, |
| "grad_norm": 26.058155059814453, |
| "learning_rate": 9.224274637531204e-06, |
| "loss": 0.2886, |
| "step": 2810 |
| }, |
| { |
| "epoch": 3.9481974098704935, |
| "grad_norm": 64.70281219482422, |
| "learning_rate": 9.215533792053957e-06, |
| "loss": 0.204, |
| "step": 2820 |
| }, |
| { |
| "epoch": 3.9621981099054953, |
| "grad_norm": 0.972621738910675, |
| "learning_rate": 9.20674816617601e-06, |
| "loss": 0.2112, |
| "step": 2830 |
| }, |
| { |
| "epoch": 3.976198809940497, |
| "grad_norm": 29.663774490356445, |
| "learning_rate": 9.197917853224531e-06, |
| "loss": 0.1863, |
| "step": 2840 |
| }, |
| { |
| "epoch": 3.9901995099754988, |
| "grad_norm": 11.654136657714844, |
| "learning_rate": 9.189042947001395e-06, |
| "loss": 0.2924, |
| "step": 2850 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1": 0.800743336162284, |
| "eval_loss": 0.6182411313056946, |
| "eval_precision": 0.8053584373354589, |
| "eval_recall": 0.8005598320503848, |
| "eval_runtime": 85.7236, |
| "eval_samples_per_second": 16.67, |
| "eval_steps_per_second": 8.341, |
| "step": 2857 |
| }, |
| { |
| "epoch": 4.0042002100105005, |
| "grad_norm": 0.10247134417295456, |
| "learning_rate": 9.180123541782172e-06, |
| "loss": 0.1368, |
| "step": 2860 |
| }, |
| { |
| "epoch": 4.018200910045502, |
| "grad_norm": 0.11311420053243637, |
| "learning_rate": 9.171159732315129e-06, |
| "loss": 0.0733, |
| "step": 2870 |
| }, |
| { |
| "epoch": 4.032201610080504, |
| "grad_norm": 43.14177703857422, |
| "learning_rate": 9.162151613820236e-06, |
| "loss": 0.1013, |
| "step": 2880 |
| }, |
| { |
| "epoch": 4.046202310115506, |
| "grad_norm": 41.66696548461914, |
| "learning_rate": 9.153099281988138e-06, |
| "loss": 0.201, |
| "step": 2890 |
| }, |
| { |
| "epoch": 4.0602030101505076, |
| "grad_norm": 3.4230246543884277, |
| "learning_rate": 9.144002832979149e-06, |
| "loss": 0.0481, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.074203710185509, |
| "grad_norm": 55.94377517700195, |
| "learning_rate": 9.134862363422223e-06, |
| "loss": 0.2921, |
| "step": 2910 |
| }, |
| { |
| "epoch": 4.088204410220511, |
| "grad_norm": 1.319650650024414, |
| "learning_rate": 9.125677970413935e-06, |
| "loss": 0.0984, |
| "step": 2920 |
| }, |
| { |
| "epoch": 4.102205110255513, |
| "grad_norm": 15.05123519897461, |
| "learning_rate": 9.116449751517448e-06, |
| "loss": 0.2502, |
| "step": 2930 |
| }, |
| { |
| "epoch": 4.116205810290515, |
| "grad_norm": 0.4123871922492981, |
| "learning_rate": 9.107177804761468e-06, |
| "loss": 0.1097, |
| "step": 2940 |
| }, |
| { |
| "epoch": 4.130206510325516, |
| "grad_norm": 90.21160888671875, |
| "learning_rate": 9.097862228639216e-06, |
| "loss": 0.172, |
| "step": 2950 |
| }, |
| { |
| "epoch": 4.144207210360518, |
| "grad_norm": 0.02424849569797516, |
| "learning_rate": 9.088503122107371e-06, |
| "loss": 0.2203, |
| "step": 2960 |
| }, |
| { |
| "epoch": 4.15820791039552, |
| "grad_norm": 14.979598045349121, |
| "learning_rate": 9.079100584585027e-06, |
| "loss": 0.0154, |
| "step": 2970 |
| }, |
| { |
| "epoch": 4.172208610430522, |
| "grad_norm": 11.164504051208496, |
| "learning_rate": 9.06965471595263e-06, |
| "loss": 0.1461, |
| "step": 2980 |
| }, |
| { |
| "epoch": 4.186209310465523, |
| "grad_norm": 73.4662094116211, |
| "learning_rate": 9.060165616550918e-06, |
| "loss": 0.2008, |
| "step": 2990 |
| }, |
| { |
| "epoch": 4.200210010500525, |
| "grad_norm": 0.1721985638141632, |
| "learning_rate": 9.050633387179861e-06, |
| "loss": 0.0428, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.214210710535527, |
| "grad_norm": 51.70530319213867, |
| "learning_rate": 9.041058129097586e-06, |
| "loss": 0.2124, |
| "step": 3010 |
| }, |
| { |
| "epoch": 4.228211410570529, |
| "grad_norm": 33.774085998535156, |
| "learning_rate": 9.031439944019302e-06, |
| "loss": 0.1633, |
| "step": 3020 |
| }, |
| { |
| "epoch": 4.24221211060553, |
| "grad_norm": 0.30398979783058167, |
| "learning_rate": 9.021778934116212e-06, |
| "loss": 0.2778, |
| "step": 3030 |
| }, |
| { |
| "epoch": 4.256212810640532, |
| "grad_norm": 19.708505630493164, |
| "learning_rate": 9.012075202014444e-06, |
| "loss": 0.0801, |
| "step": 3040 |
| }, |
| { |
| "epoch": 4.270213510675534, |
| "grad_norm": 20.29991340637207, |
| "learning_rate": 9.002328850793946e-06, |
| "loss": 0.2146, |
| "step": 3050 |
| }, |
| { |
| "epoch": 4.284214210710536, |
| "grad_norm": 9.303946495056152, |
| "learning_rate": 8.992539983987401e-06, |
| "loss": 0.1327, |
| "step": 3060 |
| }, |
| { |
| "epoch": 4.298214910745537, |
| "grad_norm": 43.80442428588867, |
| "learning_rate": 8.982708705579119e-06, |
| "loss": 0.2858, |
| "step": 3070 |
| }, |
| { |
| "epoch": 4.312215610780539, |
| "grad_norm": 3.160637855529785, |
| "learning_rate": 8.972835120003936e-06, |
| "loss": 0.0411, |
| "step": 3080 |
| }, |
| { |
| "epoch": 4.326216310815541, |
| "grad_norm": 0.1867658942937851, |
| "learning_rate": 8.962919332146107e-06, |
| "loss": 0.1155, |
| "step": 3090 |
| }, |
| { |
| "epoch": 4.340217010850543, |
| "grad_norm": 41.887699127197266, |
| "learning_rate": 8.952961447338192e-06, |
| "loss": 0.1599, |
| "step": 3100 |
| }, |
| { |
| "epoch": 4.354217710885544, |
| "grad_norm": 49.10065841674805, |
| "learning_rate": 8.942961571359927e-06, |
| "loss": 0.1876, |
| "step": 3110 |
| }, |
| { |
| "epoch": 4.368218410920546, |
| "grad_norm": 33.752349853515625, |
| "learning_rate": 8.932919810437117e-06, |
| "loss": 0.1184, |
| "step": 3120 |
| }, |
| { |
| "epoch": 4.382219110955548, |
| "grad_norm": 33.159061431884766, |
| "learning_rate": 8.92283627124049e-06, |
| "loss": 0.2899, |
| "step": 3130 |
| }, |
| { |
| "epoch": 4.39621981099055, |
| "grad_norm": 0.13434840738773346, |
| "learning_rate": 8.912711060884585e-06, |
| "loss": 0.1228, |
| "step": 3140 |
| }, |
| { |
| "epoch": 4.4102205110255515, |
| "grad_norm": 9.426471710205078, |
| "learning_rate": 8.902544286926585e-06, |
| "loss": 0.2344, |
| "step": 3150 |
| }, |
| { |
| "epoch": 4.424221211060553, |
| "grad_norm": 0.20675937831401825, |
| "learning_rate": 8.892336057365204e-06, |
| "loss": 0.2142, |
| "step": 3160 |
| }, |
| { |
| "epoch": 4.438221911095555, |
| "grad_norm": 24.473310470581055, |
| "learning_rate": 8.882086480639526e-06, |
| "loss": 0.3472, |
| "step": 3170 |
| }, |
| { |
| "epoch": 4.452222611130557, |
| "grad_norm": 54.27497100830078, |
| "learning_rate": 8.871795665627845e-06, |
| "loss": 0.1241, |
| "step": 3180 |
| }, |
| { |
| "epoch": 4.4662233111655585, |
| "grad_norm": 26.60704803466797, |
| "learning_rate": 8.861463721646528e-06, |
| "loss": 0.1468, |
| "step": 3190 |
| }, |
| { |
| "epoch": 4.48022401120056, |
| "grad_norm": 1.4064515829086304, |
| "learning_rate": 8.851090758448836e-06, |
| "loss": 0.3217, |
| "step": 3200 |
| }, |
| { |
| "epoch": 4.494224711235562, |
| "grad_norm": 6.67008113861084, |
| "learning_rate": 8.840676886223768e-06, |
| "loss": 0.0564, |
| "step": 3210 |
| }, |
| { |
| "epoch": 4.508225411270564, |
| "grad_norm": 4.312970161437988, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.1251, |
| "step": 3220 |
| }, |
| { |
| "epoch": 4.5222261113055655, |
| "grad_norm": 15.902132987976074, |
| "learning_rate": 8.819726857619156e-06, |
| "loss": 0.0224, |
| "step": 3230 |
| }, |
| { |
| "epoch": 4.536226811340567, |
| "grad_norm": 10.105917930603027, |
| "learning_rate": 8.809190923785724e-06, |
| "loss": 0.1252, |
| "step": 3240 |
| }, |
| { |
| "epoch": 4.550227511375569, |
| "grad_norm": 0.36241552233695984, |
| "learning_rate": 8.798614526014786e-06, |
| "loss": 0.2363, |
| "step": 3250 |
| }, |
| { |
| "epoch": 4.564228211410571, |
| "grad_norm": 26.39388656616211, |
| "learning_rate": 8.78799777665637e-06, |
| "loss": 0.1994, |
| "step": 3260 |
| }, |
| { |
| "epoch": 4.5782289114455725, |
| "grad_norm": 0.08990269154310226, |
| "learning_rate": 8.777340788489145e-06, |
| "loss": 0.0985, |
| "step": 3270 |
| }, |
| { |
| "epoch": 4.592229611480574, |
| "grad_norm": 0.17877697944641113, |
| "learning_rate": 8.76664367471922e-06, |
| "loss": 0.0589, |
| "step": 3280 |
| }, |
| { |
| "epoch": 4.606230311515576, |
| "grad_norm": 0.4577700197696686, |
| "learning_rate": 8.755906548978957e-06, |
| "loss": 0.1041, |
| "step": 3290 |
| }, |
| { |
| "epoch": 4.620231011550578, |
| "grad_norm": 29.546653747558594, |
| "learning_rate": 8.745129525325746e-06, |
| "loss": 0.2561, |
| "step": 3300 |
| }, |
| { |
| "epoch": 4.63423171158558, |
| "grad_norm": 6.642750263214111, |
| "learning_rate": 8.734312718240807e-06, |
| "loss": 0.2921, |
| "step": 3310 |
| }, |
| { |
| "epoch": 4.648232411620581, |
| "grad_norm": 1.9864214658737183, |
| "learning_rate": 8.723456242627961e-06, |
| "loss": 0.0626, |
| "step": 3320 |
| }, |
| { |
| "epoch": 4.662233111655583, |
| "grad_norm": 0.28439265489578247, |
| "learning_rate": 8.712560213812421e-06, |
| "loss": 0.1834, |
| "step": 3330 |
| }, |
| { |
| "epoch": 4.676233811690585, |
| "grad_norm": 14.246397018432617, |
| "learning_rate": 8.701624747539563e-06, |
| "loss": 0.1986, |
| "step": 3340 |
| }, |
| { |
| "epoch": 4.690234511725587, |
| "grad_norm": 15.254389762878418, |
| "learning_rate": 8.690649959973693e-06, |
| "loss": 0.0628, |
| "step": 3350 |
| }, |
| { |
| "epoch": 4.704235211760588, |
| "grad_norm": 3.156299352645874, |
| "learning_rate": 8.679635967696815e-06, |
| "loss": 0.0524, |
| "step": 3360 |
| }, |
| { |
| "epoch": 4.71823591179559, |
| "grad_norm": 12.631438255310059, |
| "learning_rate": 8.6685828877074e-06, |
| "loss": 0.2967, |
| "step": 3370 |
| }, |
| { |
| "epoch": 4.732236611830592, |
| "grad_norm": 41.207496643066406, |
| "learning_rate": 8.657490837419124e-06, |
| "loss": 0.1632, |
| "step": 3380 |
| }, |
| { |
| "epoch": 4.746237311865594, |
| "grad_norm": 1.7955949306488037, |
| "learning_rate": 8.646359934659648e-06, |
| "loss": 0.1216, |
| "step": 3390 |
| }, |
| { |
| "epoch": 4.760238011900595, |
| "grad_norm": 39.87535858154297, |
| "learning_rate": 8.63519029766934e-06, |
| "loss": 0.1489, |
| "step": 3400 |
| }, |
| { |
| "epoch": 4.774238711935597, |
| "grad_norm": 43.35482406616211, |
| "learning_rate": 8.623982045100037e-06, |
| "loss": 0.0805, |
| "step": 3410 |
| }, |
| { |
| "epoch": 4.788239411970599, |
| "grad_norm": 5.584963798522949, |
| "learning_rate": 8.612735296013777e-06, |
| "loss": 0.1821, |
| "step": 3420 |
| }, |
| { |
| "epoch": 4.802240112005601, |
| "grad_norm": 2.1387903690338135, |
| "learning_rate": 8.601450169881533e-06, |
| "loss": 0.1858, |
| "step": 3430 |
| }, |
| { |
| "epoch": 4.816240812040602, |
| "grad_norm": 19.0107421875, |
| "learning_rate": 8.590126786581948e-06, |
| "loss": 0.2241, |
| "step": 3440 |
| }, |
| { |
| "epoch": 4.830241512075604, |
| "grad_norm": 3.1484501361846924, |
| "learning_rate": 8.57876526640006e-06, |
| "loss": 0.1577, |
| "step": 3450 |
| }, |
| { |
| "epoch": 4.844242212110606, |
| "grad_norm": 0.8619286417961121, |
| "learning_rate": 8.567365730026025e-06, |
| "loss": 0.2343, |
| "step": 3460 |
| }, |
| { |
| "epoch": 4.858242912145608, |
| "grad_norm": 17.187009811401367, |
| "learning_rate": 8.55592829855383e-06, |
| "loss": 0.2091, |
| "step": 3470 |
| }, |
| { |
| "epoch": 4.872243612180609, |
| "grad_norm": 17.190269470214844, |
| "learning_rate": 8.544453093480017e-06, |
| "loss": 0.2174, |
| "step": 3480 |
| }, |
| { |
| "epoch": 4.886244312215611, |
| "grad_norm": 2.3351292610168457, |
| "learning_rate": 8.53294023670238e-06, |
| "loss": 0.166, |
| "step": 3490 |
| }, |
| { |
| "epoch": 4.900245012250613, |
| "grad_norm": 17.254716873168945, |
| "learning_rate": 8.521389850518682e-06, |
| "loss": 0.1901, |
| "step": 3500 |
| }, |
| { |
| "epoch": 4.914245712285615, |
| "grad_norm": 43.53018569946289, |
| "learning_rate": 8.509802057625345e-06, |
| "loss": 0.1017, |
| "step": 3510 |
| }, |
| { |
| "epoch": 4.928246412320616, |
| "grad_norm": 1.7526437044143677, |
| "learning_rate": 8.498176981116152e-06, |
| "loss": 0.108, |
| "step": 3520 |
| }, |
| { |
| "epoch": 4.942247112355618, |
| "grad_norm": 1.4841587543487549, |
| "learning_rate": 8.486514744480946e-06, |
| "loss": 0.1514, |
| "step": 3530 |
| }, |
| { |
| "epoch": 4.95624781239062, |
| "grad_norm": 64.86244201660156, |
| "learning_rate": 8.474815471604303e-06, |
| "loss": 0.2551, |
| "step": 3540 |
| }, |
| { |
| "epoch": 4.970248512425622, |
| "grad_norm": 4.625051498413086, |
| "learning_rate": 8.463079286764224e-06, |
| "loss": 0.144, |
| "step": 3550 |
| }, |
| { |
| "epoch": 4.9842492124606235, |
| "grad_norm": 2.651116132736206, |
| "learning_rate": 8.451306314630825e-06, |
| "loss": 0.2379, |
| "step": 3560 |
| }, |
| { |
| "epoch": 4.998249912495625, |
| "grad_norm": 2.087207078933716, |
| "learning_rate": 8.439496680264993e-06, |
| "loss": 0.0657, |
| "step": 3570 |
| }, |
| { |
| "epoch": 4.999649982499125, |
| "eval_f1": 0.8029782235248062, |
| "eval_loss": 0.6280491352081299, |
| "eval_precision": 0.8110370007061343, |
| "eval_recall": 0.8026592022393282, |
| "eval_runtime": 85.6522, |
| "eval_samples_per_second": 16.684, |
| "eval_steps_per_second": 8.348, |
| "step": 3571 |
| }, |
| { |
| "epoch": 5.012250612530626, |
| "grad_norm": 0.1314196139574051, |
| "learning_rate": 8.42765050911707e-06, |
| "loss": 0.0052, |
| "step": 3580 |
| }, |
| { |
| "epoch": 5.026251312565628, |
| "grad_norm": 0.12375902384519577, |
| "learning_rate": 8.41576792702552e-06, |
| "loss": 0.0193, |
| "step": 3590 |
| }, |
| { |
| "epoch": 5.04025201260063, |
| "grad_norm": 1.289878249168396, |
| "learning_rate": 8.403849060215587e-06, |
| "loss": 0.0272, |
| "step": 3600 |
| }, |
| { |
| "epoch": 5.054252712635631, |
| "grad_norm": 12.536774635314941, |
| "learning_rate": 8.391894035297962e-06, |
| "loss": 0.0311, |
| "step": 3610 |
| }, |
| { |
| "epoch": 5.068253412670633, |
| "grad_norm": 11.550432205200195, |
| "learning_rate": 8.379902979267424e-06, |
| "loss": 0.0884, |
| "step": 3620 |
| }, |
| { |
| "epoch": 5.082254112705635, |
| "grad_norm": 0.03312607482075691, |
| "learning_rate": 8.367876019501512e-06, |
| "loss": 0.0515, |
| "step": 3630 |
| }, |
| { |
| "epoch": 5.096254812740637, |
| "grad_norm": 12.821776390075684, |
| "learning_rate": 8.35581328375915e-06, |
| "loss": 0.0622, |
| "step": 3640 |
| }, |
| { |
| "epoch": 5.110255512775638, |
| "grad_norm": 30.363012313842773, |
| "learning_rate": 8.343714900179304e-06, |
| "loss": 0.1507, |
| "step": 3650 |
| }, |
| { |
| "epoch": 5.12425621281064, |
| "grad_norm": 22.729272842407227, |
| "learning_rate": 8.331580997279616e-06, |
| "loss": 0.0806, |
| "step": 3660 |
| }, |
| { |
| "epoch": 5.138256912845642, |
| "grad_norm": 64.62901306152344, |
| "learning_rate": 8.319411703955042e-06, |
| "loss": 0.0506, |
| "step": 3670 |
| }, |
| { |
| "epoch": 5.152257612880644, |
| "grad_norm": 81.08839416503906, |
| "learning_rate": 8.307207149476478e-06, |
| "loss": 0.11, |
| "step": 3680 |
| }, |
| { |
| "epoch": 5.166258312915645, |
| "grad_norm": 0.01688474230468273, |
| "learning_rate": 8.294967463489387e-06, |
| "loss": 0.1072, |
| "step": 3690 |
| }, |
| { |
| "epoch": 5.180259012950647, |
| "grad_norm": 5.698174476623535, |
| "learning_rate": 8.282692776012429e-06, |
| "loss": 0.0356, |
| "step": 3700 |
| }, |
| { |
| "epoch": 5.194259712985649, |
| "grad_norm": 0.30638906359672546, |
| "learning_rate": 8.27038321743607e-06, |
| "loss": 0.1622, |
| "step": 3710 |
| }, |
| { |
| "epoch": 5.208260413020651, |
| "grad_norm": 1.2199918031692505, |
| "learning_rate": 8.258038918521203e-06, |
| "loss": 0.0041, |
| "step": 3720 |
| }, |
| { |
| "epoch": 5.222261113055652, |
| "grad_norm": 5.87195348739624, |
| "learning_rate": 8.24566001039776e-06, |
| "loss": 0.0538, |
| "step": 3730 |
| }, |
| { |
| "epoch": 5.236261813090654, |
| "grad_norm": 69.89470672607422, |
| "learning_rate": 8.233246624563315e-06, |
| "loss": 0.2333, |
| "step": 3740 |
| }, |
| { |
| "epoch": 5.250262513125656, |
| "grad_norm": 0.06884055584669113, |
| "learning_rate": 8.220798892881686e-06, |
| "loss": 0.0731, |
| "step": 3750 |
| }, |
| { |
| "epoch": 5.264263213160658, |
| "grad_norm": 28.682525634765625, |
| "learning_rate": 8.208316947581543e-06, |
| "loss": 0.1084, |
| "step": 3760 |
| }, |
| { |
| "epoch": 5.2782639131956595, |
| "grad_norm": 0.05551927164196968, |
| "learning_rate": 8.19580092125499e-06, |
| "loss": 0.1332, |
| "step": 3770 |
| }, |
| { |
| "epoch": 5.292264613230661, |
| "grad_norm": 26.148853302001953, |
| "learning_rate": 8.183250946856173e-06, |
| "loss": 0.0403, |
| "step": 3780 |
| }, |
| { |
| "epoch": 5.306265313265663, |
| "grad_norm": 0.33171024918556213, |
| "learning_rate": 8.17066715769985e-06, |
| "loss": 0.1941, |
| "step": 3790 |
| }, |
| { |
| "epoch": 5.320266013300665, |
| "grad_norm": 35.20890808105469, |
| "learning_rate": 8.158049687459986e-06, |
| "loss": 0.1546, |
| "step": 3800 |
| }, |
| { |
| "epoch": 5.3342667133356665, |
| "grad_norm": 20.95752716064453, |
| "learning_rate": 8.145398670168336e-06, |
| "loss": 0.1069, |
| "step": 3810 |
| }, |
| { |
| "epoch": 5.348267413370668, |
| "grad_norm": 9.339089393615723, |
| "learning_rate": 8.132714240213009e-06, |
| "loss": 0.0952, |
| "step": 3820 |
| }, |
| { |
| "epoch": 5.36226811340567, |
| "grad_norm": 18.237586975097656, |
| "learning_rate": 8.119996532337047e-06, |
| "loss": 0.0832, |
| "step": 3830 |
| }, |
| { |
| "epoch": 5.376268813440672, |
| "grad_norm": 6.259382247924805, |
| "learning_rate": 8.107245681636997e-06, |
| "loss": 0.0349, |
| "step": 3840 |
| }, |
| { |
| "epoch": 5.3902695134756735, |
| "grad_norm": 2.9350638389587402, |
| "learning_rate": 8.094461823561473e-06, |
| "loss": 0.0669, |
| "step": 3850 |
| }, |
| { |
| "epoch": 5.404270213510675, |
| "grad_norm": 3.0102787017822266, |
| "learning_rate": 8.081645093909715e-06, |
| "loss": 0.0833, |
| "step": 3860 |
| }, |
| { |
| "epoch": 5.418270913545677, |
| "grad_norm": 0.038166593760252, |
| "learning_rate": 8.068795628830148e-06, |
| "loss": 0.1295, |
| "step": 3870 |
| }, |
| { |
| "epoch": 5.432271613580679, |
| "grad_norm": 74.43338012695312, |
| "learning_rate": 8.055913564818938e-06, |
| "loss": 0.089, |
| "step": 3880 |
| }, |
| { |
| "epoch": 5.4462723136156805, |
| "grad_norm": 1.8652153015136719, |
| "learning_rate": 8.042999038718538e-06, |
| "loss": 0.0432, |
| "step": 3890 |
| }, |
| { |
| "epoch": 5.460273013650682, |
| "grad_norm": 1.0438085794448853, |
| "learning_rate": 8.030052187716238e-06, |
| "loss": 0.2206, |
| "step": 3900 |
| }, |
| { |
| "epoch": 5.474273713685684, |
| "grad_norm": 23.055278778076172, |
| "learning_rate": 8.017073149342703e-06, |
| "loss": 0.0681, |
| "step": 3910 |
| }, |
| { |
| "epoch": 5.488274413720686, |
| "grad_norm": 3.861517906188965, |
| "learning_rate": 8.004062061470519e-06, |
| "loss": 0.1237, |
| "step": 3920 |
| }, |
| { |
| "epoch": 5.502275113755688, |
| "grad_norm": 67.68190002441406, |
| "learning_rate": 7.991019062312723e-06, |
| "loss": 0.3206, |
| "step": 3930 |
| }, |
| { |
| "epoch": 5.516275813790689, |
| "grad_norm": 0.18575100600719452, |
| "learning_rate": 7.97794429042134e-06, |
| "loss": 0.0167, |
| "step": 3940 |
| }, |
| { |
| "epoch": 5.530276513825691, |
| "grad_norm": 8.642852783203125, |
| "learning_rate": 7.9648378846859e-06, |
| "loss": 0.2486, |
| "step": 3950 |
| }, |
| { |
| "epoch": 5.544277213860693, |
| "grad_norm": 25.390499114990234, |
| "learning_rate": 7.951699984331973e-06, |
| "loss": 0.0729, |
| "step": 3960 |
| }, |
| { |
| "epoch": 5.558277913895695, |
| "grad_norm": 41.303653717041016, |
| "learning_rate": 7.93853072891969e-06, |
| "loss": 0.0872, |
| "step": 3970 |
| }, |
| { |
| "epoch": 5.572278613930696, |
| "grad_norm": 2.244464874267578, |
| "learning_rate": 7.925330258342261e-06, |
| "loss": 0.0456, |
| "step": 3980 |
| }, |
| { |
| "epoch": 5.586279313965698, |
| "grad_norm": 1.1481804847717285, |
| "learning_rate": 7.912098712824474e-06, |
| "loss": 0.0371, |
| "step": 3990 |
| }, |
| { |
| "epoch": 5.6002800140007, |
| "grad_norm": 51.855377197265625, |
| "learning_rate": 7.89883623292123e-06, |
| "loss": 0.0609, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.614280714035702, |
| "grad_norm": 36.11799621582031, |
| "learning_rate": 7.885542959516027e-06, |
| "loss": 0.0478, |
| "step": 4010 |
| }, |
| { |
| "epoch": 5.628281414070703, |
| "grad_norm": 0.010757026262581348, |
| "learning_rate": 7.872219033819479e-06, |
| "loss": 0.1272, |
| "step": 4020 |
| }, |
| { |
| "epoch": 5.642282114105705, |
| "grad_norm": 0.05869268625974655, |
| "learning_rate": 7.85886459736781e-06, |
| "loss": 0.0924, |
| "step": 4030 |
| }, |
| { |
| "epoch": 5.656282814140707, |
| "grad_norm": 19.658790588378906, |
| "learning_rate": 7.84547979202135e-06, |
| "loss": 0.1318, |
| "step": 4040 |
| }, |
| { |
| "epoch": 5.670283514175709, |
| "grad_norm": 1.9408730268478394, |
| "learning_rate": 7.832064759963028e-06, |
| "loss": 0.056, |
| "step": 4050 |
| }, |
| { |
| "epoch": 5.68428421421071, |
| "grad_norm": 11.6222562789917, |
| "learning_rate": 7.818619643696863e-06, |
| "loss": 0.0831, |
| "step": 4060 |
| }, |
| { |
| "epoch": 5.698284914245712, |
| "grad_norm": 0.004137192852795124, |
| "learning_rate": 7.805144586046454e-06, |
| "loss": 0.0741, |
| "step": 4070 |
| }, |
| { |
| "epoch": 5.712285614280714, |
| "grad_norm": 26.38702392578125, |
| "learning_rate": 7.791639730153453e-06, |
| "loss": 0.2131, |
| "step": 4080 |
| }, |
| { |
| "epoch": 5.726286314315716, |
| "grad_norm": 22.777111053466797, |
| "learning_rate": 7.778105219476053e-06, |
| "loss": 0.157, |
| "step": 4090 |
| }, |
| { |
| "epoch": 5.740287014350717, |
| "grad_norm": 28.669824600219727, |
| "learning_rate": 7.764541197787462e-06, |
| "loss": 0.1796, |
| "step": 4100 |
| }, |
| { |
| "epoch": 5.754287714385719, |
| "grad_norm": 3.9441118240356445, |
| "learning_rate": 7.750947809174372e-06, |
| "loss": 0.0144, |
| "step": 4110 |
| }, |
| { |
| "epoch": 5.768288414420721, |
| "grad_norm": 1.612039566040039, |
| "learning_rate": 7.737325198035435e-06, |
| "loss": 0.0634, |
| "step": 4120 |
| }, |
| { |
| "epoch": 5.782289114455723, |
| "grad_norm": 31.477649688720703, |
| "learning_rate": 7.723673509079718e-06, |
| "loss": 0.15, |
| "step": 4130 |
| }, |
| { |
| "epoch": 5.7962898144907244, |
| "grad_norm": 21.06825065612793, |
| "learning_rate": 7.709992887325187e-06, |
| "loss": 0.074, |
| "step": 4140 |
| }, |
| { |
| "epoch": 5.810290514525726, |
| "grad_norm": 23.094755172729492, |
| "learning_rate": 7.69628347809714e-06, |
| "loss": 0.089, |
| "step": 4150 |
| }, |
| { |
| "epoch": 5.824291214560728, |
| "grad_norm": 19.415878295898438, |
| "learning_rate": 7.68254542702668e-06, |
| "loss": 0.0734, |
| "step": 4160 |
| }, |
| { |
| "epoch": 5.83829191459573, |
| "grad_norm": 0.020781751722097397, |
| "learning_rate": 7.668778880049167e-06, |
| "loss": 0.0587, |
| "step": 4170 |
| }, |
| { |
| "epoch": 5.8522926146307315, |
| "grad_norm": 48.292869567871094, |
| "learning_rate": 7.654983983402662e-06, |
| "loss": 0.117, |
| "step": 4180 |
| }, |
| { |
| "epoch": 5.866293314665733, |
| "grad_norm": 27.23204803466797, |
| "learning_rate": 7.641160883626374e-06, |
| "loss": 0.1372, |
| "step": 4190 |
| }, |
| { |
| "epoch": 5.880294014700735, |
| "grad_norm": 0.6378940343856812, |
| "learning_rate": 7.627309727559114e-06, |
| "loss": 0.0299, |
| "step": 4200 |
| }, |
| { |
| "epoch": 5.894294714735737, |
| "grad_norm": 0.020519094541668892, |
| "learning_rate": 7.613430662337715e-06, |
| "loss": 0.1332, |
| "step": 4210 |
| }, |
| { |
| "epoch": 5.9082954147707385, |
| "grad_norm": 0.2816191017627716, |
| "learning_rate": 7.599523835395493e-06, |
| "loss": 0.0604, |
| "step": 4220 |
| }, |
| { |
| "epoch": 5.92229611480574, |
| "grad_norm": 0.048931095749139786, |
| "learning_rate": 7.585589394460661e-06, |
| "loss": 0.0944, |
| "step": 4230 |
| }, |
| { |
| "epoch": 5.936296814840742, |
| "grad_norm": 14.277728080749512, |
| "learning_rate": 7.571627487554769e-06, |
| "loss": 0.1221, |
| "step": 4240 |
| }, |
| { |
| "epoch": 5.950297514875744, |
| "grad_norm": 61.147769927978516, |
| "learning_rate": 7.5576382629911306e-06, |
| "loss": 0.2043, |
| "step": 4250 |
| }, |
| { |
| "epoch": 5.9642982149107455, |
| "grad_norm": 61.5135498046875, |
| "learning_rate": 7.543621869373249e-06, |
| "loss": 0.1118, |
| "step": 4260 |
| }, |
| { |
| "epoch": 5.978298914945747, |
| "grad_norm": 0.41074830293655396, |
| "learning_rate": 7.529578455593232e-06, |
| "loss": 0.0869, |
| "step": 4270 |
| }, |
| { |
| "epoch": 5.992299614980749, |
| "grad_norm": 22.36100959777832, |
| "learning_rate": 7.515508170830221e-06, |
| "loss": 0.0938, |
| "step": 4280 |
| }, |
| { |
| "epoch": 5.99929996499825, |
| "eval_f1": 0.8054250784759932, |
| "eval_loss": 0.707984209060669, |
| "eval_precision": 0.8045938711804601, |
| "eval_recall": 0.8054583624912526, |
| "eval_runtime": 85.6965, |
| "eval_samples_per_second": 16.675, |
| "eval_steps_per_second": 8.343, |
| "step": 4285 |
| }, |
| { |
| "epoch": 6.006300315015751, |
| "grad_norm": 2.4087300300598145, |
| "learning_rate": 7.501411164548792e-06, |
| "loss": 0.0317, |
| "step": 4290 |
| }, |
| { |
| "epoch": 6.0203010150507525, |
| "grad_norm": 8.070867538452148, |
| "learning_rate": 7.487287586497384e-06, |
| "loss": 0.0422, |
| "step": 4300 |
| }, |
| { |
| "epoch": 6.034301715085754, |
| "grad_norm": 20.54210090637207, |
| "learning_rate": 7.473137586706693e-06, |
| "loss": 0.0932, |
| "step": 4310 |
| }, |
| { |
| "epoch": 6.048302415120756, |
| "grad_norm": 0.29761630296707153, |
| "learning_rate": 7.458961315488095e-06, |
| "loss": 0.005, |
| "step": 4320 |
| }, |
| { |
| "epoch": 6.062303115155758, |
| "grad_norm": 0.02404841221868992, |
| "learning_rate": 7.444758923432028e-06, |
| "loss": 0.0024, |
| "step": 4330 |
| }, |
| { |
| "epoch": 6.07630381519076, |
| "grad_norm": 0.01488957554101944, |
| "learning_rate": 7.4305305614064145e-06, |
| "loss": 0.0225, |
| "step": 4340 |
| }, |
| { |
| "epoch": 6.090304515225761, |
| "grad_norm": 0.9857441186904907, |
| "learning_rate": 7.416276380555041e-06, |
| "loss": 0.0527, |
| "step": 4350 |
| }, |
| { |
| "epoch": 6.104305215260763, |
| "grad_norm": 13.299368858337402, |
| "learning_rate": 7.401996532295965e-06, |
| "loss": 0.033, |
| "step": 4360 |
| }, |
| { |
| "epoch": 6.118305915295765, |
| "grad_norm": 8.178146362304688, |
| "learning_rate": 7.3876911683198995e-06, |
| "loss": 0.0218, |
| "step": 4370 |
| }, |
| { |
| "epoch": 6.132306615330767, |
| "grad_norm": 0.02896474301815033, |
| "learning_rate": 7.373360440588604e-06, |
| "loss": 0.0457, |
| "step": 4380 |
| }, |
| { |
| "epoch": 6.146307315365768, |
| "grad_norm": 2.2403640747070312, |
| "learning_rate": 7.359004501333267e-06, |
| "loss": 0.0513, |
| "step": 4390 |
| }, |
| { |
| "epoch": 6.16030801540077, |
| "grad_norm": 19.458662033081055, |
| "learning_rate": 7.344623503052898e-06, |
| "loss": 0.0351, |
| "step": 4400 |
| }, |
| { |
| "epoch": 6.174308715435772, |
| "grad_norm": 0.648754358291626, |
| "learning_rate": 7.330217598512696e-06, |
| "loss": 0.0278, |
| "step": 4410 |
| }, |
| { |
| "epoch": 6.188309415470774, |
| "grad_norm": 1.0036877393722534, |
| "learning_rate": 7.315786940742432e-06, |
| "loss": 0.0331, |
| "step": 4420 |
| }, |
| { |
| "epoch": 6.202310115505775, |
| "grad_norm": 72.4751968383789, |
| "learning_rate": 7.301331683034827e-06, |
| "loss": 0.0618, |
| "step": 4430 |
| }, |
| { |
| "epoch": 6.216310815540777, |
| "grad_norm": 0.07326419651508331, |
| "learning_rate": 7.286851978943919e-06, |
| "loss": 0.0361, |
| "step": 4440 |
| }, |
| { |
| "epoch": 6.230311515575779, |
| "grad_norm": 0.35342127084732056, |
| "learning_rate": 7.2723479822834295e-06, |
| "loss": 0.0262, |
| "step": 4450 |
| }, |
| { |
| "epoch": 6.244312215610781, |
| "grad_norm": 0.19052493572235107, |
| "learning_rate": 7.257819847125136e-06, |
| "loss": 0.0653, |
| "step": 4460 |
| }, |
| { |
| "epoch": 6.258312915645782, |
| "grad_norm": 36.13716125488281, |
| "learning_rate": 7.243267727797235e-06, |
| "loss": 0.0784, |
| "step": 4470 |
| }, |
| { |
| "epoch": 6.272313615680784, |
| "grad_norm": 29.920095443725586, |
| "learning_rate": 7.2286917788826926e-06, |
| "loss": 0.0571, |
| "step": 4480 |
| }, |
| { |
| "epoch": 6.286314315715786, |
| "grad_norm": 0.20633961260318756, |
| "learning_rate": 7.214092155217614e-06, |
| "loss": 0.0306, |
| "step": 4490 |
| }, |
| { |
| "epoch": 6.300315015750788, |
| "grad_norm": 1.4535483121871948, |
| "learning_rate": 7.199469011889598e-06, |
| "loss": 0.0185, |
| "step": 4500 |
| }, |
| { |
| "epoch": 6.314315715785789, |
| "grad_norm": 0.011330017820000648, |
| "learning_rate": 7.18482250423608e-06, |
| "loss": 0.0009, |
| "step": 4510 |
| }, |
| { |
| "epoch": 6.328316415820791, |
| "grad_norm": 0.12344878911972046, |
| "learning_rate": 7.170152787842689e-06, |
| "loss": 0.0077, |
| "step": 4520 |
| }, |
| { |
| "epoch": 6.342317115855793, |
| "grad_norm": 0.0827026218175888, |
| "learning_rate": 7.155460018541597e-06, |
| "loss": 0.0199, |
| "step": 4530 |
| }, |
| { |
| "epoch": 6.356317815890795, |
| "grad_norm": 0.08871891349554062, |
| "learning_rate": 7.140744352409856e-06, |
| "loss": 0.0002, |
| "step": 4540 |
| }, |
| { |
| "epoch": 6.3703185159257965, |
| "grad_norm": 18.5615234375, |
| "learning_rate": 7.12600594576775e-06, |
| "loss": 0.0366, |
| "step": 4550 |
| }, |
| { |
| "epoch": 6.384319215960798, |
| "grad_norm": 3.0964789390563965, |
| "learning_rate": 7.1112449551771225e-06, |
| "loss": 0.0077, |
| "step": 4560 |
| }, |
| { |
| "epoch": 6.3983199159958, |
| "grad_norm": 0.0029246637132018805, |
| "learning_rate": 7.096461537439725e-06, |
| "loss": 0.0266, |
| "step": 4570 |
| }, |
| { |
| "epoch": 6.412320616030802, |
| "grad_norm": 2.371020555496216, |
| "learning_rate": 7.0816558495955435e-06, |
| "loss": 0.0496, |
| "step": 4580 |
| }, |
| { |
| "epoch": 6.4263213160658035, |
| "grad_norm": 0.021786697208881378, |
| "learning_rate": 7.066828048921133e-06, |
| "loss": 0.0161, |
| "step": 4590 |
| }, |
| { |
| "epoch": 6.440322016100805, |
| "grad_norm": 21.118539810180664, |
| "learning_rate": 7.051978292927947e-06, |
| "loss": 0.1049, |
| "step": 4600 |
| }, |
| { |
| "epoch": 6.454322716135807, |
| "grad_norm": 56.019474029541016, |
| "learning_rate": 7.0371067393606665e-06, |
| "loss": 0.0605, |
| "step": 4610 |
| }, |
| { |
| "epoch": 6.468323416170809, |
| "grad_norm": 0.010689962655305862, |
| "learning_rate": 7.022213546195516e-06, |
| "loss": 0.088, |
| "step": 4620 |
| }, |
| { |
| "epoch": 6.4823241162058105, |
| "grad_norm": 9.061432838439941, |
| "learning_rate": 7.007298871638597e-06, |
| "loss": 0.0127, |
| "step": 4630 |
| }, |
| { |
| "epoch": 6.496324816240812, |
| "grad_norm": 5.053805351257324, |
| "learning_rate": 6.9923628741242e-06, |
| "loss": 0.0748, |
| "step": 4640 |
| }, |
| { |
| "epoch": 6.510325516275814, |
| "grad_norm": 50.82999038696289, |
| "learning_rate": 6.97740571231312e-06, |
| "loss": 0.1141, |
| "step": 4650 |
| }, |
| { |
| "epoch": 6.524326216310816, |
| "grad_norm": 0.012241682969033718, |
| "learning_rate": 6.96242754509098e-06, |
| "loss": 0.1384, |
| "step": 4660 |
| }, |
| { |
| "epoch": 6.5383269163458175, |
| "grad_norm": 0.007444610353559256, |
| "learning_rate": 6.947428531566531e-06, |
| "loss": 0.0355, |
| "step": 4670 |
| }, |
| { |
| "epoch": 6.552327616380819, |
| "grad_norm": 8.445463180541992, |
| "learning_rate": 6.9324088310699745e-06, |
| "loss": 0.0322, |
| "step": 4680 |
| }, |
| { |
| "epoch": 6.566328316415821, |
| "grad_norm": 0.0021326360292732716, |
| "learning_rate": 6.9173686031512595e-06, |
| "loss": 0.1049, |
| "step": 4690 |
| }, |
| { |
| "epoch": 6.580329016450823, |
| "grad_norm": 61.38804626464844, |
| "learning_rate": 6.902308007578392e-06, |
| "loss": 0.1214, |
| "step": 4700 |
| }, |
| { |
| "epoch": 6.5943297164858246, |
| "grad_norm": 2.3494713306427, |
| "learning_rate": 6.887227204335739e-06, |
| "loss": 0.0139, |
| "step": 4710 |
| }, |
| { |
| "epoch": 6.608330416520826, |
| "grad_norm": 0.03077736310660839, |
| "learning_rate": 6.8721263536223295e-06, |
| "loss": 0.009, |
| "step": 4720 |
| }, |
| { |
| "epoch": 6.622331116555828, |
| "grad_norm": 0.17415045201778412, |
| "learning_rate": 6.857005615850148e-06, |
| "loss": 0.2035, |
| "step": 4730 |
| }, |
| { |
| "epoch": 6.63633181659083, |
| "grad_norm": 1.6760774850845337, |
| "learning_rate": 6.841865151642434e-06, |
| "loss": 0.0292, |
| "step": 4740 |
| }, |
| { |
| "epoch": 6.650332516625832, |
| "grad_norm": 0.017536571249365807, |
| "learning_rate": 6.8267051218319766e-06, |
| "loss": 0.0159, |
| "step": 4750 |
| }, |
| { |
| "epoch": 6.664333216660833, |
| "grad_norm": 31.02813148498535, |
| "learning_rate": 6.8115256874594015e-06, |
| "loss": 0.0545, |
| "step": 4760 |
| }, |
| { |
| "epoch": 6.678333916695835, |
| "grad_norm": 28.042268753051758, |
| "learning_rate": 6.7963270097714705e-06, |
| "loss": 0.2497, |
| "step": 4770 |
| }, |
| { |
| "epoch": 6.692334616730837, |
| "grad_norm": 0.08991402387619019, |
| "learning_rate": 6.781109250219353e-06, |
| "loss": 0.0343, |
| "step": 4780 |
| }, |
| { |
| "epoch": 6.706335316765839, |
| "grad_norm": 16.03205108642578, |
| "learning_rate": 6.765872570456926e-06, |
| "loss": 0.0407, |
| "step": 4790 |
| }, |
| { |
| "epoch": 6.72033601680084, |
| "grad_norm": 0.023787444457411766, |
| "learning_rate": 6.750617132339045e-06, |
| "loss": 0.1305, |
| "step": 4800 |
| }, |
| { |
| "epoch": 6.734336716835842, |
| "grad_norm": 0.12138766795396805, |
| "learning_rate": 6.735343097919838e-06, |
| "loss": 0.0951, |
| "step": 4810 |
| }, |
| { |
| "epoch": 6.748337416870844, |
| "grad_norm": 5.909520626068115, |
| "learning_rate": 6.720050629450963e-06, |
| "loss": 0.0345, |
| "step": 4820 |
| }, |
| { |
| "epoch": 6.762338116905846, |
| "grad_norm": 0.01361538004130125, |
| "learning_rate": 6.704739889379914e-06, |
| "loss": 0.0356, |
| "step": 4830 |
| }, |
| { |
| "epoch": 6.776338816940847, |
| "grad_norm": 0.11465916782617569, |
| "learning_rate": 6.689411040348267e-06, |
| "loss": 0.0626, |
| "step": 4840 |
| }, |
| { |
| "epoch": 6.790339516975849, |
| "grad_norm": 0.06471576541662216, |
| "learning_rate": 6.674064245189969e-06, |
| "loss": 0.085, |
| "step": 4850 |
| }, |
| { |
| "epoch": 6.804340217010851, |
| "grad_norm": 0.16323409974575043, |
| "learning_rate": 6.6586996669296014e-06, |
| "loss": 0.0755, |
| "step": 4860 |
| }, |
| { |
| "epoch": 6.818340917045853, |
| "grad_norm": 0.5551852583885193, |
| "learning_rate": 6.6433174687806525e-06, |
| "loss": 0.0152, |
| "step": 4870 |
| }, |
| { |
| "epoch": 6.832341617080854, |
| "grad_norm": 32.66178512573242, |
| "learning_rate": 6.62791781414378e-06, |
| "loss": 0.0479, |
| "step": 4880 |
| }, |
| { |
| "epoch": 6.846342317115856, |
| "grad_norm": 0.015004020184278488, |
| "learning_rate": 6.612500866605078e-06, |
| "loss": 0.0181, |
| "step": 4890 |
| }, |
| { |
| "epoch": 6.860343017150858, |
| "grad_norm": 42.09051513671875, |
| "learning_rate": 6.597066789934336e-06, |
| "loss": 0.078, |
| "step": 4900 |
| }, |
| { |
| "epoch": 6.87434371718586, |
| "grad_norm": 13.405208587646484, |
| "learning_rate": 6.581615748083306e-06, |
| "loss": 0.0522, |
| "step": 4910 |
| }, |
| { |
| "epoch": 6.8883444172208605, |
| "grad_norm": 58.45531463623047, |
| "learning_rate": 6.56614790518395e-06, |
| "loss": 0.0454, |
| "step": 4920 |
| }, |
| { |
| "epoch": 6.902345117255862, |
| "grad_norm": 24.918203353881836, |
| "learning_rate": 6.5506634255467085e-06, |
| "loss": 0.0912, |
| "step": 4930 |
| }, |
| { |
| "epoch": 6.916345817290864, |
| "grad_norm": 0.018271498382091522, |
| "learning_rate": 6.5351624736587446e-06, |
| "loss": 0.0156, |
| "step": 4940 |
| }, |
| { |
| "epoch": 6.930346517325866, |
| "grad_norm": 0.018698183819651604, |
| "learning_rate": 6.5196452141822045e-06, |
| "loss": 0.1512, |
| "step": 4950 |
| }, |
| { |
| "epoch": 6.944347217360868, |
| "grad_norm": 24.42111587524414, |
| "learning_rate": 6.504111811952463e-06, |
| "loss": 0.0333, |
| "step": 4960 |
| }, |
| { |
| "epoch": 6.958347917395869, |
| "grad_norm": 0.0020534696523100138, |
| "learning_rate": 6.488562431976376e-06, |
| "loss": 0.0105, |
| "step": 4970 |
| }, |
| { |
| "epoch": 6.972348617430871, |
| "grad_norm": 0.06145526468753815, |
| "learning_rate": 6.472997239430529e-06, |
| "loss": 0.0791, |
| "step": 4980 |
| }, |
| { |
| "epoch": 6.986349317465873, |
| "grad_norm": 0.0036424114368855953, |
| "learning_rate": 6.457416399659472e-06, |
| "loss": 0.0713, |
| "step": 4990 |
| }, |
| { |
| "epoch": 6.998949947497374, |
| "eval_f1": 0.8174142102409481, |
| "eval_loss": 0.6825958490371704, |
| "eval_precision": 0.8189024398260805, |
| "eval_recall": 0.8173547935619314, |
| "eval_runtime": 85.6833, |
| "eval_samples_per_second": 16.678, |
| "eval_steps_per_second": 8.345, |
| "step": 4999 |
| }, |
| { |
| "epoch": 7.000350017500875, |
| "grad_norm": 3.2184433937072754, |
| "learning_rate": 6.441820078173979e-06, |
| "loss": 0.1252, |
| "step": 5000 |
| }, |
| { |
| "epoch": 7.014350717535876, |
| "grad_norm": 0.02290227822959423, |
| "learning_rate": 6.426208440649278e-06, |
| "loss": 0.0076, |
| "step": 5010 |
| }, |
| { |
| "epoch": 7.028351417570878, |
| "grad_norm": 0.8488892912864685, |
| "learning_rate": 6.410581652923298e-06, |
| "loss": 0.0142, |
| "step": 5020 |
| }, |
| { |
| "epoch": 7.04235211760588, |
| "grad_norm": 6.883611679077148, |
| "learning_rate": 6.394939880994899e-06, |
| "loss": 0.0133, |
| "step": 5030 |
| }, |
| { |
| "epoch": 7.056352817640882, |
| "grad_norm": 0.7044736742973328, |
| "learning_rate": 6.379283291022118e-06, |
| "loss": 0.0148, |
| "step": 5040 |
| }, |
| { |
| "epoch": 7.070353517675883, |
| "grad_norm": 8.82105827331543, |
| "learning_rate": 6.363612049320398e-06, |
| "loss": 0.0019, |
| "step": 5050 |
| }, |
| { |
| "epoch": 7.084354217710885, |
| "grad_norm": 0.06456708908081055, |
| "learning_rate": 6.347926322360825e-06, |
| "loss": 0.0021, |
| "step": 5060 |
| }, |
| { |
| "epoch": 7.098354917745887, |
| "grad_norm": 0.15175148844718933, |
| "learning_rate": 6.3322262767683564e-06, |
| "loss": 0.0186, |
| "step": 5070 |
| }, |
| { |
| "epoch": 7.112355617780889, |
| "grad_norm": 0.7544161677360535, |
| "learning_rate": 6.31651207932005e-06, |
| "loss": 0.0667, |
| "step": 5080 |
| }, |
| { |
| "epoch": 7.12635631781589, |
| "grad_norm": 1.5102256536483765, |
| "learning_rate": 6.300783896943299e-06, |
| "loss": 0.0091, |
| "step": 5090 |
| }, |
| { |
| "epoch": 7.140357017850892, |
| "grad_norm": 0.09220393002033234, |
| "learning_rate": 6.285041896714052e-06, |
| "loss": 0.0247, |
| "step": 5100 |
| }, |
| { |
| "epoch": 7.154357717885894, |
| "grad_norm": 31.779438018798828, |
| "learning_rate": 6.269286245855039e-06, |
| "loss": 0.0128, |
| "step": 5110 |
| }, |
| { |
| "epoch": 7.168358417920896, |
| "grad_norm": 0.2819765508174896, |
| "learning_rate": 6.253517111734004e-06, |
| "loss": 0.0381, |
| "step": 5120 |
| }, |
| { |
| "epoch": 7.182359117955897, |
| "grad_norm": 0.007849560119211674, |
| "learning_rate": 6.237734661861909e-06, |
| "loss": 0.0311, |
| "step": 5130 |
| }, |
| { |
| "epoch": 7.196359817990899, |
| "grad_norm": 0.43997815251350403, |
| "learning_rate": 6.221939063891176e-06, |
| "loss": 0.0156, |
| "step": 5140 |
| }, |
| { |
| "epoch": 7.210360518025901, |
| "grad_norm": 0.0021186801604926586, |
| "learning_rate": 6.206130485613887e-06, |
| "loss": 0.0042, |
| "step": 5150 |
| }, |
| { |
| "epoch": 7.224361218060903, |
| "grad_norm": 32.345211029052734, |
| "learning_rate": 6.1903090949600144e-06, |
| "loss": 0.1152, |
| "step": 5160 |
| }, |
| { |
| "epoch": 7.2383619180959045, |
| "grad_norm": 22.896106719970703, |
| "learning_rate": 6.1744750599956315e-06, |
| "loss": 0.0107, |
| "step": 5170 |
| }, |
| { |
| "epoch": 7.252362618130906, |
| "grad_norm": 2.5360770225524902, |
| "learning_rate": 6.158628548921132e-06, |
| "loss": 0.0306, |
| "step": 5180 |
| }, |
| { |
| "epoch": 7.266363318165908, |
| "grad_norm": 0.03135138005018234, |
| "learning_rate": 6.142769730069431e-06, |
| "loss": 0.0608, |
| "step": 5190 |
| }, |
| { |
| "epoch": 7.28036401820091, |
| "grad_norm": 0.004951399751007557, |
| "learning_rate": 6.126898771904197e-06, |
| "loss": 0.108, |
| "step": 5200 |
| }, |
| { |
| "epoch": 7.2943647182359115, |
| "grad_norm": 0.04001991078257561, |
| "learning_rate": 6.11101584301804e-06, |
| "loss": 0.015, |
| "step": 5210 |
| }, |
| { |
| "epoch": 7.308365418270913, |
| "grad_norm": 0.25486335158348083, |
| "learning_rate": 6.095121112130742e-06, |
| "loss": 0.0409, |
| "step": 5220 |
| }, |
| { |
| "epoch": 7.322366118305915, |
| "grad_norm": 5.508518695831299, |
| "learning_rate": 6.079214748087444e-06, |
| "loss": 0.0025, |
| "step": 5230 |
| }, |
| { |
| "epoch": 7.336366818340917, |
| "grad_norm": 1.9795469045639038, |
| "learning_rate": 6.063296919856872e-06, |
| "loss": 0.0701, |
| "step": 5240 |
| }, |
| { |
| "epoch": 7.3503675183759185, |
| "grad_norm": 0.009440050460398197, |
| "learning_rate": 6.047367796529523e-06, |
| "loss": 0.0094, |
| "step": 5250 |
| }, |
| { |
| "epoch": 7.36436821841092, |
| "grad_norm": 0.03409438207745552, |
| "learning_rate": 6.031427547315889e-06, |
| "loss": 0.0715, |
| "step": 5260 |
| }, |
| { |
| "epoch": 7.378368918445922, |
| "grad_norm": 0.0017209186917170882, |
| "learning_rate": 6.0154763415446395e-06, |
| "loss": 0.0754, |
| "step": 5270 |
| }, |
| { |
| "epoch": 7.392369618480924, |
| "grad_norm": 0.07516732811927795, |
| "learning_rate": 5.9995143486608406e-06, |
| "loss": 0.0008, |
| "step": 5280 |
| }, |
| { |
| "epoch": 7.4063703185159255, |
| "grad_norm": 0.0012286275159567595, |
| "learning_rate": 5.983541738224141e-06, |
| "loss": 0.0191, |
| "step": 5290 |
| }, |
| { |
| "epoch": 7.420371018550927, |
| "grad_norm": 0.16271395981311798, |
| "learning_rate": 5.967558679906981e-06, |
| "loss": 0.0349, |
| "step": 5300 |
| }, |
| { |
| "epoch": 7.434371718585929, |
| "grad_norm": 28.011831283569336, |
| "learning_rate": 5.951565343492779e-06, |
| "loss": 0.0093, |
| "step": 5310 |
| }, |
| { |
| "epoch": 7.448372418620931, |
| "grad_norm": 0.0033658405300229788, |
| "learning_rate": 5.935561898874142e-06, |
| "loss": 0.0241, |
| "step": 5320 |
| }, |
| { |
| "epoch": 7.462373118655933, |
| "grad_norm": 0.0023493689950555563, |
| "learning_rate": 5.91954851605105e-06, |
| "loss": 0.043, |
| "step": 5330 |
| }, |
| { |
| "epoch": 7.476373818690934, |
| "grad_norm": 0.00039932539220899343, |
| "learning_rate": 5.9035253651290555e-06, |
| "loss": 0.0009, |
| "step": 5340 |
| }, |
| { |
| "epoch": 7.490374518725936, |
| "grad_norm": 0.0065233842469751835, |
| "learning_rate": 5.887492616317471e-06, |
| "loss": 0.0088, |
| "step": 5350 |
| }, |
| { |
| "epoch": 7.504375218760938, |
| "grad_norm": 0.0013134811306372285, |
| "learning_rate": 5.87145043992757e-06, |
| "loss": 0.0189, |
| "step": 5360 |
| }, |
| { |
| "epoch": 7.51837591879594, |
| "grad_norm": 28.32547950744629, |
| "learning_rate": 5.855399006370766e-06, |
| "loss": 0.0137, |
| "step": 5370 |
| }, |
| { |
| "epoch": 7.532376618830941, |
| "grad_norm": 0.007662694435566664, |
| "learning_rate": 5.839338486156812e-06, |
| "loss": 0.0001, |
| "step": 5380 |
| }, |
| { |
| "epoch": 7.546377318865943, |
| "grad_norm": 0.18727760016918182, |
| "learning_rate": 5.8232690498919906e-06, |
| "loss": 0.0743, |
| "step": 5390 |
| }, |
| { |
| "epoch": 7.560378018900945, |
| "grad_norm": 0.011461739428341389, |
| "learning_rate": 5.80719086827729e-06, |
| "loss": 0.0051, |
| "step": 5400 |
| }, |
| { |
| "epoch": 7.574378718935947, |
| "grad_norm": 1.8720335960388184, |
| "learning_rate": 5.7911041121066e-06, |
| "loss": 0.0027, |
| "step": 5410 |
| }, |
| { |
| "epoch": 7.588379418970948, |
| "grad_norm": 0.2441912293434143, |
| "learning_rate": 5.775008952264897e-06, |
| "loss": 0.0432, |
| "step": 5420 |
| }, |
| { |
| "epoch": 7.60238011900595, |
| "grad_norm": 0.01594419591128826, |
| "learning_rate": 5.7589055597264235e-06, |
| "loss": 0.0138, |
| "step": 5430 |
| }, |
| { |
| "epoch": 7.616380819040952, |
| "grad_norm": 3.6038880348205566, |
| "learning_rate": 5.742794105552879e-06, |
| "loss": 0.0211, |
| "step": 5440 |
| }, |
| { |
| "epoch": 7.630381519075954, |
| "grad_norm": 0.05175204947590828, |
| "learning_rate": 5.726674760891599e-06, |
| "loss": 0.0398, |
| "step": 5450 |
| }, |
| { |
| "epoch": 7.644382219110955, |
| "grad_norm": 0.30954572558403015, |
| "learning_rate": 5.71054769697374e-06, |
| "loss": 0.0859, |
| "step": 5460 |
| }, |
| { |
| "epoch": 7.658382919145957, |
| "grad_norm": 6.447484016418457, |
| "learning_rate": 5.694413085112448e-06, |
| "loss": 0.0376, |
| "step": 5470 |
| }, |
| { |
| "epoch": 7.672383619180959, |
| "grad_norm": 0.004400278907269239, |
| "learning_rate": 5.678271096701059e-06, |
| "loss": 0.0112, |
| "step": 5480 |
| }, |
| { |
| "epoch": 7.686384319215961, |
| "grad_norm": 0.009013152681291103, |
| "learning_rate": 5.662121903211265e-06, |
| "loss": 0.0751, |
| "step": 5490 |
| }, |
| { |
| "epoch": 7.700385019250962, |
| "grad_norm": 16.201181411743164, |
| "learning_rate": 5.645965676191294e-06, |
| "loss": 0.0557, |
| "step": 5500 |
| }, |
| { |
| "epoch": 7.714385719285964, |
| "grad_norm": 0.002877170220017433, |
| "learning_rate": 5.62980258726409e-06, |
| "loss": 0.0036, |
| "step": 5510 |
| }, |
| { |
| "epoch": 7.728386419320966, |
| "grad_norm": 0.004419579636305571, |
| "learning_rate": 5.6136328081254874e-06, |
| "loss": 0.0293, |
| "step": 5520 |
| }, |
| { |
| "epoch": 7.742387119355968, |
| "grad_norm": 0.005260075442492962, |
| "learning_rate": 5.597456510542395e-06, |
| "loss": 0.0313, |
| "step": 5530 |
| }, |
| { |
| "epoch": 7.756387819390969, |
| "grad_norm": 3.496971368789673, |
| "learning_rate": 5.581273866350955e-06, |
| "loss": 0.0377, |
| "step": 5540 |
| }, |
| { |
| "epoch": 7.770388519425971, |
| "grad_norm": 0.009419528767466545, |
| "learning_rate": 5.565085047454737e-06, |
| "loss": 0.0366, |
| "step": 5550 |
| }, |
| { |
| "epoch": 7.784389219460973, |
| "grad_norm": 0.004169847816228867, |
| "learning_rate": 5.548890225822896e-06, |
| "loss": 0.0304, |
| "step": 5560 |
| }, |
| { |
| "epoch": 7.798389919495975, |
| "grad_norm": 0.3091202974319458, |
| "learning_rate": 5.53268957348836e-06, |
| "loss": 0.0034, |
| "step": 5570 |
| }, |
| { |
| "epoch": 7.8123906195309765, |
| "grad_norm": 0.6783474683761597, |
| "learning_rate": 5.5164832625459865e-06, |
| "loss": 0.0056, |
| "step": 5580 |
| }, |
| { |
| "epoch": 7.826391319565978, |
| "grad_norm": 0.16367115080356598, |
| "learning_rate": 5.500271465150748e-06, |
| "loss": 0.0757, |
| "step": 5590 |
| }, |
| { |
| "epoch": 7.84039201960098, |
| "grad_norm": 0.19091859459877014, |
| "learning_rate": 5.484054353515896e-06, |
| "loss": 0.0141, |
| "step": 5600 |
| }, |
| { |
| "epoch": 7.854392719635982, |
| "grad_norm": 3.1218807697296143, |
| "learning_rate": 5.467832099911135e-06, |
| "loss": 0.0024, |
| "step": 5610 |
| }, |
| { |
| "epoch": 7.8683934196709835, |
| "grad_norm": 38.620418548583984, |
| "learning_rate": 5.451604876660787e-06, |
| "loss": 0.0546, |
| "step": 5620 |
| }, |
| { |
| "epoch": 7.882394119705985, |
| "grad_norm": 17.191007614135742, |
| "learning_rate": 5.435372856141975e-06, |
| "loss": 0.0388, |
| "step": 5630 |
| }, |
| { |
| "epoch": 7.896394819740987, |
| "grad_norm": 0.049534134566783905, |
| "learning_rate": 5.4191362107827704e-06, |
| "loss": 0.0471, |
| "step": 5640 |
| }, |
| { |
| "epoch": 7.910395519775989, |
| "grad_norm": 0.07554405927658081, |
| "learning_rate": 5.402895113060379e-06, |
| "loss": 0.0097, |
| "step": 5650 |
| }, |
| { |
| "epoch": 7.9243962198109905, |
| "grad_norm": 10.603336334228516, |
| "learning_rate": 5.3866497354993e-06, |
| "loss": 0.0395, |
| "step": 5660 |
| }, |
| { |
| "epoch": 7.938396919845992, |
| "grad_norm": 10.075541496276855, |
| "learning_rate": 5.370400250669504e-06, |
| "loss": 0.0305, |
| "step": 5670 |
| }, |
| { |
| "epoch": 7.952397619880994, |
| "grad_norm": 0.4186045825481415, |
| "learning_rate": 5.354146831184579e-06, |
| "loss": 0.0068, |
| "step": 5680 |
| }, |
| { |
| "epoch": 7.966398319915996, |
| "grad_norm": 0.040065351873636246, |
| "learning_rate": 5.337889649699921e-06, |
| "loss": 0.0233, |
| "step": 5690 |
| }, |
| { |
| "epoch": 7.9803990199509975, |
| "grad_norm": 0.0028217558283358812, |
| "learning_rate": 5.3216288789108805e-06, |
| "loss": 0.0021, |
| "step": 5700 |
| }, |
| { |
| "epoch": 7.994399719985999, |
| "grad_norm": 26.778573989868164, |
| "learning_rate": 5.305364691550944e-06, |
| "loss": 0.1025, |
| "step": 5710 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_f1": 0.8188966340884549, |
| "eval_loss": 0.7210356593132019, |
| "eval_precision": 0.8224692809382289, |
| "eval_recall": 0.8187543736878936, |
| "eval_runtime": 85.9063, |
| "eval_samples_per_second": 16.634, |
| "eval_steps_per_second": 8.323, |
| "step": 5714 |
| }, |
| { |
| "epoch": 8.008400420021001, |
| "grad_norm": 0.029206441715359688, |
| "learning_rate": 5.289097260389881e-06, |
| "loss": 0.0313, |
| "step": 5720 |
| }, |
| { |
| "epoch": 8.022401120056003, |
| "grad_norm": 0.0030094946268945932, |
| "learning_rate": 5.2728267582319325e-06, |
| "loss": 0.0003, |
| "step": 5730 |
| }, |
| { |
| "epoch": 8.036401820091005, |
| "grad_norm": 0.002360534854233265, |
| "learning_rate": 5.2565533579139484e-06, |
| "loss": 0.0031, |
| "step": 5740 |
| }, |
| { |
| "epoch": 8.050402520126006, |
| "grad_norm": 0.5710445046424866, |
| "learning_rate": 5.240277232303574e-06, |
| "loss": 0.0168, |
| "step": 5750 |
| }, |
| { |
| "epoch": 8.064403220161008, |
| "grad_norm": 48.347259521484375, |
| "learning_rate": 5.2239985542974e-06, |
| "loss": 0.0518, |
| "step": 5760 |
| }, |
| { |
| "epoch": 8.07840392019601, |
| "grad_norm": 0.02666153386235237, |
| "learning_rate": 5.207717496819134e-06, |
| "loss": 0.0805, |
| "step": 5770 |
| }, |
| { |
| "epoch": 8.092404620231012, |
| "grad_norm": 1.3221057653427124, |
| "learning_rate": 5.191434232817753e-06, |
| "loss": 0.0019, |
| "step": 5780 |
| }, |
| { |
| "epoch": 8.106405320266013, |
| "grad_norm": 0.660656750202179, |
| "learning_rate": 5.1751489352656846e-06, |
| "loss": 0.0002, |
| "step": 5790 |
| }, |
| { |
| "epoch": 8.120406020301015, |
| "grad_norm": 0.0018193651922047138, |
| "learning_rate": 5.158861777156947e-06, |
| "loss": 0.0324, |
| "step": 5800 |
| }, |
| { |
| "epoch": 8.134406720336017, |
| "grad_norm": 0.34088951349258423, |
| "learning_rate": 5.14257293150533e-06, |
| "loss": 0.0005, |
| "step": 5810 |
| }, |
| { |
| "epoch": 8.148407420371019, |
| "grad_norm": 0.0037190490402281284, |
| "learning_rate": 5.126282571342547e-06, |
| "loss": 0.0036, |
| "step": 5820 |
| }, |
| { |
| "epoch": 8.16240812040602, |
| "grad_norm": 0.08573169261217117, |
| "learning_rate": 5.109990869716398e-06, |
| "loss": 0.0487, |
| "step": 5830 |
| }, |
| { |
| "epoch": 8.176408820441022, |
| "grad_norm": 18.21436882019043, |
| "learning_rate": 5.093697999688934e-06, |
| "loss": 0.0045, |
| "step": 5840 |
| }, |
| { |
| "epoch": 8.190409520476024, |
| "grad_norm": 0.003121648682281375, |
| "learning_rate": 5.077404134334623e-06, |
| "loss": 0.0012, |
| "step": 5850 |
| }, |
| { |
| "epoch": 8.204410220511026, |
| "grad_norm": 0.43288281559944153, |
| "learning_rate": 5.061109446738496e-06, |
| "loss": 0.0002, |
| "step": 5860 |
| }, |
| { |
| "epoch": 8.218410920546027, |
| "grad_norm": 1.0953173637390137, |
| "learning_rate": 5.044814109994327e-06, |
| "loss": 0.0333, |
| "step": 5870 |
| }, |
| { |
| "epoch": 8.23241162058103, |
| "grad_norm": 0.018074065446853638, |
| "learning_rate": 5.028518297202781e-06, |
| "loss": 0.0003, |
| "step": 5880 |
| }, |
| { |
| "epoch": 8.246412320616031, |
| "grad_norm": 0.1864849030971527, |
| "learning_rate": 5.0122221814695815e-06, |
| "loss": 0.0151, |
| "step": 5890 |
| }, |
| { |
| "epoch": 8.260413020651033, |
| "grad_norm": 0.0006015441031195223, |
| "learning_rate": 4.99592593590367e-06, |
| "loss": 0.0494, |
| "step": 5900 |
| }, |
| { |
| "epoch": 8.274413720686034, |
| "grad_norm": 0.00025341002037748694, |
| "learning_rate": 4.9796297336153685e-06, |
| "loss": 0.072, |
| "step": 5910 |
| }, |
| { |
| "epoch": 8.288414420721036, |
| "grad_norm": 0.022668078541755676, |
| "learning_rate": 4.963333747714536e-06, |
| "loss": 0.0003, |
| "step": 5920 |
| }, |
| { |
| "epoch": 8.302415120756038, |
| "grad_norm": 1.5527633428573608, |
| "learning_rate": 4.947038151308735e-06, |
| "loss": 0.0115, |
| "step": 5930 |
| }, |
| { |
| "epoch": 8.31641582079104, |
| "grad_norm": 0.057844631373882294, |
| "learning_rate": 4.930743117501393e-06, |
| "loss": 0.0574, |
| "step": 5940 |
| }, |
| { |
| "epoch": 8.330416520826041, |
| "grad_norm": 34.13492202758789, |
| "learning_rate": 4.9144488193899546e-06, |
| "loss": 0.018, |
| "step": 5950 |
| }, |
| { |
| "epoch": 8.344417220861043, |
| "grad_norm": 0.12001439183950424, |
| "learning_rate": 4.898155430064056e-06, |
| "loss": 0.0023, |
| "step": 5960 |
| }, |
| { |
| "epoch": 8.358417920896045, |
| "grad_norm": 0.058995530009269714, |
| "learning_rate": 4.881863122603675e-06, |
| "loss": 0.0036, |
| "step": 5970 |
| }, |
| { |
| "epoch": 8.372418620931047, |
| "grad_norm": 0.050912320613861084, |
| "learning_rate": 4.865572070077298e-06, |
| "loss": 0.0056, |
| "step": 5980 |
| }, |
| { |
| "epoch": 8.386419320966048, |
| "grad_norm": 0.03144632279872894, |
| "learning_rate": 4.849282445540085e-06, |
| "loss": 0.0048, |
| "step": 5990 |
| }, |
| { |
| "epoch": 8.40042002100105, |
| "grad_norm": 46.16802978515625, |
| "learning_rate": 4.832994422032022e-06, |
| "loss": 0.0472, |
| "step": 6000 |
| }, |
| { |
| "epoch": 8.414420721036052, |
| "grad_norm": 0.6442742943763733, |
| "learning_rate": 4.816708172576088e-06, |
| "loss": 0.0116, |
| "step": 6010 |
| }, |
| { |
| "epoch": 8.428421421071054, |
| "grad_norm": 0.01641531102359295, |
| "learning_rate": 4.800423870176417e-06, |
| "loss": 0.0012, |
| "step": 6020 |
| }, |
| { |
| "epoch": 8.442422121106055, |
| "grad_norm": 1.5970861911773682, |
| "learning_rate": 4.7841416878164625e-06, |
| "loss": 0.0004, |
| "step": 6030 |
| }, |
| { |
| "epoch": 8.456422821141057, |
| "grad_norm": 0.002396646188572049, |
| "learning_rate": 4.767861798457157e-06, |
| "loss": 0.0147, |
| "step": 6040 |
| }, |
| { |
| "epoch": 8.470423521176059, |
| "grad_norm": 25.83795166015625, |
| "learning_rate": 4.751584375035071e-06, |
| "loss": 0.0144, |
| "step": 6050 |
| }, |
| { |
| "epoch": 8.48442422121106, |
| "grad_norm": 0.007508122827857733, |
| "learning_rate": 4.735309590460585e-06, |
| "loss": 0.0458, |
| "step": 6060 |
| }, |
| { |
| "epoch": 8.498424921246063, |
| "grad_norm": 0.05192435532808304, |
| "learning_rate": 4.719037617616044e-06, |
| "loss": 0.0003, |
| "step": 6070 |
| }, |
| { |
| "epoch": 8.512425621281064, |
| "grad_norm": 0.001271701417863369, |
| "learning_rate": 4.702768629353928e-06, |
| "loss": 0.0279, |
| "step": 6080 |
| }, |
| { |
| "epoch": 8.526426321316066, |
| "grad_norm": 0.016449235379695892, |
| "learning_rate": 4.686502798495009e-06, |
| "loss": 0.0243, |
| "step": 6090 |
| }, |
| { |
| "epoch": 8.540427021351068, |
| "grad_norm": 0.006645840592682362, |
| "learning_rate": 4.6702402978265235e-06, |
| "loss": 0.0442, |
| "step": 6100 |
| }, |
| { |
| "epoch": 8.55442772138607, |
| "grad_norm": 0.001772402785718441, |
| "learning_rate": 4.6539813001003295e-06, |
| "loss": 0.0742, |
| "step": 6110 |
| }, |
| { |
| "epoch": 8.568428421421071, |
| "grad_norm": 0.02117299474775791, |
| "learning_rate": 4.637725978031072e-06, |
| "loss": 0.0005, |
| "step": 6120 |
| }, |
| { |
| "epoch": 8.582429121456073, |
| "grad_norm": 63.013153076171875, |
| "learning_rate": 4.621474504294358e-06, |
| "loss": 0.0331, |
| "step": 6130 |
| }, |
| { |
| "epoch": 8.596429821491075, |
| "grad_norm": 0.011541690677404404, |
| "learning_rate": 4.605227051524904e-06, |
| "loss": 0.0025, |
| "step": 6140 |
| }, |
| { |
| "epoch": 8.610430521526077, |
| "grad_norm": 28.012697219848633, |
| "learning_rate": 4.588983792314723e-06, |
| "loss": 0.0134, |
| "step": 6150 |
| }, |
| { |
| "epoch": 8.624431221561078, |
| "grad_norm": 0.0013819055166095495, |
| "learning_rate": 4.572744899211275e-06, |
| "loss": 0.0053, |
| "step": 6160 |
| }, |
| { |
| "epoch": 8.63843192159608, |
| "grad_norm": 0.22334392368793488, |
| "learning_rate": 4.5565105447156425e-06, |
| "loss": 0.0799, |
| "step": 6170 |
| }, |
| { |
| "epoch": 8.652432621631082, |
| "grad_norm": 0.005143929738551378, |
| "learning_rate": 4.540280901280696e-06, |
| "loss": 0.0098, |
| "step": 6180 |
| }, |
| { |
| "epoch": 8.666433321666084, |
| "grad_norm": 0.0026814427692443132, |
| "learning_rate": 4.524056141309259e-06, |
| "loss": 0.0006, |
| "step": 6190 |
| }, |
| { |
| "epoch": 8.680434021701085, |
| "grad_norm": 0.000591717311181128, |
| "learning_rate": 4.5078364371522815e-06, |
| "loss": 0.0166, |
| "step": 6200 |
| }, |
| { |
| "epoch": 8.694434721736087, |
| "grad_norm": 1.369355320930481, |
| "learning_rate": 4.49162196110701e-06, |
| "loss": 0.0081, |
| "step": 6210 |
| }, |
| { |
| "epoch": 8.708435421771089, |
| "grad_norm": 0.008222399279475212, |
| "learning_rate": 4.4754128854151465e-06, |
| "loss": 0.0001, |
| "step": 6220 |
| }, |
| { |
| "epoch": 8.72243612180609, |
| "grad_norm": 0.47795143723487854, |
| "learning_rate": 4.459209382261034e-06, |
| "loss": 0.0026, |
| "step": 6230 |
| }, |
| { |
| "epoch": 8.736436821841092, |
| "grad_norm": 0.0339123010635376, |
| "learning_rate": 4.4430116237698166e-06, |
| "loss": 0.0054, |
| "step": 6240 |
| }, |
| { |
| "epoch": 8.750437521876094, |
| "grad_norm": 0.007429391611367464, |
| "learning_rate": 4.426819782005614e-06, |
| "loss": 0.0001, |
| "step": 6250 |
| }, |
| { |
| "epoch": 8.764438221911096, |
| "grad_norm": 0.0037332987412810326, |
| "learning_rate": 4.410634028969698e-06, |
| "loss": 0.0471, |
| "step": 6260 |
| }, |
| { |
| "epoch": 8.778438921946098, |
| "grad_norm": 0.004143583122640848, |
| "learning_rate": 4.394454536598655e-06, |
| "loss": 0.0, |
| "step": 6270 |
| }, |
| { |
| "epoch": 8.7924396219811, |
| "grad_norm": 6.966057777404785, |
| "learning_rate": 4.3782814767625755e-06, |
| "loss": 0.0122, |
| "step": 6280 |
| }, |
| { |
| "epoch": 8.806440322016101, |
| "grad_norm": 0.0005727079114876688, |
| "learning_rate": 4.362115021263207e-06, |
| "loss": 0.0, |
| "step": 6290 |
| }, |
| { |
| "epoch": 8.820441022051103, |
| "grad_norm": 0.0036027561873197556, |
| "learning_rate": 4.345955341832156e-06, |
| "loss": 0.0012, |
| "step": 6300 |
| }, |
| { |
| "epoch": 8.834441722086105, |
| "grad_norm": 0.0872088223695755, |
| "learning_rate": 4.329802610129031e-06, |
| "loss": 0.0034, |
| "step": 6310 |
| }, |
| { |
| "epoch": 8.848442422121106, |
| "grad_norm": 0.018417010083794594, |
| "learning_rate": 4.313656997739651e-06, |
| "loss": 0.0, |
| "step": 6320 |
| }, |
| { |
| "epoch": 8.862443122156108, |
| "grad_norm": 0.007588675711303949, |
| "learning_rate": 4.297518676174205e-06, |
| "loss": 0.0506, |
| "step": 6330 |
| }, |
| { |
| "epoch": 8.87644382219111, |
| "grad_norm": 0.11294496059417725, |
| "learning_rate": 4.281387816865431e-06, |
| "loss": 0.0137, |
| "step": 6340 |
| }, |
| { |
| "epoch": 8.890444522226112, |
| "grad_norm": 0.0062033189460635185, |
| "learning_rate": 4.2652645911668e-06, |
| "loss": 0.0092, |
| "step": 6350 |
| }, |
| { |
| "epoch": 8.904445222261113, |
| "grad_norm": 0.3065476715564728, |
| "learning_rate": 4.249149170350689e-06, |
| "loss": 0.0005, |
| "step": 6360 |
| }, |
| { |
| "epoch": 8.918445922296115, |
| "grad_norm": 23.0545597076416, |
| "learning_rate": 4.233041725606573e-06, |
| "loss": 0.0876, |
| "step": 6370 |
| }, |
| { |
| "epoch": 8.932446622331117, |
| "grad_norm": 0.9219328761100769, |
| "learning_rate": 4.216942428039197e-06, |
| "loss": 0.0005, |
| "step": 6380 |
| }, |
| { |
| "epoch": 8.946447322366119, |
| "grad_norm": 30.67947006225586, |
| "learning_rate": 4.200851448666755e-06, |
| "loss": 0.0584, |
| "step": 6390 |
| }, |
| { |
| "epoch": 8.96044802240112, |
| "grad_norm": 0.005178861785680056, |
| "learning_rate": 4.1847689584190894e-06, |
| "loss": 0.1016, |
| "step": 6400 |
| }, |
| { |
| "epoch": 8.974448722436122, |
| "grad_norm": 0.0009322167607024312, |
| "learning_rate": 4.168695128135854e-06, |
| "loss": 0.0819, |
| "step": 6410 |
| }, |
| { |
| "epoch": 8.988449422471124, |
| "grad_norm": 0.005380717106163502, |
| "learning_rate": 4.152630128564719e-06, |
| "loss": 0.0199, |
| "step": 6420 |
| }, |
| { |
| "epoch": 8.999649982499125, |
| "eval_f1": 0.8161936279830228, |
| "eval_loss": 0.7878016233444214, |
| "eval_precision": 0.8221996054957592, |
| "eval_recall": 0.8159552134359692, |
| "eval_runtime": 85.7772, |
| "eval_samples_per_second": 16.659, |
| "eval_steps_per_second": 8.336, |
| "step": 6428 |
| }, |
| { |
| "epoch": 9.002450122506126, |
| "grad_norm": 0.008465089835226536, |
| "learning_rate": 4.136574130359548e-06, |
| "loss": 0.0, |
| "step": 6430 |
| }, |
| { |
| "epoch": 9.016450822541128, |
| "grad_norm": 0.00646022567525506, |
| "learning_rate": 4.120527304078579e-06, |
| "loss": 0.026, |
| "step": 6440 |
| }, |
| { |
| "epoch": 9.03045152257613, |
| "grad_norm": 0.0003980924666393548, |
| "learning_rate": 4.104489820182626e-06, |
| "loss": 0.0104, |
| "step": 6450 |
| }, |
| { |
| "epoch": 9.044452222611131, |
| "grad_norm": 0.018396450206637383, |
| "learning_rate": 4.088461849033257e-06, |
| "loss": 0.0001, |
| "step": 6460 |
| }, |
| { |
| "epoch": 9.058452922646133, |
| "grad_norm": 0.0020354725420475006, |
| "learning_rate": 4.072443560890993e-06, |
| "loss": 0.0001, |
| "step": 6470 |
| }, |
| { |
| "epoch": 9.072453622681135, |
| "grad_norm": 0.0024896147660911083, |
| "learning_rate": 4.05643512591349e-06, |
| "loss": 0.0009, |
| "step": 6480 |
| }, |
| { |
| "epoch": 9.086454322716136, |
| "grad_norm": 0.04813767969608307, |
| "learning_rate": 4.040436714153742e-06, |
| "loss": 0.0001, |
| "step": 6490 |
| }, |
| { |
| "epoch": 9.100455022751138, |
| "grad_norm": 0.2827114760875702, |
| "learning_rate": 4.024448495558267e-06, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 9.11445572278614, |
| "grad_norm": 4.328901290893555, |
| "learning_rate": 4.008470639965303e-06, |
| "loss": 0.0007, |
| "step": 6510 |
| }, |
| { |
| "epoch": 9.128456422821142, |
| "grad_norm": 0.0006460743024945259, |
| "learning_rate": 3.992503317103006e-06, |
| "loss": 0.062, |
| "step": 6520 |
| }, |
| { |
| "epoch": 9.142457122856143, |
| "grad_norm": 0.00047457695472985506, |
| "learning_rate": 3.976546696587645e-06, |
| "loss": 0.0555, |
| "step": 6530 |
| }, |
| { |
| "epoch": 9.156457822891145, |
| "grad_norm": 0.06896835565567017, |
| "learning_rate": 3.960600947921803e-06, |
| "loss": 0.001, |
| "step": 6540 |
| }, |
| { |
| "epoch": 9.170458522926147, |
| "grad_norm": 0.0008507549064233899, |
| "learning_rate": 3.9446662404925726e-06, |
| "loss": 0.0009, |
| "step": 6550 |
| }, |
| { |
| "epoch": 9.184459222961149, |
| "grad_norm": 0.0036549328360706568, |
| "learning_rate": 3.9287427435697575e-06, |
| "loss": 0.0004, |
| "step": 6560 |
| }, |
| { |
| "epoch": 9.19845992299615, |
| "grad_norm": 0.0027635847218334675, |
| "learning_rate": 3.91283062630408e-06, |
| "loss": 0.0009, |
| "step": 6570 |
| }, |
| { |
| "epoch": 9.212460623031152, |
| "grad_norm": 0.12736278772354126, |
| "learning_rate": 3.896930057725372e-06, |
| "loss": 0.0, |
| "step": 6580 |
| }, |
| { |
| "epoch": 9.226461323066154, |
| "grad_norm": 0.0035878296475857496, |
| "learning_rate": 3.881041206740793e-06, |
| "loss": 0.0008, |
| "step": 6590 |
| }, |
| { |
| "epoch": 9.240462023101156, |
| "grad_norm": 0.005411120597273111, |
| "learning_rate": 3.865164242133032e-06, |
| "loss": 0.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 9.254462723136157, |
| "grad_norm": 0.01727963052690029, |
| "learning_rate": 3.849299332558505e-06, |
| "loss": 0.0008, |
| "step": 6610 |
| }, |
| { |
| "epoch": 9.26846342317116, |
| "grad_norm": 0.4582098722457886, |
| "learning_rate": 3.833446646545577e-06, |
| "loss": 0.0004, |
| "step": 6620 |
| }, |
| { |
| "epoch": 9.28246412320616, |
| "grad_norm": 0.001933308900333941, |
| "learning_rate": 3.817606352492761e-06, |
| "loss": 0.0001, |
| "step": 6630 |
| }, |
| { |
| "epoch": 9.296464823241163, |
| "grad_norm": 0.013198823668062687, |
| "learning_rate": 3.8017786186669392e-06, |
| "loss": 0.0009, |
| "step": 6640 |
| }, |
| { |
| "epoch": 9.310465523276164, |
| "grad_norm": 0.002318366663530469, |
| "learning_rate": 3.7859636132015632e-06, |
| "loss": 0.0585, |
| "step": 6650 |
| }, |
| { |
| "epoch": 9.324466223311166, |
| "grad_norm": 0.0035817010793834925, |
| "learning_rate": 3.770161504094881e-06, |
| "loss": 0.0079, |
| "step": 6660 |
| }, |
| { |
| "epoch": 9.338466923346168, |
| "grad_norm": 0.0008065904839895666, |
| "learning_rate": 3.754372459208144e-06, |
| "loss": 0.0003, |
| "step": 6670 |
| }, |
| { |
| "epoch": 9.35246762338117, |
| "grad_norm": 0.2741522789001465, |
| "learning_rate": 3.7385966462638245e-06, |
| "loss": 0.0034, |
| "step": 6680 |
| }, |
| { |
| "epoch": 9.366468323416171, |
| "grad_norm": 0.00011428318248363212, |
| "learning_rate": 3.722834232843842e-06, |
| "loss": 0.0001, |
| "step": 6690 |
| }, |
| { |
| "epoch": 9.380469023451173, |
| "grad_norm": 0.0014890613965690136, |
| "learning_rate": 3.7070853863877655e-06, |
| "loss": 0.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 9.394469723486175, |
| "grad_norm": 0.02655262127518654, |
| "learning_rate": 3.691350274191057e-06, |
| "loss": 0.0005, |
| "step": 6710 |
| }, |
| { |
| "epoch": 9.408470423521177, |
| "grad_norm": 0.020148996263742447, |
| "learning_rate": 3.675629063403278e-06, |
| "loss": 0.0001, |
| "step": 6720 |
| }, |
| { |
| "epoch": 9.422471123556178, |
| "grad_norm": 0.2881470024585724, |
| "learning_rate": 3.6599219210263204e-06, |
| "loss": 0.0033, |
| "step": 6730 |
| }, |
| { |
| "epoch": 9.43647182359118, |
| "grad_norm": 0.038803525269031525, |
| "learning_rate": 3.6442290139126317e-06, |
| "loss": 0.0154, |
| "step": 6740 |
| }, |
| { |
| "epoch": 9.450472523626182, |
| "grad_norm": 21.950056076049805, |
| "learning_rate": 3.628550508763441e-06, |
| "loss": 0.0041, |
| "step": 6750 |
| }, |
| { |
| "epoch": 9.464473223661184, |
| "grad_norm": 0.0429142527282238, |
| "learning_rate": 3.612886572126991e-06, |
| "loss": 0.0, |
| "step": 6760 |
| }, |
| { |
| "epoch": 9.478473923696185, |
| "grad_norm": 0.09250881522893906, |
| "learning_rate": 3.5972373703967683e-06, |
| "loss": 0.041, |
| "step": 6770 |
| }, |
| { |
| "epoch": 9.492474623731187, |
| "grad_norm": 0.016302289441227913, |
| "learning_rate": 3.5816030698097294e-06, |
| "loss": 0.0006, |
| "step": 6780 |
| }, |
| { |
| "epoch": 9.506475323766189, |
| "grad_norm": 0.025519099086523056, |
| "learning_rate": 3.5659838364445505e-06, |
| "loss": 0.0007, |
| "step": 6790 |
| }, |
| { |
| "epoch": 9.52047602380119, |
| "grad_norm": 0.004305595997720957, |
| "learning_rate": 3.5503798362198394e-06, |
| "loss": 0.028, |
| "step": 6800 |
| }, |
| { |
| "epoch": 9.534476723836192, |
| "grad_norm": 0.0018996294820681214, |
| "learning_rate": 3.5347912348924002e-06, |
| "loss": 0.0001, |
| "step": 6810 |
| }, |
| { |
| "epoch": 9.548477423871194, |
| "grad_norm": 0.016481753438711166, |
| "learning_rate": 3.5192181980554475e-06, |
| "loss": 0.0001, |
| "step": 6820 |
| }, |
| { |
| "epoch": 9.562478123906196, |
| "grad_norm": 38.504085540771484, |
| "learning_rate": 3.5036608911368675e-06, |
| "loss": 0.0112, |
| "step": 6830 |
| }, |
| { |
| "epoch": 9.576478823941198, |
| "grad_norm": 0.013982472941279411, |
| "learning_rate": 3.4881194793974483e-06, |
| "loss": 0.0006, |
| "step": 6840 |
| }, |
| { |
| "epoch": 9.5904795239762, |
| "grad_norm": 0.028334472328424454, |
| "learning_rate": 3.4725941279291265e-06, |
| "loss": 0.0005, |
| "step": 6850 |
| }, |
| { |
| "epoch": 9.604480224011201, |
| "grad_norm": 5.324892044067383, |
| "learning_rate": 3.4570850016532386e-06, |
| "loss": 0.0011, |
| "step": 6860 |
| }, |
| { |
| "epoch": 9.618480924046203, |
| "grad_norm": 0.023565029725432396, |
| "learning_rate": 3.4415922653187626e-06, |
| "loss": 0.0001, |
| "step": 6870 |
| }, |
| { |
| "epoch": 9.632481624081205, |
| "grad_norm": 0.000177843525307253, |
| "learning_rate": 3.426116083500571e-06, |
| "loss": 0.0001, |
| "step": 6880 |
| }, |
| { |
| "epoch": 9.646482324116207, |
| "grad_norm": 1.4049161672592163, |
| "learning_rate": 3.410656620597689e-06, |
| "loss": 0.0005, |
| "step": 6890 |
| }, |
| { |
| "epoch": 9.660483024151208, |
| "grad_norm": 0.03700033575296402, |
| "learning_rate": 3.395214040831529e-06, |
| "loss": 0.0002, |
| "step": 6900 |
| }, |
| { |
| "epoch": 9.67448372418621, |
| "grad_norm": 0.3730657398700714, |
| "learning_rate": 3.3797885082441717e-06, |
| "loss": 0.0274, |
| "step": 6910 |
| }, |
| { |
| "epoch": 9.688484424221212, |
| "grad_norm": 0.11278026551008224, |
| "learning_rate": 3.3643801866965997e-06, |
| "loss": 0.0001, |
| "step": 6920 |
| }, |
| { |
| "epoch": 9.702485124256214, |
| "grad_norm": 0.00797436386346817, |
| "learning_rate": 3.348989239866976e-06, |
| "loss": 0.0057, |
| "step": 6930 |
| }, |
| { |
| "epoch": 9.716485824291215, |
| "grad_norm": 0.0002936197561211884, |
| "learning_rate": 3.3336158312488935e-06, |
| "loss": 0.0019, |
| "step": 6940 |
| }, |
| { |
| "epoch": 9.730486524326217, |
| "grad_norm": 0.01316259428858757, |
| "learning_rate": 3.3182601241496405e-06, |
| "loss": 0.0096, |
| "step": 6950 |
| }, |
| { |
| "epoch": 9.744487224361219, |
| "grad_norm": 0.001627352088689804, |
| "learning_rate": 3.3029222816884697e-06, |
| "loss": 0.042, |
| "step": 6960 |
| }, |
| { |
| "epoch": 9.75848792439622, |
| "grad_norm": 0.05984394624829292, |
| "learning_rate": 3.2876024667948603e-06, |
| "loss": 0.0003, |
| "step": 6970 |
| }, |
| { |
| "epoch": 9.772488624431222, |
| "grad_norm": 0.00035447083064354956, |
| "learning_rate": 3.2723008422067924e-06, |
| "loss": 0.0547, |
| "step": 6980 |
| }, |
| { |
| "epoch": 9.786489324466224, |
| "grad_norm": 47.53248977661133, |
| "learning_rate": 3.2570175704690143e-06, |
| "loss": 0.0149, |
| "step": 6990 |
| }, |
| { |
| "epoch": 9.800490024501226, |
| "grad_norm": 0.0007067213300615549, |
| "learning_rate": 3.241752813931316e-06, |
| "loss": 0.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 9.814490724536228, |
| "grad_norm": 1.0662330389022827, |
| "learning_rate": 3.2265067347468116e-06, |
| "loss": 0.0171, |
| "step": 7010 |
| }, |
| { |
| "epoch": 9.82849142457123, |
| "grad_norm": 0.05713279917836189, |
| "learning_rate": 3.2112794948702027e-06, |
| "loss": 0.0029, |
| "step": 7020 |
| }, |
| { |
| "epoch": 9.842492124606231, |
| "grad_norm": 0.0015575195429846644, |
| "learning_rate": 3.1960712560560724e-06, |
| "loss": 0.0, |
| "step": 7030 |
| }, |
| { |
| "epoch": 9.856492824641233, |
| "grad_norm": 0.008686025626957417, |
| "learning_rate": 3.1808821798571585e-06, |
| "loss": 0.0037, |
| "step": 7040 |
| }, |
| { |
| "epoch": 9.870493524676235, |
| "grad_norm": 0.4814838767051697, |
| "learning_rate": 3.1657124276226415e-06, |
| "loss": 0.0004, |
| "step": 7050 |
| }, |
| { |
| "epoch": 9.884494224711236, |
| "grad_norm": 0.14743080735206604, |
| "learning_rate": 3.1505621604964277e-06, |
| "loss": 0.0199, |
| "step": 7060 |
| }, |
| { |
| "epoch": 9.898494924746238, |
| "grad_norm": 0.002065706066787243, |
| "learning_rate": 3.1354315394154377e-06, |
| "loss": 0.0, |
| "step": 7070 |
| }, |
| { |
| "epoch": 9.91249562478124, |
| "grad_norm": 0.0025627650320529938, |
| "learning_rate": 3.1203207251079003e-06, |
| "loss": 0.0017, |
| "step": 7080 |
| }, |
| { |
| "epoch": 9.926496324816242, |
| "grad_norm": 0.01459525153040886, |
| "learning_rate": 3.105229878091641e-06, |
| "loss": 0.0855, |
| "step": 7090 |
| }, |
| { |
| "epoch": 9.940497024851243, |
| "grad_norm": 0.001635802211239934, |
| "learning_rate": 3.0901591586723777e-06, |
| "loss": 0.0351, |
| "step": 7100 |
| }, |
| { |
| "epoch": 9.954497724886245, |
| "grad_norm": 0.0009384243749082088, |
| "learning_rate": 3.0751087269420244e-06, |
| "loss": 0.0331, |
| "step": 7110 |
| }, |
| { |
| "epoch": 9.968498424921247, |
| "grad_norm": 0.029788050800561905, |
| "learning_rate": 3.060078742776975e-06, |
| "loss": 0.0, |
| "step": 7120 |
| }, |
| { |
| "epoch": 9.982499124956249, |
| "grad_norm": 0.017946625128388405, |
| "learning_rate": 3.0450693658364243e-06, |
| "loss": 0.0034, |
| "step": 7130 |
| }, |
| { |
| "epoch": 9.99649982499125, |
| "grad_norm": 0.9889459609985352, |
| "learning_rate": 3.030080755560656e-06, |
| "loss": 0.0018, |
| "step": 7140 |
| }, |
| { |
| "epoch": 9.99929996499825, |
| "eval_f1": 0.8204141390277229, |
| "eval_loss": 0.7978833317756653, |
| "eval_precision": 0.8269728499510313, |
| "eval_recall": 0.8201539538138558, |
| "eval_runtime": 85.2249, |
| "eval_samples_per_second": 16.767, |
| "eval_steps_per_second": 8.39, |
| "step": 7142 |
| }, |
| { |
| "epoch": 10.01050052502625, |
| "grad_norm": 0.011208614334464073, |
| "learning_rate": 3.015113071169359e-06, |
| "loss": 0.0005, |
| "step": 7150 |
| }, |
| { |
| "epoch": 10.024501225061252, |
| "grad_norm": 0.0011321509955450892, |
| "learning_rate": 3.000166471659929e-06, |
| "loss": 0.0001, |
| "step": 7160 |
| }, |
| { |
| "epoch": 10.038501925096254, |
| "grad_norm": 0.0020235786214470863, |
| "learning_rate": 2.985241115805788e-06, |
| "loss": 0.0007, |
| "step": 7170 |
| }, |
| { |
| "epoch": 10.052502625131256, |
| "grad_norm": 0.9804045557975769, |
| "learning_rate": 2.9703371621546908e-06, |
| "loss": 0.0001, |
| "step": 7180 |
| }, |
| { |
| "epoch": 10.066503325166257, |
| "grad_norm": 0.017487866804003716, |
| "learning_rate": 2.955454769027039e-06, |
| "loss": 0.001, |
| "step": 7190 |
| }, |
| { |
| "epoch": 10.08050402520126, |
| "grad_norm": 0.012673470191657543, |
| "learning_rate": 2.9405940945142106e-06, |
| "loss": 0.0008, |
| "step": 7200 |
| }, |
| { |
| "epoch": 10.094504725236261, |
| "grad_norm": 0.0016335515538230538, |
| "learning_rate": 2.9257552964768644e-06, |
| "loss": 0.0, |
| "step": 7210 |
| }, |
| { |
| "epoch": 10.108505425271263, |
| "grad_norm": 0.003731638891622424, |
| "learning_rate": 2.9109385325432793e-06, |
| "loss": 0.0, |
| "step": 7220 |
| }, |
| { |
| "epoch": 10.122506125306264, |
| "grad_norm": 0.007333674468100071, |
| "learning_rate": 2.8961439601076667e-06, |
| "loss": 0.0085, |
| "step": 7230 |
| }, |
| { |
| "epoch": 10.136506825341266, |
| "grad_norm": 17.61123275756836, |
| "learning_rate": 2.881371736328506e-06, |
| "loss": 0.0083, |
| "step": 7240 |
| }, |
| { |
| "epoch": 10.150507525376268, |
| "grad_norm": 2.581366777420044, |
| "learning_rate": 2.866622018126876e-06, |
| "loss": 0.0126, |
| "step": 7250 |
| }, |
| { |
| "epoch": 10.16450822541127, |
| "grad_norm": 0.0004104816180188209, |
| "learning_rate": 2.8518949621847793e-06, |
| "loss": 0.0001, |
| "step": 7260 |
| }, |
| { |
| "epoch": 10.178508925446272, |
| "grad_norm": 0.05607493594288826, |
| "learning_rate": 2.8371907249434917e-06, |
| "loss": 0.0216, |
| "step": 7270 |
| }, |
| { |
| "epoch": 10.192509625481273, |
| "grad_norm": 0.0017281303880736232, |
| "learning_rate": 2.822509462601886e-06, |
| "loss": 0.0, |
| "step": 7280 |
| }, |
| { |
| "epoch": 10.206510325516275, |
| "grad_norm": 0.010942882858216763, |
| "learning_rate": 2.807851331114778e-06, |
| "loss": 0.0, |
| "step": 7290 |
| }, |
| { |
| "epoch": 10.220511025551277, |
| "grad_norm": 0.0011507336748763919, |
| "learning_rate": 2.7932164861912805e-06, |
| "loss": 0.0008, |
| "step": 7300 |
| }, |
| { |
| "epoch": 10.234511725586279, |
| "grad_norm": 0.0005932246567681432, |
| "learning_rate": 2.778605083293131e-06, |
| "loss": 0.0, |
| "step": 7310 |
| }, |
| { |
| "epoch": 10.24851242562128, |
| "grad_norm": 1.4911444187164307, |
| "learning_rate": 2.7640172776330504e-06, |
| "loss": 0.0028, |
| "step": 7320 |
| }, |
| { |
| "epoch": 10.262513125656282, |
| "grad_norm": 0.015291115269064903, |
| "learning_rate": 2.7494532241730974e-06, |
| "loss": 0.0, |
| "step": 7330 |
| }, |
| { |
| "epoch": 10.276513825691284, |
| "grad_norm": 0.0419706292450428, |
| "learning_rate": 2.7349130776230132e-06, |
| "loss": 0.0, |
| "step": 7340 |
| }, |
| { |
| "epoch": 10.290514525726286, |
| "grad_norm": 0.0022001699544489384, |
| "learning_rate": 2.7203969924385885e-06, |
| "loss": 0.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 10.304515225761287, |
| "grad_norm": 0.010627568699419498, |
| "learning_rate": 2.705905122820006e-06, |
| "loss": 0.0001, |
| "step": 7360 |
| }, |
| { |
| "epoch": 10.318515925796289, |
| "grad_norm": 0.005599226802587509, |
| "learning_rate": 2.6914376227102266e-06, |
| "loss": 0.0, |
| "step": 7370 |
| }, |
| { |
| "epoch": 10.33251662583129, |
| "grad_norm": 0.003463909961283207, |
| "learning_rate": 2.676994645793331e-06, |
| "loss": 0.0001, |
| "step": 7380 |
| }, |
| { |
| "epoch": 10.346517325866293, |
| "grad_norm": 0.003562136786058545, |
| "learning_rate": 2.6625763454929048e-06, |
| "loss": 0.03, |
| "step": 7390 |
| }, |
| { |
| "epoch": 10.360518025901294, |
| "grad_norm": 0.007249193266034126, |
| "learning_rate": 2.648182874970395e-06, |
| "loss": 0.0, |
| "step": 7400 |
| }, |
| { |
| "epoch": 10.374518725936296, |
| "grad_norm": 0.0003962105547543615, |
| "learning_rate": 2.6338143871234905e-06, |
| "loss": 0.0009, |
| "step": 7410 |
| }, |
| { |
| "epoch": 10.388519425971298, |
| "grad_norm": 0.0006904040928930044, |
| "learning_rate": 2.6194710345845e-06, |
| "loss": 0.0001, |
| "step": 7420 |
| }, |
| { |
| "epoch": 10.4025201260063, |
| "grad_norm": 2.711101770401001, |
| "learning_rate": 2.6051529697187227e-06, |
| "loss": 0.0022, |
| "step": 7430 |
| }, |
| { |
| "epoch": 10.416520826041301, |
| "grad_norm": 0.08761586248874664, |
| "learning_rate": 2.5908603446228333e-06, |
| "loss": 0.0, |
| "step": 7440 |
| }, |
| { |
| "epoch": 10.430521526076303, |
| "grad_norm": 0.009707544930279255, |
| "learning_rate": 2.5765933111232734e-06, |
| "loss": 0.0003, |
| "step": 7450 |
| }, |
| { |
| "epoch": 10.444522226111305, |
| "grad_norm": 0.0028569665737450123, |
| "learning_rate": 2.5623520207746254e-06, |
| "loss": 0.0101, |
| "step": 7460 |
| }, |
| { |
| "epoch": 10.458522926146307, |
| "grad_norm": 0.12865599989891052, |
| "learning_rate": 2.5481366248580165e-06, |
| "loss": 0.0001, |
| "step": 7470 |
| }, |
| { |
| "epoch": 10.472523626181308, |
| "grad_norm": 0.0029542180709540844, |
| "learning_rate": 2.533947274379499e-06, |
| "loss": 0.0, |
| "step": 7480 |
| }, |
| { |
| "epoch": 10.48652432621631, |
| "grad_norm": 0.00011413331230869517, |
| "learning_rate": 2.5197841200684525e-06, |
| "loss": 0.0, |
| "step": 7490 |
| }, |
| { |
| "epoch": 10.500525026251312, |
| "grad_norm": 0.0017983964644372463, |
| "learning_rate": 2.5056473123759872e-06, |
| "loss": 0.0016, |
| "step": 7500 |
| }, |
| { |
| "epoch": 10.514525726286314, |
| "grad_norm": 0.0005576438270509243, |
| "learning_rate": 2.4915370014733365e-06, |
| "loss": 0.0012, |
| "step": 7510 |
| }, |
| { |
| "epoch": 10.528526426321315, |
| "grad_norm": 0.001333917840383947, |
| "learning_rate": 2.4774533372502657e-06, |
| "loss": 0.0143, |
| "step": 7520 |
| }, |
| { |
| "epoch": 10.542527126356317, |
| "grad_norm": 0.00315807550214231, |
| "learning_rate": 2.463396469313481e-06, |
| "loss": 0.0004, |
| "step": 7530 |
| }, |
| { |
| "epoch": 10.556527826391319, |
| "grad_norm": 0.00039284565718844533, |
| "learning_rate": 2.449366546985042e-06, |
| "loss": 0.0165, |
| "step": 7540 |
| }, |
| { |
| "epoch": 10.57052852642632, |
| "grad_norm": 0.00022277185053098947, |
| "learning_rate": 2.43536371930077e-06, |
| "loss": 0.0028, |
| "step": 7550 |
| }, |
| { |
| "epoch": 10.584529226461322, |
| "grad_norm": 0.0015283463289961219, |
| "learning_rate": 2.421388135008666e-06, |
| "loss": 0.0012, |
| "step": 7560 |
| }, |
| { |
| "epoch": 10.598529926496324, |
| "grad_norm": 21.00211524963379, |
| "learning_rate": 2.407439942567339e-06, |
| "loss": 0.0632, |
| "step": 7570 |
| }, |
| { |
| "epoch": 10.612530626531326, |
| "grad_norm": 0.7737404704093933, |
| "learning_rate": 2.3935192901444127e-06, |
| "loss": 0.0001, |
| "step": 7580 |
| }, |
| { |
| "epoch": 10.626531326566328, |
| "grad_norm": 0.000299283565254882, |
| "learning_rate": 2.3796263256149715e-06, |
| "loss": 0.0001, |
| "step": 7590 |
| }, |
| { |
| "epoch": 10.64053202660133, |
| "grad_norm": 0.04837600886821747, |
| "learning_rate": 2.365761196559972e-06, |
| "loss": 0.0008, |
| "step": 7600 |
| }, |
| { |
| "epoch": 10.654532726636331, |
| "grad_norm": 0.00024079847207758576, |
| "learning_rate": 2.3519240502646822e-06, |
| "loss": 0.0185, |
| "step": 7610 |
| }, |
| { |
| "epoch": 10.668533426671333, |
| "grad_norm": 0.0012332991464063525, |
| "learning_rate": 2.338115033717124e-06, |
| "loss": 0.0001, |
| "step": 7620 |
| }, |
| { |
| "epoch": 10.682534126706335, |
| "grad_norm": 26.770566940307617, |
| "learning_rate": 2.324334293606499e-06, |
| "loss": 0.008, |
| "step": 7630 |
| }, |
| { |
| "epoch": 10.696534826741336, |
| "grad_norm": 0.1293647587299347, |
| "learning_rate": 2.310581976321638e-06, |
| "loss": 0.0, |
| "step": 7640 |
| }, |
| { |
| "epoch": 10.710535526776338, |
| "grad_norm": 0.004871649201959372, |
| "learning_rate": 2.2968582279494432e-06, |
| "loss": 0.0001, |
| "step": 7650 |
| }, |
| { |
| "epoch": 10.72453622681134, |
| "grad_norm": 2.857433319091797, |
| "learning_rate": 2.2831631942733406e-06, |
| "loss": 0.0006, |
| "step": 7660 |
| }, |
| { |
| "epoch": 10.738536926846342, |
| "grad_norm": 0.001171862706542015, |
| "learning_rate": 2.269497020771728e-06, |
| "loss": 0.0131, |
| "step": 7670 |
| }, |
| { |
| "epoch": 10.752537626881344, |
| "grad_norm": 0.01222603302448988, |
| "learning_rate": 2.2558598526164265e-06, |
| "loss": 0.0148, |
| "step": 7680 |
| }, |
| { |
| "epoch": 10.766538326916345, |
| "grad_norm": 2.3237972259521484, |
| "learning_rate": 2.2422518346711445e-06, |
| "loss": 0.0002, |
| "step": 7690 |
| }, |
| { |
| "epoch": 10.780539026951347, |
| "grad_norm": 0.012020766735076904, |
| "learning_rate": 2.2286731114899322e-06, |
| "loss": 0.0, |
| "step": 7700 |
| }, |
| { |
| "epoch": 10.794539726986349, |
| "grad_norm": 0.0029932681936770678, |
| "learning_rate": 2.2151238273156577e-06, |
| "loss": 0.037, |
| "step": 7710 |
| }, |
| { |
| "epoch": 10.80854042702135, |
| "grad_norm": 0.0010985223343595862, |
| "learning_rate": 2.2016041260784604e-06, |
| "loss": 0.0119, |
| "step": 7720 |
| }, |
| { |
| "epoch": 10.822541127056352, |
| "grad_norm": 0.046439751982688904, |
| "learning_rate": 2.188114151394228e-06, |
| "loss": 0.0001, |
| "step": 7730 |
| }, |
| { |
| "epoch": 10.836541827091354, |
| "grad_norm": 0.000295175559585914, |
| "learning_rate": 2.1746540465630784e-06, |
| "loss": 0.0, |
| "step": 7740 |
| }, |
| { |
| "epoch": 10.850542527126356, |
| "grad_norm": 0.0027245362289249897, |
| "learning_rate": 2.1612239545678234e-06, |
| "loss": 0.0061, |
| "step": 7750 |
| }, |
| { |
| "epoch": 10.864543227161358, |
| "grad_norm": 0.00551482243463397, |
| "learning_rate": 2.1478240180724646e-06, |
| "loss": 0.0036, |
| "step": 7760 |
| }, |
| { |
| "epoch": 10.87854392719636, |
| "grad_norm": 0.0002639990416355431, |
| "learning_rate": 2.134454379420659e-06, |
| "loss": 0.0321, |
| "step": 7770 |
| }, |
| { |
| "epoch": 10.892544627231361, |
| "grad_norm": 0.004308238625526428, |
| "learning_rate": 2.1211151806342294e-06, |
| "loss": 0.0003, |
| "step": 7780 |
| }, |
| { |
| "epoch": 10.906545327266363, |
| "grad_norm": 7.97113037109375, |
| "learning_rate": 2.107806563411643e-06, |
| "loss": 0.0011, |
| "step": 7790 |
| }, |
| { |
| "epoch": 10.920546027301365, |
| "grad_norm": 0.001049870508722961, |
| "learning_rate": 2.0945286691265016e-06, |
| "loss": 0.0089, |
| "step": 7800 |
| }, |
| { |
| "epoch": 10.934546727336366, |
| "grad_norm": 0.00010801222379086539, |
| "learning_rate": 2.081281638826052e-06, |
| "loss": 0.0, |
| "step": 7810 |
| }, |
| { |
| "epoch": 10.948547427371368, |
| "grad_norm": 0.0027724995743483305, |
| "learning_rate": 2.0680656132296766e-06, |
| "loss": 0.0005, |
| "step": 7820 |
| }, |
| { |
| "epoch": 10.96254812740637, |
| "grad_norm": 0.0011577574769034982, |
| "learning_rate": 2.05488073272741e-06, |
| "loss": 0.0009, |
| "step": 7830 |
| }, |
| { |
| "epoch": 10.976548827441372, |
| "grad_norm": 23.115427017211914, |
| "learning_rate": 2.0417271373784403e-06, |
| "loss": 0.0033, |
| "step": 7840 |
| }, |
| { |
| "epoch": 10.990549527476373, |
| "grad_norm": 0.10808968544006348, |
| "learning_rate": 2.0286049669096147e-06, |
| "loss": 0.0039, |
| "step": 7850 |
| }, |
| { |
| "epoch": 10.998949947497374, |
| "eval_f1": 0.8251300522683318, |
| "eval_loss": 0.8002111315727234, |
| "eval_precision": 0.8270740039811031, |
| "eval_recall": 0.8250524842547236, |
| "eval_runtime": 85.4056, |
| "eval_samples_per_second": 16.732, |
| "eval_steps_per_second": 8.372, |
| "step": 7856 |
| }, |
| { |
| "epoch": 11.004550227511375, |
| "grad_norm": 0.009638884104788303, |
| "learning_rate": 2.015514360713974e-06, |
| "loss": 0.0212, |
| "step": 7860 |
| }, |
| { |
| "epoch": 11.018550927546377, |
| "grad_norm": 0.0014148970367386937, |
| "learning_rate": 2.0024554578492513e-06, |
| "loss": 0.0267, |
| "step": 7870 |
| }, |
| { |
| "epoch": 11.032551627581379, |
| "grad_norm": 0.0003119578759651631, |
| "learning_rate": 1.9894283970364135e-06, |
| "loss": 0.0, |
| "step": 7880 |
| }, |
| { |
| "epoch": 11.04655232761638, |
| "grad_norm": 0.015133386477828026, |
| "learning_rate": 1.976433316658168e-06, |
| "loss": 0.0, |
| "step": 7890 |
| }, |
| { |
| "epoch": 11.060553027651382, |
| "grad_norm": 0.00046241507516242564, |
| "learning_rate": 1.963470354757512e-06, |
| "loss": 0.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 11.074553727686384, |
| "grad_norm": 0.019241634756326675, |
| "learning_rate": 1.950539649036255e-06, |
| "loss": 0.0, |
| "step": 7910 |
| }, |
| { |
| "epoch": 11.088554427721386, |
| "grad_norm": 0.00012300013622734696, |
| "learning_rate": 1.9376413368535575e-06, |
| "loss": 0.0398, |
| "step": 7920 |
| }, |
| { |
| "epoch": 11.102555127756387, |
| "grad_norm": 0.018908197060227394, |
| "learning_rate": 1.924775555224472e-06, |
| "loss": 0.0323, |
| "step": 7930 |
| }, |
| { |
| "epoch": 11.11655582779139, |
| "grad_norm": 0.0012662785593420267, |
| "learning_rate": 1.911942440818487e-06, |
| "loss": 0.0, |
| "step": 7940 |
| }, |
| { |
| "epoch": 11.130556527826391, |
| "grad_norm": 0.0013872645795345306, |
| "learning_rate": 1.899142129958082e-06, |
| "loss": 0.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 11.144557227861393, |
| "grad_norm": 0.031151611357927322, |
| "learning_rate": 1.8863747586172731e-06, |
| "loss": 0.0236, |
| "step": 7960 |
| }, |
| { |
| "epoch": 11.158557927896394, |
| "grad_norm": 0.00018219469347968698, |
| "learning_rate": 1.8736404624201605e-06, |
| "loss": 0.0, |
| "step": 7970 |
| }, |
| { |
| "epoch": 11.172558627931396, |
| "grad_norm": 0.001954052597284317, |
| "learning_rate": 1.8609393766395083e-06, |
| "loss": 0.0, |
| "step": 7980 |
| }, |
| { |
| "epoch": 11.186559327966398, |
| "grad_norm": 0.000496099004521966, |
| "learning_rate": 1.8482716361952868e-06, |
| "loss": 0.0001, |
| "step": 7990 |
| }, |
| { |
| "epoch": 11.2005600280014, |
| "grad_norm": 0.0025121436920017004, |
| "learning_rate": 1.8356373756532557e-06, |
| "loss": 0.0472, |
| "step": 8000 |
| }, |
| { |
| "epoch": 11.214560728036401, |
| "grad_norm": 0.0013273832155391574, |
| "learning_rate": 1.8230367292235234e-06, |
| "loss": 0.0, |
| "step": 8010 |
| }, |
| { |
| "epoch": 11.228561428071403, |
| "grad_norm": 0.0005976692191325128, |
| "learning_rate": 1.810469830759123e-06, |
| "loss": 0.021, |
| "step": 8020 |
| }, |
| { |
| "epoch": 11.242562128106405, |
| "grad_norm": 0.00048146533663384616, |
| "learning_rate": 1.7979368137545988e-06, |
| "loss": 0.0, |
| "step": 8030 |
| }, |
| { |
| "epoch": 11.256562828141407, |
| "grad_norm": 0.0011883461847901344, |
| "learning_rate": 1.785437811344578e-06, |
| "loss": 0.0, |
| "step": 8040 |
| }, |
| { |
| "epoch": 11.270563528176408, |
| "grad_norm": 0.06734263896942139, |
| "learning_rate": 1.7729729563023613e-06, |
| "loss": 0.0, |
| "step": 8050 |
| }, |
| { |
| "epoch": 11.28456422821141, |
| "grad_norm": 0.001917374669574201, |
| "learning_rate": 1.7605423810385097e-06, |
| "loss": 0.0, |
| "step": 8060 |
| }, |
| { |
| "epoch": 11.298564928246412, |
| "grad_norm": 0.000647062377538532, |
| "learning_rate": 1.7481462175994447e-06, |
| "loss": 0.0, |
| "step": 8070 |
| }, |
| { |
| "epoch": 11.312565628281414, |
| "grad_norm": 0.11232136934995651, |
| "learning_rate": 1.7357845976660386e-06, |
| "loss": 0.0001, |
| "step": 8080 |
| }, |
| { |
| "epoch": 11.326566328316416, |
| "grad_norm": 0.0012726177228614688, |
| "learning_rate": 1.7234576525522172e-06, |
| "loss": 0.0001, |
| "step": 8090 |
| }, |
| { |
| "epoch": 11.340567028351417, |
| "grad_norm": 0.00020207905618008226, |
| "learning_rate": 1.7111655132035665e-06, |
| "loss": 0.0, |
| "step": 8100 |
| }, |
| { |
| "epoch": 11.354567728386419, |
| "grad_norm": 0.02561323344707489, |
| "learning_rate": 1.698908310195938e-06, |
| "loss": 0.0, |
| "step": 8110 |
| }, |
| { |
| "epoch": 11.36856842842142, |
| "grad_norm": 0.0024278999771922827, |
| "learning_rate": 1.6866861737340705e-06, |
| "loss": 0.0, |
| "step": 8120 |
| }, |
| { |
| "epoch": 11.382569128456423, |
| "grad_norm": 0.0060219429433345795, |
| "learning_rate": 1.674499233650197e-06, |
| "loss": 0.0, |
| "step": 8130 |
| }, |
| { |
| "epoch": 11.396569828491424, |
| "grad_norm": 1.4449529647827148, |
| "learning_rate": 1.6623476194026678e-06, |
| "loss": 0.0002, |
| "step": 8140 |
| }, |
| { |
| "epoch": 11.410570528526426, |
| "grad_norm": 0.002635002601891756, |
| "learning_rate": 1.6502314600745828e-06, |
| "loss": 0.0001, |
| "step": 8150 |
| }, |
| { |
| "epoch": 11.424571228561428, |
| "grad_norm": 0.003079216228798032, |
| "learning_rate": 1.6381508843724075e-06, |
| "loss": 0.0, |
| "step": 8160 |
| }, |
| { |
| "epoch": 11.43857192859643, |
| "grad_norm": 0.00011024038394680247, |
| "learning_rate": 1.6261060206246199e-06, |
| "loss": 0.0002, |
| "step": 8170 |
| }, |
| { |
| "epoch": 11.452572628631431, |
| "grad_norm": 0.0009657694026827812, |
| "learning_rate": 1.6140969967803355e-06, |
| "loss": 0.0, |
| "step": 8180 |
| }, |
| { |
| "epoch": 11.466573328666433, |
| "grad_norm": 0.8496362566947937, |
| "learning_rate": 1.6021239404079513e-06, |
| "loss": 0.0001, |
| "step": 8190 |
| }, |
| { |
| "epoch": 11.480574028701435, |
| "grad_norm": 0.0027748725842684507, |
| "learning_rate": 1.590186978693799e-06, |
| "loss": 0.0, |
| "step": 8200 |
| }, |
| { |
| "epoch": 11.494574728736437, |
| "grad_norm": 0.0007704569143243134, |
| "learning_rate": 1.5782862384407816e-06, |
| "loss": 0.0123, |
| "step": 8210 |
| }, |
| { |
| "epoch": 11.508575428771438, |
| "grad_norm": 0.009385130368173122, |
| "learning_rate": 1.5664218460670327e-06, |
| "loss": 0.0026, |
| "step": 8220 |
| }, |
| { |
| "epoch": 11.52257612880644, |
| "grad_norm": 0.06345506012439728, |
| "learning_rate": 1.554593927604573e-06, |
| "loss": 0.0003, |
| "step": 8230 |
| }, |
| { |
| "epoch": 11.536576828841442, |
| "grad_norm": 0.0005839611403644085, |
| "learning_rate": 1.5428026086979736e-06, |
| "loss": 0.0, |
| "step": 8240 |
| }, |
| { |
| "epoch": 11.550577528876444, |
| "grad_norm": 0.002742344280704856, |
| "learning_rate": 1.531048014603017e-06, |
| "loss": 0.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 11.564578228911445, |
| "grad_norm": 0.0002546895411796868, |
| "learning_rate": 1.5193302701853674e-06, |
| "loss": 0.0, |
| "step": 8260 |
| }, |
| { |
| "epoch": 11.578578928946447, |
| "grad_norm": 0.004499376751482487, |
| "learning_rate": 1.5076494999192498e-06, |
| "loss": 0.0, |
| "step": 8270 |
| }, |
| { |
| "epoch": 11.592579628981449, |
| "grad_norm": 0.0029184112790971994, |
| "learning_rate": 1.4960058278861172e-06, |
| "loss": 0.0, |
| "step": 8280 |
| }, |
| { |
| "epoch": 11.60658032901645, |
| "grad_norm": 0.0003372172359377146, |
| "learning_rate": 1.4843993777733467e-06, |
| "loss": 0.0, |
| "step": 8290 |
| }, |
| { |
| "epoch": 11.620581029051452, |
| "grad_norm": 0.016875434666872025, |
| "learning_rate": 1.4728302728729105e-06, |
| "loss": 0.0288, |
| "step": 8300 |
| }, |
| { |
| "epoch": 11.634581729086454, |
| "grad_norm": 0.0007035748567432165, |
| "learning_rate": 1.4612986360800751e-06, |
| "loss": 0.0, |
| "step": 8310 |
| }, |
| { |
| "epoch": 11.648582429121456, |
| "grad_norm": 0.004649253562092781, |
| "learning_rate": 1.4498045898920988e-06, |
| "loss": 0.0, |
| "step": 8320 |
| }, |
| { |
| "epoch": 11.662583129156458, |
| "grad_norm": 0.0004209627804812044, |
| "learning_rate": 1.4383482564069195e-06, |
| "loss": 0.0002, |
| "step": 8330 |
| }, |
| { |
| "epoch": 11.67658382919146, |
| "grad_norm": 0.9587376713752747, |
| "learning_rate": 1.4269297573218648e-06, |
| "loss": 0.0001, |
| "step": 8340 |
| }, |
| { |
| "epoch": 11.690584529226461, |
| "grad_norm": 0.0003986161027569324, |
| "learning_rate": 1.4155492139323645e-06, |
| "loss": 0.0008, |
| "step": 8350 |
| }, |
| { |
| "epoch": 11.704585229261463, |
| "grad_norm": 0.006131873466074467, |
| "learning_rate": 1.4042067471306475e-06, |
| "loss": 0.0, |
| "step": 8360 |
| }, |
| { |
| "epoch": 11.718585929296465, |
| "grad_norm": 0.001703021116554737, |
| "learning_rate": 1.3929024774044748e-06, |
| "loss": 0.0001, |
| "step": 8370 |
| }, |
| { |
| "epoch": 11.732586629331466, |
| "grad_norm": 0.008844499476253986, |
| "learning_rate": 1.3816365248358404e-06, |
| "loss": 0.0, |
| "step": 8380 |
| }, |
| { |
| "epoch": 11.746587329366468, |
| "grad_norm": 0.00026406109100207686, |
| "learning_rate": 1.3704090090997163e-06, |
| "loss": 0.0, |
| "step": 8390 |
| }, |
| { |
| "epoch": 11.76058802940147, |
| "grad_norm": 0.014780262485146523, |
| "learning_rate": 1.3592200494627634e-06, |
| "loss": 0.0085, |
| "step": 8400 |
| }, |
| { |
| "epoch": 11.774588729436472, |
| "grad_norm": 0.0006615730235353112, |
| "learning_rate": 1.3480697647820796e-06, |
| "loss": 0.0001, |
| "step": 8410 |
| }, |
| { |
| "epoch": 11.788589429471473, |
| "grad_norm": 0.028282400220632553, |
| "learning_rate": 1.3369582735039232e-06, |
| "loss": 0.0, |
| "step": 8420 |
| }, |
| { |
| "epoch": 11.802590129506475, |
| "grad_norm": 0.0006141592748463154, |
| "learning_rate": 1.3258856936624636e-06, |
| "loss": 0.0189, |
| "step": 8430 |
| }, |
| { |
| "epoch": 11.816590829541477, |
| "grad_norm": 0.08630286157131195, |
| "learning_rate": 1.3148521428785287e-06, |
| "loss": 0.0001, |
| "step": 8440 |
| }, |
| { |
| "epoch": 11.830591529576479, |
| "grad_norm": 0.020284445956349373, |
| "learning_rate": 1.3038577383583474e-06, |
| "loss": 0.0, |
| "step": 8450 |
| }, |
| { |
| "epoch": 11.84459222961148, |
| "grad_norm": 5.585657119750977, |
| "learning_rate": 1.2929025968923082e-06, |
| "loss": 0.0127, |
| "step": 8460 |
| }, |
| { |
| "epoch": 11.858592929646482, |
| "grad_norm": 0.0004054057062603533, |
| "learning_rate": 1.2819868348537263e-06, |
| "loss": 0.0002, |
| "step": 8470 |
| }, |
| { |
| "epoch": 11.872593629681484, |
| "grad_norm": 0.002798704197630286, |
| "learning_rate": 1.2711105681975927e-06, |
| "loss": 0.0002, |
| "step": 8480 |
| }, |
| { |
| "epoch": 11.886594329716486, |
| "grad_norm": 0.005588957108557224, |
| "learning_rate": 1.2602739124593572e-06, |
| "loss": 0.0031, |
| "step": 8490 |
| }, |
| { |
| "epoch": 11.900595029751488, |
| "grad_norm": 0.0005446571158245206, |
| "learning_rate": 1.249476982753689e-06, |
| "loss": 0.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 11.91459572978649, |
| "grad_norm": 0.0002528883924242109, |
| "learning_rate": 1.2387198937732597e-06, |
| "loss": 0.0, |
| "step": 8510 |
| }, |
| { |
| "epoch": 11.928596429821491, |
| "grad_norm": 0.0024309209547936916, |
| "learning_rate": 1.2280027597875288e-06, |
| "loss": 0.0, |
| "step": 8520 |
| }, |
| { |
| "epoch": 11.942597129856493, |
| "grad_norm": 0.6172399520874023, |
| "learning_rate": 1.2173256946415214e-06, |
| "loss": 0.0002, |
| "step": 8530 |
| }, |
| { |
| "epoch": 11.956597829891495, |
| "grad_norm": 0.07266847789287567, |
| "learning_rate": 1.2066888117546227e-06, |
| "loss": 0.0, |
| "step": 8540 |
| }, |
| { |
| "epoch": 11.970598529926496, |
| "grad_norm": 0.23300260305404663, |
| "learning_rate": 1.196092224119374e-06, |
| "loss": 0.0001, |
| "step": 8550 |
| }, |
| { |
| "epoch": 11.984599229961498, |
| "grad_norm": 0.0053339735604822636, |
| "learning_rate": 1.1855360443002728e-06, |
| "loss": 0.0, |
| "step": 8560 |
| }, |
| { |
| "epoch": 11.9985999299965, |
| "grad_norm": 0.0010158346267417073, |
| "learning_rate": 1.1750203844325787e-06, |
| "loss": 0.0, |
| "step": 8570 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_f1": 0.8286292301679241, |
| "eval_loss": 0.79404217004776, |
| "eval_precision": 0.8305788792224167, |
| "eval_recall": 0.8285514345696291, |
| "eval_runtime": 85.049, |
| "eval_samples_per_second": 16.802, |
| "eval_steps_per_second": 8.407, |
| "step": 8571 |
| }, |
| { |
| "epoch": 12.012600630031502, |
| "grad_norm": 0.005007775500416756, |
| "learning_rate": 1.1645453562211101e-06, |
| "loss": 0.0, |
| "step": 8580 |
| }, |
| { |
| "epoch": 12.026601330066503, |
| "grad_norm": 0.9903329610824585, |
| "learning_rate": 1.1541110709390786e-06, |
| "loss": 0.0004, |
| "step": 8590 |
| }, |
| { |
| "epoch": 12.040602030101505, |
| "grad_norm": 0.0004595453501679003, |
| "learning_rate": 1.143717639426885e-06, |
| "loss": 0.0, |
| "step": 8600 |
| }, |
| { |
| "epoch": 12.054602730136507, |
| "grad_norm": 0.00516974413767457, |
| "learning_rate": 1.1333651720909621e-06, |
| "loss": 0.0, |
| "step": 8610 |
| }, |
| { |
| "epoch": 12.068603430171509, |
| "grad_norm": 0.00244425842538476, |
| "learning_rate": 1.1230537789025847e-06, |
| "loss": 0.0, |
| "step": 8620 |
| }, |
| { |
| "epoch": 12.08260413020651, |
| "grad_norm": 0.0008475360809825361, |
| "learning_rate": 1.1127835693967104e-06, |
| "loss": 0.0, |
| "step": 8630 |
| }, |
| { |
| "epoch": 12.096604830241512, |
| "grad_norm": 0.0027746877167373896, |
| "learning_rate": 1.1025546526708176e-06, |
| "loss": 0.0, |
| "step": 8640 |
| }, |
| { |
| "epoch": 12.110605530276514, |
| "grad_norm": 0.0005417768843472004, |
| "learning_rate": 1.0923671373837403e-06, |
| "loss": 0.0001, |
| "step": 8650 |
| }, |
| { |
| "epoch": 12.124606230311516, |
| "grad_norm": 0.0007752656820230186, |
| "learning_rate": 1.0822211317545167e-06, |
| "loss": 0.0, |
| "step": 8660 |
| }, |
| { |
| "epoch": 12.138606930346517, |
| "grad_norm": 0.0021129120141267776, |
| "learning_rate": 1.0721167435612402e-06, |
| "loss": 0.0, |
| "step": 8670 |
| }, |
| { |
| "epoch": 12.15260763038152, |
| "grad_norm": 0.010015271604061127, |
| "learning_rate": 1.062054080139916e-06, |
| "loss": 0.0, |
| "step": 8680 |
| }, |
| { |
| "epoch": 12.166608330416521, |
| "grad_norm": 0.00013190499157644808, |
| "learning_rate": 1.052033248383319e-06, |
| "loss": 0.0, |
| "step": 8690 |
| }, |
| { |
| "epoch": 12.180609030451523, |
| "grad_norm": 0.0027459298726171255, |
| "learning_rate": 1.0420543547398566e-06, |
| "loss": 0.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 12.194609730486524, |
| "grad_norm": 0.0001294492685701698, |
| "learning_rate": 1.03211750521244e-06, |
| "loss": 0.0029, |
| "step": 8710 |
| }, |
| { |
| "epoch": 12.208610430521526, |
| "grad_norm": 8.123584120767191e-05, |
| "learning_rate": 1.0222228053573568e-06, |
| "loss": 0.0, |
| "step": 8720 |
| }, |
| { |
| "epoch": 12.222611130556528, |
| "grad_norm": 0.06022586300969124, |
| "learning_rate": 1.0123703602831554e-06, |
| "loss": 0.0012, |
| "step": 8730 |
| }, |
| { |
| "epoch": 12.23661183059153, |
| "grad_norm": 0.0007046872633509338, |
| "learning_rate": 1.0025602746495189e-06, |
| "loss": 0.008, |
| "step": 8740 |
| }, |
| { |
| "epoch": 12.250612530626531, |
| "grad_norm": 0.01181920524686575, |
| "learning_rate": 9.92792652666159e-07, |
| "loss": 0.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 12.264613230661533, |
| "grad_norm": 0.0006115382420830429, |
| "learning_rate": 9.83067598091712e-07, |
| "loss": 0.0001, |
| "step": 8760 |
| }, |
| { |
| "epoch": 12.278613930696535, |
| "grad_norm": 0.010715884156525135, |
| "learning_rate": 9.733852142326272e-07, |
| "loss": 0.0, |
| "step": 8770 |
| }, |
| { |
| "epoch": 12.292614630731537, |
| "grad_norm": 0.0034116925671696663, |
| "learning_rate": 9.637456039420822e-07, |
| "loss": 0.0004, |
| "step": 8780 |
| }, |
| { |
| "epoch": 12.306615330766538, |
| "grad_norm": 0.006015400402247906, |
| "learning_rate": 9.54148869618875e-07, |
| "loss": 0.0286, |
| "step": 8790 |
| }, |
| { |
| "epoch": 12.32061603080154, |
| "grad_norm": 0.0004726126790046692, |
| "learning_rate": 9.445951132063502e-07, |
| "loss": 0.0003, |
| "step": 8800 |
| }, |
| { |
| "epoch": 12.334616730836542, |
| "grad_norm": 0.00167147780302912, |
| "learning_rate": 9.350844361913109e-07, |
| "loss": 0.0, |
| "step": 8810 |
| }, |
| { |
| "epoch": 12.348617430871544, |
| "grad_norm": 0.0014586917823180556, |
| "learning_rate": 9.256169396029374e-07, |
| "loss": 0.0001, |
| "step": 8820 |
| }, |
| { |
| "epoch": 12.362618130906545, |
| "grad_norm": 0.01397186890244484, |
| "learning_rate": 9.161927240117174e-07, |
| "loss": 0.0, |
| "step": 8830 |
| }, |
| { |
| "epoch": 12.376618830941547, |
| "grad_norm": 0.07062353193759918, |
| "learning_rate": 9.068118895283762e-07, |
| "loss": 0.0, |
| "step": 8840 |
| }, |
| { |
| "epoch": 12.390619530976549, |
| "grad_norm": 7.136345811886713e-05, |
| "learning_rate": 8.974745358028181e-07, |
| "loss": 0.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 12.40462023101155, |
| "grad_norm": 0.010756843723356724, |
| "learning_rate": 8.881807620230592e-07, |
| "loss": 0.0, |
| "step": 8860 |
| }, |
| { |
| "epoch": 12.418620931046553, |
| "grad_norm": 0.0017550095217302442, |
| "learning_rate": 8.789306669141795e-07, |
| "loss": 0.0, |
| "step": 8870 |
| }, |
| { |
| "epoch": 12.432621631081554, |
| "grad_norm": 6.513830661773682, |
| "learning_rate": 8.697243487372758e-07, |
| "loss": 0.0009, |
| "step": 8880 |
| }, |
| { |
| "epoch": 12.446622331116556, |
| "grad_norm": 0.014165784232318401, |
| "learning_rate": 8.605619052884106e-07, |
| "loss": 0.0, |
| "step": 8890 |
| }, |
| { |
| "epoch": 12.460623031151558, |
| "grad_norm": 0.00038699989090673625, |
| "learning_rate": 8.514434338975836e-07, |
| "loss": 0.0, |
| "step": 8900 |
| }, |
| { |
| "epoch": 12.47462373118656, |
| "grad_norm": 0.6719708442687988, |
| "learning_rate": 8.423690314276872e-07, |
| "loss": 0.0001, |
| "step": 8910 |
| }, |
| { |
| "epoch": 12.488624431221561, |
| "grad_norm": 0.0005971363862045109, |
| "learning_rate": 8.333387942734822e-07, |
| "loss": 0.0001, |
| "step": 8920 |
| }, |
| { |
| "epoch": 12.502625131256563, |
| "grad_norm": 0.03569694980978966, |
| "learning_rate": 8.243528183605782e-07, |
| "loss": 0.0, |
| "step": 8930 |
| }, |
| { |
| "epoch": 12.516625831291565, |
| "grad_norm": 0.0026051411405205727, |
| "learning_rate": 8.154111991444075e-07, |
| "loss": 0.0001, |
| "step": 8940 |
| }, |
| { |
| "epoch": 12.530626531326567, |
| "grad_norm": 0.00022519452613778412, |
| "learning_rate": 8.065140316092135e-07, |
| "loss": 0.0, |
| "step": 8950 |
| }, |
| { |
| "epoch": 12.544627231361568, |
| "grad_norm": 0.27866029739379883, |
| "learning_rate": 7.976614102670422e-07, |
| "loss": 0.0, |
| "step": 8960 |
| }, |
| { |
| "epoch": 12.55862793139657, |
| "grad_norm": 0.00033088220516219735, |
| "learning_rate": 7.888534291567401e-07, |
| "loss": 0.0, |
| "step": 8970 |
| }, |
| { |
| "epoch": 12.572628631431572, |
| "grad_norm": 0.0004716921248473227, |
| "learning_rate": 7.800901818429551e-07, |
| "loss": 0.0168, |
| "step": 8980 |
| }, |
| { |
| "epoch": 12.586629331466574, |
| "grad_norm": 8.946736488724127e-05, |
| "learning_rate": 7.713717614151334e-07, |
| "loss": 0.0, |
| "step": 8990 |
| }, |
| { |
| "epoch": 12.600630031501575, |
| "grad_norm": 0.0020332683343440294, |
| "learning_rate": 7.626982604865457e-07, |
| "loss": 0.0167, |
| "step": 9000 |
| }, |
| { |
| "epoch": 12.614630731536577, |
| "grad_norm": 0.0020869935397058725, |
| "learning_rate": 7.540697711932915e-07, |
| "loss": 0.0001, |
| "step": 9010 |
| }, |
| { |
| "epoch": 12.628631431571579, |
| "grad_norm": 0.051655784249305725, |
| "learning_rate": 7.45486385193328e-07, |
| "loss": 0.0, |
| "step": 9020 |
| }, |
| { |
| "epoch": 12.64263213160658, |
| "grad_norm": 0.0015947711654007435, |
| "learning_rate": 7.369481936654899e-07, |
| "loss": 0.0, |
| "step": 9030 |
| }, |
| { |
| "epoch": 12.656632831641582, |
| "grad_norm": 0.0003485260531306267, |
| "learning_rate": 7.284552873085249e-07, |
| "loss": 0.0004, |
| "step": 9040 |
| }, |
| { |
| "epoch": 12.670633531676584, |
| "grad_norm": 0.000255667808232829, |
| "learning_rate": 7.200077563401314e-07, |
| "loss": 0.0, |
| "step": 9050 |
| }, |
| { |
| "epoch": 12.684634231711586, |
| "grad_norm": 0.003299176227301359, |
| "learning_rate": 7.116056904959961e-07, |
| "loss": 0.0236, |
| "step": 9060 |
| }, |
| { |
| "epoch": 12.698634931746588, |
| "grad_norm": 0.0074769738130271435, |
| "learning_rate": 7.032491790288415e-07, |
| "loss": 0.0003, |
| "step": 9070 |
| }, |
| { |
| "epoch": 12.71263563178159, |
| "grad_norm": 0.0013627687003463507, |
| "learning_rate": 6.949383107074836e-07, |
| "loss": 0.0, |
| "step": 9080 |
| }, |
| { |
| "epoch": 12.726636331816591, |
| "grad_norm": 0.30319395661354065, |
| "learning_rate": 6.866731738158794e-07, |
| "loss": 0.0001, |
| "step": 9090 |
| }, |
| { |
| "epoch": 12.740637031851593, |
| "grad_norm": 0.0021405743900686502, |
| "learning_rate": 6.784538561521986e-07, |
| "loss": 0.0, |
| "step": 9100 |
| }, |
| { |
| "epoch": 12.754637731886595, |
| "grad_norm": 0.6895262598991394, |
| "learning_rate": 6.702804450278832e-07, |
| "loss": 0.0001, |
| "step": 9110 |
| }, |
| { |
| "epoch": 12.768638431921596, |
| "grad_norm": 0.0008189015206880867, |
| "learning_rate": 6.621530272667237e-07, |
| "loss": 0.0244, |
| "step": 9120 |
| }, |
| { |
| "epoch": 12.782639131956598, |
| "grad_norm": 0.0019172707106918097, |
| "learning_rate": 6.540716892039361e-07, |
| "loss": 0.0234, |
| "step": 9130 |
| }, |
| { |
| "epoch": 12.7966398319916, |
| "grad_norm": 0.0005183944012969732, |
| "learning_rate": 6.460365166852483e-07, |
| "loss": 0.0, |
| "step": 9140 |
| }, |
| { |
| "epoch": 12.810640532026602, |
| "grad_norm": 0.0005380522343330085, |
| "learning_rate": 6.380475950659815e-07, |
| "loss": 0.0, |
| "step": 9150 |
| }, |
| { |
| "epoch": 12.824641232061603, |
| "grad_norm": 0.00026877986965700984, |
| "learning_rate": 6.301050092101463e-07, |
| "loss": 0.0, |
| "step": 9160 |
| }, |
| { |
| "epoch": 12.838641932096605, |
| "grad_norm": 0.0017663196194916964, |
| "learning_rate": 6.222088434895462e-07, |
| "loss": 0.0, |
| "step": 9170 |
| }, |
| { |
| "epoch": 12.852642632131607, |
| "grad_norm": 0.0057665687054395676, |
| "learning_rate": 6.143591817828731e-07, |
| "loss": 0.0, |
| "step": 9180 |
| }, |
| { |
| "epoch": 12.866643332166609, |
| "grad_norm": 0.0044043478555977345, |
| "learning_rate": 6.065561074748194e-07, |
| "loss": 0.0001, |
| "step": 9190 |
| }, |
| { |
| "epoch": 12.88064403220161, |
| "grad_norm": 0.0019976862240582705, |
| "learning_rate": 5.987997034551984e-07, |
| "loss": 0.0001, |
| "step": 9200 |
| }, |
| { |
| "epoch": 12.894644732236612, |
| "grad_norm": 0.0017344317166134715, |
| "learning_rate": 5.910900521180518e-07, |
| "loss": 0.0001, |
| "step": 9210 |
| }, |
| { |
| "epoch": 12.908645432271614, |
| "grad_norm": 0.0007492689182981849, |
| "learning_rate": 5.834272353607867e-07, |
| "loss": 0.0, |
| "step": 9220 |
| }, |
| { |
| "epoch": 12.922646132306616, |
| "grad_norm": 0.0011607420165091753, |
| "learning_rate": 5.758113345832956e-07, |
| "loss": 0.0, |
| "step": 9230 |
| }, |
| { |
| "epoch": 12.936646832341617, |
| "grad_norm": 0.00039469017065130174, |
| "learning_rate": 5.682424306870977e-07, |
| "loss": 0.0, |
| "step": 9240 |
| }, |
| { |
| "epoch": 12.95064753237662, |
| "grad_norm": 0.002621919382363558, |
| "learning_rate": 5.607206040744789e-07, |
| "loss": 0.0003, |
| "step": 9250 |
| }, |
| { |
| "epoch": 12.964648232411621, |
| "grad_norm": 0.002265157410874963, |
| "learning_rate": 5.53245934647636e-07, |
| "loss": 0.0015, |
| "step": 9260 |
| }, |
| { |
| "epoch": 12.978648932446623, |
| "grad_norm": 0.1659821718931198, |
| "learning_rate": 5.458185018078277e-07, |
| "loss": 0.0, |
| "step": 9270 |
| }, |
| { |
| "epoch": 12.992649632481625, |
| "grad_norm": 5.271564960479736, |
| "learning_rate": 5.384383844545321e-07, |
| "loss": 0.0006, |
| "step": 9280 |
| }, |
| { |
| "epoch": 12.999649982499125, |
| "eval_f1": 0.8328847781417223, |
| "eval_loss": 0.8088664412498474, |
| "eval_precision": 0.8362640573272073, |
| "eval_recall": 0.8327501749475158, |
| "eval_runtime": 85.0045, |
| "eval_samples_per_second": 16.811, |
| "eval_steps_per_second": 8.411, |
| "step": 9285 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10710, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.852337585794294e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|