{ "best_global_step": 7998, "best_metric": 1.0033386945724487, "best_model_checkpoint": "./../../../models/LedgerBERT-SciBERT-base-v3-News-Class/2025-10-15_00-24-07/market_direction/checkpoint-7998", "epoch": 3.0, "eval_steps": 500, "global_step": 7998, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.37046413502109704, "eval_f1_macro": 0.31648220525898246, "eval_f1_weighted": 0.3428571794493407, "eval_loss": 1.0956553220748901, "eval_precision_macro": 0.3404493817232522, "eval_precision_weighted": 0.3541600044961222, "eval_recall_macro": 0.3400488233349732, "eval_recall_weighted": 0.37046413502109704, "eval_runtime": 5.1696, "eval_samples_per_second": 458.454, "eval_steps_per_second": 57.452, "step": 0 }, { "epoch": 0.00037509377344336085, "grad_norm": 6.950562000274658, "learning_rate": 0.0, "loss": 1.0664, "step": 1 }, { "epoch": 0.0037509377344336083, "grad_norm": 7.2157673835754395, "learning_rate": 3.6e-07, "loss": 1.1053, "step": 10 }, { "epoch": 0.007501875468867217, "grad_norm": 7.101637840270996, "learning_rate": 7.6e-07, "loss": 1.081, "step": 20 }, { "epoch": 0.011252813203300824, "grad_norm": 7.503627777099609, "learning_rate": 1.1600000000000001e-06, "loss": 1.1167, "step": 30 }, { "epoch": 0.015003750937734433, "grad_norm": 6.733654975891113, "learning_rate": 1.56e-06, "loss": 1.1038, "step": 40 }, { "epoch": 0.018754688672168042, "grad_norm": 6.987666130065918, "learning_rate": 1.9600000000000003e-06, "loss": 1.1025, "step": 50 }, { "epoch": 0.02250562640660165, "grad_norm": 9.358382225036621, "learning_rate": 2.3600000000000003e-06, "loss": 1.1166, "step": 60 }, { "epoch": 0.02625656414103526, "grad_norm": 6.5409040451049805, "learning_rate": 2.7600000000000003e-06, "loss": 1.0841, "step": 70 }, { "epoch": 0.030007501875468866, "grad_norm": 7.030813217163086, "learning_rate": 3.1600000000000002e-06, "loss": 1.0583, "step": 80 }, { "epoch": 0.03375843960990248, "grad_norm": 6.986401081085205, "learning_rate": 3.5600000000000002e-06, "loss": 1.1288, "step": 90 }, { "epoch": 0.037509377344336084, "grad_norm": 5.53237247467041, "learning_rate": 3.96e-06, "loss": 1.0573, "step": 100 }, { "epoch": 0.04126031507876969, "grad_norm": 14.836161613464355, "learning_rate": 4.360000000000001e-06, "loss": 1.0754, "step": 110 }, { "epoch": 0.0450112528132033, "grad_norm": 8.877525329589844, "learning_rate": 4.76e-06, "loss": 1.0753, "step": 120 }, { "epoch": 0.04876219054763691, "grad_norm": 10.311164855957031, "learning_rate": 5.1600000000000006e-06, "loss": 1.1246, "step": 130 }, { "epoch": 0.05251312828207052, "grad_norm": 5.360109329223633, "learning_rate": 5.560000000000001e-06, "loss": 1.0218, "step": 140 }, { "epoch": 0.056264066016504126, "grad_norm": 10.249133110046387, "learning_rate": 5.9600000000000005e-06, "loss": 1.0723, "step": 150 }, { "epoch": 0.06001500375093773, "grad_norm": 7.881443500518799, "learning_rate": 6.360000000000001e-06, "loss": 1.0727, "step": 160 }, { "epoch": 0.06376594148537135, "grad_norm": 5.892578601837158, "learning_rate": 6.760000000000001e-06, "loss": 1.0498, "step": 170 }, { "epoch": 0.06751687921980495, "grad_norm": 6.164844512939453, "learning_rate": 7.16e-06, "loss": 1.1182, "step": 180 }, { "epoch": 0.07126781695423856, "grad_norm": 6.351868629455566, "learning_rate": 7.5600000000000005e-06, "loss": 1.0735, "step": 190 }, { "epoch": 0.07501875468867217, "grad_norm": 6.895458698272705, "learning_rate": 7.960000000000002e-06, "loss": 1.0503, "step": 200 }, { "epoch": 0.07876969242310577, "grad_norm": 8.486842155456543, "learning_rate": 8.36e-06, "loss": 1.0965, "step": 210 }, { "epoch": 0.08252063015753938, "grad_norm": 8.301511764526367, "learning_rate": 8.76e-06, "loss": 1.1157, "step": 220 }, { "epoch": 0.08627156789197299, "grad_norm": 11.515487670898438, "learning_rate": 9.16e-06, "loss": 1.0854, "step": 230 }, { "epoch": 0.0900225056264066, "grad_norm": 6.189631938934326, "learning_rate": 9.56e-06, "loss": 1.054, "step": 240 }, { "epoch": 0.09377344336084022, "grad_norm": 4.8885393142700195, "learning_rate": 9.960000000000001e-06, "loss": 1.0693, "step": 250 }, { "epoch": 0.09752438109527382, "grad_norm": 6.190073490142822, "learning_rate": 1.036e-05, "loss": 1.0786, "step": 260 }, { "epoch": 0.10127531882970743, "grad_norm": 8.178174018859863, "learning_rate": 1.0760000000000002e-05, "loss": 1.0374, "step": 270 }, { "epoch": 0.10502625656414104, "grad_norm": 5.824592113494873, "learning_rate": 1.1160000000000002e-05, "loss": 1.0829, "step": 280 }, { "epoch": 0.10877719429857464, "grad_norm": 7.339807033538818, "learning_rate": 1.156e-05, "loss": 1.1085, "step": 290 }, { "epoch": 0.11252813203300825, "grad_norm": 6.39154577255249, "learning_rate": 1.196e-05, "loss": 1.0505, "step": 300 }, { "epoch": 0.11627906976744186, "grad_norm": 7.54710054397583, "learning_rate": 1.236e-05, "loss": 1.049, "step": 310 }, { "epoch": 0.12003000750187547, "grad_norm": 10.610452651977539, "learning_rate": 1.2760000000000001e-05, "loss": 1.1105, "step": 320 }, { "epoch": 0.12378094523630907, "grad_norm": 6.961548328399658, "learning_rate": 1.3160000000000001e-05, "loss": 1.0392, "step": 330 }, { "epoch": 0.1275318829707427, "grad_norm": 8.800139427185059, "learning_rate": 1.3560000000000002e-05, "loss": 1.1473, "step": 340 }, { "epoch": 0.1312828207051763, "grad_norm": 7.540011405944824, "learning_rate": 1.396e-05, "loss": 1.0891, "step": 350 }, { "epoch": 0.1350337584396099, "grad_norm": 11.337075233459473, "learning_rate": 1.4360000000000001e-05, "loss": 1.0715, "step": 360 }, { "epoch": 0.13878469617404351, "grad_norm": 5.6576457023620605, "learning_rate": 1.4760000000000001e-05, "loss": 1.0702, "step": 370 }, { "epoch": 0.14253563390847712, "grad_norm": 8.98009967803955, "learning_rate": 1.516e-05, "loss": 1.0752, "step": 380 }, { "epoch": 0.14628657164291073, "grad_norm": 4.932474613189697, "learning_rate": 1.556e-05, "loss": 1.0641, "step": 390 }, { "epoch": 0.15003750937734434, "grad_norm": 6.130215644836426, "learning_rate": 1.5960000000000003e-05, "loss": 1.0133, "step": 400 }, { "epoch": 0.15378844711177794, "grad_norm": 16.0273380279541, "learning_rate": 1.636e-05, "loss": 1.0442, "step": 410 }, { "epoch": 0.15753938484621155, "grad_norm": 12.93301010131836, "learning_rate": 1.6760000000000002e-05, "loss": 1.1161, "step": 420 }, { "epoch": 0.16129032258064516, "grad_norm": 9.27346420288086, "learning_rate": 1.7160000000000002e-05, "loss": 1.0539, "step": 430 }, { "epoch": 0.16504126031507876, "grad_norm": 5.5671186447143555, "learning_rate": 1.756e-05, "loss": 0.9452, "step": 440 }, { "epoch": 0.16879219804951237, "grad_norm": 7.939000606536865, "learning_rate": 1.796e-05, "loss": 1.0522, "step": 450 }, { "epoch": 0.17254313578394598, "grad_norm": 9.265899658203125, "learning_rate": 1.8360000000000004e-05, "loss": 1.0866, "step": 460 }, { "epoch": 0.17629407351837958, "grad_norm": 6.934913158416748, "learning_rate": 1.876e-05, "loss": 0.9723, "step": 470 }, { "epoch": 0.1800450112528132, "grad_norm": 6.007977485656738, "learning_rate": 1.916e-05, "loss": 0.9742, "step": 480 }, { "epoch": 0.1837959489872468, "grad_norm": 7.842029094696045, "learning_rate": 1.9560000000000002e-05, "loss": 0.9334, "step": 490 }, { "epoch": 0.18754688672168043, "grad_norm": 10.53432559967041, "learning_rate": 1.9960000000000002e-05, "loss": 0.8706, "step": 500 }, { "epoch": 0.19129782445611404, "grad_norm": 9.365771293640137, "learning_rate": 1.997599359829288e-05, "loss": 1.0399, "step": 510 }, { "epoch": 0.19504876219054765, "grad_norm": 9.351228713989258, "learning_rate": 1.99493198186183e-05, "loss": 0.9525, "step": 520 }, { "epoch": 0.19879969992498125, "grad_norm": 12.21917724609375, "learning_rate": 1.992264603894372e-05, "loss": 0.9793, "step": 530 }, { "epoch": 0.20255063765941486, "grad_norm": 17.076719284057617, "learning_rate": 1.9895972259269142e-05, "loss": 1.0403, "step": 540 }, { "epoch": 0.20630157539384847, "grad_norm": 6.928652286529541, "learning_rate": 1.9869298479594562e-05, "loss": 0.9047, "step": 550 }, { "epoch": 0.21005251312828208, "grad_norm": 6.858879089355469, "learning_rate": 1.984262469991998e-05, "loss": 1.012, "step": 560 }, { "epoch": 0.21380345086271568, "grad_norm": 5.987520217895508, "learning_rate": 1.98159509202454e-05, "loss": 0.9345, "step": 570 }, { "epoch": 0.2175543885971493, "grad_norm": 12.161517143249512, "learning_rate": 1.978927714057082e-05, "loss": 0.9955, "step": 580 }, { "epoch": 0.2213053263315829, "grad_norm": 9.229764938354492, "learning_rate": 1.976260336089624e-05, "loss": 0.998, "step": 590 }, { "epoch": 0.2250562640660165, "grad_norm": 9.257465362548828, "learning_rate": 1.973592958122166e-05, "loss": 0.9882, "step": 600 }, { "epoch": 0.2288072018004501, "grad_norm": 11.260259628295898, "learning_rate": 1.970925580154708e-05, "loss": 0.9727, "step": 610 }, { "epoch": 0.23255813953488372, "grad_norm": 5.7551984786987305, "learning_rate": 1.96825820218725e-05, "loss": 0.9139, "step": 620 }, { "epoch": 0.23630907726931732, "grad_norm": 7.264505863189697, "learning_rate": 1.9655908242197922e-05, "loss": 0.8718, "step": 630 }, { "epoch": 0.24006001500375093, "grad_norm": 13.518917083740234, "learning_rate": 1.9629234462523342e-05, "loss": 1.0478, "step": 640 }, { "epoch": 0.24381095273818454, "grad_norm": 7.133944034576416, "learning_rate": 1.960256068284876e-05, "loss": 0.951, "step": 650 }, { "epoch": 0.24756189047261815, "grad_norm": 10.491629600524902, "learning_rate": 1.957588690317418e-05, "loss": 0.9271, "step": 660 }, { "epoch": 0.25131282820705175, "grad_norm": 6.807431697845459, "learning_rate": 1.95492131234996e-05, "loss": 1.0804, "step": 670 }, { "epoch": 0.2550637659414854, "grad_norm": 9.180730819702148, "learning_rate": 1.9522539343825024e-05, "loss": 0.9079, "step": 680 }, { "epoch": 0.25881470367591897, "grad_norm": 6.459209442138672, "learning_rate": 1.9495865564150443e-05, "loss": 0.9989, "step": 690 }, { "epoch": 0.2625656414103526, "grad_norm": 5.8546929359436035, "learning_rate": 1.9469191784475863e-05, "loss": 0.951, "step": 700 }, { "epoch": 0.2663165791447862, "grad_norm": 10.301909446716309, "learning_rate": 1.9442518004801282e-05, "loss": 0.8549, "step": 710 }, { "epoch": 0.2700675168792198, "grad_norm": 17.759777069091797, "learning_rate": 1.9415844225126702e-05, "loss": 1.1818, "step": 720 }, { "epoch": 0.2738184546136534, "grad_norm": 7.105804920196533, "learning_rate": 1.938917044545212e-05, "loss": 1.034, "step": 730 }, { "epoch": 0.27756939234808703, "grad_norm": 8.125602722167969, "learning_rate": 1.936249666577754e-05, "loss": 0.9509, "step": 740 }, { "epoch": 0.2813203300825206, "grad_norm": 6.968907833099365, "learning_rate": 1.933582288610296e-05, "loss": 0.9292, "step": 750 }, { "epoch": 0.28507126781695424, "grad_norm": 9.841052055358887, "learning_rate": 1.930914910642838e-05, "loss": 1.0401, "step": 760 }, { "epoch": 0.2888222055513878, "grad_norm": 6.7177910804748535, "learning_rate": 1.9282475326753804e-05, "loss": 1.0079, "step": 770 }, { "epoch": 0.29257314328582146, "grad_norm": 8.652711868286133, "learning_rate": 1.9255801547079223e-05, "loss": 0.8986, "step": 780 }, { "epoch": 0.29632408102025504, "grad_norm": 7.266161918640137, "learning_rate": 1.9229127767404643e-05, "loss": 0.9805, "step": 790 }, { "epoch": 0.30007501875468867, "grad_norm": 7.372107982635498, "learning_rate": 1.9202453987730062e-05, "loss": 1.0254, "step": 800 }, { "epoch": 0.3038259564891223, "grad_norm": 6.467881202697754, "learning_rate": 1.9175780208055482e-05, "loss": 0.9931, "step": 810 }, { "epoch": 0.3075768942235559, "grad_norm": 8.692418098449707, "learning_rate": 1.9149106428380905e-05, "loss": 0.8585, "step": 820 }, { "epoch": 0.3113278319579895, "grad_norm": 7.981175422668457, "learning_rate": 1.9122432648706325e-05, "loss": 0.9164, "step": 830 }, { "epoch": 0.3150787696924231, "grad_norm": 11.882697105407715, "learning_rate": 1.9095758869031744e-05, "loss": 1.0325, "step": 840 }, { "epoch": 0.31882970742685673, "grad_norm": 10.736306190490723, "learning_rate": 1.9069085089357164e-05, "loss": 0.9888, "step": 850 }, { "epoch": 0.3225806451612903, "grad_norm": 5.334744453430176, "learning_rate": 1.9042411309682583e-05, "loss": 0.9364, "step": 860 }, { "epoch": 0.32633158289572395, "grad_norm": 6.579550743103027, "learning_rate": 1.9015737530008003e-05, "loss": 0.9395, "step": 870 }, { "epoch": 0.3300825206301575, "grad_norm": 7.336994171142578, "learning_rate": 1.8989063750333423e-05, "loss": 0.9363, "step": 880 }, { "epoch": 0.33383345836459116, "grad_norm": 9.523600578308105, "learning_rate": 1.8962389970658842e-05, "loss": 0.9405, "step": 890 }, { "epoch": 0.33758439609902474, "grad_norm": 9.350625038146973, "learning_rate": 1.8935716190984262e-05, "loss": 1.0351, "step": 900 }, { "epoch": 0.3413353338334584, "grad_norm": 9.00391674041748, "learning_rate": 1.8909042411309685e-05, "loss": 0.9721, "step": 910 }, { "epoch": 0.34508627156789196, "grad_norm": 5.69331693649292, "learning_rate": 1.8882368631635105e-05, "loss": 0.8811, "step": 920 }, { "epoch": 0.3488372093023256, "grad_norm": 6.127689361572266, "learning_rate": 1.8855694851960524e-05, "loss": 1.0079, "step": 930 }, { "epoch": 0.35258814703675917, "grad_norm": 11.653777122497559, "learning_rate": 1.8829021072285944e-05, "loss": 0.9518, "step": 940 }, { "epoch": 0.3563390847711928, "grad_norm": 7.30828332901001, "learning_rate": 1.8802347292611363e-05, "loss": 0.8464, "step": 950 }, { "epoch": 0.3600900225056264, "grad_norm": 9.21927547454834, "learning_rate": 1.8775673512936786e-05, "loss": 1.0584, "step": 960 }, { "epoch": 0.36384096024006, "grad_norm": 6.939789772033691, "learning_rate": 1.8748999733262206e-05, "loss": 0.9, "step": 970 }, { "epoch": 0.3675918979744936, "grad_norm": 12.434165954589844, "learning_rate": 1.8722325953587626e-05, "loss": 1.015, "step": 980 }, { "epoch": 0.37134283570892723, "grad_norm": 11.779828071594238, "learning_rate": 1.8695652173913045e-05, "loss": 0.9725, "step": 990 }, { "epoch": 0.37509377344336087, "grad_norm": 12.166790962219238, "learning_rate": 1.8668978394238465e-05, "loss": 1.0591, "step": 1000 }, { "epoch": 0.37884471117779445, "grad_norm": 8.87903881072998, "learning_rate": 1.8642304614563884e-05, "loss": 0.9767, "step": 1010 }, { "epoch": 0.3825956489122281, "grad_norm": 5.176930904388428, "learning_rate": 1.8615630834889304e-05, "loss": 0.8934, "step": 1020 }, { "epoch": 0.38634658664666166, "grad_norm": 7.772132396697998, "learning_rate": 1.8588957055214724e-05, "loss": 0.9488, "step": 1030 }, { "epoch": 0.3900975243810953, "grad_norm": 10.097055435180664, "learning_rate": 1.8562283275540143e-05, "loss": 0.9725, "step": 1040 }, { "epoch": 0.3938484621155289, "grad_norm": 10.014994621276855, "learning_rate": 1.8535609495865566e-05, "loss": 0.9432, "step": 1050 }, { "epoch": 0.3975993998499625, "grad_norm": 10.885961532592773, "learning_rate": 1.8508935716190986e-05, "loss": 1.0393, "step": 1060 }, { "epoch": 0.4013503375843961, "grad_norm": 7.621641635894775, "learning_rate": 1.8482261936516406e-05, "loss": 0.9801, "step": 1070 }, { "epoch": 0.4051012753188297, "grad_norm": 6.268519878387451, "learning_rate": 1.8455588156841825e-05, "loss": 0.9922, "step": 1080 }, { "epoch": 0.4088522130532633, "grad_norm": 6.714245796203613, "learning_rate": 1.8428914377167245e-05, "loss": 1.0355, "step": 1090 }, { "epoch": 0.41260315078769694, "grad_norm": 11.643074035644531, "learning_rate": 1.8402240597492668e-05, "loss": 1.0575, "step": 1100 }, { "epoch": 0.4163540885221305, "grad_norm": 6.439828395843506, "learning_rate": 1.8375566817818087e-05, "loss": 0.9101, "step": 1110 }, { "epoch": 0.42010502625656415, "grad_norm": 6.833279609680176, "learning_rate": 1.8348893038143507e-05, "loss": 0.935, "step": 1120 }, { "epoch": 0.42385596399099773, "grad_norm": 7.262381553649902, "learning_rate": 1.8322219258468927e-05, "loss": 0.977, "step": 1130 }, { "epoch": 0.42760690172543137, "grad_norm": 5.480360984802246, "learning_rate": 1.8295545478794346e-05, "loss": 0.8673, "step": 1140 }, { "epoch": 0.43135783945986494, "grad_norm": 8.4745454788208, "learning_rate": 1.8268871699119766e-05, "loss": 0.88, "step": 1150 }, { "epoch": 0.4351087771942986, "grad_norm": 16.769878387451172, "learning_rate": 1.8242197919445185e-05, "loss": 0.9576, "step": 1160 }, { "epoch": 0.43885971492873216, "grad_norm": 7.4179582595825195, "learning_rate": 1.8215524139770605e-05, "loss": 0.9263, "step": 1170 }, { "epoch": 0.4426106526631658, "grad_norm": 11.899470329284668, "learning_rate": 1.8188850360096028e-05, "loss": 0.9328, "step": 1180 }, { "epoch": 0.4463615903975994, "grad_norm": 8.113855361938477, "learning_rate": 1.8162176580421448e-05, "loss": 0.9684, "step": 1190 }, { "epoch": 0.450112528132033, "grad_norm": 7.619154453277588, "learning_rate": 1.8135502800746867e-05, "loss": 0.918, "step": 1200 }, { "epoch": 0.45386346586646664, "grad_norm": 7.7961602210998535, "learning_rate": 1.8108829021072287e-05, "loss": 0.8574, "step": 1210 }, { "epoch": 0.4576144036009002, "grad_norm": 8.734787940979004, "learning_rate": 1.8082155241397707e-05, "loss": 0.9009, "step": 1220 }, { "epoch": 0.46136534133533386, "grad_norm": 5.773232936859131, "learning_rate": 1.8055481461723126e-05, "loss": 1.0554, "step": 1230 }, { "epoch": 0.46511627906976744, "grad_norm": 7.872585773468018, "learning_rate": 1.802880768204855e-05, "loss": 0.8688, "step": 1240 }, { "epoch": 0.46886721680420107, "grad_norm": 7.2498602867126465, "learning_rate": 1.800213390237397e-05, "loss": 0.9726, "step": 1250 }, { "epoch": 0.47261815453863465, "grad_norm": 11.007004737854004, "learning_rate": 1.797546012269939e-05, "loss": 0.9338, "step": 1260 }, { "epoch": 0.4763690922730683, "grad_norm": 10.418313980102539, "learning_rate": 1.7948786343024808e-05, "loss": 0.9217, "step": 1270 }, { "epoch": 0.48012003000750186, "grad_norm": 11.935880661010742, "learning_rate": 1.7922112563350228e-05, "loss": 0.8656, "step": 1280 }, { "epoch": 0.4838709677419355, "grad_norm": 10.331807136535645, "learning_rate": 1.789543878367565e-05, "loss": 0.9948, "step": 1290 }, { "epoch": 0.4876219054763691, "grad_norm": 7.979977607727051, "learning_rate": 1.7868765004001067e-05, "loss": 0.9068, "step": 1300 }, { "epoch": 0.4913728432108027, "grad_norm": 7.865904808044434, "learning_rate": 1.7842091224326486e-05, "loss": 0.8362, "step": 1310 }, { "epoch": 0.4951237809452363, "grad_norm": 11.6406888961792, "learning_rate": 1.781541744465191e-05, "loss": 1.0061, "step": 1320 }, { "epoch": 0.4988747186796699, "grad_norm": 9.274069786071777, "learning_rate": 1.778874366497733e-05, "loss": 0.9448, "step": 1330 }, { "epoch": 0.5026256564141035, "grad_norm": 9.999556541442871, "learning_rate": 1.776206988530275e-05, "loss": 0.9188, "step": 1340 }, { "epoch": 0.5063765941485371, "grad_norm": 10.032958984375, "learning_rate": 1.773539610562817e-05, "loss": 0.9794, "step": 1350 }, { "epoch": 0.5101275318829708, "grad_norm": 5.453114032745361, "learning_rate": 1.7708722325953588e-05, "loss": 1.0102, "step": 1360 }, { "epoch": 0.5138784696174044, "grad_norm": 13.257373809814453, "learning_rate": 1.7682048546279008e-05, "loss": 0.9801, "step": 1370 }, { "epoch": 0.5176294073518379, "grad_norm": 5.355706691741943, "learning_rate": 1.765537476660443e-05, "loss": 0.9126, "step": 1380 }, { "epoch": 0.5213803450862715, "grad_norm": 9.768399238586426, "learning_rate": 1.762870098692985e-05, "loss": 0.9423, "step": 1390 }, { "epoch": 0.5251312828207052, "grad_norm": 8.362143516540527, "learning_rate": 1.760202720725527e-05, "loss": 1.0289, "step": 1400 }, { "epoch": 0.5288822205551388, "grad_norm": 10.58354377746582, "learning_rate": 1.757535342758069e-05, "loss": 0.9593, "step": 1410 }, { "epoch": 0.5326331582895724, "grad_norm": 8.964977264404297, "learning_rate": 1.754867964790611e-05, "loss": 1.1002, "step": 1420 }, { "epoch": 0.536384096024006, "grad_norm": 11.886764526367188, "learning_rate": 1.7522005868231532e-05, "loss": 0.842, "step": 1430 }, { "epoch": 0.5401350337584396, "grad_norm": 9.155001640319824, "learning_rate": 1.7495332088556948e-05, "loss": 1.0402, "step": 1440 }, { "epoch": 0.5438859714928732, "grad_norm": 7.865649223327637, "learning_rate": 1.7468658308882368e-05, "loss": 0.9502, "step": 1450 }, { "epoch": 0.5476369092273068, "grad_norm": 8.232137680053711, "learning_rate": 1.744198452920779e-05, "loss": 0.9042, "step": 1460 }, { "epoch": 0.5513878469617405, "grad_norm": 7.428460597991943, "learning_rate": 1.741531074953321e-05, "loss": 0.8664, "step": 1470 }, { "epoch": 0.5551387846961741, "grad_norm": 6.769949913024902, "learning_rate": 1.738863696985863e-05, "loss": 0.9676, "step": 1480 }, { "epoch": 0.5588897224306076, "grad_norm": 7.262323379516602, "learning_rate": 1.736196319018405e-05, "loss": 0.9461, "step": 1490 }, { "epoch": 0.5626406601650412, "grad_norm": 7.46332311630249, "learning_rate": 1.733528941050947e-05, "loss": 0.9928, "step": 1500 }, { "epoch": 0.5663915978994749, "grad_norm": 13.346348762512207, "learning_rate": 1.7308615630834892e-05, "loss": 0.9645, "step": 1510 }, { "epoch": 0.5701425356339085, "grad_norm": 7.057946681976318, "learning_rate": 1.7281941851160312e-05, "loss": 0.872, "step": 1520 }, { "epoch": 0.5738934733683421, "grad_norm": 11.920793533325195, "learning_rate": 1.725526807148573e-05, "loss": 0.9084, "step": 1530 }, { "epoch": 0.5776444111027756, "grad_norm": 4.696298122406006, "learning_rate": 1.722859429181115e-05, "loss": 0.9184, "step": 1540 }, { "epoch": 0.5813953488372093, "grad_norm": 9.623963356018066, "learning_rate": 1.720192051213657e-05, "loss": 0.8924, "step": 1550 }, { "epoch": 0.5851462865716429, "grad_norm": 10.262091636657715, "learning_rate": 1.717524673246199e-05, "loss": 0.9476, "step": 1560 }, { "epoch": 0.5888972243060765, "grad_norm": 10.587578773498535, "learning_rate": 1.7148572952787413e-05, "loss": 0.9443, "step": 1570 }, { "epoch": 0.5926481620405101, "grad_norm": 8.189558029174805, "learning_rate": 1.7121899173112833e-05, "loss": 0.9245, "step": 1580 }, { "epoch": 0.5963990997749438, "grad_norm": 7.582670211791992, "learning_rate": 1.709522539343825e-05, "loss": 0.8533, "step": 1590 }, { "epoch": 0.6001500375093773, "grad_norm": 8.973713874816895, "learning_rate": 1.7068551613763672e-05, "loss": 0.9197, "step": 1600 }, { "epoch": 0.6039009752438109, "grad_norm": 7.140238285064697, "learning_rate": 1.7041877834089092e-05, "loss": 0.8815, "step": 1610 }, { "epoch": 0.6076519129782446, "grad_norm": 7.83927059173584, "learning_rate": 1.701520405441451e-05, "loss": 0.9782, "step": 1620 }, { "epoch": 0.6114028507126782, "grad_norm": 6.876523494720459, "learning_rate": 1.698853027473993e-05, "loss": 0.9575, "step": 1630 }, { "epoch": 0.6151537884471118, "grad_norm": 10.362568855285645, "learning_rate": 1.696185649506535e-05, "loss": 0.8977, "step": 1640 }, { "epoch": 0.6189047261815454, "grad_norm": 9.509383201599121, "learning_rate": 1.6935182715390774e-05, "loss": 0.996, "step": 1650 }, { "epoch": 0.622655663915979, "grad_norm": 5.023642539978027, "learning_rate": 1.6908508935716193e-05, "loss": 0.9131, "step": 1660 }, { "epoch": 0.6264066016504126, "grad_norm": 6.320276260375977, "learning_rate": 1.6881835156041613e-05, "loss": 0.9765, "step": 1670 }, { "epoch": 0.6301575393848462, "grad_norm": 10.261762619018555, "learning_rate": 1.6855161376367033e-05, "loss": 0.9057, "step": 1680 }, { "epoch": 0.6339084771192798, "grad_norm": 8.115468978881836, "learning_rate": 1.6828487596692452e-05, "loss": 0.8892, "step": 1690 }, { "epoch": 0.6376594148537135, "grad_norm": 10.657661437988281, "learning_rate": 1.6801813817017875e-05, "loss": 0.9186, "step": 1700 }, { "epoch": 0.641410352588147, "grad_norm": 7.065814018249512, "learning_rate": 1.6775140037343295e-05, "loss": 0.8878, "step": 1710 }, { "epoch": 0.6451612903225806, "grad_norm": 8.048439979553223, "learning_rate": 1.6748466257668714e-05, "loss": 0.946, "step": 1720 }, { "epoch": 0.6489122280570142, "grad_norm": 10.228202819824219, "learning_rate": 1.672179247799413e-05, "loss": 0.838, "step": 1730 }, { "epoch": 0.6526631657914479, "grad_norm": 10.011300086975098, "learning_rate": 1.6695118698319554e-05, "loss": 1.0565, "step": 1740 }, { "epoch": 0.6564141035258815, "grad_norm": 8.266985893249512, "learning_rate": 1.6668444918644973e-05, "loss": 0.9523, "step": 1750 }, { "epoch": 0.660165041260315, "grad_norm": 7.511131763458252, "learning_rate": 1.6641771138970393e-05, "loss": 1.0325, "step": 1760 }, { "epoch": 0.6639159789947486, "grad_norm": 7.235232830047607, "learning_rate": 1.6615097359295813e-05, "loss": 0.9197, "step": 1770 }, { "epoch": 0.6676669167291823, "grad_norm": 8.137916564941406, "learning_rate": 1.6588423579621232e-05, "loss": 0.886, "step": 1780 }, { "epoch": 0.6714178544636159, "grad_norm": 7.320621013641357, "learning_rate": 1.6561749799946655e-05, "loss": 0.8866, "step": 1790 }, { "epoch": 0.6751687921980495, "grad_norm": 8.104268074035645, "learning_rate": 1.6535076020272075e-05, "loss": 0.9554, "step": 1800 }, { "epoch": 0.6789197299324832, "grad_norm": 8.669350624084473, "learning_rate": 1.6508402240597494e-05, "loss": 0.907, "step": 1810 }, { "epoch": 0.6826706676669168, "grad_norm": 7.718722820281982, "learning_rate": 1.6481728460922914e-05, "loss": 0.9931, "step": 1820 }, { "epoch": 0.6864216054013503, "grad_norm": 6.479692459106445, "learning_rate": 1.6455054681248334e-05, "loss": 0.9669, "step": 1830 }, { "epoch": 0.6901725431357839, "grad_norm": 5.159636497497559, "learning_rate": 1.6428380901573757e-05, "loss": 1.0003, "step": 1840 }, { "epoch": 0.6939234808702176, "grad_norm": 6.043707847595215, "learning_rate": 1.6401707121899176e-05, "loss": 0.894, "step": 1850 }, { "epoch": 0.6976744186046512, "grad_norm": 8.509610176086426, "learning_rate": 1.6375033342224596e-05, "loss": 1.0656, "step": 1860 }, { "epoch": 0.7014253563390848, "grad_norm": 10.496292114257812, "learning_rate": 1.6348359562550015e-05, "loss": 0.9162, "step": 1870 }, { "epoch": 0.7051762940735183, "grad_norm": 9.357151985168457, "learning_rate": 1.6321685782875435e-05, "loss": 0.8575, "step": 1880 }, { "epoch": 0.708927231807952, "grad_norm": 7.78256368637085, "learning_rate": 1.6295012003200855e-05, "loss": 0.7904, "step": 1890 }, { "epoch": 0.7126781695423856, "grad_norm": 6.14832067489624, "learning_rate": 1.6268338223526274e-05, "loss": 0.8348, "step": 1900 }, { "epoch": 0.7164291072768192, "grad_norm": 7.879366874694824, "learning_rate": 1.6241664443851694e-05, "loss": 0.8826, "step": 1910 }, { "epoch": 0.7201800450112528, "grad_norm": 6.204752445220947, "learning_rate": 1.6214990664177114e-05, "loss": 0.9157, "step": 1920 }, { "epoch": 0.7239309827456865, "grad_norm": 7.274019241333008, "learning_rate": 1.6188316884502537e-05, "loss": 0.8869, "step": 1930 }, { "epoch": 0.72768192048012, "grad_norm": 5.929676055908203, "learning_rate": 1.6161643104827956e-05, "loss": 0.9372, "step": 1940 }, { "epoch": 0.7314328582145536, "grad_norm": 9.161755561828613, "learning_rate": 1.6134969325153376e-05, "loss": 0.9211, "step": 1950 }, { "epoch": 0.7351837959489872, "grad_norm": 5.079675674438477, "learning_rate": 1.6108295545478795e-05, "loss": 0.8084, "step": 1960 }, { "epoch": 0.7389347336834209, "grad_norm": 8.15173053741455, "learning_rate": 1.6081621765804215e-05, "loss": 1.0033, "step": 1970 }, { "epoch": 0.7426856714178545, "grad_norm": 6.805727005004883, "learning_rate": 1.6054947986129638e-05, "loss": 1.0074, "step": 1980 }, { "epoch": 0.746436609152288, "grad_norm": 8.05391788482666, "learning_rate": 1.6028274206455058e-05, "loss": 0.9942, "step": 1990 }, { "epoch": 0.7501875468867217, "grad_norm": 6.02817440032959, "learning_rate": 1.6001600426780477e-05, "loss": 1.0494, "step": 2000 }, { "epoch": 0.7539384846211553, "grad_norm": 9.404801368713379, "learning_rate": 1.5974926647105897e-05, "loss": 0.9451, "step": 2010 }, { "epoch": 0.7576894223555889, "grad_norm": 5.526783466339111, "learning_rate": 1.5948252867431316e-05, "loss": 0.9378, "step": 2020 }, { "epoch": 0.7614403600900225, "grad_norm": 8.972588539123535, "learning_rate": 1.5921579087756736e-05, "loss": 0.9808, "step": 2030 }, { "epoch": 0.7651912978244562, "grad_norm": 4.961981296539307, "learning_rate": 1.5894905308082156e-05, "loss": 0.9078, "step": 2040 }, { "epoch": 0.7689422355588897, "grad_norm": 3.8509440422058105, "learning_rate": 1.5868231528407575e-05, "loss": 1.0518, "step": 2050 }, { "epoch": 0.7726931732933233, "grad_norm": 7.673577785491943, "learning_rate": 1.5841557748732995e-05, "loss": 0.9075, "step": 2060 }, { "epoch": 0.7764441110277569, "grad_norm": 8.731016159057617, "learning_rate": 1.5814883969058418e-05, "loss": 0.9208, "step": 2070 }, { "epoch": 0.7801950487621906, "grad_norm": 6.979492664337158, "learning_rate": 1.5788210189383838e-05, "loss": 0.8977, "step": 2080 }, { "epoch": 0.7839459864966242, "grad_norm": 8.666240692138672, "learning_rate": 1.5761536409709257e-05, "loss": 0.899, "step": 2090 }, { "epoch": 0.7876969242310577, "grad_norm": 6.528694152832031, "learning_rate": 1.5734862630034677e-05, "loss": 0.844, "step": 2100 }, { "epoch": 0.7914478619654913, "grad_norm": 7.253232479095459, "learning_rate": 1.5708188850360096e-05, "loss": 0.7766, "step": 2110 }, { "epoch": 0.795198799699925, "grad_norm": 6.888519287109375, "learning_rate": 1.568151507068552e-05, "loss": 0.9393, "step": 2120 }, { "epoch": 0.7989497374343586, "grad_norm": 6.408233165740967, "learning_rate": 1.565484129101094e-05, "loss": 1.0171, "step": 2130 }, { "epoch": 0.8027006751687922, "grad_norm": 9.36056137084961, "learning_rate": 1.562816751133636e-05, "loss": 0.9127, "step": 2140 }, { "epoch": 0.8064516129032258, "grad_norm": 11.695134162902832, "learning_rate": 1.5601493731661778e-05, "loss": 1.0232, "step": 2150 }, { "epoch": 0.8102025506376594, "grad_norm": 6.716568470001221, "learning_rate": 1.5574819951987198e-05, "loss": 0.9904, "step": 2160 }, { "epoch": 0.813953488372093, "grad_norm": 5.994268417358398, "learning_rate": 1.5548146172312617e-05, "loss": 0.8897, "step": 2170 }, { "epoch": 0.8177044261065266, "grad_norm": 8.419204711914062, "learning_rate": 1.5521472392638037e-05, "loss": 0.8315, "step": 2180 }, { "epoch": 0.8214553638409603, "grad_norm": 6.702762603759766, "learning_rate": 1.5494798612963457e-05, "loss": 0.9393, "step": 2190 }, { "epoch": 0.8252063015753939, "grad_norm": 9.53264045715332, "learning_rate": 1.5468124833288876e-05, "loss": 1.0074, "step": 2200 }, { "epoch": 0.8289572393098275, "grad_norm": 5.6720476150512695, "learning_rate": 1.54414510536143e-05, "loss": 0.7935, "step": 2210 }, { "epoch": 0.832708177044261, "grad_norm": 7.338003158569336, "learning_rate": 1.541477727393972e-05, "loss": 0.898, "step": 2220 }, { "epoch": 0.8364591147786947, "grad_norm": 6.529892444610596, "learning_rate": 1.538810349426514e-05, "loss": 0.8197, "step": 2230 }, { "epoch": 0.8402100525131283, "grad_norm": 9.971487045288086, "learning_rate": 1.5361429714590558e-05, "loss": 0.9551, "step": 2240 }, { "epoch": 0.8439609902475619, "grad_norm": 5.594128608703613, "learning_rate": 1.5334755934915978e-05, "loss": 1.1114, "step": 2250 }, { "epoch": 0.8477119279819955, "grad_norm": 5.723794460296631, "learning_rate": 1.53080821552414e-05, "loss": 0.9341, "step": 2260 }, { "epoch": 0.8514628657164292, "grad_norm": 5.728211879730225, "learning_rate": 1.528140837556682e-05, "loss": 0.9961, "step": 2270 }, { "epoch": 0.8552138034508627, "grad_norm": 7.517919063568115, "learning_rate": 1.525473459589224e-05, "loss": 0.8542, "step": 2280 }, { "epoch": 0.8589647411852963, "grad_norm": 4.70159387588501, "learning_rate": 1.522806081621766e-05, "loss": 1.0348, "step": 2290 }, { "epoch": 0.8627156789197299, "grad_norm": 5.308437347412109, "learning_rate": 1.5201387036543081e-05, "loss": 0.9645, "step": 2300 }, { "epoch": 0.8664666166541636, "grad_norm": 5.659054756164551, "learning_rate": 1.5174713256868499e-05, "loss": 0.8317, "step": 2310 }, { "epoch": 0.8702175543885972, "grad_norm": 5.970462799072266, "learning_rate": 1.5148039477193918e-05, "loss": 0.9889, "step": 2320 }, { "epoch": 0.8739684921230307, "grad_norm": 5.605343818664551, "learning_rate": 1.512136569751934e-05, "loss": 0.8545, "step": 2330 }, { "epoch": 0.8777194298574643, "grad_norm": 9.641878128051758, "learning_rate": 1.509469191784476e-05, "loss": 1.0026, "step": 2340 }, { "epoch": 0.881470367591898, "grad_norm": 9.36474323272705, "learning_rate": 1.5068018138170179e-05, "loss": 0.927, "step": 2350 }, { "epoch": 0.8852213053263316, "grad_norm": 8.28822135925293, "learning_rate": 1.50413443584956e-05, "loss": 0.9955, "step": 2360 }, { "epoch": 0.8889722430607652, "grad_norm": 7.714781284332275, "learning_rate": 1.501467057882102e-05, "loss": 0.9366, "step": 2370 }, { "epoch": 0.8927231807951987, "grad_norm": 3.879307508468628, "learning_rate": 1.498799679914644e-05, "loss": 0.9002, "step": 2380 }, { "epoch": 0.8964741185296324, "grad_norm": 5.898133754730225, "learning_rate": 1.4961323019471861e-05, "loss": 0.8564, "step": 2390 }, { "epoch": 0.900225056264066, "grad_norm": 6.275933265686035, "learning_rate": 1.493464923979728e-05, "loss": 0.9471, "step": 2400 }, { "epoch": 0.9039759939984996, "grad_norm": 6.680263519287109, "learning_rate": 1.4907975460122702e-05, "loss": 0.8609, "step": 2410 }, { "epoch": 0.9077269317329333, "grad_norm": 7.0698676109313965, "learning_rate": 1.4881301680448121e-05, "loss": 0.7758, "step": 2420 }, { "epoch": 0.9114778694673669, "grad_norm": 10.66848373413086, "learning_rate": 1.4854627900773541e-05, "loss": 0.8225, "step": 2430 }, { "epoch": 0.9152288072018004, "grad_norm": 8.714693069458008, "learning_rate": 1.4827954121098962e-05, "loss": 0.8777, "step": 2440 }, { "epoch": 0.918979744936234, "grad_norm": 31.062232971191406, "learning_rate": 1.480128034142438e-05, "loss": 1.0204, "step": 2450 }, { "epoch": 0.9227306826706677, "grad_norm": 11.140453338623047, "learning_rate": 1.47746065617498e-05, "loss": 0.9509, "step": 2460 }, { "epoch": 0.9264816204051013, "grad_norm": 6.338695526123047, "learning_rate": 1.4747932782075221e-05, "loss": 0.8125, "step": 2470 }, { "epoch": 0.9302325581395349, "grad_norm": 8.720800399780273, "learning_rate": 1.472125900240064e-05, "loss": 0.8114, "step": 2480 }, { "epoch": 0.9339834958739685, "grad_norm": 11.407164573669434, "learning_rate": 1.469458522272606e-05, "loss": 1.0623, "step": 2490 }, { "epoch": 0.9377344336084021, "grad_norm": 6.310417652130127, "learning_rate": 1.4667911443051482e-05, "loss": 0.9014, "step": 2500 }, { "epoch": 0.9414853713428357, "grad_norm": 5.94149923324585, "learning_rate": 1.4641237663376901e-05, "loss": 0.7657, "step": 2510 }, { "epoch": 0.9452363090772693, "grad_norm": 9.478999137878418, "learning_rate": 1.4614563883702323e-05, "loss": 0.8412, "step": 2520 }, { "epoch": 0.9489872468117029, "grad_norm": 8.735868453979492, "learning_rate": 1.4587890104027742e-05, "loss": 0.9043, "step": 2530 }, { "epoch": 0.9527381845461366, "grad_norm": 6.766534328460693, "learning_rate": 1.4561216324353162e-05, "loss": 0.9538, "step": 2540 }, { "epoch": 0.9564891222805701, "grad_norm": 18.577468872070312, "learning_rate": 1.4534542544678583e-05, "loss": 0.9458, "step": 2550 }, { "epoch": 0.9602400600150037, "grad_norm": 9.248088836669922, "learning_rate": 1.4507868765004003e-05, "loss": 0.913, "step": 2560 }, { "epoch": 0.9639909977494373, "grad_norm": 7.771203994750977, "learning_rate": 1.4481194985329422e-05, "loss": 0.931, "step": 2570 }, { "epoch": 0.967741935483871, "grad_norm": 7.330334663391113, "learning_rate": 1.4454521205654844e-05, "loss": 0.9681, "step": 2580 }, { "epoch": 0.9714928732183046, "grad_norm": 6.74515438079834, "learning_rate": 1.4427847425980263e-05, "loss": 0.9477, "step": 2590 }, { "epoch": 0.9752438109527382, "grad_norm": 8.954100608825684, "learning_rate": 1.4401173646305681e-05, "loss": 0.8958, "step": 2600 }, { "epoch": 0.9789947486871718, "grad_norm": 11.33262825012207, "learning_rate": 1.4374499866631103e-05, "loss": 0.7998, "step": 2610 }, { "epoch": 0.9827456864216054, "grad_norm": 7.142065048217773, "learning_rate": 1.4347826086956522e-05, "loss": 0.9897, "step": 2620 }, { "epoch": 0.986496624156039, "grad_norm": 8.922056198120117, "learning_rate": 1.4321152307281942e-05, "loss": 0.9172, "step": 2630 }, { "epoch": 0.9902475618904726, "grad_norm": 5.288200378417969, "learning_rate": 1.4294478527607363e-05, "loss": 0.8836, "step": 2640 }, { "epoch": 0.9939984996249063, "grad_norm": 10.067593574523926, "learning_rate": 1.4267804747932783e-05, "loss": 1.0019, "step": 2650 }, { "epoch": 0.9977494373593399, "grad_norm": 5.186861515045166, "learning_rate": 1.4241130968258204e-05, "loss": 0.8005, "step": 2660 }, { "epoch": 1.0, "eval_accuracy": 0.5890295358649789, "eval_f1_macro": 0.5855792301386851, "eval_f1_weighted": 0.5883403945261724, "eval_loss": 0.9054797887802124, "eval_precision_macro": 0.5964531108356991, "eval_precision_weighted": 0.5920764019753845, "eval_recall_macro": 0.5799936335134275, "eval_recall_weighted": 0.5890295358649789, "eval_runtime": 4.8377, "eval_samples_per_second": 489.903, "eval_steps_per_second": 61.393, "step": 2666 }, { "epoch": 1.0015003750937734, "grad_norm": 7.454843044281006, "learning_rate": 1.4214457188583624e-05, "loss": 0.9261, "step": 2670 }, { "epoch": 1.005251312828207, "grad_norm": 7.612959384918213, "learning_rate": 1.4187783408909043e-05, "loss": 0.8656, "step": 2680 }, { "epoch": 1.0090022505626406, "grad_norm": 5.689546585083008, "learning_rate": 1.4161109629234465e-05, "loss": 0.8539, "step": 2690 }, { "epoch": 1.0127531882970742, "grad_norm": 9.812941551208496, "learning_rate": 1.4134435849559884e-05, "loss": 0.8154, "step": 2700 }, { "epoch": 1.016504126031508, "grad_norm": 6.9208550453186035, "learning_rate": 1.4107762069885304e-05, "loss": 0.8441, "step": 2710 }, { "epoch": 1.0202550637659416, "grad_norm": 5.310056686401367, "learning_rate": 1.4081088290210725e-05, "loss": 0.9471, "step": 2720 }, { "epoch": 1.0240060015003751, "grad_norm": 9.985223770141602, "learning_rate": 1.4054414510536145e-05, "loss": 0.853, "step": 2730 }, { "epoch": 1.0277569392348087, "grad_norm": 21.524646759033203, "learning_rate": 1.4027740730861563e-05, "loss": 0.9408, "step": 2740 }, { "epoch": 1.0315078769692423, "grad_norm": 9.250083923339844, "learning_rate": 1.4001066951186984e-05, "loss": 0.8023, "step": 2750 }, { "epoch": 1.0352588147036759, "grad_norm": 6.028738975524902, "learning_rate": 1.3974393171512404e-05, "loss": 0.7849, "step": 2760 }, { "epoch": 1.0390097524381094, "grad_norm": 9.787884712219238, "learning_rate": 1.3947719391837823e-05, "loss": 0.7474, "step": 2770 }, { "epoch": 1.042760690172543, "grad_norm": 12.639663696289062, "learning_rate": 1.3921045612163244e-05, "loss": 0.8167, "step": 2780 }, { "epoch": 1.0465116279069768, "grad_norm": 15.691644668579102, "learning_rate": 1.3894371832488664e-05, "loss": 0.7467, "step": 2790 }, { "epoch": 1.0502625656414104, "grad_norm": 7.864928722381592, "learning_rate": 1.3867698052814085e-05, "loss": 0.9476, "step": 2800 }, { "epoch": 1.054013503375844, "grad_norm": 8.662647247314453, "learning_rate": 1.3841024273139505e-05, "loss": 0.8529, "step": 2810 }, { "epoch": 1.0577644411102776, "grad_norm": 8.244277954101562, "learning_rate": 1.3814350493464925e-05, "loss": 0.753, "step": 2820 }, { "epoch": 1.0615153788447111, "grad_norm": 8.806965827941895, "learning_rate": 1.3787676713790346e-05, "loss": 0.7577, "step": 2830 }, { "epoch": 1.0652663165791447, "grad_norm": 11.864466667175293, "learning_rate": 1.3761002934115766e-05, "loss": 0.8227, "step": 2840 }, { "epoch": 1.0690172543135783, "grad_norm": 16.477638244628906, "learning_rate": 1.3734329154441187e-05, "loss": 0.8603, "step": 2850 }, { "epoch": 1.072768192048012, "grad_norm": 10.029014587402344, "learning_rate": 1.3707655374766607e-05, "loss": 0.7507, "step": 2860 }, { "epoch": 1.0765191297824457, "grad_norm": 42.02882766723633, "learning_rate": 1.3680981595092026e-05, "loss": 0.8731, "step": 2870 }, { "epoch": 1.0802700675168793, "grad_norm": 11.340489387512207, "learning_rate": 1.3654307815417447e-05, "loss": 0.8736, "step": 2880 }, { "epoch": 1.0840210052513128, "grad_norm": 10.736079216003418, "learning_rate": 1.3627634035742865e-05, "loss": 0.7387, "step": 2890 }, { "epoch": 1.0877719429857464, "grad_norm": 12.158968925476074, "learning_rate": 1.3600960256068285e-05, "loss": 0.8563, "step": 2900 }, { "epoch": 1.09152288072018, "grad_norm": 4.968686103820801, "learning_rate": 1.3574286476393706e-05, "loss": 0.8865, "step": 2910 }, { "epoch": 1.0952738184546136, "grad_norm": 9.05169677734375, "learning_rate": 1.3547612696719126e-05, "loss": 0.9706, "step": 2920 }, { "epoch": 1.0990247561890472, "grad_norm": 8.993448257446289, "learning_rate": 1.3520938917044546e-05, "loss": 0.7936, "step": 2930 }, { "epoch": 1.102775693923481, "grad_norm": 9.852548599243164, "learning_rate": 1.3494265137369967e-05, "loss": 0.9188, "step": 2940 }, { "epoch": 1.1065266316579145, "grad_norm": 8.509963035583496, "learning_rate": 1.3467591357695386e-05, "loss": 0.9182, "step": 2950 }, { "epoch": 1.1102775693923481, "grad_norm": 9.74703311920166, "learning_rate": 1.3440917578020806e-05, "loss": 0.8979, "step": 2960 }, { "epoch": 1.1140285071267817, "grad_norm": 11.76938247680664, "learning_rate": 1.3414243798346227e-05, "loss": 0.8431, "step": 2970 }, { "epoch": 1.1177794448612153, "grad_norm": 8.194916725158691, "learning_rate": 1.3387570018671647e-05, "loss": 0.8794, "step": 2980 }, { "epoch": 1.1215303825956489, "grad_norm": 5.259307861328125, "learning_rate": 1.3360896238997068e-05, "loss": 0.8688, "step": 2990 }, { "epoch": 1.1252813203300824, "grad_norm": 8.892224311828613, "learning_rate": 1.3334222459322488e-05, "loss": 0.8924, "step": 3000 }, { "epoch": 1.129032258064516, "grad_norm": 10.505491256713867, "learning_rate": 1.3307548679647908e-05, "loss": 0.8347, "step": 3010 }, { "epoch": 1.1327831957989498, "grad_norm": 4.74807071685791, "learning_rate": 1.3280874899973329e-05, "loss": 0.7728, "step": 3020 }, { "epoch": 1.1365341335333834, "grad_norm": 12.980900764465332, "learning_rate": 1.3254201120298747e-05, "loss": 0.7915, "step": 3030 }, { "epoch": 1.140285071267817, "grad_norm": 12.24691104888916, "learning_rate": 1.3227527340624166e-05, "loss": 0.8422, "step": 3040 }, { "epoch": 1.1440360090022506, "grad_norm": 6.215153217315674, "learning_rate": 1.3200853560949588e-05, "loss": 0.8067, "step": 3050 }, { "epoch": 1.1477869467366841, "grad_norm": 15.73306941986084, "learning_rate": 1.3174179781275007e-05, "loss": 0.8135, "step": 3060 }, { "epoch": 1.1515378844711177, "grad_norm": 12.068921089172363, "learning_rate": 1.3147506001600427e-05, "loss": 0.7305, "step": 3070 }, { "epoch": 1.1552888222055513, "grad_norm": 6.1044464111328125, "learning_rate": 1.3120832221925848e-05, "loss": 0.9578, "step": 3080 }, { "epoch": 1.159039759939985, "grad_norm": 10.416324615478516, "learning_rate": 1.3094158442251268e-05, "loss": 0.8362, "step": 3090 }, { "epoch": 1.1627906976744187, "grad_norm": 13.548623085021973, "learning_rate": 1.3067484662576687e-05, "loss": 0.7862, "step": 3100 }, { "epoch": 1.1665416354088523, "grad_norm": 9.015273094177246, "learning_rate": 1.3040810882902109e-05, "loss": 0.8432, "step": 3110 }, { "epoch": 1.1702925731432858, "grad_norm": 4.893497467041016, "learning_rate": 1.3014137103227528e-05, "loss": 0.719, "step": 3120 }, { "epoch": 1.1740435108777194, "grad_norm": 12.783862113952637, "learning_rate": 1.298746332355295e-05, "loss": 0.9088, "step": 3130 }, { "epoch": 1.177794448612153, "grad_norm": 10.826465606689453, "learning_rate": 1.296078954387837e-05, "loss": 0.8758, "step": 3140 }, { "epoch": 1.1815453863465866, "grad_norm": 9.32836627960205, "learning_rate": 1.2934115764203789e-05, "loss": 0.7643, "step": 3150 }, { "epoch": 1.1852963240810204, "grad_norm": 9.504363059997559, "learning_rate": 1.290744198452921e-05, "loss": 0.8174, "step": 3160 }, { "epoch": 1.189047261815454, "grad_norm": 12.839066505432129, "learning_rate": 1.2880768204854628e-05, "loss": 0.7992, "step": 3170 }, { "epoch": 1.1927981995498875, "grad_norm": 9.912968635559082, "learning_rate": 1.2854094425180048e-05, "loss": 0.7793, "step": 3180 }, { "epoch": 1.196549137284321, "grad_norm": 9.632975578308105, "learning_rate": 1.2827420645505469e-05, "loss": 0.8062, "step": 3190 }, { "epoch": 1.2003000750187547, "grad_norm": 15.091144561767578, "learning_rate": 1.2800746865830889e-05, "loss": 0.8319, "step": 3200 }, { "epoch": 1.2040510127531883, "grad_norm": 9.834930419921875, "learning_rate": 1.2774073086156308e-05, "loss": 0.7946, "step": 3210 }, { "epoch": 1.2078019504876218, "grad_norm": 9.097467422485352, "learning_rate": 1.274739930648173e-05, "loss": 0.7423, "step": 3220 }, { "epoch": 1.2115528882220554, "grad_norm": 7.097741603851318, "learning_rate": 1.272072552680715e-05, "loss": 0.7668, "step": 3230 }, { "epoch": 1.215303825956489, "grad_norm": 16.66200828552246, "learning_rate": 1.269405174713257e-05, "loss": 0.8292, "step": 3240 }, { "epoch": 1.2190547636909228, "grad_norm": 4.819615840911865, "learning_rate": 1.266737796745799e-05, "loss": 0.7641, "step": 3250 }, { "epoch": 1.2228057014253564, "grad_norm": 12.379060745239258, "learning_rate": 1.264070418778341e-05, "loss": 0.7749, "step": 3260 }, { "epoch": 1.22655663915979, "grad_norm": 10.446650505065918, "learning_rate": 1.2614030408108831e-05, "loss": 0.8306, "step": 3270 }, { "epoch": 1.2303075768942235, "grad_norm": 13.330952644348145, "learning_rate": 1.258735662843425e-05, "loss": 0.7924, "step": 3280 }, { "epoch": 1.2340585146286571, "grad_norm": 11.163646697998047, "learning_rate": 1.256068284875967e-05, "loss": 0.8505, "step": 3290 }, { "epoch": 1.2378094523630907, "grad_norm": 10.235424995422363, "learning_rate": 1.2534009069085092e-05, "loss": 0.7111, "step": 3300 }, { "epoch": 1.2415603900975243, "grad_norm": 9.529205322265625, "learning_rate": 1.2507335289410511e-05, "loss": 0.9559, "step": 3310 }, { "epoch": 1.245311327831958, "grad_norm": 9.511346817016602, "learning_rate": 1.2480661509735929e-05, "loss": 1.0107, "step": 3320 }, { "epoch": 1.2490622655663917, "grad_norm": 5.115582466125488, "learning_rate": 1.245398773006135e-05, "loss": 0.8128, "step": 3330 }, { "epoch": 1.2528132033008252, "grad_norm": 10.270365715026855, "learning_rate": 1.242731395038677e-05, "loss": 0.7537, "step": 3340 }, { "epoch": 1.2565641410352588, "grad_norm": 15.309682846069336, "learning_rate": 1.240064017071219e-05, "loss": 0.776, "step": 3350 }, { "epoch": 1.2603150787696924, "grad_norm": 6.9617414474487305, "learning_rate": 1.2373966391037611e-05, "loss": 0.7818, "step": 3360 }, { "epoch": 1.264066016504126, "grad_norm": 14.111533164978027, "learning_rate": 1.234729261136303e-05, "loss": 0.8766, "step": 3370 }, { "epoch": 1.2678169542385596, "grad_norm": 15.513258934020996, "learning_rate": 1.2320618831688452e-05, "loss": 0.8124, "step": 3380 }, { "epoch": 1.2715678919729934, "grad_norm": 10.617011070251465, "learning_rate": 1.2293945052013872e-05, "loss": 0.7367, "step": 3390 }, { "epoch": 1.275318829707427, "grad_norm": 10.756956100463867, "learning_rate": 1.2267271272339291e-05, "loss": 0.9371, "step": 3400 }, { "epoch": 1.2790697674418605, "grad_norm": 20.27239990234375, "learning_rate": 1.2240597492664712e-05, "loss": 0.7812, "step": 3410 }, { "epoch": 1.282820705176294, "grad_norm": 13.26762580871582, "learning_rate": 1.2213923712990132e-05, "loss": 0.9214, "step": 3420 }, { "epoch": 1.2865716429107277, "grad_norm": 6.740780830383301, "learning_rate": 1.2187249933315552e-05, "loss": 0.7254, "step": 3430 }, { "epoch": 1.2903225806451613, "grad_norm": 8.460843086242676, "learning_rate": 1.2160576153640973e-05, "loss": 0.8793, "step": 3440 }, { "epoch": 1.2940735183795948, "grad_norm": 8.37424373626709, "learning_rate": 1.2133902373966393e-05, "loss": 0.7178, "step": 3450 }, { "epoch": 1.2978244561140286, "grad_norm": 9.57453441619873, "learning_rate": 1.210722859429181e-05, "loss": 0.9584, "step": 3460 }, { "epoch": 1.301575393848462, "grad_norm": 15.27446460723877, "learning_rate": 1.2080554814617232e-05, "loss": 0.7314, "step": 3470 }, { "epoch": 1.3053263315828958, "grad_norm": 16.266162872314453, "learning_rate": 1.2053881034942651e-05, "loss": 0.8651, "step": 3480 }, { "epoch": 1.3090772693173294, "grad_norm": 9.161102294921875, "learning_rate": 1.2027207255268071e-05, "loss": 0.7086, "step": 3490 }, { "epoch": 1.312828207051763, "grad_norm": 12.645145416259766, "learning_rate": 1.2000533475593492e-05, "loss": 0.874, "step": 3500 }, { "epoch": 1.3165791447861965, "grad_norm": 9.018929481506348, "learning_rate": 1.1973859695918912e-05, "loss": 0.7457, "step": 3510 }, { "epoch": 1.32033008252063, "grad_norm": 10.96903133392334, "learning_rate": 1.1947185916244333e-05, "loss": 0.865, "step": 3520 }, { "epoch": 1.3240810202550637, "grad_norm": 15.08077621459961, "learning_rate": 1.1920512136569753e-05, "loss": 0.8127, "step": 3530 }, { "epoch": 1.3278319579894973, "grad_norm": 6.171741962432861, "learning_rate": 1.1893838356895173e-05, "loss": 0.7038, "step": 3540 }, { "epoch": 1.331582895723931, "grad_norm": 12.167604446411133, "learning_rate": 1.1867164577220594e-05, "loss": 0.7373, "step": 3550 }, { "epoch": 1.3353338334583646, "grad_norm": 12.859063148498535, "learning_rate": 1.1840490797546013e-05, "loss": 0.8292, "step": 3560 }, { "epoch": 1.3390847711927982, "grad_norm": 9.17769718170166, "learning_rate": 1.1813817017871435e-05, "loss": 0.8117, "step": 3570 }, { "epoch": 1.3428357089272318, "grad_norm": 7.380620002746582, "learning_rate": 1.1787143238196854e-05, "loss": 0.7943, "step": 3580 }, { "epoch": 1.3465866466616654, "grad_norm": 19.143110275268555, "learning_rate": 1.1760469458522274e-05, "loss": 0.7798, "step": 3590 }, { "epoch": 1.350337584396099, "grad_norm": 14.915560722351074, "learning_rate": 1.1733795678847695e-05, "loss": 0.8568, "step": 3600 }, { "epoch": 1.3540885221305325, "grad_norm": 16.487377166748047, "learning_rate": 1.1707121899173113e-05, "loss": 0.8586, "step": 3610 }, { "epoch": 1.3578394598649663, "grad_norm": 9.255929946899414, "learning_rate": 1.1680448119498533e-05, "loss": 0.7957, "step": 3620 }, { "epoch": 1.3615903975994, "grad_norm": 12.38227653503418, "learning_rate": 1.1653774339823954e-05, "loss": 0.7957, "step": 3630 }, { "epoch": 1.3653413353338335, "grad_norm": 10.949649810791016, "learning_rate": 1.1627100560149374e-05, "loss": 0.6871, "step": 3640 }, { "epoch": 1.369092273068267, "grad_norm": 7.265697956085205, "learning_rate": 1.1600426780474793e-05, "loss": 0.7341, "step": 3650 }, { "epoch": 1.3728432108027007, "grad_norm": 12.582711219787598, "learning_rate": 1.1573753000800215e-05, "loss": 0.8242, "step": 3660 }, { "epoch": 1.3765941485371342, "grad_norm": 12.345062255859375, "learning_rate": 1.1547079221125634e-05, "loss": 0.8768, "step": 3670 }, { "epoch": 1.3803450862715678, "grad_norm": 8.697713851928711, "learning_rate": 1.1520405441451054e-05, "loss": 0.855, "step": 3680 }, { "epoch": 1.3840960240060016, "grad_norm": 9.254758834838867, "learning_rate": 1.1493731661776475e-05, "loss": 0.909, "step": 3690 }, { "epoch": 1.387846961740435, "grad_norm": 9.739770889282227, "learning_rate": 1.1467057882101895e-05, "loss": 0.8582, "step": 3700 }, { "epoch": 1.3915978994748688, "grad_norm": 12.004996299743652, "learning_rate": 1.1440384102427316e-05, "loss": 0.7344, "step": 3710 }, { "epoch": 1.3953488372093024, "grad_norm": 13.092066764831543, "learning_rate": 1.1413710322752736e-05, "loss": 0.8916, "step": 3720 }, { "epoch": 1.399099774943736, "grad_norm": 12.259298324584961, "learning_rate": 1.1387036543078155e-05, "loss": 0.9096, "step": 3730 }, { "epoch": 1.4028507126781695, "grad_norm": 8.312166213989258, "learning_rate": 1.1360362763403577e-05, "loss": 0.8647, "step": 3740 }, { "epoch": 1.406601650412603, "grad_norm": 8.59150218963623, "learning_rate": 1.1333688983728995e-05, "loss": 0.9202, "step": 3750 }, { "epoch": 1.4103525881470367, "grad_norm": 8.444820404052734, "learning_rate": 1.1307015204054414e-05, "loss": 0.7343, "step": 3760 }, { "epoch": 1.4141035258814703, "grad_norm": 12.232796669006348, "learning_rate": 1.1280341424379836e-05, "loss": 0.7329, "step": 3770 }, { "epoch": 1.417854463615904, "grad_norm": 9.038057327270508, "learning_rate": 1.1253667644705255e-05, "loss": 0.8751, "step": 3780 }, { "epoch": 1.4216054013503376, "grad_norm": 5.729677200317383, "learning_rate": 1.1226993865030675e-05, "loss": 0.7319, "step": 3790 }, { "epoch": 1.4253563390847712, "grad_norm": 7.777376651763916, "learning_rate": 1.1200320085356096e-05, "loss": 0.802, "step": 3800 }, { "epoch": 1.4291072768192048, "grad_norm": 13.165481567382812, "learning_rate": 1.1173646305681516e-05, "loss": 0.7195, "step": 3810 }, { "epoch": 1.4328582145536384, "grad_norm": 10.966960906982422, "learning_rate": 1.1146972526006935e-05, "loss": 0.8234, "step": 3820 }, { "epoch": 1.436609152288072, "grad_norm": 8.237056732177734, "learning_rate": 1.1120298746332357e-05, "loss": 0.7832, "step": 3830 }, { "epoch": 1.4403600900225055, "grad_norm": 10.419988632202148, "learning_rate": 1.1093624966657776e-05, "loss": 0.8292, "step": 3840 }, { "epoch": 1.4441110277569393, "grad_norm": 14.655726432800293, "learning_rate": 1.1066951186983198e-05, "loss": 0.8523, "step": 3850 }, { "epoch": 1.447861965491373, "grad_norm": 10.38304328918457, "learning_rate": 1.1040277407308617e-05, "loss": 0.856, "step": 3860 }, { "epoch": 1.4516129032258065, "grad_norm": 13.249422073364258, "learning_rate": 1.1013603627634037e-05, "loss": 0.8403, "step": 3870 }, { "epoch": 1.45536384096024, "grad_norm": 9.854536056518555, "learning_rate": 1.0986929847959458e-05, "loss": 0.794, "step": 3880 }, { "epoch": 1.4591147786946737, "grad_norm": 11.48951530456543, "learning_rate": 1.0960256068284876e-05, "loss": 0.7569, "step": 3890 }, { "epoch": 1.4628657164291072, "grad_norm": 8.955044746398926, "learning_rate": 1.0933582288610296e-05, "loss": 0.8064, "step": 3900 }, { "epoch": 1.4666166541635408, "grad_norm": 16.088743209838867, "learning_rate": 1.0906908508935717e-05, "loss": 0.8518, "step": 3910 }, { "epoch": 1.4703675918979746, "grad_norm": 9.207806587219238, "learning_rate": 1.0880234729261137e-05, "loss": 0.875, "step": 3920 }, { "epoch": 1.474118529632408, "grad_norm": 18.738187789916992, "learning_rate": 1.0853560949586556e-05, "loss": 0.8164, "step": 3930 }, { "epoch": 1.4778694673668418, "grad_norm": 10.138594627380371, "learning_rate": 1.0826887169911977e-05, "loss": 0.791, "step": 3940 }, { "epoch": 1.4816204051012754, "grad_norm": 9.635621070861816, "learning_rate": 1.0800213390237397e-05, "loss": 0.7878, "step": 3950 }, { "epoch": 1.485371342835709, "grad_norm": 9.569879531860352, "learning_rate": 1.0773539610562818e-05, "loss": 0.8404, "step": 3960 }, { "epoch": 1.4891222805701425, "grad_norm": 9.855542182922363, "learning_rate": 1.0746865830888238e-05, "loss": 0.8726, "step": 3970 }, { "epoch": 1.492873218304576, "grad_norm": 16.710786819458008, "learning_rate": 1.0720192051213658e-05, "loss": 0.8706, "step": 3980 }, { "epoch": 1.49662415603901, "grad_norm": 13.603216171264648, "learning_rate": 1.0693518271539079e-05, "loss": 0.8437, "step": 3990 }, { "epoch": 1.5003750937734432, "grad_norm": 11.3872652053833, "learning_rate": 1.0666844491864499e-05, "loss": 0.6512, "step": 4000 }, { "epoch": 1.504126031507877, "grad_norm": 10.2975492477417, "learning_rate": 1.0640170712189918e-05, "loss": 0.8774, "step": 4010 }, { "epoch": 1.5078769692423106, "grad_norm": 7.741751194000244, "learning_rate": 1.061349693251534e-05, "loss": 0.7528, "step": 4020 }, { "epoch": 1.5116279069767442, "grad_norm": 9.902315139770508, "learning_rate": 1.0586823152840759e-05, "loss": 0.7995, "step": 4030 }, { "epoch": 1.5153788447111778, "grad_norm": 11.541082382202148, "learning_rate": 1.0560149373166177e-05, "loss": 0.7694, "step": 4040 }, { "epoch": 1.5191297824456114, "grad_norm": 8.56485366821289, "learning_rate": 1.0533475593491598e-05, "loss": 0.8002, "step": 4050 }, { "epoch": 1.5228807201800452, "grad_norm": 8.866626739501953, "learning_rate": 1.0506801813817018e-05, "loss": 0.792, "step": 4060 }, { "epoch": 1.5266316579144785, "grad_norm": 10.332854270935059, "learning_rate": 1.0480128034142438e-05, "loss": 0.7378, "step": 4070 }, { "epoch": 1.5303825956489123, "grad_norm": 8.805913925170898, "learning_rate": 1.0453454254467859e-05, "loss": 0.8287, "step": 4080 }, { "epoch": 1.5341335333833457, "grad_norm": 10.885342597961426, "learning_rate": 1.0426780474793278e-05, "loss": 0.8454, "step": 4090 }, { "epoch": 1.5378844711177795, "grad_norm": 11.047041893005371, "learning_rate": 1.04001066951187e-05, "loss": 0.8955, "step": 4100 }, { "epoch": 1.541635408852213, "grad_norm": 12.287060737609863, "learning_rate": 1.037343291544412e-05, "loss": 0.9106, "step": 4110 }, { "epoch": 1.5453863465866466, "grad_norm": 7.6913628578186035, "learning_rate": 1.0346759135769539e-05, "loss": 0.8287, "step": 4120 }, { "epoch": 1.5491372843210802, "grad_norm": 12.864625930786133, "learning_rate": 1.032008535609496e-05, "loss": 0.8176, "step": 4130 }, { "epoch": 1.5528882220555138, "grad_norm": 17.12616539001465, "learning_rate": 1.029341157642038e-05, "loss": 0.8964, "step": 4140 }, { "epoch": 1.5566391597899476, "grad_norm": 9.076611518859863, "learning_rate": 1.02667377967458e-05, "loss": 0.8222, "step": 4150 }, { "epoch": 1.560390097524381, "grad_norm": 9.327693939208984, "learning_rate": 1.0240064017071221e-05, "loss": 0.9358, "step": 4160 }, { "epoch": 1.5641410352588148, "grad_norm": 7.653916358947754, "learning_rate": 1.021339023739664e-05, "loss": 0.7721, "step": 4170 }, { "epoch": 1.5678919729932483, "grad_norm": 10.110307693481445, "learning_rate": 1.0186716457722058e-05, "loss": 0.9346, "step": 4180 }, { "epoch": 1.571642910727682, "grad_norm": 11.298696517944336, "learning_rate": 1.016004267804748e-05, "loss": 0.8456, "step": 4190 }, { "epoch": 1.5753938484621155, "grad_norm": 13.459417343139648, "learning_rate": 1.01333688983729e-05, "loss": 0.7815, "step": 4200 }, { "epoch": 1.579144786196549, "grad_norm": 16.08547592163086, "learning_rate": 1.0106695118698319e-05, "loss": 0.7656, "step": 4210 }, { "epoch": 1.5828957239309829, "grad_norm": 8.995433807373047, "learning_rate": 1.008002133902374e-05, "loss": 0.8248, "step": 4220 }, { "epoch": 1.5866466616654162, "grad_norm": 10.426254272460938, "learning_rate": 1.005334755934916e-05, "loss": 0.7957, "step": 4230 }, { "epoch": 1.59039759939985, "grad_norm": 8.310003280639648, "learning_rate": 1.0026673779674581e-05, "loss": 0.8313, "step": 4240 }, { "epoch": 1.5941485371342836, "grad_norm": 14.415204048156738, "learning_rate": 1e-05, "loss": 0.7711, "step": 4250 }, { "epoch": 1.5978994748687172, "grad_norm": 8.948083877563477, "learning_rate": 9.97332622032542e-06, "loss": 0.7868, "step": 4260 }, { "epoch": 1.6016504126031508, "grad_norm": 16.681766510009766, "learning_rate": 9.946652440650842e-06, "loss": 0.8633, "step": 4270 }, { "epoch": 1.6054013503375844, "grad_norm": 11.883402824401855, "learning_rate": 9.919978660976261e-06, "loss": 0.8195, "step": 4280 }, { "epoch": 1.6091522880720182, "grad_norm": 11.386548042297363, "learning_rate": 9.893304881301681e-06, "loss": 0.7621, "step": 4290 }, { "epoch": 1.6129032258064515, "grad_norm": 13.255663871765137, "learning_rate": 9.8666311016271e-06, "loss": 1.0233, "step": 4300 }, { "epoch": 1.6166541635408853, "grad_norm": 10.955714225769043, "learning_rate": 9.839957321952522e-06, "loss": 0.9456, "step": 4310 }, { "epoch": 1.6204051012753187, "grad_norm": 7.624833583831787, "learning_rate": 9.813283542277942e-06, "loss": 0.9029, "step": 4320 }, { "epoch": 1.6241560390097525, "grad_norm": 8.860147476196289, "learning_rate": 9.786609762603361e-06, "loss": 0.835, "step": 4330 }, { "epoch": 1.627906976744186, "grad_norm": 13.29971981048584, "learning_rate": 9.759935982928782e-06, "loss": 0.848, "step": 4340 }, { "epoch": 1.6316579144786196, "grad_norm": 10.151264190673828, "learning_rate": 9.733262203254202e-06, "loss": 0.7443, "step": 4350 }, { "epoch": 1.6354088522130532, "grad_norm": 14.21789264678955, "learning_rate": 9.706588423579622e-06, "loss": 0.908, "step": 4360 }, { "epoch": 1.6391597899474868, "grad_norm": 7.94905424118042, "learning_rate": 9.679914643905041e-06, "loss": 0.6919, "step": 4370 }, { "epoch": 1.6429107276819206, "grad_norm": 8.60908031463623, "learning_rate": 9.653240864230463e-06, "loss": 0.7309, "step": 4380 }, { "epoch": 1.646661665416354, "grad_norm": 15.03842544555664, "learning_rate": 9.626567084555882e-06, "loss": 0.9343, "step": 4390 }, { "epoch": 1.6504126031507877, "grad_norm": 11.684754371643066, "learning_rate": 9.599893304881302e-06, "loss": 0.7532, "step": 4400 }, { "epoch": 1.6541635408852213, "grad_norm": 6.24261999130249, "learning_rate": 9.573219525206723e-06, "loss": 0.8449, "step": 4410 }, { "epoch": 1.657914478619655, "grad_norm": 5.580635070800781, "learning_rate": 9.546545745532143e-06, "loss": 0.644, "step": 4420 }, { "epoch": 1.6616654163540885, "grad_norm": 13.382287979125977, "learning_rate": 9.519871965857564e-06, "loss": 0.8713, "step": 4430 }, { "epoch": 1.665416354088522, "grad_norm": 11.218451499938965, "learning_rate": 9.493198186182982e-06, "loss": 0.6552, "step": 4440 }, { "epoch": 1.6691672918229559, "grad_norm": 16.548782348632812, "learning_rate": 9.466524406508403e-06, "loss": 0.8024, "step": 4450 }, { "epoch": 1.6729182295573892, "grad_norm": 17.210647583007812, "learning_rate": 9.439850626833823e-06, "loss": 0.7543, "step": 4460 }, { "epoch": 1.676669167291823, "grad_norm": 13.630977630615234, "learning_rate": 9.413176847159243e-06, "loss": 0.8754, "step": 4470 }, { "epoch": 1.6804201050262566, "grad_norm": 13.967558860778809, "learning_rate": 9.386503067484664e-06, "loss": 0.7699, "step": 4480 }, { "epoch": 1.6841710427606902, "grad_norm": 11.707578659057617, "learning_rate": 9.359829287810083e-06, "loss": 0.8324, "step": 4490 }, { "epoch": 1.6879219804951238, "grad_norm": 9.124420166015625, "learning_rate": 9.333155508135505e-06, "loss": 0.794, "step": 4500 }, { "epoch": 1.6916729182295573, "grad_norm": 10.910788536071777, "learning_rate": 9.306481728460923e-06, "loss": 0.8241, "step": 4510 }, { "epoch": 1.6954238559639911, "grad_norm": 13.6180419921875, "learning_rate": 9.279807948786344e-06, "loss": 0.8882, "step": 4520 }, { "epoch": 1.6991747936984245, "grad_norm": 7.055276393890381, "learning_rate": 9.253134169111764e-06, "loss": 0.9011, "step": 4530 }, { "epoch": 1.7029257314328583, "grad_norm": 14.100971221923828, "learning_rate": 9.226460389437183e-06, "loss": 0.8026, "step": 4540 }, { "epoch": 1.7066766691672917, "grad_norm": 6.9184441566467285, "learning_rate": 9.199786609762605e-06, "loss": 0.6888, "step": 4550 }, { "epoch": 1.7104276069017255, "grad_norm": 9.915225982666016, "learning_rate": 9.173112830088024e-06, "loss": 0.8456, "step": 4560 }, { "epoch": 1.714178544636159, "grad_norm": 11.1101655960083, "learning_rate": 9.146439050413445e-06, "loss": 0.8979, "step": 4570 }, { "epoch": 1.7179294823705926, "grad_norm": 11.128944396972656, "learning_rate": 9.119765270738863e-06, "loss": 0.8386, "step": 4580 }, { "epoch": 1.7216804201050264, "grad_norm": 8.845916748046875, "learning_rate": 9.093091491064285e-06, "loss": 0.8375, "step": 4590 }, { "epoch": 1.7254313578394598, "grad_norm": 12.3989839553833, "learning_rate": 9.066417711389704e-06, "loss": 0.7884, "step": 4600 }, { "epoch": 1.7291822955738936, "grad_norm": 8.899964332580566, "learning_rate": 9.039743931715126e-06, "loss": 0.8391, "step": 4610 }, { "epoch": 1.732933233308327, "grad_norm": 11.830737113952637, "learning_rate": 9.013070152040545e-06, "loss": 0.836, "step": 4620 }, { "epoch": 1.7366841710427607, "grad_norm": 14.875555038452148, "learning_rate": 8.986396372365965e-06, "loss": 0.8148, "step": 4630 }, { "epoch": 1.7404351087771943, "grad_norm": 8.44090461730957, "learning_rate": 8.959722592691386e-06, "loss": 0.7033, "step": 4640 }, { "epoch": 1.744186046511628, "grad_norm": 7.954046726226807, "learning_rate": 8.933048813016804e-06, "loss": 0.8364, "step": 4650 }, { "epoch": 1.7479369842460615, "grad_norm": 14.886021614074707, "learning_rate": 8.906375033342225e-06, "loss": 0.7641, "step": 4660 }, { "epoch": 1.751687921980495, "grad_norm": 15.42341136932373, "learning_rate": 8.879701253667645e-06, "loss": 0.7152, "step": 4670 }, { "epoch": 1.7554388597149289, "grad_norm": 18.62801742553711, "learning_rate": 8.853027473993066e-06, "loss": 0.9192, "step": 4680 }, { "epoch": 1.7591897974493622, "grad_norm": 9.787707328796387, "learning_rate": 8.826353694318486e-06, "loss": 1.0479, "step": 4690 }, { "epoch": 1.762940735183796, "grad_norm": 10.803950309753418, "learning_rate": 8.799679914643906e-06, "loss": 0.7217, "step": 4700 }, { "epoch": 1.7666916729182296, "grad_norm": 5.519962787628174, "learning_rate": 8.773006134969327e-06, "loss": 0.8044, "step": 4710 }, { "epoch": 1.7704426106526632, "grad_norm": 10.77694320678711, "learning_rate": 8.746332355294745e-06, "loss": 0.7759, "step": 4720 }, { "epoch": 1.7741935483870968, "grad_norm": 8.671502113342285, "learning_rate": 8.719658575620166e-06, "loss": 0.834, "step": 4730 }, { "epoch": 1.7779444861215303, "grad_norm": 10.25809097290039, "learning_rate": 8.692984795945586e-06, "loss": 0.7088, "step": 4740 }, { "epoch": 1.7816954238559641, "grad_norm": 11.049978256225586, "learning_rate": 8.666311016271007e-06, "loss": 0.8777, "step": 4750 }, { "epoch": 1.7854463615903975, "grad_norm": 6.090721130371094, "learning_rate": 8.639637236596427e-06, "loss": 0.8311, "step": 4760 }, { "epoch": 1.7891972993248313, "grad_norm": 7.393324375152588, "learning_rate": 8.612963456921846e-06, "loss": 0.7623, "step": 4770 }, { "epoch": 1.7929482370592649, "grad_norm": 9.985932350158691, "learning_rate": 8.586289677247268e-06, "loss": 0.7164, "step": 4780 }, { "epoch": 1.7966991747936985, "grad_norm": 23.15224266052246, "learning_rate": 8.559615897572687e-06, "loss": 0.8139, "step": 4790 }, { "epoch": 1.800450112528132, "grad_norm": 15.539804458618164, "learning_rate": 8.532942117898107e-06, "loss": 0.9038, "step": 4800 }, { "epoch": 1.8042010502625656, "grad_norm": 20.424936294555664, "learning_rate": 8.506268338223526e-06, "loss": 0.9228, "step": 4810 }, { "epoch": 1.8079519879969994, "grad_norm": 12.960927963256836, "learning_rate": 8.479594558548948e-06, "loss": 0.8129, "step": 4820 }, { "epoch": 1.8117029257314328, "grad_norm": 12.578907012939453, "learning_rate": 8.452920778874367e-06, "loss": 0.7919, "step": 4830 }, { "epoch": 1.8154538634658666, "grad_norm": 9.88344955444336, "learning_rate": 8.426246999199787e-06, "loss": 0.8888, "step": 4840 }, { "epoch": 1.8192048012003, "grad_norm": 9.531432151794434, "learning_rate": 8.399573219525208e-06, "loss": 0.8074, "step": 4850 }, { "epoch": 1.8229557389347337, "grad_norm": 10.701923370361328, "learning_rate": 8.372899439850628e-06, "loss": 0.8598, "step": 4860 }, { "epoch": 1.8267066766691673, "grad_norm": 10.894915580749512, "learning_rate": 8.346225660176047e-06, "loss": 0.6588, "step": 4870 }, { "epoch": 1.8304576144036009, "grad_norm": 9.2036714553833, "learning_rate": 8.319551880501467e-06, "loss": 0.8323, "step": 4880 }, { "epoch": 1.8342085521380345, "grad_norm": 8.6634521484375, "learning_rate": 8.292878100826888e-06, "loss": 0.7526, "step": 4890 }, { "epoch": 1.837959489872468, "grad_norm": 14.781025886535645, "learning_rate": 8.266204321152308e-06, "loss": 0.6999, "step": 4900 }, { "epoch": 1.8417104276069018, "grad_norm": 12.273209571838379, "learning_rate": 8.239530541477728e-06, "loss": 0.6734, "step": 4910 }, { "epoch": 1.8454613653413352, "grad_norm": 11.974825859069824, "learning_rate": 8.212856761803149e-06, "loss": 0.7195, "step": 4920 }, { "epoch": 1.849212303075769, "grad_norm": 12.195642471313477, "learning_rate": 8.186182982128569e-06, "loss": 0.8301, "step": 4930 }, { "epoch": 1.8529632408102026, "grad_norm": 6.2414751052856445, "learning_rate": 8.159509202453988e-06, "loss": 0.8528, "step": 4940 }, { "epoch": 1.8567141785446362, "grad_norm": 9.026991844177246, "learning_rate": 8.132835422779408e-06, "loss": 0.8165, "step": 4950 }, { "epoch": 1.8604651162790697, "grad_norm": 13.745824813842773, "learning_rate": 8.106161643104829e-06, "loss": 0.9866, "step": 4960 }, { "epoch": 1.8642160540135033, "grad_norm": 8.861783027648926, "learning_rate": 8.079487863430249e-06, "loss": 0.9738, "step": 4970 }, { "epoch": 1.8679669917479371, "grad_norm": 7.437354564666748, "learning_rate": 8.052814083755668e-06, "loss": 0.7223, "step": 4980 }, { "epoch": 1.8717179294823705, "grad_norm": 14.148890495300293, "learning_rate": 8.02614030408109e-06, "loss": 0.8356, "step": 4990 }, { "epoch": 1.8754688672168043, "grad_norm": 13.688013076782227, "learning_rate": 7.99946652440651e-06, "loss": 0.8949, "step": 5000 }, { "epoch": 1.8792198049512379, "grad_norm": 16.709125518798828, "learning_rate": 7.972792744731929e-06, "loss": 0.8775, "step": 5010 }, { "epoch": 1.8829707426856714, "grad_norm": 9.73661994934082, "learning_rate": 7.946118965057348e-06, "loss": 0.832, "step": 5020 }, { "epoch": 1.886721680420105, "grad_norm": 10.575983047485352, "learning_rate": 7.91944518538277e-06, "loss": 0.7976, "step": 5030 }, { "epoch": 1.8904726181545386, "grad_norm": 9.284303665161133, "learning_rate": 7.89277140570819e-06, "loss": 0.9656, "step": 5040 }, { "epoch": 1.8942235558889724, "grad_norm": 6.543034553527832, "learning_rate": 7.866097626033609e-06, "loss": 0.7157, "step": 5050 }, { "epoch": 1.8979744936234058, "grad_norm": 5.064873218536377, "learning_rate": 7.83942384635903e-06, "loss": 0.7334, "step": 5060 }, { "epoch": 1.9017254313578396, "grad_norm": 16.654563903808594, "learning_rate": 7.81275006668445e-06, "loss": 0.7677, "step": 5070 }, { "epoch": 1.905476369092273, "grad_norm": 20.614212036132812, "learning_rate": 7.78607628700987e-06, "loss": 0.8525, "step": 5080 }, { "epoch": 1.9092273068267067, "grad_norm": 13.709310531616211, "learning_rate": 7.75940250733529e-06, "loss": 0.7237, "step": 5090 }, { "epoch": 1.9129782445611403, "grad_norm": 17.662317276000977, "learning_rate": 7.73272872766071e-06, "loss": 0.8529, "step": 5100 }, { "epoch": 1.9167291822955739, "grad_norm": 9.610177040100098, "learning_rate": 7.70605494798613e-06, "loss": 0.9447, "step": 5110 }, { "epoch": 1.9204801200300075, "grad_norm": 19.19601821899414, "learning_rate": 7.67938116831155e-06, "loss": 0.8738, "step": 5120 }, { "epoch": 1.924231057764441, "grad_norm": 8.228813171386719, "learning_rate": 7.652707388636971e-06, "loss": 0.8096, "step": 5130 }, { "epoch": 1.9279819954988748, "grad_norm": 14.475564956665039, "learning_rate": 7.626033608962391e-06, "loss": 0.7235, "step": 5140 }, { "epoch": 1.9317329332333082, "grad_norm": 17.313648223876953, "learning_rate": 7.599359829287811e-06, "loss": 0.7778, "step": 5150 }, { "epoch": 1.935483870967742, "grad_norm": 6.775811672210693, "learning_rate": 7.572686049613231e-06, "loss": 0.7627, "step": 5160 }, { "epoch": 1.9392348087021756, "grad_norm": 11.815681457519531, "learning_rate": 7.54601226993865e-06, "loss": 0.8978, "step": 5170 }, { "epoch": 1.9429857464366092, "grad_norm": 13.653975486755371, "learning_rate": 7.519338490264071e-06, "loss": 0.7364, "step": 5180 }, { "epoch": 1.9467366841710427, "grad_norm": 9.049905776977539, "learning_rate": 7.492664710589491e-06, "loss": 0.8631, "step": 5190 }, { "epoch": 1.9504876219054763, "grad_norm": 14.149343490600586, "learning_rate": 7.465990930914912e-06, "loss": 0.8279, "step": 5200 }, { "epoch": 1.9542385596399101, "grad_norm": 15.612215995788574, "learning_rate": 7.439317151240331e-06, "loss": 0.9058, "step": 5210 }, { "epoch": 1.9579894973743435, "grad_norm": 11.682372093200684, "learning_rate": 7.412643371565752e-06, "loss": 0.8859, "step": 5220 }, { "epoch": 1.9617404351087773, "grad_norm": 9.87074089050293, "learning_rate": 7.385969591891171e-06, "loss": 0.8733, "step": 5230 }, { "epoch": 1.9654913728432108, "grad_norm": 9.963356971740723, "learning_rate": 7.359295812216591e-06, "loss": 0.7134, "step": 5240 }, { "epoch": 1.9692423105776444, "grad_norm": 4.6800537109375, "learning_rate": 7.3326220325420115e-06, "loss": 0.7594, "step": 5250 }, { "epoch": 1.972993248312078, "grad_norm": 13.148963928222656, "learning_rate": 7.305948252867432e-06, "loss": 0.947, "step": 5260 }, { "epoch": 1.9767441860465116, "grad_norm": 10.073929786682129, "learning_rate": 7.279274473192852e-06, "loss": 0.8769, "step": 5270 }, { "epoch": 1.9804951237809454, "grad_norm": 11.67326831817627, "learning_rate": 7.252600693518272e-06, "loss": 0.7545, "step": 5280 }, { "epoch": 1.9842460615153787, "grad_norm": 7.498824119567871, "learning_rate": 7.2259269138436925e-06, "loss": 0.7997, "step": 5290 }, { "epoch": 1.9879969992498125, "grad_norm": 9.357927322387695, "learning_rate": 7.199253134169112e-06, "loss": 0.8754, "step": 5300 }, { "epoch": 1.991747936984246, "grad_norm": 12.50817584991455, "learning_rate": 7.172579354494532e-06, "loss": 0.79, "step": 5310 }, { "epoch": 1.9954988747186797, "grad_norm": 14.613991737365723, "learning_rate": 7.145905574819952e-06, "loss": 0.8005, "step": 5320 }, { "epoch": 1.9992498124531133, "grad_norm": 9.007129669189453, "learning_rate": 7.119231795145373e-06, "loss": 0.9009, "step": 5330 }, { "epoch": 2.0, "eval_accuracy": 0.580168776371308, "eval_f1_macro": 0.5788189436128865, "eval_f1_weighted": 0.5800618837244829, "eval_loss": 0.9064968228340149, "eval_precision_macro": 0.5789782500874713, "eval_precision_weighted": 0.5804785651892536, "eval_recall_macro": 0.5792145494510413, "eval_recall_weighted": 0.580168776371308, "eval_runtime": 4.8637, "eval_samples_per_second": 487.284, "eval_steps_per_second": 61.065, "step": 5332 }, { "epoch": 2.003000750187547, "grad_norm": 10.368429183959961, "learning_rate": 7.092558015470793e-06, "loss": 0.6842, "step": 5340 }, { "epoch": 2.0067516879219807, "grad_norm": 10.329928398132324, "learning_rate": 7.065884235796214e-06, "loss": 0.7709, "step": 5350 }, { "epoch": 2.010502625656414, "grad_norm": 13.128575325012207, "learning_rate": 7.039210456121633e-06, "loss": 0.7054, "step": 5360 }, { "epoch": 2.014253563390848, "grad_norm": 10.884894371032715, "learning_rate": 7.012536676447053e-06, "loss": 0.8103, "step": 5370 }, { "epoch": 2.018004501125281, "grad_norm": 17.327537536621094, "learning_rate": 6.985862896772473e-06, "loss": 0.6551, "step": 5380 }, { "epoch": 2.021755438859715, "grad_norm": 9.725515365600586, "learning_rate": 6.959189117097893e-06, "loss": 0.6534, "step": 5390 }, { "epoch": 2.0255063765941483, "grad_norm": 9.302525520324707, "learning_rate": 6.932515337423313e-06, "loss": 0.6741, "step": 5400 }, { "epoch": 2.029257314328582, "grad_norm": 12.362338066101074, "learning_rate": 6.905841557748734e-06, "loss": 0.7026, "step": 5410 }, { "epoch": 2.033008252063016, "grad_norm": 7.654306411743164, "learning_rate": 6.879167778074154e-06, "loss": 0.5962, "step": 5420 }, { "epoch": 2.0367591897974493, "grad_norm": 14.547067642211914, "learning_rate": 6.852493998399574e-06, "loss": 0.5578, "step": 5430 }, { "epoch": 2.040510127531883, "grad_norm": 12.792427062988281, "learning_rate": 6.8258202187249935e-06, "loss": 0.7636, "step": 5440 }, { "epoch": 2.0442610652663165, "grad_norm": 8.322968482971191, "learning_rate": 6.799146439050414e-06, "loss": 0.5881, "step": 5450 }, { "epoch": 2.0480120030007503, "grad_norm": 14.064526557922363, "learning_rate": 6.772472659375834e-06, "loss": 0.6907, "step": 5460 }, { "epoch": 2.0517629407351836, "grad_norm": 11.318249702453613, "learning_rate": 6.745798879701254e-06, "loss": 0.6179, "step": 5470 }, { "epoch": 2.0555138784696174, "grad_norm": 7.615289688110352, "learning_rate": 6.7191251000266745e-06, "loss": 0.5912, "step": 5480 }, { "epoch": 2.059264816204051, "grad_norm": 20.249950408935547, "learning_rate": 6.692451320352095e-06, "loss": 0.7777, "step": 5490 }, { "epoch": 2.0630157539384846, "grad_norm": 13.289349555969238, "learning_rate": 6.665777540677515e-06, "loss": 0.6271, "step": 5500 }, { "epoch": 2.0667666916729184, "grad_norm": 14.625772476196289, "learning_rate": 6.639103761002935e-06, "loss": 0.7248, "step": 5510 }, { "epoch": 2.0705176294073517, "grad_norm": 14.428004264831543, "learning_rate": 6.612429981328355e-06, "loss": 0.6791, "step": 5520 }, { "epoch": 2.0742685671417855, "grad_norm": 21.052837371826172, "learning_rate": 6.585756201653774e-06, "loss": 0.6244, "step": 5530 }, { "epoch": 2.078019504876219, "grad_norm": 17.523300170898438, "learning_rate": 6.559082421979195e-06, "loss": 0.6498, "step": 5540 }, { "epoch": 2.0817704426106527, "grad_norm": 9.524145126342773, "learning_rate": 6.532408642304615e-06, "loss": 0.7792, "step": 5550 }, { "epoch": 2.085521380345086, "grad_norm": 14.92676830291748, "learning_rate": 6.505734862630036e-06, "loss": 0.5748, "step": 5560 }, { "epoch": 2.08927231807952, "grad_norm": 18.87467384338379, "learning_rate": 6.479061082955455e-06, "loss": 0.7199, "step": 5570 }, { "epoch": 2.0930232558139537, "grad_norm": 10.356287002563477, "learning_rate": 6.452387303280876e-06, "loss": 0.7016, "step": 5580 }, { "epoch": 2.096774193548387, "grad_norm": 11.189599990844727, "learning_rate": 6.425713523606295e-06, "loss": 0.6511, "step": 5590 }, { "epoch": 2.100525131282821, "grad_norm": 12.267254829406738, "learning_rate": 6.399039743931715e-06, "loss": 0.6421, "step": 5600 }, { "epoch": 2.104276069017254, "grad_norm": 19.524673461914062, "learning_rate": 6.3723659642571354e-06, "loss": 0.6963, "step": 5610 }, { "epoch": 2.108027006751688, "grad_norm": 13.466742515563965, "learning_rate": 6.345692184582556e-06, "loss": 0.6727, "step": 5620 }, { "epoch": 2.1117779444861213, "grad_norm": 20.707855224609375, "learning_rate": 6.319018404907976e-06, "loss": 0.6695, "step": 5630 }, { "epoch": 2.115528882220555, "grad_norm": 15.425350189208984, "learning_rate": 6.292344625233396e-06, "loss": 0.673, "step": 5640 }, { "epoch": 2.119279819954989, "grad_norm": 5.349853038787842, "learning_rate": 6.2656708455588164e-06, "loss": 0.6275, "step": 5650 }, { "epoch": 2.1230307576894223, "grad_norm": 13.552290916442871, "learning_rate": 6.238997065884236e-06, "loss": 0.6945, "step": 5660 }, { "epoch": 2.126781695423856, "grad_norm": 17.840105056762695, "learning_rate": 6.212323286209656e-06, "loss": 0.8054, "step": 5670 }, { "epoch": 2.1305326331582894, "grad_norm": 21.012237548828125, "learning_rate": 6.185649506535076e-06, "loss": 0.7306, "step": 5680 }, { "epoch": 2.1342835708927232, "grad_norm": 13.1303129196167, "learning_rate": 6.158975726860497e-06, "loss": 0.871, "step": 5690 }, { "epoch": 2.1380345086271566, "grad_norm": 11.506791114807129, "learning_rate": 6.132301947185917e-06, "loss": 0.6722, "step": 5700 }, { "epoch": 2.1417854463615904, "grad_norm": 9.709290504455566, "learning_rate": 6.1056281675113375e-06, "loss": 0.6695, "step": 5710 }, { "epoch": 2.145536384096024, "grad_norm": 8.551689147949219, "learning_rate": 6.078954387836757e-06, "loss": 0.7001, "step": 5720 }, { "epoch": 2.1492873218304576, "grad_norm": 12.69763469696045, "learning_rate": 6.052280608162177e-06, "loss": 0.6778, "step": 5730 }, { "epoch": 2.1530382595648914, "grad_norm": 10.49093246459961, "learning_rate": 6.025606828487597e-06, "loss": 0.6671, "step": 5740 }, { "epoch": 2.1567891972993247, "grad_norm": 7.214636325836182, "learning_rate": 5.998933048813017e-06, "loss": 0.616, "step": 5750 }, { "epoch": 2.1605401350337585, "grad_norm": 8.58086109161377, "learning_rate": 5.972259269138437e-06, "loss": 0.6024, "step": 5760 }, { "epoch": 2.164291072768192, "grad_norm": 7.856104373931885, "learning_rate": 5.945585489463858e-06, "loss": 0.6202, "step": 5770 }, { "epoch": 2.1680420105026257, "grad_norm": 6.472407341003418, "learning_rate": 5.918911709789278e-06, "loss": 0.6141, "step": 5780 }, { "epoch": 2.1717929482370595, "grad_norm": 6.612668991088867, "learning_rate": 5.892237930114698e-06, "loss": 0.7841, "step": 5790 }, { "epoch": 2.175543885971493, "grad_norm": 9.869592666625977, "learning_rate": 5.865564150440118e-06, "loss": 0.5949, "step": 5800 }, { "epoch": 2.1792948237059266, "grad_norm": 12.85415267944336, "learning_rate": 5.838890370765538e-06, "loss": 0.663, "step": 5810 }, { "epoch": 2.18304576144036, "grad_norm": 22.380807876586914, "learning_rate": 5.8122165910909575e-06, "loss": 0.6532, "step": 5820 }, { "epoch": 2.186796699174794, "grad_norm": 23.866607666015625, "learning_rate": 5.785542811416378e-06, "loss": 0.6704, "step": 5830 }, { "epoch": 2.190547636909227, "grad_norm": 12.608299255371094, "learning_rate": 5.7588690317417985e-06, "loss": 0.6231, "step": 5840 }, { "epoch": 2.194298574643661, "grad_norm": 27.60419464111328, "learning_rate": 5.732195252067219e-06, "loss": 0.6369, "step": 5850 }, { "epoch": 2.1980495123780943, "grad_norm": 10.39966869354248, "learning_rate": 5.7055214723926385e-06, "loss": 0.5964, "step": 5860 }, { "epoch": 2.201800450112528, "grad_norm": 23.611059188842773, "learning_rate": 5.678847692718059e-06, "loss": 0.7365, "step": 5870 }, { "epoch": 2.205551387846962, "grad_norm": 10.59642505645752, "learning_rate": 5.652173913043479e-06, "loss": 0.6305, "step": 5880 }, { "epoch": 2.2093023255813953, "grad_norm": 15.549806594848633, "learning_rate": 5.625500133368898e-06, "loss": 0.624, "step": 5890 }, { "epoch": 2.213053263315829, "grad_norm": 17.546363830566406, "learning_rate": 5.598826353694319e-06, "loss": 0.7103, "step": 5900 }, { "epoch": 2.2168042010502624, "grad_norm": 19.833606719970703, "learning_rate": 5.572152574019739e-06, "loss": 0.4821, "step": 5910 }, { "epoch": 2.2205551387846962, "grad_norm": 18.05365562438965, "learning_rate": 5.54547879434516e-06, "loss": 0.6953, "step": 5920 }, { "epoch": 2.2243060765191296, "grad_norm": 3.1533432006835938, "learning_rate": 5.518805014670579e-06, "loss": 0.6899, "step": 5930 }, { "epoch": 2.2280570142535634, "grad_norm": 21.84452247619629, "learning_rate": 5.492131234996e-06, "loss": 0.8146, "step": 5940 }, { "epoch": 2.231807951987997, "grad_norm": 20.791135787963867, "learning_rate": 5.465457455321419e-06, "loss": 0.4915, "step": 5950 }, { "epoch": 2.2355588897224306, "grad_norm": 16.44775390625, "learning_rate": 5.438783675646839e-06, "loss": 0.5946, "step": 5960 }, { "epoch": 2.2393098274568644, "grad_norm": 8.386981964111328, "learning_rate": 5.412109895972259e-06, "loss": 0.7348, "step": 5970 }, { "epoch": 2.2430607651912977, "grad_norm": 26.47071075439453, "learning_rate": 5.38543611629768e-06, "loss": 0.6261, "step": 5980 }, { "epoch": 2.2468117029257315, "grad_norm": 11.219141960144043, "learning_rate": 5.3587623366231e-06, "loss": 0.5324, "step": 5990 }, { "epoch": 2.250562640660165, "grad_norm": 15.969422340393066, "learning_rate": 5.33208855694852e-06, "loss": 0.7459, "step": 6000 }, { "epoch": 2.2543135783945987, "grad_norm": 20.990497589111328, "learning_rate": 5.30541477727394e-06, "loss": 0.5593, "step": 6010 }, { "epoch": 2.258064516129032, "grad_norm": 10.82603645324707, "learning_rate": 5.27874099759936e-06, "loss": 0.6698, "step": 6020 }, { "epoch": 2.261815453863466, "grad_norm": 19.865243911743164, "learning_rate": 5.25206721792478e-06, "loss": 0.732, "step": 6030 }, { "epoch": 2.2655663915978996, "grad_norm": 25.37660026550293, "learning_rate": 5.2253934382502e-06, "loss": 0.5585, "step": 6040 }, { "epoch": 2.269317329332333, "grad_norm": 19.796749114990234, "learning_rate": 5.1987196585756205e-06, "loss": 0.7108, "step": 6050 }, { "epoch": 2.273068267066767, "grad_norm": 12.207030296325684, "learning_rate": 5.172045878901041e-06, "loss": 0.6683, "step": 6060 }, { "epoch": 2.2768192048012, "grad_norm": 20.979265213012695, "learning_rate": 5.1453720992264615e-06, "loss": 0.6962, "step": 6070 }, { "epoch": 2.280570142535634, "grad_norm": 13.058587074279785, "learning_rate": 5.118698319551881e-06, "loss": 0.6119, "step": 6080 }, { "epoch": 2.2843210802700673, "grad_norm": 7.18276309967041, "learning_rate": 5.092024539877301e-06, "loss": 0.606, "step": 6090 }, { "epoch": 2.288072018004501, "grad_norm": 21.568151473999023, "learning_rate": 5.065350760202721e-06, "loss": 0.6909, "step": 6100 }, { "epoch": 2.291822955738935, "grad_norm": 28.49129867553711, "learning_rate": 5.038676980528141e-06, "loss": 0.6764, "step": 6110 }, { "epoch": 2.2955738934733683, "grad_norm": 12.39367389678955, "learning_rate": 5.012003200853561e-06, "loss": 0.753, "step": 6120 }, { "epoch": 2.299324831207802, "grad_norm": 17.55943489074707, "learning_rate": 4.985329421178982e-06, "loss": 0.7103, "step": 6130 }, { "epoch": 2.3030757689422354, "grad_norm": 16.813745498657227, "learning_rate": 4.958655641504402e-06, "loss": 0.645, "step": 6140 }, { "epoch": 2.3068267066766692, "grad_norm": 20.711591720581055, "learning_rate": 4.931981861829822e-06, "loss": 0.6337, "step": 6150 }, { "epoch": 2.3105776444111026, "grad_norm": 5.449891567230225, "learning_rate": 4.905308082155241e-06, "loss": 0.6224, "step": 6160 }, { "epoch": 2.3143285821455364, "grad_norm": 15.508672714233398, "learning_rate": 4.878634302480662e-06, "loss": 0.6718, "step": 6170 }, { "epoch": 2.31807951987997, "grad_norm": 12.16860294342041, "learning_rate": 4.8519605228060815e-06, "loss": 0.6044, "step": 6180 }, { "epoch": 2.3218304576144035, "grad_norm": 16.671234130859375, "learning_rate": 4.825286743131502e-06, "loss": 0.7397, "step": 6190 }, { "epoch": 2.3255813953488373, "grad_norm": 27.95615577697754, "learning_rate": 4.798612963456922e-06, "loss": 0.6451, "step": 6200 }, { "epoch": 2.3293323330832707, "grad_norm": 23.62805938720703, "learning_rate": 4.771939183782343e-06, "loss": 0.7978, "step": 6210 }, { "epoch": 2.3330832708177045, "grad_norm": 17.226280212402344, "learning_rate": 4.7452654041077625e-06, "loss": 0.6442, "step": 6220 }, { "epoch": 2.336834208552138, "grad_norm": 22.371273040771484, "learning_rate": 4.718591624433183e-06, "loss": 0.6885, "step": 6230 }, { "epoch": 2.3405851462865717, "grad_norm": 12.560019493103027, "learning_rate": 4.6919178447586026e-06, "loss": 0.6033, "step": 6240 }, { "epoch": 2.3443360840210055, "grad_norm": 14.103109359741211, "learning_rate": 4.665244065084023e-06, "loss": 0.6683, "step": 6250 }, { "epoch": 2.348087021755439, "grad_norm": 11.051913261413574, "learning_rate": 4.638570285409443e-06, "loss": 0.7092, "step": 6260 }, { "epoch": 2.3518379594898726, "grad_norm": 15.613760948181152, "learning_rate": 4.611896505734863e-06, "loss": 0.6974, "step": 6270 }, { "epoch": 2.355588897224306, "grad_norm": 19.85428237915039, "learning_rate": 4.5852227260602836e-06, "loss": 0.6637, "step": 6280 }, { "epoch": 2.35933983495874, "grad_norm": 15.703207015991211, "learning_rate": 4.558548946385703e-06, "loss": 0.6508, "step": 6290 }, { "epoch": 2.363090772693173, "grad_norm": 11.342123985290527, "learning_rate": 4.531875166711124e-06, "loss": 0.7348, "step": 6300 }, { "epoch": 2.366841710427607, "grad_norm": 11.049941062927246, "learning_rate": 4.505201387036543e-06, "loss": 0.6421, "step": 6310 }, { "epoch": 2.3705926481620407, "grad_norm": 24.488731384277344, "learning_rate": 4.478527607361964e-06, "loss": 0.7123, "step": 6320 }, { "epoch": 2.374343585896474, "grad_norm": 14.967778205871582, "learning_rate": 4.451853827687383e-06, "loss": 0.7142, "step": 6330 }, { "epoch": 2.378094523630908, "grad_norm": 9.328021049499512, "learning_rate": 4.425180048012804e-06, "loss": 0.6251, "step": 6340 }, { "epoch": 2.3818454613653413, "grad_norm": 17.42303466796875, "learning_rate": 4.398506268338224e-06, "loss": 0.6355, "step": 6350 }, { "epoch": 2.385596399099775, "grad_norm": 15.201652526855469, "learning_rate": 4.371832488663644e-06, "loss": 0.7441, "step": 6360 }, { "epoch": 2.3893473368342084, "grad_norm": 23.0561466217041, "learning_rate": 4.345158708989064e-06, "loss": 0.6641, "step": 6370 }, { "epoch": 2.393098274568642, "grad_norm": 14.52270221710205, "learning_rate": 4.318484929314484e-06, "loss": 0.7073, "step": 6380 }, { "epoch": 2.396849212303076, "grad_norm": 13.747902870178223, "learning_rate": 4.291811149639904e-06, "loss": 0.81, "step": 6390 }, { "epoch": 2.4006001500375094, "grad_norm": 14.231673240661621, "learning_rate": 4.265137369965324e-06, "loss": 0.6939, "step": 6400 }, { "epoch": 2.404351087771943, "grad_norm": 7.63701057434082, "learning_rate": 4.2384635902907445e-06, "loss": 0.6873, "step": 6410 }, { "epoch": 2.4081020255063765, "grad_norm": 20.752126693725586, "learning_rate": 4.211789810616165e-06, "loss": 0.571, "step": 6420 }, { "epoch": 2.4118529632408103, "grad_norm": 13.460418701171875, "learning_rate": 4.185116030941585e-06, "loss": 0.6506, "step": 6430 }, { "epoch": 2.4156039009752437, "grad_norm": 8.838345527648926, "learning_rate": 4.158442251267005e-06, "loss": 0.5745, "step": 6440 }, { "epoch": 2.4193548387096775, "grad_norm": 10.570659637451172, "learning_rate": 4.131768471592425e-06, "loss": 0.6607, "step": 6450 }, { "epoch": 2.423105776444111, "grad_norm": 12.49052619934082, "learning_rate": 4.105094691917845e-06, "loss": 0.5026, "step": 6460 }, { "epoch": 2.4268567141785446, "grad_norm": 9.46437931060791, "learning_rate": 4.078420912243265e-06, "loss": 0.6005, "step": 6470 }, { "epoch": 2.430607651912978, "grad_norm": 29.9566593170166, "learning_rate": 4.051747132568685e-06, "loss": 0.6292, "step": 6480 }, { "epoch": 2.434358589647412, "grad_norm": 12.318580627441406, "learning_rate": 4.025073352894106e-06, "loss": 0.7056, "step": 6490 }, { "epoch": 2.4381095273818456, "grad_norm": 20.635848999023438, "learning_rate": 3.998399573219526e-06, "loss": 0.6663, "step": 6500 }, { "epoch": 2.441860465116279, "grad_norm": 13.231310844421387, "learning_rate": 3.971725793544946e-06, "loss": 0.7405, "step": 6510 }, { "epoch": 2.4456114028507128, "grad_norm": 16.560197830200195, "learning_rate": 3.945052013870365e-06, "loss": 0.6678, "step": 6520 }, { "epoch": 2.449362340585146, "grad_norm": 21.45167350769043, "learning_rate": 3.918378234195786e-06, "loss": 0.6032, "step": 6530 }, { "epoch": 2.45311327831958, "grad_norm": 37.360843658447266, "learning_rate": 3.891704454521205e-06, "loss": 0.8438, "step": 6540 }, { "epoch": 2.4568642160540133, "grad_norm": 30.98585319519043, "learning_rate": 3.865030674846626e-06, "loss": 0.6035, "step": 6550 }, { "epoch": 2.460615153788447, "grad_norm": 13.408466339111328, "learning_rate": 3.838356895172046e-06, "loss": 0.5181, "step": 6560 }, { "epoch": 2.464366091522881, "grad_norm": 16.84627914428711, "learning_rate": 3.8116831154974664e-06, "loss": 0.6353, "step": 6570 }, { "epoch": 2.4681170292573142, "grad_norm": 19.02153968811035, "learning_rate": 3.785009335822886e-06, "loss": 0.6052, "step": 6580 }, { "epoch": 2.471867966991748, "grad_norm": 13.263850212097168, "learning_rate": 3.7583355561483065e-06, "loss": 0.8126, "step": 6590 }, { "epoch": 2.4756189047261814, "grad_norm": 22.753215789794922, "learning_rate": 3.731661776473727e-06, "loss": 0.6449, "step": 6600 }, { "epoch": 2.479369842460615, "grad_norm": 13.979212760925293, "learning_rate": 3.704987996799147e-06, "loss": 0.7421, "step": 6610 }, { "epoch": 2.4831207801950486, "grad_norm": 23.614389419555664, "learning_rate": 3.6783142171245666e-06, "loss": 0.8168, "step": 6620 }, { "epoch": 2.4868717179294824, "grad_norm": 7.810019493103027, "learning_rate": 3.651640437449987e-06, "loss": 0.6301, "step": 6630 }, { "epoch": 2.490622655663916, "grad_norm": 17.90605926513672, "learning_rate": 3.624966657775407e-06, "loss": 0.6369, "step": 6640 }, { "epoch": 2.4943735933983495, "grad_norm": 10.375251770019531, "learning_rate": 3.598292878100827e-06, "loss": 0.6254, "step": 6650 }, { "epoch": 2.4981245311327833, "grad_norm": 15.813028335571289, "learning_rate": 3.571619098426247e-06, "loss": 0.7866, "step": 6660 }, { "epoch": 2.5018754688672167, "grad_norm": 8.438957214355469, "learning_rate": 3.5449453187516676e-06, "loss": 0.7288, "step": 6670 }, { "epoch": 2.5056264066016505, "grad_norm": 23.076040267944336, "learning_rate": 3.5182715390770877e-06, "loss": 0.6743, "step": 6680 }, { "epoch": 2.509377344336084, "grad_norm": 14.966166496276855, "learning_rate": 3.4915977594025073e-06, "loss": 0.6408, "step": 6690 }, { "epoch": 2.5131282820705176, "grad_norm": 19.553081512451172, "learning_rate": 3.4649239797279277e-06, "loss": 0.613, "step": 6700 }, { "epoch": 2.5168792198049514, "grad_norm": 12.050764083862305, "learning_rate": 3.4382502000533478e-06, "loss": 0.6547, "step": 6710 }, { "epoch": 2.520630157539385, "grad_norm": 14.52085018157959, "learning_rate": 3.411576420378768e-06, "loss": 0.7239, "step": 6720 }, { "epoch": 2.5243810952738186, "grad_norm": 20.222137451171875, "learning_rate": 3.384902640704188e-06, "loss": 0.7656, "step": 6730 }, { "epoch": 2.528132033008252, "grad_norm": 14.729280471801758, "learning_rate": 3.3582288610296083e-06, "loss": 0.6013, "step": 6740 }, { "epoch": 2.5318829707426858, "grad_norm": 21.984832763671875, "learning_rate": 3.3315550813550284e-06, "loss": 0.6453, "step": 6750 }, { "epoch": 2.535633908477119, "grad_norm": 19.643138885498047, "learning_rate": 3.304881301680448e-06, "loss": 0.8221, "step": 6760 }, { "epoch": 2.539384846211553, "grad_norm": 17.281740188598633, "learning_rate": 3.2782075220058684e-06, "loss": 0.6348, "step": 6770 }, { "epoch": 2.5431357839459867, "grad_norm": 17.821035385131836, "learning_rate": 3.251533742331289e-06, "loss": 0.5855, "step": 6780 }, { "epoch": 2.54688672168042, "grad_norm": 14.015131950378418, "learning_rate": 3.224859962656709e-06, "loss": 0.5544, "step": 6790 }, { "epoch": 2.550637659414854, "grad_norm": 10.391494750976562, "learning_rate": 3.1981861829821286e-06, "loss": 0.588, "step": 6800 }, { "epoch": 2.5543885971492872, "grad_norm": 14.990039825439453, "learning_rate": 3.171512403307549e-06, "loss": 0.5782, "step": 6810 }, { "epoch": 2.558139534883721, "grad_norm": 13.448775291442871, "learning_rate": 3.144838623632969e-06, "loss": 0.8525, "step": 6820 }, { "epoch": 2.5618904726181544, "grad_norm": 13.461121559143066, "learning_rate": 3.118164843958389e-06, "loss": 0.6256, "step": 6830 }, { "epoch": 2.565641410352588, "grad_norm": 13.295988082885742, "learning_rate": 3.091491064283809e-06, "loss": 0.7059, "step": 6840 }, { "epoch": 2.569392348087022, "grad_norm": 14.871612548828125, "learning_rate": 3.0648172846092296e-06, "loss": 0.6338, "step": 6850 }, { "epoch": 2.5731432858214554, "grad_norm": 30.46957778930664, "learning_rate": 3.0381435049346496e-06, "loss": 0.7072, "step": 6860 }, { "epoch": 2.5768942235558887, "grad_norm": 20.661733627319336, "learning_rate": 3.0114697252600693e-06, "loss": 0.6634, "step": 6870 }, { "epoch": 2.5806451612903225, "grad_norm": 10.35488224029541, "learning_rate": 2.9847959455854897e-06, "loss": 0.5709, "step": 6880 }, { "epoch": 2.5843960990247563, "grad_norm": 22.383169174194336, "learning_rate": 2.9581221659109098e-06, "loss": 0.433, "step": 6890 }, { "epoch": 2.5881470367591897, "grad_norm": 21.173015594482422, "learning_rate": 2.93144838623633e-06, "loss": 0.6109, "step": 6900 }, { "epoch": 2.5918979744936235, "grad_norm": 25.366735458374023, "learning_rate": 2.90477460656175e-06, "loss": 0.8177, "step": 6910 }, { "epoch": 2.5956489122280573, "grad_norm": 18.91875457763672, "learning_rate": 2.8781008268871703e-06, "loss": 0.7214, "step": 6920 }, { "epoch": 2.5993998499624906, "grad_norm": 12.457830429077148, "learning_rate": 2.8514270472125903e-06, "loss": 0.553, "step": 6930 }, { "epoch": 2.603150787696924, "grad_norm": 6.222160816192627, "learning_rate": 2.82475326753801e-06, "loss": 0.679, "step": 6940 }, { "epoch": 2.606901725431358, "grad_norm": 8.99958324432373, "learning_rate": 2.7980794878634304e-06, "loss": 0.5929, "step": 6950 }, { "epoch": 2.6106526631657916, "grad_norm": 11.063492774963379, "learning_rate": 2.771405708188851e-06, "loss": 0.5185, "step": 6960 }, { "epoch": 2.614403600900225, "grad_norm": 10.320928573608398, "learning_rate": 2.744731928514271e-06, "loss": 0.5286, "step": 6970 }, { "epoch": 2.6181545386346587, "grad_norm": 13.718670845031738, "learning_rate": 2.7180581488396905e-06, "loss": 0.6508, "step": 6980 }, { "epoch": 2.6219054763690925, "grad_norm": 10.613819122314453, "learning_rate": 2.691384369165111e-06, "loss": 0.5805, "step": 6990 }, { "epoch": 2.625656414103526, "grad_norm": 22.765199661254883, "learning_rate": 2.664710589490531e-06, "loss": 0.6691, "step": 7000 }, { "epoch": 2.6294073518379593, "grad_norm": 12.34518051147461, "learning_rate": 2.638036809815951e-06, "loss": 0.6577, "step": 7010 }, { "epoch": 2.633158289572393, "grad_norm": 15.861391067504883, "learning_rate": 2.611363030141371e-06, "loss": 0.5159, "step": 7020 }, { "epoch": 2.636909227306827, "grad_norm": 7.271751880645752, "learning_rate": 2.5846892504667916e-06, "loss": 0.6844, "step": 7030 }, { "epoch": 2.64066016504126, "grad_norm": 20.930856704711914, "learning_rate": 2.5580154707922116e-06, "loss": 0.7827, "step": 7040 }, { "epoch": 2.644411102775694, "grad_norm": 28.042675018310547, "learning_rate": 2.5313416911176312e-06, "loss": 0.6383, "step": 7050 }, { "epoch": 2.6481620405101274, "grad_norm": 25.815296173095703, "learning_rate": 2.5046679114430517e-06, "loss": 0.6866, "step": 7060 }, { "epoch": 2.651912978244561, "grad_norm": 16.492206573486328, "learning_rate": 2.4779941317684717e-06, "loss": 0.5342, "step": 7070 }, { "epoch": 2.6556639159789945, "grad_norm": 23.266910552978516, "learning_rate": 2.4513203520938918e-06, "loss": 0.5564, "step": 7080 }, { "epoch": 2.6594148537134283, "grad_norm": 11.591928482055664, "learning_rate": 2.424646572419312e-06, "loss": 0.573, "step": 7090 }, { "epoch": 2.663165791447862, "grad_norm": 14.71267032623291, "learning_rate": 2.3979727927447323e-06, "loss": 0.6456, "step": 7100 }, { "epoch": 2.6669167291822955, "grad_norm": 7.238256454467773, "learning_rate": 2.3712990130701523e-06, "loss": 0.7731, "step": 7110 }, { "epoch": 2.6706676669167293, "grad_norm": 38.71699523925781, "learning_rate": 2.3446252333955723e-06, "loss": 0.7189, "step": 7120 }, { "epoch": 2.6744186046511627, "grad_norm": 24.029537200927734, "learning_rate": 2.3179514537209924e-06, "loss": 0.7435, "step": 7130 }, { "epoch": 2.6781695423855965, "grad_norm": 17.704763412475586, "learning_rate": 2.291277674046413e-06, "loss": 0.838, "step": 7140 }, { "epoch": 2.68192048012003, "grad_norm": 36.12045669555664, "learning_rate": 2.2646038943718325e-06, "loss": 0.7103, "step": 7150 }, { "epoch": 2.6856714178544636, "grad_norm": 20.062591552734375, "learning_rate": 2.237930114697253e-06, "loss": 0.5748, "step": 7160 }, { "epoch": 2.6894223555888974, "grad_norm": 9.567973136901855, "learning_rate": 2.211256335022673e-06, "loss": 0.7598, "step": 7170 }, { "epoch": 2.6931732933233308, "grad_norm": 19.337631225585938, "learning_rate": 2.184582555348093e-06, "loss": 0.5945, "step": 7180 }, { "epoch": 2.6969242310577646, "grad_norm": 11.189875602722168, "learning_rate": 2.157908775673513e-06, "loss": 0.77, "step": 7190 }, { "epoch": 2.700675168792198, "grad_norm": 16.071062088012695, "learning_rate": 2.131234995998933e-06, "loss": 0.6758, "step": 7200 }, { "epoch": 2.7044261065266317, "grad_norm": 11.37120532989502, "learning_rate": 2.1045612163243535e-06, "loss": 0.5912, "step": 7210 }, { "epoch": 2.708177044261065, "grad_norm": 25.354324340820312, "learning_rate": 2.0778874366497736e-06, "loss": 0.6741, "step": 7220 }, { "epoch": 2.711927981995499, "grad_norm": 11.246193885803223, "learning_rate": 2.0512136569751936e-06, "loss": 0.6073, "step": 7230 }, { "epoch": 2.7156789197299327, "grad_norm": 9.01452350616455, "learning_rate": 2.0245398773006137e-06, "loss": 0.7363, "step": 7240 }, { "epoch": 2.719429857464366, "grad_norm": 22.3641414642334, "learning_rate": 1.9978660976260337e-06, "loss": 0.6278, "step": 7250 }, { "epoch": 2.7231807951988, "grad_norm": 14.206088066101074, "learning_rate": 1.9711923179514537e-06, "loss": 0.6676, "step": 7260 }, { "epoch": 2.726931732933233, "grad_norm": 14.623751640319824, "learning_rate": 1.9445185382768738e-06, "loss": 0.6629, "step": 7270 }, { "epoch": 2.730682670667667, "grad_norm": 15.682950019836426, "learning_rate": 1.9178447586022942e-06, "loss": 0.8008, "step": 7280 }, { "epoch": 2.7344336084021004, "grad_norm": 16.56915855407715, "learning_rate": 1.891170978927714e-06, "loss": 0.8421, "step": 7290 }, { "epoch": 2.738184546136534, "grad_norm": 20.514009475708008, "learning_rate": 1.8644971992531343e-06, "loss": 0.6755, "step": 7300 }, { "epoch": 2.741935483870968, "grad_norm": 15.838664054870605, "learning_rate": 1.8378234195785544e-06, "loss": 0.6463, "step": 7310 }, { "epoch": 2.7456864216054013, "grad_norm": 30.3530330657959, "learning_rate": 1.8111496399039746e-06, "loss": 0.6295, "step": 7320 }, { "epoch": 2.7494373593398347, "grad_norm": 8.959320068359375, "learning_rate": 1.7844758602293946e-06, "loss": 0.6443, "step": 7330 }, { "epoch": 2.7531882970742685, "grad_norm": 11.156110763549805, "learning_rate": 1.757802080554815e-06, "loss": 0.5971, "step": 7340 }, { "epoch": 2.7569392348087023, "grad_norm": 21.744304656982422, "learning_rate": 1.731128300880235e-06, "loss": 0.5818, "step": 7350 }, { "epoch": 2.7606901725431356, "grad_norm": 23.995845794677734, "learning_rate": 1.7044545212056548e-06, "loss": 0.6885, "step": 7360 }, { "epoch": 2.7644411102775694, "grad_norm": 13.629135131835938, "learning_rate": 1.677780741531075e-06, "loss": 0.771, "step": 7370 }, { "epoch": 2.7681920480120032, "grad_norm": 6.805270671844482, "learning_rate": 1.651106961856495e-06, "loss": 0.687, "step": 7380 }, { "epoch": 2.7719429857464366, "grad_norm": 21.93046760559082, "learning_rate": 1.6244331821819153e-06, "loss": 0.5681, "step": 7390 }, { "epoch": 2.77569392348087, "grad_norm": 22.271133422851562, "learning_rate": 1.5977594025073353e-06, "loss": 0.7504, "step": 7400 }, { "epoch": 2.7794448612153038, "grad_norm": 19.411861419677734, "learning_rate": 1.5710856228327556e-06, "loss": 0.7141, "step": 7410 }, { "epoch": 2.7831957989497376, "grad_norm": 21.990013122558594, "learning_rate": 1.5444118431581756e-06, "loss": 0.7941, "step": 7420 }, { "epoch": 2.786946736684171, "grad_norm": 26.875274658203125, "learning_rate": 1.5177380634835959e-06, "loss": 0.7077, "step": 7430 }, { "epoch": 2.7906976744186047, "grad_norm": 17.144861221313477, "learning_rate": 1.491064283809016e-06, "loss": 0.6153, "step": 7440 }, { "epoch": 2.7944486121530385, "grad_norm": 18.100868225097656, "learning_rate": 1.4643905041344357e-06, "loss": 0.635, "step": 7450 }, { "epoch": 2.798199549887472, "grad_norm": 17.497039794921875, "learning_rate": 1.437716724459856e-06, "loss": 0.7681, "step": 7460 }, { "epoch": 2.8019504876219052, "grad_norm": 11.748749732971191, "learning_rate": 1.411042944785276e-06, "loss": 0.7916, "step": 7470 }, { "epoch": 2.805701425356339, "grad_norm": 17.71030616760254, "learning_rate": 1.3843691651106963e-06, "loss": 0.6826, "step": 7480 }, { "epoch": 2.809452363090773, "grad_norm": 15.269068717956543, "learning_rate": 1.3576953854361163e-06, "loss": 0.6008, "step": 7490 }, { "epoch": 2.813203300825206, "grad_norm": 16.148839950561523, "learning_rate": 1.3310216057615366e-06, "loss": 0.7581, "step": 7500 }, { "epoch": 2.81695423855964, "grad_norm": 7.341813564300537, "learning_rate": 1.3043478260869566e-06, "loss": 0.4343, "step": 7510 }, { "epoch": 2.8207051762940734, "grad_norm": 11.722135543823242, "learning_rate": 1.2776740464123769e-06, "loss": 0.5226, "step": 7520 }, { "epoch": 2.824456114028507, "grad_norm": 17.107776641845703, "learning_rate": 1.251000266737797e-06, "loss": 0.6466, "step": 7530 }, { "epoch": 2.8282070517629405, "grad_norm": 15.833941459655762, "learning_rate": 1.224326487063217e-06, "loss": 0.7703, "step": 7540 }, { "epoch": 2.8319579894973743, "grad_norm": 19.610742568969727, "learning_rate": 1.197652707388637e-06, "loss": 0.6359, "step": 7550 }, { "epoch": 2.835708927231808, "grad_norm": 12.620158195495605, "learning_rate": 1.1709789277140572e-06, "loss": 0.6561, "step": 7560 }, { "epoch": 2.8394598649662415, "grad_norm": 20.80132293701172, "learning_rate": 1.1443051480394773e-06, "loss": 0.7668, "step": 7570 }, { "epoch": 2.8432108027006753, "grad_norm": 9.778907775878906, "learning_rate": 1.1176313683648973e-06, "loss": 0.7067, "step": 7580 }, { "epoch": 2.8469617404351086, "grad_norm": 11.224839210510254, "learning_rate": 1.0909575886903174e-06, "loss": 0.5963, "step": 7590 }, { "epoch": 2.8507126781695424, "grad_norm": 11.957784652709961, "learning_rate": 1.0642838090157376e-06, "loss": 0.7127, "step": 7600 }, { "epoch": 2.854463615903976, "grad_norm": 17.465967178344727, "learning_rate": 1.0376100293411576e-06, "loss": 0.5896, "step": 7610 }, { "epoch": 2.8582145536384096, "grad_norm": 22.074583053588867, "learning_rate": 1.010936249666578e-06, "loss": 0.664, "step": 7620 }, { "epoch": 2.8619654913728434, "grad_norm": 45.1811408996582, "learning_rate": 9.84262469991998e-07, "loss": 0.6788, "step": 7630 }, { "epoch": 2.8657164291072768, "grad_norm": 12.519074440002441, "learning_rate": 9.57588690317418e-07, "loss": 0.5208, "step": 7640 }, { "epoch": 2.8694673668417106, "grad_norm": 14.533720016479492, "learning_rate": 9.309149106428382e-07, "loss": 0.6403, "step": 7650 }, { "epoch": 2.873218304576144, "grad_norm": 6.502141952514648, "learning_rate": 9.042411309682584e-07, "loss": 0.6661, "step": 7660 }, { "epoch": 2.8769692423105777, "grad_norm": 16.4246826171875, "learning_rate": 8.775673512936784e-07, "loss": 0.6631, "step": 7670 }, { "epoch": 2.880720180045011, "grad_norm": 20.435749053955078, "learning_rate": 8.508935716190984e-07, "loss": 0.769, "step": 7680 }, { "epoch": 2.884471117779445, "grad_norm": 9.382180213928223, "learning_rate": 8.242197919445186e-07, "loss": 0.5407, "step": 7690 }, { "epoch": 2.8882220555138787, "grad_norm": 12.802393913269043, "learning_rate": 7.975460122699387e-07, "loss": 0.636, "step": 7700 }, { "epoch": 2.891972993248312, "grad_norm": 5.997576713562012, "learning_rate": 7.708722325953588e-07, "loss": 0.561, "step": 7710 }, { "epoch": 2.895723930982746, "grad_norm": 8.369012832641602, "learning_rate": 7.441984529207789e-07, "loss": 0.5721, "step": 7720 }, { "epoch": 2.899474868717179, "grad_norm": 19.990249633789062, "learning_rate": 7.175246732461991e-07, "loss": 0.7267, "step": 7730 }, { "epoch": 2.903225806451613, "grad_norm": 19.364540100097656, "learning_rate": 6.908508935716192e-07, "loss": 0.6867, "step": 7740 }, { "epoch": 2.9069767441860463, "grad_norm": 10.638273239135742, "learning_rate": 6.641771138970394e-07, "loss": 0.5809, "step": 7750 }, { "epoch": 2.91072768192048, "grad_norm": 24.913246154785156, "learning_rate": 6.375033342224594e-07, "loss": 0.5658, "step": 7760 }, { "epoch": 2.914478619654914, "grad_norm": 6.1255412101745605, "learning_rate": 6.108295545478795e-07, "loss": 0.4796, "step": 7770 }, { "epoch": 2.9182295573893473, "grad_norm": 13.97762680053711, "learning_rate": 5.841557748732996e-07, "loss": 0.6201, "step": 7780 }, { "epoch": 2.921980495123781, "grad_norm": 24.56553840637207, "learning_rate": 5.574819951987197e-07, "loss": 0.5206, "step": 7790 }, { "epoch": 2.9257314328582145, "grad_norm": 20.081579208374023, "learning_rate": 5.308082155241398e-07, "loss": 0.5697, "step": 7800 }, { "epoch": 2.9294823705926483, "grad_norm": 11.358619689941406, "learning_rate": 5.041344358495599e-07, "loss": 0.6268, "step": 7810 }, { "epoch": 2.9332333083270816, "grad_norm": 11.016149520874023, "learning_rate": 4.7746065617498e-07, "loss": 0.5753, "step": 7820 }, { "epoch": 2.9369842460615154, "grad_norm": 17.64615249633789, "learning_rate": 4.507868765004002e-07, "loss": 0.7584, "step": 7830 }, { "epoch": 2.9407351837959492, "grad_norm": 17.292207717895508, "learning_rate": 4.2411309682582024e-07, "loss": 0.6361, "step": 7840 }, { "epoch": 2.9444861215303826, "grad_norm": 17.94815444946289, "learning_rate": 3.974393171512404e-07, "loss": 0.7208, "step": 7850 }, { "epoch": 2.948237059264816, "grad_norm": 13.073601722717285, "learning_rate": 3.7076553747666047e-07, "loss": 0.7179, "step": 7860 }, { "epoch": 2.9519879969992497, "grad_norm": 7.956513404846191, "learning_rate": 3.440917578020806e-07, "loss": 0.6109, "step": 7870 }, { "epoch": 2.9557389347336835, "grad_norm": 18.16693687438965, "learning_rate": 3.1741797812750066e-07, "loss": 0.6499, "step": 7880 }, { "epoch": 2.959489872468117, "grad_norm": 25.006132125854492, "learning_rate": 2.907441984529208e-07, "loss": 0.5358, "step": 7890 }, { "epoch": 2.9632408102025507, "grad_norm": 20.937856674194336, "learning_rate": 2.640704187783409e-07, "loss": 0.6364, "step": 7900 }, { "epoch": 2.9669917479369845, "grad_norm": 12.37922477722168, "learning_rate": 2.3739663910376104e-07, "loss": 0.4916, "step": 7910 }, { "epoch": 2.970742685671418, "grad_norm": 8.240549087524414, "learning_rate": 2.1072285942918113e-07, "loss": 0.6811, "step": 7920 }, { "epoch": 2.974493623405851, "grad_norm": 9.405010223388672, "learning_rate": 1.8404907975460125e-07, "loss": 0.5338, "step": 7930 }, { "epoch": 2.978244561140285, "grad_norm": 13.773921966552734, "learning_rate": 1.5737530008002134e-07, "loss": 0.6314, "step": 7940 }, { "epoch": 2.981995498874719, "grad_norm": 12.41072940826416, "learning_rate": 1.3070152040544146e-07, "loss": 0.5497, "step": 7950 }, { "epoch": 2.985746436609152, "grad_norm": 17.232473373413086, "learning_rate": 1.0402774073086158e-07, "loss": 0.588, "step": 7960 }, { "epoch": 2.989497374343586, "grad_norm": 27.516319274902344, "learning_rate": 7.735396105628168e-08, "loss": 0.7673, "step": 7970 }, { "epoch": 2.99324831207802, "grad_norm": 16.864728927612305, "learning_rate": 5.0680181381701795e-08, "loss": 0.6883, "step": 7980 }, { "epoch": 2.996999249812453, "grad_norm": 16.803760528564453, "learning_rate": 2.40064017071219e-08, "loss": 0.6337, "step": 7990 }, { "epoch": 3.0, "eval_accuracy": 0.5919831223628692, "eval_f1_macro": 0.5904844573730711, "eval_f1_weighted": 0.5917816930917, "eval_loss": 1.0033386945724487, "eval_precision_macro": 0.5945695673493336, "eval_precision_weighted": 0.5926704635628428, "eval_recall_macro": 0.5877159391363334, "eval_recall_weighted": 0.5919831223628692, "eval_runtime": 4.8573, "eval_samples_per_second": 487.928, "eval_steps_per_second": 61.145, "step": 7998 } ], "logging_steps": 10, "max_steps": 7998, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.68326808991488e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }