{ "best_global_step": 1380, "best_metric": 0.9760859393830551, "best_model_checkpoint": "./vitmodel-results3\\checkpoint-1380", "epoch": 8.0, "eval_steps": 500, "global_step": 1840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.043478260869565216, "grad_norm": 6.461246490478516, "learning_rate": 1.9947826086956524e-05, "loss": 0.6514617919921875, "step": 10 }, { "epoch": 0.08695652173913043, "grad_norm": 3.8991446495056152, "learning_rate": 1.988985507246377e-05, "loss": 0.45957489013671876, "step": 20 }, { "epoch": 0.13043478260869565, "grad_norm": 2.4451658725738525, "learning_rate": 1.9831884057971015e-05, "loss": 0.3501922607421875, "step": 30 }, { "epoch": 0.17391304347826086, "grad_norm": 3.7429535388946533, "learning_rate": 1.9773913043478265e-05, "loss": 0.27413215637207033, "step": 40 }, { "epoch": 0.21739130434782608, "grad_norm": 12.403484344482422, "learning_rate": 1.971594202898551e-05, "loss": 0.45773887634277344, "step": 50 }, { "epoch": 0.2608695652173913, "grad_norm": 5.481701850891113, "learning_rate": 1.9657971014492755e-05, "loss": 0.23930573463439941, "step": 60 }, { "epoch": 0.30434782608695654, "grad_norm": 4.304569244384766, "learning_rate": 1.9600000000000002e-05, "loss": 0.2975881576538086, "step": 70 }, { "epoch": 0.34782608695652173, "grad_norm": 8.520660400390625, "learning_rate": 1.954202898550725e-05, "loss": 0.2244499921798706, "step": 80 }, { "epoch": 0.391304347826087, "grad_norm": 5.180691719055176, "learning_rate": 1.9484057971014492e-05, "loss": 0.17172325849533082, "step": 90 }, { "epoch": 0.43478260869565216, "grad_norm": 0.8175772428512573, "learning_rate": 1.9426086956521743e-05, "loss": 0.2387838363647461, "step": 100 }, { "epoch": 0.4782608695652174, "grad_norm": 0.4058602452278137, "learning_rate": 1.9368115942028986e-05, "loss": 0.10988756418228149, "step": 110 }, { "epoch": 0.5217391304347826, "grad_norm": 0.9439899921417236, "learning_rate": 1.9310144927536233e-05, "loss": 0.14697353839874266, "step": 120 }, { "epoch": 0.5652173913043478, "grad_norm": 0.6720163822174072, "learning_rate": 1.925217391304348e-05, "loss": 0.21182384490966796, "step": 130 }, { "epoch": 0.6086956521739131, "grad_norm": 0.371541291475296, "learning_rate": 1.9194202898550727e-05, "loss": 0.2180387258529663, "step": 140 }, { "epoch": 0.6521739130434783, "grad_norm": 1.6255207061767578, "learning_rate": 1.9136231884057973e-05, "loss": 0.20694243907928467, "step": 150 }, { "epoch": 0.6956521739130435, "grad_norm": 7.544068813323975, "learning_rate": 1.907826086956522e-05, "loss": 0.1606433391571045, "step": 160 }, { "epoch": 0.7391304347826086, "grad_norm": 0.549288272857666, "learning_rate": 1.9020289855072464e-05, "loss": 0.17944023609161378, "step": 170 }, { "epoch": 0.782608695652174, "grad_norm": 0.43946510553359985, "learning_rate": 1.896231884057971e-05, "loss": 0.2129380226135254, "step": 180 }, { "epoch": 0.8260869565217391, "grad_norm": 0.20731233060359955, "learning_rate": 1.8904347826086957e-05, "loss": 0.11439937353134155, "step": 190 }, { "epoch": 0.8695652173913043, "grad_norm": 0.1378840208053589, "learning_rate": 1.8846376811594204e-05, "loss": 0.21877152919769288, "step": 200 }, { "epoch": 0.9130434782608695, "grad_norm": 0.35801249742507935, "learning_rate": 1.878840579710145e-05, "loss": 0.10442726612091065, "step": 210 }, { "epoch": 0.9565217391304348, "grad_norm": 10.050288200378418, "learning_rate": 1.8730434782608698e-05, "loss": 0.19560953378677368, "step": 220 }, { "epoch": 1.0, "grad_norm": 0.15084530413150787, "learning_rate": 1.867246376811594e-05, "loss": 0.02761389911174774, "step": 230 }, { "epoch": 1.0, "eval_accuracy": 0.9521739130434783, "eval_f1": 0.9520642679853729, "eval_loss": 0.14785830676555634, "eval_runtime": 5.6109, "eval_samples_per_second": 81.984, "eval_steps_per_second": 10.337, "step": 230 }, { "epoch": 1.0434782608695652, "grad_norm": 0.186416894197464, "learning_rate": 1.861449275362319e-05, "loss": 0.04351995289325714, "step": 240 }, { "epoch": 1.0869565217391304, "grad_norm": 0.06740374863147736, "learning_rate": 1.8556521739130435e-05, "loss": 0.005690252780914307, "step": 250 }, { "epoch": 1.1304347826086956, "grad_norm": 6.0832743644714355, "learning_rate": 1.8498550724637682e-05, "loss": 0.016183775663375855, "step": 260 }, { "epoch": 1.1739130434782608, "grad_norm": 0.025469312444329262, "learning_rate": 1.844057971014493e-05, "loss": 0.025768563151359558, "step": 270 }, { "epoch": 1.2173913043478262, "grad_norm": 0.05013096332550049, "learning_rate": 1.8382608695652175e-05, "loss": 0.07644214630126953, "step": 280 }, { "epoch": 1.2608695652173914, "grad_norm": 0.19063518941402435, "learning_rate": 1.8324637681159422e-05, "loss": 0.06052442789077759, "step": 290 }, { "epoch": 1.3043478260869565, "grad_norm": 0.04668483883142471, "learning_rate": 1.826666666666667e-05, "loss": 0.0497345507144928, "step": 300 }, { "epoch": 1.3478260869565217, "grad_norm": 0.027216244488954544, "learning_rate": 1.8208695652173916e-05, "loss": 0.08886347413063049, "step": 310 }, { "epoch": 1.391304347826087, "grad_norm": 0.3011648952960968, "learning_rate": 1.815072463768116e-05, "loss": 0.0871780276298523, "step": 320 }, { "epoch": 1.434782608695652, "grad_norm": 9.434959411621094, "learning_rate": 1.809275362318841e-05, "loss": 0.06977825760841369, "step": 330 }, { "epoch": 1.4782608695652173, "grad_norm": 0.07939770817756653, "learning_rate": 1.8034782608695653e-05, "loss": 0.11019858121871948, "step": 340 }, { "epoch": 1.5217391304347827, "grad_norm": 6.755427837371826, "learning_rate": 1.79768115942029e-05, "loss": 0.07228946685791016, "step": 350 }, { "epoch": 1.5652173913043477, "grad_norm": 10.783921241760254, "learning_rate": 1.7918840579710147e-05, "loss": 0.06457504034042358, "step": 360 }, { "epoch": 1.608695652173913, "grad_norm": 0.10878543555736542, "learning_rate": 1.7860869565217394e-05, "loss": 0.021503202617168427, "step": 370 }, { "epoch": 1.6521739130434783, "grad_norm": 0.25200846791267395, "learning_rate": 1.780289855072464e-05, "loss": 0.06428139805793762, "step": 380 }, { "epoch": 1.6956521739130435, "grad_norm": 1.0684906244277954, "learning_rate": 1.7744927536231887e-05, "loss": 0.09642828106880189, "step": 390 }, { "epoch": 1.7391304347826086, "grad_norm": 12.593297004699707, "learning_rate": 1.768695652173913e-05, "loss": 0.028580766916275025, "step": 400 }, { "epoch": 1.7826086956521738, "grad_norm": 0.04352446645498276, "learning_rate": 1.7628985507246377e-05, "loss": 0.1266841173171997, "step": 410 }, { "epoch": 1.8260869565217392, "grad_norm": 0.03208275884389877, "learning_rate": 1.7571014492753624e-05, "loss": 0.0604109525680542, "step": 420 }, { "epoch": 1.8695652173913042, "grad_norm": 0.0292875487357378, "learning_rate": 1.751304347826087e-05, "loss": 0.08443626761436462, "step": 430 }, { "epoch": 1.9130434782608696, "grad_norm": 0.04183952882885933, "learning_rate": 1.7455072463768118e-05, "loss": 0.026611250638961793, "step": 440 }, { "epoch": 1.9565217391304348, "grad_norm": 0.419708788394928, "learning_rate": 1.7397101449275365e-05, "loss": 0.1165506362915039, "step": 450 }, { "epoch": 2.0, "grad_norm": 8.380155563354492, "learning_rate": 1.7339130434782608e-05, "loss": 0.041912269592285153, "step": 460 }, { "epoch": 2.0, "eval_accuracy": 0.9565217391304348, "eval_f1": 0.9565184513006655, "eval_loss": 0.1165793240070343, "eval_runtime": 3.7298, "eval_samples_per_second": 123.331, "eval_steps_per_second": 15.55, "step": 460 }, { "epoch": 2.0434782608695654, "grad_norm": 0.07667429745197296, "learning_rate": 1.728115942028986e-05, "loss": 0.0013138219714164735, "step": 470 }, { "epoch": 2.0869565217391304, "grad_norm": 0.05316108465194702, "learning_rate": 1.7223188405797102e-05, "loss": 0.004785384237766266, "step": 480 }, { "epoch": 2.130434782608696, "grad_norm": 0.018993400037288666, "learning_rate": 1.716521739130435e-05, "loss": 0.0010403752326965331, "step": 490 }, { "epoch": 2.1739130434782608, "grad_norm": 0.005419578403234482, "learning_rate": 1.7107246376811596e-05, "loss": 0.0010405436158180236, "step": 500 }, { "epoch": 2.217391304347826, "grad_norm": 2.7880542278289795, "learning_rate": 1.7049275362318842e-05, "loss": 0.01008293330669403, "step": 510 }, { "epoch": 2.260869565217391, "grad_norm": 0.19926372170448303, "learning_rate": 1.6991304347826086e-05, "loss": 0.002237708866596222, "step": 520 }, { "epoch": 2.3043478260869565, "grad_norm": 0.006354826502501965, "learning_rate": 1.6933333333333336e-05, "loss": 0.015198694169521331, "step": 530 }, { "epoch": 2.3478260869565215, "grad_norm": 0.01782035082578659, "learning_rate": 1.687536231884058e-05, "loss": 0.0017350628972053529, "step": 540 }, { "epoch": 2.391304347826087, "grad_norm": 0.6461467742919922, "learning_rate": 1.6817391304347826e-05, "loss": 0.0012194350361824035, "step": 550 }, { "epoch": 2.4347826086956523, "grad_norm": 0.014753537252545357, "learning_rate": 1.6759420289855073e-05, "loss": 0.03461991548538208, "step": 560 }, { "epoch": 2.4782608695652173, "grad_norm": 0.015930302441120148, "learning_rate": 1.670144927536232e-05, "loss": 0.0030654460191726685, "step": 570 }, { "epoch": 2.5217391304347827, "grad_norm": 0.07892700284719467, "learning_rate": 1.6643478260869567e-05, "loss": 0.0017842918634414673, "step": 580 }, { "epoch": 2.5652173913043477, "grad_norm": 0.05785762518644333, "learning_rate": 1.6585507246376814e-05, "loss": 0.0016030147671699524, "step": 590 }, { "epoch": 2.608695652173913, "grad_norm": 0.051935892552137375, "learning_rate": 1.652753623188406e-05, "loss": 0.0006048619747161865, "step": 600 }, { "epoch": 2.6521739130434785, "grad_norm": 0.009883932769298553, "learning_rate": 1.6469565217391304e-05, "loss": 0.0022064462304115296, "step": 610 }, { "epoch": 2.6956521739130435, "grad_norm": 0.01653284765779972, "learning_rate": 1.6411594202898554e-05, "loss": 0.010119739174842834, "step": 620 }, { "epoch": 2.7391304347826084, "grad_norm": 0.013404067605733871, "learning_rate": 1.6353623188405798e-05, "loss": 0.004131542146205902, "step": 630 }, { "epoch": 2.782608695652174, "grad_norm": 0.009171389043331146, "learning_rate": 1.6295652173913044e-05, "loss": 0.08883790969848633, "step": 640 }, { "epoch": 2.8260869565217392, "grad_norm": 12.090933799743652, "learning_rate": 1.623768115942029e-05, "loss": 0.010134254395961762, "step": 650 }, { "epoch": 2.869565217391304, "grad_norm": 4.632288455963135, "learning_rate": 1.6179710144927538e-05, "loss": 0.003986392915248871, "step": 660 }, { "epoch": 2.9130434782608696, "grad_norm": 0.06515643000602722, "learning_rate": 1.6121739130434785e-05, "loss": 0.0041788950562477115, "step": 670 }, { "epoch": 2.9565217391304346, "grad_norm": 0.33638763427734375, "learning_rate": 1.6063768115942032e-05, "loss": 0.0013911113142967223, "step": 680 }, { "epoch": 3.0, "grad_norm": 0.1827061027288437, "learning_rate": 1.6005797101449275e-05, "loss": 0.0004976257681846618, "step": 690 }, { "epoch": 3.0, "eval_accuracy": 0.9695652173913043, "eval_f1": 0.9695652173913043, "eval_loss": 0.09442394226789474, "eval_runtime": 3.6984, "eval_samples_per_second": 124.377, "eval_steps_per_second": 15.682, "step": 690 }, { "epoch": 3.0434782608695654, "grad_norm": 0.04813811555504799, "learning_rate": 1.5947826086956522e-05, "loss": 0.0004477664828300476, "step": 700 }, { "epoch": 3.0869565217391304, "grad_norm": 0.0175640732049942, "learning_rate": 1.588985507246377e-05, "loss": 0.0004123836755752563, "step": 710 }, { "epoch": 3.130434782608696, "grad_norm": 0.008048221468925476, "learning_rate": 1.5831884057971016e-05, "loss": 0.0004120379686355591, "step": 720 }, { "epoch": 3.1739130434782608, "grad_norm": 0.0071647269651293755, "learning_rate": 1.5773913043478263e-05, "loss": 0.00032983869314193723, "step": 730 }, { "epoch": 3.217391304347826, "grad_norm": 0.015544029884040356, "learning_rate": 1.571594202898551e-05, "loss": 0.00034575462341308595, "step": 740 }, { "epoch": 3.260869565217391, "grad_norm": 0.004907351918518543, "learning_rate": 1.5657971014492753e-05, "loss": 0.00026599913835525515, "step": 750 }, { "epoch": 3.3043478260869565, "grad_norm": 0.013097843155264854, "learning_rate": 1.5600000000000003e-05, "loss": 0.0016580477356910705, "step": 760 }, { "epoch": 3.3478260869565215, "grad_norm": 0.004332110285758972, "learning_rate": 1.5542028985507247e-05, "loss": 0.00046425610780715943, "step": 770 }, { "epoch": 3.391304347826087, "grad_norm": 0.06276489794254303, "learning_rate": 1.5484057971014493e-05, "loss": 0.0007047504186630249, "step": 780 }, { "epoch": 3.4347826086956523, "grad_norm": 0.00449096504598856, "learning_rate": 1.542608695652174e-05, "loss": 0.0002553284168243408, "step": 790 }, { "epoch": 3.4782608695652173, "grad_norm": 0.011169650591909885, "learning_rate": 1.5368115942028987e-05, "loss": 0.0003493279218673706, "step": 800 }, { "epoch": 3.5217391304347827, "grad_norm": 0.025958970189094543, "learning_rate": 1.5310144927536234e-05, "loss": 0.0002732709050178528, "step": 810 }, { "epoch": 3.5652173913043477, "grad_norm": 0.01937592588365078, "learning_rate": 1.5252173913043479e-05, "loss": 0.000246034562587738, "step": 820 }, { "epoch": 3.608695652173913, "grad_norm": 0.00856866966933012, "learning_rate": 1.5194202898550726e-05, "loss": 0.00028263479471206664, "step": 830 }, { "epoch": 3.6521739130434785, "grad_norm": 0.12088195979595184, "learning_rate": 1.5136231884057973e-05, "loss": 0.0003507554531097412, "step": 840 }, { "epoch": 3.6956521739130435, "grad_norm": 0.02024533785879612, "learning_rate": 1.5078260869565218e-05, "loss": 0.00027790963649749757, "step": 850 }, { "epoch": 3.7391304347826084, "grad_norm": 0.0040628910064697266, "learning_rate": 1.5020289855072465e-05, "loss": 0.0002285495400428772, "step": 860 }, { "epoch": 3.782608695652174, "grad_norm": 0.0061136772856116295, "learning_rate": 1.496231884057971e-05, "loss": 0.00027128159999847414, "step": 870 }, { "epoch": 3.8260869565217392, "grad_norm": 0.012037448585033417, "learning_rate": 1.4904347826086958e-05, "loss": 0.0002808883786201477, "step": 880 }, { "epoch": 3.869565217391304, "grad_norm": 0.004823528695851564, "learning_rate": 1.4846376811594203e-05, "loss": 0.0005329117178916931, "step": 890 }, { "epoch": 3.9130434782608696, "grad_norm": 0.04427816718816757, "learning_rate": 1.478840579710145e-05, "loss": 0.00029876679182052614, "step": 900 }, { "epoch": 3.9565217391304346, "grad_norm": 0.04008401557803154, "learning_rate": 1.4730434782608695e-05, "loss": 0.00039345473051071166, "step": 910 }, { "epoch": 4.0, "grad_norm": 0.010993687435984612, "learning_rate": 1.4672463768115944e-05, "loss": 0.00024021416902542114, "step": 920 }, { "epoch": 4.0, "eval_accuracy": 0.967391304347826, "eval_f1": 0.967383751435824, "eval_loss": 0.10565203428268433, "eval_runtime": 3.7655, "eval_samples_per_second": 122.162, "eval_steps_per_second": 15.403, "step": 920 }, { "epoch": 4.043478260869565, "grad_norm": 0.009720547124743462, "learning_rate": 1.461449275362319e-05, "loss": 0.00024558454751968386, "step": 930 }, { "epoch": 4.086956521739131, "grad_norm": 0.017342587932944298, "learning_rate": 1.4556521739130436e-05, "loss": 0.00018810927867889403, "step": 940 }, { "epoch": 4.130434782608695, "grad_norm": 0.011509642004966736, "learning_rate": 1.4498550724637683e-05, "loss": 0.00023101717233657836, "step": 950 }, { "epoch": 4.173913043478261, "grad_norm": 0.0029383855871856213, "learning_rate": 1.4440579710144928e-05, "loss": 0.00020957440137863158, "step": 960 }, { "epoch": 4.217391304347826, "grad_norm": 0.016090553253889084, "learning_rate": 1.4382608695652176e-05, "loss": 0.0001988038420677185, "step": 970 }, { "epoch": 4.260869565217392, "grad_norm": 0.005717333406209946, "learning_rate": 1.4324637681159422e-05, "loss": 0.00017771720886230468, "step": 980 }, { "epoch": 4.304347826086957, "grad_norm": 0.0067417211830616, "learning_rate": 1.4266666666666668e-05, "loss": 0.0001595720648765564, "step": 990 }, { "epoch": 4.3478260869565215, "grad_norm": 0.014678889885544777, "learning_rate": 1.4208695652173914e-05, "loss": 0.00021335333585739135, "step": 1000 }, { "epoch": 4.391304347826087, "grad_norm": 0.015480758622288704, "learning_rate": 1.4150724637681162e-05, "loss": 0.00018725097179412843, "step": 1010 }, { "epoch": 4.434782608695652, "grad_norm": 0.009670041501522064, "learning_rate": 1.4092753623188407e-05, "loss": 0.00017006248235702516, "step": 1020 }, { "epoch": 4.478260869565218, "grad_norm": 0.004368505906313658, "learning_rate": 1.4034782608695654e-05, "loss": 0.00011847317218780518, "step": 1030 }, { "epoch": 4.521739130434782, "grad_norm": 0.00646650604903698, "learning_rate": 1.39768115942029e-05, "loss": 0.00011199414730072022, "step": 1040 }, { "epoch": 4.565217391304348, "grad_norm": 0.0032207826152443886, "learning_rate": 1.3918840579710146e-05, "loss": 0.0001057848334312439, "step": 1050 }, { "epoch": 4.608695652173913, "grad_norm": 0.004954950883984566, "learning_rate": 1.3860869565217391e-05, "loss": 0.00018178075551986695, "step": 1060 }, { "epoch": 4.6521739130434785, "grad_norm": 0.002452458254992962, "learning_rate": 1.380289855072464e-05, "loss": 0.00011045336723327636, "step": 1070 }, { "epoch": 4.695652173913043, "grad_norm": 0.008102525025606155, "learning_rate": 1.3744927536231885e-05, "loss": 0.00026093870401382445, "step": 1080 }, { "epoch": 4.739130434782608, "grad_norm": 0.010890824720263481, "learning_rate": 1.3686956521739132e-05, "loss": 0.0001526176929473877, "step": 1090 }, { "epoch": 4.782608695652174, "grad_norm": 0.004832288715988398, "learning_rate": 1.3628985507246377e-05, "loss": 0.0004844769835472107, "step": 1100 }, { "epoch": 4.826086956521739, "grad_norm": 0.0037648973520845175, "learning_rate": 1.3571014492753625e-05, "loss": 0.00011702477931976318, "step": 1110 }, { "epoch": 4.869565217391305, "grad_norm": 0.005592594854533672, "learning_rate": 1.351304347826087e-05, "loss": 0.00010377466678619384, "step": 1120 }, { "epoch": 4.913043478260869, "grad_norm": 0.007901474833488464, "learning_rate": 1.3455072463768117e-05, "loss": 0.00013610869646072388, "step": 1130 }, { "epoch": 4.956521739130435, "grad_norm": 0.01237920019775629, "learning_rate": 1.3397101449275362e-05, "loss": 0.00013603121042251586, "step": 1140 }, { "epoch": 5.0, "grad_norm": 0.0020453499164432287, "learning_rate": 1.333913043478261e-05, "loss": 0.0001194879412651062, "step": 1150 }, { "epoch": 5.0, "eval_accuracy": 0.9739130434782609, "eval_f1": 0.9739110707803992, "eval_loss": 0.10354098677635193, "eval_runtime": 3.6993, "eval_samples_per_second": 124.349, "eval_steps_per_second": 15.679, "step": 1150 }, { "epoch": 5.043478260869565, "grad_norm": 0.006073773372918367, "learning_rate": 1.3281159420289856e-05, "loss": 0.00012996643781661987, "step": 1160 }, { "epoch": 5.086956521739131, "grad_norm": 0.004777880851179361, "learning_rate": 1.3223188405797103e-05, "loss": 0.0001592189073562622, "step": 1170 }, { "epoch": 5.130434782608695, "grad_norm": 0.057864073663949966, "learning_rate": 1.3165217391304348e-05, "loss": 0.00019505620002746582, "step": 1180 }, { "epoch": 5.173913043478261, "grad_norm": 0.004903986118733883, "learning_rate": 1.3107246376811595e-05, "loss": 0.00014082193374633789, "step": 1190 }, { "epoch": 5.217391304347826, "grad_norm": 0.0034294510260224342, "learning_rate": 1.304927536231884e-05, "loss": 0.00015170425176620484, "step": 1200 }, { "epoch": 5.260869565217392, "grad_norm": 0.0011764679802581668, "learning_rate": 1.2991304347826089e-05, "loss": 7.397085428237916e-05, "step": 1210 }, { "epoch": 5.304347826086957, "grad_norm": 0.0015955602284520864, "learning_rate": 1.2933333333333334e-05, "loss": 0.00010628998279571533, "step": 1220 }, { "epoch": 5.3478260869565215, "grad_norm": 0.0054084137082099915, "learning_rate": 1.287536231884058e-05, "loss": 0.00010003894567489624, "step": 1230 }, { "epoch": 5.391304347826087, "grad_norm": 0.0409197136759758, "learning_rate": 1.2817391304347827e-05, "loss": 0.0001949608325958252, "step": 1240 }, { "epoch": 5.434782608695652, "grad_norm": 0.005638486705720425, "learning_rate": 1.2759420289855074e-05, "loss": 0.00010839402675628662, "step": 1250 }, { "epoch": 5.478260869565218, "grad_norm": 0.002196825807914138, "learning_rate": 1.2701449275362321e-05, "loss": 0.00011780411005020141, "step": 1260 }, { "epoch": 5.521739130434782, "grad_norm": 0.004170795436948538, "learning_rate": 1.2643478260869566e-05, "loss": 7.52761960029602e-05, "step": 1270 }, { "epoch": 5.565217391304348, "grad_norm": 0.0018888239283114672, "learning_rate": 1.2585507246376813e-05, "loss": 8.64073634147644e-05, "step": 1280 }, { "epoch": 5.608695652173913, "grad_norm": 0.004605341702699661, "learning_rate": 1.2527536231884058e-05, "loss": 0.00010445266962051391, "step": 1290 }, { "epoch": 5.6521739130434785, "grad_norm": 0.003109138226136565, "learning_rate": 1.2469565217391307e-05, "loss": 0.00017313212156295777, "step": 1300 }, { "epoch": 5.695652173913043, "grad_norm": 0.010427464731037617, "learning_rate": 1.2411594202898552e-05, "loss": 0.00013125985860824586, "step": 1310 }, { "epoch": 5.739130434782608, "grad_norm": 0.003667028620839119, "learning_rate": 1.2353623188405799e-05, "loss": 8.144229650497437e-05, "step": 1320 }, { "epoch": 5.782608695652174, "grad_norm": 0.0063975197263062, "learning_rate": 1.2295652173913044e-05, "loss": 8.790493011474609e-05, "step": 1330 }, { "epoch": 5.826086956521739, "grad_norm": 0.0025064516812562943, "learning_rate": 1.2237681159420292e-05, "loss": 9.892880916595459e-05, "step": 1340 }, { "epoch": 5.869565217391305, "grad_norm": 0.0023004047106951475, "learning_rate": 1.2179710144927537e-05, "loss": 8.99285078048706e-05, "step": 1350 }, { "epoch": 5.913043478260869, "grad_norm": 0.00247712479904294, "learning_rate": 1.2121739130434784e-05, "loss": 7.850229740142822e-05, "step": 1360 }, { "epoch": 5.956521739130435, "grad_norm": 0.004787979181855917, "learning_rate": 1.206376811594203e-05, "loss": 0.00013543367385864257, "step": 1370 }, { "epoch": 6.0, "grad_norm": 0.0011665808269754052, "learning_rate": 1.2005797101449276e-05, "loss": 0.00010280609130859375, "step": 1380 }, { "epoch": 6.0, "eval_accuracy": 0.9760869565217392, "eval_f1": 0.9760859393830551, "eval_loss": 0.10542083531618118, "eval_runtime": 3.6712, "eval_samples_per_second": 125.301, "eval_steps_per_second": 15.799, "step": 1380 }, { "epoch": 6.043478260869565, "grad_norm": 0.003608932951465249, "learning_rate": 1.1947826086956521e-05, "loss": 6.381869316101074e-05, "step": 1390 }, { "epoch": 6.086956521739131, "grad_norm": 0.001413961173966527, "learning_rate": 1.188985507246377e-05, "loss": 0.00011366158723831176, "step": 1400 }, { "epoch": 6.130434782608695, "grad_norm": 0.008014041930437088, "learning_rate": 1.1831884057971015e-05, "loss": 8.733570575714111e-05, "step": 1410 }, { "epoch": 6.173913043478261, "grad_norm": 0.003111343365162611, "learning_rate": 1.1773913043478262e-05, "loss": 8.406937122344971e-05, "step": 1420 }, { "epoch": 6.217391304347826, "grad_norm": 0.005770743824541569, "learning_rate": 1.1715942028985507e-05, "loss": 0.00010157078504562378, "step": 1430 }, { "epoch": 6.260869565217392, "grad_norm": 0.0032873093150556087, "learning_rate": 1.1657971014492756e-05, "loss": 0.00014556646347045897, "step": 1440 }, { "epoch": 6.304347826086957, "grad_norm": 0.001812812639400363, "learning_rate": 1.16e-05, "loss": 0.00010097324848175049, "step": 1450 }, { "epoch": 6.3478260869565215, "grad_norm": 0.004035606049001217, "learning_rate": 1.1542028985507248e-05, "loss": 9.941011667251587e-05, "step": 1460 }, { "epoch": 6.391304347826087, "grad_norm": 0.0012575940927490592, "learning_rate": 1.1484057971014493e-05, "loss": 6.15835189819336e-05, "step": 1470 }, { "epoch": 6.434782608695652, "grad_norm": 0.003833119058981538, "learning_rate": 1.142608695652174e-05, "loss": 8.669793605804443e-05, "step": 1480 }, { "epoch": 6.478260869565218, "grad_norm": 0.004782689735293388, "learning_rate": 1.1368115942028985e-05, "loss": 7.78600573539734e-05, "step": 1490 }, { "epoch": 6.521739130434782, "grad_norm": 0.001532797235995531, "learning_rate": 1.1310144927536233e-05, "loss": 6.358325481414795e-05, "step": 1500 }, { "epoch": 6.565217391304348, "grad_norm": 0.002565442817285657, "learning_rate": 1.1252173913043478e-05, "loss": 0.0001420259475708008, "step": 1510 }, { "epoch": 6.608695652173913, "grad_norm": 0.0025454177521169186, "learning_rate": 1.1194202898550725e-05, "loss": 8.515864610671997e-05, "step": 1520 }, { "epoch": 6.6521739130434785, "grad_norm": 0.0020964243449270725, "learning_rate": 1.113623188405797e-05, "loss": 6.471127271652221e-05, "step": 1530 }, { "epoch": 6.695652173913043, "grad_norm": 0.003716124454513192, "learning_rate": 1.1078260869565219e-05, "loss": 8.204132318496704e-05, "step": 1540 }, { "epoch": 6.739130434782608, "grad_norm": 0.008757402189075947, "learning_rate": 1.1020289855072466e-05, "loss": 8.024424314498902e-05, "step": 1550 }, { "epoch": 6.782608695652174, "grad_norm": 0.0014845369150862098, "learning_rate": 1.096231884057971e-05, "loss": 6.451904773712158e-05, "step": 1560 }, { "epoch": 6.826086956521739, "grad_norm": 0.008402503095567226, "learning_rate": 1.0904347826086958e-05, "loss": 0.00010472536087036133, "step": 1570 }, { "epoch": 6.869565217391305, "grad_norm": 0.0024845916777849197, "learning_rate": 1.0846376811594203e-05, "loss": 7.791221141815186e-05, "step": 1580 }, { "epoch": 6.913043478260869, "grad_norm": 0.0009611704736016691, "learning_rate": 1.0788405797101451e-05, "loss": 6.439834833145141e-05, "step": 1590 }, { "epoch": 6.956521739130435, "grad_norm": 0.002504365984350443, "learning_rate": 1.0730434782608696e-05, "loss": 0.00010657459497451783, "step": 1600 }, { "epoch": 7.0, "grad_norm": 0.0028592213056981564, "learning_rate": 1.0672463768115943e-05, "loss": 6.621479988098145e-05, "step": 1610 }, { "epoch": 7.0, "eval_accuracy": 0.9717391304347827, "eval_f1": 0.9717357910906297, "eval_loss": 0.10924158990383148, "eval_runtime": 3.7267, "eval_samples_per_second": 123.432, "eval_steps_per_second": 15.563, "step": 1610 }, { "epoch": 7.043478260869565, "grad_norm": 0.0038551699835807085, "learning_rate": 1.0614492753623188e-05, "loss": 6.931275129318237e-05, "step": 1620 }, { "epoch": 7.086956521739131, "grad_norm": 0.001955242594704032, "learning_rate": 1.0556521739130437e-05, "loss": 6.8606436252594e-05, "step": 1630 }, { "epoch": 7.130434782608695, "grad_norm": 0.0016041912604123354, "learning_rate": 1.0498550724637682e-05, "loss": 5.517750978469849e-05, "step": 1640 }, { "epoch": 7.173913043478261, "grad_norm": 0.00400899862870574, "learning_rate": 1.0440579710144929e-05, "loss": 6.250441074371338e-05, "step": 1650 }, { "epoch": 7.217391304347826, "grad_norm": 0.00452436925843358, "learning_rate": 1.0382608695652174e-05, "loss": 7.809549570083618e-05, "step": 1660 }, { "epoch": 7.260869565217392, "grad_norm": 0.004081172402948141, "learning_rate": 1.0324637681159423e-05, "loss": 6.081312894821167e-05, "step": 1670 }, { "epoch": 7.304347826086957, "grad_norm": 0.0009276916971430182, "learning_rate": 1.0266666666666668e-05, "loss": 7.750093936920166e-05, "step": 1680 }, { "epoch": 7.3478260869565215, "grad_norm": 0.0008240043534897268, "learning_rate": 1.0208695652173915e-05, "loss": 5.295425653457641e-05, "step": 1690 }, { "epoch": 7.391304347826087, "grad_norm": 0.0009307338623329997, "learning_rate": 1.015072463768116e-05, "loss": 4.418641328811646e-05, "step": 1700 }, { "epoch": 7.434782608695652, "grad_norm": 0.0026277746073901653, "learning_rate": 1.0092753623188407e-05, "loss": 7.459372282028198e-05, "step": 1710 }, { "epoch": 7.478260869565218, "grad_norm": 0.0009984400821849704, "learning_rate": 1.0034782608695652e-05, "loss": 5.8722496032714846e-05, "step": 1720 }, { "epoch": 7.521739130434782, "grad_norm": 0.006830462254583836, "learning_rate": 9.9768115942029e-06, "loss": 0.00010163038969039917, "step": 1730 }, { "epoch": 7.565217391304348, "grad_norm": 0.002571334131062031, "learning_rate": 9.918840579710145e-06, "loss": 4.719942808151245e-05, "step": 1740 }, { "epoch": 7.608695652173913, "grad_norm": 0.006324245594441891, "learning_rate": 9.860869565217392e-06, "loss": 0.00011334121227264404, "step": 1750 }, { "epoch": 7.6521739130434785, "grad_norm": 0.004802050068974495, "learning_rate": 9.802898550724639e-06, "loss": 7.033348083496094e-05, "step": 1760 }, { "epoch": 7.695652173913043, "grad_norm": 0.004078683443367481, "learning_rate": 9.744927536231886e-06, "loss": 7.486343383789062e-05, "step": 1770 }, { "epoch": 7.739130434782608, "grad_norm": 0.0017736013978719711, "learning_rate": 9.686956521739131e-06, "loss": 6.642341613769532e-05, "step": 1780 }, { "epoch": 7.782608695652174, "grad_norm": 0.0009903659811243415, "learning_rate": 9.628985507246378e-06, "loss": 6.065666675567627e-05, "step": 1790 }, { "epoch": 7.826086956521739, "grad_norm": 0.0012112981639802456, "learning_rate": 9.571014492753625e-06, "loss": 6.491392850875855e-05, "step": 1800 }, { "epoch": 7.869565217391305, "grad_norm": 0.0009230478899553418, "learning_rate": 9.51304347826087e-06, "loss": 5.654692649841309e-05, "step": 1810 }, { "epoch": 7.913043478260869, "grad_norm": 0.0006778881652280688, "learning_rate": 9.455072463768117e-06, "loss": 5.7981908321380614e-05, "step": 1820 }, { "epoch": 7.956521739130435, "grad_norm": 0.003380276495590806, "learning_rate": 9.397101449275363e-06, "loss": 0.00010381042957305909, "step": 1830 }, { "epoch": 8.0, "grad_norm": 0.0043472591787576675, "learning_rate": 9.33913043478261e-06, "loss": 7.407516241073609e-05, "step": 1840 }, { "epoch": 8.0, "eval_accuracy": 0.9717391304347827, "eval_f1": 0.9717357910906297, "eval_loss": 0.11276786029338837, "eval_runtime": 3.6362, "eval_samples_per_second": 126.506, "eval_steps_per_second": 15.951, "step": 1840 } ], "logging_steps": 10, "max_steps": 3450, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1406820871267942e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }