{ "best_global_step": 692, "best_metric": 0.8148257052568041, "best_model_checkpoint": "./vitmodel-results2\\checkpoint-692", "epoch": 9.0, "eval_steps": 500, "global_step": 1557, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.057803468208092484, "grad_norm": 9.193648338317871, "learning_rate": 1.993063583815029e-05, "loss": 1.2447509765625, "step": 10 }, { "epoch": 0.11560693641618497, "grad_norm": 11.420437812805176, "learning_rate": 1.9853564547206168e-05, "loss": 0.9639083862304687, "step": 20 }, { "epoch": 0.17341040462427745, "grad_norm": 8.501763343811035, "learning_rate": 1.9776493256262043e-05, "loss": 0.9586288452148437, "step": 30 }, { "epoch": 0.23121387283236994, "grad_norm": 8.869491577148438, "learning_rate": 1.969942196531792e-05, "loss": 0.95760498046875, "step": 40 }, { "epoch": 0.28901734104046245, "grad_norm": 6.993536472320557, "learning_rate": 1.96223506743738e-05, "loss": 0.8282135009765625, "step": 50 }, { "epoch": 0.3468208092485549, "grad_norm": 9.088873863220215, "learning_rate": 1.9545279383429674e-05, "loss": 0.7820648193359375, "step": 60 }, { "epoch": 0.4046242774566474, "grad_norm": 6.549789905548096, "learning_rate": 1.946820809248555e-05, "loss": 0.7731849670410156, "step": 70 }, { "epoch": 0.4624277456647399, "grad_norm": 8.019621849060059, "learning_rate": 1.9391136801541427e-05, "loss": 0.6875213623046875, "step": 80 }, { "epoch": 0.5202312138728323, "grad_norm": 14.647834777832031, "learning_rate": 1.9314065510597305e-05, "loss": 0.8264602661132813, "step": 90 }, { "epoch": 0.5780346820809249, "grad_norm": 4.440170764923096, "learning_rate": 1.923699421965318e-05, "loss": 0.66641845703125, "step": 100 }, { "epoch": 0.6358381502890174, "grad_norm": 7.89495325088501, "learning_rate": 1.9159922928709058e-05, "loss": 0.721063232421875, "step": 110 }, { "epoch": 0.6936416184971098, "grad_norm": 8.646476745605469, "learning_rate": 1.9082851637764936e-05, "loss": 0.6277095794677734, "step": 120 }, { "epoch": 0.7514450867052023, "grad_norm": 7.094816207885742, "learning_rate": 1.900578034682081e-05, "loss": 0.626312255859375, "step": 130 }, { "epoch": 0.8092485549132948, "grad_norm": 8.350555419921875, "learning_rate": 1.892870905587669e-05, "loss": 0.6980167388916015, "step": 140 }, { "epoch": 0.8670520231213873, "grad_norm": 8.661638259887695, "learning_rate": 1.8851637764932563e-05, "loss": 0.7745811462402343, "step": 150 }, { "epoch": 0.9248554913294798, "grad_norm": 13.135848999023438, "learning_rate": 1.877456647398844e-05, "loss": 0.6397926330566406, "step": 160 }, { "epoch": 0.9826589595375722, "grad_norm": 6.089743614196777, "learning_rate": 1.869749518304432e-05, "loss": 0.7790138244628906, "step": 170 }, { "epoch": 1.0, "eval_accuracy": 0.7620967741935484, "eval_f1": 0.7666429887363669, "eval_loss": 0.5470803380012512, "eval_runtime": 3.7787, "eval_samples_per_second": 65.631, "eval_steps_per_second": 8.204, "step": 173 }, { "epoch": 1.0404624277456647, "grad_norm": 6.115858554840088, "learning_rate": 1.8620423892100194e-05, "loss": 0.41851234436035156, "step": 180 }, { "epoch": 1.0982658959537572, "grad_norm": 8.266204833984375, "learning_rate": 1.854335260115607e-05, "loss": 0.38555469512939455, "step": 190 }, { "epoch": 1.1560693641618498, "grad_norm": 7.23341178894043, "learning_rate": 1.8466281310211947e-05, "loss": 0.3871160507202148, "step": 200 }, { "epoch": 1.2138728323699421, "grad_norm": 4.417539596557617, "learning_rate": 1.8389210019267825e-05, "loss": 0.3288217544555664, "step": 210 }, { "epoch": 1.2716763005780347, "grad_norm": 6.040576934814453, "learning_rate": 1.83121387283237e-05, "loss": 0.3685466766357422, "step": 220 }, { "epoch": 1.3294797687861273, "grad_norm": 7.501523017883301, "learning_rate": 1.8235067437379578e-05, "loss": 0.26166458129882814, "step": 230 }, { "epoch": 1.3872832369942196, "grad_norm": 6.774686813354492, "learning_rate": 1.8157996146435456e-05, "loss": 0.4264092445373535, "step": 240 }, { "epoch": 1.4450867052023122, "grad_norm": 7.900248050689697, "learning_rate": 1.808092485549133e-05, "loss": 0.4633197784423828, "step": 250 }, { "epoch": 1.5028901734104045, "grad_norm": 6.848799228668213, "learning_rate": 1.8003853564547206e-05, "loss": 0.3210134506225586, "step": 260 }, { "epoch": 1.560693641618497, "grad_norm": 12.119475364685059, "learning_rate": 1.7926782273603084e-05, "loss": 0.4417572975158691, "step": 270 }, { "epoch": 1.6184971098265897, "grad_norm": 11.230687141418457, "learning_rate": 1.7849710982658962e-05, "loss": 0.3607762336730957, "step": 280 }, { "epoch": 1.6763005780346822, "grad_norm": 4.957355976104736, "learning_rate": 1.7772639691714836e-05, "loss": 0.4235343933105469, "step": 290 }, { "epoch": 1.7341040462427746, "grad_norm": 15.573174476623535, "learning_rate": 1.7695568400770715e-05, "loss": 0.2808579444885254, "step": 300 }, { "epoch": 1.791907514450867, "grad_norm": 4.865276336669922, "learning_rate": 1.7618497109826593e-05, "loss": 0.3062352180480957, "step": 310 }, { "epoch": 1.8497109826589595, "grad_norm": 9.93105697631836, "learning_rate": 1.7541425818882467e-05, "loss": 0.4238410949707031, "step": 320 }, { "epoch": 1.907514450867052, "grad_norm": 4.529516696929932, "learning_rate": 1.7464354527938346e-05, "loss": 0.4600528717041016, "step": 330 }, { "epoch": 1.9653179190751446, "grad_norm": 2.661755323410034, "learning_rate": 1.738728323699422e-05, "loss": 0.4357139587402344, "step": 340 }, { "epoch": 2.0, "eval_accuracy": 0.7701612903225806, "eval_f1": 0.7565331283801342, "eval_loss": 0.5092476010322571, "eval_runtime": 3.7765, "eval_samples_per_second": 65.67, "eval_steps_per_second": 8.209, "step": 346 }, { "epoch": 2.023121387283237, "grad_norm": 5.79880428314209, "learning_rate": 1.73102119460501e-05, "loss": 0.2926643848419189, "step": 350 }, { "epoch": 2.0809248554913293, "grad_norm": 4.915356159210205, "learning_rate": 1.7233140655105977e-05, "loss": 0.12723102569580078, "step": 360 }, { "epoch": 2.138728323699422, "grad_norm": 2.580575942993164, "learning_rate": 1.715606936416185e-05, "loss": 0.13565282821655272, "step": 370 }, { "epoch": 2.1965317919075145, "grad_norm": 4.596588611602783, "learning_rate": 1.7078998073217726e-05, "loss": 0.16868581771850585, "step": 380 }, { "epoch": 2.254335260115607, "grad_norm": 1.6427044868469238, "learning_rate": 1.7001926782273604e-05, "loss": 0.10326943397521973, "step": 390 }, { "epoch": 2.3121387283236996, "grad_norm": 3.8816680908203125, "learning_rate": 1.6924855491329482e-05, "loss": 0.10355021953582763, "step": 400 }, { "epoch": 2.3699421965317917, "grad_norm": 4.5500922203063965, "learning_rate": 1.6847784200385357e-05, "loss": 0.16852855682373047, "step": 410 }, { "epoch": 2.4277456647398843, "grad_norm": 5.74709939956665, "learning_rate": 1.6770712909441235e-05, "loss": 0.1326436996459961, "step": 420 }, { "epoch": 2.485549132947977, "grad_norm": 1.0953601598739624, "learning_rate": 1.6693641618497113e-05, "loss": 0.18565785884857178, "step": 430 }, { "epoch": 2.5433526011560694, "grad_norm": 3.5484166145324707, "learning_rate": 1.6616570327552988e-05, "loss": 0.11107982397079467, "step": 440 }, { "epoch": 2.601156069364162, "grad_norm": 10.514449119567871, "learning_rate": 1.6539499036608863e-05, "loss": 0.23550875186920167, "step": 450 }, { "epoch": 2.6589595375722546, "grad_norm": 1.2385636568069458, "learning_rate": 1.646242774566474e-05, "loss": 0.09937280416488647, "step": 460 }, { "epoch": 2.7167630057803467, "grad_norm": 2.3061702251434326, "learning_rate": 1.638535645472062e-05, "loss": 0.11133263111114503, "step": 470 }, { "epoch": 2.7745664739884393, "grad_norm": 13.695456504821777, "learning_rate": 1.6308285163776494e-05, "loss": 0.14138509035110475, "step": 480 }, { "epoch": 2.832369942196532, "grad_norm": 11.574437141418457, "learning_rate": 1.6231213872832372e-05, "loss": 0.15487065315246581, "step": 490 }, { "epoch": 2.8901734104046244, "grad_norm": 9.247588157653809, "learning_rate": 1.6154142581888246e-05, "loss": 0.18497473001480103, "step": 500 }, { "epoch": 2.9479768786127165, "grad_norm": 1.725334644317627, "learning_rate": 1.6077071290944125e-05, "loss": 0.14644594192504884, "step": 510 }, { "epoch": 3.0, "eval_accuracy": 0.7620967741935484, "eval_f1": 0.7681438802699209, "eval_loss": 0.6075211763381958, "eval_runtime": 3.8105, "eval_samples_per_second": 65.084, "eval_steps_per_second": 8.136, "step": 519 }, { "epoch": 3.005780346820809, "grad_norm": 0.43373608589172363, "learning_rate": 1.6000000000000003e-05, "loss": 0.1772174835205078, "step": 520 }, { "epoch": 3.0635838150289016, "grad_norm": 0.47126689553260803, "learning_rate": 1.5922928709055877e-05, "loss": 0.019189047813415527, "step": 530 }, { "epoch": 3.121387283236994, "grad_norm": 1.0332409143447876, "learning_rate": 1.5845857418111756e-05, "loss": 0.017575371265411376, "step": 540 }, { "epoch": 3.179190751445087, "grad_norm": 1.5852550268173218, "learning_rate": 1.5768786127167634e-05, "loss": 0.037454456090927124, "step": 550 }, { "epoch": 3.2369942196531793, "grad_norm": 1.1080495119094849, "learning_rate": 1.569171483622351e-05, "loss": 0.031146246194839477, "step": 560 }, { "epoch": 3.294797687861272, "grad_norm": 0.29696905612945557, "learning_rate": 1.5614643545279383e-05, "loss": 0.029173070192337038, "step": 570 }, { "epoch": 3.352601156069364, "grad_norm": 3.4115827083587646, "learning_rate": 1.553757225433526e-05, "loss": 0.046107858419418335, "step": 580 }, { "epoch": 3.4104046242774566, "grad_norm": 0.939679741859436, "learning_rate": 1.546050096339114e-05, "loss": 0.04879339635372162, "step": 590 }, { "epoch": 3.468208092485549, "grad_norm": 0.7068578600883484, "learning_rate": 1.5383429672447014e-05, "loss": 0.02517341673374176, "step": 600 }, { "epoch": 3.5260115606936417, "grad_norm": 0.2978713810443878, "learning_rate": 1.530635838150289e-05, "loss": 0.09835766553878784, "step": 610 }, { "epoch": 3.583815028901734, "grad_norm": 10.272834777832031, "learning_rate": 1.5229287090558769e-05, "loss": 0.03281269967556, "step": 620 }, { "epoch": 3.6416184971098264, "grad_norm": 6.145327568054199, "learning_rate": 1.5152215799614645e-05, "loss": 0.04014509916305542, "step": 630 }, { "epoch": 3.699421965317919, "grad_norm": 0.6408748626708984, "learning_rate": 1.5075144508670521e-05, "loss": 0.10582492351531983, "step": 640 }, { "epoch": 3.7572254335260116, "grad_norm": 0.1795165091753006, "learning_rate": 1.49980732177264e-05, "loss": 0.03988836109638214, "step": 650 }, { "epoch": 3.815028901734104, "grad_norm": 0.6440432667732239, "learning_rate": 1.4921001926782274e-05, "loss": 0.1083465576171875, "step": 660 }, { "epoch": 3.8728323699421967, "grad_norm": 2.706090211868286, "learning_rate": 1.484393063583815e-05, "loss": 0.007834103703498841, "step": 670 }, { "epoch": 3.9306358381502893, "grad_norm": 2.186443328857422, "learning_rate": 1.4766859344894029e-05, "loss": 0.017444241046905517, "step": 680 }, { "epoch": 3.9884393063583814, "grad_norm": 0.10573586076498032, "learning_rate": 1.4689788053949905e-05, "loss": 0.008943480253219605, "step": 690 }, { "epoch": 4.0, "eval_accuracy": 0.8145161290322581, "eval_f1": 0.8148257052568041, "eval_loss": 0.6730512976646423, "eval_runtime": 3.8196, "eval_samples_per_second": 64.928, "eval_steps_per_second": 8.116, "step": 692 }, { "epoch": 4.046242774566474, "grad_norm": 0.24642640352249146, "learning_rate": 1.4612716763005782e-05, "loss": 0.011015585064888, "step": 700 }, { "epoch": 4.104046242774566, "grad_norm": 0.15465769171714783, "learning_rate": 1.453564547206166e-05, "loss": 0.00537472665309906, "step": 710 }, { "epoch": 4.161849710982659, "grad_norm": 0.2364652454853058, "learning_rate": 1.4458574181117534e-05, "loss": 0.006760424375534058, "step": 720 }, { "epoch": 4.219653179190751, "grad_norm": 0.039116956293582916, "learning_rate": 1.4381502890173411e-05, "loss": 0.005632311105728149, "step": 730 }, { "epoch": 4.277456647398844, "grad_norm": 0.06665871292352676, "learning_rate": 1.4304431599229289e-05, "loss": 0.004969970881938934, "step": 740 }, { "epoch": 4.335260115606936, "grad_norm": 0.14143921434879303, "learning_rate": 1.4227360308285165e-05, "loss": 0.014902213215827942, "step": 750 }, { "epoch": 4.393063583815029, "grad_norm": 0.2047356367111206, "learning_rate": 1.4150289017341042e-05, "loss": 0.005004642903804779, "step": 760 }, { "epoch": 4.4508670520231215, "grad_norm": 0.03578726947307587, "learning_rate": 1.4073217726396917e-05, "loss": 0.0032742366194725035, "step": 770 }, { "epoch": 4.508670520231214, "grad_norm": 0.05993838235735893, "learning_rate": 1.3996146435452795e-05, "loss": 0.002082832157611847, "step": 780 }, { "epoch": 4.566473988439307, "grad_norm": 0.25260815024375916, "learning_rate": 1.3919075144508671e-05, "loss": 0.002451476454734802, "step": 790 }, { "epoch": 4.624277456647399, "grad_norm": 0.030600009486079216, "learning_rate": 1.3842003853564548e-05, "loss": 0.003828507661819458, "step": 800 }, { "epoch": 4.682080924855491, "grad_norm": 12.333958625793457, "learning_rate": 1.3764932562620426e-05, "loss": 0.06371065378189086, "step": 810 }, { "epoch": 4.7398843930635834, "grad_norm": 0.22651025652885437, "learning_rate": 1.3687861271676302e-05, "loss": 0.004562181234359741, "step": 820 }, { "epoch": 4.797687861271676, "grad_norm": 0.03751413896679878, "learning_rate": 1.3610789980732177e-05, "loss": 0.0019490152597427368, "step": 830 }, { "epoch": 4.855491329479769, "grad_norm": 1.809687852859497, "learning_rate": 1.3533718689788055e-05, "loss": 0.006183743476867676, "step": 840 }, { "epoch": 4.913294797687861, "grad_norm": 1.0058674812316895, "learning_rate": 1.3456647398843931e-05, "loss": 0.008757662773132325, "step": 850 }, { "epoch": 4.971098265895954, "grad_norm": 0.0775017961859703, "learning_rate": 1.3379576107899808e-05, "loss": 0.0020394161343574523, "step": 860 }, { "epoch": 5.0, "eval_accuracy": 0.8064516129032258, "eval_f1": 0.8074801611817632, "eval_loss": 0.7273994088172913, "eval_runtime": 3.8077, "eval_samples_per_second": 65.132, "eval_steps_per_second": 8.141, "step": 865 }, { "epoch": 5.028901734104046, "grad_norm": 0.07742282748222351, "learning_rate": 1.3302504816955686e-05, "loss": 0.0031921621412038803, "step": 870 }, { "epoch": 5.086705202312139, "grad_norm": 0.021719103679060936, "learning_rate": 1.3225433526011562e-05, "loss": 0.0016711041331291198, "step": 880 }, { "epoch": 5.144508670520231, "grad_norm": 0.040297143161296844, "learning_rate": 1.3148362235067437e-05, "loss": 0.0009254798293113709, "step": 890 }, { "epoch": 5.202312138728324, "grad_norm": 0.06567544490098953, "learning_rate": 1.3071290944123315e-05, "loss": 0.0013921096920967101, "step": 900 }, { "epoch": 5.2601156069364166, "grad_norm": 0.10894370079040527, "learning_rate": 1.2994219653179192e-05, "loss": 0.0009916990995407105, "step": 910 }, { "epoch": 5.317919075144509, "grad_norm": 0.030588222667574883, "learning_rate": 1.2917148362235068e-05, "loss": 0.0011297404766082765, "step": 920 }, { "epoch": 5.375722543352601, "grad_norm": 0.044895388185977936, "learning_rate": 1.2840077071290946e-05, "loss": 0.0009317293763160706, "step": 930 }, { "epoch": 5.433526011560693, "grad_norm": 0.030870944261550903, "learning_rate": 1.2763005780346823e-05, "loss": 0.0010338693857192994, "step": 940 }, { "epoch": 5.491329479768786, "grad_norm": 0.024050451815128326, "learning_rate": 1.2685934489402697e-05, "loss": 0.000977499783039093, "step": 950 }, { "epoch": 5.5491329479768785, "grad_norm": 0.02893257327377796, "learning_rate": 1.2608863198458577e-05, "loss": 0.0013506487011909485, "step": 960 }, { "epoch": 5.606936416184971, "grad_norm": 0.09149627387523651, "learning_rate": 1.2531791907514452e-05, "loss": 0.0009457975625991822, "step": 970 }, { "epoch": 5.664739884393064, "grad_norm": 0.020059145987033844, "learning_rate": 1.2454720616570328e-05, "loss": 0.0008360743522644043, "step": 980 }, { "epoch": 5.722543352601156, "grad_norm": 0.02598397620022297, "learning_rate": 1.2377649325626205e-05, "loss": 0.0008077919483184814, "step": 990 }, { "epoch": 5.780346820809249, "grad_norm": 0.051848188042640686, "learning_rate": 1.2300578034682083e-05, "loss": 0.0009181752800941467, "step": 1000 }, { "epoch": 5.838150289017341, "grad_norm": 0.02003743126988411, "learning_rate": 1.222350674373796e-05, "loss": 0.0008455753326416015, "step": 1010 }, { "epoch": 5.895953757225434, "grad_norm": 0.4392681121826172, "learning_rate": 1.2146435452793834e-05, "loss": 0.03311595022678375, "step": 1020 }, { "epoch": 5.953757225433526, "grad_norm": 0.041885748505592346, "learning_rate": 1.2069364161849712e-05, "loss": 0.0012869253754615785, "step": 1030 }, { "epoch": 6.0, "eval_accuracy": 0.7903225806451613, "eval_f1": 0.7943751207262894, "eval_loss": 0.8309548497200012, "eval_runtime": 3.7714, "eval_samples_per_second": 65.759, "eval_steps_per_second": 8.22, "step": 1038 }, { "epoch": 6.011560693641618, "grad_norm": 0.026743754744529724, "learning_rate": 1.1992292870905588e-05, "loss": 0.0006453114096075296, "step": 1040 }, { "epoch": 6.069364161849711, "grad_norm": 0.06693530082702637, "learning_rate": 1.1915221579961465e-05, "loss": 0.0007815584540367126, "step": 1050 }, { "epoch": 6.127167630057803, "grad_norm": 0.015539342537522316, "learning_rate": 1.1838150289017343e-05, "loss": 0.0006080090999603272, "step": 1060 }, { "epoch": 6.184971098265896, "grad_norm": 0.014636315405368805, "learning_rate": 1.176107899807322e-05, "loss": 0.0006367906928062439, "step": 1070 }, { "epoch": 6.242774566473988, "grad_norm": 0.019367052242159843, "learning_rate": 1.1684007707129094e-05, "loss": 0.0007835239171981812, "step": 1080 }, { "epoch": 6.300578034682081, "grad_norm": 0.01782175898551941, "learning_rate": 1.1606936416184972e-05, "loss": 0.0006255954504013062, "step": 1090 }, { "epoch": 6.358381502890174, "grad_norm": 0.009536216966807842, "learning_rate": 1.1529865125240849e-05, "loss": 0.0005810096859931946, "step": 1100 }, { "epoch": 6.416184971098266, "grad_norm": 0.021404527127742767, "learning_rate": 1.1452793834296725e-05, "loss": 0.000746677815914154, "step": 1110 }, { "epoch": 6.473988439306359, "grad_norm": 0.049797266721725464, "learning_rate": 1.1375722543352603e-05, "loss": 0.00063580721616745, "step": 1120 }, { "epoch": 6.531791907514451, "grad_norm": 0.025786111131310463, "learning_rate": 1.129865125240848e-05, "loss": 0.0006033405661582947, "step": 1130 }, { "epoch": 6.589595375722544, "grad_norm": 0.016592316329479218, "learning_rate": 1.1221579961464354e-05, "loss": 0.0006539627909660339, "step": 1140 }, { "epoch": 6.6473988439306355, "grad_norm": 0.016465384513139725, "learning_rate": 1.1144508670520232e-05, "loss": 0.0005995437502861023, "step": 1150 }, { "epoch": 6.705202312138728, "grad_norm": 0.019848085939884186, "learning_rate": 1.1067437379576109e-05, "loss": 0.0006159201264381408, "step": 1160 }, { "epoch": 6.763005780346821, "grad_norm": 0.015507291071116924, "learning_rate": 1.0990366088631985e-05, "loss": 0.0004596635699272156, "step": 1170 }, { "epoch": 6.820809248554913, "grad_norm": 0.022381598129868507, "learning_rate": 1.0913294797687862e-05, "loss": 0.000590360164642334, "step": 1180 }, { "epoch": 6.878612716763006, "grad_norm": 0.01475490815937519, "learning_rate": 1.083622350674374e-05, "loss": 0.000518760085105896, "step": 1190 }, { "epoch": 6.936416184971098, "grad_norm": 0.0208587646484375, "learning_rate": 1.0759152215799615e-05, "loss": 0.0006424024701118469, "step": 1200 }, { "epoch": 6.994219653179191, "grad_norm": 0.08424866199493408, "learning_rate": 1.0682080924855491e-05, "loss": 0.0008051112294197082, "step": 1210 }, { "epoch": 7.0, "eval_accuracy": 0.8104838709677419, "eval_f1": 0.8129958949851424, "eval_loss": 0.8261250853538513, "eval_runtime": 3.8032, "eval_samples_per_second": 65.208, "eval_steps_per_second": 8.151, "step": 1211 }, { "epoch": 7.0520231213872835, "grad_norm": 0.026666566729545593, "learning_rate": 1.0605009633911369e-05, "loss": 0.000561926607042551, "step": 1220 }, { "epoch": 7.109826589595376, "grad_norm": 0.008546934463083744, "learning_rate": 1.0527938342967246e-05, "loss": 0.0005572408437728882, "step": 1230 }, { "epoch": 7.167630057803469, "grad_norm": 0.007139866705983877, "learning_rate": 1.0450867052023122e-05, "loss": 0.00040052533149719237, "step": 1240 }, { "epoch": 7.22543352601156, "grad_norm": 0.01626797765493393, "learning_rate": 1.0373795761079e-05, "loss": 0.0005501970648765564, "step": 1250 }, { "epoch": 7.283236994219653, "grad_norm": 0.012005102820694447, "learning_rate": 1.0296724470134875e-05, "loss": 0.0004243999719619751, "step": 1260 }, { "epoch": 7.341040462427745, "grad_norm": 0.018900051712989807, "learning_rate": 1.0219653179190751e-05, "loss": 0.0004561007022857666, "step": 1270 }, { "epoch": 7.398843930635838, "grad_norm": 0.016326697543263435, "learning_rate": 1.014258188824663e-05, "loss": 0.0005092039704322815, "step": 1280 }, { "epoch": 7.456647398843931, "grad_norm": 0.010545836761593819, "learning_rate": 1.0065510597302506e-05, "loss": 0.0004412621259689331, "step": 1290 }, { "epoch": 7.514450867052023, "grad_norm": 0.01526038721203804, "learning_rate": 9.988439306358382e-06, "loss": 0.0004318729043006897, "step": 1300 }, { "epoch": 7.572254335260116, "grad_norm": 0.012989806942641735, "learning_rate": 9.911368015414259e-06, "loss": 0.00048479437828063965, "step": 1310 }, { "epoch": 7.630057803468208, "grad_norm": 0.009836602956056595, "learning_rate": 9.834296724470137e-06, "loss": 0.0003552690148353577, "step": 1320 }, { "epoch": 7.687861271676301, "grad_norm": 0.012384418398141861, "learning_rate": 9.757225433526011e-06, "loss": 0.0004932507872581482, "step": 1330 }, { "epoch": 7.745664739884393, "grad_norm": 0.020340140908956528, "learning_rate": 9.68015414258189e-06, "loss": 0.00048180222511291505, "step": 1340 }, { "epoch": 7.803468208092486, "grad_norm": 0.03233597055077553, "learning_rate": 9.603082851637766e-06, "loss": 0.0004775360226631165, "step": 1350 }, { "epoch": 7.861271676300578, "grad_norm": 0.01245969720184803, "learning_rate": 9.526011560693642e-06, "loss": 0.00040520131587982176, "step": 1360 }, { "epoch": 7.91907514450867, "grad_norm": 0.00969842541962862, "learning_rate": 9.448940269749519e-06, "loss": 0.0003412917256355286, "step": 1370 }, { "epoch": 7.976878612716763, "grad_norm": 0.022187134250998497, "learning_rate": 9.371868978805397e-06, "loss": 0.0004110649228096008, "step": 1380 }, { "epoch": 8.0, "eval_accuracy": 0.8064516129032258, "eval_f1": 0.8086838155814223, "eval_loss": 0.8546451330184937, "eval_runtime": 3.7864, "eval_samples_per_second": 65.497, "eval_steps_per_second": 8.187, "step": 1384 }, { "epoch": 8.034682080924856, "grad_norm": 0.019872142001986504, "learning_rate": 9.294797687861272e-06, "loss": 0.0003799670375883579, "step": 1390 }, { "epoch": 8.092485549132949, "grad_norm": 0.011816315352916718, "learning_rate": 9.21772639691715e-06, "loss": 0.00041468888521194457, "step": 1400 }, { "epoch": 8.15028901734104, "grad_norm": 0.009374646469950676, "learning_rate": 9.140655105973025e-06, "loss": 0.00042216181755065917, "step": 1410 }, { "epoch": 8.208092485549132, "grad_norm": 0.01085547637194395, "learning_rate": 9.063583815028903e-06, "loss": 0.0004324719309806824, "step": 1420 }, { "epoch": 8.265895953757225, "grad_norm": 0.028972823172807693, "learning_rate": 8.986512524084779e-06, "loss": 0.00038540661334991456, "step": 1430 }, { "epoch": 8.323699421965317, "grad_norm": 0.01954697258770466, "learning_rate": 8.909441233140655e-06, "loss": 0.0004615575075149536, "step": 1440 }, { "epoch": 8.38150289017341, "grad_norm": 0.01238598208874464, "learning_rate": 8.832369942196532e-06, "loss": 0.0003170013427734375, "step": 1450 }, { "epoch": 8.439306358381502, "grad_norm": 0.005586686078459024, "learning_rate": 8.75529865125241e-06, "loss": 0.0003178909420967102, "step": 1460 }, { "epoch": 8.497109826589595, "grad_norm": 0.0054536196403205395, "learning_rate": 8.678227360308286e-06, "loss": 0.00030860304832458496, "step": 1470 }, { "epoch": 8.554913294797688, "grad_norm": 0.005438173655420542, "learning_rate": 8.601156069364163e-06, "loss": 0.0003883242607116699, "step": 1480 }, { "epoch": 8.61271676300578, "grad_norm": 0.010960490442812443, "learning_rate": 8.52408477842004e-06, "loss": 0.0003649115562438965, "step": 1490 }, { "epoch": 8.670520231213873, "grad_norm": 0.010015477426350117, "learning_rate": 8.447013487475916e-06, "loss": 0.0002670750021934509, "step": 1500 }, { "epoch": 8.728323699421965, "grad_norm": 0.01602529175579548, "learning_rate": 8.369942196531792e-06, "loss": 0.0003640010952949524, "step": 1510 }, { "epoch": 8.786127167630058, "grad_norm": 0.011708080768585205, "learning_rate": 8.292870905587669e-06, "loss": 0.0003057181835174561, "step": 1520 }, { "epoch": 8.84393063583815, "grad_norm": 0.01114922296255827, "learning_rate": 8.215799614643547e-06, "loss": 0.0003286987543106079, "step": 1530 }, { "epoch": 8.901734104046243, "grad_norm": 0.015181174501776695, "learning_rate": 8.138728323699423e-06, "loss": 0.0003021523356437683, "step": 1540 }, { "epoch": 8.959537572254336, "grad_norm": 0.014381779357790947, "learning_rate": 8.0616570327553e-06, "loss": 0.0003357663750648499, "step": 1550 }, { "epoch": 9.0, "eval_accuracy": 0.8104838709677419, "eval_f1": 0.8129958949851424, "eval_loss": 0.8783804178237915, "eval_runtime": 3.8263, "eval_samples_per_second": 64.814, "eval_steps_per_second": 8.102, "step": 1557 } ], "logging_steps": 10, "max_steps": 2595, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.8442416458701537e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }