{
"best_global_step": 692,
"best_metric": 0.8148257052568041,
"best_model_checkpoint": "./vitmodel-results2\\checkpoint-692",
"epoch": 9.0,
"eval_steps": 500,
"global_step": 1557,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.057803468208092484,
"grad_norm": 9.193648338317871,
"learning_rate": 1.993063583815029e-05,
"loss": 1.2447509765625,
"step": 10
},
{
"epoch": 0.11560693641618497,
"grad_norm": 11.420437812805176,
"learning_rate": 1.9853564547206168e-05,
"loss": 0.9639083862304687,
"step": 20
},
{
"epoch": 0.17341040462427745,
"grad_norm": 8.501763343811035,
"learning_rate": 1.9776493256262043e-05,
"loss": 0.9586288452148437,
"step": 30
},
{
"epoch": 0.23121387283236994,
"grad_norm": 8.869491577148438,
"learning_rate": 1.969942196531792e-05,
"loss": 0.95760498046875,
"step": 40
},
{
"epoch": 0.28901734104046245,
"grad_norm": 6.993536472320557,
"learning_rate": 1.96223506743738e-05,
"loss": 0.8282135009765625,
"step": 50
},
{
"epoch": 0.3468208092485549,
"grad_norm": 9.088873863220215,
"learning_rate": 1.9545279383429674e-05,
"loss": 0.7820648193359375,
"step": 60
},
{
"epoch": 0.4046242774566474,
"grad_norm": 6.549789905548096,
"learning_rate": 1.946820809248555e-05,
"loss": 0.7731849670410156,
"step": 70
},
{
"epoch": 0.4624277456647399,
"grad_norm": 8.019621849060059,
"learning_rate": 1.9391136801541427e-05,
"loss": 0.6875213623046875,
"step": 80
},
{
"epoch": 0.5202312138728323,
"grad_norm": 14.647834777832031,
"learning_rate": 1.9314065510597305e-05,
"loss": 0.8264602661132813,
"step": 90
},
{
"epoch": 0.5780346820809249,
"grad_norm": 4.440170764923096,
"learning_rate": 1.923699421965318e-05,
"loss": 0.66641845703125,
"step": 100
},
{
"epoch": 0.6358381502890174,
"grad_norm": 7.89495325088501,
"learning_rate": 1.9159922928709058e-05,
"loss": 0.721063232421875,
"step": 110
},
{
"epoch": 0.6936416184971098,
"grad_norm": 8.646476745605469,
"learning_rate": 1.9082851637764936e-05,
"loss": 0.6277095794677734,
"step": 120
},
{
"epoch": 0.7514450867052023,
"grad_norm": 7.094816207885742,
"learning_rate": 1.900578034682081e-05,
"loss": 0.626312255859375,
"step": 130
},
{
"epoch": 0.8092485549132948,
"grad_norm": 8.350555419921875,
"learning_rate": 1.892870905587669e-05,
"loss": 0.6980167388916015,
"step": 140
},
{
"epoch": 0.8670520231213873,
"grad_norm": 8.661638259887695,
"learning_rate": 1.8851637764932563e-05,
"loss": 0.7745811462402343,
"step": 150
},
{
"epoch": 0.9248554913294798,
"grad_norm": 13.135848999023438,
"learning_rate": 1.877456647398844e-05,
"loss": 0.6397926330566406,
"step": 160
},
{
"epoch": 0.9826589595375722,
"grad_norm": 6.089743614196777,
"learning_rate": 1.869749518304432e-05,
"loss": 0.7790138244628906,
"step": 170
},
{
"epoch": 1.0,
"eval_accuracy": 0.7620967741935484,
"eval_f1": 0.7666429887363669,
"eval_loss": 0.5470803380012512,
"eval_runtime": 3.7787,
"eval_samples_per_second": 65.631,
"eval_steps_per_second": 8.204,
"step": 173
},
{
"epoch": 1.0404624277456647,
"grad_norm": 6.115858554840088,
"learning_rate": 1.8620423892100194e-05,
"loss": 0.41851234436035156,
"step": 180
},
{
"epoch": 1.0982658959537572,
"grad_norm": 8.266204833984375,
"learning_rate": 1.854335260115607e-05,
"loss": 0.38555469512939455,
"step": 190
},
{
"epoch": 1.1560693641618498,
"grad_norm": 7.23341178894043,
"learning_rate": 1.8466281310211947e-05,
"loss": 0.3871160507202148,
"step": 200
},
{
"epoch": 1.2138728323699421,
"grad_norm": 4.417539596557617,
"learning_rate": 1.8389210019267825e-05,
"loss": 0.3288217544555664,
"step": 210
},
{
"epoch": 1.2716763005780347,
"grad_norm": 6.040576934814453,
"learning_rate": 1.83121387283237e-05,
"loss": 0.3685466766357422,
"step": 220
},
{
"epoch": 1.3294797687861273,
"grad_norm": 7.501523017883301,
"learning_rate": 1.8235067437379578e-05,
"loss": 0.26166458129882814,
"step": 230
},
{
"epoch": 1.3872832369942196,
"grad_norm": 6.774686813354492,
"learning_rate": 1.8157996146435456e-05,
"loss": 0.4264092445373535,
"step": 240
},
{
"epoch": 1.4450867052023122,
"grad_norm": 7.900248050689697,
"learning_rate": 1.808092485549133e-05,
"loss": 0.4633197784423828,
"step": 250
},
{
"epoch": 1.5028901734104045,
"grad_norm": 6.848799228668213,
"learning_rate": 1.8003853564547206e-05,
"loss": 0.3210134506225586,
"step": 260
},
{
"epoch": 1.560693641618497,
"grad_norm": 12.119475364685059,
"learning_rate": 1.7926782273603084e-05,
"loss": 0.4417572975158691,
"step": 270
},
{
"epoch": 1.6184971098265897,
"grad_norm": 11.230687141418457,
"learning_rate": 1.7849710982658962e-05,
"loss": 0.3607762336730957,
"step": 280
},
{
"epoch": 1.6763005780346822,
"grad_norm": 4.957355976104736,
"learning_rate": 1.7772639691714836e-05,
"loss": 0.4235343933105469,
"step": 290
},
{
"epoch": 1.7341040462427746,
"grad_norm": 15.573174476623535,
"learning_rate": 1.7695568400770715e-05,
"loss": 0.2808579444885254,
"step": 300
},
{
"epoch": 1.791907514450867,
"grad_norm": 4.865276336669922,
"learning_rate": 1.7618497109826593e-05,
"loss": 0.3062352180480957,
"step": 310
},
{
"epoch": 1.8497109826589595,
"grad_norm": 9.93105697631836,
"learning_rate": 1.7541425818882467e-05,
"loss": 0.4238410949707031,
"step": 320
},
{
"epoch": 1.907514450867052,
"grad_norm": 4.529516696929932,
"learning_rate": 1.7464354527938346e-05,
"loss": 0.4600528717041016,
"step": 330
},
{
"epoch": 1.9653179190751446,
"grad_norm": 2.661755323410034,
"learning_rate": 1.738728323699422e-05,
"loss": 0.4357139587402344,
"step": 340
},
{
"epoch": 2.0,
"eval_accuracy": 0.7701612903225806,
"eval_f1": 0.7565331283801342,
"eval_loss": 0.5092476010322571,
"eval_runtime": 3.7765,
"eval_samples_per_second": 65.67,
"eval_steps_per_second": 8.209,
"step": 346
},
{
"epoch": 2.023121387283237,
"grad_norm": 5.79880428314209,
"learning_rate": 1.73102119460501e-05,
"loss": 0.2926643848419189,
"step": 350
},
{
"epoch": 2.0809248554913293,
"grad_norm": 4.915356159210205,
"learning_rate": 1.7233140655105977e-05,
"loss": 0.12723102569580078,
"step": 360
},
{
"epoch": 2.138728323699422,
"grad_norm": 2.580575942993164,
"learning_rate": 1.715606936416185e-05,
"loss": 0.13565282821655272,
"step": 370
},
{
"epoch": 2.1965317919075145,
"grad_norm": 4.596588611602783,
"learning_rate": 1.7078998073217726e-05,
"loss": 0.16868581771850585,
"step": 380
},
{
"epoch": 2.254335260115607,
"grad_norm": 1.6427044868469238,
"learning_rate": 1.7001926782273604e-05,
"loss": 0.10326943397521973,
"step": 390
},
{
"epoch": 2.3121387283236996,
"grad_norm": 3.8816680908203125,
"learning_rate": 1.6924855491329482e-05,
"loss": 0.10355021953582763,
"step": 400
},
{
"epoch": 2.3699421965317917,
"grad_norm": 4.5500922203063965,
"learning_rate": 1.6847784200385357e-05,
"loss": 0.16852855682373047,
"step": 410
},
{
"epoch": 2.4277456647398843,
"grad_norm": 5.74709939956665,
"learning_rate": 1.6770712909441235e-05,
"loss": 0.1326436996459961,
"step": 420
},
{
"epoch": 2.485549132947977,
"grad_norm": 1.0953601598739624,
"learning_rate": 1.6693641618497113e-05,
"loss": 0.18565785884857178,
"step": 430
},
{
"epoch": 2.5433526011560694,
"grad_norm": 3.5484166145324707,
"learning_rate": 1.6616570327552988e-05,
"loss": 0.11107982397079467,
"step": 440
},
{
"epoch": 2.601156069364162,
"grad_norm": 10.514449119567871,
"learning_rate": 1.6539499036608863e-05,
"loss": 0.23550875186920167,
"step": 450
},
{
"epoch": 2.6589595375722546,
"grad_norm": 1.2385636568069458,
"learning_rate": 1.646242774566474e-05,
"loss": 0.09937280416488647,
"step": 460
},
{
"epoch": 2.7167630057803467,
"grad_norm": 2.3061702251434326,
"learning_rate": 1.638535645472062e-05,
"loss": 0.11133263111114503,
"step": 470
},
{
"epoch": 2.7745664739884393,
"grad_norm": 13.695456504821777,
"learning_rate": 1.6308285163776494e-05,
"loss": 0.14138509035110475,
"step": 480
},
{
"epoch": 2.832369942196532,
"grad_norm": 11.574437141418457,
"learning_rate": 1.6231213872832372e-05,
"loss": 0.15487065315246581,
"step": 490
},
{
"epoch": 2.8901734104046244,
"grad_norm": 9.247588157653809,
"learning_rate": 1.6154142581888246e-05,
"loss": 0.18497473001480103,
"step": 500
},
{
"epoch": 2.9479768786127165,
"grad_norm": 1.725334644317627,
"learning_rate": 1.6077071290944125e-05,
"loss": 0.14644594192504884,
"step": 510
},
{
"epoch": 3.0,
"eval_accuracy": 0.7620967741935484,
"eval_f1": 0.7681438802699209,
"eval_loss": 0.6075211763381958,
"eval_runtime": 3.8105,
"eval_samples_per_second": 65.084,
"eval_steps_per_second": 8.136,
"step": 519
},
{
"epoch": 3.005780346820809,
"grad_norm": 0.43373608589172363,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.1772174835205078,
"step": 520
},
{
"epoch": 3.0635838150289016,
"grad_norm": 0.47126689553260803,
"learning_rate": 1.5922928709055877e-05,
"loss": 0.019189047813415527,
"step": 530
},
{
"epoch": 3.121387283236994,
"grad_norm": 1.0332409143447876,
"learning_rate": 1.5845857418111756e-05,
"loss": 0.017575371265411376,
"step": 540
},
{
"epoch": 3.179190751445087,
"grad_norm": 1.5852550268173218,
"learning_rate": 1.5768786127167634e-05,
"loss": 0.037454456090927124,
"step": 550
},
{
"epoch": 3.2369942196531793,
"grad_norm": 1.1080495119094849,
"learning_rate": 1.569171483622351e-05,
"loss": 0.031146246194839477,
"step": 560
},
{
"epoch": 3.294797687861272,
"grad_norm": 0.29696905612945557,
"learning_rate": 1.5614643545279383e-05,
"loss": 0.029173070192337038,
"step": 570
},
{
"epoch": 3.352601156069364,
"grad_norm": 3.4115827083587646,
"learning_rate": 1.553757225433526e-05,
"loss": 0.046107858419418335,
"step": 580
},
{
"epoch": 3.4104046242774566,
"grad_norm": 0.939679741859436,
"learning_rate": 1.546050096339114e-05,
"loss": 0.04879339635372162,
"step": 590
},
{
"epoch": 3.468208092485549,
"grad_norm": 0.7068578600883484,
"learning_rate": 1.5383429672447014e-05,
"loss": 0.02517341673374176,
"step": 600
},
{
"epoch": 3.5260115606936417,
"grad_norm": 0.2978713810443878,
"learning_rate": 1.530635838150289e-05,
"loss": 0.09835766553878784,
"step": 610
},
{
"epoch": 3.583815028901734,
"grad_norm": 10.272834777832031,
"learning_rate": 1.5229287090558769e-05,
"loss": 0.03281269967556,
"step": 620
},
{
"epoch": 3.6416184971098264,
"grad_norm": 6.145327568054199,
"learning_rate": 1.5152215799614645e-05,
"loss": 0.04014509916305542,
"step": 630
},
{
"epoch": 3.699421965317919,
"grad_norm": 0.6408748626708984,
"learning_rate": 1.5075144508670521e-05,
"loss": 0.10582492351531983,
"step": 640
},
{
"epoch": 3.7572254335260116,
"grad_norm": 0.1795165091753006,
"learning_rate": 1.49980732177264e-05,
"loss": 0.03988836109638214,
"step": 650
},
{
"epoch": 3.815028901734104,
"grad_norm": 0.6440432667732239,
"learning_rate": 1.4921001926782274e-05,
"loss": 0.1083465576171875,
"step": 660
},
{
"epoch": 3.8728323699421967,
"grad_norm": 2.706090211868286,
"learning_rate": 1.484393063583815e-05,
"loss": 0.007834103703498841,
"step": 670
},
{
"epoch": 3.9306358381502893,
"grad_norm": 2.186443328857422,
"learning_rate": 1.4766859344894029e-05,
"loss": 0.017444241046905517,
"step": 680
},
{
"epoch": 3.9884393063583814,
"grad_norm": 0.10573586076498032,
"learning_rate": 1.4689788053949905e-05,
"loss": 0.008943480253219605,
"step": 690
},
{
"epoch": 4.0,
"eval_accuracy": 0.8145161290322581,
"eval_f1": 0.8148257052568041,
"eval_loss": 0.6730512976646423,
"eval_runtime": 3.8196,
"eval_samples_per_second": 64.928,
"eval_steps_per_second": 8.116,
"step": 692
},
{
"epoch": 4.046242774566474,
"grad_norm": 0.24642640352249146,
"learning_rate": 1.4612716763005782e-05,
"loss": 0.011015585064888,
"step": 700
},
{
"epoch": 4.104046242774566,
"grad_norm": 0.15465769171714783,
"learning_rate": 1.453564547206166e-05,
"loss": 0.00537472665309906,
"step": 710
},
{
"epoch": 4.161849710982659,
"grad_norm": 0.2364652454853058,
"learning_rate": 1.4458574181117534e-05,
"loss": 0.006760424375534058,
"step": 720
},
{
"epoch": 4.219653179190751,
"grad_norm": 0.039116956293582916,
"learning_rate": 1.4381502890173411e-05,
"loss": 0.005632311105728149,
"step": 730
},
{
"epoch": 4.277456647398844,
"grad_norm": 0.06665871292352676,
"learning_rate": 1.4304431599229289e-05,
"loss": 0.004969970881938934,
"step": 740
},
{
"epoch": 4.335260115606936,
"grad_norm": 0.14143921434879303,
"learning_rate": 1.4227360308285165e-05,
"loss": 0.014902213215827942,
"step": 750
},
{
"epoch": 4.393063583815029,
"grad_norm": 0.2047356367111206,
"learning_rate": 1.4150289017341042e-05,
"loss": 0.005004642903804779,
"step": 760
},
{
"epoch": 4.4508670520231215,
"grad_norm": 0.03578726947307587,
"learning_rate": 1.4073217726396917e-05,
"loss": 0.0032742366194725035,
"step": 770
},
{
"epoch": 4.508670520231214,
"grad_norm": 0.05993838235735893,
"learning_rate": 1.3996146435452795e-05,
"loss": 0.002082832157611847,
"step": 780
},
{
"epoch": 4.566473988439307,
"grad_norm": 0.25260815024375916,
"learning_rate": 1.3919075144508671e-05,
"loss": 0.002451476454734802,
"step": 790
},
{
"epoch": 4.624277456647399,
"grad_norm": 0.030600009486079216,
"learning_rate": 1.3842003853564548e-05,
"loss": 0.003828507661819458,
"step": 800
},
{
"epoch": 4.682080924855491,
"grad_norm": 12.333958625793457,
"learning_rate": 1.3764932562620426e-05,
"loss": 0.06371065378189086,
"step": 810
},
{
"epoch": 4.7398843930635834,
"grad_norm": 0.22651025652885437,
"learning_rate": 1.3687861271676302e-05,
"loss": 0.004562181234359741,
"step": 820
},
{
"epoch": 4.797687861271676,
"grad_norm": 0.03751413896679878,
"learning_rate": 1.3610789980732177e-05,
"loss": 0.0019490152597427368,
"step": 830
},
{
"epoch": 4.855491329479769,
"grad_norm": 1.809687852859497,
"learning_rate": 1.3533718689788055e-05,
"loss": 0.006183743476867676,
"step": 840
},
{
"epoch": 4.913294797687861,
"grad_norm": 1.0058674812316895,
"learning_rate": 1.3456647398843931e-05,
"loss": 0.008757662773132325,
"step": 850
},
{
"epoch": 4.971098265895954,
"grad_norm": 0.0775017961859703,
"learning_rate": 1.3379576107899808e-05,
"loss": 0.0020394161343574523,
"step": 860
},
{
"epoch": 5.0,
"eval_accuracy": 0.8064516129032258,
"eval_f1": 0.8074801611817632,
"eval_loss": 0.7273994088172913,
"eval_runtime": 3.8077,
"eval_samples_per_second": 65.132,
"eval_steps_per_second": 8.141,
"step": 865
},
{
"epoch": 5.028901734104046,
"grad_norm": 0.07742282748222351,
"learning_rate": 1.3302504816955686e-05,
"loss": 0.0031921621412038803,
"step": 870
},
{
"epoch": 5.086705202312139,
"grad_norm": 0.021719103679060936,
"learning_rate": 1.3225433526011562e-05,
"loss": 0.0016711041331291198,
"step": 880
},
{
"epoch": 5.144508670520231,
"grad_norm": 0.040297143161296844,
"learning_rate": 1.3148362235067437e-05,
"loss": 0.0009254798293113709,
"step": 890
},
{
"epoch": 5.202312138728324,
"grad_norm": 0.06567544490098953,
"learning_rate": 1.3071290944123315e-05,
"loss": 0.0013921096920967101,
"step": 900
},
{
"epoch": 5.2601156069364166,
"grad_norm": 0.10894370079040527,
"learning_rate": 1.2994219653179192e-05,
"loss": 0.0009916990995407105,
"step": 910
},
{
"epoch": 5.317919075144509,
"grad_norm": 0.030588222667574883,
"learning_rate": 1.2917148362235068e-05,
"loss": 0.0011297404766082765,
"step": 920
},
{
"epoch": 5.375722543352601,
"grad_norm": 0.044895388185977936,
"learning_rate": 1.2840077071290946e-05,
"loss": 0.0009317293763160706,
"step": 930
},
{
"epoch": 5.433526011560693,
"grad_norm": 0.030870944261550903,
"learning_rate": 1.2763005780346823e-05,
"loss": 0.0010338693857192994,
"step": 940
},
{
"epoch": 5.491329479768786,
"grad_norm": 0.024050451815128326,
"learning_rate": 1.2685934489402697e-05,
"loss": 0.000977499783039093,
"step": 950
},
{
"epoch": 5.5491329479768785,
"grad_norm": 0.02893257327377796,
"learning_rate": 1.2608863198458577e-05,
"loss": 0.0013506487011909485,
"step": 960
},
{
"epoch": 5.606936416184971,
"grad_norm": 0.09149627387523651,
"learning_rate": 1.2531791907514452e-05,
"loss": 0.0009457975625991822,
"step": 970
},
{
"epoch": 5.664739884393064,
"grad_norm": 0.020059145987033844,
"learning_rate": 1.2454720616570328e-05,
"loss": 0.0008360743522644043,
"step": 980
},
{
"epoch": 5.722543352601156,
"grad_norm": 0.02598397620022297,
"learning_rate": 1.2377649325626205e-05,
"loss": 0.0008077919483184814,
"step": 990
},
{
"epoch": 5.780346820809249,
"grad_norm": 0.051848188042640686,
"learning_rate": 1.2300578034682083e-05,
"loss": 0.0009181752800941467,
"step": 1000
},
{
"epoch": 5.838150289017341,
"grad_norm": 0.02003743126988411,
"learning_rate": 1.222350674373796e-05,
"loss": 0.0008455753326416015,
"step": 1010
},
{
"epoch": 5.895953757225434,
"grad_norm": 0.4392681121826172,
"learning_rate": 1.2146435452793834e-05,
"loss": 0.03311595022678375,
"step": 1020
},
{
"epoch": 5.953757225433526,
"grad_norm": 0.041885748505592346,
"learning_rate": 1.2069364161849712e-05,
"loss": 0.0012869253754615785,
"step": 1030
},
{
"epoch": 6.0,
"eval_accuracy": 0.7903225806451613,
"eval_f1": 0.7943751207262894,
"eval_loss": 0.8309548497200012,
"eval_runtime": 3.7714,
"eval_samples_per_second": 65.759,
"eval_steps_per_second": 8.22,
"step": 1038
},
{
"epoch": 6.011560693641618,
"grad_norm": 0.026743754744529724,
"learning_rate": 1.1992292870905588e-05,
"loss": 0.0006453114096075296,
"step": 1040
},
{
"epoch": 6.069364161849711,
"grad_norm": 0.06693530082702637,
"learning_rate": 1.1915221579961465e-05,
"loss": 0.0007815584540367126,
"step": 1050
},
{
"epoch": 6.127167630057803,
"grad_norm": 0.015539342537522316,
"learning_rate": 1.1838150289017343e-05,
"loss": 0.0006080090999603272,
"step": 1060
},
{
"epoch": 6.184971098265896,
"grad_norm": 0.014636315405368805,
"learning_rate": 1.176107899807322e-05,
"loss": 0.0006367906928062439,
"step": 1070
},
{
"epoch": 6.242774566473988,
"grad_norm": 0.019367052242159843,
"learning_rate": 1.1684007707129094e-05,
"loss": 0.0007835239171981812,
"step": 1080
},
{
"epoch": 6.300578034682081,
"grad_norm": 0.01782175898551941,
"learning_rate": 1.1606936416184972e-05,
"loss": 0.0006255954504013062,
"step": 1090
},
{
"epoch": 6.358381502890174,
"grad_norm": 0.009536216966807842,
"learning_rate": 1.1529865125240849e-05,
"loss": 0.0005810096859931946,
"step": 1100
},
{
"epoch": 6.416184971098266,
"grad_norm": 0.021404527127742767,
"learning_rate": 1.1452793834296725e-05,
"loss": 0.000746677815914154,
"step": 1110
},
{
"epoch": 6.473988439306359,
"grad_norm": 0.049797266721725464,
"learning_rate": 1.1375722543352603e-05,
"loss": 0.00063580721616745,
"step": 1120
},
{
"epoch": 6.531791907514451,
"grad_norm": 0.025786111131310463,
"learning_rate": 1.129865125240848e-05,
"loss": 0.0006033405661582947,
"step": 1130
},
{
"epoch": 6.589595375722544,
"grad_norm": 0.016592316329479218,
"learning_rate": 1.1221579961464354e-05,
"loss": 0.0006539627909660339,
"step": 1140
},
{
"epoch": 6.6473988439306355,
"grad_norm": 0.016465384513139725,
"learning_rate": 1.1144508670520232e-05,
"loss": 0.0005995437502861023,
"step": 1150
},
{
"epoch": 6.705202312138728,
"grad_norm": 0.019848085939884186,
"learning_rate": 1.1067437379576109e-05,
"loss": 0.0006159201264381408,
"step": 1160
},
{
"epoch": 6.763005780346821,
"grad_norm": 0.015507291071116924,
"learning_rate": 1.0990366088631985e-05,
"loss": 0.0004596635699272156,
"step": 1170
},
{
"epoch": 6.820809248554913,
"grad_norm": 0.022381598129868507,
"learning_rate": 1.0913294797687862e-05,
"loss": 0.000590360164642334,
"step": 1180
},
{
"epoch": 6.878612716763006,
"grad_norm": 0.01475490815937519,
"learning_rate": 1.083622350674374e-05,
"loss": 0.000518760085105896,
"step": 1190
},
{
"epoch": 6.936416184971098,
"grad_norm": 0.0208587646484375,
"learning_rate": 1.0759152215799615e-05,
"loss": 0.0006424024701118469,
"step": 1200
},
{
"epoch": 6.994219653179191,
"grad_norm": 0.08424866199493408,
"learning_rate": 1.0682080924855491e-05,
"loss": 0.0008051112294197082,
"step": 1210
},
{
"epoch": 7.0,
"eval_accuracy": 0.8104838709677419,
"eval_f1": 0.8129958949851424,
"eval_loss": 0.8261250853538513,
"eval_runtime": 3.8032,
"eval_samples_per_second": 65.208,
"eval_steps_per_second": 8.151,
"step": 1211
},
{
"epoch": 7.0520231213872835,
"grad_norm": 0.026666566729545593,
"learning_rate": 1.0605009633911369e-05,
"loss": 0.000561926607042551,
"step": 1220
},
{
"epoch": 7.109826589595376,
"grad_norm": 0.008546934463083744,
"learning_rate": 1.0527938342967246e-05,
"loss": 0.0005572408437728882,
"step": 1230
},
{
"epoch": 7.167630057803469,
"grad_norm": 0.007139866705983877,
"learning_rate": 1.0450867052023122e-05,
"loss": 0.00040052533149719237,
"step": 1240
},
{
"epoch": 7.22543352601156,
"grad_norm": 0.01626797765493393,
"learning_rate": 1.0373795761079e-05,
"loss": 0.0005501970648765564,
"step": 1250
},
{
"epoch": 7.283236994219653,
"grad_norm": 0.012005102820694447,
"learning_rate": 1.0296724470134875e-05,
"loss": 0.0004243999719619751,
"step": 1260
},
{
"epoch": 7.341040462427745,
"grad_norm": 0.018900051712989807,
"learning_rate": 1.0219653179190751e-05,
"loss": 0.0004561007022857666,
"step": 1270
},
{
"epoch": 7.398843930635838,
"grad_norm": 0.016326697543263435,
"learning_rate": 1.014258188824663e-05,
"loss": 0.0005092039704322815,
"step": 1280
},
{
"epoch": 7.456647398843931,
"grad_norm": 0.010545836761593819,
"learning_rate": 1.0065510597302506e-05,
"loss": 0.0004412621259689331,
"step": 1290
},
{
"epoch": 7.514450867052023,
"grad_norm": 0.01526038721203804,
"learning_rate": 9.988439306358382e-06,
"loss": 0.0004318729043006897,
"step": 1300
},
{
"epoch": 7.572254335260116,
"grad_norm": 0.012989806942641735,
"learning_rate": 9.911368015414259e-06,
"loss": 0.00048479437828063965,
"step": 1310
},
{
"epoch": 7.630057803468208,
"grad_norm": 0.009836602956056595,
"learning_rate": 9.834296724470137e-06,
"loss": 0.0003552690148353577,
"step": 1320
},
{
"epoch": 7.687861271676301,
"grad_norm": 0.012384418398141861,
"learning_rate": 9.757225433526011e-06,
"loss": 0.0004932507872581482,
"step": 1330
},
{
"epoch": 7.745664739884393,
"grad_norm": 0.020340140908956528,
"learning_rate": 9.68015414258189e-06,
"loss": 0.00048180222511291505,
"step": 1340
},
{
"epoch": 7.803468208092486,
"grad_norm": 0.03233597055077553,
"learning_rate": 9.603082851637766e-06,
"loss": 0.0004775360226631165,
"step": 1350
},
{
"epoch": 7.861271676300578,
"grad_norm": 0.01245969720184803,
"learning_rate": 9.526011560693642e-06,
"loss": 0.00040520131587982176,
"step": 1360
},
{
"epoch": 7.91907514450867,
"grad_norm": 0.00969842541962862,
"learning_rate": 9.448940269749519e-06,
"loss": 0.0003412917256355286,
"step": 1370
},
{
"epoch": 7.976878612716763,
"grad_norm": 0.022187134250998497,
"learning_rate": 9.371868978805397e-06,
"loss": 0.0004110649228096008,
"step": 1380
},
{
"epoch": 8.0,
"eval_accuracy": 0.8064516129032258,
"eval_f1": 0.8086838155814223,
"eval_loss": 0.8546451330184937,
"eval_runtime": 3.7864,
"eval_samples_per_second": 65.497,
"eval_steps_per_second": 8.187,
"step": 1384
},
{
"epoch": 8.034682080924856,
"grad_norm": 0.019872142001986504,
"learning_rate": 9.294797687861272e-06,
"loss": 0.0003799670375883579,
"step": 1390
},
{
"epoch": 8.092485549132949,
"grad_norm": 0.011816315352916718,
"learning_rate": 9.21772639691715e-06,
"loss": 0.00041468888521194457,
"step": 1400
},
{
"epoch": 8.15028901734104,
"grad_norm": 0.009374646469950676,
"learning_rate": 9.140655105973025e-06,
"loss": 0.00042216181755065917,
"step": 1410
},
{
"epoch": 8.208092485549132,
"grad_norm": 0.01085547637194395,
"learning_rate": 9.063583815028903e-06,
"loss": 0.0004324719309806824,
"step": 1420
},
{
"epoch": 8.265895953757225,
"grad_norm": 0.028972823172807693,
"learning_rate": 8.986512524084779e-06,
"loss": 0.00038540661334991456,
"step": 1430
},
{
"epoch": 8.323699421965317,
"grad_norm": 0.01954697258770466,
"learning_rate": 8.909441233140655e-06,
"loss": 0.0004615575075149536,
"step": 1440
},
{
"epoch": 8.38150289017341,
"grad_norm": 0.01238598208874464,
"learning_rate": 8.832369942196532e-06,
"loss": 0.0003170013427734375,
"step": 1450
},
{
"epoch": 8.439306358381502,
"grad_norm": 0.005586686078459024,
"learning_rate": 8.75529865125241e-06,
"loss": 0.0003178909420967102,
"step": 1460
},
{
"epoch": 8.497109826589595,
"grad_norm": 0.0054536196403205395,
"learning_rate": 8.678227360308286e-06,
"loss": 0.00030860304832458496,
"step": 1470
},
{
"epoch": 8.554913294797688,
"grad_norm": 0.005438173655420542,
"learning_rate": 8.601156069364163e-06,
"loss": 0.0003883242607116699,
"step": 1480
},
{
"epoch": 8.61271676300578,
"grad_norm": 0.010960490442812443,
"learning_rate": 8.52408477842004e-06,
"loss": 0.0003649115562438965,
"step": 1490
},
{
"epoch": 8.670520231213873,
"grad_norm": 0.010015477426350117,
"learning_rate": 8.447013487475916e-06,
"loss": 0.0002670750021934509,
"step": 1500
},
{
"epoch": 8.728323699421965,
"grad_norm": 0.01602529175579548,
"learning_rate": 8.369942196531792e-06,
"loss": 0.0003640010952949524,
"step": 1510
},
{
"epoch": 8.786127167630058,
"grad_norm": 0.011708080768585205,
"learning_rate": 8.292870905587669e-06,
"loss": 0.0003057181835174561,
"step": 1520
},
{
"epoch": 8.84393063583815,
"grad_norm": 0.01114922296255827,
"learning_rate": 8.215799614643547e-06,
"loss": 0.0003286987543106079,
"step": 1530
},
{
"epoch": 8.901734104046243,
"grad_norm": 0.015181174501776695,
"learning_rate": 8.138728323699423e-06,
"loss": 0.0003021523356437683,
"step": 1540
},
{
"epoch": 8.959537572254336,
"grad_norm": 0.014381779357790947,
"learning_rate": 8.0616570327553e-06,
"loss": 0.0003357663750648499,
"step": 1550
},
{
"epoch": 9.0,
"eval_accuracy": 0.8104838709677419,
"eval_f1": 0.8129958949851424,
"eval_loss": 0.8783804178237915,
"eval_runtime": 3.8263,
"eval_samples_per_second": 64.814,
"eval_steps_per_second": 8.102,
"step": 1557
}
],
"logging_steps": 10,
"max_steps": 2595,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.8442416458701537e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}