DamCat / trainer_state.json
Cpope3's picture
Upload 9 files
d5a88bb verified
{
"best_global_step": 1380,
"best_metric": 0.9760859393830551,
"best_model_checkpoint": "./vitmodel-results3\\checkpoint-1380",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 1840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.043478260869565216,
"grad_norm": 6.461246490478516,
"learning_rate": 1.9947826086956524e-05,
"loss": 0.6514617919921875,
"step": 10
},
{
"epoch": 0.08695652173913043,
"grad_norm": 3.8991446495056152,
"learning_rate": 1.988985507246377e-05,
"loss": 0.45957489013671876,
"step": 20
},
{
"epoch": 0.13043478260869565,
"grad_norm": 2.4451658725738525,
"learning_rate": 1.9831884057971015e-05,
"loss": 0.3501922607421875,
"step": 30
},
{
"epoch": 0.17391304347826086,
"grad_norm": 3.7429535388946533,
"learning_rate": 1.9773913043478265e-05,
"loss": 0.27413215637207033,
"step": 40
},
{
"epoch": 0.21739130434782608,
"grad_norm": 12.403484344482422,
"learning_rate": 1.971594202898551e-05,
"loss": 0.45773887634277344,
"step": 50
},
{
"epoch": 0.2608695652173913,
"grad_norm": 5.481701850891113,
"learning_rate": 1.9657971014492755e-05,
"loss": 0.23930573463439941,
"step": 60
},
{
"epoch": 0.30434782608695654,
"grad_norm": 4.304569244384766,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.2975881576538086,
"step": 70
},
{
"epoch": 0.34782608695652173,
"grad_norm": 8.520660400390625,
"learning_rate": 1.954202898550725e-05,
"loss": 0.2244499921798706,
"step": 80
},
{
"epoch": 0.391304347826087,
"grad_norm": 5.180691719055176,
"learning_rate": 1.9484057971014492e-05,
"loss": 0.17172325849533082,
"step": 90
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.8175772428512573,
"learning_rate": 1.9426086956521743e-05,
"loss": 0.2387838363647461,
"step": 100
},
{
"epoch": 0.4782608695652174,
"grad_norm": 0.4058602452278137,
"learning_rate": 1.9368115942028986e-05,
"loss": 0.10988756418228149,
"step": 110
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.9439899921417236,
"learning_rate": 1.9310144927536233e-05,
"loss": 0.14697353839874266,
"step": 120
},
{
"epoch": 0.5652173913043478,
"grad_norm": 0.6720163822174072,
"learning_rate": 1.925217391304348e-05,
"loss": 0.21182384490966796,
"step": 130
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.371541291475296,
"learning_rate": 1.9194202898550727e-05,
"loss": 0.2180387258529663,
"step": 140
},
{
"epoch": 0.6521739130434783,
"grad_norm": 1.6255207061767578,
"learning_rate": 1.9136231884057973e-05,
"loss": 0.20694243907928467,
"step": 150
},
{
"epoch": 0.6956521739130435,
"grad_norm": 7.544068813323975,
"learning_rate": 1.907826086956522e-05,
"loss": 0.1606433391571045,
"step": 160
},
{
"epoch": 0.7391304347826086,
"grad_norm": 0.549288272857666,
"learning_rate": 1.9020289855072464e-05,
"loss": 0.17944023609161378,
"step": 170
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.43946510553359985,
"learning_rate": 1.896231884057971e-05,
"loss": 0.2129380226135254,
"step": 180
},
{
"epoch": 0.8260869565217391,
"grad_norm": 0.20731233060359955,
"learning_rate": 1.8904347826086957e-05,
"loss": 0.11439937353134155,
"step": 190
},
{
"epoch": 0.8695652173913043,
"grad_norm": 0.1378840208053589,
"learning_rate": 1.8846376811594204e-05,
"loss": 0.21877152919769288,
"step": 200
},
{
"epoch": 0.9130434782608695,
"grad_norm": 0.35801249742507935,
"learning_rate": 1.878840579710145e-05,
"loss": 0.10442726612091065,
"step": 210
},
{
"epoch": 0.9565217391304348,
"grad_norm": 10.050288200378418,
"learning_rate": 1.8730434782608698e-05,
"loss": 0.19560953378677368,
"step": 220
},
{
"epoch": 1.0,
"grad_norm": 0.15084530413150787,
"learning_rate": 1.867246376811594e-05,
"loss": 0.02761389911174774,
"step": 230
},
{
"epoch": 1.0,
"eval_accuracy": 0.9521739130434783,
"eval_f1": 0.9520642679853729,
"eval_loss": 0.14785830676555634,
"eval_runtime": 5.6109,
"eval_samples_per_second": 81.984,
"eval_steps_per_second": 10.337,
"step": 230
},
{
"epoch": 1.0434782608695652,
"grad_norm": 0.186416894197464,
"learning_rate": 1.861449275362319e-05,
"loss": 0.04351995289325714,
"step": 240
},
{
"epoch": 1.0869565217391304,
"grad_norm": 0.06740374863147736,
"learning_rate": 1.8556521739130435e-05,
"loss": 0.005690252780914307,
"step": 250
},
{
"epoch": 1.1304347826086956,
"grad_norm": 6.0832743644714355,
"learning_rate": 1.8498550724637682e-05,
"loss": 0.016183775663375855,
"step": 260
},
{
"epoch": 1.1739130434782608,
"grad_norm": 0.025469312444329262,
"learning_rate": 1.844057971014493e-05,
"loss": 0.025768563151359558,
"step": 270
},
{
"epoch": 1.2173913043478262,
"grad_norm": 0.05013096332550049,
"learning_rate": 1.8382608695652175e-05,
"loss": 0.07644214630126953,
"step": 280
},
{
"epoch": 1.2608695652173914,
"grad_norm": 0.19063518941402435,
"learning_rate": 1.8324637681159422e-05,
"loss": 0.06052442789077759,
"step": 290
},
{
"epoch": 1.3043478260869565,
"grad_norm": 0.04668483883142471,
"learning_rate": 1.826666666666667e-05,
"loss": 0.0497345507144928,
"step": 300
},
{
"epoch": 1.3478260869565217,
"grad_norm": 0.027216244488954544,
"learning_rate": 1.8208695652173916e-05,
"loss": 0.08886347413063049,
"step": 310
},
{
"epoch": 1.391304347826087,
"grad_norm": 0.3011648952960968,
"learning_rate": 1.815072463768116e-05,
"loss": 0.0871780276298523,
"step": 320
},
{
"epoch": 1.434782608695652,
"grad_norm": 9.434959411621094,
"learning_rate": 1.809275362318841e-05,
"loss": 0.06977825760841369,
"step": 330
},
{
"epoch": 1.4782608695652173,
"grad_norm": 0.07939770817756653,
"learning_rate": 1.8034782608695653e-05,
"loss": 0.11019858121871948,
"step": 340
},
{
"epoch": 1.5217391304347827,
"grad_norm": 6.755427837371826,
"learning_rate": 1.79768115942029e-05,
"loss": 0.07228946685791016,
"step": 350
},
{
"epoch": 1.5652173913043477,
"grad_norm": 10.783921241760254,
"learning_rate": 1.7918840579710147e-05,
"loss": 0.06457504034042358,
"step": 360
},
{
"epoch": 1.608695652173913,
"grad_norm": 0.10878543555736542,
"learning_rate": 1.7860869565217394e-05,
"loss": 0.021503202617168427,
"step": 370
},
{
"epoch": 1.6521739130434783,
"grad_norm": 0.25200846791267395,
"learning_rate": 1.780289855072464e-05,
"loss": 0.06428139805793762,
"step": 380
},
{
"epoch": 1.6956521739130435,
"grad_norm": 1.0684906244277954,
"learning_rate": 1.7744927536231887e-05,
"loss": 0.09642828106880189,
"step": 390
},
{
"epoch": 1.7391304347826086,
"grad_norm": 12.593297004699707,
"learning_rate": 1.768695652173913e-05,
"loss": 0.028580766916275025,
"step": 400
},
{
"epoch": 1.7826086956521738,
"grad_norm": 0.04352446645498276,
"learning_rate": 1.7628985507246377e-05,
"loss": 0.1266841173171997,
"step": 410
},
{
"epoch": 1.8260869565217392,
"grad_norm": 0.03208275884389877,
"learning_rate": 1.7571014492753624e-05,
"loss": 0.0604109525680542,
"step": 420
},
{
"epoch": 1.8695652173913042,
"grad_norm": 0.0292875487357378,
"learning_rate": 1.751304347826087e-05,
"loss": 0.08443626761436462,
"step": 430
},
{
"epoch": 1.9130434782608696,
"grad_norm": 0.04183952882885933,
"learning_rate": 1.7455072463768118e-05,
"loss": 0.026611250638961793,
"step": 440
},
{
"epoch": 1.9565217391304348,
"grad_norm": 0.419708788394928,
"learning_rate": 1.7397101449275365e-05,
"loss": 0.1165506362915039,
"step": 450
},
{
"epoch": 2.0,
"grad_norm": 8.380155563354492,
"learning_rate": 1.7339130434782608e-05,
"loss": 0.041912269592285153,
"step": 460
},
{
"epoch": 2.0,
"eval_accuracy": 0.9565217391304348,
"eval_f1": 0.9565184513006655,
"eval_loss": 0.1165793240070343,
"eval_runtime": 3.7298,
"eval_samples_per_second": 123.331,
"eval_steps_per_second": 15.55,
"step": 460
},
{
"epoch": 2.0434782608695654,
"grad_norm": 0.07667429745197296,
"learning_rate": 1.728115942028986e-05,
"loss": 0.0013138219714164735,
"step": 470
},
{
"epoch": 2.0869565217391304,
"grad_norm": 0.05316108465194702,
"learning_rate": 1.7223188405797102e-05,
"loss": 0.004785384237766266,
"step": 480
},
{
"epoch": 2.130434782608696,
"grad_norm": 0.018993400037288666,
"learning_rate": 1.716521739130435e-05,
"loss": 0.0010403752326965331,
"step": 490
},
{
"epoch": 2.1739130434782608,
"grad_norm": 0.005419578403234482,
"learning_rate": 1.7107246376811596e-05,
"loss": 0.0010405436158180236,
"step": 500
},
{
"epoch": 2.217391304347826,
"grad_norm": 2.7880542278289795,
"learning_rate": 1.7049275362318842e-05,
"loss": 0.01008293330669403,
"step": 510
},
{
"epoch": 2.260869565217391,
"grad_norm": 0.19926372170448303,
"learning_rate": 1.6991304347826086e-05,
"loss": 0.002237708866596222,
"step": 520
},
{
"epoch": 2.3043478260869565,
"grad_norm": 0.006354826502501965,
"learning_rate": 1.6933333333333336e-05,
"loss": 0.015198694169521331,
"step": 530
},
{
"epoch": 2.3478260869565215,
"grad_norm": 0.01782035082578659,
"learning_rate": 1.687536231884058e-05,
"loss": 0.0017350628972053529,
"step": 540
},
{
"epoch": 2.391304347826087,
"grad_norm": 0.6461467742919922,
"learning_rate": 1.6817391304347826e-05,
"loss": 0.0012194350361824035,
"step": 550
},
{
"epoch": 2.4347826086956523,
"grad_norm": 0.014753537252545357,
"learning_rate": 1.6759420289855073e-05,
"loss": 0.03461991548538208,
"step": 560
},
{
"epoch": 2.4782608695652173,
"grad_norm": 0.015930302441120148,
"learning_rate": 1.670144927536232e-05,
"loss": 0.0030654460191726685,
"step": 570
},
{
"epoch": 2.5217391304347827,
"grad_norm": 0.07892700284719467,
"learning_rate": 1.6643478260869567e-05,
"loss": 0.0017842918634414673,
"step": 580
},
{
"epoch": 2.5652173913043477,
"grad_norm": 0.05785762518644333,
"learning_rate": 1.6585507246376814e-05,
"loss": 0.0016030147671699524,
"step": 590
},
{
"epoch": 2.608695652173913,
"grad_norm": 0.051935892552137375,
"learning_rate": 1.652753623188406e-05,
"loss": 0.0006048619747161865,
"step": 600
},
{
"epoch": 2.6521739130434785,
"grad_norm": 0.009883932769298553,
"learning_rate": 1.6469565217391304e-05,
"loss": 0.0022064462304115296,
"step": 610
},
{
"epoch": 2.6956521739130435,
"grad_norm": 0.01653284765779972,
"learning_rate": 1.6411594202898554e-05,
"loss": 0.010119739174842834,
"step": 620
},
{
"epoch": 2.7391304347826084,
"grad_norm": 0.013404067605733871,
"learning_rate": 1.6353623188405798e-05,
"loss": 0.004131542146205902,
"step": 630
},
{
"epoch": 2.782608695652174,
"grad_norm": 0.009171389043331146,
"learning_rate": 1.6295652173913044e-05,
"loss": 0.08883790969848633,
"step": 640
},
{
"epoch": 2.8260869565217392,
"grad_norm": 12.090933799743652,
"learning_rate": 1.623768115942029e-05,
"loss": 0.010134254395961762,
"step": 650
},
{
"epoch": 2.869565217391304,
"grad_norm": 4.632288455963135,
"learning_rate": 1.6179710144927538e-05,
"loss": 0.003986392915248871,
"step": 660
},
{
"epoch": 2.9130434782608696,
"grad_norm": 0.06515643000602722,
"learning_rate": 1.6121739130434785e-05,
"loss": 0.0041788950562477115,
"step": 670
},
{
"epoch": 2.9565217391304346,
"grad_norm": 0.33638763427734375,
"learning_rate": 1.6063768115942032e-05,
"loss": 0.0013911113142967223,
"step": 680
},
{
"epoch": 3.0,
"grad_norm": 0.1827061027288437,
"learning_rate": 1.6005797101449275e-05,
"loss": 0.0004976257681846618,
"step": 690
},
{
"epoch": 3.0,
"eval_accuracy": 0.9695652173913043,
"eval_f1": 0.9695652173913043,
"eval_loss": 0.09442394226789474,
"eval_runtime": 3.6984,
"eval_samples_per_second": 124.377,
"eval_steps_per_second": 15.682,
"step": 690
},
{
"epoch": 3.0434782608695654,
"grad_norm": 0.04813811555504799,
"learning_rate": 1.5947826086956522e-05,
"loss": 0.0004477664828300476,
"step": 700
},
{
"epoch": 3.0869565217391304,
"grad_norm": 0.0175640732049942,
"learning_rate": 1.588985507246377e-05,
"loss": 0.0004123836755752563,
"step": 710
},
{
"epoch": 3.130434782608696,
"grad_norm": 0.008048221468925476,
"learning_rate": 1.5831884057971016e-05,
"loss": 0.0004120379686355591,
"step": 720
},
{
"epoch": 3.1739130434782608,
"grad_norm": 0.0071647269651293755,
"learning_rate": 1.5773913043478263e-05,
"loss": 0.00032983869314193723,
"step": 730
},
{
"epoch": 3.217391304347826,
"grad_norm": 0.015544029884040356,
"learning_rate": 1.571594202898551e-05,
"loss": 0.00034575462341308595,
"step": 740
},
{
"epoch": 3.260869565217391,
"grad_norm": 0.004907351918518543,
"learning_rate": 1.5657971014492753e-05,
"loss": 0.00026599913835525515,
"step": 750
},
{
"epoch": 3.3043478260869565,
"grad_norm": 0.013097843155264854,
"learning_rate": 1.5600000000000003e-05,
"loss": 0.0016580477356910705,
"step": 760
},
{
"epoch": 3.3478260869565215,
"grad_norm": 0.004332110285758972,
"learning_rate": 1.5542028985507247e-05,
"loss": 0.00046425610780715943,
"step": 770
},
{
"epoch": 3.391304347826087,
"grad_norm": 0.06276489794254303,
"learning_rate": 1.5484057971014493e-05,
"loss": 0.0007047504186630249,
"step": 780
},
{
"epoch": 3.4347826086956523,
"grad_norm": 0.00449096504598856,
"learning_rate": 1.542608695652174e-05,
"loss": 0.0002553284168243408,
"step": 790
},
{
"epoch": 3.4782608695652173,
"grad_norm": 0.011169650591909885,
"learning_rate": 1.5368115942028987e-05,
"loss": 0.0003493279218673706,
"step": 800
},
{
"epoch": 3.5217391304347827,
"grad_norm": 0.025958970189094543,
"learning_rate": 1.5310144927536234e-05,
"loss": 0.0002732709050178528,
"step": 810
},
{
"epoch": 3.5652173913043477,
"grad_norm": 0.01937592588365078,
"learning_rate": 1.5252173913043479e-05,
"loss": 0.000246034562587738,
"step": 820
},
{
"epoch": 3.608695652173913,
"grad_norm": 0.00856866966933012,
"learning_rate": 1.5194202898550726e-05,
"loss": 0.00028263479471206664,
"step": 830
},
{
"epoch": 3.6521739130434785,
"grad_norm": 0.12088195979595184,
"learning_rate": 1.5136231884057973e-05,
"loss": 0.0003507554531097412,
"step": 840
},
{
"epoch": 3.6956521739130435,
"grad_norm": 0.02024533785879612,
"learning_rate": 1.5078260869565218e-05,
"loss": 0.00027790963649749757,
"step": 850
},
{
"epoch": 3.7391304347826084,
"grad_norm": 0.0040628910064697266,
"learning_rate": 1.5020289855072465e-05,
"loss": 0.0002285495400428772,
"step": 860
},
{
"epoch": 3.782608695652174,
"grad_norm": 0.0061136772856116295,
"learning_rate": 1.496231884057971e-05,
"loss": 0.00027128159999847414,
"step": 870
},
{
"epoch": 3.8260869565217392,
"grad_norm": 0.012037448585033417,
"learning_rate": 1.4904347826086958e-05,
"loss": 0.0002808883786201477,
"step": 880
},
{
"epoch": 3.869565217391304,
"grad_norm": 0.004823528695851564,
"learning_rate": 1.4846376811594203e-05,
"loss": 0.0005329117178916931,
"step": 890
},
{
"epoch": 3.9130434782608696,
"grad_norm": 0.04427816718816757,
"learning_rate": 1.478840579710145e-05,
"loss": 0.00029876679182052614,
"step": 900
},
{
"epoch": 3.9565217391304346,
"grad_norm": 0.04008401557803154,
"learning_rate": 1.4730434782608695e-05,
"loss": 0.00039345473051071166,
"step": 910
},
{
"epoch": 4.0,
"grad_norm": 0.010993687435984612,
"learning_rate": 1.4672463768115944e-05,
"loss": 0.00024021416902542114,
"step": 920
},
{
"epoch": 4.0,
"eval_accuracy": 0.967391304347826,
"eval_f1": 0.967383751435824,
"eval_loss": 0.10565203428268433,
"eval_runtime": 3.7655,
"eval_samples_per_second": 122.162,
"eval_steps_per_second": 15.403,
"step": 920
},
{
"epoch": 4.043478260869565,
"grad_norm": 0.009720547124743462,
"learning_rate": 1.461449275362319e-05,
"loss": 0.00024558454751968386,
"step": 930
},
{
"epoch": 4.086956521739131,
"grad_norm": 0.017342587932944298,
"learning_rate": 1.4556521739130436e-05,
"loss": 0.00018810927867889403,
"step": 940
},
{
"epoch": 4.130434782608695,
"grad_norm": 0.011509642004966736,
"learning_rate": 1.4498550724637683e-05,
"loss": 0.00023101717233657836,
"step": 950
},
{
"epoch": 4.173913043478261,
"grad_norm": 0.0029383855871856213,
"learning_rate": 1.4440579710144928e-05,
"loss": 0.00020957440137863158,
"step": 960
},
{
"epoch": 4.217391304347826,
"grad_norm": 0.016090553253889084,
"learning_rate": 1.4382608695652176e-05,
"loss": 0.0001988038420677185,
"step": 970
},
{
"epoch": 4.260869565217392,
"grad_norm": 0.005717333406209946,
"learning_rate": 1.4324637681159422e-05,
"loss": 0.00017771720886230468,
"step": 980
},
{
"epoch": 4.304347826086957,
"grad_norm": 0.0067417211830616,
"learning_rate": 1.4266666666666668e-05,
"loss": 0.0001595720648765564,
"step": 990
},
{
"epoch": 4.3478260869565215,
"grad_norm": 0.014678889885544777,
"learning_rate": 1.4208695652173914e-05,
"loss": 0.00021335333585739135,
"step": 1000
},
{
"epoch": 4.391304347826087,
"grad_norm": 0.015480758622288704,
"learning_rate": 1.4150724637681162e-05,
"loss": 0.00018725097179412843,
"step": 1010
},
{
"epoch": 4.434782608695652,
"grad_norm": 0.009670041501522064,
"learning_rate": 1.4092753623188407e-05,
"loss": 0.00017006248235702516,
"step": 1020
},
{
"epoch": 4.478260869565218,
"grad_norm": 0.004368505906313658,
"learning_rate": 1.4034782608695654e-05,
"loss": 0.00011847317218780518,
"step": 1030
},
{
"epoch": 4.521739130434782,
"grad_norm": 0.00646650604903698,
"learning_rate": 1.39768115942029e-05,
"loss": 0.00011199414730072022,
"step": 1040
},
{
"epoch": 4.565217391304348,
"grad_norm": 0.0032207826152443886,
"learning_rate": 1.3918840579710146e-05,
"loss": 0.0001057848334312439,
"step": 1050
},
{
"epoch": 4.608695652173913,
"grad_norm": 0.004954950883984566,
"learning_rate": 1.3860869565217391e-05,
"loss": 0.00018178075551986695,
"step": 1060
},
{
"epoch": 4.6521739130434785,
"grad_norm": 0.002452458254992962,
"learning_rate": 1.380289855072464e-05,
"loss": 0.00011045336723327636,
"step": 1070
},
{
"epoch": 4.695652173913043,
"grad_norm": 0.008102525025606155,
"learning_rate": 1.3744927536231885e-05,
"loss": 0.00026093870401382445,
"step": 1080
},
{
"epoch": 4.739130434782608,
"grad_norm": 0.010890824720263481,
"learning_rate": 1.3686956521739132e-05,
"loss": 0.0001526176929473877,
"step": 1090
},
{
"epoch": 4.782608695652174,
"grad_norm": 0.004832288715988398,
"learning_rate": 1.3628985507246377e-05,
"loss": 0.0004844769835472107,
"step": 1100
},
{
"epoch": 4.826086956521739,
"grad_norm": 0.0037648973520845175,
"learning_rate": 1.3571014492753625e-05,
"loss": 0.00011702477931976318,
"step": 1110
},
{
"epoch": 4.869565217391305,
"grad_norm": 0.005592594854533672,
"learning_rate": 1.351304347826087e-05,
"loss": 0.00010377466678619384,
"step": 1120
},
{
"epoch": 4.913043478260869,
"grad_norm": 0.007901474833488464,
"learning_rate": 1.3455072463768117e-05,
"loss": 0.00013610869646072388,
"step": 1130
},
{
"epoch": 4.956521739130435,
"grad_norm": 0.01237920019775629,
"learning_rate": 1.3397101449275362e-05,
"loss": 0.00013603121042251586,
"step": 1140
},
{
"epoch": 5.0,
"grad_norm": 0.0020453499164432287,
"learning_rate": 1.333913043478261e-05,
"loss": 0.0001194879412651062,
"step": 1150
},
{
"epoch": 5.0,
"eval_accuracy": 0.9739130434782609,
"eval_f1": 0.9739110707803992,
"eval_loss": 0.10354098677635193,
"eval_runtime": 3.6993,
"eval_samples_per_second": 124.349,
"eval_steps_per_second": 15.679,
"step": 1150
},
{
"epoch": 5.043478260869565,
"grad_norm": 0.006073773372918367,
"learning_rate": 1.3281159420289856e-05,
"loss": 0.00012996643781661987,
"step": 1160
},
{
"epoch": 5.086956521739131,
"grad_norm": 0.004777880851179361,
"learning_rate": 1.3223188405797103e-05,
"loss": 0.0001592189073562622,
"step": 1170
},
{
"epoch": 5.130434782608695,
"grad_norm": 0.057864073663949966,
"learning_rate": 1.3165217391304348e-05,
"loss": 0.00019505620002746582,
"step": 1180
},
{
"epoch": 5.173913043478261,
"grad_norm": 0.004903986118733883,
"learning_rate": 1.3107246376811595e-05,
"loss": 0.00014082193374633789,
"step": 1190
},
{
"epoch": 5.217391304347826,
"grad_norm": 0.0034294510260224342,
"learning_rate": 1.304927536231884e-05,
"loss": 0.00015170425176620484,
"step": 1200
},
{
"epoch": 5.260869565217392,
"grad_norm": 0.0011764679802581668,
"learning_rate": 1.2991304347826089e-05,
"loss": 7.397085428237916e-05,
"step": 1210
},
{
"epoch": 5.304347826086957,
"grad_norm": 0.0015955602284520864,
"learning_rate": 1.2933333333333334e-05,
"loss": 0.00010628998279571533,
"step": 1220
},
{
"epoch": 5.3478260869565215,
"grad_norm": 0.0054084137082099915,
"learning_rate": 1.287536231884058e-05,
"loss": 0.00010003894567489624,
"step": 1230
},
{
"epoch": 5.391304347826087,
"grad_norm": 0.0409197136759758,
"learning_rate": 1.2817391304347827e-05,
"loss": 0.0001949608325958252,
"step": 1240
},
{
"epoch": 5.434782608695652,
"grad_norm": 0.005638486705720425,
"learning_rate": 1.2759420289855074e-05,
"loss": 0.00010839402675628662,
"step": 1250
},
{
"epoch": 5.478260869565218,
"grad_norm": 0.002196825807914138,
"learning_rate": 1.2701449275362321e-05,
"loss": 0.00011780411005020141,
"step": 1260
},
{
"epoch": 5.521739130434782,
"grad_norm": 0.004170795436948538,
"learning_rate": 1.2643478260869566e-05,
"loss": 7.52761960029602e-05,
"step": 1270
},
{
"epoch": 5.565217391304348,
"grad_norm": 0.0018888239283114672,
"learning_rate": 1.2585507246376813e-05,
"loss": 8.64073634147644e-05,
"step": 1280
},
{
"epoch": 5.608695652173913,
"grad_norm": 0.004605341702699661,
"learning_rate": 1.2527536231884058e-05,
"loss": 0.00010445266962051391,
"step": 1290
},
{
"epoch": 5.6521739130434785,
"grad_norm": 0.003109138226136565,
"learning_rate": 1.2469565217391307e-05,
"loss": 0.00017313212156295777,
"step": 1300
},
{
"epoch": 5.695652173913043,
"grad_norm": 0.010427464731037617,
"learning_rate": 1.2411594202898552e-05,
"loss": 0.00013125985860824586,
"step": 1310
},
{
"epoch": 5.739130434782608,
"grad_norm": 0.003667028620839119,
"learning_rate": 1.2353623188405799e-05,
"loss": 8.144229650497437e-05,
"step": 1320
},
{
"epoch": 5.782608695652174,
"grad_norm": 0.0063975197263062,
"learning_rate": 1.2295652173913044e-05,
"loss": 8.790493011474609e-05,
"step": 1330
},
{
"epoch": 5.826086956521739,
"grad_norm": 0.0025064516812562943,
"learning_rate": 1.2237681159420292e-05,
"loss": 9.892880916595459e-05,
"step": 1340
},
{
"epoch": 5.869565217391305,
"grad_norm": 0.0023004047106951475,
"learning_rate": 1.2179710144927537e-05,
"loss": 8.99285078048706e-05,
"step": 1350
},
{
"epoch": 5.913043478260869,
"grad_norm": 0.00247712479904294,
"learning_rate": 1.2121739130434784e-05,
"loss": 7.850229740142822e-05,
"step": 1360
},
{
"epoch": 5.956521739130435,
"grad_norm": 0.004787979181855917,
"learning_rate": 1.206376811594203e-05,
"loss": 0.00013543367385864257,
"step": 1370
},
{
"epoch": 6.0,
"grad_norm": 0.0011665808269754052,
"learning_rate": 1.2005797101449276e-05,
"loss": 0.00010280609130859375,
"step": 1380
},
{
"epoch": 6.0,
"eval_accuracy": 0.9760869565217392,
"eval_f1": 0.9760859393830551,
"eval_loss": 0.10542083531618118,
"eval_runtime": 3.6712,
"eval_samples_per_second": 125.301,
"eval_steps_per_second": 15.799,
"step": 1380
},
{
"epoch": 6.043478260869565,
"grad_norm": 0.003608932951465249,
"learning_rate": 1.1947826086956521e-05,
"loss": 6.381869316101074e-05,
"step": 1390
},
{
"epoch": 6.086956521739131,
"grad_norm": 0.001413961173966527,
"learning_rate": 1.188985507246377e-05,
"loss": 0.00011366158723831176,
"step": 1400
},
{
"epoch": 6.130434782608695,
"grad_norm": 0.008014041930437088,
"learning_rate": 1.1831884057971015e-05,
"loss": 8.733570575714111e-05,
"step": 1410
},
{
"epoch": 6.173913043478261,
"grad_norm": 0.003111343365162611,
"learning_rate": 1.1773913043478262e-05,
"loss": 8.406937122344971e-05,
"step": 1420
},
{
"epoch": 6.217391304347826,
"grad_norm": 0.005770743824541569,
"learning_rate": 1.1715942028985507e-05,
"loss": 0.00010157078504562378,
"step": 1430
},
{
"epoch": 6.260869565217392,
"grad_norm": 0.0032873093150556087,
"learning_rate": 1.1657971014492756e-05,
"loss": 0.00014556646347045897,
"step": 1440
},
{
"epoch": 6.304347826086957,
"grad_norm": 0.001812812639400363,
"learning_rate": 1.16e-05,
"loss": 0.00010097324848175049,
"step": 1450
},
{
"epoch": 6.3478260869565215,
"grad_norm": 0.004035606049001217,
"learning_rate": 1.1542028985507248e-05,
"loss": 9.941011667251587e-05,
"step": 1460
},
{
"epoch": 6.391304347826087,
"grad_norm": 0.0012575940927490592,
"learning_rate": 1.1484057971014493e-05,
"loss": 6.15835189819336e-05,
"step": 1470
},
{
"epoch": 6.434782608695652,
"grad_norm": 0.003833119058981538,
"learning_rate": 1.142608695652174e-05,
"loss": 8.669793605804443e-05,
"step": 1480
},
{
"epoch": 6.478260869565218,
"grad_norm": 0.004782689735293388,
"learning_rate": 1.1368115942028985e-05,
"loss": 7.78600573539734e-05,
"step": 1490
},
{
"epoch": 6.521739130434782,
"grad_norm": 0.001532797235995531,
"learning_rate": 1.1310144927536233e-05,
"loss": 6.358325481414795e-05,
"step": 1500
},
{
"epoch": 6.565217391304348,
"grad_norm": 0.002565442817285657,
"learning_rate": 1.1252173913043478e-05,
"loss": 0.0001420259475708008,
"step": 1510
},
{
"epoch": 6.608695652173913,
"grad_norm": 0.0025454177521169186,
"learning_rate": 1.1194202898550725e-05,
"loss": 8.515864610671997e-05,
"step": 1520
},
{
"epoch": 6.6521739130434785,
"grad_norm": 0.0020964243449270725,
"learning_rate": 1.113623188405797e-05,
"loss": 6.471127271652221e-05,
"step": 1530
},
{
"epoch": 6.695652173913043,
"grad_norm": 0.003716124454513192,
"learning_rate": 1.1078260869565219e-05,
"loss": 8.204132318496704e-05,
"step": 1540
},
{
"epoch": 6.739130434782608,
"grad_norm": 0.008757402189075947,
"learning_rate": 1.1020289855072466e-05,
"loss": 8.024424314498902e-05,
"step": 1550
},
{
"epoch": 6.782608695652174,
"grad_norm": 0.0014845369150862098,
"learning_rate": 1.096231884057971e-05,
"loss": 6.451904773712158e-05,
"step": 1560
},
{
"epoch": 6.826086956521739,
"grad_norm": 0.008402503095567226,
"learning_rate": 1.0904347826086958e-05,
"loss": 0.00010472536087036133,
"step": 1570
},
{
"epoch": 6.869565217391305,
"grad_norm": 0.0024845916777849197,
"learning_rate": 1.0846376811594203e-05,
"loss": 7.791221141815186e-05,
"step": 1580
},
{
"epoch": 6.913043478260869,
"grad_norm": 0.0009611704736016691,
"learning_rate": 1.0788405797101451e-05,
"loss": 6.439834833145141e-05,
"step": 1590
},
{
"epoch": 6.956521739130435,
"grad_norm": 0.002504365984350443,
"learning_rate": 1.0730434782608696e-05,
"loss": 0.00010657459497451783,
"step": 1600
},
{
"epoch": 7.0,
"grad_norm": 0.0028592213056981564,
"learning_rate": 1.0672463768115943e-05,
"loss": 6.621479988098145e-05,
"step": 1610
},
{
"epoch": 7.0,
"eval_accuracy": 0.9717391304347827,
"eval_f1": 0.9717357910906297,
"eval_loss": 0.10924158990383148,
"eval_runtime": 3.7267,
"eval_samples_per_second": 123.432,
"eval_steps_per_second": 15.563,
"step": 1610
},
{
"epoch": 7.043478260869565,
"grad_norm": 0.0038551699835807085,
"learning_rate": 1.0614492753623188e-05,
"loss": 6.931275129318237e-05,
"step": 1620
},
{
"epoch": 7.086956521739131,
"grad_norm": 0.001955242594704032,
"learning_rate": 1.0556521739130437e-05,
"loss": 6.8606436252594e-05,
"step": 1630
},
{
"epoch": 7.130434782608695,
"grad_norm": 0.0016041912604123354,
"learning_rate": 1.0498550724637682e-05,
"loss": 5.517750978469849e-05,
"step": 1640
},
{
"epoch": 7.173913043478261,
"grad_norm": 0.00400899862870574,
"learning_rate": 1.0440579710144929e-05,
"loss": 6.250441074371338e-05,
"step": 1650
},
{
"epoch": 7.217391304347826,
"grad_norm": 0.00452436925843358,
"learning_rate": 1.0382608695652174e-05,
"loss": 7.809549570083618e-05,
"step": 1660
},
{
"epoch": 7.260869565217392,
"grad_norm": 0.004081172402948141,
"learning_rate": 1.0324637681159423e-05,
"loss": 6.081312894821167e-05,
"step": 1670
},
{
"epoch": 7.304347826086957,
"grad_norm": 0.0009276916971430182,
"learning_rate": 1.0266666666666668e-05,
"loss": 7.750093936920166e-05,
"step": 1680
},
{
"epoch": 7.3478260869565215,
"grad_norm": 0.0008240043534897268,
"learning_rate": 1.0208695652173915e-05,
"loss": 5.295425653457641e-05,
"step": 1690
},
{
"epoch": 7.391304347826087,
"grad_norm": 0.0009307338623329997,
"learning_rate": 1.015072463768116e-05,
"loss": 4.418641328811646e-05,
"step": 1700
},
{
"epoch": 7.434782608695652,
"grad_norm": 0.0026277746073901653,
"learning_rate": 1.0092753623188407e-05,
"loss": 7.459372282028198e-05,
"step": 1710
},
{
"epoch": 7.478260869565218,
"grad_norm": 0.0009984400821849704,
"learning_rate": 1.0034782608695652e-05,
"loss": 5.8722496032714846e-05,
"step": 1720
},
{
"epoch": 7.521739130434782,
"grad_norm": 0.006830462254583836,
"learning_rate": 9.9768115942029e-06,
"loss": 0.00010163038969039917,
"step": 1730
},
{
"epoch": 7.565217391304348,
"grad_norm": 0.002571334131062031,
"learning_rate": 9.918840579710145e-06,
"loss": 4.719942808151245e-05,
"step": 1740
},
{
"epoch": 7.608695652173913,
"grad_norm": 0.006324245594441891,
"learning_rate": 9.860869565217392e-06,
"loss": 0.00011334121227264404,
"step": 1750
},
{
"epoch": 7.6521739130434785,
"grad_norm": 0.004802050068974495,
"learning_rate": 9.802898550724639e-06,
"loss": 7.033348083496094e-05,
"step": 1760
},
{
"epoch": 7.695652173913043,
"grad_norm": 0.004078683443367481,
"learning_rate": 9.744927536231886e-06,
"loss": 7.486343383789062e-05,
"step": 1770
},
{
"epoch": 7.739130434782608,
"grad_norm": 0.0017736013978719711,
"learning_rate": 9.686956521739131e-06,
"loss": 6.642341613769532e-05,
"step": 1780
},
{
"epoch": 7.782608695652174,
"grad_norm": 0.0009903659811243415,
"learning_rate": 9.628985507246378e-06,
"loss": 6.065666675567627e-05,
"step": 1790
},
{
"epoch": 7.826086956521739,
"grad_norm": 0.0012112981639802456,
"learning_rate": 9.571014492753625e-06,
"loss": 6.491392850875855e-05,
"step": 1800
},
{
"epoch": 7.869565217391305,
"grad_norm": 0.0009230478899553418,
"learning_rate": 9.51304347826087e-06,
"loss": 5.654692649841309e-05,
"step": 1810
},
{
"epoch": 7.913043478260869,
"grad_norm": 0.0006778881652280688,
"learning_rate": 9.455072463768117e-06,
"loss": 5.7981908321380614e-05,
"step": 1820
},
{
"epoch": 7.956521739130435,
"grad_norm": 0.003380276495590806,
"learning_rate": 9.397101449275363e-06,
"loss": 0.00010381042957305909,
"step": 1830
},
{
"epoch": 8.0,
"grad_norm": 0.0043472591787576675,
"learning_rate": 9.33913043478261e-06,
"loss": 7.407516241073609e-05,
"step": 1840
},
{
"epoch": 8.0,
"eval_accuracy": 0.9717391304347827,
"eval_f1": 0.9717357910906297,
"eval_loss": 0.11276786029338837,
"eval_runtime": 3.6362,
"eval_samples_per_second": 126.506,
"eval_steps_per_second": 15.951,
"step": 1840
}
],
"logging_steps": 10,
"max_steps": 3450,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1406820871267942e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}