| { | |
| "best_global_step": 1380, | |
| "best_metric": 0.9760859393830551, | |
| "best_model_checkpoint": "./vitmodel-results3\\checkpoint-1380", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 1840, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.043478260869565216, | |
| "grad_norm": 6.461246490478516, | |
| "learning_rate": 1.9947826086956524e-05, | |
| "loss": 0.6514617919921875, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08695652173913043, | |
| "grad_norm": 3.8991446495056152, | |
| "learning_rate": 1.988985507246377e-05, | |
| "loss": 0.45957489013671876, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13043478260869565, | |
| "grad_norm": 2.4451658725738525, | |
| "learning_rate": 1.9831884057971015e-05, | |
| "loss": 0.3501922607421875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 3.7429535388946533, | |
| "learning_rate": 1.9773913043478265e-05, | |
| "loss": 0.27413215637207033, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.21739130434782608, | |
| "grad_norm": 12.403484344482422, | |
| "learning_rate": 1.971594202898551e-05, | |
| "loss": 0.45773887634277344, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2608695652173913, | |
| "grad_norm": 5.481701850891113, | |
| "learning_rate": 1.9657971014492755e-05, | |
| "loss": 0.23930573463439941, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.30434782608695654, | |
| "grad_norm": 4.304569244384766, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 0.2975881576538086, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 8.520660400390625, | |
| "learning_rate": 1.954202898550725e-05, | |
| "loss": 0.2244499921798706, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.391304347826087, | |
| "grad_norm": 5.180691719055176, | |
| "learning_rate": 1.9484057971014492e-05, | |
| "loss": 0.17172325849533082, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 0.8175772428512573, | |
| "learning_rate": 1.9426086956521743e-05, | |
| "loss": 0.2387838363647461, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4782608695652174, | |
| "grad_norm": 0.4058602452278137, | |
| "learning_rate": 1.9368115942028986e-05, | |
| "loss": 0.10988756418228149, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5217391304347826, | |
| "grad_norm": 0.9439899921417236, | |
| "learning_rate": 1.9310144927536233e-05, | |
| "loss": 0.14697353839874266, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5652173913043478, | |
| "grad_norm": 0.6720163822174072, | |
| "learning_rate": 1.925217391304348e-05, | |
| "loss": 0.21182384490966796, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6086956521739131, | |
| "grad_norm": 0.371541291475296, | |
| "learning_rate": 1.9194202898550727e-05, | |
| "loss": 0.2180387258529663, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6521739130434783, | |
| "grad_norm": 1.6255207061767578, | |
| "learning_rate": 1.9136231884057973e-05, | |
| "loss": 0.20694243907928467, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 7.544068813323975, | |
| "learning_rate": 1.907826086956522e-05, | |
| "loss": 0.1606433391571045, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7391304347826086, | |
| "grad_norm": 0.549288272857666, | |
| "learning_rate": 1.9020289855072464e-05, | |
| "loss": 0.17944023609161378, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.782608695652174, | |
| "grad_norm": 0.43946510553359985, | |
| "learning_rate": 1.896231884057971e-05, | |
| "loss": 0.2129380226135254, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8260869565217391, | |
| "grad_norm": 0.20731233060359955, | |
| "learning_rate": 1.8904347826086957e-05, | |
| "loss": 0.11439937353134155, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 0.1378840208053589, | |
| "learning_rate": 1.8846376811594204e-05, | |
| "loss": 0.21877152919769288, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9130434782608695, | |
| "grad_norm": 0.35801249742507935, | |
| "learning_rate": 1.878840579710145e-05, | |
| "loss": 0.10442726612091065, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9565217391304348, | |
| "grad_norm": 10.050288200378418, | |
| "learning_rate": 1.8730434782608698e-05, | |
| "loss": 0.19560953378677368, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.15084530413150787, | |
| "learning_rate": 1.867246376811594e-05, | |
| "loss": 0.02761389911174774, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9521739130434783, | |
| "eval_f1": 0.9520642679853729, | |
| "eval_loss": 0.14785830676555634, | |
| "eval_runtime": 5.6109, | |
| "eval_samples_per_second": 81.984, | |
| "eval_steps_per_second": 10.337, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0434782608695652, | |
| "grad_norm": 0.186416894197464, | |
| "learning_rate": 1.861449275362319e-05, | |
| "loss": 0.04351995289325714, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 0.06740374863147736, | |
| "learning_rate": 1.8556521739130435e-05, | |
| "loss": 0.005690252780914307, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.1304347826086956, | |
| "grad_norm": 6.0832743644714355, | |
| "learning_rate": 1.8498550724637682e-05, | |
| "loss": 0.016183775663375855, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1739130434782608, | |
| "grad_norm": 0.025469312444329262, | |
| "learning_rate": 1.844057971014493e-05, | |
| "loss": 0.025768563151359558, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2173913043478262, | |
| "grad_norm": 0.05013096332550049, | |
| "learning_rate": 1.8382608695652175e-05, | |
| "loss": 0.07644214630126953, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2608695652173914, | |
| "grad_norm": 0.19063518941402435, | |
| "learning_rate": 1.8324637681159422e-05, | |
| "loss": 0.06052442789077759, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.3043478260869565, | |
| "grad_norm": 0.04668483883142471, | |
| "learning_rate": 1.826666666666667e-05, | |
| "loss": 0.0497345507144928, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3478260869565217, | |
| "grad_norm": 0.027216244488954544, | |
| "learning_rate": 1.8208695652173916e-05, | |
| "loss": 0.08886347413063049, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.391304347826087, | |
| "grad_norm": 0.3011648952960968, | |
| "learning_rate": 1.815072463768116e-05, | |
| "loss": 0.0871780276298523, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.434782608695652, | |
| "grad_norm": 9.434959411621094, | |
| "learning_rate": 1.809275362318841e-05, | |
| "loss": 0.06977825760841369, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4782608695652173, | |
| "grad_norm": 0.07939770817756653, | |
| "learning_rate": 1.8034782608695653e-05, | |
| "loss": 0.11019858121871948, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.5217391304347827, | |
| "grad_norm": 6.755427837371826, | |
| "learning_rate": 1.79768115942029e-05, | |
| "loss": 0.07228946685791016, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5652173913043477, | |
| "grad_norm": 10.783921241760254, | |
| "learning_rate": 1.7918840579710147e-05, | |
| "loss": 0.06457504034042358, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.608695652173913, | |
| "grad_norm": 0.10878543555736542, | |
| "learning_rate": 1.7860869565217394e-05, | |
| "loss": 0.021503202617168427, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6521739130434783, | |
| "grad_norm": 0.25200846791267395, | |
| "learning_rate": 1.780289855072464e-05, | |
| "loss": 0.06428139805793762, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6956521739130435, | |
| "grad_norm": 1.0684906244277954, | |
| "learning_rate": 1.7744927536231887e-05, | |
| "loss": 0.09642828106880189, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 12.593297004699707, | |
| "learning_rate": 1.768695652173913e-05, | |
| "loss": 0.028580766916275025, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7826086956521738, | |
| "grad_norm": 0.04352446645498276, | |
| "learning_rate": 1.7628985507246377e-05, | |
| "loss": 0.1266841173171997, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8260869565217392, | |
| "grad_norm": 0.03208275884389877, | |
| "learning_rate": 1.7571014492753624e-05, | |
| "loss": 0.0604109525680542, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8695652173913042, | |
| "grad_norm": 0.0292875487357378, | |
| "learning_rate": 1.751304347826087e-05, | |
| "loss": 0.08443626761436462, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.9130434782608696, | |
| "grad_norm": 0.04183952882885933, | |
| "learning_rate": 1.7455072463768118e-05, | |
| "loss": 0.026611250638961793, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9565217391304348, | |
| "grad_norm": 0.419708788394928, | |
| "learning_rate": 1.7397101449275365e-05, | |
| "loss": 0.1165506362915039, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 8.380155563354492, | |
| "learning_rate": 1.7339130434782608e-05, | |
| "loss": 0.041912269592285153, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_f1": 0.9565184513006655, | |
| "eval_loss": 0.1165793240070343, | |
| "eval_runtime": 3.7298, | |
| "eval_samples_per_second": 123.331, | |
| "eval_steps_per_second": 15.55, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.0434782608695654, | |
| "grad_norm": 0.07667429745197296, | |
| "learning_rate": 1.728115942028986e-05, | |
| "loss": 0.0013138219714164735, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.0869565217391304, | |
| "grad_norm": 0.05316108465194702, | |
| "learning_rate": 1.7223188405797102e-05, | |
| "loss": 0.004785384237766266, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.130434782608696, | |
| "grad_norm": 0.018993400037288666, | |
| "learning_rate": 1.716521739130435e-05, | |
| "loss": 0.0010403752326965331, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 0.005419578403234482, | |
| "learning_rate": 1.7107246376811596e-05, | |
| "loss": 0.0010405436158180236, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.217391304347826, | |
| "grad_norm": 2.7880542278289795, | |
| "learning_rate": 1.7049275362318842e-05, | |
| "loss": 0.01008293330669403, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.260869565217391, | |
| "grad_norm": 0.19926372170448303, | |
| "learning_rate": 1.6991304347826086e-05, | |
| "loss": 0.002237708866596222, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.3043478260869565, | |
| "grad_norm": 0.006354826502501965, | |
| "learning_rate": 1.6933333333333336e-05, | |
| "loss": 0.015198694169521331, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.3478260869565215, | |
| "grad_norm": 0.01782035082578659, | |
| "learning_rate": 1.687536231884058e-05, | |
| "loss": 0.0017350628972053529, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.391304347826087, | |
| "grad_norm": 0.6461467742919922, | |
| "learning_rate": 1.6817391304347826e-05, | |
| "loss": 0.0012194350361824035, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.4347826086956523, | |
| "grad_norm": 0.014753537252545357, | |
| "learning_rate": 1.6759420289855073e-05, | |
| "loss": 0.03461991548538208, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.4782608695652173, | |
| "grad_norm": 0.015930302441120148, | |
| "learning_rate": 1.670144927536232e-05, | |
| "loss": 0.0030654460191726685, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.5217391304347827, | |
| "grad_norm": 0.07892700284719467, | |
| "learning_rate": 1.6643478260869567e-05, | |
| "loss": 0.0017842918634414673, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.5652173913043477, | |
| "grad_norm": 0.05785762518644333, | |
| "learning_rate": 1.6585507246376814e-05, | |
| "loss": 0.0016030147671699524, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 0.051935892552137375, | |
| "learning_rate": 1.652753623188406e-05, | |
| "loss": 0.0006048619747161865, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.6521739130434785, | |
| "grad_norm": 0.009883932769298553, | |
| "learning_rate": 1.6469565217391304e-05, | |
| "loss": 0.0022064462304115296, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.6956521739130435, | |
| "grad_norm": 0.01653284765779972, | |
| "learning_rate": 1.6411594202898554e-05, | |
| "loss": 0.010119739174842834, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.7391304347826084, | |
| "grad_norm": 0.013404067605733871, | |
| "learning_rate": 1.6353623188405798e-05, | |
| "loss": 0.004131542146205902, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.782608695652174, | |
| "grad_norm": 0.009171389043331146, | |
| "learning_rate": 1.6295652173913044e-05, | |
| "loss": 0.08883790969848633, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.8260869565217392, | |
| "grad_norm": 12.090933799743652, | |
| "learning_rate": 1.623768115942029e-05, | |
| "loss": 0.010134254395961762, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.869565217391304, | |
| "grad_norm": 4.632288455963135, | |
| "learning_rate": 1.6179710144927538e-05, | |
| "loss": 0.003986392915248871, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.9130434782608696, | |
| "grad_norm": 0.06515643000602722, | |
| "learning_rate": 1.6121739130434785e-05, | |
| "loss": 0.0041788950562477115, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.9565217391304346, | |
| "grad_norm": 0.33638763427734375, | |
| "learning_rate": 1.6063768115942032e-05, | |
| "loss": 0.0013911113142967223, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.1827061027288437, | |
| "learning_rate": 1.6005797101449275e-05, | |
| "loss": 0.0004976257681846618, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9695652173913043, | |
| "eval_f1": 0.9695652173913043, | |
| "eval_loss": 0.09442394226789474, | |
| "eval_runtime": 3.6984, | |
| "eval_samples_per_second": 124.377, | |
| "eval_steps_per_second": 15.682, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.0434782608695654, | |
| "grad_norm": 0.04813811555504799, | |
| "learning_rate": 1.5947826086956522e-05, | |
| "loss": 0.0004477664828300476, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.0869565217391304, | |
| "grad_norm": 0.0175640732049942, | |
| "learning_rate": 1.588985507246377e-05, | |
| "loss": 0.0004123836755752563, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.130434782608696, | |
| "grad_norm": 0.008048221468925476, | |
| "learning_rate": 1.5831884057971016e-05, | |
| "loss": 0.0004120379686355591, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.1739130434782608, | |
| "grad_norm": 0.0071647269651293755, | |
| "learning_rate": 1.5773913043478263e-05, | |
| "loss": 0.00032983869314193723, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.217391304347826, | |
| "grad_norm": 0.015544029884040356, | |
| "learning_rate": 1.571594202898551e-05, | |
| "loss": 0.00034575462341308595, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.260869565217391, | |
| "grad_norm": 0.004907351918518543, | |
| "learning_rate": 1.5657971014492753e-05, | |
| "loss": 0.00026599913835525515, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.3043478260869565, | |
| "grad_norm": 0.013097843155264854, | |
| "learning_rate": 1.5600000000000003e-05, | |
| "loss": 0.0016580477356910705, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.3478260869565215, | |
| "grad_norm": 0.004332110285758972, | |
| "learning_rate": 1.5542028985507247e-05, | |
| "loss": 0.00046425610780715943, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.391304347826087, | |
| "grad_norm": 0.06276489794254303, | |
| "learning_rate": 1.5484057971014493e-05, | |
| "loss": 0.0007047504186630249, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.4347826086956523, | |
| "grad_norm": 0.00449096504598856, | |
| "learning_rate": 1.542608695652174e-05, | |
| "loss": 0.0002553284168243408, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 0.011169650591909885, | |
| "learning_rate": 1.5368115942028987e-05, | |
| "loss": 0.0003493279218673706, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.5217391304347827, | |
| "grad_norm": 0.025958970189094543, | |
| "learning_rate": 1.5310144927536234e-05, | |
| "loss": 0.0002732709050178528, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.5652173913043477, | |
| "grad_norm": 0.01937592588365078, | |
| "learning_rate": 1.5252173913043479e-05, | |
| "loss": 0.000246034562587738, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.608695652173913, | |
| "grad_norm": 0.00856866966933012, | |
| "learning_rate": 1.5194202898550726e-05, | |
| "loss": 0.00028263479471206664, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.6521739130434785, | |
| "grad_norm": 0.12088195979595184, | |
| "learning_rate": 1.5136231884057973e-05, | |
| "loss": 0.0003507554531097412, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.6956521739130435, | |
| "grad_norm": 0.02024533785879612, | |
| "learning_rate": 1.5078260869565218e-05, | |
| "loss": 0.00027790963649749757, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.7391304347826084, | |
| "grad_norm": 0.0040628910064697266, | |
| "learning_rate": 1.5020289855072465e-05, | |
| "loss": 0.0002285495400428772, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.782608695652174, | |
| "grad_norm": 0.0061136772856116295, | |
| "learning_rate": 1.496231884057971e-05, | |
| "loss": 0.00027128159999847414, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.8260869565217392, | |
| "grad_norm": 0.012037448585033417, | |
| "learning_rate": 1.4904347826086958e-05, | |
| "loss": 0.0002808883786201477, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.869565217391304, | |
| "grad_norm": 0.004823528695851564, | |
| "learning_rate": 1.4846376811594203e-05, | |
| "loss": 0.0005329117178916931, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.9130434782608696, | |
| "grad_norm": 0.04427816718816757, | |
| "learning_rate": 1.478840579710145e-05, | |
| "loss": 0.00029876679182052614, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.9565217391304346, | |
| "grad_norm": 0.04008401557803154, | |
| "learning_rate": 1.4730434782608695e-05, | |
| "loss": 0.00039345473051071166, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.010993687435984612, | |
| "learning_rate": 1.4672463768115944e-05, | |
| "loss": 0.00024021416902542114, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.967391304347826, | |
| "eval_f1": 0.967383751435824, | |
| "eval_loss": 0.10565203428268433, | |
| "eval_runtime": 3.7655, | |
| "eval_samples_per_second": 122.162, | |
| "eval_steps_per_second": 15.403, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.043478260869565, | |
| "grad_norm": 0.009720547124743462, | |
| "learning_rate": 1.461449275362319e-05, | |
| "loss": 0.00024558454751968386, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.086956521739131, | |
| "grad_norm": 0.017342587932944298, | |
| "learning_rate": 1.4556521739130436e-05, | |
| "loss": 0.00018810927867889403, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.130434782608695, | |
| "grad_norm": 0.011509642004966736, | |
| "learning_rate": 1.4498550724637683e-05, | |
| "loss": 0.00023101717233657836, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.173913043478261, | |
| "grad_norm": 0.0029383855871856213, | |
| "learning_rate": 1.4440579710144928e-05, | |
| "loss": 0.00020957440137863158, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.217391304347826, | |
| "grad_norm": 0.016090553253889084, | |
| "learning_rate": 1.4382608695652176e-05, | |
| "loss": 0.0001988038420677185, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.260869565217392, | |
| "grad_norm": 0.005717333406209946, | |
| "learning_rate": 1.4324637681159422e-05, | |
| "loss": 0.00017771720886230468, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.304347826086957, | |
| "grad_norm": 0.0067417211830616, | |
| "learning_rate": 1.4266666666666668e-05, | |
| "loss": 0.0001595720648765564, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 0.014678889885544777, | |
| "learning_rate": 1.4208695652173914e-05, | |
| "loss": 0.00021335333585739135, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.391304347826087, | |
| "grad_norm": 0.015480758622288704, | |
| "learning_rate": 1.4150724637681162e-05, | |
| "loss": 0.00018725097179412843, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.434782608695652, | |
| "grad_norm": 0.009670041501522064, | |
| "learning_rate": 1.4092753623188407e-05, | |
| "loss": 0.00017006248235702516, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.478260869565218, | |
| "grad_norm": 0.004368505906313658, | |
| "learning_rate": 1.4034782608695654e-05, | |
| "loss": 0.00011847317218780518, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.521739130434782, | |
| "grad_norm": 0.00646650604903698, | |
| "learning_rate": 1.39768115942029e-05, | |
| "loss": 0.00011199414730072022, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.565217391304348, | |
| "grad_norm": 0.0032207826152443886, | |
| "learning_rate": 1.3918840579710146e-05, | |
| "loss": 0.0001057848334312439, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.608695652173913, | |
| "grad_norm": 0.004954950883984566, | |
| "learning_rate": 1.3860869565217391e-05, | |
| "loss": 0.00018178075551986695, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.6521739130434785, | |
| "grad_norm": 0.002452458254992962, | |
| "learning_rate": 1.380289855072464e-05, | |
| "loss": 0.00011045336723327636, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.695652173913043, | |
| "grad_norm": 0.008102525025606155, | |
| "learning_rate": 1.3744927536231885e-05, | |
| "loss": 0.00026093870401382445, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.739130434782608, | |
| "grad_norm": 0.010890824720263481, | |
| "learning_rate": 1.3686956521739132e-05, | |
| "loss": 0.0001526176929473877, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.782608695652174, | |
| "grad_norm": 0.004832288715988398, | |
| "learning_rate": 1.3628985507246377e-05, | |
| "loss": 0.0004844769835472107, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.826086956521739, | |
| "grad_norm": 0.0037648973520845175, | |
| "learning_rate": 1.3571014492753625e-05, | |
| "loss": 0.00011702477931976318, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.869565217391305, | |
| "grad_norm": 0.005592594854533672, | |
| "learning_rate": 1.351304347826087e-05, | |
| "loss": 0.00010377466678619384, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.913043478260869, | |
| "grad_norm": 0.007901474833488464, | |
| "learning_rate": 1.3455072463768117e-05, | |
| "loss": 0.00013610869646072388, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.956521739130435, | |
| "grad_norm": 0.01237920019775629, | |
| "learning_rate": 1.3397101449275362e-05, | |
| "loss": 0.00013603121042251586, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.0020453499164432287, | |
| "learning_rate": 1.333913043478261e-05, | |
| "loss": 0.0001194879412651062, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9739130434782609, | |
| "eval_f1": 0.9739110707803992, | |
| "eval_loss": 0.10354098677635193, | |
| "eval_runtime": 3.6993, | |
| "eval_samples_per_second": 124.349, | |
| "eval_steps_per_second": 15.679, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.043478260869565, | |
| "grad_norm": 0.006073773372918367, | |
| "learning_rate": 1.3281159420289856e-05, | |
| "loss": 0.00012996643781661987, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 5.086956521739131, | |
| "grad_norm": 0.004777880851179361, | |
| "learning_rate": 1.3223188405797103e-05, | |
| "loss": 0.0001592189073562622, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 5.130434782608695, | |
| "grad_norm": 0.057864073663949966, | |
| "learning_rate": 1.3165217391304348e-05, | |
| "loss": 0.00019505620002746582, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 5.173913043478261, | |
| "grad_norm": 0.004903986118733883, | |
| "learning_rate": 1.3107246376811595e-05, | |
| "loss": 0.00014082193374633789, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 5.217391304347826, | |
| "grad_norm": 0.0034294510260224342, | |
| "learning_rate": 1.304927536231884e-05, | |
| "loss": 0.00015170425176620484, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.260869565217392, | |
| "grad_norm": 0.0011764679802581668, | |
| "learning_rate": 1.2991304347826089e-05, | |
| "loss": 7.397085428237916e-05, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 5.304347826086957, | |
| "grad_norm": 0.0015955602284520864, | |
| "learning_rate": 1.2933333333333334e-05, | |
| "loss": 0.00010628998279571533, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 5.3478260869565215, | |
| "grad_norm": 0.0054084137082099915, | |
| "learning_rate": 1.287536231884058e-05, | |
| "loss": 0.00010003894567489624, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 5.391304347826087, | |
| "grad_norm": 0.0409197136759758, | |
| "learning_rate": 1.2817391304347827e-05, | |
| "loss": 0.0001949608325958252, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 5.434782608695652, | |
| "grad_norm": 0.005638486705720425, | |
| "learning_rate": 1.2759420289855074e-05, | |
| "loss": 0.00010839402675628662, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 5.478260869565218, | |
| "grad_norm": 0.002196825807914138, | |
| "learning_rate": 1.2701449275362321e-05, | |
| "loss": 0.00011780411005020141, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 5.521739130434782, | |
| "grad_norm": 0.004170795436948538, | |
| "learning_rate": 1.2643478260869566e-05, | |
| "loss": 7.52761960029602e-05, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 5.565217391304348, | |
| "grad_norm": 0.0018888239283114672, | |
| "learning_rate": 1.2585507246376813e-05, | |
| "loss": 8.64073634147644e-05, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 5.608695652173913, | |
| "grad_norm": 0.004605341702699661, | |
| "learning_rate": 1.2527536231884058e-05, | |
| "loss": 0.00010445266962051391, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 5.6521739130434785, | |
| "grad_norm": 0.003109138226136565, | |
| "learning_rate": 1.2469565217391307e-05, | |
| "loss": 0.00017313212156295777, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.695652173913043, | |
| "grad_norm": 0.010427464731037617, | |
| "learning_rate": 1.2411594202898552e-05, | |
| "loss": 0.00013125985860824586, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 5.739130434782608, | |
| "grad_norm": 0.003667028620839119, | |
| "learning_rate": 1.2353623188405799e-05, | |
| "loss": 8.144229650497437e-05, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 5.782608695652174, | |
| "grad_norm": 0.0063975197263062, | |
| "learning_rate": 1.2295652173913044e-05, | |
| "loss": 8.790493011474609e-05, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.826086956521739, | |
| "grad_norm": 0.0025064516812562943, | |
| "learning_rate": 1.2237681159420292e-05, | |
| "loss": 9.892880916595459e-05, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.869565217391305, | |
| "grad_norm": 0.0023004047106951475, | |
| "learning_rate": 1.2179710144927537e-05, | |
| "loss": 8.99285078048706e-05, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.913043478260869, | |
| "grad_norm": 0.00247712479904294, | |
| "learning_rate": 1.2121739130434784e-05, | |
| "loss": 7.850229740142822e-05, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.956521739130435, | |
| "grad_norm": 0.004787979181855917, | |
| "learning_rate": 1.206376811594203e-05, | |
| "loss": 0.00013543367385864257, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.0011665808269754052, | |
| "learning_rate": 1.2005797101449276e-05, | |
| "loss": 0.00010280609130859375, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9760869565217392, | |
| "eval_f1": 0.9760859393830551, | |
| "eval_loss": 0.10542083531618118, | |
| "eval_runtime": 3.6712, | |
| "eval_samples_per_second": 125.301, | |
| "eval_steps_per_second": 15.799, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 6.043478260869565, | |
| "grad_norm": 0.003608932951465249, | |
| "learning_rate": 1.1947826086956521e-05, | |
| "loss": 6.381869316101074e-05, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 6.086956521739131, | |
| "grad_norm": 0.001413961173966527, | |
| "learning_rate": 1.188985507246377e-05, | |
| "loss": 0.00011366158723831176, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 6.130434782608695, | |
| "grad_norm": 0.008014041930437088, | |
| "learning_rate": 1.1831884057971015e-05, | |
| "loss": 8.733570575714111e-05, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 6.173913043478261, | |
| "grad_norm": 0.003111343365162611, | |
| "learning_rate": 1.1773913043478262e-05, | |
| "loss": 8.406937122344971e-05, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 6.217391304347826, | |
| "grad_norm": 0.005770743824541569, | |
| "learning_rate": 1.1715942028985507e-05, | |
| "loss": 0.00010157078504562378, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 6.260869565217392, | |
| "grad_norm": 0.0032873093150556087, | |
| "learning_rate": 1.1657971014492756e-05, | |
| "loss": 0.00014556646347045897, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 6.304347826086957, | |
| "grad_norm": 0.001812812639400363, | |
| "learning_rate": 1.16e-05, | |
| "loss": 0.00010097324848175049, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 6.3478260869565215, | |
| "grad_norm": 0.004035606049001217, | |
| "learning_rate": 1.1542028985507248e-05, | |
| "loss": 9.941011667251587e-05, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 6.391304347826087, | |
| "grad_norm": 0.0012575940927490592, | |
| "learning_rate": 1.1484057971014493e-05, | |
| "loss": 6.15835189819336e-05, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 6.434782608695652, | |
| "grad_norm": 0.003833119058981538, | |
| "learning_rate": 1.142608695652174e-05, | |
| "loss": 8.669793605804443e-05, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 6.478260869565218, | |
| "grad_norm": 0.004782689735293388, | |
| "learning_rate": 1.1368115942028985e-05, | |
| "loss": 7.78600573539734e-05, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 6.521739130434782, | |
| "grad_norm": 0.001532797235995531, | |
| "learning_rate": 1.1310144927536233e-05, | |
| "loss": 6.358325481414795e-05, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.565217391304348, | |
| "grad_norm": 0.002565442817285657, | |
| "learning_rate": 1.1252173913043478e-05, | |
| "loss": 0.0001420259475708008, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 6.608695652173913, | |
| "grad_norm": 0.0025454177521169186, | |
| "learning_rate": 1.1194202898550725e-05, | |
| "loss": 8.515864610671997e-05, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 6.6521739130434785, | |
| "grad_norm": 0.0020964243449270725, | |
| "learning_rate": 1.113623188405797e-05, | |
| "loss": 6.471127271652221e-05, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 6.695652173913043, | |
| "grad_norm": 0.003716124454513192, | |
| "learning_rate": 1.1078260869565219e-05, | |
| "loss": 8.204132318496704e-05, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 6.739130434782608, | |
| "grad_norm": 0.008757402189075947, | |
| "learning_rate": 1.1020289855072466e-05, | |
| "loss": 8.024424314498902e-05, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 6.782608695652174, | |
| "grad_norm": 0.0014845369150862098, | |
| "learning_rate": 1.096231884057971e-05, | |
| "loss": 6.451904773712158e-05, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 6.826086956521739, | |
| "grad_norm": 0.008402503095567226, | |
| "learning_rate": 1.0904347826086958e-05, | |
| "loss": 0.00010472536087036133, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 6.869565217391305, | |
| "grad_norm": 0.0024845916777849197, | |
| "learning_rate": 1.0846376811594203e-05, | |
| "loss": 7.791221141815186e-05, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 6.913043478260869, | |
| "grad_norm": 0.0009611704736016691, | |
| "learning_rate": 1.0788405797101451e-05, | |
| "loss": 6.439834833145141e-05, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 6.956521739130435, | |
| "grad_norm": 0.002504365984350443, | |
| "learning_rate": 1.0730434782608696e-05, | |
| "loss": 0.00010657459497451783, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.0028592213056981564, | |
| "learning_rate": 1.0672463768115943e-05, | |
| "loss": 6.621479988098145e-05, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9717391304347827, | |
| "eval_f1": 0.9717357910906297, | |
| "eval_loss": 0.10924158990383148, | |
| "eval_runtime": 3.7267, | |
| "eval_samples_per_second": 123.432, | |
| "eval_steps_per_second": 15.563, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 7.043478260869565, | |
| "grad_norm": 0.0038551699835807085, | |
| "learning_rate": 1.0614492753623188e-05, | |
| "loss": 6.931275129318237e-05, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 7.086956521739131, | |
| "grad_norm": 0.001955242594704032, | |
| "learning_rate": 1.0556521739130437e-05, | |
| "loss": 6.8606436252594e-05, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 7.130434782608695, | |
| "grad_norm": 0.0016041912604123354, | |
| "learning_rate": 1.0498550724637682e-05, | |
| "loss": 5.517750978469849e-05, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 7.173913043478261, | |
| "grad_norm": 0.00400899862870574, | |
| "learning_rate": 1.0440579710144929e-05, | |
| "loss": 6.250441074371338e-05, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 7.217391304347826, | |
| "grad_norm": 0.00452436925843358, | |
| "learning_rate": 1.0382608695652174e-05, | |
| "loss": 7.809549570083618e-05, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 7.260869565217392, | |
| "grad_norm": 0.004081172402948141, | |
| "learning_rate": 1.0324637681159423e-05, | |
| "loss": 6.081312894821167e-05, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 7.304347826086957, | |
| "grad_norm": 0.0009276916971430182, | |
| "learning_rate": 1.0266666666666668e-05, | |
| "loss": 7.750093936920166e-05, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 7.3478260869565215, | |
| "grad_norm": 0.0008240043534897268, | |
| "learning_rate": 1.0208695652173915e-05, | |
| "loss": 5.295425653457641e-05, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 7.391304347826087, | |
| "grad_norm": 0.0009307338623329997, | |
| "learning_rate": 1.015072463768116e-05, | |
| "loss": 4.418641328811646e-05, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 7.434782608695652, | |
| "grad_norm": 0.0026277746073901653, | |
| "learning_rate": 1.0092753623188407e-05, | |
| "loss": 7.459372282028198e-05, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 7.478260869565218, | |
| "grad_norm": 0.0009984400821849704, | |
| "learning_rate": 1.0034782608695652e-05, | |
| "loss": 5.8722496032714846e-05, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 7.521739130434782, | |
| "grad_norm": 0.006830462254583836, | |
| "learning_rate": 9.9768115942029e-06, | |
| "loss": 0.00010163038969039917, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 7.565217391304348, | |
| "grad_norm": 0.002571334131062031, | |
| "learning_rate": 9.918840579710145e-06, | |
| "loss": 4.719942808151245e-05, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 7.608695652173913, | |
| "grad_norm": 0.006324245594441891, | |
| "learning_rate": 9.860869565217392e-06, | |
| "loss": 0.00011334121227264404, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 7.6521739130434785, | |
| "grad_norm": 0.004802050068974495, | |
| "learning_rate": 9.802898550724639e-06, | |
| "loss": 7.033348083496094e-05, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 7.695652173913043, | |
| "grad_norm": 0.004078683443367481, | |
| "learning_rate": 9.744927536231886e-06, | |
| "loss": 7.486343383789062e-05, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 7.739130434782608, | |
| "grad_norm": 0.0017736013978719711, | |
| "learning_rate": 9.686956521739131e-06, | |
| "loss": 6.642341613769532e-05, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 7.782608695652174, | |
| "grad_norm": 0.0009903659811243415, | |
| "learning_rate": 9.628985507246378e-06, | |
| "loss": 6.065666675567627e-05, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 7.826086956521739, | |
| "grad_norm": 0.0012112981639802456, | |
| "learning_rate": 9.571014492753625e-06, | |
| "loss": 6.491392850875855e-05, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.869565217391305, | |
| "grad_norm": 0.0009230478899553418, | |
| "learning_rate": 9.51304347826087e-06, | |
| "loss": 5.654692649841309e-05, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 7.913043478260869, | |
| "grad_norm": 0.0006778881652280688, | |
| "learning_rate": 9.455072463768117e-06, | |
| "loss": 5.7981908321380614e-05, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 7.956521739130435, | |
| "grad_norm": 0.003380276495590806, | |
| "learning_rate": 9.397101449275363e-06, | |
| "loss": 0.00010381042957305909, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.0043472591787576675, | |
| "learning_rate": 9.33913043478261e-06, | |
| "loss": 7.407516241073609e-05, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9717391304347827, | |
| "eval_f1": 0.9717357910906297, | |
| "eval_loss": 0.11276786029338837, | |
| "eval_runtime": 3.6362, | |
| "eval_samples_per_second": 126.506, | |
| "eval_steps_per_second": 15.951, | |
| "step": 1840 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1406820871267942e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |