| { |
| "best_metric": 0.5136106610298157, |
| "best_model_checkpoint": "./results/checkpoint-2874", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 5748, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006958942240779402, |
| "grad_norm": 4.3180251121521, |
| "learning_rate": 1.9972164231036883e-05, |
| "loss": 1.3759, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013917884481558803, |
| "grad_norm": 2.7253687381744385, |
| "learning_rate": 1.9944328462073764e-05, |
| "loss": 1.3343, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020876826722338204, |
| "grad_norm": 3.703948736190796, |
| "learning_rate": 1.9916492693110648e-05, |
| "loss": 1.226, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.027835768963117607, |
| "grad_norm": 6.535583972930908, |
| "learning_rate": 1.9888656924147533e-05, |
| "loss": 1.0883, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03479471120389701, |
| "grad_norm": 13.897150039672852, |
| "learning_rate": 1.9860821155184414e-05, |
| "loss": 0.9748, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04175365344467641, |
| "grad_norm": 7.23193359375, |
| "learning_rate": 1.9832985386221295e-05, |
| "loss": 0.8725, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04871259568545581, |
| "grad_norm": 13.598752975463867, |
| "learning_rate": 1.980514961725818e-05, |
| "loss": 0.8301, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.055671537926235214, |
| "grad_norm": 15.322084426879883, |
| "learning_rate": 1.977731384829506e-05, |
| "loss": 0.8432, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06263048016701461, |
| "grad_norm": 6.434969425201416, |
| "learning_rate": 1.974947807933194e-05, |
| "loss": 0.7408, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06958942240779402, |
| "grad_norm": 8.11408805847168, |
| "learning_rate": 1.9721642310368826e-05, |
| "loss": 0.7086, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07654836464857341, |
| "grad_norm": 7.85330057144165, |
| "learning_rate": 1.969380654140571e-05, |
| "loss": 0.7318, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08350730688935282, |
| "grad_norm": 10.534364700317383, |
| "learning_rate": 1.966597077244259e-05, |
| "loss": 0.6798, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09046624913013222, |
| "grad_norm": 13.151297569274902, |
| "learning_rate": 1.9638135003479472e-05, |
| "loss": 0.6988, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09742519137091162, |
| "grad_norm": 9.17402458190918, |
| "learning_rate": 1.9610299234516353e-05, |
| "loss": 0.6723, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10438413361169102, |
| "grad_norm": 19.499664306640625, |
| "learning_rate": 1.9582463465553238e-05, |
| "loss": 0.5994, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11134307585247043, |
| "grad_norm": 11.141926765441895, |
| "learning_rate": 1.955462769659012e-05, |
| "loss": 0.6565, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11830201809324982, |
| "grad_norm": 15.322267532348633, |
| "learning_rate": 1.9526791927627003e-05, |
| "loss": 0.7444, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12526096033402923, |
| "grad_norm": 9.982789993286133, |
| "learning_rate": 1.9498956158663885e-05, |
| "loss": 0.6882, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13221990257480862, |
| "grad_norm": 15.503236770629883, |
| "learning_rate": 1.9471120389700766e-05, |
| "loss": 0.6286, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.13917884481558804, |
| "grad_norm": 9.867083549499512, |
| "learning_rate": 1.944328462073765e-05, |
| "loss": 0.7333, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14613778705636743, |
| "grad_norm": 21.876277923583984, |
| "learning_rate": 1.941544885177453e-05, |
| "loss": 0.7438, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15309672929714682, |
| "grad_norm": 17.48896598815918, |
| "learning_rate": 1.9387613082811416e-05, |
| "loss": 0.6269, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16005567153792624, |
| "grad_norm": 6.696071147918701, |
| "learning_rate": 1.9359777313848297e-05, |
| "loss": 0.6739, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16701461377870563, |
| "grad_norm": 11.048694610595703, |
| "learning_rate": 1.933194154488518e-05, |
| "loss": 0.7373, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17397355601948503, |
| "grad_norm": 8.395058631896973, |
| "learning_rate": 1.9304105775922062e-05, |
| "loss": 0.6133, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18093249826026445, |
| "grad_norm": 9.003988265991211, |
| "learning_rate": 1.9276270006958943e-05, |
| "loss": 0.5964, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18789144050104384, |
| "grad_norm": 10.79345703125, |
| "learning_rate": 1.9248434237995824e-05, |
| "loss": 0.6293, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.19485038274182323, |
| "grad_norm": 19.241323471069336, |
| "learning_rate": 1.922059846903271e-05, |
| "loss": 0.6418, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20180932498260265, |
| "grad_norm": 20.079133987426758, |
| "learning_rate": 1.9192762700069593e-05, |
| "loss": 0.749, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.20876826722338204, |
| "grad_norm": 31.14455223083496, |
| "learning_rate": 1.9164926931106474e-05, |
| "loss": 0.6719, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21572720946416143, |
| "grad_norm": 18.57120132446289, |
| "learning_rate": 1.9137091162143355e-05, |
| "loss": 0.674, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22268615170494085, |
| "grad_norm": 6.905323028564453, |
| "learning_rate": 1.9109255393180236e-05, |
| "loss": 0.5172, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22964509394572025, |
| "grad_norm": 9.24687671661377, |
| "learning_rate": 1.908141962421712e-05, |
| "loss": 0.637, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.23660403618649964, |
| "grad_norm": 11.945255279541016, |
| "learning_rate": 1.9053583855254002e-05, |
| "loss": 0.6888, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.24356297842727906, |
| "grad_norm": 7.873608112335205, |
| "learning_rate": 1.9025748086290886e-05, |
| "loss": 0.625, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.25052192066805845, |
| "grad_norm": 10.357784271240234, |
| "learning_rate": 1.8997912317327767e-05, |
| "loss": 0.5676, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.25748086290883787, |
| "grad_norm": 12.345287322998047, |
| "learning_rate": 1.8970076548364652e-05, |
| "loss": 0.5983, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.26443980514961724, |
| "grad_norm": 7.470931053161621, |
| "learning_rate": 1.8942240779401533e-05, |
| "loss": 0.5978, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.27139874739039666, |
| "grad_norm": 10.457704544067383, |
| "learning_rate": 1.8914405010438414e-05, |
| "loss": 0.6074, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2783576896311761, |
| "grad_norm": 14.570989608764648, |
| "learning_rate": 1.8886569241475295e-05, |
| "loss": 0.6218, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.28531663187195544, |
| "grad_norm": 4.791262626647949, |
| "learning_rate": 1.885873347251218e-05, |
| "loss": 0.5941, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.29227557411273486, |
| "grad_norm": 7.30219030380249, |
| "learning_rate": 1.8830897703549064e-05, |
| "loss": 0.5407, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2992345163535143, |
| "grad_norm": 7.455644130706787, |
| "learning_rate": 1.8803061934585945e-05, |
| "loss": 0.6167, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.30619345859429364, |
| "grad_norm": 14.505638122558594, |
| "learning_rate": 1.8775226165622826e-05, |
| "loss": 0.6554, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.31315240083507306, |
| "grad_norm": 9.505303382873535, |
| "learning_rate": 1.8747390396659707e-05, |
| "loss": 0.5817, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3201113430758525, |
| "grad_norm": 11.907074928283691, |
| "learning_rate": 1.871955462769659e-05, |
| "loss": 0.5423, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.32707028531663185, |
| "grad_norm": 9.421733856201172, |
| "learning_rate": 1.8691718858733473e-05, |
| "loss": 0.5928, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.33402922755741127, |
| "grad_norm": 8.2644624710083, |
| "learning_rate": 1.8663883089770357e-05, |
| "loss": 0.6234, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3409881697981907, |
| "grad_norm": 16.61441421508789, |
| "learning_rate": 1.8636047320807238e-05, |
| "loss": 0.5777, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.34794711203897005, |
| "grad_norm": 8.484561920166016, |
| "learning_rate": 1.8608211551844123e-05, |
| "loss": 0.5505, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35490605427974947, |
| "grad_norm": 7.690084457397461, |
| "learning_rate": 1.8580375782881004e-05, |
| "loss": 0.5898, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3618649965205289, |
| "grad_norm": 7.3729963302612305, |
| "learning_rate": 1.8552540013917885e-05, |
| "loss": 0.6634, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.36882393876130826, |
| "grad_norm": 8.139713287353516, |
| "learning_rate": 1.852470424495477e-05, |
| "loss": 0.6074, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3757828810020877, |
| "grad_norm": 10.640233039855957, |
| "learning_rate": 1.849686847599165e-05, |
| "loss": 0.6352, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3827418232428671, |
| "grad_norm": 16.793916702270508, |
| "learning_rate": 1.8469032707028535e-05, |
| "loss": 0.6306, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.38970076548364646, |
| "grad_norm": 15.125770568847656, |
| "learning_rate": 1.8441196938065416e-05, |
| "loss": 0.6629, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3966597077244259, |
| "grad_norm": 13.026156425476074, |
| "learning_rate": 1.8413361169102297e-05, |
| "loss": 0.5743, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4036186499652053, |
| "grad_norm": 10.893036842346191, |
| "learning_rate": 1.838552540013918e-05, |
| "loss": 0.5443, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.41057759220598466, |
| "grad_norm": 21.39899253845215, |
| "learning_rate": 1.8357689631176062e-05, |
| "loss": 0.5717, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4175365344467641, |
| "grad_norm": 29.05453872680664, |
| "learning_rate": 1.8329853862212947e-05, |
| "loss": 0.5856, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4244954766875435, |
| "grad_norm": 9.653709411621094, |
| "learning_rate": 1.8302018093249828e-05, |
| "loss": 0.5105, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.43145441892832287, |
| "grad_norm": 14.518112182617188, |
| "learning_rate": 1.827418232428671e-05, |
| "loss": 0.6757, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4384133611691023, |
| "grad_norm": 12.425946235656738, |
| "learning_rate": 1.8246346555323593e-05, |
| "loss": 0.5708, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4453723034098817, |
| "grad_norm": 16.857666015625, |
| "learning_rate": 1.8218510786360474e-05, |
| "loss": 0.603, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4523312456506611, |
| "grad_norm": 19.68130111694336, |
| "learning_rate": 1.8190675017397356e-05, |
| "loss": 0.611, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4592901878914405, |
| "grad_norm": 13.848482131958008, |
| "learning_rate": 1.816283924843424e-05, |
| "loss": 0.625, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4662491301322199, |
| "grad_norm": 12.750751495361328, |
| "learning_rate": 1.813500347947112e-05, |
| "loss": 0.616, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4732080723729993, |
| "grad_norm": 9.34748363494873, |
| "learning_rate": 1.8107167710508005e-05, |
| "loss": 0.612, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4801670146137787, |
| "grad_norm": 14.746898651123047, |
| "learning_rate": 1.8079331941544887e-05, |
| "loss": 0.6521, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4871259568545581, |
| "grad_norm": 16.734874725341797, |
| "learning_rate": 1.8051496172581768e-05, |
| "loss": 0.606, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4940848990953375, |
| "grad_norm": 10.393136024475098, |
| "learning_rate": 1.8023660403618652e-05, |
| "loss": 0.6286, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5010438413361169, |
| "grad_norm": 8.78846263885498, |
| "learning_rate": 1.7995824634655533e-05, |
| "loss": 0.4593, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5080027835768963, |
| "grad_norm": 18.29091453552246, |
| "learning_rate": 1.7967988865692418e-05, |
| "loss": 0.6764, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5149617258176757, |
| "grad_norm": 13.510717391967773, |
| "learning_rate": 1.79401530967293e-05, |
| "loss": 0.6387, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5219206680584552, |
| "grad_norm": 8.811914443969727, |
| "learning_rate": 1.791231732776618e-05, |
| "loss": 0.6072, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5288796102992345, |
| "grad_norm": 12.649739265441895, |
| "learning_rate": 1.7884481558803064e-05, |
| "loss": 0.6128, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5358385525400139, |
| "grad_norm": 9.891730308532715, |
| "learning_rate": 1.7856645789839945e-05, |
| "loss": 0.568, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5427974947807933, |
| "grad_norm": 9.014187812805176, |
| "learning_rate": 1.7828810020876826e-05, |
| "loss": 0.5393, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5497564370215727, |
| "grad_norm": 19.861948013305664, |
| "learning_rate": 1.780097425191371e-05, |
| "loss": 0.5345, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5567153792623522, |
| "grad_norm": 8.01091480255127, |
| "learning_rate": 1.7773138482950595e-05, |
| "loss": 0.6457, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5636743215031316, |
| "grad_norm": 10.99765682220459, |
| "learning_rate": 1.7745302713987476e-05, |
| "loss": 0.5605, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5706332637439109, |
| "grad_norm": 7.248875617980957, |
| "learning_rate": 1.7717466945024357e-05, |
| "loss": 0.5975, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5775922059846903, |
| "grad_norm": 12.024378776550293, |
| "learning_rate": 1.768963117606124e-05, |
| "loss": 0.6238, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5845511482254697, |
| "grad_norm": 7.073344707489014, |
| "learning_rate": 1.7661795407098123e-05, |
| "loss": 0.6067, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5915100904662491, |
| "grad_norm": 11.20361614227295, |
| "learning_rate": 1.7633959638135004e-05, |
| "loss": 0.5319, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5984690327070286, |
| "grad_norm": 15.502198219299316, |
| "learning_rate": 1.760612386917189e-05, |
| "loss": 0.6038, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.605427974947808, |
| "grad_norm": 7.949779510498047, |
| "learning_rate": 1.757828810020877e-05, |
| "loss": 0.5103, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6123869171885873, |
| "grad_norm": 21.52058219909668, |
| "learning_rate": 1.7550452331245654e-05, |
| "loss": 0.5607, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6193458594293667, |
| "grad_norm": 11.751809120178223, |
| "learning_rate": 1.7522616562282535e-05, |
| "loss": 0.46, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6263048016701461, |
| "grad_norm": 19.769689559936523, |
| "learning_rate": 1.7494780793319416e-05, |
| "loss": 0.5555, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6332637439109255, |
| "grad_norm": 6.040853977203369, |
| "learning_rate": 1.74669450243563e-05, |
| "loss": 0.6577, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.640222686151705, |
| "grad_norm": 10.926910400390625, |
| "learning_rate": 1.743910925539318e-05, |
| "loss": 0.5674, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6471816283924844, |
| "grad_norm": 5.619332790374756, |
| "learning_rate": 1.7411273486430066e-05, |
| "loss": 0.5937, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6541405706332637, |
| "grad_norm": 9.416117668151855, |
| "learning_rate": 1.7383437717466947e-05, |
| "loss": 0.5718, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6610995128740431, |
| "grad_norm": 16.25941276550293, |
| "learning_rate": 1.7355601948503828e-05, |
| "loss": 0.5068, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6680584551148225, |
| "grad_norm": 10.545511245727539, |
| "learning_rate": 1.732776617954071e-05, |
| "loss": 0.5642, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.675017397355602, |
| "grad_norm": 7.5949578285217285, |
| "learning_rate": 1.7299930410577594e-05, |
| "loss": 0.5243, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6819763395963814, |
| "grad_norm": 10.766368865966797, |
| "learning_rate": 1.7272094641614475e-05, |
| "loss": 0.5285, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6889352818371608, |
| "grad_norm": 7.256805419921875, |
| "learning_rate": 1.724425887265136e-05, |
| "loss": 0.5286, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6958942240779401, |
| "grad_norm": 10.257540702819824, |
| "learning_rate": 1.721642310368824e-05, |
| "loss": 0.6115, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7028531663187195, |
| "grad_norm": 14.531510353088379, |
| "learning_rate": 1.7188587334725125e-05, |
| "loss": 0.6112, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7098121085594989, |
| "grad_norm": 8.326130867004395, |
| "learning_rate": 1.7160751565762006e-05, |
| "loss": 0.5534, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7167710508002784, |
| "grad_norm": 13.698468208312988, |
| "learning_rate": 1.7132915796798887e-05, |
| "loss": 0.5505, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7237299930410578, |
| "grad_norm": 12.968040466308594, |
| "learning_rate": 1.710508002783577e-05, |
| "loss": 0.596, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7306889352818372, |
| "grad_norm": 10.557011604309082, |
| "learning_rate": 1.7077244258872652e-05, |
| "loss": 0.5588, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7376478775226165, |
| "grad_norm": 7.124124526977539, |
| "learning_rate": 1.7049408489909537e-05, |
| "loss": 0.5644, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7446068197633959, |
| "grad_norm": 7.210671901702881, |
| "learning_rate": 1.7021572720946418e-05, |
| "loss": 0.5828, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7515657620041754, |
| "grad_norm": 20.80126190185547, |
| "learning_rate": 1.69937369519833e-05, |
| "loss": 0.5491, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7585247042449548, |
| "grad_norm": 8.95080852508545, |
| "learning_rate": 1.696590118302018e-05, |
| "loss": 0.495, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7654836464857342, |
| "grad_norm": 8.503535270690918, |
| "learning_rate": 1.6938065414057064e-05, |
| "loss": 0.5253, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7724425887265136, |
| "grad_norm": 6.803649425506592, |
| "learning_rate": 1.691022964509395e-05, |
| "loss": 0.5859, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7794015309672929, |
| "grad_norm": 9.828047752380371, |
| "learning_rate": 1.688239387613083e-05, |
| "loss": 0.5418, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7863604732080723, |
| "grad_norm": 7.690149307250977, |
| "learning_rate": 1.685455810716771e-05, |
| "loss": 0.5738, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.7933194154488518, |
| "grad_norm": 8.76807975769043, |
| "learning_rate": 1.6826722338204595e-05, |
| "loss": 0.458, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8002783576896312, |
| "grad_norm": 10.1242036819458, |
| "learning_rate": 1.6798886569241476e-05, |
| "loss": 0.5573, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8072372999304106, |
| "grad_norm": 8.316211700439453, |
| "learning_rate": 1.6771050800278358e-05, |
| "loss": 0.5527, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.81419624217119, |
| "grad_norm": 15.57465934753418, |
| "learning_rate": 1.6743215031315242e-05, |
| "loss": 0.5704, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8211551844119693, |
| "grad_norm": 13.459155082702637, |
| "learning_rate": 1.6715379262352126e-05, |
| "loss": 0.5659, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8281141266527487, |
| "grad_norm": 7.378421783447266, |
| "learning_rate": 1.6687543493389008e-05, |
| "loss": 0.5482, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8350730688935282, |
| "grad_norm": 10.607648849487305, |
| "learning_rate": 1.665970772442589e-05, |
| "loss": 0.5627, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8420320111343076, |
| "grad_norm": 13.157455444335938, |
| "learning_rate": 1.663187195546277e-05, |
| "loss": 0.5479, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.848990953375087, |
| "grad_norm": 9.343613624572754, |
| "learning_rate": 1.6604036186499654e-05, |
| "loss": 0.5695, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8559498956158664, |
| "grad_norm": 24.01211929321289, |
| "learning_rate": 1.6576200417536535e-05, |
| "loss": 0.5173, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8629088378566457, |
| "grad_norm": 14.033452987670898, |
| "learning_rate": 1.654836464857342e-05, |
| "loss": 0.5604, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8698677800974252, |
| "grad_norm": 13.518189430236816, |
| "learning_rate": 1.65205288796103e-05, |
| "loss": 0.5797, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8768267223382046, |
| "grad_norm": 9.779712677001953, |
| "learning_rate": 1.6492693110647182e-05, |
| "loss": 0.5761, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.883785664578984, |
| "grad_norm": 25.537031173706055, |
| "learning_rate": 1.6464857341684066e-05, |
| "loss": 0.4546, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8907446068197634, |
| "grad_norm": 9.35092544555664, |
| "learning_rate": 1.6437021572720947e-05, |
| "loss": 0.5037, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8977035490605428, |
| "grad_norm": 11.983678817749023, |
| "learning_rate": 1.640918580375783e-05, |
| "loss": 0.4967, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9046624913013221, |
| "grad_norm": 8.681296348571777, |
| "learning_rate": 1.6381350034794713e-05, |
| "loss": 0.6147, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9116214335421016, |
| "grad_norm": 9.482718467712402, |
| "learning_rate": 1.6353514265831597e-05, |
| "loss": 0.5906, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.918580375782881, |
| "grad_norm": 11.09154987335205, |
| "learning_rate": 1.6325678496868478e-05, |
| "loss": 0.5307, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9255393180236604, |
| "grad_norm": 12.523815155029297, |
| "learning_rate": 1.629784272790536e-05, |
| "loss": 0.4931, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9324982602644398, |
| "grad_norm": 13.545294761657715, |
| "learning_rate": 1.627000695894224e-05, |
| "loss": 0.4874, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9394572025052192, |
| "grad_norm": 8.994409561157227, |
| "learning_rate": 1.6242171189979125e-05, |
| "loss": 0.5581, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9464161447459986, |
| "grad_norm": 9.682478904724121, |
| "learning_rate": 1.6214335421016006e-05, |
| "loss": 0.5548, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.953375086986778, |
| "grad_norm": 7.079063415527344, |
| "learning_rate": 1.618649965205289e-05, |
| "loss": 0.4918, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9603340292275574, |
| "grad_norm": 10.886133193969727, |
| "learning_rate": 1.615866388308977e-05, |
| "loss": 0.594, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9672929714683368, |
| "grad_norm": 17.207847595214844, |
| "learning_rate": 1.6130828114126653e-05, |
| "loss": 0.5892, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9742519137091162, |
| "grad_norm": 9.363895416259766, |
| "learning_rate": 1.6102992345163537e-05, |
| "loss": 0.5774, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9812108559498957, |
| "grad_norm": 16.450660705566406, |
| "learning_rate": 1.6075156576200418e-05, |
| "loss": 0.481, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.988169798190675, |
| "grad_norm": 12.754993438720703, |
| "learning_rate": 1.6047320807237302e-05, |
| "loss": 0.5667, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9951287404314544, |
| "grad_norm": 9.848027229309082, |
| "learning_rate": 1.6019485038274184e-05, |
| "loss": 0.562, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8099216710182767, |
| "eval_f1": 0.8040056577967634, |
| "eval_loss": 0.518168568611145, |
| "eval_precision": 0.8071878224437447, |
| "eval_recall": 0.8099216710182767, |
| "eval_runtime": 30.1807, |
| "eval_samples_per_second": 190.353, |
| "eval_steps_per_second": 5.964, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.0020876826722338, |
| "grad_norm": 10.002483367919922, |
| "learning_rate": 1.5991649269311068e-05, |
| "loss": 0.489, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0090466249130132, |
| "grad_norm": 8.09190559387207, |
| "learning_rate": 1.596381350034795e-05, |
| "loss": 0.4158, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0160055671537926, |
| "grad_norm": 11.66649055480957, |
| "learning_rate": 1.593597773138483e-05, |
| "loss": 0.4249, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.022964509394572, |
| "grad_norm": 11.087824821472168, |
| "learning_rate": 1.590814196242171e-05, |
| "loss": 0.4064, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.0299234516353515, |
| "grad_norm": 14.069585800170898, |
| "learning_rate": 1.5880306193458596e-05, |
| "loss": 0.4159, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.036882393876131, |
| "grad_norm": 17.53207778930664, |
| "learning_rate": 1.585247042449548e-05, |
| "loss": 0.3956, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0438413361169103, |
| "grad_norm": 5.558675289154053, |
| "learning_rate": 1.582463465553236e-05, |
| "loss": 0.3837, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0508002783576895, |
| "grad_norm": 14.77510929107666, |
| "learning_rate": 1.5796798886569242e-05, |
| "loss": 0.3886, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.057759220598469, |
| "grad_norm": 11.358789443969727, |
| "learning_rate": 1.5768963117606123e-05, |
| "loss": 0.3557, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.0647181628392484, |
| "grad_norm": 15.428235054016113, |
| "learning_rate": 1.5741127348643008e-05, |
| "loss": 0.4789, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.0716771050800278, |
| "grad_norm": 16.330005645751953, |
| "learning_rate": 1.571329157967989e-05, |
| "loss": 0.4957, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0786360473208072, |
| "grad_norm": 11.652454376220703, |
| "learning_rate": 1.5685455810716773e-05, |
| "loss": 0.4305, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.0855949895615866, |
| "grad_norm": 11.190437316894531, |
| "learning_rate": 1.5657620041753654e-05, |
| "loss": 0.4325, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.092553931802366, |
| "grad_norm": 17.103654861450195, |
| "learning_rate": 1.562978427279054e-05, |
| "loss": 0.4482, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.0995128740431455, |
| "grad_norm": 13.081258773803711, |
| "learning_rate": 1.560194850382742e-05, |
| "loss": 0.3345, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.1064718162839249, |
| "grad_norm": 10.141121864318848, |
| "learning_rate": 1.55741127348643e-05, |
| "loss": 0.453, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.1134307585247043, |
| "grad_norm": 15.91781997680664, |
| "learning_rate": 1.5546276965901182e-05, |
| "loss": 0.4351, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.1203897007654837, |
| "grad_norm": 11.977279663085938, |
| "learning_rate": 1.5518441196938066e-05, |
| "loss": 0.3826, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.1273486430062631, |
| "grad_norm": 12.163490295410156, |
| "learning_rate": 1.549060542797495e-05, |
| "loss": 0.439, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.1343075852470426, |
| "grad_norm": 12.221410751342773, |
| "learning_rate": 1.5462769659011832e-05, |
| "loss": 0.4793, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.1412665274878218, |
| "grad_norm": 9.962662696838379, |
| "learning_rate": 1.5434933890048713e-05, |
| "loss": 0.4524, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1482254697286012, |
| "grad_norm": 10.281346321105957, |
| "learning_rate": 1.5407098121085594e-05, |
| "loss": 0.4537, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1551844119693806, |
| "grad_norm": 8.511795997619629, |
| "learning_rate": 1.537926235212248e-05, |
| "loss": 0.3701, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.16214335421016, |
| "grad_norm": 20.107454299926758, |
| "learning_rate": 1.535142658315936e-05, |
| "loss": 0.4807, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.1691022964509394, |
| "grad_norm": 12.120085716247559, |
| "learning_rate": 1.5323590814196244e-05, |
| "loss": 0.4253, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.1760612386917189, |
| "grad_norm": 13.358500480651855, |
| "learning_rate": 1.5295755045233125e-05, |
| "loss": 0.3941, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.1830201809324983, |
| "grad_norm": 10.09534740447998, |
| "learning_rate": 1.526791927627001e-05, |
| "loss": 0.4248, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.1899791231732777, |
| "grad_norm": 23.822050094604492, |
| "learning_rate": 1.524008350730689e-05, |
| "loss": 0.4355, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.1969380654140571, |
| "grad_norm": 10.628303527832031, |
| "learning_rate": 1.5212247738343773e-05, |
| "loss": 0.3619, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.2038970076548365, |
| "grad_norm": 18.837543487548828, |
| "learning_rate": 1.5184411969380654e-05, |
| "loss": 0.4091, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.210855949895616, |
| "grad_norm": 9.72425365447998, |
| "learning_rate": 1.5156576200417539e-05, |
| "loss": 0.4468, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.2178148921363952, |
| "grad_norm": 21.17300796508789, |
| "learning_rate": 1.512874043145442e-05, |
| "loss": 0.4977, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.2247738343771746, |
| "grad_norm": 13.43977165222168, |
| "learning_rate": 1.5100904662491303e-05, |
| "loss": 0.4028, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.231732776617954, |
| "grad_norm": 11.052497863769531, |
| "learning_rate": 1.5073068893528184e-05, |
| "loss": 0.5183, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.2386917188587334, |
| "grad_norm": 12.521780014038086, |
| "learning_rate": 1.5045233124565067e-05, |
| "loss": 0.4714, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2456506610995128, |
| "grad_norm": 10.825766563415527, |
| "learning_rate": 1.5017397355601951e-05, |
| "loss": 0.459, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.2526096033402923, |
| "grad_norm": 9.5389404296875, |
| "learning_rate": 1.4989561586638832e-05, |
| "loss": 0.4247, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2595685455810717, |
| "grad_norm": 10.02591609954834, |
| "learning_rate": 1.4961725817675715e-05, |
| "loss": 0.4035, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.266527487821851, |
| "grad_norm": 11.203591346740723, |
| "learning_rate": 1.4933890048712596e-05, |
| "loss": 0.3517, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.2734864300626305, |
| "grad_norm": 10.96849250793457, |
| "learning_rate": 1.490605427974948e-05, |
| "loss": 0.4199, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.28044537230341, |
| "grad_norm": 11.830713272094727, |
| "learning_rate": 1.4878218510786361e-05, |
| "loss": 0.4145, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.2874043145441894, |
| "grad_norm": 11.980402946472168, |
| "learning_rate": 1.4850382741823244e-05, |
| "loss": 0.4801, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.2943632567849686, |
| "grad_norm": 12.308026313781738, |
| "learning_rate": 1.4822546972860125e-05, |
| "loss": 0.4601, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.3013221990257482, |
| "grad_norm": 16.0020694732666, |
| "learning_rate": 1.479471120389701e-05, |
| "loss": 0.4674, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.3082811412665274, |
| "grad_norm": 6.870344161987305, |
| "learning_rate": 1.4766875434933892e-05, |
| "loss": 0.4389, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.3152400835073068, |
| "grad_norm": 12.440506935119629, |
| "learning_rate": 1.4739039665970773e-05, |
| "loss": 0.4242, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.3221990257480862, |
| "grad_norm": 11.824153900146484, |
| "learning_rate": 1.4711203897007655e-05, |
| "loss": 0.4483, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.3291579679888657, |
| "grad_norm": 6.609494686126709, |
| "learning_rate": 1.4683368128044539e-05, |
| "loss": 0.4573, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.336116910229645, |
| "grad_norm": 12.088859558105469, |
| "learning_rate": 1.4655532359081422e-05, |
| "loss": 0.3912, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3430758524704245, |
| "grad_norm": 7.114819526672363, |
| "learning_rate": 1.4627696590118303e-05, |
| "loss": 0.3867, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.350034794711204, |
| "grad_norm": 11.105134010314941, |
| "learning_rate": 1.4599860821155186e-05, |
| "loss": 0.5326, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.3569937369519833, |
| "grad_norm": 8.85695743560791, |
| "learning_rate": 1.4572025052192067e-05, |
| "loss": 0.445, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.3639526791927628, |
| "grad_norm": 13.736560821533203, |
| "learning_rate": 1.4544189283228951e-05, |
| "loss": 0.4333, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.3709116214335422, |
| "grad_norm": 9.378311157226562, |
| "learning_rate": 1.4516353514265832e-05, |
| "loss": 0.4275, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.3778705636743216, |
| "grad_norm": 10.04967212677002, |
| "learning_rate": 1.4488517745302715e-05, |
| "loss": 0.4151, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.3848295059151008, |
| "grad_norm": 7.61630392074585, |
| "learning_rate": 1.4460681976339596e-05, |
| "loss": 0.4994, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.3917884481558804, |
| "grad_norm": 14.55225944519043, |
| "learning_rate": 1.443284620737648e-05, |
| "loss": 0.4048, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3987473903966596, |
| "grad_norm": 11.608763694763184, |
| "learning_rate": 1.4405010438413363e-05, |
| "loss": 0.3836, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.405706332637439, |
| "grad_norm": 11.155454635620117, |
| "learning_rate": 1.4377174669450244e-05, |
| "loss": 0.3942, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.4126652748782185, |
| "grad_norm": 11.708532333374023, |
| "learning_rate": 1.4349338900487127e-05, |
| "loss": 0.478, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.4196242171189979, |
| "grad_norm": 9.829862594604492, |
| "learning_rate": 1.432150313152401e-05, |
| "loss": 0.3487, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.4265831593597773, |
| "grad_norm": 14.25184440612793, |
| "learning_rate": 1.4293667362560893e-05, |
| "loss": 0.5225, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.4335421016005567, |
| "grad_norm": 12.317340850830078, |
| "learning_rate": 1.4265831593597774e-05, |
| "loss": 0.37, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.4405010438413361, |
| "grad_norm": 13.569458961486816, |
| "learning_rate": 1.4237995824634656e-05, |
| "loss": 0.3805, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.4474599860821156, |
| "grad_norm": 16.662263870239258, |
| "learning_rate": 1.4210160055671537e-05, |
| "loss": 0.3996, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.454418928322895, |
| "grad_norm": 12.971599578857422, |
| "learning_rate": 1.4182324286708422e-05, |
| "loss": 0.4265, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.4613778705636742, |
| "grad_norm": 9.266508102416992, |
| "learning_rate": 1.4154488517745305e-05, |
| "loss": 0.4199, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.4683368128044538, |
| "grad_norm": 15.103167533874512, |
| "learning_rate": 1.4126652748782186e-05, |
| "loss": 0.472, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.475295755045233, |
| "grad_norm": 13.94981861114502, |
| "learning_rate": 1.4098816979819068e-05, |
| "loss": 0.4681, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.4822546972860124, |
| "grad_norm": 19.643762588500977, |
| "learning_rate": 1.4070981210855951e-05, |
| "loss": 0.3848, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.4892136395267919, |
| "grad_norm": 11.58189868927002, |
| "learning_rate": 1.4043145441892834e-05, |
| "loss": 0.5083, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.4961725817675713, |
| "grad_norm": 13.264250755310059, |
| "learning_rate": 1.4015309672929715e-05, |
| "loss": 0.45, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.5031315240083507, |
| "grad_norm": 10.432905197143555, |
| "learning_rate": 1.3987473903966598e-05, |
| "loss": 0.4583, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.5100904662491301, |
| "grad_norm": 9.850616455078125, |
| "learning_rate": 1.3959638135003482e-05, |
| "loss": 0.349, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.5170494084899095, |
| "grad_norm": 14.087292671203613, |
| "learning_rate": 1.3931802366040363e-05, |
| "loss": 0.448, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.524008350730689, |
| "grad_norm": 12.514032363891602, |
| "learning_rate": 1.3903966597077246e-05, |
| "loss": 0.514, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.5309672929714684, |
| "grad_norm": 25.41820526123047, |
| "learning_rate": 1.3876130828114127e-05, |
| "loss": 0.4356, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.5379262352122476, |
| "grad_norm": 11.849440574645996, |
| "learning_rate": 1.3848295059151012e-05, |
| "loss": 0.3895, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.5448851774530272, |
| "grad_norm": 8.636540412902832, |
| "learning_rate": 1.3820459290187893e-05, |
| "loss": 0.481, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.5518441196938064, |
| "grad_norm": 11.286504745483398, |
| "learning_rate": 1.3792623521224775e-05, |
| "loss": 0.4019, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.558803061934586, |
| "grad_norm": 11.524672508239746, |
| "learning_rate": 1.3764787752261656e-05, |
| "loss": 0.4617, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.5657620041753653, |
| "grad_norm": 11.370726585388184, |
| "learning_rate": 1.373695198329854e-05, |
| "loss": 0.4186, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.572720946416145, |
| "grad_norm": 23.02247428894043, |
| "learning_rate": 1.3709116214335422e-05, |
| "loss": 0.4743, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.579679888656924, |
| "grad_norm": 11.176335334777832, |
| "learning_rate": 1.3681280445372305e-05, |
| "loss": 0.41, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.5866388308977035, |
| "grad_norm": 11.33989429473877, |
| "learning_rate": 1.3653444676409186e-05, |
| "loss": 0.4822, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.593597773138483, |
| "grad_norm": 18.040159225463867, |
| "learning_rate": 1.3625608907446069e-05, |
| "loss": 0.351, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.6005567153792624, |
| "grad_norm": 5.855461597442627, |
| "learning_rate": 1.3597773138482953e-05, |
| "loss": 0.4567, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.6075156576200418, |
| "grad_norm": 10.47138500213623, |
| "learning_rate": 1.3569937369519834e-05, |
| "loss": 0.4875, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.6144745998608212, |
| "grad_norm": 11.59261417388916, |
| "learning_rate": 1.3542101600556717e-05, |
| "loss": 0.4871, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.6214335421016006, |
| "grad_norm": 7.732606410980225, |
| "learning_rate": 1.3514265831593598e-05, |
| "loss": 0.4078, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.6283924843423798, |
| "grad_norm": 10.10660457611084, |
| "learning_rate": 1.3486430062630482e-05, |
| "loss": 0.3767, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.6353514265831595, |
| "grad_norm": 10.724883079528809, |
| "learning_rate": 1.3458594293667363e-05, |
| "loss": 0.4102, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.6423103688239387, |
| "grad_norm": 11.941119194030762, |
| "learning_rate": 1.3430758524704246e-05, |
| "loss": 0.5101, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.6492693110647183, |
| "grad_norm": 11.30588436126709, |
| "learning_rate": 1.3402922755741127e-05, |
| "loss": 0.3157, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.6562282533054975, |
| "grad_norm": 11.969123840332031, |
| "learning_rate": 1.337508698677801e-05, |
| "loss": 0.4568, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.663187195546277, |
| "grad_norm": 12.086457252502441, |
| "learning_rate": 1.3347251217814894e-05, |
| "loss": 0.4307, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.6701461377870563, |
| "grad_norm": 11.068685531616211, |
| "learning_rate": 1.3319415448851776e-05, |
| "loss": 0.4913, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.6771050800278358, |
| "grad_norm": 7.290180206298828, |
| "learning_rate": 1.3291579679888658e-05, |
| "loss": 0.4157, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.6840640222686152, |
| "grad_norm": 12.097051620483398, |
| "learning_rate": 1.326374391092554e-05, |
| "loss": 0.4356, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.6910229645093946, |
| "grad_norm": 10.983007431030273, |
| "learning_rate": 1.3235908141962424e-05, |
| "loss": 0.4594, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.697981906750174, |
| "grad_norm": 8.463971138000488, |
| "learning_rate": 1.3208072372999305e-05, |
| "loss": 0.395, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.7049408489909532, |
| "grad_norm": 10.346870422363281, |
| "learning_rate": 1.3180236604036188e-05, |
| "loss": 0.436, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.7118997912317329, |
| "grad_norm": 13.56679916381836, |
| "learning_rate": 1.3152400835073069e-05, |
| "loss": 0.4011, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.718858733472512, |
| "grad_norm": 8.750198364257812, |
| "learning_rate": 1.3124565066109953e-05, |
| "loss": 0.4479, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.7258176757132917, |
| "grad_norm": 8.680354118347168, |
| "learning_rate": 1.3096729297146836e-05, |
| "loss": 0.357, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.732776617954071, |
| "grad_norm": 13.647467613220215, |
| "learning_rate": 1.3068893528183717e-05, |
| "loss": 0.4286, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.7397355601948505, |
| "grad_norm": 8.507746696472168, |
| "learning_rate": 1.30410577592206e-05, |
| "loss": 0.3357, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7466945024356297, |
| "grad_norm": 12.067097663879395, |
| "learning_rate": 1.3013221990257482e-05, |
| "loss": 0.434, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.7536534446764092, |
| "grad_norm": 9.736947059631348, |
| "learning_rate": 1.2985386221294365e-05, |
| "loss": 0.4228, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.7606123869171886, |
| "grad_norm": 14.245895385742188, |
| "learning_rate": 1.2957550452331246e-05, |
| "loss": 0.4379, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.767571329157968, |
| "grad_norm": 12.476272583007812, |
| "learning_rate": 1.2929714683368129e-05, |
| "loss": 0.4332, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.7745302713987474, |
| "grad_norm": 13.964608192443848, |
| "learning_rate": 1.290187891440501e-05, |
| "loss": 0.3422, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.7814892136395268, |
| "grad_norm": 11.833532333374023, |
| "learning_rate": 1.2874043145441895e-05, |
| "loss": 0.401, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.7884481558803063, |
| "grad_norm": 10.771284103393555, |
| "learning_rate": 1.2846207376478776e-05, |
| "loss": 0.3829, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.7954070981210855, |
| "grad_norm": 13.72558307647705, |
| "learning_rate": 1.2818371607515658e-05, |
| "loss": 0.3885, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.802366040361865, |
| "grad_norm": 19.78665542602539, |
| "learning_rate": 1.279053583855254e-05, |
| "loss": 0.5615, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.8093249826026443, |
| "grad_norm": 10.085536003112793, |
| "learning_rate": 1.2762700069589424e-05, |
| "loss": 0.3689, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.816283924843424, |
| "grad_norm": 11.349889755249023, |
| "learning_rate": 1.2734864300626307e-05, |
| "loss": 0.3492, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.8232428670842031, |
| "grad_norm": 15.420230865478516, |
| "learning_rate": 1.2707028531663188e-05, |
| "loss": 0.4059, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.8302018093249826, |
| "grad_norm": 43.528160095214844, |
| "learning_rate": 1.267919276270007e-05, |
| "loss": 0.4361, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.837160751565762, |
| "grad_norm": 8.911616325378418, |
| "learning_rate": 1.2651356993736953e-05, |
| "loss": 0.413, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.8441196938065414, |
| "grad_norm": 15.205978393554688, |
| "learning_rate": 1.2623521224773836e-05, |
| "loss": 0.4352, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.8510786360473208, |
| "grad_norm": 15.270347595214844, |
| "learning_rate": 1.2595685455810717e-05, |
| "loss": 0.5509, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.8580375782881002, |
| "grad_norm": 7.940185546875, |
| "learning_rate": 1.25678496868476e-05, |
| "loss": 0.4402, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.8649965205288797, |
| "grad_norm": 9.823007583618164, |
| "learning_rate": 1.2540013917884481e-05, |
| "loss": 0.4116, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.8719554627696589, |
| "grad_norm": 14.74289321899414, |
| "learning_rate": 1.2512178148921365e-05, |
| "loss": 0.4503, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.8789144050104385, |
| "grad_norm": 13.300530433654785, |
| "learning_rate": 1.2484342379958248e-05, |
| "loss": 0.4735, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.8858733472512177, |
| "grad_norm": 10.028038024902344, |
| "learning_rate": 1.245650661099513e-05, |
| "loss": 0.4889, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.8928322894919973, |
| "grad_norm": 13.30984878540039, |
| "learning_rate": 1.2428670842032012e-05, |
| "loss": 0.4714, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.8997912317327765, |
| "grad_norm": 8.563050270080566, |
| "learning_rate": 1.2400835073068895e-05, |
| "loss": 0.4601, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.9067501739735562, |
| "grad_norm": 9.030021667480469, |
| "learning_rate": 1.2372999304105777e-05, |
| "loss": 0.448, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.9137091162143354, |
| "grad_norm": 11.629081726074219, |
| "learning_rate": 1.2345163535142659e-05, |
| "loss": 0.4628, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.9206680584551148, |
| "grad_norm": 13.654706001281738, |
| "learning_rate": 1.2317327766179541e-05, |
| "loss": 0.4105, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.9276270006958942, |
| "grad_norm": 10.076985359191895, |
| "learning_rate": 1.2289491997216426e-05, |
| "loss": 0.4087, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.9345859429366736, |
| "grad_norm": 10.824203491210938, |
| "learning_rate": 1.2261656228253307e-05, |
| "loss": 0.3625, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.941544885177453, |
| "grad_norm": 19.84947395324707, |
| "learning_rate": 1.223382045929019e-05, |
| "loss": 0.3909, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.9485038274182325, |
| "grad_norm": 11.292709350585938, |
| "learning_rate": 1.220598469032707e-05, |
| "loss": 0.5342, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.955462769659012, |
| "grad_norm": 12.195109367370605, |
| "learning_rate": 1.2178148921363955e-05, |
| "loss": 0.434, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.962421711899791, |
| "grad_norm": 5.671847820281982, |
| "learning_rate": 1.2150313152400836e-05, |
| "loss": 0.3821, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.9693806541405707, |
| "grad_norm": 6.8894267082214355, |
| "learning_rate": 1.2122477383437719e-05, |
| "loss": 0.4741, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.97633959638135, |
| "grad_norm": 11.644760131835938, |
| "learning_rate": 1.20946416144746e-05, |
| "loss": 0.4485, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.9832985386221296, |
| "grad_norm": 13.690812110900879, |
| "learning_rate": 1.2066805845511483e-05, |
| "loss": 0.4329, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.9902574808629088, |
| "grad_norm": 14.693482398986816, |
| "learning_rate": 1.2038970076548367e-05, |
| "loss": 0.4345, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.9972164231036882, |
| "grad_norm": 8.777437210083008, |
| "learning_rate": 1.2011134307585248e-05, |
| "loss": 0.4272, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8186248912097476, |
| "eval_f1": 0.8142148630689494, |
| "eval_loss": 0.5136106610298157, |
| "eval_precision": 0.815336261474068, |
| "eval_recall": 0.8186248912097476, |
| "eval_runtime": 30.487, |
| "eval_samples_per_second": 188.441, |
| "eval_steps_per_second": 5.904, |
| "step": 2874 |
| }, |
| { |
| "epoch": 2.0041753653444676, |
| "grad_norm": 6.2629876136779785, |
| "learning_rate": 1.198329853862213e-05, |
| "loss": 0.3394, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.0111343075852472, |
| "grad_norm": 13.670483589172363, |
| "learning_rate": 1.1955462769659012e-05, |
| "loss": 0.2971, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.0180932498260264, |
| "grad_norm": 7.209113121032715, |
| "learning_rate": 1.1927627000695896e-05, |
| "loss": 0.3459, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.0250521920668056, |
| "grad_norm": 14.117879867553711, |
| "learning_rate": 1.1899791231732778e-05, |
| "loss": 0.2936, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.0320111343075853, |
| "grad_norm": 8.980249404907227, |
| "learning_rate": 1.187195546276966e-05, |
| "loss": 0.3326, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.0389700765483645, |
| "grad_norm": 16.819644927978516, |
| "learning_rate": 1.1844119693806541e-05, |
| "loss": 0.3578, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.045929018789144, |
| "grad_norm": 14.287947654724121, |
| "learning_rate": 1.1816283924843426e-05, |
| "loss": 0.3025, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.0528879610299233, |
| "grad_norm": 11.339349746704102, |
| "learning_rate": 1.1788448155880307e-05, |
| "loss": 0.3576, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.059846903270703, |
| "grad_norm": 7.132763862609863, |
| "learning_rate": 1.176061238691719e-05, |
| "loss": 0.319, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.066805845511482, |
| "grad_norm": 10.997299194335938, |
| "learning_rate": 1.173277661795407e-05, |
| "loss": 0.398, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.073764787752262, |
| "grad_norm": 27.665699005126953, |
| "learning_rate": 1.1704940848990953e-05, |
| "loss": 0.2853, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.080723729993041, |
| "grad_norm": 25.032983779907227, |
| "learning_rate": 1.1677105080027838e-05, |
| "loss": 0.3484, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.0876826722338206, |
| "grad_norm": 17.8544921875, |
| "learning_rate": 1.1649269311064719e-05, |
| "loss": 0.3496, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.0946416144746, |
| "grad_norm": 19.761899948120117, |
| "learning_rate": 1.1621433542101602e-05, |
| "loss": 0.3137, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.101600556715379, |
| "grad_norm": 21.972309112548828, |
| "learning_rate": 1.1593597773138483e-05, |
| "loss": 0.3503, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.1085594989561587, |
| "grad_norm": 14.141931533813477, |
| "learning_rate": 1.1565762004175367e-05, |
| "loss": 0.2801, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.115518441196938, |
| "grad_norm": 9.858434677124023, |
| "learning_rate": 1.1537926235212248e-05, |
| "loss": 0.2912, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.1224773834377175, |
| "grad_norm": 11.357017517089844, |
| "learning_rate": 1.1510090466249131e-05, |
| "loss": 0.3761, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.1294363256784967, |
| "grad_norm": 14.140629768371582, |
| "learning_rate": 1.1482254697286012e-05, |
| "loss": 0.4435, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.1363952679192764, |
| "grad_norm": 14.739768028259277, |
| "learning_rate": 1.1454418928322897e-05, |
| "loss": 0.3424, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.1433542101600556, |
| "grad_norm": 11.241192817687988, |
| "learning_rate": 1.142658315935978e-05, |
| "loss": 0.3119, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.150313152400835, |
| "grad_norm": 7.299347400665283, |
| "learning_rate": 1.139874739039666e-05, |
| "loss": 0.2737, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.1572720946416144, |
| "grad_norm": 12.915804862976074, |
| "learning_rate": 1.1370911621433543e-05, |
| "loss": 0.3366, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.164231036882394, |
| "grad_norm": 14.415313720703125, |
| "learning_rate": 1.1343075852470426e-05, |
| "loss": 0.3461, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.1711899791231732, |
| "grad_norm": 37.51091384887695, |
| "learning_rate": 1.1315240083507309e-05, |
| "loss": 0.2735, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.178148921363953, |
| "grad_norm": 14.238667488098145, |
| "learning_rate": 1.128740431454419e-05, |
| "loss": 0.3137, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.185107863604732, |
| "grad_norm": 10.343038558959961, |
| "learning_rate": 1.1259568545581073e-05, |
| "loss": 0.3305, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.1920668058455113, |
| "grad_norm": 11.619972229003906, |
| "learning_rate": 1.1231732776617954e-05, |
| "loss": 0.3192, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.199025748086291, |
| "grad_norm": 10.04326343536377, |
| "learning_rate": 1.1203897007654838e-05, |
| "loss": 0.3116, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.20598469032707, |
| "grad_norm": 10.689598083496094, |
| "learning_rate": 1.117606123869172e-05, |
| "loss": 0.2896, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.2129436325678498, |
| "grad_norm": 13.70692253112793, |
| "learning_rate": 1.1148225469728602e-05, |
| "loss": 0.3201, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.219902574808629, |
| "grad_norm": 11.719026565551758, |
| "learning_rate": 1.1120389700765483e-05, |
| "loss": 0.2838, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.2268615170494086, |
| "grad_norm": 14.697103500366211, |
| "learning_rate": 1.1092553931802367e-05, |
| "loss": 0.3266, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.233820459290188, |
| "grad_norm": 9.828338623046875, |
| "learning_rate": 1.106471816283925e-05, |
| "loss": 0.2887, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.2407794015309674, |
| "grad_norm": 17.478595733642578, |
| "learning_rate": 1.1036882393876131e-05, |
| "loss": 0.263, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.2477383437717466, |
| "grad_norm": 12.797255516052246, |
| "learning_rate": 1.1009046624913014e-05, |
| "loss": 0.2799, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.2546972860125263, |
| "grad_norm": 7.045528888702393, |
| "learning_rate": 1.0981210855949897e-05, |
| "loss": 0.3794, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.2616562282533055, |
| "grad_norm": 13.09620189666748, |
| "learning_rate": 1.095337508698678e-05, |
| "loss": 0.2539, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.268615170494085, |
| "grad_norm": 7.552209377288818, |
| "learning_rate": 1.092553931802366e-05, |
| "loss": 0.3526, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.2755741127348643, |
| "grad_norm": 10.503962516784668, |
| "learning_rate": 1.0897703549060543e-05, |
| "loss": 0.3042, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.2825330549756435, |
| "grad_norm": 9.320645332336426, |
| "learning_rate": 1.0869867780097424e-05, |
| "loss": 0.2384, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.289491997216423, |
| "grad_norm": 9.707759857177734, |
| "learning_rate": 1.0842032011134309e-05, |
| "loss": 0.2633, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.2964509394572024, |
| "grad_norm": 10.683955192565918, |
| "learning_rate": 1.0814196242171192e-05, |
| "loss": 0.2941, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.303409881697982, |
| "grad_norm": 11.840535163879395, |
| "learning_rate": 1.0786360473208073e-05, |
| "loss": 0.2836, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.310368823938761, |
| "grad_norm": 17.78310203552246, |
| "learning_rate": 1.0758524704244955e-05, |
| "loss": 0.317, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.317327766179541, |
| "grad_norm": 14.615537643432617, |
| "learning_rate": 1.0730688935281838e-05, |
| "loss": 0.2933, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.32428670842032, |
| "grad_norm": 14.550018310546875, |
| "learning_rate": 1.0702853166318721e-05, |
| "loss": 0.3915, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.3312456506610997, |
| "grad_norm": 11.032766342163086, |
| "learning_rate": 1.0675017397355602e-05, |
| "loss": 0.3216, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.338204592901879, |
| "grad_norm": 11.570281028747559, |
| "learning_rate": 1.0647181628392485e-05, |
| "loss": 0.3011, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.3451635351426585, |
| "grad_norm": 8.726863861083984, |
| "learning_rate": 1.061934585942937e-05, |
| "loss": 0.2779, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.3521224773834377, |
| "grad_norm": 11.70459270477295, |
| "learning_rate": 1.059151009046625e-05, |
| "loss": 0.3044, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.359081419624217, |
| "grad_norm": 11.244171142578125, |
| "learning_rate": 1.0563674321503133e-05, |
| "loss": 0.2742, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.3660403618649966, |
| "grad_norm": 13.98281192779541, |
| "learning_rate": 1.0535838552540014e-05, |
| "loss": 0.4095, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.3729993041057758, |
| "grad_norm": 60.7244758605957, |
| "learning_rate": 1.0508002783576897e-05, |
| "loss": 0.2786, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.3799582463465554, |
| "grad_norm": 17.862695693969727, |
| "learning_rate": 1.048016701461378e-05, |
| "loss": 0.4003, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.3869171885873346, |
| "grad_norm": 11.725099563598633, |
| "learning_rate": 1.0452331245650662e-05, |
| "loss": 0.2304, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.3938761308281142, |
| "grad_norm": 14.38791561126709, |
| "learning_rate": 1.0424495476687543e-05, |
| "loss": 0.3011, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.4008350730688934, |
| "grad_norm": 17.396326065063477, |
| "learning_rate": 1.0396659707724426e-05, |
| "loss": 0.2762, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.407794015309673, |
| "grad_norm": 15.41369915008545, |
| "learning_rate": 1.036882393876131e-05, |
| "loss": 0.318, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.4147529575504523, |
| "grad_norm": 10.988295555114746, |
| "learning_rate": 1.0340988169798192e-05, |
| "loss": 0.3218, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.421711899791232, |
| "grad_norm": 25.048797607421875, |
| "learning_rate": 1.0313152400835074e-05, |
| "loss": 0.3305, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.428670842032011, |
| "grad_norm": 14.599493026733398, |
| "learning_rate": 1.0285316631871956e-05, |
| "loss": 0.3633, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.4356297842727903, |
| "grad_norm": 14.409786224365234, |
| "learning_rate": 1.025748086290884e-05, |
| "loss": 0.3577, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.44258872651357, |
| "grad_norm": 11.649439811706543, |
| "learning_rate": 1.0229645093945721e-05, |
| "loss": 0.321, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.449547668754349, |
| "grad_norm": 20.704423904418945, |
| "learning_rate": 1.0201809324982604e-05, |
| "loss": 0.3077, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.456506610995129, |
| "grad_norm": 9.154399871826172, |
| "learning_rate": 1.0173973556019485e-05, |
| "loss": 0.2218, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.463465553235908, |
| "grad_norm": 10.178906440734863, |
| "learning_rate": 1.014613778705637e-05, |
| "loss": 0.3181, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.4704244954766876, |
| "grad_norm": 12.843514442443848, |
| "learning_rate": 1.011830201809325e-05, |
| "loss": 0.2807, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.477383437717467, |
| "grad_norm": 15.194390296936035, |
| "learning_rate": 1.0090466249130133e-05, |
| "loss": 0.3834, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.4843423799582465, |
| "grad_norm": 10.255640983581543, |
| "learning_rate": 1.0062630480167014e-05, |
| "loss": 0.2968, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.4913013221990257, |
| "grad_norm": 12.686639785766602, |
| "learning_rate": 1.0034794711203897e-05, |
| "loss": 0.3559, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.4982602644398053, |
| "grad_norm": 18.302518844604492, |
| "learning_rate": 1.0006958942240781e-05, |
| "loss": 0.2919, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.5052192066805845, |
| "grad_norm": 13.351080894470215, |
| "learning_rate": 9.979123173277662e-06, |
| "loss": 0.374, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.5121781489213637, |
| "grad_norm": 11.826626777648926, |
| "learning_rate": 9.951287404314545e-06, |
| "loss": 0.351, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.5191370911621433, |
| "grad_norm": 14.403546333312988, |
| "learning_rate": 9.923451635351428e-06, |
| "loss": 0.3448, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.526096033402923, |
| "grad_norm": 8.75331974029541, |
| "learning_rate": 9.895615866388309e-06, |
| "loss": 0.3678, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.533054975643702, |
| "grad_norm": 7.926251411437988, |
| "learning_rate": 9.867780097425192e-06, |
| "loss": 0.3124, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.5400139178844814, |
| "grad_norm": 14.520807266235352, |
| "learning_rate": 9.839944328462075e-06, |
| "loss": 0.3685, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.546972860125261, |
| "grad_norm": 6.630367279052734, |
| "learning_rate": 9.812108559498957e-06, |
| "loss": 0.4329, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.5539318023660402, |
| "grad_norm": 16.33591651916504, |
| "learning_rate": 9.784272790535838e-06, |
| "loss": 0.2904, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.56089074460682, |
| "grad_norm": 12.767754554748535, |
| "learning_rate": 9.756437021572723e-06, |
| "loss": 0.3424, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.567849686847599, |
| "grad_norm": 17.801118850708008, |
| "learning_rate": 9.728601252609604e-06, |
| "loss": 0.3373, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.5748086290883787, |
| "grad_norm": 12.671394348144531, |
| "learning_rate": 9.700765483646487e-06, |
| "loss": 0.3632, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.581767571329158, |
| "grad_norm": 13.465224266052246, |
| "learning_rate": 9.67292971468337e-06, |
| "loss": 0.3065, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.588726513569937, |
| "grad_norm": 7.1438822746276855, |
| "learning_rate": 9.64509394572025e-06, |
| "loss": 0.2948, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.5956854558107167, |
| "grad_norm": 6.285761833190918, |
| "learning_rate": 9.617258176757133e-06, |
| "loss": 0.2847, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.6026443980514964, |
| "grad_norm": 12.588811874389648, |
| "learning_rate": 9.589422407794016e-06, |
| "loss": 0.3873, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.6096033402922756, |
| "grad_norm": 10.82616138458252, |
| "learning_rate": 9.561586638830899e-06, |
| "loss": 0.276, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.616562282533055, |
| "grad_norm": 19.147323608398438, |
| "learning_rate": 9.53375086986778e-06, |
| "loss": 0.3486, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.6235212247738344, |
| "grad_norm": 5.6541266441345215, |
| "learning_rate": 9.505915100904664e-06, |
| "loss": 0.3499, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.6304801670146136, |
| "grad_norm": 11.496247291564941, |
| "learning_rate": 9.478079331941545e-06, |
| "loss": 0.3104, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.6374391092553933, |
| "grad_norm": 13.55700397491455, |
| "learning_rate": 9.450243562978428e-06, |
| "loss": 0.3168, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.6443980514961725, |
| "grad_norm": 13.626465797424316, |
| "learning_rate": 9.422407794015311e-06, |
| "loss": 0.3061, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.651356993736952, |
| "grad_norm": 15.465563774108887, |
| "learning_rate": 9.394572025052194e-06, |
| "loss": 0.2827, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.6583159359777313, |
| "grad_norm": 10.07729721069336, |
| "learning_rate": 9.366736256089075e-06, |
| "loss": 0.2696, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.665274878218511, |
| "grad_norm": 14.677043914794922, |
| "learning_rate": 9.338900487125957e-06, |
| "loss": 0.3666, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.67223382045929, |
| "grad_norm": 9.545304298400879, |
| "learning_rate": 9.31106471816284e-06, |
| "loss": 0.3099, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.67919276270007, |
| "grad_norm": 13.406818389892578, |
| "learning_rate": 9.283228949199723e-06, |
| "loss": 0.3011, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.686151704940849, |
| "grad_norm": 9.16269302368164, |
| "learning_rate": 9.255393180236604e-06, |
| "loss": 0.3376, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.693110647181628, |
| "grad_norm": 13.706355094909668, |
| "learning_rate": 9.227557411273487e-06, |
| "loss": 0.3558, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.700069589422408, |
| "grad_norm": 13.172536849975586, |
| "learning_rate": 9.19972164231037e-06, |
| "loss": 0.3418, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.7070285316631875, |
| "grad_norm": 13.34077262878418, |
| "learning_rate": 9.171885873347252e-06, |
| "loss": 0.4049, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.7139874739039667, |
| "grad_norm": 22.909151077270508, |
| "learning_rate": 9.144050104384135e-06, |
| "loss": 0.3101, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.720946416144746, |
| "grad_norm": 10.906767845153809, |
| "learning_rate": 9.116214335421016e-06, |
| "loss": 0.2435, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.7279053583855255, |
| "grad_norm": 17.20676040649414, |
| "learning_rate": 9.088378566457899e-06, |
| "loss": 0.3154, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.7348643006263047, |
| "grad_norm": 12.238724708557129, |
| "learning_rate": 9.060542797494782e-06, |
| "loss": 0.2535, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.7418232428670843, |
| "grad_norm": 14.290855407714844, |
| "learning_rate": 9.032707028531664e-06, |
| "loss": 0.32, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.7487821851078635, |
| "grad_norm": 7.506951332092285, |
| "learning_rate": 9.004871259568545e-06, |
| "loss": 0.3707, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.755741127348643, |
| "grad_norm": 9.022459030151367, |
| "learning_rate": 8.97703549060543e-06, |
| "loss": 0.2429, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.7627000695894224, |
| "grad_norm": 8.920448303222656, |
| "learning_rate": 8.949199721642311e-06, |
| "loss": 0.2934, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.7696590118302016, |
| "grad_norm": 7.519834995269775, |
| "learning_rate": 8.921363952679194e-06, |
| "loss": 0.2833, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.776617954070981, |
| "grad_norm": 10.720945358276367, |
| "learning_rate": 8.893528183716076e-06, |
| "loss": 0.2456, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.783576896311761, |
| "grad_norm": 11.830615997314453, |
| "learning_rate": 8.865692414752958e-06, |
| "loss": 0.2867, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.79053583855254, |
| "grad_norm": 9.925026893615723, |
| "learning_rate": 8.83785664578984e-06, |
| "loss": 0.3503, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.7974947807933193, |
| "grad_norm": 9.181790351867676, |
| "learning_rate": 8.810020876826723e-06, |
| "loss": 0.3396, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.804453723034099, |
| "grad_norm": 17.847026824951172, |
| "learning_rate": 8.782185107863606e-06, |
| "loss": 0.3592, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.811412665274878, |
| "grad_norm": 14.639543533325195, |
| "learning_rate": 8.754349338900487e-06, |
| "loss": 0.2279, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.8183716075156577, |
| "grad_norm": 14.787379264831543, |
| "learning_rate": 8.72651356993737e-06, |
| "loss": 0.298, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.825330549756437, |
| "grad_norm": 9.879755020141602, |
| "learning_rate": 8.698677800974252e-06, |
| "loss": 0.3329, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.8322894919972166, |
| "grad_norm": 8.337702751159668, |
| "learning_rate": 8.670842032011135e-06, |
| "loss": 0.3393, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.8392484342379958, |
| "grad_norm": 11.59692668914795, |
| "learning_rate": 8.643006263048018e-06, |
| "loss": 0.2697, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.846207376478775, |
| "grad_norm": 22.700538635253906, |
| "learning_rate": 8.6151704940849e-06, |
| "loss": 0.3444, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.8531663187195546, |
| "grad_norm": 13.9461088180542, |
| "learning_rate": 8.587334725121782e-06, |
| "loss": 0.2616, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.8601252609603343, |
| "grad_norm": 9.75053882598877, |
| "learning_rate": 8.559498956158664e-06, |
| "loss": 0.3219, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.8670842032011135, |
| "grad_norm": 11.127705574035645, |
| "learning_rate": 8.531663187195547e-06, |
| "loss": 0.3238, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.8740431454418927, |
| "grad_norm": 13.097844123840332, |
| "learning_rate": 8.50382741823243e-06, |
| "loss": 0.3177, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.8810020876826723, |
| "grad_norm": 11.675921440124512, |
| "learning_rate": 8.475991649269311e-06, |
| "loss": 0.2872, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.8879610299234515, |
| "grad_norm": 9.369670867919922, |
| "learning_rate": 8.448155880306194e-06, |
| "loss": 0.2693, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.894919972164231, |
| "grad_norm": 8.535505294799805, |
| "learning_rate": 8.420320111343077e-06, |
| "loss": 0.2828, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.9018789144050103, |
| "grad_norm": 11.415098190307617, |
| "learning_rate": 8.392484342379958e-06, |
| "loss": 0.341, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.90883785664579, |
| "grad_norm": 19.970497131347656, |
| "learning_rate": 8.364648573416842e-06, |
| "loss": 0.3472, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.915796798886569, |
| "grad_norm": 6.632875919342041, |
| "learning_rate": 8.336812804453723e-06, |
| "loss": 0.317, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.9227557411273484, |
| "grad_norm": 18.783174514770508, |
| "learning_rate": 8.308977035490606e-06, |
| "loss": 0.3287, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.929714683368128, |
| "grad_norm": 10.871125221252441, |
| "learning_rate": 8.281141266527489e-06, |
| "loss": 0.2833, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.9366736256089077, |
| "grad_norm": 16.211822509765625, |
| "learning_rate": 8.253305497564371e-06, |
| "loss": 0.2999, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.943632567849687, |
| "grad_norm": 15.132637023925781, |
| "learning_rate": 8.225469728601253e-06, |
| "loss": 0.3001, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.950591510090466, |
| "grad_norm": 15.456144332885742, |
| "learning_rate": 8.197633959638135e-06, |
| "loss": 0.3072, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.9575504523312457, |
| "grad_norm": 12.601120948791504, |
| "learning_rate": 8.169798190675018e-06, |
| "loss": 0.343, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.964509394572025, |
| "grad_norm": 18.754928588867188, |
| "learning_rate": 8.1419624217119e-06, |
| "loss": 0.3183, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.9714683368128045, |
| "grad_norm": 12.473878860473633, |
| "learning_rate": 8.114126652748784e-06, |
| "loss": 0.3068, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.9784272790535837, |
| "grad_norm": 15.256598472595215, |
| "learning_rate": 8.086290883785666e-06, |
| "loss": 0.3455, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.9853862212943634, |
| "grad_norm": 9.27231216430664, |
| "learning_rate": 8.058455114822547e-06, |
| "loss": 0.352, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.9923451635351426, |
| "grad_norm": 9.258604049682617, |
| "learning_rate": 8.03061934585943e-06, |
| "loss": 0.2213, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.999304105775922, |
| "grad_norm": 14.93215560913086, |
| "learning_rate": 8.002783576896313e-06, |
| "loss": 0.4324, |
| "step": 4310 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.816710182767624, |
| "eval_f1": 0.8145784658491753, |
| "eval_loss": 0.567512571811676, |
| "eval_precision": 0.8135975269617428, |
| "eval_recall": 0.816710182767624, |
| "eval_runtime": 30.2203, |
| "eval_samples_per_second": 190.104, |
| "eval_steps_per_second": 5.956, |
| "step": 4311 |
| }, |
| { |
| "epoch": 3.0062630480167014, |
| "grad_norm": 18.71284294128418, |
| "learning_rate": 7.974947807933194e-06, |
| "loss": 0.2542, |
| "step": 4320 |
| }, |
| { |
| "epoch": 3.013221990257481, |
| "grad_norm": 7.149287223815918, |
| "learning_rate": 7.947112038970077e-06, |
| "loss": 0.2366, |
| "step": 4330 |
| }, |
| { |
| "epoch": 3.0201809324982603, |
| "grad_norm": 11.145984649658203, |
| "learning_rate": 7.91927627000696e-06, |
| "loss": 0.2427, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.0271398747390394, |
| "grad_norm": 14.64748764038086, |
| "learning_rate": 7.891440501043842e-06, |
| "loss": 0.2473, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.034098816979819, |
| "grad_norm": 13.893207550048828, |
| "learning_rate": 7.863604732080723e-06, |
| "loss": 0.2094, |
| "step": 4360 |
| }, |
| { |
| "epoch": 3.0410577592205983, |
| "grad_norm": 22.052799224853516, |
| "learning_rate": 7.835768963117608e-06, |
| "loss": 0.2436, |
| "step": 4370 |
| }, |
| { |
| "epoch": 3.048016701461378, |
| "grad_norm": 16.942176818847656, |
| "learning_rate": 7.807933194154489e-06, |
| "loss": 0.1888, |
| "step": 4380 |
| }, |
| { |
| "epoch": 3.054975643702157, |
| "grad_norm": 12.624246597290039, |
| "learning_rate": 7.780097425191372e-06, |
| "loss": 0.2503, |
| "step": 4390 |
| }, |
| { |
| "epoch": 3.0619345859429368, |
| "grad_norm": 6.612172603607178, |
| "learning_rate": 7.752261656228254e-06, |
| "loss": 0.2775, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.068893528183716, |
| "grad_norm": 7.194397449493408, |
| "learning_rate": 7.724425887265137e-06, |
| "loss": 0.2143, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.0758524704244956, |
| "grad_norm": 15.386282920837402, |
| "learning_rate": 7.696590118302018e-06, |
| "loss": 0.2393, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.082811412665275, |
| "grad_norm": 4.229943752288818, |
| "learning_rate": 7.668754349338901e-06, |
| "loss": 0.2499, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.0897703549060545, |
| "grad_norm": 7.80819845199585, |
| "learning_rate": 7.640918580375784e-06, |
| "loss": 0.2578, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.0967292971468336, |
| "grad_norm": 3.920732259750366, |
| "learning_rate": 7.6130828114126656e-06, |
| "loss": 0.2114, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.1036882393876133, |
| "grad_norm": 16.482385635375977, |
| "learning_rate": 7.585247042449548e-06, |
| "loss": 0.1836, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.1106471816283925, |
| "grad_norm": 10.486527442932129, |
| "learning_rate": 7.55741127348643e-06, |
| "loss": 0.2508, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.1176061238691717, |
| "grad_norm": 9.817858695983887, |
| "learning_rate": 7.529575504523313e-06, |
| "loss": 0.2671, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.1245650661099513, |
| "grad_norm": 7.070506572723389, |
| "learning_rate": 7.501739735560195e-06, |
| "loss": 0.2899, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.1315240083507305, |
| "grad_norm": 11.537872314453125, |
| "learning_rate": 7.473903966597078e-06, |
| "loss": 0.2668, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.13848295059151, |
| "grad_norm": 14.454391479492188, |
| "learning_rate": 7.44606819763396e-06, |
| "loss": 0.2487, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.1454418928322894, |
| "grad_norm": 12.986367225646973, |
| "learning_rate": 7.418232428670843e-06, |
| "loss": 0.3061, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.152400835073069, |
| "grad_norm": 21.419010162353516, |
| "learning_rate": 7.390396659707725e-06, |
| "loss": 0.2157, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.159359777313848, |
| "grad_norm": 19.896608352661133, |
| "learning_rate": 7.362560890744608e-06, |
| "loss": 0.3067, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.166318719554628, |
| "grad_norm": 12.328235626220703, |
| "learning_rate": 7.33472512178149e-06, |
| "loss": 0.2303, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.173277661795407, |
| "grad_norm": 10.288804054260254, |
| "learning_rate": 7.3068893528183725e-06, |
| "loss": 0.3152, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.1802366040361867, |
| "grad_norm": 7.457220077514648, |
| "learning_rate": 7.2790535838552544e-06, |
| "loss": 0.2319, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.187195546276966, |
| "grad_norm": 11.831998825073242, |
| "learning_rate": 7.251217814892137e-06, |
| "loss": 0.2369, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.1941544885177455, |
| "grad_norm": 8.234902381896973, |
| "learning_rate": 7.223382045929019e-06, |
| "loss": 0.1987, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.2011134307585247, |
| "grad_norm": 11.515932083129883, |
| "learning_rate": 7.195546276965901e-06, |
| "loss": 0.1955, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.208072372999304, |
| "grad_norm": 13.247298240661621, |
| "learning_rate": 7.167710508002785e-06, |
| "loss": 0.2123, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.2150313152400836, |
| "grad_norm": 9.564682006835938, |
| "learning_rate": 7.139874739039666e-06, |
| "loss": 0.2544, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.2219902574808628, |
| "grad_norm": 16.233783721923828, |
| "learning_rate": 7.112038970076549e-06, |
| "loss": 0.2659, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.2289491997216424, |
| "grad_norm": 6.909665107727051, |
| "learning_rate": 7.084203201113431e-06, |
| "loss": 0.1884, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.2359081419624216, |
| "grad_norm": 13.52547836303711, |
| "learning_rate": 7.056367432150314e-06, |
| "loss": 0.2759, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.2428670842032012, |
| "grad_norm": 10.246102333068848, |
| "learning_rate": 7.028531663187196e-06, |
| "loss": 0.2318, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.2498260264439804, |
| "grad_norm": 25.54823875427246, |
| "learning_rate": 7.000695894224079e-06, |
| "loss": 0.2425, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.25678496868476, |
| "grad_norm": 10.150367736816406, |
| "learning_rate": 6.9728601252609605e-06, |
| "loss": 0.2687, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.2637439109255393, |
| "grad_norm": 17.207233428955078, |
| "learning_rate": 6.945024356297843e-06, |
| "loss": 0.2855, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.270702853166319, |
| "grad_norm": 8.081562042236328, |
| "learning_rate": 6.917188587334725e-06, |
| "loss": 0.2238, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.277661795407098, |
| "grad_norm": 22.442302703857422, |
| "learning_rate": 6.889352818371609e-06, |
| "loss": 0.2874, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.2846207376478773, |
| "grad_norm": 5.26035213470459, |
| "learning_rate": 6.861517049408491e-06, |
| "loss": 0.2347, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.291579679888657, |
| "grad_norm": 11.35543155670166, |
| "learning_rate": 6.8336812804453735e-06, |
| "loss": 0.2517, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.298538622129436, |
| "grad_norm": 20.361177444458008, |
| "learning_rate": 6.805845511482255e-06, |
| "loss": 0.2306, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.305497564370216, |
| "grad_norm": 21.40257453918457, |
| "learning_rate": 6.778009742519137e-06, |
| "loss": 0.245, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.312456506610995, |
| "grad_norm": 15.335564613342285, |
| "learning_rate": 6.75017397355602e-06, |
| "loss": 0.2872, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.3194154488517746, |
| "grad_norm": 12.894388198852539, |
| "learning_rate": 6.722338204592902e-06, |
| "loss": 0.2023, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.326374391092554, |
| "grad_norm": 9.890000343322754, |
| "learning_rate": 6.694502435629785e-06, |
| "loss": 0.2154, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 9.852010726928711, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.2223, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.3402922755741127, |
| "grad_norm": 12.012703895568848, |
| "learning_rate": 6.638830897703549e-06, |
| "loss": 0.1917, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.3472512178148923, |
| "grad_norm": 6.725717544555664, |
| "learning_rate": 6.610995128740431e-06, |
| "loss": 0.2387, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.3542101600556715, |
| "grad_norm": 11.746057510375977, |
| "learning_rate": 6.583159359777315e-06, |
| "loss": 0.2588, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.3611691022964507, |
| "grad_norm": 15.1249418258667, |
| "learning_rate": 6.555323590814197e-06, |
| "loss": 0.2504, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.3681280445372304, |
| "grad_norm": 43.986305236816406, |
| "learning_rate": 6.52748782185108e-06, |
| "loss": 0.2518, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.3750869867780096, |
| "grad_norm": 6.751053810119629, |
| "learning_rate": 6.4996520528879615e-06, |
| "loss": 0.2555, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.382045929018789, |
| "grad_norm": 11.51275634765625, |
| "learning_rate": 6.471816283924844e-06, |
| "loss": 0.2098, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.3890048712595684, |
| "grad_norm": 28.333683013916016, |
| "learning_rate": 6.443980514961726e-06, |
| "loss": 0.2588, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.395963813500348, |
| "grad_norm": 13.346843719482422, |
| "learning_rate": 6.416144745998609e-06, |
| "loss": 0.2254, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.4029227557411272, |
| "grad_norm": 14.337092399597168, |
| "learning_rate": 6.388308977035491e-06, |
| "loss": 0.2666, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.409881697981907, |
| "grad_norm": 19.120765686035156, |
| "learning_rate": 6.360473208072373e-06, |
| "loss": 0.2897, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.416840640222686, |
| "grad_norm": 9.88152027130127, |
| "learning_rate": 6.332637439109256e-06, |
| "loss": 0.272, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.4237995824634657, |
| "grad_norm": 16.881410598754883, |
| "learning_rate": 6.304801670146138e-06, |
| "loss": 0.1939, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.430758524704245, |
| "grad_norm": 9.80156421661377, |
| "learning_rate": 6.276965901183021e-06, |
| "loss": 0.1824, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.437717466945024, |
| "grad_norm": 13.772383689880371, |
| "learning_rate": 6.249130132219903e-06, |
| "loss": 0.2546, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.4446764091858038, |
| "grad_norm": 15.60239028930664, |
| "learning_rate": 6.221294363256786e-06, |
| "loss": 0.287, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.4516353514265834, |
| "grad_norm": 13.885263442993164, |
| "learning_rate": 6.193458594293668e-06, |
| "loss": 0.3121, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.4585942936673626, |
| "grad_norm": 13.832782745361328, |
| "learning_rate": 6.16562282533055e-06, |
| "loss": 0.1614, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.465553235908142, |
| "grad_norm": 8.264083862304688, |
| "learning_rate": 6.137787056367432e-06, |
| "loss": 0.2014, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.4725121781489214, |
| "grad_norm": 10.630083084106445, |
| "learning_rate": 6.109951287404315e-06, |
| "loss": 0.198, |
| "step": 4990 |
| }, |
| { |
| "epoch": 3.4794711203897006, |
| "grad_norm": 12.914116859436035, |
| "learning_rate": 6.082115518441197e-06, |
| "loss": 0.27, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.4864300626304803, |
| "grad_norm": 9.667845726013184, |
| "learning_rate": 6.0542797494780806e-06, |
| "loss": 0.2419, |
| "step": 5010 |
| }, |
| { |
| "epoch": 3.4933890048712595, |
| "grad_norm": 12.074315071105957, |
| "learning_rate": 6.0264439805149625e-06, |
| "loss": 0.2857, |
| "step": 5020 |
| }, |
| { |
| "epoch": 3.500347947112039, |
| "grad_norm": 15.645792007446289, |
| "learning_rate": 5.998608211551845e-06, |
| "loss": 0.2042, |
| "step": 5030 |
| }, |
| { |
| "epoch": 3.5073068893528183, |
| "grad_norm": 9.472585678100586, |
| "learning_rate": 5.970772442588727e-06, |
| "loss": 0.1702, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.5142658315935975, |
| "grad_norm": 11.11557674407959, |
| "learning_rate": 5.942936673625609e-06, |
| "loss": 0.24, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.521224773834377, |
| "grad_norm": 6.724925518035889, |
| "learning_rate": 5.915100904662492e-06, |
| "loss": 0.2401, |
| "step": 5060 |
| }, |
| { |
| "epoch": 3.528183716075157, |
| "grad_norm": 32.468055725097656, |
| "learning_rate": 5.887265135699374e-06, |
| "loss": 0.1586, |
| "step": 5070 |
| }, |
| { |
| "epoch": 3.535142658315936, |
| "grad_norm": 19.15355110168457, |
| "learning_rate": 5.8594293667362565e-06, |
| "loss": 0.2534, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.542101600556715, |
| "grad_norm": 5.981065273284912, |
| "learning_rate": 5.831593597773138e-06, |
| "loss": 0.275, |
| "step": 5090 |
| }, |
| { |
| "epoch": 3.549060542797495, |
| "grad_norm": 13.749540328979492, |
| "learning_rate": 5.803757828810022e-06, |
| "loss": 0.2548, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.556019485038274, |
| "grad_norm": 11.47478199005127, |
| "learning_rate": 5.775922059846903e-06, |
| "loss": 0.2089, |
| "step": 5110 |
| }, |
| { |
| "epoch": 3.5629784272790537, |
| "grad_norm": 9.613821029663086, |
| "learning_rate": 5.748086290883787e-06, |
| "loss": 0.2008, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.569937369519833, |
| "grad_norm": 9.990856170654297, |
| "learning_rate": 5.7202505219206686e-06, |
| "loss": 0.1772, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.5768963117606125, |
| "grad_norm": 13.554731369018555, |
| "learning_rate": 5.692414752957551e-06, |
| "loss": 0.2308, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.5838552540013917, |
| "grad_norm": 13.590909004211426, |
| "learning_rate": 5.664578983994433e-06, |
| "loss": 0.1847, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.5908141962421714, |
| "grad_norm": 19.21977996826172, |
| "learning_rate": 5.636743215031316e-06, |
| "loss": 0.2994, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.5977731384829506, |
| "grad_norm": 12.067795753479004, |
| "learning_rate": 5.608907446068198e-06, |
| "loss": 0.2212, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.60473208072373, |
| "grad_norm": 23.037675857543945, |
| "learning_rate": 5.581071677105081e-06, |
| "loss": 0.2694, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.6116910229645094, |
| "grad_norm": 7.532259464263916, |
| "learning_rate": 5.553235908141963e-06, |
| "loss": 0.2941, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.6186499652052886, |
| "grad_norm": 10.377799987792969, |
| "learning_rate": 5.5254001391788445e-06, |
| "loss": 0.2152, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.6256089074460682, |
| "grad_norm": 7.491756916046143, |
| "learning_rate": 5.497564370215728e-06, |
| "loss": 0.2326, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.632567849686848, |
| "grad_norm": 13.305363655090332, |
| "learning_rate": 5.46972860125261e-06, |
| "loss": 0.1967, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.639526791927627, |
| "grad_norm": 8.822273254394531, |
| "learning_rate": 5.441892832289493e-06, |
| "loss": 0.2532, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.6464857341684063, |
| "grad_norm": 22.017900466918945, |
| "learning_rate": 5.414057063326375e-06, |
| "loss": 0.2643, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.653444676409186, |
| "grad_norm": 17.09214210510254, |
| "learning_rate": 5.3862212943632574e-06, |
| "loss": 0.2167, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.660403618649965, |
| "grad_norm": 14.493659973144531, |
| "learning_rate": 5.358385525400139e-06, |
| "loss": 0.2321, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.6673625608907447, |
| "grad_norm": 4.279122829437256, |
| "learning_rate": 5.330549756437022e-06, |
| "loss": 0.228, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.674321503131524, |
| "grad_norm": 4.165134429931641, |
| "learning_rate": 5.302713987473904e-06, |
| "loss": 0.2386, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.6812804453723036, |
| "grad_norm": 15.653182983398438, |
| "learning_rate": 5.274878218510787e-06, |
| "loss": 0.2298, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.688239387613083, |
| "grad_norm": 6.955724239349365, |
| "learning_rate": 5.247042449547669e-06, |
| "loss": 0.1807, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.695198329853862, |
| "grad_norm": 12.580881118774414, |
| "learning_rate": 5.219206680584552e-06, |
| "loss": 0.2604, |
| "step": 5310 |
| }, |
| { |
| "epoch": 3.7021572720946416, |
| "grad_norm": 9.050446510314941, |
| "learning_rate": 5.191370911621434e-06, |
| "loss": 0.2233, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.7091162143354213, |
| "grad_norm": 8.741286277770996, |
| "learning_rate": 5.163535142658317e-06, |
| "loss": 0.223, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.7160751565762005, |
| "grad_norm": 5.017666816711426, |
| "learning_rate": 5.135699373695199e-06, |
| "loss": 0.1976, |
| "step": 5340 |
| }, |
| { |
| "epoch": 3.7230340988169797, |
| "grad_norm": 15.6959228515625, |
| "learning_rate": 5.107863604732081e-06, |
| "loss": 0.2077, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.7299930410577593, |
| "grad_norm": 12.862638473510742, |
| "learning_rate": 5.0800278357689635e-06, |
| "loss": 0.1962, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.7369519832985385, |
| "grad_norm": 11.374602317810059, |
| "learning_rate": 5.0521920668058454e-06, |
| "loss": 0.2101, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.743910925539318, |
| "grad_norm": 25.180683135986328, |
| "learning_rate": 5.024356297842728e-06, |
| "loss": 0.2527, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.7508698677800973, |
| "grad_norm": 7.092601299285889, |
| "learning_rate": 4.996520528879611e-06, |
| "loss": 0.249, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.757828810020877, |
| "grad_norm": 12.866328239440918, |
| "learning_rate": 4.968684759916494e-06, |
| "loss": 0.1986, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.764787752261656, |
| "grad_norm": 15.909232139587402, |
| "learning_rate": 4.940848990953376e-06, |
| "loss": 0.2104, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.7717466945024354, |
| "grad_norm": 18.9605770111084, |
| "learning_rate": 4.9130132219902575e-06, |
| "loss": 0.2601, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.778705636743215, |
| "grad_norm": 21.599374771118164, |
| "learning_rate": 4.88517745302714e-06, |
| "loss": 0.222, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.7856645789839947, |
| "grad_norm": 16.166671752929688, |
| "learning_rate": 4.857341684064022e-06, |
| "loss": 0.2494, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.792623521224774, |
| "grad_norm": 5.08117151260376, |
| "learning_rate": 4.829505915100905e-06, |
| "loss": 0.2836, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.799582463465553, |
| "grad_norm": 20.023841857910156, |
| "learning_rate": 4.801670146137788e-06, |
| "loss": 0.2123, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.8065414057063327, |
| "grad_norm": 17.623476028442383, |
| "learning_rate": 4.77383437717467e-06, |
| "loss": 0.2018, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.813500347947112, |
| "grad_norm": 17.300357818603516, |
| "learning_rate": 4.745998608211552e-06, |
| "loss": 0.2031, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.8204592901878915, |
| "grad_norm": 19.605348587036133, |
| "learning_rate": 4.718162839248434e-06, |
| "loss": 0.1733, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.8274182324286707, |
| "grad_norm": 13.359166145324707, |
| "learning_rate": 4.690327070285317e-06, |
| "loss": 0.2236, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.8343771746694504, |
| "grad_norm": 22.73190689086914, |
| "learning_rate": 4.6624913013222e-06, |
| "loss": 0.2148, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.8413361169102296, |
| "grad_norm": 14.263452529907227, |
| "learning_rate": 4.634655532359082e-06, |
| "loss": 0.3008, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.848295059151009, |
| "grad_norm": 24.41339111328125, |
| "learning_rate": 4.6068197633959645e-06, |
| "loss": 0.2365, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.8552540013917884, |
| "grad_norm": 10.054245948791504, |
| "learning_rate": 4.578983994432846e-06, |
| "loss": 0.2201, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.862212943632568, |
| "grad_norm": 20.606000900268555, |
| "learning_rate": 4.551148225469729e-06, |
| "loss": 0.2901, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.8691718858733473, |
| "grad_norm": 8.361483573913574, |
| "learning_rate": 4.523312456506611e-06, |
| "loss": 0.2133, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.8761308281141265, |
| "grad_norm": 16.224584579467773, |
| "learning_rate": 4.495476687543494e-06, |
| "loss": 0.2648, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.883089770354906, |
| "grad_norm": 24.251644134521484, |
| "learning_rate": 4.467640918580376e-06, |
| "loss": 0.2505, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.8900487125956853, |
| "grad_norm": 10.059554100036621, |
| "learning_rate": 4.4398051496172585e-06, |
| "loss": 0.1715, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.897007654836465, |
| "grad_norm": 22.54900550842285, |
| "learning_rate": 4.41196938065414e-06, |
| "loss": 0.1525, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.903966597077244, |
| "grad_norm": 5.793692588806152, |
| "learning_rate": 4.384133611691023e-06, |
| "loss": 0.2345, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.910925539318024, |
| "grad_norm": 19.2098445892334, |
| "learning_rate": 4.356297842727906e-06, |
| "loss": 0.165, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.917884481558803, |
| "grad_norm": 11.589962005615234, |
| "learning_rate": 4.328462073764788e-06, |
| "loss": 0.2033, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.9248434237995826, |
| "grad_norm": 13.061795234680176, |
| "learning_rate": 4.300626304801671e-06, |
| "loss": 0.2517, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.931802366040362, |
| "grad_norm": 9.12142276763916, |
| "learning_rate": 4.272790535838553e-06, |
| "loss": 0.2068, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.9387613082811415, |
| "grad_norm": 14.601790428161621, |
| "learning_rate": 4.244954766875435e-06, |
| "loss": 0.2864, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.9457202505219207, |
| "grad_norm": 10.787036895751953, |
| "learning_rate": 4.217118997912318e-06, |
| "loss": 0.2286, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.9526791927627, |
| "grad_norm": 11.121417045593262, |
| "learning_rate": 4.1892832289492e-06, |
| "loss": 0.2385, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.9596381350034795, |
| "grad_norm": 11.553411483764648, |
| "learning_rate": 4.161447459986083e-06, |
| "loss": 0.2509, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.966597077244259, |
| "grad_norm": 12.59765625, |
| "learning_rate": 4.1336116910229655e-06, |
| "loss": 0.2478, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.9735560194850383, |
| "grad_norm": 10.153321266174316, |
| "learning_rate": 4.105775922059847e-06, |
| "loss": 0.2149, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.9805149617258175, |
| "grad_norm": 10.688750267028809, |
| "learning_rate": 4.077940153096729e-06, |
| "loss": 0.2423, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.987473903966597, |
| "grad_norm": 22.212329864501953, |
| "learning_rate": 4.050104384133612e-06, |
| "loss": 0.2626, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.9944328462073764, |
| "grad_norm": 7.783158302307129, |
| "learning_rate": 4.022268615170494e-06, |
| "loss": 0.2033, |
| "step": 5740 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8193211488250652, |
| "eval_f1": 0.8180624199818877, |
| "eval_loss": 0.6137004494667053, |
| "eval_precision": 0.8174134517321099, |
| "eval_recall": 0.8193211488250652, |
| "eval_runtime": 30.262, |
| "eval_samples_per_second": 189.842, |
| "eval_steps_per_second": 5.948, |
| "step": 5748 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 7185, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2096484874133504e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|