| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9912379178471235, |
| "eval_steps": 50000, |
| "global_step": 430000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0004610408920219179, |
| "grad_norm": 6.876699447631836, |
| "learning_rate": 4.997694795539891e-05, |
| "loss": 6.5362, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0009220817840438358, |
| "grad_norm": 5.820261478424072, |
| "learning_rate": 4.995389591079781e-05, |
| "loss": 2.9133, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0013831226760657536, |
| "grad_norm": 5.954094886779785, |
| "learning_rate": 4.993084386619671e-05, |
| "loss": 2.3126, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0018441635680876715, |
| "grad_norm": 8.361907005310059, |
| "learning_rate": 4.990779182159562e-05, |
| "loss": 2.1419, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0023052044601095893, |
| "grad_norm": 3.7237906455993652, |
| "learning_rate": 4.9884739776994524e-05, |
| "loss": 2.0302, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.002766245352131507, |
| "grad_norm": 2.4539265632629395, |
| "learning_rate": 4.986168773239342e-05, |
| "loss": 1.9613, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.003227286244153425, |
| "grad_norm": 4.33223819732666, |
| "learning_rate": 4.983863568779233e-05, |
| "loss": 1.9683, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.003688327136175343, |
| "grad_norm": 5.55894136428833, |
| "learning_rate": 4.9815583643191234e-05, |
| "loss": 1.8365, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.004149368028197261, |
| "grad_norm": 2.5861189365386963, |
| "learning_rate": 4.979253159859014e-05, |
| "loss": 1.7759, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0046104089202191785, |
| "grad_norm": 3.7939908504486084, |
| "learning_rate": 4.9769479553989046e-05, |
| "loss": 1.7706, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.005071449812241097, |
| "grad_norm": 6.777012825012207, |
| "learning_rate": 4.974642750938795e-05, |
| "loss": 1.8386, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.005532490704263014, |
| "grad_norm": 3.8171310424804688, |
| "learning_rate": 4.972337546478685e-05, |
| "loss": 1.7511, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.005993531596284933, |
| "grad_norm": 3.042823076248169, |
| "learning_rate": 4.9700323420185756e-05, |
| "loss": 1.7009, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.00645457248830685, |
| "grad_norm": 3.2283356189727783, |
| "learning_rate": 4.967727137558466e-05, |
| "loss": 1.6847, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.006915613380328769, |
| "grad_norm": 2.336369037628174, |
| "learning_rate": 4.965421933098357e-05, |
| "loss": 1.6153, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.007376654272350686, |
| "grad_norm": 7.53791618347168, |
| "learning_rate": 4.9631167286382466e-05, |
| "loss": 1.645, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.007837695164372604, |
| "grad_norm": 3.086069345474243, |
| "learning_rate": 4.960811524178137e-05, |
| "loss": 1.5648, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.008298736056394522, |
| "grad_norm": 4.105820178985596, |
| "learning_rate": 4.958506319718028e-05, |
| "loss": 1.5533, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.00875977694841644, |
| "grad_norm": 3.5181972980499268, |
| "learning_rate": 4.9562011152579176e-05, |
| "loss": 1.5467, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.009220817840438357, |
| "grad_norm": 3.0619754791259766, |
| "learning_rate": 4.953895910797808e-05, |
| "loss": 1.5904, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.009681858732460276, |
| "grad_norm": 3.566425085067749, |
| "learning_rate": 4.951590706337699e-05, |
| "loss": 1.4991, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.010142899624482194, |
| "grad_norm": 2.424494743347168, |
| "learning_rate": 4.949285501877589e-05, |
| "loss": 1.4342, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.010603940516504111, |
| "grad_norm": 2.841980218887329, |
| "learning_rate": 4.946980297417479e-05, |
| "loss": 1.4694, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.011064981408526029, |
| "grad_norm": 3.2722415924072266, |
| "learning_rate": 4.94467509295737e-05, |
| "loss": 1.4593, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.011526022300547946, |
| "grad_norm": 3.9442007541656494, |
| "learning_rate": 4.9423698884972604e-05, |
| "loss": 1.4766, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.011987063192569865, |
| "grad_norm": 3.1083531379699707, |
| "learning_rate": 4.940064684037151e-05, |
| "loss": 1.3964, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.012448104084591783, |
| "grad_norm": 2.1749391555786133, |
| "learning_rate": 4.9377594795770415e-05, |
| "loss": 1.4046, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.0129091449766137, |
| "grad_norm": 3.8060245513916016, |
| "learning_rate": 4.935454275116932e-05, |
| "loss": 1.4299, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.013370185868635618, |
| "grad_norm": 3.412968397140503, |
| "learning_rate": 4.9331490706568226e-05, |
| "loss": 1.4236, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.013831226760657537, |
| "grad_norm": 3.04774808883667, |
| "learning_rate": 4.9308438661967125e-05, |
| "loss": 1.44, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.014292267652679455, |
| "grad_norm": 4.18248176574707, |
| "learning_rate": 4.928538661736603e-05, |
| "loss": 1.3892, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.014753308544701372, |
| "grad_norm": 4.440049648284912, |
| "learning_rate": 4.9262334572764937e-05, |
| "loss": 1.3493, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.01521434943672329, |
| "grad_norm": 2.509575128555298, |
| "learning_rate": 4.9239282528163835e-05, |
| "loss": 1.409, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.01567539032874521, |
| "grad_norm": 3.4608070850372314, |
| "learning_rate": 4.921623048356274e-05, |
| "loss": 1.4013, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.016136431220767126, |
| "grad_norm": 6.1122331619262695, |
| "learning_rate": 4.919317843896165e-05, |
| "loss": 1.4161, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.016597472112789044, |
| "grad_norm": 2.507805824279785, |
| "learning_rate": 4.917012639436055e-05, |
| "loss": 1.3504, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.01705851300481096, |
| "grad_norm": 4.3215012550354, |
| "learning_rate": 4.914707434975945e-05, |
| "loss": 1.3324, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.01751955389683288, |
| "grad_norm": 2.7966041564941406, |
| "learning_rate": 4.912402230515836e-05, |
| "loss": 1.3049, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.017980594788854797, |
| "grad_norm": 2.308271884918213, |
| "learning_rate": 4.910097026055726e-05, |
| "loss": 1.4026, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.018441635680876714, |
| "grad_norm": 2.970160484313965, |
| "learning_rate": 4.907791821595616e-05, |
| "loss": 1.363, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.01890267657289863, |
| "grad_norm": 2.760039806365967, |
| "learning_rate": 4.905486617135507e-05, |
| "loss": 1.3202, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.019363717464920552, |
| "grad_norm": 3.558941125869751, |
| "learning_rate": 4.903181412675397e-05, |
| "loss": 1.3337, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.01982475835694247, |
| "grad_norm": 2.800959587097168, |
| "learning_rate": 4.900876208215288e-05, |
| "loss": 1.2881, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.020285799248964387, |
| "grad_norm": 3.2539665699005127, |
| "learning_rate": 4.8985710037551784e-05, |
| "loss": 1.2243, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.020746840140986305, |
| "grad_norm": 4.043671131134033, |
| "learning_rate": 4.896265799295069e-05, |
| "loss": 1.2051, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.021207881033008222, |
| "grad_norm": 1.6535764932632446, |
| "learning_rate": 4.8939605948349596e-05, |
| "loss": 1.1973, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.02166892192503014, |
| "grad_norm": 2.395977020263672, |
| "learning_rate": 4.8916553903748495e-05, |
| "loss": 1.2781, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.022129962817052058, |
| "grad_norm": 2.445537567138672, |
| "learning_rate": 4.88935018591474e-05, |
| "loss": 1.2479, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.022591003709073975, |
| "grad_norm": 2.0848629474639893, |
| "learning_rate": 4.8870449814546306e-05, |
| "loss": 1.2139, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.023052044601095893, |
| "grad_norm": 2.457559585571289, |
| "learning_rate": 4.8847397769945205e-05, |
| "loss": 1.1932, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.023513085493117813, |
| "grad_norm": 3.07852840423584, |
| "learning_rate": 4.882434572534411e-05, |
| "loss": 1.179, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.02397412638513973, |
| "grad_norm": 2.2961785793304443, |
| "learning_rate": 4.8801293680743016e-05, |
| "loss": 1.1815, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.02443516727716165, |
| "grad_norm": 1.7498642206192017, |
| "learning_rate": 4.877824163614192e-05, |
| "loss": 1.1654, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.024896208169183566, |
| "grad_norm": 1.7616724967956543, |
| "learning_rate": 4.875518959154082e-05, |
| "loss": 1.2226, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.025357249061205483, |
| "grad_norm": 1.761996865272522, |
| "learning_rate": 4.8732137546939726e-05, |
| "loss": 1.1674, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.0258182899532274, |
| "grad_norm": 2.531437635421753, |
| "learning_rate": 4.870908550233863e-05, |
| "loss": 1.1966, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.02627933084524932, |
| "grad_norm": 2.4334516525268555, |
| "learning_rate": 4.868603345773753e-05, |
| "loss": 1.1132, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.026740371737271236, |
| "grad_norm": 3.8797712326049805, |
| "learning_rate": 4.866298141313644e-05, |
| "loss": 1.1435, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.027201412629293154, |
| "grad_norm": 3.8280975818634033, |
| "learning_rate": 4.863992936853534e-05, |
| "loss": 1.1939, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.027662453521315074, |
| "grad_norm": 4.0427703857421875, |
| "learning_rate": 4.861687732393425e-05, |
| "loss": 1.0749, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.028123494413336992, |
| "grad_norm": 2.366419553756714, |
| "learning_rate": 4.8593825279333154e-05, |
| "loss": 1.1491, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.02858453530535891, |
| "grad_norm": 2.7581288814544678, |
| "learning_rate": 4.857077323473206e-05, |
| "loss": 1.1654, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.029045576197380827, |
| "grad_norm": 1.7824950218200684, |
| "learning_rate": 4.8547721190130965e-05, |
| "loss": 1.0653, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.029506617089402744, |
| "grad_norm": 3.288841485977173, |
| "learning_rate": 4.8524669145529864e-05, |
| "loss": 1.107, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.029967657981424662, |
| "grad_norm": 2.365614652633667, |
| "learning_rate": 4.850161710092877e-05, |
| "loss": 1.1595, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.03042869887344658, |
| "grad_norm": 3.3963623046875, |
| "learning_rate": 4.8478565056327675e-05, |
| "loss": 1.1216, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.030889739765468497, |
| "grad_norm": 2.7090468406677246, |
| "learning_rate": 4.845551301172658e-05, |
| "loss": 1.1002, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.03135078065749042, |
| "grad_norm": 2.3977348804473877, |
| "learning_rate": 4.843246096712548e-05, |
| "loss": 1.0468, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.031811821549512335, |
| "grad_norm": 1.6876981258392334, |
| "learning_rate": 4.8409408922524386e-05, |
| "loss": 1.0675, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.03227286244153425, |
| "grad_norm": 2.1002566814422607, |
| "learning_rate": 4.838635687792329e-05, |
| "loss": 1.0378, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.03273390333355617, |
| "grad_norm": 1.5463937520980835, |
| "learning_rate": 4.836330483332219e-05, |
| "loss": 1.0406, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.03319494422557809, |
| "grad_norm": 2.2645134925842285, |
| "learning_rate": 4.8340252788721096e-05, |
| "loss": 1.118, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.033655985117600005, |
| "grad_norm": 4.435282230377197, |
| "learning_rate": 4.831720074412e-05, |
| "loss": 1.0542, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.03411702600962192, |
| "grad_norm": 2.544870615005493, |
| "learning_rate": 4.829414869951891e-05, |
| "loss": 1.0016, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.03457806690164384, |
| "grad_norm": 1.6866127252578735, |
| "learning_rate": 4.8271096654917806e-05, |
| "loss": 1.0184, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.03503910779366576, |
| "grad_norm": 2.1351208686828613, |
| "learning_rate": 4.824804461031671e-05, |
| "loss": 1.0695, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.035500148685687676, |
| "grad_norm": 2.4183170795440674, |
| "learning_rate": 4.8224992565715624e-05, |
| "loss": 1.0519, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.03596118957770959, |
| "grad_norm": 3.3186428546905518, |
| "learning_rate": 4.820194052111452e-05, |
| "loss": 1.0147, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.03642223046973151, |
| "grad_norm": 2.0496957302093506, |
| "learning_rate": 4.817888847651343e-05, |
| "loss": 1.054, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.03688327136175343, |
| "grad_norm": 2.232973575592041, |
| "learning_rate": 4.8155836431912334e-05, |
| "loss": 1.0129, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.037344312253775346, |
| "grad_norm": 2.1059677600860596, |
| "learning_rate": 4.813278438731123e-05, |
| "loss": 1.0563, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.03780535314579726, |
| "grad_norm": 3.6700918674468994, |
| "learning_rate": 4.810973234271014e-05, |
| "loss": 1.0314, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.03826639403781919, |
| "grad_norm": 2.078857421875, |
| "learning_rate": 4.8086680298109045e-05, |
| "loss": 1.0505, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.038727434929841105, |
| "grad_norm": 3.156902551651001, |
| "learning_rate": 4.806362825350795e-05, |
| "loss": 0.9496, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.03918847582186302, |
| "grad_norm": 5.415482044219971, |
| "learning_rate": 4.804057620890685e-05, |
| "loss": 1.0288, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.03964951671388494, |
| "grad_norm": 1.94370698928833, |
| "learning_rate": 4.8017524164305755e-05, |
| "loss": 1.0689, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.04011055760590686, |
| "grad_norm": 5.076870441436768, |
| "learning_rate": 4.799447211970466e-05, |
| "loss": 0.9809, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.040571598497928775, |
| "grad_norm": 1.5371116399765015, |
| "learning_rate": 4.797142007510356e-05, |
| "loss": 0.9868, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.04103263938995069, |
| "grad_norm": 3.8730454444885254, |
| "learning_rate": 4.7948368030502465e-05, |
| "loss": 0.9093, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.04149368028197261, |
| "grad_norm": 2.304157257080078, |
| "learning_rate": 4.792531598590137e-05, |
| "loss": 0.9993, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.04195472117399453, |
| "grad_norm": 3.116572856903076, |
| "learning_rate": 4.7902263941300277e-05, |
| "loss": 1.0456, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.042415762066016445, |
| "grad_norm": 2.1732380390167236, |
| "learning_rate": 4.7879211896699175e-05, |
| "loss": 0.9607, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.04287680295803836, |
| "grad_norm": 3.363409996032715, |
| "learning_rate": 4.785615985209808e-05, |
| "loss": 1.0467, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.04333784385006028, |
| "grad_norm": 3.746406078338623, |
| "learning_rate": 4.7833107807496994e-05, |
| "loss": 0.9335, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.0437988847420822, |
| "grad_norm": 1.5440335273742676, |
| "learning_rate": 4.781005576289589e-05, |
| "loss": 0.9262, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.044259925634104115, |
| "grad_norm": 3.108581066131592, |
| "learning_rate": 4.77870037182948e-05, |
| "loss": 0.9564, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.04472096652612603, |
| "grad_norm": 2.0899717807769775, |
| "learning_rate": 4.7763951673693704e-05, |
| "loss": 0.9544, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.04518200741814795, |
| "grad_norm": 2.250314474105835, |
| "learning_rate": 4.77408996290926e-05, |
| "loss": 0.9592, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.04564304831016987, |
| "grad_norm": 1.656875729560852, |
| "learning_rate": 4.771784758449151e-05, |
| "loss": 0.9054, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.046104089202191785, |
| "grad_norm": 2.7267401218414307, |
| "learning_rate": 4.7694795539890414e-05, |
| "loss": 0.932, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.04656513009421371, |
| "grad_norm": 1.7133885622024536, |
| "learning_rate": 4.767174349528932e-05, |
| "loss": 0.9014, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.04702617098623563, |
| "grad_norm": 1.699610710144043, |
| "learning_rate": 4.764869145068822e-05, |
| "loss": 0.9469, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.047487211878257544, |
| "grad_norm": 2.0547351837158203, |
| "learning_rate": 4.7625639406087124e-05, |
| "loss": 0.911, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.04794825277027946, |
| "grad_norm": 2.4188601970672607, |
| "learning_rate": 4.760258736148603e-05, |
| "loss": 0.9195, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.04840929366230138, |
| "grad_norm": 1.6855781078338623, |
| "learning_rate": 4.757953531688493e-05, |
| "loss": 0.9212, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.0488703345543233, |
| "grad_norm": 3.0659608840942383, |
| "learning_rate": 4.7556483272283835e-05, |
| "loss": 0.9408, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.049331375446345214, |
| "grad_norm": 1.8149137496948242, |
| "learning_rate": 4.753343122768274e-05, |
| "loss": 0.8916, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.04979241633836713, |
| "grad_norm": 3.0508041381835938, |
| "learning_rate": 4.7510379183081646e-05, |
| "loss": 0.9197, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.05025345723038905, |
| "grad_norm": 1.7217645645141602, |
| "learning_rate": 4.7487327138480545e-05, |
| "loss": 0.977, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.05071449812241097, |
| "grad_norm": 2.8696203231811523, |
| "learning_rate": 4.746427509387945e-05, |
| "loss": 0.8998, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.051175539014432884, |
| "grad_norm": 2.4317526817321777, |
| "learning_rate": 4.744122304927836e-05, |
| "loss": 0.895, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.0516365799064548, |
| "grad_norm": 1.4486078023910522, |
| "learning_rate": 4.741817100467726e-05, |
| "loss": 0.8794, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.05209762079847672, |
| "grad_norm": 2.6138267517089844, |
| "learning_rate": 4.739511896007617e-05, |
| "loss": 0.906, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.05255866169049864, |
| "grad_norm": 1.7025116682052612, |
| "learning_rate": 4.737206691547507e-05, |
| "loss": 0.9726, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.053019702582520555, |
| "grad_norm": 1.7836490869522095, |
| "learning_rate": 4.734901487087398e-05, |
| "loss": 0.8891, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.05348074347454247, |
| "grad_norm": 2.3155412673950195, |
| "learning_rate": 4.732596282627288e-05, |
| "loss": 0.8737, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.05394178436656439, |
| "grad_norm": 3.3721256256103516, |
| "learning_rate": 4.7302910781671783e-05, |
| "loss": 0.9199, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.05440282525858631, |
| "grad_norm": 1.807015061378479, |
| "learning_rate": 4.727985873707069e-05, |
| "loss": 0.9182, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.05486386615060823, |
| "grad_norm": 2.1808011531829834, |
| "learning_rate": 4.725680669246959e-05, |
| "loss": 0.9268, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.05532490704263015, |
| "grad_norm": 3.614689350128174, |
| "learning_rate": 4.7233754647868494e-05, |
| "loss": 0.8777, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.055785947934652066, |
| "grad_norm": 1.881955623626709, |
| "learning_rate": 4.72107026032674e-05, |
| "loss": 0.8564, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.056246988826673984, |
| "grad_norm": 2.8941149711608887, |
| "learning_rate": 4.7187650558666305e-05, |
| "loss": 0.8296, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.0567080297186959, |
| "grad_norm": 1.4242929220199585, |
| "learning_rate": 4.7164598514065204e-05, |
| "loss": 0.7617, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.05716907061071782, |
| "grad_norm": 4.67744255065918, |
| "learning_rate": 4.714154646946411e-05, |
| "loss": 0.9068, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.057630111502739736, |
| "grad_norm": 2.4979476928710938, |
| "learning_rate": 4.7118494424863015e-05, |
| "loss": 0.8382, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.058091152394761654, |
| "grad_norm": 2.030360460281372, |
| "learning_rate": 4.7095442380261914e-05, |
| "loss": 0.8524, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.05855219328678357, |
| "grad_norm": 2.282217502593994, |
| "learning_rate": 4.707239033566083e-05, |
| "loss": 0.8889, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.05901323417880549, |
| "grad_norm": 1.7318954467773438, |
| "learning_rate": 4.704933829105973e-05, |
| "loss": 0.9187, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.059474275070827406, |
| "grad_norm": 1.8734960556030273, |
| "learning_rate": 4.702628624645863e-05, |
| "loss": 0.7524, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.059935315962849324, |
| "grad_norm": 2.359909772872925, |
| "learning_rate": 4.700323420185754e-05, |
| "loss": 0.8388, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.06039635685487124, |
| "grad_norm": 1.9811803102493286, |
| "learning_rate": 4.698018215725644e-05, |
| "loss": 0.8994, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.06085739774689316, |
| "grad_norm": 1.4820138216018677, |
| "learning_rate": 4.695713011265535e-05, |
| "loss": 0.8098, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.061318438638915077, |
| "grad_norm": 3.8355236053466797, |
| "learning_rate": 4.693407806805425e-05, |
| "loss": 0.8365, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.061779479530936994, |
| "grad_norm": 1.9260506629943848, |
| "learning_rate": 4.691102602345315e-05, |
| "loss": 0.8508, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.06224052042295891, |
| "grad_norm": 1.6395090818405151, |
| "learning_rate": 4.688797397885206e-05, |
| "loss": 0.8537, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.06270156131498084, |
| "grad_norm": 0.8920634984970093, |
| "learning_rate": 4.686492193425096e-05, |
| "loss": 0.857, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.06316260220700275, |
| "grad_norm": 2.063812494277954, |
| "learning_rate": 4.684186988964986e-05, |
| "loss": 0.835, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.06362364309902467, |
| "grad_norm": 1.6132125854492188, |
| "learning_rate": 4.681881784504877e-05, |
| "loss": 0.8629, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.06408468399104658, |
| "grad_norm": 1.4121313095092773, |
| "learning_rate": 4.6795765800447674e-05, |
| "loss": 0.8026, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.0645457248830685, |
| "grad_norm": 1.3277547359466553, |
| "learning_rate": 4.677271375584657e-05, |
| "loss": 0.8315, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.06500676577509042, |
| "grad_norm": 1.51455819606781, |
| "learning_rate": 4.674966171124548e-05, |
| "loss": 0.8664, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.06546780666711234, |
| "grad_norm": 16.388320922851562, |
| "learning_rate": 4.6726609666644385e-05, |
| "loss": 0.8476, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.06592884755913425, |
| "grad_norm": 2.9211268424987793, |
| "learning_rate": 4.6703557622043284e-05, |
| "loss": 0.9153, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.06638988845115618, |
| "grad_norm": 1.7601099014282227, |
| "learning_rate": 4.6680505577442196e-05, |
| "loss": 0.8675, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.06685092934317809, |
| "grad_norm": 3.691970109939575, |
| "learning_rate": 4.66574535328411e-05, |
| "loss": 0.7636, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.06731197023520001, |
| "grad_norm": 0.8975255489349365, |
| "learning_rate": 4.663440148824e-05, |
| "loss": 0.7958, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.06777301112722194, |
| "grad_norm": 1.8996187448501587, |
| "learning_rate": 4.6611349443638906e-05, |
| "loss": 0.7784, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.06823405201924385, |
| "grad_norm": 2.2210590839385986, |
| "learning_rate": 4.658829739903781e-05, |
| "loss": 0.8002, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.06869509291126577, |
| "grad_norm": 2.341740369796753, |
| "learning_rate": 4.656524535443672e-05, |
| "loss": 0.778, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.06915613380328768, |
| "grad_norm": 2.567145347595215, |
| "learning_rate": 4.6542193309835617e-05, |
| "loss": 0.8061, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.0696171746953096, |
| "grad_norm": 0.8251648545265198, |
| "learning_rate": 4.651914126523452e-05, |
| "loss": 0.8163, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.07007821558733152, |
| "grad_norm": 3.3493523597717285, |
| "learning_rate": 4.649608922063343e-05, |
| "loss": 0.7326, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.07053925647935344, |
| "grad_norm": 3.1266913414001465, |
| "learning_rate": 4.6473037176032334e-05, |
| "loss": 0.7925, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.07100029737137535, |
| "grad_norm": 1.5895702838897705, |
| "learning_rate": 4.644998513143123e-05, |
| "loss": 0.7932, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.07146133826339728, |
| "grad_norm": 1.4103891849517822, |
| "learning_rate": 4.642693308683014e-05, |
| "loss": 0.7583, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.07192237915541919, |
| "grad_norm": 1.4762630462646484, |
| "learning_rate": 4.6403881042229044e-05, |
| "loss": 0.7364, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.07238342004744111, |
| "grad_norm": 1.4868961572647095, |
| "learning_rate": 4.638082899762794e-05, |
| "loss": 0.7874, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.07284446093946302, |
| "grad_norm": 1.9157131910324097, |
| "learning_rate": 4.635777695302685e-05, |
| "loss": 0.7669, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.07330550183148495, |
| "grad_norm": 1.249605417251587, |
| "learning_rate": 4.6334724908425754e-05, |
| "loss": 0.7711, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.07376654272350686, |
| "grad_norm": 2.004805326461792, |
| "learning_rate": 4.631167286382466e-05, |
| "loss": 0.7518, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.07422758361552878, |
| "grad_norm": 1.682356595993042, |
| "learning_rate": 4.6288620819223565e-05, |
| "loss": 0.7534, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.07468862450755069, |
| "grad_norm": 2.679586887359619, |
| "learning_rate": 4.626556877462247e-05, |
| "loss": 0.8238, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.07514966539957262, |
| "grad_norm": 1.364603042602539, |
| "learning_rate": 4.624251673002138e-05, |
| "loss": 0.8159, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.07561070629159453, |
| "grad_norm": 2.2733583450317383, |
| "learning_rate": 4.6219464685420276e-05, |
| "loss": 0.7822, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.07607174718361645, |
| "grad_norm": 2.5104455947875977, |
| "learning_rate": 4.619641264081918e-05, |
| "loss": 0.7664, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.07653278807563837, |
| "grad_norm": 1.4707565307617188, |
| "learning_rate": 4.617336059621809e-05, |
| "loss": 0.7817, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.07699382896766029, |
| "grad_norm": 1.9409255981445312, |
| "learning_rate": 4.6150308551616986e-05, |
| "loss": 0.8272, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.07745486985968221, |
| "grad_norm": 1.9460760354995728, |
| "learning_rate": 4.612725650701589e-05, |
| "loss": 0.815, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.07791591075170412, |
| "grad_norm": 2.3821299076080322, |
| "learning_rate": 4.61042044624148e-05, |
| "loss": 0.7747, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.07837695164372604, |
| "grad_norm": 1.8464001417160034, |
| "learning_rate": 4.60811524178137e-05, |
| "loss": 0.7529, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.07883799253574796, |
| "grad_norm": 2.189345121383667, |
| "learning_rate": 4.60581003732126e-05, |
| "loss": 0.7485, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.07929903342776988, |
| "grad_norm": 1.4213758707046509, |
| "learning_rate": 4.603504832861151e-05, |
| "loss": 0.7748, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.07976007431979179, |
| "grad_norm": 1.6908587217330933, |
| "learning_rate": 4.601199628401041e-05, |
| "loss": 0.7975, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.08022111521181371, |
| "grad_norm": 1.0378413200378418, |
| "learning_rate": 4.598894423940931e-05, |
| "loss": 0.7697, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.08068215610383563, |
| "grad_norm": 1.9026026725769043, |
| "learning_rate": 4.596589219480822e-05, |
| "loss": 0.7898, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.08114319699585755, |
| "grad_norm": 3.3741543292999268, |
| "learning_rate": 4.5942840150207123e-05, |
| "loss": 0.7273, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.08160423788787946, |
| "grad_norm": 1.1691900491714478, |
| "learning_rate": 4.591978810560603e-05, |
| "loss": 0.8134, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.08206527877990138, |
| "grad_norm": 1.40901780128479, |
| "learning_rate": 4.5896736061004935e-05, |
| "loss": 0.7824, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.0825263196719233, |
| "grad_norm": 2.224029064178467, |
| "learning_rate": 4.587368401640384e-05, |
| "loss": 0.7151, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.08298736056394522, |
| "grad_norm": 2.2175581455230713, |
| "learning_rate": 4.5850631971802746e-05, |
| "loss": 0.7764, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.08344840145596713, |
| "grad_norm": 1.4262895584106445, |
| "learning_rate": 4.5827579927201645e-05, |
| "loss": 0.8068, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.08390944234798905, |
| "grad_norm": 1.3810303211212158, |
| "learning_rate": 4.580452788260055e-05, |
| "loss": 0.7415, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.08437048324001098, |
| "grad_norm": 1.2411589622497559, |
| "learning_rate": 4.5781475837999456e-05, |
| "loss": 0.7406, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.08483152413203289, |
| "grad_norm": 1.8816428184509277, |
| "learning_rate": 4.5758423793398355e-05, |
| "loss": 0.7843, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.08529256502405481, |
| "grad_norm": 0.910955548286438, |
| "learning_rate": 4.573537174879726e-05, |
| "loss": 0.7791, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.08575360591607673, |
| "grad_norm": 2.3837499618530273, |
| "learning_rate": 4.571231970419617e-05, |
| "loss": 0.7272, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.08621464680809865, |
| "grad_norm": 1.2091758251190186, |
| "learning_rate": 4.568926765959507e-05, |
| "loss": 0.7586, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.08667568770012056, |
| "grad_norm": 2.031092643737793, |
| "learning_rate": 4.566621561499397e-05, |
| "loss": 0.7467, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.08713672859214248, |
| "grad_norm": 1.8834586143493652, |
| "learning_rate": 4.564316357039288e-05, |
| "loss": 0.7743, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.0875977694841644, |
| "grad_norm": 4.032400131225586, |
| "learning_rate": 4.562011152579178e-05, |
| "loss": 0.7581, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.08805881037618632, |
| "grad_norm": 1.3809504508972168, |
| "learning_rate": 4.559705948119069e-05, |
| "loss": 0.7752, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.08851985126820823, |
| "grad_norm": 2.5716593265533447, |
| "learning_rate": 4.557400743658959e-05, |
| "loss": 0.7599, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.08898089216023015, |
| "grad_norm": 1.1471354961395264, |
| "learning_rate": 4.555095539198849e-05, |
| "loss": 0.7802, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.08944193305225207, |
| "grad_norm": 1.757161259651184, |
| "learning_rate": 4.55279033473874e-05, |
| "loss": 0.7891, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.08990297394427399, |
| "grad_norm": 1.920569896697998, |
| "learning_rate": 4.5504851302786304e-05, |
| "loss": 0.833, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.0903640148362959, |
| "grad_norm": 1.7894421815872192, |
| "learning_rate": 4.548179925818521e-05, |
| "loss": 0.7353, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.09082505572831782, |
| "grad_norm": 1.6656538248062134, |
| "learning_rate": 4.5458747213584115e-05, |
| "loss": 0.7091, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.09128609662033974, |
| "grad_norm": 2.3881382942199707, |
| "learning_rate": 4.5435695168983014e-05, |
| "loss": 0.7609, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.09174713751236166, |
| "grad_norm": 2.9305579662323, |
| "learning_rate": 4.541264312438192e-05, |
| "loss": 0.785, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.09220817840438357, |
| "grad_norm": 1.734604835510254, |
| "learning_rate": 4.5389591079780826e-05, |
| "loss": 0.8049, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.0926692192964055, |
| "grad_norm": 2.1614363193511963, |
| "learning_rate": 4.536653903517973e-05, |
| "loss": 0.7618, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.09313026018842742, |
| "grad_norm": 1.1229090690612793, |
| "learning_rate": 4.534348699057863e-05, |
| "loss": 0.7517, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.09359130108044933, |
| "grad_norm": 2.0106265544891357, |
| "learning_rate": 4.5320434945977536e-05, |
| "loss": 0.7922, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.09405234197247125, |
| "grad_norm": 2.5871689319610596, |
| "learning_rate": 4.529738290137644e-05, |
| "loss": 0.6778, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.09451338286449316, |
| "grad_norm": 1.3384044170379639, |
| "learning_rate": 4.527433085677534e-05, |
| "loss": 0.8056, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.09497442375651509, |
| "grad_norm": 2.1800479888916016, |
| "learning_rate": 4.5251278812174246e-05, |
| "loss": 0.771, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.095435464648537, |
| "grad_norm": 2.0507094860076904, |
| "learning_rate": 4.522822676757315e-05, |
| "loss": 0.758, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.09589650554055892, |
| "grad_norm": 0.8887900710105896, |
| "learning_rate": 4.520517472297206e-05, |
| "loss": 0.7563, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.09635754643258083, |
| "grad_norm": 2.479279041290283, |
| "learning_rate": 4.5182122678370956e-05, |
| "loss": 0.7284, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.09681858732460276, |
| "grad_norm": 1.2137857675552368, |
| "learning_rate": 4.515907063376986e-05, |
| "loss": 0.7773, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.09727962821662467, |
| "grad_norm": 1.0529214143753052, |
| "learning_rate": 4.513601858916877e-05, |
| "loss": 0.7663, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.0977406691086466, |
| "grad_norm": 1.7939465045928955, |
| "learning_rate": 4.5112966544567674e-05, |
| "loss": 0.7695, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.0982017100006685, |
| "grad_norm": 0.4527842104434967, |
| "learning_rate": 4.508991449996658e-05, |
| "loss": 0.7037, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.09866275089269043, |
| "grad_norm": 1.5540140867233276, |
| "learning_rate": 4.5066862455365485e-05, |
| "loss": 0.7478, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.09912379178471234, |
| "grad_norm": 1.9301183223724365, |
| "learning_rate": 4.5043810410764384e-05, |
| "loss": 0.7098, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.09958483267673426, |
| "grad_norm": 2.3165171146392822, |
| "learning_rate": 4.502075836616329e-05, |
| "loss": 0.7192, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.10004587356875617, |
| "grad_norm": 2.4089784622192383, |
| "learning_rate": 4.4997706321562195e-05, |
| "loss": 0.7016, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.1005069144607781, |
| "grad_norm": 1.5298134088516235, |
| "learning_rate": 4.49746542769611e-05, |
| "loss": 0.7295, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.10096795535280002, |
| "grad_norm": 1.7216567993164062, |
| "learning_rate": 4.495160223236e-05, |
| "loss": 0.7603, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.10142899624482193, |
| "grad_norm": 2.678551435470581, |
| "learning_rate": 4.4928550187758905e-05, |
| "loss": 0.7225, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.10189003713684386, |
| "grad_norm": 2.051182985305786, |
| "learning_rate": 4.490549814315781e-05, |
| "loss": 0.7398, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.10235107802886577, |
| "grad_norm": 1.0527026653289795, |
| "learning_rate": 4.488244609855671e-05, |
| "loss": 0.7041, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.1028121189208877, |
| "grad_norm": 2.363438367843628, |
| "learning_rate": 4.4859394053955616e-05, |
| "loss": 0.7273, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.1032731598129096, |
| "grad_norm": 3.6583263874053955, |
| "learning_rate": 4.483634200935452e-05, |
| "loss": 0.7321, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.10373420070493153, |
| "grad_norm": 1.391920804977417, |
| "learning_rate": 4.481328996475343e-05, |
| "loss": 0.7498, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.10419524159695344, |
| "grad_norm": 1.3391286134719849, |
| "learning_rate": 4.4790237920152326e-05, |
| "loss": 0.7436, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.10465628248897536, |
| "grad_norm": 1.6960753202438354, |
| "learning_rate": 4.476718587555123e-05, |
| "loss": 0.6681, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.10511732338099727, |
| "grad_norm": 1.6384496688842773, |
| "learning_rate": 4.474413383095014e-05, |
| "loss": 0.6966, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.1055783642730192, |
| "grad_norm": 2.391704559326172, |
| "learning_rate": 4.472108178634904e-05, |
| "loss": 0.7039, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.10603940516504111, |
| "grad_norm": 1.6314672231674194, |
| "learning_rate": 4.469802974174795e-05, |
| "loss": 0.715, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.10650044605706303, |
| "grad_norm": 0.872035026550293, |
| "learning_rate": 4.4674977697146854e-05, |
| "loss": 0.7375, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.10696148694908494, |
| "grad_norm": 2.016697645187378, |
| "learning_rate": 4.465192565254575e-05, |
| "loss": 0.7388, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.10742252784110687, |
| "grad_norm": 2.294455051422119, |
| "learning_rate": 4.462887360794466e-05, |
| "loss": 0.7218, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.10788356873312878, |
| "grad_norm": 1.2068428993225098, |
| "learning_rate": 4.4605821563343564e-05, |
| "loss": 0.6365, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.1083446096251507, |
| "grad_norm": 2.1000618934631348, |
| "learning_rate": 4.458276951874247e-05, |
| "loss": 0.6978, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.10880565051717261, |
| "grad_norm": 2.496563673019409, |
| "learning_rate": 4.455971747414137e-05, |
| "loss": 0.6807, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.10926669140919454, |
| "grad_norm": 1.9439219236373901, |
| "learning_rate": 4.4536665429540275e-05, |
| "loss": 0.7186, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.10972773230121646, |
| "grad_norm": 1.817345142364502, |
| "learning_rate": 4.451361338493918e-05, |
| "loss": 0.7519, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.11018877319323837, |
| "grad_norm": 2.6443488597869873, |
| "learning_rate": 4.4490561340338086e-05, |
| "loss": 0.672, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.1106498140852603, |
| "grad_norm": 7.7301483154296875, |
| "learning_rate": 4.4467509295736985e-05, |
| "loss": 0.7019, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.11111085497728221, |
| "grad_norm": 2.1185405254364014, |
| "learning_rate": 4.444445725113589e-05, |
| "loss": 0.7819, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.11157189586930413, |
| "grad_norm": 1.3251652717590332, |
| "learning_rate": 4.4421405206534796e-05, |
| "loss": 0.688, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.11203293676132604, |
| "grad_norm": 2.554704427719116, |
| "learning_rate": 4.4398353161933695e-05, |
| "loss": 0.7729, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.11249397765334797, |
| "grad_norm": 1.0944995880126953, |
| "learning_rate": 4.43753011173326e-05, |
| "loss": 0.7296, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.11295501854536988, |
| "grad_norm": 0.5829809904098511, |
| "learning_rate": 4.4352249072731507e-05, |
| "loss": 0.6906, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.1134160594373918, |
| "grad_norm": 1.3186956644058228, |
| "learning_rate": 4.432919702813041e-05, |
| "loss": 0.6849, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.11387710032941371, |
| "grad_norm": 2.7295708656311035, |
| "learning_rate": 4.430614498352932e-05, |
| "loss": 0.7398, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.11433814122143564, |
| "grad_norm": 0.8470388054847717, |
| "learning_rate": 4.4283092938928224e-05, |
| "loss": 0.7197, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.11479918211345755, |
| "grad_norm": 2.0679562091827393, |
| "learning_rate": 4.426004089432713e-05, |
| "loss": 0.7102, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.11526022300547947, |
| "grad_norm": 1.7280285358428955, |
| "learning_rate": 4.423698884972603e-05, |
| "loss": 0.6808, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.11526022300547947, |
| "eval_loss": 0.6911378502845764, |
| "eval_runtime": 143.422, |
| "eval_samples_per_second": 30.553, |
| "eval_steps_per_second": 30.553, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.11572126389750138, |
| "grad_norm": 1.910679578781128, |
| "learning_rate": 4.4213936805124934e-05, |
| "loss": 0.6451, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.11618230478952331, |
| "grad_norm": 1.45720636844635, |
| "learning_rate": 4.419088476052384e-05, |
| "loss": 0.6488, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.11664334568154522, |
| "grad_norm": 2.245499610900879, |
| "learning_rate": 4.416783271592274e-05, |
| "loss": 0.6719, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.11710438657356714, |
| "grad_norm": 1.8845460414886475, |
| "learning_rate": 4.4144780671321644e-05, |
| "loss": 0.6931, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.11756542746558905, |
| "grad_norm": 0.9793957471847534, |
| "learning_rate": 4.412172862672055e-05, |
| "loss": 0.6606, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.11802646835761098, |
| "grad_norm": 0.7978737950325012, |
| "learning_rate": 4.4098676582119455e-05, |
| "loss": 0.7226, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.1184875092496329, |
| "grad_norm": 1.30372953414917, |
| "learning_rate": 4.4075624537518354e-05, |
| "loss": 0.6551, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.11894855014165481, |
| "grad_norm": 2.127319812774658, |
| "learning_rate": 4.405257249291726e-05, |
| "loss": 0.703, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.11940959103367674, |
| "grad_norm": 2.518284797668457, |
| "learning_rate": 4.4029520448316166e-05, |
| "loss": 0.6229, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.11987063192569865, |
| "grad_norm": 1.752998948097229, |
| "learning_rate": 4.4006468403715065e-05, |
| "loss": 0.7245, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.12033167281772057, |
| "grad_norm": 1.0647391080856323, |
| "learning_rate": 4.398341635911397e-05, |
| "loss": 0.6879, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.12079271370974248, |
| "grad_norm": 2.2331488132476807, |
| "learning_rate": 4.3960364314512876e-05, |
| "loss": 0.675, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.12125375460176441, |
| "grad_norm": 2.0386297702789307, |
| "learning_rate": 4.393731226991178e-05, |
| "loss": 0.6941, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.12171479549378632, |
| "grad_norm": 1.6465948820114136, |
| "learning_rate": 4.391426022531069e-05, |
| "loss": 0.6883, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.12217583638580824, |
| "grad_norm": 0.915367066860199, |
| "learning_rate": 4.389120818070959e-05, |
| "loss": 0.7423, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.12263687727783015, |
| "grad_norm": 1.3777244091033936, |
| "learning_rate": 4.38681561361085e-05, |
| "loss": 0.7046, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.12309791816985208, |
| "grad_norm": 1.9694982767105103, |
| "learning_rate": 4.38451040915074e-05, |
| "loss": 0.7019, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.12355895906187399, |
| "grad_norm": 2.005706310272217, |
| "learning_rate": 4.38220520469063e-05, |
| "loss": 0.633, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.12401999995389591, |
| "grad_norm": 1.4841361045837402, |
| "learning_rate": 4.379900000230521e-05, |
| "loss": 0.6973, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.12448104084591782, |
| "grad_norm": 1.7717888355255127, |
| "learning_rate": 4.377594795770411e-05, |
| "loss": 0.6861, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.12494208173793975, |
| "grad_norm": 2.585420608520508, |
| "learning_rate": 4.3752895913103013e-05, |
| "loss": 0.7221, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.12540312262996167, |
| "grad_norm": 1.8941155672073364, |
| "learning_rate": 4.372984386850192e-05, |
| "loss": 0.7162, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.12586416352198357, |
| "grad_norm": 1.920271396636963, |
| "learning_rate": 4.3706791823900825e-05, |
| "loss": 0.6739, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.1263252044140055, |
| "grad_norm": 1.4717075824737549, |
| "learning_rate": 4.3683739779299724e-05, |
| "loss": 0.6691, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.12678624530602742, |
| "grad_norm": 1.4651011228561401, |
| "learning_rate": 4.366068773469863e-05, |
| "loss": 0.7049, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.12724728619804934, |
| "grad_norm": 1.613660216331482, |
| "learning_rate": 4.3637635690097535e-05, |
| "loss": 0.7058, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.12770832709007127, |
| "grad_norm": 1.3848841190338135, |
| "learning_rate": 4.361458364549644e-05, |
| "loss": 0.6994, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.12816936798209316, |
| "grad_norm": 3.159140110015869, |
| "learning_rate": 4.359153160089534e-05, |
| "loss": 0.6746, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.1286304088741151, |
| "grad_norm": 1.353094458580017, |
| "learning_rate": 4.3568479556294245e-05, |
| "loss": 0.641, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.129091449766137, |
| "grad_norm": 1.5936461687088013, |
| "learning_rate": 4.354542751169315e-05, |
| "loss": 0.7054, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.12955249065815894, |
| "grad_norm": 1.0393725633621216, |
| "learning_rate": 4.352237546709206e-05, |
| "loss": 0.636, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.13001353155018083, |
| "grad_norm": 1.6490427255630493, |
| "learning_rate": 4.349932342249096e-05, |
| "loss": 0.6936, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.13047457244220276, |
| "grad_norm": 1.1870497465133667, |
| "learning_rate": 4.347627137788987e-05, |
| "loss": 0.7119, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.13093561333422468, |
| "grad_norm": 2.3116602897644043, |
| "learning_rate": 4.345321933328877e-05, |
| "loss": 0.6961, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.1313966542262466, |
| "grad_norm": 1.674390435218811, |
| "learning_rate": 4.343016728868767e-05, |
| "loss": 0.7203, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.1318576951182685, |
| "grad_norm": 1.583085536956787, |
| "learning_rate": 4.340711524408658e-05, |
| "loss": 0.6594, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.13231873601029043, |
| "grad_norm": 1.5700510740280151, |
| "learning_rate": 4.3384063199485484e-05, |
| "loss": 0.6794, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.13277977690231235, |
| "grad_norm": 2.0833590030670166, |
| "learning_rate": 4.336101115488438e-05, |
| "loss": 0.6751, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.13324081779433428, |
| "grad_norm": 6.332681655883789, |
| "learning_rate": 4.333795911028329e-05, |
| "loss": 0.6574, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.13370185868635617, |
| "grad_norm": 1.0451648235321045, |
| "learning_rate": 4.3314907065682194e-05, |
| "loss": 0.6514, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.1341628995783781, |
| "grad_norm": 2.710758924484253, |
| "learning_rate": 4.329185502108109e-05, |
| "loss": 0.6839, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.13462394047040002, |
| "grad_norm": 1.8599129915237427, |
| "learning_rate": 4.326880297648e-05, |
| "loss": 0.6423, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.13508498136242195, |
| "grad_norm": 2.223250389099121, |
| "learning_rate": 4.3245750931878904e-05, |
| "loss": 0.6981, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.13554602225444387, |
| "grad_norm": 1.308874249458313, |
| "learning_rate": 4.322269888727781e-05, |
| "loss": 0.6425, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.13600706314646577, |
| "grad_norm": 1.2840343713760376, |
| "learning_rate": 4.319964684267671e-05, |
| "loss": 0.6832, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.1364681040384877, |
| "grad_norm": 1.2683848142623901, |
| "learning_rate": 4.3176594798075615e-05, |
| "loss": 0.6748, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.13692914493050962, |
| "grad_norm": 1.666727900505066, |
| "learning_rate": 4.315354275347453e-05, |
| "loss": 0.6459, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.13739018582253154, |
| "grad_norm": 1.8931647539138794, |
| "learning_rate": 4.3130490708873426e-05, |
| "loss": 0.6688, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.13785122671455344, |
| "grad_norm": 1.728664755821228, |
| "learning_rate": 4.310743866427233e-05, |
| "loss": 0.6489, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.13831226760657536, |
| "grad_norm": 1.461280345916748, |
| "learning_rate": 4.308438661967124e-05, |
| "loss": 0.6415, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.1387733084985973, |
| "grad_norm": 0.6125675439834595, |
| "learning_rate": 4.3061334575070136e-05, |
| "loss": 0.6321, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.1392343493906192, |
| "grad_norm": 1.7109229564666748, |
| "learning_rate": 4.303828253046904e-05, |
| "loss": 0.6506, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.1396953902826411, |
| "grad_norm": 1.3291008472442627, |
| "learning_rate": 4.301523048586795e-05, |
| "loss": 0.6312, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.14015643117466303, |
| "grad_norm": 1.697153091430664, |
| "learning_rate": 4.299217844126685e-05, |
| "loss": 0.701, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.14061747206668496, |
| "grad_norm": 0.8234291672706604, |
| "learning_rate": 4.296912639666575e-05, |
| "loss": 0.6556, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.14107851295870688, |
| "grad_norm": 1.3336366415023804, |
| "learning_rate": 4.294607435206466e-05, |
| "loss": 0.6853, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.14153955385072878, |
| "grad_norm": 1.8199868202209473, |
| "learning_rate": 4.2923022307463564e-05, |
| "loss": 0.6498, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.1420005947427507, |
| "grad_norm": 2.1182050704956055, |
| "learning_rate": 4.289997026286246e-05, |
| "loss": 0.6555, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.14246163563477263, |
| "grad_norm": 1.9714126586914062, |
| "learning_rate": 4.287691821826137e-05, |
| "loss": 0.7304, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.14292267652679455, |
| "grad_norm": 1.536047339439392, |
| "learning_rate": 4.2853866173660274e-05, |
| "loss": 0.5836, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.14338371741881648, |
| "grad_norm": 1.4263625144958496, |
| "learning_rate": 4.283081412905918e-05, |
| "loss": 0.6165, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.14384475831083837, |
| "grad_norm": 0.6614183783531189, |
| "learning_rate": 4.280776208445808e-05, |
| "loss": 0.7117, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.1443057992028603, |
| "grad_norm": 1.4404590129852295, |
| "learning_rate": 4.2784710039856984e-05, |
| "loss": 0.6583, |
| "step": 62600 |
| }, |
| { |
| "epoch": 0.14476684009488222, |
| "grad_norm": 3.333214044570923, |
| "learning_rate": 4.2761657995255897e-05, |
| "loss": 0.5992, |
| "step": 62800 |
| }, |
| { |
| "epoch": 0.14522788098690415, |
| "grad_norm": 1.3741906881332397, |
| "learning_rate": 4.2738605950654795e-05, |
| "loss": 0.6238, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.14568892187892604, |
| "grad_norm": 2.261046886444092, |
| "learning_rate": 4.27155539060537e-05, |
| "loss": 0.6908, |
| "step": 63200 |
| }, |
| { |
| "epoch": 0.14614996277094797, |
| "grad_norm": 2.2750587463378906, |
| "learning_rate": 4.269250186145261e-05, |
| "loss": 0.6479, |
| "step": 63400 |
| }, |
| { |
| "epoch": 0.1466110036629699, |
| "grad_norm": 2.38415265083313, |
| "learning_rate": 4.2669449816851506e-05, |
| "loss": 0.6621, |
| "step": 63600 |
| }, |
| { |
| "epoch": 0.14707204455499182, |
| "grad_norm": 4.09643030166626, |
| "learning_rate": 4.264639777225041e-05, |
| "loss": 0.6689, |
| "step": 63800 |
| }, |
| { |
| "epoch": 0.1475330854470137, |
| "grad_norm": 1.5877435207366943, |
| "learning_rate": 4.262334572764932e-05, |
| "loss": 0.6664, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.14799412633903564, |
| "grad_norm": 1.692415475845337, |
| "learning_rate": 4.260029368304822e-05, |
| "loss": 0.6646, |
| "step": 64200 |
| }, |
| { |
| "epoch": 0.14845516723105756, |
| "grad_norm": 1.6003667116165161, |
| "learning_rate": 4.257724163844712e-05, |
| "loss": 0.6305, |
| "step": 64400 |
| }, |
| { |
| "epoch": 0.14891620812307949, |
| "grad_norm": 1.2886855602264404, |
| "learning_rate": 4.255418959384603e-05, |
| "loss": 0.6017, |
| "step": 64600 |
| }, |
| { |
| "epoch": 0.14937724901510138, |
| "grad_norm": 0.7296251654624939, |
| "learning_rate": 4.253113754924493e-05, |
| "loss": 0.6852, |
| "step": 64800 |
| }, |
| { |
| "epoch": 0.1498382899071233, |
| "grad_norm": 1.687552809715271, |
| "learning_rate": 4.250808550464384e-05, |
| "loss": 0.6716, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.15029933079914523, |
| "grad_norm": 1.0152884721755981, |
| "learning_rate": 4.248503346004274e-05, |
| "loss": 0.6823, |
| "step": 65200 |
| }, |
| { |
| "epoch": 0.15076037169116716, |
| "grad_norm": 2.022918939590454, |
| "learning_rate": 4.246198141544164e-05, |
| "loss": 0.6713, |
| "step": 65400 |
| }, |
| { |
| "epoch": 0.15122141258318905, |
| "grad_norm": 0.733291745185852, |
| "learning_rate": 4.243892937084055e-05, |
| "loss": 0.6375, |
| "step": 65600 |
| }, |
| { |
| "epoch": 0.15168245347521098, |
| "grad_norm": 2.1983726024627686, |
| "learning_rate": 4.241587732623945e-05, |
| "loss": 0.6861, |
| "step": 65800 |
| }, |
| { |
| "epoch": 0.1521434943672329, |
| "grad_norm": 3.5877902507781982, |
| "learning_rate": 4.2392825281638353e-05, |
| "loss": 0.6393, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.15260453525925483, |
| "grad_norm": 1.1176559925079346, |
| "learning_rate": 4.2369773237037266e-05, |
| "loss": 0.6933, |
| "step": 66200 |
| }, |
| { |
| "epoch": 0.15306557615127675, |
| "grad_norm": 1.4344258308410645, |
| "learning_rate": 4.2346721192436165e-05, |
| "loss": 0.6471, |
| "step": 66400 |
| }, |
| { |
| "epoch": 0.15352661704329865, |
| "grad_norm": 1.4673750400543213, |
| "learning_rate": 4.232366914783507e-05, |
| "loss": 0.6657, |
| "step": 66600 |
| }, |
| { |
| "epoch": 0.15398765793532057, |
| "grad_norm": 1.2807679176330566, |
| "learning_rate": 4.2300617103233976e-05, |
| "loss": 0.6353, |
| "step": 66800 |
| }, |
| { |
| "epoch": 0.1544486988273425, |
| "grad_norm": 1.1444551944732666, |
| "learning_rate": 4.227756505863288e-05, |
| "loss": 0.6789, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.15490973971936442, |
| "grad_norm": 2.322291374206543, |
| "learning_rate": 4.225451301403178e-05, |
| "loss": 0.6841, |
| "step": 67200 |
| }, |
| { |
| "epoch": 0.15537078061138632, |
| "grad_norm": 1.6149322986602783, |
| "learning_rate": 4.2231460969430686e-05, |
| "loss": 0.6654, |
| "step": 67400 |
| }, |
| { |
| "epoch": 0.15583182150340824, |
| "grad_norm": 1.7921006679534912, |
| "learning_rate": 4.220840892482959e-05, |
| "loss": 0.6688, |
| "step": 67600 |
| }, |
| { |
| "epoch": 0.15629286239543017, |
| "grad_norm": 1.522269606590271, |
| "learning_rate": 4.218535688022849e-05, |
| "loss": 0.6815, |
| "step": 67800 |
| }, |
| { |
| "epoch": 0.1567539032874521, |
| "grad_norm": 1.6208064556121826, |
| "learning_rate": 4.21623048356274e-05, |
| "loss": 0.6331, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.157214944179474, |
| "grad_norm": 1.7673718929290771, |
| "learning_rate": 4.21392527910263e-05, |
| "loss": 0.5858, |
| "step": 68200 |
| }, |
| { |
| "epoch": 0.1576759850714959, |
| "grad_norm": 1.3930482864379883, |
| "learning_rate": 4.211620074642521e-05, |
| "loss": 0.6221, |
| "step": 68400 |
| }, |
| { |
| "epoch": 0.15813702596351784, |
| "grad_norm": 1.0463271141052246, |
| "learning_rate": 4.209314870182411e-05, |
| "loss": 0.596, |
| "step": 68600 |
| }, |
| { |
| "epoch": 0.15859806685553976, |
| "grad_norm": 1.5553432703018188, |
| "learning_rate": 4.207009665722301e-05, |
| "loss": 0.6048, |
| "step": 68800 |
| }, |
| { |
| "epoch": 0.15905910774756166, |
| "grad_norm": 1.9478529691696167, |
| "learning_rate": 4.204704461262192e-05, |
| "loss": 0.6838, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.15952014863958358, |
| "grad_norm": 1.5347201824188232, |
| "learning_rate": 4.202399256802082e-05, |
| "loss": 0.6536, |
| "step": 69200 |
| }, |
| { |
| "epoch": 0.1599811895316055, |
| "grad_norm": 1.2360255718231201, |
| "learning_rate": 4.200094052341972e-05, |
| "loss": 0.662, |
| "step": 69400 |
| }, |
| { |
| "epoch": 0.16044223042362743, |
| "grad_norm": 1.09177827835083, |
| "learning_rate": 4.1977888478818635e-05, |
| "loss": 0.6767, |
| "step": 69600 |
| }, |
| { |
| "epoch": 0.16090327131564935, |
| "grad_norm": 1.0002694129943848, |
| "learning_rate": 4.1954836434217534e-05, |
| "loss": 0.6057, |
| "step": 69800 |
| }, |
| { |
| "epoch": 0.16136431220767125, |
| "grad_norm": 1.2823467254638672, |
| "learning_rate": 4.193178438961644e-05, |
| "loss": 0.6153, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.16182535309969318, |
| "grad_norm": 0.9123159646987915, |
| "learning_rate": 4.1908732345015346e-05, |
| "loss": 0.6432, |
| "step": 70200 |
| }, |
| { |
| "epoch": 0.1622863939917151, |
| "grad_norm": 2.3576698303222656, |
| "learning_rate": 4.188568030041425e-05, |
| "loss": 0.6284, |
| "step": 70400 |
| }, |
| { |
| "epoch": 0.16274743488373702, |
| "grad_norm": 0.9124912023544312, |
| "learning_rate": 4.186262825581315e-05, |
| "loss": 0.6879, |
| "step": 70600 |
| }, |
| { |
| "epoch": 0.16320847577575892, |
| "grad_norm": 1.3194003105163574, |
| "learning_rate": 4.1839576211212056e-05, |
| "loss": 0.6337, |
| "step": 70800 |
| }, |
| { |
| "epoch": 0.16366951666778085, |
| "grad_norm": 1.6139734983444214, |
| "learning_rate": 4.181652416661096e-05, |
| "loss": 0.6522, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.16413055755980277, |
| "grad_norm": 9.392971992492676, |
| "learning_rate": 4.179347212200986e-05, |
| "loss": 0.6708, |
| "step": 71200 |
| }, |
| { |
| "epoch": 0.1645915984518247, |
| "grad_norm": 1.462740421295166, |
| "learning_rate": 4.1770420077408766e-05, |
| "loss": 0.598, |
| "step": 71400 |
| }, |
| { |
| "epoch": 0.1650526393438466, |
| "grad_norm": 1.7748998403549194, |
| "learning_rate": 4.174736803280767e-05, |
| "loss": 0.644, |
| "step": 71600 |
| }, |
| { |
| "epoch": 0.16551368023586852, |
| "grad_norm": 1.202195644378662, |
| "learning_rate": 4.172431598820658e-05, |
| "loss": 0.6229, |
| "step": 71800 |
| }, |
| { |
| "epoch": 0.16597472112789044, |
| "grad_norm": 1.877752423286438, |
| "learning_rate": 4.1701263943605476e-05, |
| "loss": 0.5753, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.16643576201991236, |
| "grad_norm": 3.8123841285705566, |
| "learning_rate": 4.167821189900438e-05, |
| "loss": 0.6965, |
| "step": 72200 |
| }, |
| { |
| "epoch": 0.16689680291193426, |
| "grad_norm": 2.4701273441314697, |
| "learning_rate": 4.165515985440329e-05, |
| "loss": 0.6491, |
| "step": 72400 |
| }, |
| { |
| "epoch": 0.16735784380395619, |
| "grad_norm": 1.3478227853775024, |
| "learning_rate": 4.163210780980219e-05, |
| "loss": 0.657, |
| "step": 72600 |
| }, |
| { |
| "epoch": 0.1678188846959781, |
| "grad_norm": 1.1858279705047607, |
| "learning_rate": 4.16090557652011e-05, |
| "loss": 0.6297, |
| "step": 72800 |
| }, |
| { |
| "epoch": 0.16827992558800003, |
| "grad_norm": 1.016969919204712, |
| "learning_rate": 4.1586003720600005e-05, |
| "loss": 0.5969, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.16874096648002196, |
| "grad_norm": 1.7557319402694702, |
| "learning_rate": 4.1562951675998904e-05, |
| "loss": 0.6602, |
| "step": 73200 |
| }, |
| { |
| "epoch": 0.16920200737204386, |
| "grad_norm": 1.2610116004943848, |
| "learning_rate": 4.153989963139781e-05, |
| "loss": 0.5832, |
| "step": 73400 |
| }, |
| { |
| "epoch": 0.16966304826406578, |
| "grad_norm": 1.012919545173645, |
| "learning_rate": 4.1516847586796715e-05, |
| "loss": 0.6437, |
| "step": 73600 |
| }, |
| { |
| "epoch": 0.1701240891560877, |
| "grad_norm": 3.5607211589813232, |
| "learning_rate": 4.149379554219562e-05, |
| "loss": 0.6131, |
| "step": 73800 |
| }, |
| { |
| "epoch": 0.17058513004810963, |
| "grad_norm": 1.3184549808502197, |
| "learning_rate": 4.147074349759452e-05, |
| "loss": 0.5669, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.17104617094013153, |
| "grad_norm": 2.453568458557129, |
| "learning_rate": 4.1447691452993425e-05, |
| "loss": 0.609, |
| "step": 74200 |
| }, |
| { |
| "epoch": 0.17150721183215345, |
| "grad_norm": 0.942398726940155, |
| "learning_rate": 4.142463940839233e-05, |
| "loss": 0.6451, |
| "step": 74400 |
| }, |
| { |
| "epoch": 0.17196825272417537, |
| "grad_norm": 2.131546974182129, |
| "learning_rate": 4.1401587363791237e-05, |
| "loss": 0.6167, |
| "step": 74600 |
| }, |
| { |
| "epoch": 0.1724292936161973, |
| "grad_norm": 1.0977692604064941, |
| "learning_rate": 4.1378535319190135e-05, |
| "loss": 0.6038, |
| "step": 74800 |
| }, |
| { |
| "epoch": 0.1728903345082192, |
| "grad_norm": 1.6585220098495483, |
| "learning_rate": 4.135548327458904e-05, |
| "loss": 0.6221, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.17335137540024112, |
| "grad_norm": 1.4961862564086914, |
| "learning_rate": 4.133243122998795e-05, |
| "loss": 0.6083, |
| "step": 75200 |
| }, |
| { |
| "epoch": 0.17381241629226304, |
| "grad_norm": 1.8815230131149292, |
| "learning_rate": 4.1309379185386846e-05, |
| "loss": 0.6484, |
| "step": 75400 |
| }, |
| { |
| "epoch": 0.17427345718428497, |
| "grad_norm": 1.2106714248657227, |
| "learning_rate": 4.128632714078575e-05, |
| "loss": 0.6745, |
| "step": 75600 |
| }, |
| { |
| "epoch": 0.17473449807630687, |
| "grad_norm": 15.076075553894043, |
| "learning_rate": 4.126327509618466e-05, |
| "loss": 0.5759, |
| "step": 75800 |
| }, |
| { |
| "epoch": 0.1751955389683288, |
| "grad_norm": 1.6629307270050049, |
| "learning_rate": 4.124022305158356e-05, |
| "loss": 0.6511, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.17565657986035071, |
| "grad_norm": 0.919217586517334, |
| "learning_rate": 4.121717100698247e-05, |
| "loss": 0.6124, |
| "step": 76200 |
| }, |
| { |
| "epoch": 0.17611762075237264, |
| "grad_norm": 0.9907572269439697, |
| "learning_rate": 4.1194118962381374e-05, |
| "loss": 0.6668, |
| "step": 76400 |
| }, |
| { |
| "epoch": 0.17657866164439456, |
| "grad_norm": 1.0881201028823853, |
| "learning_rate": 4.117106691778028e-05, |
| "loss": 0.6564, |
| "step": 76600 |
| }, |
| { |
| "epoch": 0.17703970253641646, |
| "grad_norm": 1.2789230346679688, |
| "learning_rate": 4.114801487317918e-05, |
| "loss": 0.6228, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.17750074342843838, |
| "grad_norm": 2.680896759033203, |
| "learning_rate": 4.1124962828578084e-05, |
| "loss": 0.6754, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.1779617843204603, |
| "grad_norm": 1.4832789897918701, |
| "learning_rate": 4.110191078397699e-05, |
| "loss": 0.6153, |
| "step": 77200 |
| }, |
| { |
| "epoch": 0.17842282521248223, |
| "grad_norm": 1.8197680711746216, |
| "learning_rate": 4.107885873937589e-05, |
| "loss": 0.6174, |
| "step": 77400 |
| }, |
| { |
| "epoch": 0.17888386610450413, |
| "grad_norm": 1.8292102813720703, |
| "learning_rate": 4.1055806694774795e-05, |
| "loss": 0.63, |
| "step": 77600 |
| }, |
| { |
| "epoch": 0.17934490699652605, |
| "grad_norm": 1.0683658123016357, |
| "learning_rate": 4.10327546501737e-05, |
| "loss": 0.6622, |
| "step": 77800 |
| }, |
| { |
| "epoch": 0.17980594788854798, |
| "grad_norm": 1.9662219285964966, |
| "learning_rate": 4.1009702605572606e-05, |
| "loss": 0.6231, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.1802669887805699, |
| "grad_norm": 1.7541677951812744, |
| "learning_rate": 4.0986650560971505e-05, |
| "loss": 0.6551, |
| "step": 78200 |
| }, |
| { |
| "epoch": 0.1807280296725918, |
| "grad_norm": 1.8776569366455078, |
| "learning_rate": 4.096359851637041e-05, |
| "loss": 0.6121, |
| "step": 78400 |
| }, |
| { |
| "epoch": 0.18118907056461372, |
| "grad_norm": 1.9241667985916138, |
| "learning_rate": 4.0940546471769316e-05, |
| "loss": 0.6205, |
| "step": 78600 |
| }, |
| { |
| "epoch": 0.18165011145663565, |
| "grad_norm": 1.7925617694854736, |
| "learning_rate": 4.0917494427168215e-05, |
| "loss": 0.6353, |
| "step": 78800 |
| }, |
| { |
| "epoch": 0.18211115234865757, |
| "grad_norm": 0.9358586072921753, |
| "learning_rate": 4.089444238256712e-05, |
| "loss": 0.6129, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.18257219324067947, |
| "grad_norm": 1.744363784790039, |
| "learning_rate": 4.0871390337966026e-05, |
| "loss": 0.5996, |
| "step": 79200 |
| }, |
| { |
| "epoch": 0.1830332341327014, |
| "grad_norm": 1.6181316375732422, |
| "learning_rate": 4.084833829336493e-05, |
| "loss": 0.6316, |
| "step": 79400 |
| }, |
| { |
| "epoch": 0.18349427502472332, |
| "grad_norm": 0.8998286128044128, |
| "learning_rate": 4.082528624876384e-05, |
| "loss": 0.6386, |
| "step": 79600 |
| }, |
| { |
| "epoch": 0.18395531591674524, |
| "grad_norm": 1.9069503545761108, |
| "learning_rate": 4.0802234204162743e-05, |
| "loss": 0.6345, |
| "step": 79800 |
| }, |
| { |
| "epoch": 0.18441635680876714, |
| "grad_norm": 1.7913002967834473, |
| "learning_rate": 4.077918215956165e-05, |
| "loss": 0.5903, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.18487739770078906, |
| "grad_norm": 2.31486177444458, |
| "learning_rate": 4.075613011496055e-05, |
| "loss": 0.6663, |
| "step": 80200 |
| }, |
| { |
| "epoch": 0.185338438592811, |
| "grad_norm": 1.4911130666732788, |
| "learning_rate": 4.0733078070359454e-05, |
| "loss": 0.6346, |
| "step": 80400 |
| }, |
| { |
| "epoch": 0.1857994794848329, |
| "grad_norm": 0.8119006752967834, |
| "learning_rate": 4.071002602575836e-05, |
| "loss": 0.5683, |
| "step": 80600 |
| }, |
| { |
| "epoch": 0.18626052037685484, |
| "grad_norm": 1.8645226955413818, |
| "learning_rate": 4.068697398115726e-05, |
| "loss": 0.5985, |
| "step": 80800 |
| }, |
| { |
| "epoch": 0.18672156126887673, |
| "grad_norm": 0.8933721780776978, |
| "learning_rate": 4.0663921936556164e-05, |
| "loss": 0.6082, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.18718260216089866, |
| "grad_norm": 0.9477849006652832, |
| "learning_rate": 4.064086989195507e-05, |
| "loss": 0.5934, |
| "step": 81200 |
| }, |
| { |
| "epoch": 0.18764364305292058, |
| "grad_norm": 2.2654476165771484, |
| "learning_rate": 4.0617817847353975e-05, |
| "loss": 0.6266, |
| "step": 81400 |
| }, |
| { |
| "epoch": 0.1881046839449425, |
| "grad_norm": 1.381350040435791, |
| "learning_rate": 4.0594765802752874e-05, |
| "loss": 0.6231, |
| "step": 81600 |
| }, |
| { |
| "epoch": 0.1885657248369644, |
| "grad_norm": 1.9982389211654663, |
| "learning_rate": 4.057171375815178e-05, |
| "loss": 0.6029, |
| "step": 81800 |
| }, |
| { |
| "epoch": 0.18902676572898633, |
| "grad_norm": 1.583160400390625, |
| "learning_rate": 4.0548661713550686e-05, |
| "loss": 0.6152, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.18948780662100825, |
| "grad_norm": 0.8362854719161987, |
| "learning_rate": 4.052560966894959e-05, |
| "loss": 0.6231, |
| "step": 82200 |
| }, |
| { |
| "epoch": 0.18994884751303018, |
| "grad_norm": 2.0223453044891357, |
| "learning_rate": 4.050255762434849e-05, |
| "loss": 0.6013, |
| "step": 82400 |
| }, |
| { |
| "epoch": 0.19040988840505207, |
| "grad_norm": 1.9948159456253052, |
| "learning_rate": 4.0479505579747396e-05, |
| "loss": 0.6374, |
| "step": 82600 |
| }, |
| { |
| "epoch": 0.190870929297074, |
| "grad_norm": 1.763412594795227, |
| "learning_rate": 4.04564535351463e-05, |
| "loss": 0.6696, |
| "step": 82800 |
| }, |
| { |
| "epoch": 0.19133197018909592, |
| "grad_norm": 1.4458279609680176, |
| "learning_rate": 4.043340149054521e-05, |
| "loss": 0.6253, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.19179301108111785, |
| "grad_norm": 1.9040172100067139, |
| "learning_rate": 4.041034944594411e-05, |
| "loss": 0.6292, |
| "step": 83200 |
| }, |
| { |
| "epoch": 0.19225405197313974, |
| "grad_norm": 0.5876076817512512, |
| "learning_rate": 4.038729740134302e-05, |
| "loss": 0.5721, |
| "step": 83400 |
| }, |
| { |
| "epoch": 0.19271509286516167, |
| "grad_norm": 1.4014763832092285, |
| "learning_rate": 4.036424535674192e-05, |
| "loss": 0.6496, |
| "step": 83600 |
| }, |
| { |
| "epoch": 0.1931761337571836, |
| "grad_norm": 1.3236879110336304, |
| "learning_rate": 4.034119331214082e-05, |
| "loss": 0.6824, |
| "step": 83800 |
| }, |
| { |
| "epoch": 0.19363717464920552, |
| "grad_norm": 1.3417832851409912, |
| "learning_rate": 4.031814126753973e-05, |
| "loss": 0.6155, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.19409821554122744, |
| "grad_norm": 1.254905104637146, |
| "learning_rate": 4.0295089222938634e-05, |
| "loss": 0.6194, |
| "step": 84200 |
| }, |
| { |
| "epoch": 0.19455925643324934, |
| "grad_norm": 1.0880146026611328, |
| "learning_rate": 4.027203717833753e-05, |
| "loss": 0.566, |
| "step": 84400 |
| }, |
| { |
| "epoch": 0.19502029732527126, |
| "grad_norm": 0.5658175945281982, |
| "learning_rate": 4.024898513373644e-05, |
| "loss": 0.6118, |
| "step": 84600 |
| }, |
| { |
| "epoch": 0.1954813382172932, |
| "grad_norm": 1.4203405380249023, |
| "learning_rate": 4.0225933089135345e-05, |
| "loss": 0.6458, |
| "step": 84800 |
| }, |
| { |
| "epoch": 0.1959423791093151, |
| "grad_norm": 1.4831221103668213, |
| "learning_rate": 4.0202881044534244e-05, |
| "loss": 0.6129, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.196403420001337, |
| "grad_norm": 2.332782506942749, |
| "learning_rate": 4.017982899993315e-05, |
| "loss": 0.6036, |
| "step": 85200 |
| }, |
| { |
| "epoch": 0.19686446089335893, |
| "grad_norm": 1.699129343032837, |
| "learning_rate": 4.0156776955332055e-05, |
| "loss": 0.6667, |
| "step": 85400 |
| }, |
| { |
| "epoch": 0.19732550178538086, |
| "grad_norm": 2.4848811626434326, |
| "learning_rate": 4.013372491073096e-05, |
| "loss": 0.6281, |
| "step": 85600 |
| }, |
| { |
| "epoch": 0.19778654267740278, |
| "grad_norm": 1.896471381187439, |
| "learning_rate": 4.011067286612986e-05, |
| "loss": 0.6028, |
| "step": 85800 |
| }, |
| { |
| "epoch": 0.19824758356942468, |
| "grad_norm": 1.61887526512146, |
| "learning_rate": 4.0087620821528765e-05, |
| "loss": 0.6086, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.1987086244614466, |
| "grad_norm": 1.0907816886901855, |
| "learning_rate": 4.006456877692767e-05, |
| "loss": 0.6499, |
| "step": 86200 |
| }, |
| { |
| "epoch": 0.19916966535346853, |
| "grad_norm": 1.1306065320968628, |
| "learning_rate": 4.0041516732326576e-05, |
| "loss": 0.6152, |
| "step": 86400 |
| }, |
| { |
| "epoch": 0.19963070624549045, |
| "grad_norm": 4.158120155334473, |
| "learning_rate": 4.001846468772548e-05, |
| "loss": 0.6039, |
| "step": 86600 |
| }, |
| { |
| "epoch": 0.20009174713751235, |
| "grad_norm": 1.0758455991744995, |
| "learning_rate": 3.999541264312439e-05, |
| "loss": 0.5966, |
| "step": 86800 |
| }, |
| { |
| "epoch": 0.20055278802953427, |
| "grad_norm": 1.0376372337341309, |
| "learning_rate": 3.997236059852329e-05, |
| "loss": 0.589, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.2010138289215562, |
| "grad_norm": 1.2652366161346436, |
| "learning_rate": 3.994930855392219e-05, |
| "loss": 0.6588, |
| "step": 87200 |
| }, |
| { |
| "epoch": 0.20147486981357812, |
| "grad_norm": 1.8211579322814941, |
| "learning_rate": 3.99262565093211e-05, |
| "loss": 0.6191, |
| "step": 87400 |
| }, |
| { |
| "epoch": 0.20193591070560005, |
| "grad_norm": 4.478600025177002, |
| "learning_rate": 3.9903204464720004e-05, |
| "loss": 0.5878, |
| "step": 87600 |
| }, |
| { |
| "epoch": 0.20239695159762194, |
| "grad_norm": 1.4553157091140747, |
| "learning_rate": 3.98801524201189e-05, |
| "loss": 0.6204, |
| "step": 87800 |
| }, |
| { |
| "epoch": 0.20285799248964387, |
| "grad_norm": 1.3515084981918335, |
| "learning_rate": 3.985710037551781e-05, |
| "loss": 0.5467, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.2033190333816658, |
| "grad_norm": 1.0609192848205566, |
| "learning_rate": 3.9834048330916714e-05, |
| "loss": 0.5393, |
| "step": 88200 |
| }, |
| { |
| "epoch": 0.20378007427368772, |
| "grad_norm": 2.3497846126556396, |
| "learning_rate": 3.981099628631561e-05, |
| "loss": 0.6261, |
| "step": 88400 |
| }, |
| { |
| "epoch": 0.2042411151657096, |
| "grad_norm": 1.129948616027832, |
| "learning_rate": 3.978794424171452e-05, |
| "loss": 0.6367, |
| "step": 88600 |
| }, |
| { |
| "epoch": 0.20470215605773154, |
| "grad_norm": 1.0302705764770508, |
| "learning_rate": 3.9764892197113424e-05, |
| "loss": 0.59, |
| "step": 88800 |
| }, |
| { |
| "epoch": 0.20516319694975346, |
| "grad_norm": 1.1066232919692993, |
| "learning_rate": 3.974184015251233e-05, |
| "loss": 0.6325, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.2056242378417754, |
| "grad_norm": 2.078610897064209, |
| "learning_rate": 3.971878810791123e-05, |
| "loss": 0.6465, |
| "step": 89200 |
| }, |
| { |
| "epoch": 0.20608527873379728, |
| "grad_norm": 1.8704718351364136, |
| "learning_rate": 3.9695736063310134e-05, |
| "loss": 0.6202, |
| "step": 89400 |
| }, |
| { |
| "epoch": 0.2065463196258192, |
| "grad_norm": 0.496405690908432, |
| "learning_rate": 3.967268401870904e-05, |
| "loss": 0.6073, |
| "step": 89600 |
| }, |
| { |
| "epoch": 0.20700736051784113, |
| "grad_norm": 1.9287617206573486, |
| "learning_rate": 3.9649631974107946e-05, |
| "loss": 0.5779, |
| "step": 89800 |
| }, |
| { |
| "epoch": 0.20746840140986306, |
| "grad_norm": 1.867727279663086, |
| "learning_rate": 3.962657992950685e-05, |
| "loss": 0.5736, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.20792944230188495, |
| "grad_norm": 0.9726611971855164, |
| "learning_rate": 3.960352788490576e-05, |
| "loss": 0.6051, |
| "step": 90200 |
| }, |
| { |
| "epoch": 0.20839048319390688, |
| "grad_norm": 1.8991550207138062, |
| "learning_rate": 3.9580475840304656e-05, |
| "loss": 0.6306, |
| "step": 90400 |
| }, |
| { |
| "epoch": 0.2088515240859288, |
| "grad_norm": 1.3989739418029785, |
| "learning_rate": 3.955742379570356e-05, |
| "loss": 0.6165, |
| "step": 90600 |
| }, |
| { |
| "epoch": 0.20931256497795073, |
| "grad_norm": 2.0542263984680176, |
| "learning_rate": 3.953437175110247e-05, |
| "loss": 0.606, |
| "step": 90800 |
| }, |
| { |
| "epoch": 0.20977360586997262, |
| "grad_norm": 1.3546398878097534, |
| "learning_rate": 3.951131970650137e-05, |
| "loss": 0.6513, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.21023464676199455, |
| "grad_norm": 2.3966128826141357, |
| "learning_rate": 3.948826766190027e-05, |
| "loss": 0.6147, |
| "step": 91200 |
| }, |
| { |
| "epoch": 0.21069568765401647, |
| "grad_norm": 1.8540971279144287, |
| "learning_rate": 3.946521561729918e-05, |
| "loss": 0.6128, |
| "step": 91400 |
| }, |
| { |
| "epoch": 0.2111567285460384, |
| "grad_norm": 0.6874774694442749, |
| "learning_rate": 3.944216357269808e-05, |
| "loss": 0.603, |
| "step": 91600 |
| }, |
| { |
| "epoch": 0.21161776943806032, |
| "grad_norm": 3.1788859367370605, |
| "learning_rate": 3.941911152809699e-05, |
| "loss": 0.6173, |
| "step": 91800 |
| }, |
| { |
| "epoch": 0.21207881033008222, |
| "grad_norm": 1.5572599172592163, |
| "learning_rate": 3.939605948349589e-05, |
| "loss": 0.603, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.21253985122210414, |
| "grad_norm": 1.5014060735702515, |
| "learning_rate": 3.9373007438894794e-05, |
| "loss": 0.5746, |
| "step": 92200 |
| }, |
| { |
| "epoch": 0.21300089211412607, |
| "grad_norm": 2.458667516708374, |
| "learning_rate": 3.93499553942937e-05, |
| "loss": 0.6277, |
| "step": 92400 |
| }, |
| { |
| "epoch": 0.213461933006148, |
| "grad_norm": 2.5523571968078613, |
| "learning_rate": 3.93269033496926e-05, |
| "loss": 0.5994, |
| "step": 92600 |
| }, |
| { |
| "epoch": 0.2139229738981699, |
| "grad_norm": 1.136783480644226, |
| "learning_rate": 3.9303851305091504e-05, |
| "loss": 0.6284, |
| "step": 92800 |
| }, |
| { |
| "epoch": 0.2143840147901918, |
| "grad_norm": 1.2271496057510376, |
| "learning_rate": 3.928079926049041e-05, |
| "loss": 0.5876, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.21484505568221374, |
| "grad_norm": 0.6214015483856201, |
| "learning_rate": 3.9257747215889315e-05, |
| "loss": 0.6294, |
| "step": 93200 |
| }, |
| { |
| "epoch": 0.21530609657423566, |
| "grad_norm": 1.4034799337387085, |
| "learning_rate": 3.923469517128822e-05, |
| "loss": 0.6242, |
| "step": 93400 |
| }, |
| { |
| "epoch": 0.21576713746625756, |
| "grad_norm": 1.160979151725769, |
| "learning_rate": 3.9211643126687127e-05, |
| "loss": 0.6082, |
| "step": 93600 |
| }, |
| { |
| "epoch": 0.21622817835827948, |
| "grad_norm": 1.3025540113449097, |
| "learning_rate": 3.918859108208603e-05, |
| "loss": 0.5844, |
| "step": 93800 |
| }, |
| { |
| "epoch": 0.2166892192503014, |
| "grad_norm": 2.7265303134918213, |
| "learning_rate": 3.916553903748493e-05, |
| "loss": 0.5367, |
| "step": 94000 |
| }, |
| { |
| "epoch": 0.21715026014232333, |
| "grad_norm": 2.3376145362854004, |
| "learning_rate": 3.914248699288384e-05, |
| "loss": 0.605, |
| "step": 94200 |
| }, |
| { |
| "epoch": 0.21761130103434523, |
| "grad_norm": 0.6863404512405396, |
| "learning_rate": 3.911943494828274e-05, |
| "loss": 0.6038, |
| "step": 94400 |
| }, |
| { |
| "epoch": 0.21807234192636715, |
| "grad_norm": 2.042480230331421, |
| "learning_rate": 3.909638290368164e-05, |
| "loss": 0.5875, |
| "step": 94600 |
| }, |
| { |
| "epoch": 0.21853338281838908, |
| "grad_norm": 1.5179613828659058, |
| "learning_rate": 3.907333085908055e-05, |
| "loss": 0.6374, |
| "step": 94800 |
| }, |
| { |
| "epoch": 0.218994423710411, |
| "grad_norm": 1.8562968969345093, |
| "learning_rate": 3.905027881447945e-05, |
| "loss": 0.6243, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.21945546460243293, |
| "grad_norm": 1.0300766229629517, |
| "learning_rate": 3.902722676987836e-05, |
| "loss": 0.6338, |
| "step": 95200 |
| }, |
| { |
| "epoch": 0.21991650549445482, |
| "grad_norm": 3.0744545459747314, |
| "learning_rate": 3.900417472527726e-05, |
| "loss": 0.6158, |
| "step": 95400 |
| }, |
| { |
| "epoch": 0.22037754638647675, |
| "grad_norm": 3.355592727661133, |
| "learning_rate": 3.898112268067616e-05, |
| "loss": 0.628, |
| "step": 95600 |
| }, |
| { |
| "epoch": 0.22083858727849867, |
| "grad_norm": 1.0590027570724487, |
| "learning_rate": 3.895807063607507e-05, |
| "loss": 0.6363, |
| "step": 95800 |
| }, |
| { |
| "epoch": 0.2212996281705206, |
| "grad_norm": 1.37596595287323, |
| "learning_rate": 3.893501859147397e-05, |
| "loss": 0.6107, |
| "step": 96000 |
| }, |
| { |
| "epoch": 0.2217606690625425, |
| "grad_norm": 1.392102599143982, |
| "learning_rate": 3.891196654687287e-05, |
| "loss": 0.6182, |
| "step": 96200 |
| }, |
| { |
| "epoch": 0.22222170995456442, |
| "grad_norm": 1.0778827667236328, |
| "learning_rate": 3.888891450227178e-05, |
| "loss": 0.6225, |
| "step": 96400 |
| }, |
| { |
| "epoch": 0.22268275084658634, |
| "grad_norm": 0.8405503034591675, |
| "learning_rate": 3.8865862457670685e-05, |
| "loss": 0.5607, |
| "step": 96600 |
| }, |
| { |
| "epoch": 0.22314379173860827, |
| "grad_norm": 1.857490062713623, |
| "learning_rate": 3.884281041306959e-05, |
| "loss": 0.5927, |
| "step": 96800 |
| }, |
| { |
| "epoch": 0.22360483263063016, |
| "grad_norm": 1.9052844047546387, |
| "learning_rate": 3.8819758368468496e-05, |
| "loss": 0.631, |
| "step": 97000 |
| }, |
| { |
| "epoch": 0.2240658735226521, |
| "grad_norm": 0.8537679314613342, |
| "learning_rate": 3.87967063238674e-05, |
| "loss": 0.6252, |
| "step": 97200 |
| }, |
| { |
| "epoch": 0.224526914414674, |
| "grad_norm": 1.5780411958694458, |
| "learning_rate": 3.87736542792663e-05, |
| "loss": 0.6445, |
| "step": 97400 |
| }, |
| { |
| "epoch": 0.22498795530669594, |
| "grad_norm": 1.55938720703125, |
| "learning_rate": 3.8750602234665206e-05, |
| "loss": 0.592, |
| "step": 97600 |
| }, |
| { |
| "epoch": 0.22544899619871783, |
| "grad_norm": 2.4053783416748047, |
| "learning_rate": 3.872755019006411e-05, |
| "loss": 0.5912, |
| "step": 97800 |
| }, |
| { |
| "epoch": 0.22591003709073976, |
| "grad_norm": 1.1745800971984863, |
| "learning_rate": 3.870449814546301e-05, |
| "loss": 0.6163, |
| "step": 98000 |
| }, |
| { |
| "epoch": 0.22637107798276168, |
| "grad_norm": 1.0355582237243652, |
| "learning_rate": 3.8681446100861916e-05, |
| "loss": 0.6557, |
| "step": 98200 |
| }, |
| { |
| "epoch": 0.2268321188747836, |
| "grad_norm": 1.5494755506515503, |
| "learning_rate": 3.865839405626082e-05, |
| "loss": 0.5803, |
| "step": 98400 |
| }, |
| { |
| "epoch": 0.22729315976680553, |
| "grad_norm": 0.9093578457832336, |
| "learning_rate": 3.863534201165973e-05, |
| "loss": 0.5485, |
| "step": 98600 |
| }, |
| { |
| "epoch": 0.22775420065882743, |
| "grad_norm": 3.997178077697754, |
| "learning_rate": 3.861228996705863e-05, |
| "loss": 0.608, |
| "step": 98800 |
| }, |
| { |
| "epoch": 0.22821524155084935, |
| "grad_norm": 0.7264981269836426, |
| "learning_rate": 3.858923792245753e-05, |
| "loss": 0.5569, |
| "step": 99000 |
| }, |
| { |
| "epoch": 0.22867628244287128, |
| "grad_norm": 1.214425802230835, |
| "learning_rate": 3.856618587785644e-05, |
| "loss": 0.5799, |
| "step": 99200 |
| }, |
| { |
| "epoch": 0.2291373233348932, |
| "grad_norm": 1.1324894428253174, |
| "learning_rate": 3.8543133833255344e-05, |
| "loss": 0.5854, |
| "step": 99400 |
| }, |
| { |
| "epoch": 0.2295983642269151, |
| "grad_norm": 1.1045070886611938, |
| "learning_rate": 3.852008178865424e-05, |
| "loss": 0.6338, |
| "step": 99600 |
| }, |
| { |
| "epoch": 0.23005940511893702, |
| "grad_norm": 1.4003263711929321, |
| "learning_rate": 3.849702974405315e-05, |
| "loss": 0.6131, |
| "step": 99800 |
| }, |
| { |
| "epoch": 0.23052044601095895, |
| "grad_norm": 1.9223850965499878, |
| "learning_rate": 3.8473977699452054e-05, |
| "loss": 0.583, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.23052044601095895, |
| "eval_loss": 0.5901287198066711, |
| "eval_runtime": 144.11, |
| "eval_samples_per_second": 30.407, |
| "eval_steps_per_second": 30.407, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.23098148690298087, |
| "grad_norm": 3.727125883102417, |
| "learning_rate": 3.845092565485096e-05, |
| "loss": 0.5922, |
| "step": 100200 |
| }, |
| { |
| "epoch": 0.23144252779500277, |
| "grad_norm": 2.583871364593506, |
| "learning_rate": 3.8427873610249865e-05, |
| "loss": 0.5656, |
| "step": 100400 |
| }, |
| { |
| "epoch": 0.2319035686870247, |
| "grad_norm": 1.4674535989761353, |
| "learning_rate": 3.840482156564877e-05, |
| "loss": 0.6487, |
| "step": 100600 |
| }, |
| { |
| "epoch": 0.23236460957904662, |
| "grad_norm": 1.2001768350601196, |
| "learning_rate": 3.838176952104767e-05, |
| "loss": 0.5979, |
| "step": 100800 |
| }, |
| { |
| "epoch": 0.23282565047106854, |
| "grad_norm": 1.036700963973999, |
| "learning_rate": 3.8358717476446576e-05, |
| "loss": 0.5853, |
| "step": 101000 |
| }, |
| { |
| "epoch": 0.23328669136309044, |
| "grad_norm": 1.6959054470062256, |
| "learning_rate": 3.833566543184548e-05, |
| "loss": 0.6108, |
| "step": 101200 |
| }, |
| { |
| "epoch": 0.23374773225511236, |
| "grad_norm": 1.153205156326294, |
| "learning_rate": 3.831261338724439e-05, |
| "loss": 0.5994, |
| "step": 101400 |
| }, |
| { |
| "epoch": 0.23420877314713429, |
| "grad_norm": 1.5132783651351929, |
| "learning_rate": 3.8289561342643286e-05, |
| "loss": 0.5739, |
| "step": 101600 |
| }, |
| { |
| "epoch": 0.2346698140391562, |
| "grad_norm": 1.745678424835205, |
| "learning_rate": 3.826650929804219e-05, |
| "loss": 0.6051, |
| "step": 101800 |
| }, |
| { |
| "epoch": 0.2351308549311781, |
| "grad_norm": 1.6017553806304932, |
| "learning_rate": 3.82434572534411e-05, |
| "loss": 0.6234, |
| "step": 102000 |
| }, |
| { |
| "epoch": 0.23559189582320003, |
| "grad_norm": 1.4784915447235107, |
| "learning_rate": 3.8220405208839996e-05, |
| "loss": 0.61, |
| "step": 102200 |
| }, |
| { |
| "epoch": 0.23605293671522196, |
| "grad_norm": 1.5724163055419922, |
| "learning_rate": 3.81973531642389e-05, |
| "loss": 0.5704, |
| "step": 102400 |
| }, |
| { |
| "epoch": 0.23651397760724388, |
| "grad_norm": 1.936811923980713, |
| "learning_rate": 3.817430111963781e-05, |
| "loss": 0.6272, |
| "step": 102600 |
| }, |
| { |
| "epoch": 0.2369750184992658, |
| "grad_norm": 0.96824711561203, |
| "learning_rate": 3.815124907503671e-05, |
| "loss": 0.6139, |
| "step": 102800 |
| }, |
| { |
| "epoch": 0.2374360593912877, |
| "grad_norm": 1.1771214008331299, |
| "learning_rate": 3.812819703043561e-05, |
| "loss": 0.5996, |
| "step": 103000 |
| }, |
| { |
| "epoch": 0.23789710028330963, |
| "grad_norm": 1.3290009498596191, |
| "learning_rate": 3.810514498583452e-05, |
| "loss": 0.5637, |
| "step": 103200 |
| }, |
| { |
| "epoch": 0.23835814117533155, |
| "grad_norm": 1.389938473701477, |
| "learning_rate": 3.808209294123343e-05, |
| "loss": 0.5753, |
| "step": 103400 |
| }, |
| { |
| "epoch": 0.23881918206735347, |
| "grad_norm": 1.5995765924453735, |
| "learning_rate": 3.805904089663233e-05, |
| "loss": 0.5625, |
| "step": 103600 |
| }, |
| { |
| "epoch": 0.23928022295937537, |
| "grad_norm": 1.64626145362854, |
| "learning_rate": 3.8035988852031235e-05, |
| "loss": 0.6059, |
| "step": 103800 |
| }, |
| { |
| "epoch": 0.2397412638513973, |
| "grad_norm": 1.7561503648757935, |
| "learning_rate": 3.801293680743014e-05, |
| "loss": 0.5819, |
| "step": 104000 |
| }, |
| { |
| "epoch": 0.24020230474341922, |
| "grad_norm": 1.4345256090164185, |
| "learning_rate": 3.798988476282904e-05, |
| "loss": 0.6131, |
| "step": 104200 |
| }, |
| { |
| "epoch": 0.24066334563544114, |
| "grad_norm": 1.1421653032302856, |
| "learning_rate": 3.7966832718227945e-05, |
| "loss": 0.5468, |
| "step": 104400 |
| }, |
| { |
| "epoch": 0.24112438652746304, |
| "grad_norm": 1.356677532196045, |
| "learning_rate": 3.794378067362685e-05, |
| "loss": 0.5659, |
| "step": 104600 |
| }, |
| { |
| "epoch": 0.24158542741948497, |
| "grad_norm": 1.065327763557434, |
| "learning_rate": 3.7920728629025756e-05, |
| "loss": 0.5518, |
| "step": 104800 |
| }, |
| { |
| "epoch": 0.2420464683115069, |
| "grad_norm": 2.1725375652313232, |
| "learning_rate": 3.7897676584424655e-05, |
| "loss": 0.6386, |
| "step": 105000 |
| }, |
| { |
| "epoch": 0.24250750920352881, |
| "grad_norm": 1.0061650276184082, |
| "learning_rate": 3.787462453982356e-05, |
| "loss": 0.5936, |
| "step": 105200 |
| }, |
| { |
| "epoch": 0.2429685500955507, |
| "grad_norm": 1.8890901803970337, |
| "learning_rate": 3.7851572495222467e-05, |
| "loss": 0.5985, |
| "step": 105400 |
| }, |
| { |
| "epoch": 0.24342959098757264, |
| "grad_norm": 0.9927252531051636, |
| "learning_rate": 3.7828520450621365e-05, |
| "loss": 0.6082, |
| "step": 105600 |
| }, |
| { |
| "epoch": 0.24389063187959456, |
| "grad_norm": 1.791656494140625, |
| "learning_rate": 3.780546840602027e-05, |
| "loss": 0.5913, |
| "step": 105800 |
| }, |
| { |
| "epoch": 0.24435167277161648, |
| "grad_norm": 0.8485866785049438, |
| "learning_rate": 3.778241636141918e-05, |
| "loss": 0.5868, |
| "step": 106000 |
| }, |
| { |
| "epoch": 0.2448127136636384, |
| "grad_norm": 2.2644290924072266, |
| "learning_rate": 3.775936431681808e-05, |
| "loss": 0.5296, |
| "step": 106200 |
| }, |
| { |
| "epoch": 0.2452737545556603, |
| "grad_norm": 1.4203904867172241, |
| "learning_rate": 3.773631227221698e-05, |
| "loss": 0.542, |
| "step": 106400 |
| }, |
| { |
| "epoch": 0.24573479544768223, |
| "grad_norm": 1.9407037496566772, |
| "learning_rate": 3.771326022761589e-05, |
| "loss": 0.5666, |
| "step": 106600 |
| }, |
| { |
| "epoch": 0.24619583633970415, |
| "grad_norm": 0.9351466298103333, |
| "learning_rate": 3.76902081830148e-05, |
| "loss": 0.6103, |
| "step": 106800 |
| }, |
| { |
| "epoch": 0.24665687723172608, |
| "grad_norm": 0.9978102445602417, |
| "learning_rate": 3.76671561384137e-05, |
| "loss": 0.5896, |
| "step": 107000 |
| }, |
| { |
| "epoch": 0.24711791812374798, |
| "grad_norm": 1.419097900390625, |
| "learning_rate": 3.7644104093812604e-05, |
| "loss": 0.5511, |
| "step": 107200 |
| }, |
| { |
| "epoch": 0.2475789590157699, |
| "grad_norm": 0.8121142387390137, |
| "learning_rate": 3.762105204921151e-05, |
| "loss": 0.567, |
| "step": 107400 |
| }, |
| { |
| "epoch": 0.24803999990779182, |
| "grad_norm": 1.2004528045654297, |
| "learning_rate": 3.759800000461041e-05, |
| "loss": 0.5494, |
| "step": 107600 |
| }, |
| { |
| "epoch": 0.24850104079981375, |
| "grad_norm": 1.426767349243164, |
| "learning_rate": 3.7574947960009314e-05, |
| "loss": 0.5833, |
| "step": 107800 |
| }, |
| { |
| "epoch": 0.24896208169183565, |
| "grad_norm": 2.5049235820770264, |
| "learning_rate": 3.755189591540822e-05, |
| "loss": 0.6164, |
| "step": 108000 |
| }, |
| { |
| "epoch": 0.24942312258385757, |
| "grad_norm": 2.0731942653656006, |
| "learning_rate": 3.7528843870807126e-05, |
| "loss": 0.5709, |
| "step": 108200 |
| }, |
| { |
| "epoch": 0.2498841634758795, |
| "grad_norm": 1.43949556350708, |
| "learning_rate": 3.7505791826206025e-05, |
| "loss": 0.6137, |
| "step": 108400 |
| }, |
| { |
| "epoch": 0.2503452043679014, |
| "grad_norm": 1.452414870262146, |
| "learning_rate": 3.748273978160493e-05, |
| "loss": 0.5779, |
| "step": 108600 |
| }, |
| { |
| "epoch": 0.25080624525992334, |
| "grad_norm": 2.6152195930480957, |
| "learning_rate": 3.7459687737003836e-05, |
| "loss": 0.5681, |
| "step": 108800 |
| }, |
| { |
| "epoch": 0.25126728615194527, |
| "grad_norm": 1.348482370376587, |
| "learning_rate": 3.743663569240274e-05, |
| "loss": 0.6671, |
| "step": 109000 |
| }, |
| { |
| "epoch": 0.25172832704396714, |
| "grad_norm": 0.8128360509872437, |
| "learning_rate": 3.741358364780164e-05, |
| "loss": 0.5648, |
| "step": 109200 |
| }, |
| { |
| "epoch": 0.25218936793598906, |
| "grad_norm": 0.83039790391922, |
| "learning_rate": 3.7390531603200546e-05, |
| "loss": 0.6204, |
| "step": 109400 |
| }, |
| { |
| "epoch": 0.252650408828011, |
| "grad_norm": 1.9912052154541016, |
| "learning_rate": 3.736747955859945e-05, |
| "loss": 0.5364, |
| "step": 109600 |
| }, |
| { |
| "epoch": 0.2531114497200329, |
| "grad_norm": 1.4351979494094849, |
| "learning_rate": 3.734442751399835e-05, |
| "loss": 0.6486, |
| "step": 109800 |
| }, |
| { |
| "epoch": 0.25357249061205483, |
| "grad_norm": 1.6197021007537842, |
| "learning_rate": 3.7321375469397256e-05, |
| "loss": 0.5651, |
| "step": 110000 |
| }, |
| { |
| "epoch": 0.25403353150407676, |
| "grad_norm": 2.011810541152954, |
| "learning_rate": 3.729832342479617e-05, |
| "loss": 0.6064, |
| "step": 110200 |
| }, |
| { |
| "epoch": 0.2544945723960987, |
| "grad_norm": 1.3699722290039062, |
| "learning_rate": 3.727527138019507e-05, |
| "loss": 0.536, |
| "step": 110400 |
| }, |
| { |
| "epoch": 0.2549556132881206, |
| "grad_norm": 2.089066743850708, |
| "learning_rate": 3.7252219335593973e-05, |
| "loss": 0.6077, |
| "step": 110600 |
| }, |
| { |
| "epoch": 0.25541665418014253, |
| "grad_norm": 0.4626462459564209, |
| "learning_rate": 3.722916729099288e-05, |
| "loss": 0.5726, |
| "step": 110800 |
| }, |
| { |
| "epoch": 0.2558776950721644, |
| "grad_norm": 1.4077805280685425, |
| "learning_rate": 3.7206115246391785e-05, |
| "loss": 0.6218, |
| "step": 111000 |
| }, |
| { |
| "epoch": 0.2563387359641863, |
| "grad_norm": 2.0903522968292236, |
| "learning_rate": 3.7183063201790684e-05, |
| "loss": 0.5829, |
| "step": 111200 |
| }, |
| { |
| "epoch": 0.25679977685620825, |
| "grad_norm": 1.4433337450027466, |
| "learning_rate": 3.716001115718959e-05, |
| "loss": 0.6412, |
| "step": 111400 |
| }, |
| { |
| "epoch": 0.2572608177482302, |
| "grad_norm": 2.1463751792907715, |
| "learning_rate": 3.7136959112588495e-05, |
| "loss": 0.6183, |
| "step": 111600 |
| }, |
| { |
| "epoch": 0.2577218586402521, |
| "grad_norm": 0.8230465054512024, |
| "learning_rate": 3.7113907067987394e-05, |
| "loss": 0.5919, |
| "step": 111800 |
| }, |
| { |
| "epoch": 0.258182899532274, |
| "grad_norm": 1.8142331838607788, |
| "learning_rate": 3.70908550233863e-05, |
| "loss": 0.5895, |
| "step": 112000 |
| }, |
| { |
| "epoch": 0.25864394042429595, |
| "grad_norm": 1.7713125944137573, |
| "learning_rate": 3.7067802978785205e-05, |
| "loss": 0.591, |
| "step": 112200 |
| }, |
| { |
| "epoch": 0.2591049813163179, |
| "grad_norm": 1.0239676237106323, |
| "learning_rate": 3.704475093418411e-05, |
| "loss": 0.6209, |
| "step": 112400 |
| }, |
| { |
| "epoch": 0.25956602220833974, |
| "grad_norm": 2.02620267868042, |
| "learning_rate": 3.702169888958301e-05, |
| "loss": 0.5581, |
| "step": 112600 |
| }, |
| { |
| "epoch": 0.26002706310036167, |
| "grad_norm": 1.8414267301559448, |
| "learning_rate": 3.6998646844981916e-05, |
| "loss": 0.6137, |
| "step": 112800 |
| }, |
| { |
| "epoch": 0.2604881039923836, |
| "grad_norm": 1.4095929861068726, |
| "learning_rate": 3.697559480038082e-05, |
| "loss": 0.6136, |
| "step": 113000 |
| }, |
| { |
| "epoch": 0.2609491448844055, |
| "grad_norm": 1.6548664569854736, |
| "learning_rate": 3.695254275577972e-05, |
| "loss": 0.5464, |
| "step": 113200 |
| }, |
| { |
| "epoch": 0.26141018577642744, |
| "grad_norm": 1.0387002229690552, |
| "learning_rate": 3.6929490711178626e-05, |
| "loss": 0.6102, |
| "step": 113400 |
| }, |
| { |
| "epoch": 0.26187122666844936, |
| "grad_norm": 0.6978960633277893, |
| "learning_rate": 3.690643866657754e-05, |
| "loss": 0.5755, |
| "step": 113600 |
| }, |
| { |
| "epoch": 0.2623322675604713, |
| "grad_norm": 1.7503503561019897, |
| "learning_rate": 3.688338662197644e-05, |
| "loss": 0.5449, |
| "step": 113800 |
| }, |
| { |
| "epoch": 0.2627933084524932, |
| "grad_norm": 0.6255602836608887, |
| "learning_rate": 3.686033457737534e-05, |
| "loss": 0.5577, |
| "step": 114000 |
| }, |
| { |
| "epoch": 0.26325434934451514, |
| "grad_norm": 1.001632571220398, |
| "learning_rate": 3.683728253277425e-05, |
| "loss": 0.6007, |
| "step": 114200 |
| }, |
| { |
| "epoch": 0.263715390236537, |
| "grad_norm": 1.6783490180969238, |
| "learning_rate": 3.6814230488173154e-05, |
| "loss": 0.5887, |
| "step": 114400 |
| }, |
| { |
| "epoch": 0.26417643112855893, |
| "grad_norm": 0.6255197525024414, |
| "learning_rate": 3.679117844357205e-05, |
| "loss": 0.5561, |
| "step": 114600 |
| }, |
| { |
| "epoch": 0.26463747202058086, |
| "grad_norm": 2.288745880126953, |
| "learning_rate": 3.676812639897096e-05, |
| "loss": 0.5486, |
| "step": 114800 |
| }, |
| { |
| "epoch": 0.2650985129126028, |
| "grad_norm": 1.1330058574676514, |
| "learning_rate": 3.6745074354369864e-05, |
| "loss": 0.5508, |
| "step": 115000 |
| }, |
| { |
| "epoch": 0.2655595538046247, |
| "grad_norm": 1.2491919994354248, |
| "learning_rate": 3.672202230976876e-05, |
| "loss": 0.6188, |
| "step": 115200 |
| }, |
| { |
| "epoch": 0.26602059469664663, |
| "grad_norm": 1.020461916923523, |
| "learning_rate": 3.669897026516767e-05, |
| "loss": 0.6308, |
| "step": 115400 |
| }, |
| { |
| "epoch": 0.26648163558866855, |
| "grad_norm": 1.3160836696624756, |
| "learning_rate": 3.6675918220566575e-05, |
| "loss": 0.6101, |
| "step": 115600 |
| }, |
| { |
| "epoch": 0.2669426764806905, |
| "grad_norm": 1.1758986711502075, |
| "learning_rate": 3.665286617596548e-05, |
| "loss": 0.5964, |
| "step": 115800 |
| }, |
| { |
| "epoch": 0.26740371737271235, |
| "grad_norm": 0.9118921756744385, |
| "learning_rate": 3.662981413136438e-05, |
| "loss": 0.5713, |
| "step": 116000 |
| }, |
| { |
| "epoch": 0.26786475826473427, |
| "grad_norm": 1.9953539371490479, |
| "learning_rate": 3.6606762086763285e-05, |
| "loss": 0.5761, |
| "step": 116200 |
| }, |
| { |
| "epoch": 0.2683257991567562, |
| "grad_norm": 1.5514432191848755, |
| "learning_rate": 3.658371004216219e-05, |
| "loss": 0.5848, |
| "step": 116400 |
| }, |
| { |
| "epoch": 0.2687868400487781, |
| "grad_norm": 0.9288082122802734, |
| "learning_rate": 3.6560657997561096e-05, |
| "loss": 0.5664, |
| "step": 116600 |
| }, |
| { |
| "epoch": 0.26924788094080004, |
| "grad_norm": 1.547339677810669, |
| "learning_rate": 3.6537605952959995e-05, |
| "loss": 0.5863, |
| "step": 116800 |
| }, |
| { |
| "epoch": 0.26970892183282197, |
| "grad_norm": 1.671633005142212, |
| "learning_rate": 3.651455390835891e-05, |
| "loss": 0.5271, |
| "step": 117000 |
| }, |
| { |
| "epoch": 0.2701699627248439, |
| "grad_norm": 0.9012247920036316, |
| "learning_rate": 3.6491501863757807e-05, |
| "loss": 0.5724, |
| "step": 117200 |
| }, |
| { |
| "epoch": 0.2706310036168658, |
| "grad_norm": 2.2852792739868164, |
| "learning_rate": 3.646844981915671e-05, |
| "loss": 0.5644, |
| "step": 117400 |
| }, |
| { |
| "epoch": 0.27109204450888774, |
| "grad_norm": 1.312666893005371, |
| "learning_rate": 3.644539777455562e-05, |
| "loss": 0.612, |
| "step": 117600 |
| }, |
| { |
| "epoch": 0.2715530854009096, |
| "grad_norm": 0.9513750672340393, |
| "learning_rate": 3.6422345729954524e-05, |
| "loss": 0.5791, |
| "step": 117800 |
| }, |
| { |
| "epoch": 0.27201412629293154, |
| "grad_norm": 1.9773327112197876, |
| "learning_rate": 3.639929368535342e-05, |
| "loss": 0.5628, |
| "step": 118000 |
| }, |
| { |
| "epoch": 0.27247516718495346, |
| "grad_norm": 1.666195273399353, |
| "learning_rate": 3.637624164075233e-05, |
| "loss": 0.5722, |
| "step": 118200 |
| }, |
| { |
| "epoch": 0.2729362080769754, |
| "grad_norm": 1.6101315021514893, |
| "learning_rate": 3.6353189596151234e-05, |
| "loss": 0.6474, |
| "step": 118400 |
| }, |
| { |
| "epoch": 0.2733972489689973, |
| "grad_norm": 0.8097496628761292, |
| "learning_rate": 3.633013755155014e-05, |
| "loss": 0.5353, |
| "step": 118600 |
| }, |
| { |
| "epoch": 0.27385828986101923, |
| "grad_norm": 1.7693250179290771, |
| "learning_rate": 3.630708550694904e-05, |
| "loss": 0.6161, |
| "step": 118800 |
| }, |
| { |
| "epoch": 0.27431933075304116, |
| "grad_norm": 1.4188885688781738, |
| "learning_rate": 3.6284033462347944e-05, |
| "loss": 0.6031, |
| "step": 119000 |
| }, |
| { |
| "epoch": 0.2747803716450631, |
| "grad_norm": 0.7906126379966736, |
| "learning_rate": 3.626098141774685e-05, |
| "loss": 0.5421, |
| "step": 119200 |
| }, |
| { |
| "epoch": 0.27524141253708495, |
| "grad_norm": 1.3080761432647705, |
| "learning_rate": 3.623792937314575e-05, |
| "loss": 0.565, |
| "step": 119400 |
| }, |
| { |
| "epoch": 0.2757024534291069, |
| "grad_norm": 1.3079235553741455, |
| "learning_rate": 3.6214877328544654e-05, |
| "loss": 0.5828, |
| "step": 119600 |
| }, |
| { |
| "epoch": 0.2761634943211288, |
| "grad_norm": 1.9901784658432007, |
| "learning_rate": 3.619182528394356e-05, |
| "loss": 0.5621, |
| "step": 119800 |
| }, |
| { |
| "epoch": 0.2766245352131507, |
| "grad_norm": 0.5003865957260132, |
| "learning_rate": 3.6168773239342466e-05, |
| "loss": 0.5374, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.27708557610517265, |
| "grad_norm": 1.5458438396453857, |
| "learning_rate": 3.614572119474137e-05, |
| "loss": 0.5449, |
| "step": 120200 |
| }, |
| { |
| "epoch": 0.2775466169971946, |
| "grad_norm": 1.4383118152618408, |
| "learning_rate": 3.612266915014028e-05, |
| "loss": 0.6142, |
| "step": 120400 |
| }, |
| { |
| "epoch": 0.2780076578892165, |
| "grad_norm": 1.1855522394180298, |
| "learning_rate": 3.609961710553918e-05, |
| "loss": 0.564, |
| "step": 120600 |
| }, |
| { |
| "epoch": 0.2784686987812384, |
| "grad_norm": 0.840207040309906, |
| "learning_rate": 3.607656506093808e-05, |
| "loss": 0.5621, |
| "step": 120800 |
| }, |
| { |
| "epoch": 0.27892973967326035, |
| "grad_norm": 1.0996273756027222, |
| "learning_rate": 3.605351301633699e-05, |
| "loss": 0.5671, |
| "step": 121000 |
| }, |
| { |
| "epoch": 0.2793907805652822, |
| "grad_norm": 1.7531362771987915, |
| "learning_rate": 3.603046097173589e-05, |
| "loss": 0.6016, |
| "step": 121200 |
| }, |
| { |
| "epoch": 0.27985182145730414, |
| "grad_norm": 0.8433918952941895, |
| "learning_rate": 3.600740892713479e-05, |
| "loss": 0.632, |
| "step": 121400 |
| }, |
| { |
| "epoch": 0.28031286234932606, |
| "grad_norm": 0.8943939208984375, |
| "learning_rate": 3.59843568825337e-05, |
| "loss": 0.4969, |
| "step": 121600 |
| }, |
| { |
| "epoch": 0.280773903241348, |
| "grad_norm": 0.8883448839187622, |
| "learning_rate": 3.59613048379326e-05, |
| "loss": 0.5624, |
| "step": 121800 |
| }, |
| { |
| "epoch": 0.2812349441333699, |
| "grad_norm": 1.5441436767578125, |
| "learning_rate": 3.593825279333151e-05, |
| "loss": 0.5934, |
| "step": 122000 |
| }, |
| { |
| "epoch": 0.28169598502539184, |
| "grad_norm": 1.6779813766479492, |
| "learning_rate": 3.591520074873041e-05, |
| "loss": 0.5975, |
| "step": 122200 |
| }, |
| { |
| "epoch": 0.28215702591741376, |
| "grad_norm": 1.3484402894973755, |
| "learning_rate": 3.5892148704129313e-05, |
| "loss": 0.6151, |
| "step": 122400 |
| }, |
| { |
| "epoch": 0.2826180668094357, |
| "grad_norm": 0.881047785282135, |
| "learning_rate": 3.586909665952822e-05, |
| "loss": 0.5377, |
| "step": 122600 |
| }, |
| { |
| "epoch": 0.28307910770145756, |
| "grad_norm": 2.1730856895446777, |
| "learning_rate": 3.584604461492712e-05, |
| "loss": 0.5002, |
| "step": 122800 |
| }, |
| { |
| "epoch": 0.2835401485934795, |
| "grad_norm": 1.7546623945236206, |
| "learning_rate": 3.5822992570326024e-05, |
| "loss": 0.5439, |
| "step": 123000 |
| }, |
| { |
| "epoch": 0.2840011894855014, |
| "grad_norm": 1.6560966968536377, |
| "learning_rate": 3.579994052572493e-05, |
| "loss": 0.5278, |
| "step": 123200 |
| }, |
| { |
| "epoch": 0.28446223037752333, |
| "grad_norm": 1.4443609714508057, |
| "learning_rate": 3.5776888481123835e-05, |
| "loss": 0.631, |
| "step": 123400 |
| }, |
| { |
| "epoch": 0.28492327126954525, |
| "grad_norm": 1.6837761402130127, |
| "learning_rate": 3.575383643652274e-05, |
| "loss": 0.5833, |
| "step": 123600 |
| }, |
| { |
| "epoch": 0.2853843121615672, |
| "grad_norm": 1.0554946660995483, |
| "learning_rate": 3.5730784391921646e-05, |
| "loss": 0.5635, |
| "step": 123800 |
| }, |
| { |
| "epoch": 0.2858453530535891, |
| "grad_norm": 1.2719945907592773, |
| "learning_rate": 3.570773234732055e-05, |
| "loss": 0.5692, |
| "step": 124000 |
| }, |
| { |
| "epoch": 0.286306393945611, |
| "grad_norm": 0.48329654335975647, |
| "learning_rate": 3.568468030271945e-05, |
| "loss": 0.5724, |
| "step": 124200 |
| }, |
| { |
| "epoch": 0.28676743483763295, |
| "grad_norm": 1.2862858772277832, |
| "learning_rate": 3.566162825811836e-05, |
| "loss": 0.5593, |
| "step": 124400 |
| }, |
| { |
| "epoch": 0.2872284757296548, |
| "grad_norm": 2.067934513092041, |
| "learning_rate": 3.563857621351726e-05, |
| "loss": 0.5513, |
| "step": 124600 |
| }, |
| { |
| "epoch": 0.28768951662167674, |
| "grad_norm": 1.8785241842269897, |
| "learning_rate": 3.561552416891616e-05, |
| "loss": 0.5874, |
| "step": 124800 |
| }, |
| { |
| "epoch": 0.28815055751369867, |
| "grad_norm": 3.0009591579437256, |
| "learning_rate": 3.559247212431507e-05, |
| "loss": 0.5906, |
| "step": 125000 |
| }, |
| { |
| "epoch": 0.2886115984057206, |
| "grad_norm": 1.5266379117965698, |
| "learning_rate": 3.556942007971397e-05, |
| "loss": 0.6025, |
| "step": 125200 |
| }, |
| { |
| "epoch": 0.2890726392977425, |
| "grad_norm": 1.0007365942001343, |
| "learning_rate": 3.554636803511288e-05, |
| "loss": 0.5562, |
| "step": 125400 |
| }, |
| { |
| "epoch": 0.28953368018976444, |
| "grad_norm": 2.2831757068634033, |
| "learning_rate": 3.552331599051178e-05, |
| "loss": 0.6007, |
| "step": 125600 |
| }, |
| { |
| "epoch": 0.28999472108178637, |
| "grad_norm": 1.6605206727981567, |
| "learning_rate": 3.550026394591068e-05, |
| "loss": 0.5505, |
| "step": 125800 |
| }, |
| { |
| "epoch": 0.2904557619738083, |
| "grad_norm": 1.3791511058807373, |
| "learning_rate": 3.547721190130959e-05, |
| "loss": 0.6039, |
| "step": 126000 |
| }, |
| { |
| "epoch": 0.29091680286583016, |
| "grad_norm": 1.0427671670913696, |
| "learning_rate": 3.5454159856708494e-05, |
| "loss": 0.5216, |
| "step": 126200 |
| }, |
| { |
| "epoch": 0.2913778437578521, |
| "grad_norm": 1.1405614614486694, |
| "learning_rate": 3.543110781210739e-05, |
| "loss": 0.5689, |
| "step": 126400 |
| }, |
| { |
| "epoch": 0.291838884649874, |
| "grad_norm": 2.266157388687134, |
| "learning_rate": 3.54080557675063e-05, |
| "loss": 0.5273, |
| "step": 126600 |
| }, |
| { |
| "epoch": 0.29229992554189593, |
| "grad_norm": 1.7301876544952393, |
| "learning_rate": 3.5385003722905204e-05, |
| "loss": 0.5355, |
| "step": 126800 |
| }, |
| { |
| "epoch": 0.29276096643391786, |
| "grad_norm": 0.9307401180267334, |
| "learning_rate": 3.536195167830411e-05, |
| "loss": 0.5431, |
| "step": 127000 |
| }, |
| { |
| "epoch": 0.2932220073259398, |
| "grad_norm": 1.8494658470153809, |
| "learning_rate": 3.5338899633703016e-05, |
| "loss": 0.5768, |
| "step": 127200 |
| }, |
| { |
| "epoch": 0.2936830482179617, |
| "grad_norm": 1.0275499820709229, |
| "learning_rate": 3.531584758910192e-05, |
| "loss": 0.5996, |
| "step": 127400 |
| }, |
| { |
| "epoch": 0.29414408910998363, |
| "grad_norm": 0.5210323333740234, |
| "learning_rate": 3.529279554450082e-05, |
| "loss": 0.5473, |
| "step": 127600 |
| }, |
| { |
| "epoch": 0.29460513000200556, |
| "grad_norm": 1.827402114868164, |
| "learning_rate": 3.5269743499899726e-05, |
| "loss": 0.5728, |
| "step": 127800 |
| }, |
| { |
| "epoch": 0.2950661708940274, |
| "grad_norm": 2.054245948791504, |
| "learning_rate": 3.524669145529863e-05, |
| "loss": 0.6179, |
| "step": 128000 |
| }, |
| { |
| "epoch": 0.29552721178604935, |
| "grad_norm": 1.6693862676620483, |
| "learning_rate": 3.522363941069754e-05, |
| "loss": 0.5453, |
| "step": 128200 |
| }, |
| { |
| "epoch": 0.2959882526780713, |
| "grad_norm": 23.072887420654297, |
| "learning_rate": 3.5200587366096436e-05, |
| "loss": 0.5791, |
| "step": 128400 |
| }, |
| { |
| "epoch": 0.2964492935700932, |
| "grad_norm": 1.1938518285751343, |
| "learning_rate": 3.517753532149534e-05, |
| "loss": 0.5507, |
| "step": 128600 |
| }, |
| { |
| "epoch": 0.2969103344621151, |
| "grad_norm": 2.9994335174560547, |
| "learning_rate": 3.515448327689425e-05, |
| "loss": 0.5737, |
| "step": 128800 |
| }, |
| { |
| "epoch": 0.29737137535413705, |
| "grad_norm": 2.0268101692199707, |
| "learning_rate": 3.5131431232293146e-05, |
| "loss": 0.5941, |
| "step": 129000 |
| }, |
| { |
| "epoch": 0.29783241624615897, |
| "grad_norm": 1.4600251913070679, |
| "learning_rate": 3.510837918769205e-05, |
| "loss": 0.5456, |
| "step": 129200 |
| }, |
| { |
| "epoch": 0.2982934571381809, |
| "grad_norm": 0.5370715260505676, |
| "learning_rate": 3.508532714309096e-05, |
| "loss": 0.5618, |
| "step": 129400 |
| }, |
| { |
| "epoch": 0.29875449803020276, |
| "grad_norm": 1.65589439868927, |
| "learning_rate": 3.5062275098489864e-05, |
| "loss": 0.5189, |
| "step": 129600 |
| }, |
| { |
| "epoch": 0.2992155389222247, |
| "grad_norm": 1.9053618907928467, |
| "learning_rate": 3.503922305388876e-05, |
| "loss": 0.5698, |
| "step": 129800 |
| }, |
| { |
| "epoch": 0.2996765798142466, |
| "grad_norm": 0.9981529116630554, |
| "learning_rate": 3.501617100928767e-05, |
| "loss": 0.5622, |
| "step": 130000 |
| }, |
| { |
| "epoch": 0.30013762070626854, |
| "grad_norm": 1.5136228799819946, |
| "learning_rate": 3.4993118964686574e-05, |
| "loss": 0.5812, |
| "step": 130200 |
| }, |
| { |
| "epoch": 0.30059866159829046, |
| "grad_norm": 1.9930968284606934, |
| "learning_rate": 3.497006692008548e-05, |
| "loss": 0.5754, |
| "step": 130400 |
| }, |
| { |
| "epoch": 0.3010597024903124, |
| "grad_norm": 1.6242766380310059, |
| "learning_rate": 3.4947014875484385e-05, |
| "loss": 0.6422, |
| "step": 130600 |
| }, |
| { |
| "epoch": 0.3015207433823343, |
| "grad_norm": 1.142068862915039, |
| "learning_rate": 3.492396283088329e-05, |
| "loss": 0.5647, |
| "step": 130800 |
| }, |
| { |
| "epoch": 0.30198178427435624, |
| "grad_norm": 0.8593564629554749, |
| "learning_rate": 3.490091078628219e-05, |
| "loss": 0.5709, |
| "step": 131000 |
| }, |
| { |
| "epoch": 0.3024428251663781, |
| "grad_norm": 1.0364127159118652, |
| "learning_rate": 3.4877858741681095e-05, |
| "loss": 0.6261, |
| "step": 131200 |
| }, |
| { |
| "epoch": 0.30290386605840003, |
| "grad_norm": 0.7950695157051086, |
| "learning_rate": 3.485480669708e-05, |
| "loss": 0.5276, |
| "step": 131400 |
| }, |
| { |
| "epoch": 0.30336490695042195, |
| "grad_norm": 0.7673638463020325, |
| "learning_rate": 3.483175465247891e-05, |
| "loss": 0.5289, |
| "step": 131600 |
| }, |
| { |
| "epoch": 0.3038259478424439, |
| "grad_norm": 0.7830930948257446, |
| "learning_rate": 3.4808702607877806e-05, |
| "loss": 0.512, |
| "step": 131800 |
| }, |
| { |
| "epoch": 0.3042869887344658, |
| "grad_norm": 2.0144901275634766, |
| "learning_rate": 3.478565056327671e-05, |
| "loss": 0.5974, |
| "step": 132000 |
| }, |
| { |
| "epoch": 0.3047480296264877, |
| "grad_norm": 1.531823754310608, |
| "learning_rate": 3.476259851867562e-05, |
| "loss": 0.5889, |
| "step": 132200 |
| }, |
| { |
| "epoch": 0.30520907051850965, |
| "grad_norm": 1.1989134550094604, |
| "learning_rate": 3.4739546474074516e-05, |
| "loss": 0.5664, |
| "step": 132400 |
| }, |
| { |
| "epoch": 0.3056701114105316, |
| "grad_norm": 1.5596988201141357, |
| "learning_rate": 3.471649442947342e-05, |
| "loss": 0.5465, |
| "step": 132600 |
| }, |
| { |
| "epoch": 0.3061311523025535, |
| "grad_norm": 1.2339794635772705, |
| "learning_rate": 3.469344238487233e-05, |
| "loss": 0.5387, |
| "step": 132800 |
| }, |
| { |
| "epoch": 0.30659219319457537, |
| "grad_norm": 0.7480385303497314, |
| "learning_rate": 3.467039034027123e-05, |
| "loss": 0.5744, |
| "step": 133000 |
| }, |
| { |
| "epoch": 0.3070532340865973, |
| "grad_norm": 1.1106038093566895, |
| "learning_rate": 3.464733829567013e-05, |
| "loss": 0.5523, |
| "step": 133200 |
| }, |
| { |
| "epoch": 0.3075142749786192, |
| "grad_norm": 1.145395040512085, |
| "learning_rate": 3.462428625106904e-05, |
| "loss": 0.5758, |
| "step": 133400 |
| }, |
| { |
| "epoch": 0.30797531587064114, |
| "grad_norm": 1.4697068929672241, |
| "learning_rate": 3.460123420646794e-05, |
| "loss": 0.5938, |
| "step": 133600 |
| }, |
| { |
| "epoch": 0.30843635676266307, |
| "grad_norm": 1.8657139539718628, |
| "learning_rate": 3.457818216186685e-05, |
| "loss": 0.612, |
| "step": 133800 |
| }, |
| { |
| "epoch": 0.308897397654685, |
| "grad_norm": 1.3529716730117798, |
| "learning_rate": 3.4555130117265754e-05, |
| "loss": 0.6109, |
| "step": 134000 |
| }, |
| { |
| "epoch": 0.3093584385467069, |
| "grad_norm": 1.7217750549316406, |
| "learning_rate": 3.453207807266466e-05, |
| "loss": 0.5585, |
| "step": 134200 |
| }, |
| { |
| "epoch": 0.30981947943872884, |
| "grad_norm": 2.0881683826446533, |
| "learning_rate": 3.450902602806356e-05, |
| "loss": 0.5603, |
| "step": 134400 |
| }, |
| { |
| "epoch": 0.3102805203307507, |
| "grad_norm": 1.4093154668807983, |
| "learning_rate": 3.4485973983462465e-05, |
| "loss": 0.6025, |
| "step": 134600 |
| }, |
| { |
| "epoch": 0.31074156122277263, |
| "grad_norm": 1.2909964323043823, |
| "learning_rate": 3.446292193886137e-05, |
| "loss": 0.6318, |
| "step": 134800 |
| }, |
| { |
| "epoch": 0.31120260211479456, |
| "grad_norm": 1.9000458717346191, |
| "learning_rate": 3.4439869894260276e-05, |
| "loss": 0.5565, |
| "step": 135000 |
| }, |
| { |
| "epoch": 0.3116636430068165, |
| "grad_norm": 1.2994461059570312, |
| "learning_rate": 3.4416817849659175e-05, |
| "loss": 0.5426, |
| "step": 135200 |
| }, |
| { |
| "epoch": 0.3121246838988384, |
| "grad_norm": 0.6507192850112915, |
| "learning_rate": 3.439376580505808e-05, |
| "loss": 0.5631, |
| "step": 135400 |
| }, |
| { |
| "epoch": 0.31258572479086033, |
| "grad_norm": 1.4689639806747437, |
| "learning_rate": 3.4370713760456986e-05, |
| "loss": 0.6069, |
| "step": 135600 |
| }, |
| { |
| "epoch": 0.31304676568288226, |
| "grad_norm": 0.9149547219276428, |
| "learning_rate": 3.434766171585589e-05, |
| "loss": 0.5872, |
| "step": 135800 |
| }, |
| { |
| "epoch": 0.3135078065749042, |
| "grad_norm": 1.8406304121017456, |
| "learning_rate": 3.432460967125479e-05, |
| "loss": 0.5729, |
| "step": 136000 |
| }, |
| { |
| "epoch": 0.3139688474669261, |
| "grad_norm": 1.9627593755722046, |
| "learning_rate": 3.4301557626653697e-05, |
| "loss": 0.5771, |
| "step": 136200 |
| }, |
| { |
| "epoch": 0.314429888358948, |
| "grad_norm": 0.7546736001968384, |
| "learning_rate": 3.42785055820526e-05, |
| "loss": 0.4629, |
| "step": 136400 |
| }, |
| { |
| "epoch": 0.3148909292509699, |
| "grad_norm": 1.3984806537628174, |
| "learning_rate": 3.42554535374515e-05, |
| "loss": 0.5377, |
| "step": 136600 |
| }, |
| { |
| "epoch": 0.3153519701429918, |
| "grad_norm": 1.5485873222351074, |
| "learning_rate": 3.423240149285041e-05, |
| "loss": 0.5739, |
| "step": 136800 |
| }, |
| { |
| "epoch": 0.31581301103501375, |
| "grad_norm": 1.7093192338943481, |
| "learning_rate": 3.420934944824931e-05, |
| "loss": 0.5751, |
| "step": 137000 |
| }, |
| { |
| "epoch": 0.31627405192703567, |
| "grad_norm": 1.5941184759140015, |
| "learning_rate": 3.418629740364822e-05, |
| "loss": 0.555, |
| "step": 137200 |
| }, |
| { |
| "epoch": 0.3167350928190576, |
| "grad_norm": 1.0753742456436157, |
| "learning_rate": 3.4163245359047124e-05, |
| "loss": 0.5638, |
| "step": 137400 |
| }, |
| { |
| "epoch": 0.3171961337110795, |
| "grad_norm": 1.171726107597351, |
| "learning_rate": 3.414019331444603e-05, |
| "loss": 0.5748, |
| "step": 137600 |
| }, |
| { |
| "epoch": 0.31765717460310144, |
| "grad_norm": 1.5128881931304932, |
| "learning_rate": 3.4117141269844935e-05, |
| "loss": 0.5728, |
| "step": 137800 |
| }, |
| { |
| "epoch": 0.3181182154951233, |
| "grad_norm": 2.131058692932129, |
| "learning_rate": 3.4094089225243834e-05, |
| "loss": 0.5536, |
| "step": 138000 |
| }, |
| { |
| "epoch": 0.31857925638714524, |
| "grad_norm": 1.5034462213516235, |
| "learning_rate": 3.407103718064274e-05, |
| "loss": 0.5505, |
| "step": 138200 |
| }, |
| { |
| "epoch": 0.31904029727916716, |
| "grad_norm": 1.4908447265625, |
| "learning_rate": 3.4047985136041645e-05, |
| "loss": 0.5813, |
| "step": 138400 |
| }, |
| { |
| "epoch": 0.3195013381711891, |
| "grad_norm": 1.6707509756088257, |
| "learning_rate": 3.4024933091440544e-05, |
| "loss": 0.5984, |
| "step": 138600 |
| }, |
| { |
| "epoch": 0.319962379063211, |
| "grad_norm": 1.7882601022720337, |
| "learning_rate": 3.400188104683945e-05, |
| "loss": 0.5801, |
| "step": 138800 |
| }, |
| { |
| "epoch": 0.32042341995523294, |
| "grad_norm": 2.314807176589966, |
| "learning_rate": 3.3978829002238356e-05, |
| "loss": 0.5608, |
| "step": 139000 |
| }, |
| { |
| "epoch": 0.32088446084725486, |
| "grad_norm": 0.6125404834747314, |
| "learning_rate": 3.395577695763726e-05, |
| "loss": 0.5732, |
| "step": 139200 |
| }, |
| { |
| "epoch": 0.3213455017392768, |
| "grad_norm": 1.9929119348526, |
| "learning_rate": 3.393272491303616e-05, |
| "loss": 0.5998, |
| "step": 139400 |
| }, |
| { |
| "epoch": 0.3218065426312987, |
| "grad_norm": 1.571915626525879, |
| "learning_rate": 3.3909672868435066e-05, |
| "loss": 0.5613, |
| "step": 139600 |
| }, |
| { |
| "epoch": 0.3222675835233206, |
| "grad_norm": 1.3218785524368286, |
| "learning_rate": 3.388662082383397e-05, |
| "loss": 0.5558, |
| "step": 139800 |
| }, |
| { |
| "epoch": 0.3227286244153425, |
| "grad_norm": 1.0370618104934692, |
| "learning_rate": 3.386356877923287e-05, |
| "loss": 0.5212, |
| "step": 140000 |
| }, |
| { |
| "epoch": 0.3231896653073644, |
| "grad_norm": 1.202951431274414, |
| "learning_rate": 3.3840516734631776e-05, |
| "loss": 0.5084, |
| "step": 140200 |
| }, |
| { |
| "epoch": 0.32365070619938635, |
| "grad_norm": 1.7719680070877075, |
| "learning_rate": 3.381746469003068e-05, |
| "loss": 0.5619, |
| "step": 140400 |
| }, |
| { |
| "epoch": 0.3241117470914083, |
| "grad_norm": 1.611811876296997, |
| "learning_rate": 3.379441264542959e-05, |
| "loss": 0.5645, |
| "step": 140600 |
| }, |
| { |
| "epoch": 0.3245727879834302, |
| "grad_norm": 1.4955034255981445, |
| "learning_rate": 3.377136060082849e-05, |
| "loss": 0.5335, |
| "step": 140800 |
| }, |
| { |
| "epoch": 0.3250338288754521, |
| "grad_norm": 1.1228415966033936, |
| "learning_rate": 3.37483085562274e-05, |
| "loss": 0.538, |
| "step": 141000 |
| }, |
| { |
| "epoch": 0.32549486976747405, |
| "grad_norm": 0.8524361848831177, |
| "learning_rate": 3.3725256511626305e-05, |
| "loss": 0.5565, |
| "step": 141200 |
| }, |
| { |
| "epoch": 0.3259559106594959, |
| "grad_norm": 0.7709594368934631, |
| "learning_rate": 3.3702204467025203e-05, |
| "loss": 0.5728, |
| "step": 141400 |
| }, |
| { |
| "epoch": 0.32641695155151784, |
| "grad_norm": 0.9017342329025269, |
| "learning_rate": 3.367915242242411e-05, |
| "loss": 0.574, |
| "step": 141600 |
| }, |
| { |
| "epoch": 0.32687799244353977, |
| "grad_norm": 1.6135542392730713, |
| "learning_rate": 3.3656100377823015e-05, |
| "loss": 0.5467, |
| "step": 141800 |
| }, |
| { |
| "epoch": 0.3273390333355617, |
| "grad_norm": 1.0958969593048096, |
| "learning_rate": 3.3633048333221914e-05, |
| "loss": 0.5548, |
| "step": 142000 |
| }, |
| { |
| "epoch": 0.3278000742275836, |
| "grad_norm": 0.8333266973495483, |
| "learning_rate": 3.360999628862082e-05, |
| "loss": 0.6149, |
| "step": 142200 |
| }, |
| { |
| "epoch": 0.32826111511960554, |
| "grad_norm": 1.3214168548583984, |
| "learning_rate": 3.3586944244019725e-05, |
| "loss": 0.5691, |
| "step": 142400 |
| }, |
| { |
| "epoch": 0.32872215601162746, |
| "grad_norm": 1.9546606540679932, |
| "learning_rate": 3.356389219941863e-05, |
| "loss": 0.5188, |
| "step": 142600 |
| }, |
| { |
| "epoch": 0.3291831969036494, |
| "grad_norm": 2.063167095184326, |
| "learning_rate": 3.354084015481753e-05, |
| "loss": 0.5576, |
| "step": 142800 |
| }, |
| { |
| "epoch": 0.3296442377956713, |
| "grad_norm": 1.5281319618225098, |
| "learning_rate": 3.3517788110216435e-05, |
| "loss": 0.6239, |
| "step": 143000 |
| }, |
| { |
| "epoch": 0.3301052786876932, |
| "grad_norm": 0.9940102696418762, |
| "learning_rate": 3.349473606561534e-05, |
| "loss": 0.5521, |
| "step": 143200 |
| }, |
| { |
| "epoch": 0.3305663195797151, |
| "grad_norm": 0.5748217105865479, |
| "learning_rate": 3.347168402101425e-05, |
| "loss": 0.536, |
| "step": 143400 |
| }, |
| { |
| "epoch": 0.33102736047173703, |
| "grad_norm": 1.7020162343978882, |
| "learning_rate": 3.3448631976413146e-05, |
| "loss": 0.573, |
| "step": 143600 |
| }, |
| { |
| "epoch": 0.33148840136375896, |
| "grad_norm": 1.1483004093170166, |
| "learning_rate": 3.342557993181205e-05, |
| "loss": 0.5677, |
| "step": 143800 |
| }, |
| { |
| "epoch": 0.3319494422557809, |
| "grad_norm": 0.9976577162742615, |
| "learning_rate": 3.3402527887210964e-05, |
| "loss": 0.5171, |
| "step": 144000 |
| }, |
| { |
| "epoch": 0.3324104831478028, |
| "grad_norm": 1.9477131366729736, |
| "learning_rate": 3.337947584260986e-05, |
| "loss": 0.5206, |
| "step": 144200 |
| }, |
| { |
| "epoch": 0.33287152403982473, |
| "grad_norm": 2.5591280460357666, |
| "learning_rate": 3.335642379800877e-05, |
| "loss": 0.5785, |
| "step": 144400 |
| }, |
| { |
| "epoch": 0.33333256493184665, |
| "grad_norm": 0.9699960947036743, |
| "learning_rate": 3.3333371753407674e-05, |
| "loss": 0.5573, |
| "step": 144600 |
| }, |
| { |
| "epoch": 0.3337936058238685, |
| "grad_norm": 1.0641608238220215, |
| "learning_rate": 3.331031970880657e-05, |
| "loss": 0.5807, |
| "step": 144800 |
| }, |
| { |
| "epoch": 0.33425464671589045, |
| "grad_norm": 1.6940183639526367, |
| "learning_rate": 3.328726766420548e-05, |
| "loss": 0.5861, |
| "step": 145000 |
| }, |
| { |
| "epoch": 0.33471568760791237, |
| "grad_norm": 1.1107732057571411, |
| "learning_rate": 3.3264215619604384e-05, |
| "loss": 0.5613, |
| "step": 145200 |
| }, |
| { |
| "epoch": 0.3351767284999343, |
| "grad_norm": 1.3826497793197632, |
| "learning_rate": 3.324116357500329e-05, |
| "loss": 0.5364, |
| "step": 145400 |
| }, |
| { |
| "epoch": 0.3356377693919562, |
| "grad_norm": 2.2688817977905273, |
| "learning_rate": 3.321811153040219e-05, |
| "loss": 0.5485, |
| "step": 145600 |
| }, |
| { |
| "epoch": 0.33609881028397814, |
| "grad_norm": 1.0029947757720947, |
| "learning_rate": 3.3195059485801094e-05, |
| "loss": 0.5915, |
| "step": 145800 |
| }, |
| { |
| "epoch": 0.33655985117600007, |
| "grad_norm": 1.0812941789627075, |
| "learning_rate": 3.31720074412e-05, |
| "loss": 0.5652, |
| "step": 146000 |
| }, |
| { |
| "epoch": 0.337020892068022, |
| "grad_norm": 1.1072156429290771, |
| "learning_rate": 3.31489553965989e-05, |
| "loss": 0.5462, |
| "step": 146200 |
| }, |
| { |
| "epoch": 0.3374819329600439, |
| "grad_norm": 0.6877702474594116, |
| "learning_rate": 3.3125903351997805e-05, |
| "loss": 0.5372, |
| "step": 146400 |
| }, |
| { |
| "epoch": 0.3379429738520658, |
| "grad_norm": 1.1875689029693604, |
| "learning_rate": 3.310285130739671e-05, |
| "loss": 0.5579, |
| "step": 146600 |
| }, |
| { |
| "epoch": 0.3384040147440877, |
| "grad_norm": 1.9786611795425415, |
| "learning_rate": 3.3079799262795616e-05, |
| "loss": 0.5614, |
| "step": 146800 |
| }, |
| { |
| "epoch": 0.33886505563610964, |
| "grad_norm": 0.24953074753284454, |
| "learning_rate": 3.3056747218194515e-05, |
| "loss": 0.5648, |
| "step": 147000 |
| }, |
| { |
| "epoch": 0.33932609652813156, |
| "grad_norm": 2.5248162746429443, |
| "learning_rate": 3.303369517359342e-05, |
| "loss": 0.4776, |
| "step": 147200 |
| }, |
| { |
| "epoch": 0.3397871374201535, |
| "grad_norm": 0.7923634052276611, |
| "learning_rate": 3.301064312899233e-05, |
| "loss": 0.5524, |
| "step": 147400 |
| }, |
| { |
| "epoch": 0.3402481783121754, |
| "grad_norm": 1.1320934295654297, |
| "learning_rate": 3.298759108439123e-05, |
| "loss": 0.5924, |
| "step": 147600 |
| }, |
| { |
| "epoch": 0.34070921920419733, |
| "grad_norm": 0.9425584673881531, |
| "learning_rate": 3.296453903979014e-05, |
| "loss": 0.5637, |
| "step": 147800 |
| }, |
| { |
| "epoch": 0.34117026009621926, |
| "grad_norm": 1.1642394065856934, |
| "learning_rate": 3.294148699518904e-05, |
| "loss": 0.555, |
| "step": 148000 |
| }, |
| { |
| "epoch": 0.3416313009882411, |
| "grad_norm": 1.479867935180664, |
| "learning_rate": 3.291843495058794e-05, |
| "loss": 0.5555, |
| "step": 148200 |
| }, |
| { |
| "epoch": 0.34209234188026305, |
| "grad_norm": 1.6537656784057617, |
| "learning_rate": 3.289538290598685e-05, |
| "loss": 0.5266, |
| "step": 148400 |
| }, |
| { |
| "epoch": 0.342553382772285, |
| "grad_norm": 0.8928322196006775, |
| "learning_rate": 3.2872330861385754e-05, |
| "loss": 0.5169, |
| "step": 148600 |
| }, |
| { |
| "epoch": 0.3430144236643069, |
| "grad_norm": 0.6630598902702332, |
| "learning_rate": 3.284927881678466e-05, |
| "loss": 0.5868, |
| "step": 148800 |
| }, |
| { |
| "epoch": 0.3434754645563288, |
| "grad_norm": 1.361573338508606, |
| "learning_rate": 3.282622677218356e-05, |
| "loss": 0.542, |
| "step": 149000 |
| }, |
| { |
| "epoch": 0.34393650544835075, |
| "grad_norm": 1.668082356452942, |
| "learning_rate": 3.2803174727582464e-05, |
| "loss": 0.5735, |
| "step": 149200 |
| }, |
| { |
| "epoch": 0.3443975463403727, |
| "grad_norm": 2.2211737632751465, |
| "learning_rate": 3.278012268298137e-05, |
| "loss": 0.5747, |
| "step": 149400 |
| }, |
| { |
| "epoch": 0.3448585872323946, |
| "grad_norm": 0.685369610786438, |
| "learning_rate": 3.275707063838027e-05, |
| "loss": 0.5401, |
| "step": 149600 |
| }, |
| { |
| "epoch": 0.3453196281244165, |
| "grad_norm": 1.617565631866455, |
| "learning_rate": 3.2734018593779174e-05, |
| "loss": 0.5635, |
| "step": 149800 |
| }, |
| { |
| "epoch": 0.3457806690164384, |
| "grad_norm": 1.5583852529525757, |
| "learning_rate": 3.271096654917808e-05, |
| "loss": 0.542, |
| "step": 150000 |
| }, |
| { |
| "epoch": 0.3457806690164384, |
| "eval_loss": 0.5474369525909424, |
| "eval_runtime": 144.1295, |
| "eval_samples_per_second": 30.403, |
| "eval_steps_per_second": 30.403, |
| "step": 150000 |
| }, |
| { |
| "epoch": 0.3462417099084603, |
| "grad_norm": 1.612930178642273, |
| "learning_rate": 3.2687914504576985e-05, |
| "loss": 0.5401, |
| "step": 150200 |
| }, |
| { |
| "epoch": 0.34670275080048224, |
| "grad_norm": 1.3440135717391968, |
| "learning_rate": 3.2664862459975884e-05, |
| "loss": 0.557, |
| "step": 150400 |
| }, |
| { |
| "epoch": 0.34716379169250416, |
| "grad_norm": 1.8030917644500732, |
| "learning_rate": 3.264181041537479e-05, |
| "loss": 0.528, |
| "step": 150600 |
| }, |
| { |
| "epoch": 0.3476248325845261, |
| "grad_norm": 1.355789303779602, |
| "learning_rate": 3.26187583707737e-05, |
| "loss": 0.5977, |
| "step": 150800 |
| }, |
| { |
| "epoch": 0.348085873476548, |
| "grad_norm": 1.8958524465560913, |
| "learning_rate": 3.25957063261726e-05, |
| "loss": 0.5509, |
| "step": 151000 |
| }, |
| { |
| "epoch": 0.34854691436856994, |
| "grad_norm": 1.62078857421875, |
| "learning_rate": 3.257265428157151e-05, |
| "loss": 0.5261, |
| "step": 151200 |
| }, |
| { |
| "epoch": 0.34900795526059186, |
| "grad_norm": 1.1603842973709106, |
| "learning_rate": 3.254960223697041e-05, |
| "loss": 0.5319, |
| "step": 151400 |
| }, |
| { |
| "epoch": 0.34946899615261373, |
| "grad_norm": 1.1251477003097534, |
| "learning_rate": 3.252655019236932e-05, |
| "loss": 0.5416, |
| "step": 151600 |
| }, |
| { |
| "epoch": 0.34993003704463566, |
| "grad_norm": 1.0224628448486328, |
| "learning_rate": 3.250349814776822e-05, |
| "loss": 0.5649, |
| "step": 151800 |
| }, |
| { |
| "epoch": 0.3503910779366576, |
| "grad_norm": 1.211235523223877, |
| "learning_rate": 3.248044610316712e-05, |
| "loss": 0.596, |
| "step": 152000 |
| }, |
| { |
| "epoch": 0.3508521188286795, |
| "grad_norm": 0.8075993061065674, |
| "learning_rate": 3.245739405856603e-05, |
| "loss": 0.5491, |
| "step": 152200 |
| }, |
| { |
| "epoch": 0.35131315972070143, |
| "grad_norm": 1.6871740818023682, |
| "learning_rate": 3.243434201396493e-05, |
| "loss": 0.5996, |
| "step": 152400 |
| }, |
| { |
| "epoch": 0.35177420061272335, |
| "grad_norm": 1.8563005924224854, |
| "learning_rate": 3.241128996936383e-05, |
| "loss": 0.5544, |
| "step": 152600 |
| }, |
| { |
| "epoch": 0.3522352415047453, |
| "grad_norm": 1.102376103401184, |
| "learning_rate": 3.238823792476274e-05, |
| "loss": 0.5294, |
| "step": 152800 |
| }, |
| { |
| "epoch": 0.3526962823967672, |
| "grad_norm": 1.3146488666534424, |
| "learning_rate": 3.2365185880161645e-05, |
| "loss": 0.55, |
| "step": 153000 |
| }, |
| { |
| "epoch": 0.3531573232887891, |
| "grad_norm": 1.509630799293518, |
| "learning_rate": 3.2342133835560543e-05, |
| "loss": 0.5853, |
| "step": 153200 |
| }, |
| { |
| "epoch": 0.353618364180811, |
| "grad_norm": 1.378322958946228, |
| "learning_rate": 3.231908179095945e-05, |
| "loss": 0.5718, |
| "step": 153400 |
| }, |
| { |
| "epoch": 0.3540794050728329, |
| "grad_norm": 1.8150678873062134, |
| "learning_rate": 3.2296029746358355e-05, |
| "loss": 0.5234, |
| "step": 153600 |
| }, |
| { |
| "epoch": 0.35454044596485484, |
| "grad_norm": 1.5151995420455933, |
| "learning_rate": 3.2272977701757254e-05, |
| "loss": 0.55, |
| "step": 153800 |
| }, |
| { |
| "epoch": 0.35500148685687677, |
| "grad_norm": 1.823546290397644, |
| "learning_rate": 3.224992565715616e-05, |
| "loss": 0.5458, |
| "step": 154000 |
| }, |
| { |
| "epoch": 0.3554625277488987, |
| "grad_norm": 1.5419812202453613, |
| "learning_rate": 3.222687361255507e-05, |
| "loss": 0.567, |
| "step": 154200 |
| }, |
| { |
| "epoch": 0.3559235686409206, |
| "grad_norm": 0.9206061959266663, |
| "learning_rate": 3.220382156795397e-05, |
| "loss": 0.5666, |
| "step": 154400 |
| }, |
| { |
| "epoch": 0.35638460953294254, |
| "grad_norm": 1.9426078796386719, |
| "learning_rate": 3.2180769523352876e-05, |
| "loss": 0.5598, |
| "step": 154600 |
| }, |
| { |
| "epoch": 0.35684565042496447, |
| "grad_norm": 2.45462965965271, |
| "learning_rate": 3.215771747875178e-05, |
| "loss": 0.5728, |
| "step": 154800 |
| }, |
| { |
| "epoch": 0.35730669131698634, |
| "grad_norm": 1.4566892385482788, |
| "learning_rate": 3.213466543415069e-05, |
| "loss": 0.5465, |
| "step": 155000 |
| }, |
| { |
| "epoch": 0.35776773220900826, |
| "grad_norm": 1.2060158252716064, |
| "learning_rate": 3.211161338954959e-05, |
| "loss": 0.5656, |
| "step": 155200 |
| }, |
| { |
| "epoch": 0.3582287731010302, |
| "grad_norm": 2.714728832244873, |
| "learning_rate": 3.208856134494849e-05, |
| "loss": 0.5431, |
| "step": 155400 |
| }, |
| { |
| "epoch": 0.3586898139930521, |
| "grad_norm": 1.1903655529022217, |
| "learning_rate": 3.20655093003474e-05, |
| "loss": 0.543, |
| "step": 155600 |
| }, |
| { |
| "epoch": 0.35915085488507403, |
| "grad_norm": 1.3290653228759766, |
| "learning_rate": 3.20424572557463e-05, |
| "loss": 0.5193, |
| "step": 155800 |
| }, |
| { |
| "epoch": 0.35961189577709596, |
| "grad_norm": 1.43769371509552, |
| "learning_rate": 3.20194052111452e-05, |
| "loss": 0.5177, |
| "step": 156000 |
| }, |
| { |
| "epoch": 0.3600729366691179, |
| "grad_norm": 1.404023289680481, |
| "learning_rate": 3.199635316654411e-05, |
| "loss": 0.5425, |
| "step": 156200 |
| }, |
| { |
| "epoch": 0.3605339775611398, |
| "grad_norm": 1.71915602684021, |
| "learning_rate": 3.1973301121943014e-05, |
| "loss": 0.5128, |
| "step": 156400 |
| }, |
| { |
| "epoch": 0.3609950184531617, |
| "grad_norm": 0.7645987272262573, |
| "learning_rate": 3.195024907734191e-05, |
| "loss": 0.5194, |
| "step": 156600 |
| }, |
| { |
| "epoch": 0.3614560593451836, |
| "grad_norm": 0.7512270212173462, |
| "learning_rate": 3.192719703274082e-05, |
| "loss": 0.5535, |
| "step": 156800 |
| }, |
| { |
| "epoch": 0.3619171002372055, |
| "grad_norm": 1.369632601737976, |
| "learning_rate": 3.1904144988139724e-05, |
| "loss": 0.5799, |
| "step": 157000 |
| }, |
| { |
| "epoch": 0.36237814112922745, |
| "grad_norm": 1.033872127532959, |
| "learning_rate": 3.188109294353862e-05, |
| "loss": 0.4932, |
| "step": 157200 |
| }, |
| { |
| "epoch": 0.3628391820212494, |
| "grad_norm": 1.6982067823410034, |
| "learning_rate": 3.185804089893753e-05, |
| "loss": 0.5428, |
| "step": 157400 |
| }, |
| { |
| "epoch": 0.3633002229132713, |
| "grad_norm": 1.2654556035995483, |
| "learning_rate": 3.183498885433644e-05, |
| "loss": 0.5261, |
| "step": 157600 |
| }, |
| { |
| "epoch": 0.3637612638052932, |
| "grad_norm": 0.6754932403564453, |
| "learning_rate": 3.181193680973534e-05, |
| "loss": 0.5388, |
| "step": 157800 |
| }, |
| { |
| "epoch": 0.36422230469731515, |
| "grad_norm": 1.5985398292541504, |
| "learning_rate": 3.1788884765134246e-05, |
| "loss": 0.553, |
| "step": 158000 |
| }, |
| { |
| "epoch": 0.36468334558933707, |
| "grad_norm": 0.4007735848426819, |
| "learning_rate": 3.176583272053315e-05, |
| "loss": 0.5233, |
| "step": 158200 |
| }, |
| { |
| "epoch": 0.36514438648135894, |
| "grad_norm": 1.1381844282150269, |
| "learning_rate": 3.174278067593206e-05, |
| "loss": 0.5748, |
| "step": 158400 |
| }, |
| { |
| "epoch": 0.36560542737338086, |
| "grad_norm": 0.9528195858001709, |
| "learning_rate": 3.1719728631330956e-05, |
| "loss": 0.5558, |
| "step": 158600 |
| }, |
| { |
| "epoch": 0.3660664682654028, |
| "grad_norm": 0.8936863541603088, |
| "learning_rate": 3.169667658672986e-05, |
| "loss": 0.5473, |
| "step": 158800 |
| }, |
| { |
| "epoch": 0.3665275091574247, |
| "grad_norm": 1.4663864374160767, |
| "learning_rate": 3.167362454212877e-05, |
| "loss": 0.5891, |
| "step": 159000 |
| }, |
| { |
| "epoch": 0.36698855004944664, |
| "grad_norm": 1.6440341472625732, |
| "learning_rate": 3.1650572497527666e-05, |
| "loss": 0.5361, |
| "step": 159200 |
| }, |
| { |
| "epoch": 0.36744959094146856, |
| "grad_norm": 0.7922578454017639, |
| "learning_rate": 3.162752045292657e-05, |
| "loss": 0.5754, |
| "step": 159400 |
| }, |
| { |
| "epoch": 0.3679106318334905, |
| "grad_norm": 2.1551461219787598, |
| "learning_rate": 3.160446840832548e-05, |
| "loss": 0.512, |
| "step": 159600 |
| }, |
| { |
| "epoch": 0.3683716727255124, |
| "grad_norm": 0.9643208980560303, |
| "learning_rate": 3.158141636372438e-05, |
| "loss": 0.5467, |
| "step": 159800 |
| }, |
| { |
| "epoch": 0.3688327136175343, |
| "grad_norm": 2.1086177825927734, |
| "learning_rate": 3.155836431912328e-05, |
| "loss": 0.5213, |
| "step": 160000 |
| }, |
| { |
| "epoch": 0.3692937545095562, |
| "grad_norm": 1.441178321838379, |
| "learning_rate": 3.153531227452219e-05, |
| "loss": 0.6028, |
| "step": 160200 |
| }, |
| { |
| "epoch": 0.36975479540157813, |
| "grad_norm": 1.4054416418075562, |
| "learning_rate": 3.1512260229921094e-05, |
| "loss": 0.4865, |
| "step": 160400 |
| }, |
| { |
| "epoch": 0.37021583629360005, |
| "grad_norm": 1.6927324533462524, |
| "learning_rate": 3.148920818532e-05, |
| "loss": 0.626, |
| "step": 160600 |
| }, |
| { |
| "epoch": 0.370676877185622, |
| "grad_norm": 0.4474141299724579, |
| "learning_rate": 3.14661561407189e-05, |
| "loss": 0.5385, |
| "step": 160800 |
| }, |
| { |
| "epoch": 0.3711379180776439, |
| "grad_norm": 1.3374356031417847, |
| "learning_rate": 3.144310409611781e-05, |
| "loss": 0.5159, |
| "step": 161000 |
| }, |
| { |
| "epoch": 0.3715989589696658, |
| "grad_norm": 0.9584740996360779, |
| "learning_rate": 3.1420052051516716e-05, |
| "loss": 0.5547, |
| "step": 161200 |
| }, |
| { |
| "epoch": 0.37205999986168775, |
| "grad_norm": 0.8642265200614929, |
| "learning_rate": 3.1397000006915615e-05, |
| "loss": 0.5651, |
| "step": 161400 |
| }, |
| { |
| "epoch": 0.3725210407537097, |
| "grad_norm": 1.4360606670379639, |
| "learning_rate": 3.137394796231452e-05, |
| "loss": 0.535, |
| "step": 161600 |
| }, |
| { |
| "epoch": 0.37298208164573154, |
| "grad_norm": 1.210317611694336, |
| "learning_rate": 3.1350895917713427e-05, |
| "loss": 0.5291, |
| "step": 161800 |
| }, |
| { |
| "epoch": 0.37344312253775347, |
| "grad_norm": 0.818991482257843, |
| "learning_rate": 3.1327843873112325e-05, |
| "loss": 0.5441, |
| "step": 162000 |
| }, |
| { |
| "epoch": 0.3739041634297754, |
| "grad_norm": 1.7334657907485962, |
| "learning_rate": 3.130479182851123e-05, |
| "loss": 0.547, |
| "step": 162200 |
| }, |
| { |
| "epoch": 0.3743652043217973, |
| "grad_norm": 1.3756144046783447, |
| "learning_rate": 3.128173978391014e-05, |
| "loss": 0.5386, |
| "step": 162400 |
| }, |
| { |
| "epoch": 0.37482624521381924, |
| "grad_norm": 1.6707614660263062, |
| "learning_rate": 3.125868773930904e-05, |
| "loss": 0.5332, |
| "step": 162600 |
| }, |
| { |
| "epoch": 0.37528728610584117, |
| "grad_norm": 1.2302086353302002, |
| "learning_rate": 3.123563569470794e-05, |
| "loss": 0.5376, |
| "step": 162800 |
| }, |
| { |
| "epoch": 0.3757483269978631, |
| "grad_norm": 1.47279953956604, |
| "learning_rate": 3.121258365010685e-05, |
| "loss": 0.5065, |
| "step": 163000 |
| }, |
| { |
| "epoch": 0.376209367889885, |
| "grad_norm": 1.31904935836792, |
| "learning_rate": 3.118953160550575e-05, |
| "loss": 0.5673, |
| "step": 163200 |
| }, |
| { |
| "epoch": 0.3766704087819069, |
| "grad_norm": 0.5999027490615845, |
| "learning_rate": 3.116647956090465e-05, |
| "loss": 0.5637, |
| "step": 163400 |
| }, |
| { |
| "epoch": 0.3771314496739288, |
| "grad_norm": 0.6730818152427673, |
| "learning_rate": 3.114342751630356e-05, |
| "loss": 0.5457, |
| "step": 163600 |
| }, |
| { |
| "epoch": 0.37759249056595073, |
| "grad_norm": 1.5005543231964111, |
| "learning_rate": 3.112037547170246e-05, |
| "loss": 0.54, |
| "step": 163800 |
| }, |
| { |
| "epoch": 0.37805353145797266, |
| "grad_norm": 0.8119702339172363, |
| "learning_rate": 3.109732342710137e-05, |
| "loss": 0.539, |
| "step": 164000 |
| }, |
| { |
| "epoch": 0.3785145723499946, |
| "grad_norm": 0.7515968680381775, |
| "learning_rate": 3.107427138250027e-05, |
| "loss": 0.5466, |
| "step": 164200 |
| }, |
| { |
| "epoch": 0.3789756132420165, |
| "grad_norm": 1.7886674404144287, |
| "learning_rate": 3.105121933789918e-05, |
| "loss": 0.5196, |
| "step": 164400 |
| }, |
| { |
| "epoch": 0.37943665413403843, |
| "grad_norm": 1.1930861473083496, |
| "learning_rate": 3.1028167293298086e-05, |
| "loss": 0.5678, |
| "step": 164600 |
| }, |
| { |
| "epoch": 0.37989769502606036, |
| "grad_norm": 1.8339203596115112, |
| "learning_rate": 3.1005115248696985e-05, |
| "loss": 0.5559, |
| "step": 164800 |
| }, |
| { |
| "epoch": 0.3803587359180823, |
| "grad_norm": 1.1968586444854736, |
| "learning_rate": 3.098206320409589e-05, |
| "loss": 0.5661, |
| "step": 165000 |
| }, |
| { |
| "epoch": 0.38081977681010415, |
| "grad_norm": 1.7871519327163696, |
| "learning_rate": 3.0959011159494796e-05, |
| "loss": 0.5931, |
| "step": 165200 |
| }, |
| { |
| "epoch": 0.3812808177021261, |
| "grad_norm": 0.8988884091377258, |
| "learning_rate": 3.0935959114893695e-05, |
| "loss": 0.4913, |
| "step": 165400 |
| }, |
| { |
| "epoch": 0.381741858594148, |
| "grad_norm": 0.36570337414741516, |
| "learning_rate": 3.09129070702926e-05, |
| "loss": 0.5088, |
| "step": 165600 |
| }, |
| { |
| "epoch": 0.3822028994861699, |
| "grad_norm": 1.5454649925231934, |
| "learning_rate": 3.0889855025691506e-05, |
| "loss": 0.5556, |
| "step": 165800 |
| }, |
| { |
| "epoch": 0.38266394037819185, |
| "grad_norm": 4.354947090148926, |
| "learning_rate": 3.086680298109041e-05, |
| "loss": 0.543, |
| "step": 166000 |
| }, |
| { |
| "epoch": 0.38312498127021377, |
| "grad_norm": 1.1687140464782715, |
| "learning_rate": 3.084375093648931e-05, |
| "loss": 0.5557, |
| "step": 166200 |
| }, |
| { |
| "epoch": 0.3835860221622357, |
| "grad_norm": 0.9749841690063477, |
| "learning_rate": 3.0820698891888216e-05, |
| "loss": 0.5267, |
| "step": 166400 |
| }, |
| { |
| "epoch": 0.3840470630542576, |
| "grad_norm": 1.900041103363037, |
| "learning_rate": 3.079764684728712e-05, |
| "loss": 0.5163, |
| "step": 166600 |
| }, |
| { |
| "epoch": 0.3845081039462795, |
| "grad_norm": 1.2895805835723877, |
| "learning_rate": 3.077459480268602e-05, |
| "loss": 0.5756, |
| "step": 166800 |
| }, |
| { |
| "epoch": 0.3849691448383014, |
| "grad_norm": 1.4463883638381958, |
| "learning_rate": 3.075154275808493e-05, |
| "loss": 0.5656, |
| "step": 167000 |
| }, |
| { |
| "epoch": 0.38543018573032334, |
| "grad_norm": 0.9612560272216797, |
| "learning_rate": 3.072849071348383e-05, |
| "loss": 0.5103, |
| "step": 167200 |
| }, |
| { |
| "epoch": 0.38589122662234526, |
| "grad_norm": 1.8480556011199951, |
| "learning_rate": 3.070543866888274e-05, |
| "loss": 0.5257, |
| "step": 167400 |
| }, |
| { |
| "epoch": 0.3863522675143672, |
| "grad_norm": 1.0281248092651367, |
| "learning_rate": 3.0682386624281644e-05, |
| "loss": 0.5381, |
| "step": 167600 |
| }, |
| { |
| "epoch": 0.3868133084063891, |
| "grad_norm": 1.657851219177246, |
| "learning_rate": 3.065933457968055e-05, |
| "loss": 0.5224, |
| "step": 167800 |
| }, |
| { |
| "epoch": 0.38727434929841104, |
| "grad_norm": 0.9592533707618713, |
| "learning_rate": 3.0636282535079455e-05, |
| "loss": 0.527, |
| "step": 168000 |
| }, |
| { |
| "epoch": 0.38773539019043296, |
| "grad_norm": 2.421381950378418, |
| "learning_rate": 3.0613230490478354e-05, |
| "loss": 0.5972, |
| "step": 168200 |
| }, |
| { |
| "epoch": 0.3881964310824549, |
| "grad_norm": 0.9807179570198059, |
| "learning_rate": 3.059017844587726e-05, |
| "loss": 0.6076, |
| "step": 168400 |
| }, |
| { |
| "epoch": 0.38865747197447675, |
| "grad_norm": 1.1217988729476929, |
| "learning_rate": 3.0567126401276165e-05, |
| "loss": 0.5442, |
| "step": 168600 |
| }, |
| { |
| "epoch": 0.3891185128664987, |
| "grad_norm": 0.9705345630645752, |
| "learning_rate": 3.054407435667507e-05, |
| "loss": 0.5831, |
| "step": 168800 |
| }, |
| { |
| "epoch": 0.3895795537585206, |
| "grad_norm": 0.9477503895759583, |
| "learning_rate": 3.052102231207397e-05, |
| "loss": 0.5955, |
| "step": 169000 |
| }, |
| { |
| "epoch": 0.3900405946505425, |
| "grad_norm": 0.7813563346862793, |
| "learning_rate": 3.0497970267472876e-05, |
| "loss": 0.5686, |
| "step": 169200 |
| }, |
| { |
| "epoch": 0.39050163554256445, |
| "grad_norm": 1.0669126510620117, |
| "learning_rate": 3.0474918222871778e-05, |
| "loss": 0.5756, |
| "step": 169400 |
| }, |
| { |
| "epoch": 0.3909626764345864, |
| "grad_norm": 1.3676906824111938, |
| "learning_rate": 3.0451866178270683e-05, |
| "loss": 0.4965, |
| "step": 169600 |
| }, |
| { |
| "epoch": 0.3914237173266083, |
| "grad_norm": 1.404822587966919, |
| "learning_rate": 3.0428814133669586e-05, |
| "loss": 0.5471, |
| "step": 169800 |
| }, |
| { |
| "epoch": 0.3918847582186302, |
| "grad_norm": 0.7466553449630737, |
| "learning_rate": 3.040576208906849e-05, |
| "loss": 0.556, |
| "step": 170000 |
| }, |
| { |
| "epoch": 0.3923457991106521, |
| "grad_norm": 1.3484429121017456, |
| "learning_rate": 3.0382710044467394e-05, |
| "loss": 0.5521, |
| "step": 170200 |
| }, |
| { |
| "epoch": 0.392806840002674, |
| "grad_norm": 3.4249660968780518, |
| "learning_rate": 3.03596579998663e-05, |
| "loss": 0.5787, |
| "step": 170400 |
| }, |
| { |
| "epoch": 0.39326788089469594, |
| "grad_norm": 0.8153938055038452, |
| "learning_rate": 3.03366059552652e-05, |
| "loss": 0.5223, |
| "step": 170600 |
| }, |
| { |
| "epoch": 0.39372892178671787, |
| "grad_norm": 2.557283401489258, |
| "learning_rate": 3.0313553910664104e-05, |
| "loss": 0.5833, |
| "step": 170800 |
| }, |
| { |
| "epoch": 0.3941899626787398, |
| "grad_norm": 1.367695927619934, |
| "learning_rate": 3.0290501866063013e-05, |
| "loss": 0.5317, |
| "step": 171000 |
| }, |
| { |
| "epoch": 0.3946510035707617, |
| "grad_norm": 1.190898060798645, |
| "learning_rate": 3.026744982146192e-05, |
| "loss": 0.5361, |
| "step": 171200 |
| }, |
| { |
| "epoch": 0.39511204446278364, |
| "grad_norm": 1.7618181705474854, |
| "learning_rate": 3.024439777686082e-05, |
| "loss": 0.6089, |
| "step": 171400 |
| }, |
| { |
| "epoch": 0.39557308535480556, |
| "grad_norm": 1.191237211227417, |
| "learning_rate": 3.0221345732259727e-05, |
| "loss": 0.5271, |
| "step": 171600 |
| }, |
| { |
| "epoch": 0.3960341262468275, |
| "grad_norm": 1.8360000848770142, |
| "learning_rate": 3.019829368765863e-05, |
| "loss": 0.5879, |
| "step": 171800 |
| }, |
| { |
| "epoch": 0.39649516713884936, |
| "grad_norm": 1.363987684249878, |
| "learning_rate": 3.0175241643057535e-05, |
| "loss": 0.5211, |
| "step": 172000 |
| }, |
| { |
| "epoch": 0.3969562080308713, |
| "grad_norm": 0.9211211800575256, |
| "learning_rate": 3.0152189598456437e-05, |
| "loss": 0.5419, |
| "step": 172200 |
| }, |
| { |
| "epoch": 0.3974172489228932, |
| "grad_norm": 1.8756023645401, |
| "learning_rate": 3.0129137553855343e-05, |
| "loss": 0.5281, |
| "step": 172400 |
| }, |
| { |
| "epoch": 0.39787828981491513, |
| "grad_norm": 0.9270503520965576, |
| "learning_rate": 3.0106085509254245e-05, |
| "loss": 0.5506, |
| "step": 172600 |
| }, |
| { |
| "epoch": 0.39833933070693706, |
| "grad_norm": 1.689388394355774, |
| "learning_rate": 3.0083033464653147e-05, |
| "loss": 0.4929, |
| "step": 172800 |
| }, |
| { |
| "epoch": 0.398800371598959, |
| "grad_norm": 1.1315703392028809, |
| "learning_rate": 3.0059981420052053e-05, |
| "loss": 0.5469, |
| "step": 173000 |
| }, |
| { |
| "epoch": 0.3992614124909809, |
| "grad_norm": 1.1053519248962402, |
| "learning_rate": 3.0036929375450955e-05, |
| "loss": 0.5001, |
| "step": 173200 |
| }, |
| { |
| "epoch": 0.39972245338300283, |
| "grad_norm": 1.1651402711868286, |
| "learning_rate": 3.001387733084986e-05, |
| "loss": 0.5255, |
| "step": 173400 |
| }, |
| { |
| "epoch": 0.4001834942750247, |
| "grad_norm": 1.540276288986206, |
| "learning_rate": 2.9990825286248763e-05, |
| "loss": 0.5644, |
| "step": 173600 |
| }, |
| { |
| "epoch": 0.4006445351670466, |
| "grad_norm": 0.8608019948005676, |
| "learning_rate": 2.996777324164767e-05, |
| "loss": 0.5312, |
| "step": 173800 |
| }, |
| { |
| "epoch": 0.40110557605906855, |
| "grad_norm": 0.959018886089325, |
| "learning_rate": 2.994472119704657e-05, |
| "loss": 0.5322, |
| "step": 174000 |
| }, |
| { |
| "epoch": 0.40156661695109047, |
| "grad_norm": 2.531625986099243, |
| "learning_rate": 2.9921669152445477e-05, |
| "loss": 0.5521, |
| "step": 174200 |
| }, |
| { |
| "epoch": 0.4020276578431124, |
| "grad_norm": 1.8716404438018799, |
| "learning_rate": 2.9898617107844386e-05, |
| "loss": 0.4931, |
| "step": 174400 |
| }, |
| { |
| "epoch": 0.4024886987351343, |
| "grad_norm": 1.4556031227111816, |
| "learning_rate": 2.9875565063243288e-05, |
| "loss": 0.5879, |
| "step": 174600 |
| }, |
| { |
| "epoch": 0.40294973962715624, |
| "grad_norm": 1.2687571048736572, |
| "learning_rate": 2.985251301864219e-05, |
| "loss": 0.5636, |
| "step": 174800 |
| }, |
| { |
| "epoch": 0.40341078051917817, |
| "grad_norm": 1.354716420173645, |
| "learning_rate": 2.9829460974041096e-05, |
| "loss": 0.5851, |
| "step": 175000 |
| }, |
| { |
| "epoch": 0.4038718214112001, |
| "grad_norm": 0.4532039761543274, |
| "learning_rate": 2.980640892944e-05, |
| "loss": 0.5726, |
| "step": 175200 |
| }, |
| { |
| "epoch": 0.40433286230322196, |
| "grad_norm": 1.2430226802825928, |
| "learning_rate": 2.9783356884838904e-05, |
| "loss": 0.5263, |
| "step": 175400 |
| }, |
| { |
| "epoch": 0.4047939031952439, |
| "grad_norm": 1.0308810472488403, |
| "learning_rate": 2.9760304840237806e-05, |
| "loss": 0.5634, |
| "step": 175600 |
| }, |
| { |
| "epoch": 0.4052549440872658, |
| "grad_norm": 1.0540807247161865, |
| "learning_rate": 2.9737252795636712e-05, |
| "loss": 0.546, |
| "step": 175800 |
| }, |
| { |
| "epoch": 0.40571598497928774, |
| "grad_norm": 1.632247805595398, |
| "learning_rate": 2.9714200751035614e-05, |
| "loss": 0.5265, |
| "step": 176000 |
| }, |
| { |
| "epoch": 0.40617702587130966, |
| "grad_norm": 1.5189135074615479, |
| "learning_rate": 2.969114870643452e-05, |
| "loss": 0.5582, |
| "step": 176200 |
| }, |
| { |
| "epoch": 0.4066380667633316, |
| "grad_norm": 1.3175644874572754, |
| "learning_rate": 2.9668096661833422e-05, |
| "loss": 0.555, |
| "step": 176400 |
| }, |
| { |
| "epoch": 0.4070991076553535, |
| "grad_norm": 1.3439033031463623, |
| "learning_rate": 2.9645044617232325e-05, |
| "loss": 0.5526, |
| "step": 176600 |
| }, |
| { |
| "epoch": 0.40756014854737543, |
| "grad_norm": 0.6501840949058533, |
| "learning_rate": 2.962199257263123e-05, |
| "loss": 0.4856, |
| "step": 176800 |
| }, |
| { |
| "epoch": 0.4080211894393973, |
| "grad_norm": 2.5215022563934326, |
| "learning_rate": 2.9598940528030132e-05, |
| "loss": 0.5419, |
| "step": 177000 |
| }, |
| { |
| "epoch": 0.4084822303314192, |
| "grad_norm": 1.9052616357803345, |
| "learning_rate": 2.9575888483429038e-05, |
| "loss": 0.5189, |
| "step": 177200 |
| }, |
| { |
| "epoch": 0.40894327122344115, |
| "grad_norm": 1.2403985261917114, |
| "learning_rate": 2.955283643882794e-05, |
| "loss": 0.6047, |
| "step": 177400 |
| }, |
| { |
| "epoch": 0.4094043121154631, |
| "grad_norm": 1.517579436302185, |
| "learning_rate": 2.9529784394226846e-05, |
| "loss": 0.5691, |
| "step": 177600 |
| }, |
| { |
| "epoch": 0.409865353007485, |
| "grad_norm": 2.5231924057006836, |
| "learning_rate": 2.9506732349625755e-05, |
| "loss": 0.5686, |
| "step": 177800 |
| }, |
| { |
| "epoch": 0.4103263938995069, |
| "grad_norm": 0.6522693634033203, |
| "learning_rate": 2.9483680305024657e-05, |
| "loss": 0.5318, |
| "step": 178000 |
| }, |
| { |
| "epoch": 0.41078743479152885, |
| "grad_norm": 0.9372640252113342, |
| "learning_rate": 2.9460628260423563e-05, |
| "loss": 0.5535, |
| "step": 178200 |
| }, |
| { |
| "epoch": 0.4112484756835508, |
| "grad_norm": 1.2775940895080566, |
| "learning_rate": 2.9437576215822465e-05, |
| "loss": 0.5885, |
| "step": 178400 |
| }, |
| { |
| "epoch": 0.41170951657557264, |
| "grad_norm": 1.6325544118881226, |
| "learning_rate": 2.9414524171221368e-05, |
| "loss": 0.5622, |
| "step": 178600 |
| }, |
| { |
| "epoch": 0.41217055746759457, |
| "grad_norm": 1.4288066625595093, |
| "learning_rate": 2.9391472126620273e-05, |
| "loss": 0.4999, |
| "step": 178800 |
| }, |
| { |
| "epoch": 0.4126315983596165, |
| "grad_norm": 2.633436918258667, |
| "learning_rate": 2.9368420082019176e-05, |
| "loss": 0.5428, |
| "step": 179000 |
| }, |
| { |
| "epoch": 0.4130926392516384, |
| "grad_norm": 1.5107150077819824, |
| "learning_rate": 2.934536803741808e-05, |
| "loss": 0.5327, |
| "step": 179200 |
| }, |
| { |
| "epoch": 0.41355368014366034, |
| "grad_norm": 1.3021948337554932, |
| "learning_rate": 2.9322315992816984e-05, |
| "loss": 0.5725, |
| "step": 179400 |
| }, |
| { |
| "epoch": 0.41401472103568226, |
| "grad_norm": 1.0030542612075806, |
| "learning_rate": 2.929926394821589e-05, |
| "loss": 0.521, |
| "step": 179600 |
| }, |
| { |
| "epoch": 0.4144757619277042, |
| "grad_norm": 1.4533718824386597, |
| "learning_rate": 2.927621190361479e-05, |
| "loss": 0.537, |
| "step": 179800 |
| }, |
| { |
| "epoch": 0.4149368028197261, |
| "grad_norm": 0.5830268263816833, |
| "learning_rate": 2.9253159859013697e-05, |
| "loss": 0.6027, |
| "step": 180000 |
| }, |
| { |
| "epoch": 0.41539784371174804, |
| "grad_norm": 2.173309087753296, |
| "learning_rate": 2.92301078144126e-05, |
| "loss": 0.5337, |
| "step": 180200 |
| }, |
| { |
| "epoch": 0.4158588846037699, |
| "grad_norm": 1.0939158201217651, |
| "learning_rate": 2.9207055769811502e-05, |
| "loss": 0.5293, |
| "step": 180400 |
| }, |
| { |
| "epoch": 0.41631992549579183, |
| "grad_norm": 1.6121618747711182, |
| "learning_rate": 2.9184003725210408e-05, |
| "loss": 0.546, |
| "step": 180600 |
| }, |
| { |
| "epoch": 0.41678096638781376, |
| "grad_norm": 0.8111677169799805, |
| "learning_rate": 2.916095168060931e-05, |
| "loss": 0.5222, |
| "step": 180800 |
| }, |
| { |
| "epoch": 0.4172420072798357, |
| "grad_norm": 0.7552040219306946, |
| "learning_rate": 2.9137899636008215e-05, |
| "loss": 0.5809, |
| "step": 181000 |
| }, |
| { |
| "epoch": 0.4177030481718576, |
| "grad_norm": 1.146061897277832, |
| "learning_rate": 2.9114847591407125e-05, |
| "loss": 0.5609, |
| "step": 181200 |
| }, |
| { |
| "epoch": 0.41816408906387953, |
| "grad_norm": 0.885413646697998, |
| "learning_rate": 2.9091795546806027e-05, |
| "loss": 0.5252, |
| "step": 181400 |
| }, |
| { |
| "epoch": 0.41862512995590145, |
| "grad_norm": 1.3384150266647339, |
| "learning_rate": 2.9068743502204933e-05, |
| "loss": 0.5202, |
| "step": 181600 |
| }, |
| { |
| "epoch": 0.4190861708479234, |
| "grad_norm": 0.9868043065071106, |
| "learning_rate": 2.9045691457603835e-05, |
| "loss": 0.5393, |
| "step": 181800 |
| }, |
| { |
| "epoch": 0.41954721173994525, |
| "grad_norm": 1.3893357515335083, |
| "learning_rate": 2.902263941300274e-05, |
| "loss": 0.5337, |
| "step": 182000 |
| }, |
| { |
| "epoch": 0.42000825263196717, |
| "grad_norm": 1.7168641090393066, |
| "learning_rate": 2.8999587368401643e-05, |
| "loss": 0.5119, |
| "step": 182200 |
| }, |
| { |
| "epoch": 0.4204692935239891, |
| "grad_norm": 0.6522820591926575, |
| "learning_rate": 2.8976535323800545e-05, |
| "loss": 0.5551, |
| "step": 182400 |
| }, |
| { |
| "epoch": 0.420930334416011, |
| "grad_norm": 1.6360949277877808, |
| "learning_rate": 2.895348327919945e-05, |
| "loss": 0.5413, |
| "step": 182600 |
| }, |
| { |
| "epoch": 0.42139137530803294, |
| "grad_norm": 2.0071022510528564, |
| "learning_rate": 2.8930431234598353e-05, |
| "loss": 0.556, |
| "step": 182800 |
| }, |
| { |
| "epoch": 0.42185241620005487, |
| "grad_norm": 1.155096173286438, |
| "learning_rate": 2.890737918999726e-05, |
| "loss": 0.4924, |
| "step": 183000 |
| }, |
| { |
| "epoch": 0.4223134570920768, |
| "grad_norm": 0.7732855677604675, |
| "learning_rate": 2.888432714539616e-05, |
| "loss": 0.5849, |
| "step": 183200 |
| }, |
| { |
| "epoch": 0.4227744979840987, |
| "grad_norm": 1.4793187379837036, |
| "learning_rate": 2.8861275100795067e-05, |
| "loss": 0.5426, |
| "step": 183400 |
| }, |
| { |
| "epoch": 0.42323553887612064, |
| "grad_norm": 1.6665247678756714, |
| "learning_rate": 2.883822305619397e-05, |
| "loss": 0.5926, |
| "step": 183600 |
| }, |
| { |
| "epoch": 0.4236965797681425, |
| "grad_norm": 1.4480516910552979, |
| "learning_rate": 2.8815171011592875e-05, |
| "loss": 0.5335, |
| "step": 183800 |
| }, |
| { |
| "epoch": 0.42415762066016444, |
| "grad_norm": 0.944604754447937, |
| "learning_rate": 2.8792118966991777e-05, |
| "loss": 0.516, |
| "step": 184000 |
| }, |
| { |
| "epoch": 0.42461866155218636, |
| "grad_norm": 1.405192255973816, |
| "learning_rate": 2.876906692239068e-05, |
| "loss": 0.5339, |
| "step": 184200 |
| }, |
| { |
| "epoch": 0.4250797024442083, |
| "grad_norm": 1.1222949028015137, |
| "learning_rate": 2.8746014877789585e-05, |
| "loss": 0.5023, |
| "step": 184400 |
| }, |
| { |
| "epoch": 0.4255407433362302, |
| "grad_norm": 1.2079672813415527, |
| "learning_rate": 2.8722962833188494e-05, |
| "loss": 0.4828, |
| "step": 184600 |
| }, |
| { |
| "epoch": 0.42600178422825213, |
| "grad_norm": 3.4156157970428467, |
| "learning_rate": 2.8699910788587396e-05, |
| "loss": 0.4995, |
| "step": 184800 |
| }, |
| { |
| "epoch": 0.42646282512027406, |
| "grad_norm": 1.3917217254638672, |
| "learning_rate": 2.8676858743986302e-05, |
| "loss": 0.5099, |
| "step": 185000 |
| }, |
| { |
| "epoch": 0.426923866012296, |
| "grad_norm": 1.514889121055603, |
| "learning_rate": 2.8653806699385204e-05, |
| "loss": 0.5377, |
| "step": 185200 |
| }, |
| { |
| "epoch": 0.42738490690431785, |
| "grad_norm": 1.0316505432128906, |
| "learning_rate": 2.863075465478411e-05, |
| "loss": 0.5223, |
| "step": 185400 |
| }, |
| { |
| "epoch": 0.4278459477963398, |
| "grad_norm": 2.2684624195098877, |
| "learning_rate": 2.8607702610183012e-05, |
| "loss": 0.5482, |
| "step": 185600 |
| }, |
| { |
| "epoch": 0.4283069886883617, |
| "grad_norm": 0.6258700489997864, |
| "learning_rate": 2.8584650565581918e-05, |
| "loss": 0.5643, |
| "step": 185800 |
| }, |
| { |
| "epoch": 0.4287680295803836, |
| "grad_norm": 0.6727305054664612, |
| "learning_rate": 2.856159852098082e-05, |
| "loss": 0.5405, |
| "step": 186000 |
| }, |
| { |
| "epoch": 0.42922907047240555, |
| "grad_norm": 0.6856648921966553, |
| "learning_rate": 2.8538546476379722e-05, |
| "loss": 0.5571, |
| "step": 186200 |
| }, |
| { |
| "epoch": 0.4296901113644275, |
| "grad_norm": 1.6323261260986328, |
| "learning_rate": 2.8515494431778628e-05, |
| "loss": 0.5369, |
| "step": 186400 |
| }, |
| { |
| "epoch": 0.4301511522564494, |
| "grad_norm": 1.5054471492767334, |
| "learning_rate": 2.849244238717753e-05, |
| "loss": 0.5402, |
| "step": 186600 |
| }, |
| { |
| "epoch": 0.4306121931484713, |
| "grad_norm": 1.21519136428833, |
| "learning_rate": 2.8469390342576436e-05, |
| "loss": 0.4947, |
| "step": 186800 |
| }, |
| { |
| "epoch": 0.43107323404049325, |
| "grad_norm": 1.126180648803711, |
| "learning_rate": 2.8446338297975338e-05, |
| "loss": 0.4861, |
| "step": 187000 |
| }, |
| { |
| "epoch": 0.4315342749325151, |
| "grad_norm": 1.4017746448516846, |
| "learning_rate": 2.8423286253374244e-05, |
| "loss": 0.4995, |
| "step": 187200 |
| }, |
| { |
| "epoch": 0.43199531582453704, |
| "grad_norm": 1.8414978981018066, |
| "learning_rate": 2.8400234208773146e-05, |
| "loss": 0.5247, |
| "step": 187400 |
| }, |
| { |
| "epoch": 0.43245635671655897, |
| "grad_norm": 0.9502488374710083, |
| "learning_rate": 2.8377182164172052e-05, |
| "loss": 0.5712, |
| "step": 187600 |
| }, |
| { |
| "epoch": 0.4329173976085809, |
| "grad_norm": 1.3080493211746216, |
| "learning_rate": 2.8354130119570954e-05, |
| "loss": 0.5895, |
| "step": 187800 |
| }, |
| { |
| "epoch": 0.4333784385006028, |
| "grad_norm": 1.122564673423767, |
| "learning_rate": 2.8331078074969863e-05, |
| "loss": 0.5105, |
| "step": 188000 |
| }, |
| { |
| "epoch": 0.43383947939262474, |
| "grad_norm": 3.3100082874298096, |
| "learning_rate": 2.8308026030368766e-05, |
| "loss": 0.5266, |
| "step": 188200 |
| }, |
| { |
| "epoch": 0.43430052028464666, |
| "grad_norm": 2.0265512466430664, |
| "learning_rate": 2.828497398576767e-05, |
| "loss": 0.5605, |
| "step": 188400 |
| }, |
| { |
| "epoch": 0.4347615611766686, |
| "grad_norm": 1.7905211448669434, |
| "learning_rate": 2.8261921941166574e-05, |
| "loss": 0.5581, |
| "step": 188600 |
| }, |
| { |
| "epoch": 0.43522260206869046, |
| "grad_norm": 1.0183840990066528, |
| "learning_rate": 2.823886989656548e-05, |
| "loss": 0.5051, |
| "step": 188800 |
| }, |
| { |
| "epoch": 0.4356836429607124, |
| "grad_norm": 1.128341794013977, |
| "learning_rate": 2.821581785196438e-05, |
| "loss": 0.5511, |
| "step": 189000 |
| }, |
| { |
| "epoch": 0.4361446838527343, |
| "grad_norm": 0.9863077998161316, |
| "learning_rate": 2.8192765807363287e-05, |
| "loss": 0.5541, |
| "step": 189200 |
| }, |
| { |
| "epoch": 0.43660572474475623, |
| "grad_norm": 2.1484644412994385, |
| "learning_rate": 2.816971376276219e-05, |
| "loss": 0.5729, |
| "step": 189400 |
| }, |
| { |
| "epoch": 0.43706676563677815, |
| "grad_norm": 0.716901421546936, |
| "learning_rate": 2.8146661718161095e-05, |
| "loss": 0.5085, |
| "step": 189600 |
| }, |
| { |
| "epoch": 0.4375278065288001, |
| "grad_norm": 1.7285312414169312, |
| "learning_rate": 2.8123609673559997e-05, |
| "loss": 0.49, |
| "step": 189800 |
| }, |
| { |
| "epoch": 0.437988847420822, |
| "grad_norm": 1.697322130203247, |
| "learning_rate": 2.81005576289589e-05, |
| "loss": 0.5524, |
| "step": 190000 |
| }, |
| { |
| "epoch": 0.4384498883128439, |
| "grad_norm": 0.9568549394607544, |
| "learning_rate": 2.8077505584357805e-05, |
| "loss": 0.5403, |
| "step": 190200 |
| }, |
| { |
| "epoch": 0.43891092920486585, |
| "grad_norm": 2.225656747817993, |
| "learning_rate": 2.8054453539756708e-05, |
| "loss": 0.5146, |
| "step": 190400 |
| }, |
| { |
| "epoch": 0.4393719700968877, |
| "grad_norm": 1.7832934856414795, |
| "learning_rate": 2.8031401495155613e-05, |
| "loss": 0.5734, |
| "step": 190600 |
| }, |
| { |
| "epoch": 0.43983301098890965, |
| "grad_norm": 1.1611802577972412, |
| "learning_rate": 2.8008349450554516e-05, |
| "loss": 0.5316, |
| "step": 190800 |
| }, |
| { |
| "epoch": 0.44029405188093157, |
| "grad_norm": 0.3716856837272644, |
| "learning_rate": 2.798529740595342e-05, |
| "loss": 0.5683, |
| "step": 191000 |
| }, |
| { |
| "epoch": 0.4407550927729535, |
| "grad_norm": 0.911855161190033, |
| "learning_rate": 2.7962245361352324e-05, |
| "loss": 0.5488, |
| "step": 191200 |
| }, |
| { |
| "epoch": 0.4412161336649754, |
| "grad_norm": 4.299455165863037, |
| "learning_rate": 2.7939193316751233e-05, |
| "loss": 0.5083, |
| "step": 191400 |
| }, |
| { |
| "epoch": 0.44167717455699734, |
| "grad_norm": 0.8923743367195129, |
| "learning_rate": 2.791614127215014e-05, |
| "loss": 0.5514, |
| "step": 191600 |
| }, |
| { |
| "epoch": 0.44213821544901927, |
| "grad_norm": 2.5912487506866455, |
| "learning_rate": 2.789308922754904e-05, |
| "loss": 0.5585, |
| "step": 191800 |
| }, |
| { |
| "epoch": 0.4425992563410412, |
| "grad_norm": 1.8387411832809448, |
| "learning_rate": 2.7870037182947943e-05, |
| "loss": 0.5628, |
| "step": 192000 |
| }, |
| { |
| "epoch": 0.44306029723306306, |
| "grad_norm": 1.2115058898925781, |
| "learning_rate": 2.784698513834685e-05, |
| "loss": 0.5178, |
| "step": 192200 |
| }, |
| { |
| "epoch": 0.443521338125085, |
| "grad_norm": 1.1574034690856934, |
| "learning_rate": 2.782393309374575e-05, |
| "loss": 0.4819, |
| "step": 192400 |
| }, |
| { |
| "epoch": 0.4439823790171069, |
| "grad_norm": 0.6429279446601868, |
| "learning_rate": 2.7800881049144657e-05, |
| "loss": 0.5674, |
| "step": 192600 |
| }, |
| { |
| "epoch": 0.44444341990912883, |
| "grad_norm": 1.5901168584823608, |
| "learning_rate": 2.777782900454356e-05, |
| "loss": 0.5352, |
| "step": 192800 |
| }, |
| { |
| "epoch": 0.44490446080115076, |
| "grad_norm": 0.7381865978240967, |
| "learning_rate": 2.7754776959942465e-05, |
| "loss": 0.5223, |
| "step": 193000 |
| }, |
| { |
| "epoch": 0.4453655016931727, |
| "grad_norm": 0.6729177236557007, |
| "learning_rate": 2.7731724915341367e-05, |
| "loss": 0.5568, |
| "step": 193200 |
| }, |
| { |
| "epoch": 0.4458265425851946, |
| "grad_norm": 1.1146801710128784, |
| "learning_rate": 2.7708672870740272e-05, |
| "loss": 0.5336, |
| "step": 193400 |
| }, |
| { |
| "epoch": 0.44628758347721653, |
| "grad_norm": 0.9231970906257629, |
| "learning_rate": 2.7685620826139175e-05, |
| "loss": 0.5331, |
| "step": 193600 |
| }, |
| { |
| "epoch": 0.44674862436923846, |
| "grad_norm": 0.9126871228218079, |
| "learning_rate": 2.7662568781538077e-05, |
| "loss": 0.5018, |
| "step": 193800 |
| }, |
| { |
| "epoch": 0.4472096652612603, |
| "grad_norm": 1.343369483947754, |
| "learning_rate": 2.7639516736936983e-05, |
| "loss": 0.5321, |
| "step": 194000 |
| }, |
| { |
| "epoch": 0.44767070615328225, |
| "grad_norm": 1.209140419960022, |
| "learning_rate": 2.7616464692335885e-05, |
| "loss": 0.5341, |
| "step": 194200 |
| }, |
| { |
| "epoch": 0.4481317470453042, |
| "grad_norm": 2.7046828269958496, |
| "learning_rate": 2.759341264773479e-05, |
| "loss": 0.5259, |
| "step": 194400 |
| }, |
| { |
| "epoch": 0.4485927879373261, |
| "grad_norm": 1.0318337678909302, |
| "learning_rate": 2.7570360603133693e-05, |
| "loss": 0.5131, |
| "step": 194600 |
| }, |
| { |
| "epoch": 0.449053828829348, |
| "grad_norm": 2.206500291824341, |
| "learning_rate": 2.7547308558532602e-05, |
| "loss": 0.4956, |
| "step": 194800 |
| }, |
| { |
| "epoch": 0.44951486972136995, |
| "grad_norm": 1.1853792667388916, |
| "learning_rate": 2.7524256513931508e-05, |
| "loss": 0.4903, |
| "step": 195000 |
| }, |
| { |
| "epoch": 0.44997591061339187, |
| "grad_norm": 2.2172162532806396, |
| "learning_rate": 2.750120446933041e-05, |
| "loss": 0.5276, |
| "step": 195200 |
| }, |
| { |
| "epoch": 0.4504369515054138, |
| "grad_norm": 0.8798406720161438, |
| "learning_rate": 2.7478152424729316e-05, |
| "loss": 0.526, |
| "step": 195400 |
| }, |
| { |
| "epoch": 0.45089799239743567, |
| "grad_norm": 1.5308436155319214, |
| "learning_rate": 2.7455100380128218e-05, |
| "loss": 0.5206, |
| "step": 195600 |
| }, |
| { |
| "epoch": 0.4513590332894576, |
| "grad_norm": 0.7613127827644348, |
| "learning_rate": 2.743204833552712e-05, |
| "loss": 0.4945, |
| "step": 195800 |
| }, |
| { |
| "epoch": 0.4518200741814795, |
| "grad_norm": 1.1208069324493408, |
| "learning_rate": 2.7408996290926026e-05, |
| "loss": 0.4972, |
| "step": 196000 |
| }, |
| { |
| "epoch": 0.45228111507350144, |
| "grad_norm": 1.172491431236267, |
| "learning_rate": 2.7385944246324928e-05, |
| "loss": 0.519, |
| "step": 196200 |
| }, |
| { |
| "epoch": 0.45274215596552336, |
| "grad_norm": 1.6736866235733032, |
| "learning_rate": 2.7362892201723834e-05, |
| "loss": 0.5425, |
| "step": 196400 |
| }, |
| { |
| "epoch": 0.4532031968575453, |
| "grad_norm": 1.6905968189239502, |
| "learning_rate": 2.7339840157122736e-05, |
| "loss": 0.5561, |
| "step": 196600 |
| }, |
| { |
| "epoch": 0.4536642377495672, |
| "grad_norm": 1.852290153503418, |
| "learning_rate": 2.7316788112521642e-05, |
| "loss": 0.4633, |
| "step": 196800 |
| }, |
| { |
| "epoch": 0.45412527864158914, |
| "grad_norm": 1.671228289604187, |
| "learning_rate": 2.7293736067920544e-05, |
| "loss": 0.5361, |
| "step": 197000 |
| }, |
| { |
| "epoch": 0.45458631953361106, |
| "grad_norm": 4.358177185058594, |
| "learning_rate": 2.727068402331945e-05, |
| "loss": 0.5422, |
| "step": 197200 |
| }, |
| { |
| "epoch": 0.45504736042563293, |
| "grad_norm": 1.261697769165039, |
| "learning_rate": 2.7247631978718352e-05, |
| "loss": 0.5468, |
| "step": 197400 |
| }, |
| { |
| "epoch": 0.45550840131765485, |
| "grad_norm": 1.6779541969299316, |
| "learning_rate": 2.7224579934117254e-05, |
| "loss": 0.5578, |
| "step": 197600 |
| }, |
| { |
| "epoch": 0.4559694422096768, |
| "grad_norm": 1.5837364196777344, |
| "learning_rate": 2.720152788951616e-05, |
| "loss": 0.5796, |
| "step": 197800 |
| }, |
| { |
| "epoch": 0.4564304831016987, |
| "grad_norm": 2.479245662689209, |
| "learning_rate": 2.7178475844915062e-05, |
| "loss": 0.5441, |
| "step": 198000 |
| }, |
| { |
| "epoch": 0.4568915239937206, |
| "grad_norm": 2.000091552734375, |
| "learning_rate": 2.715542380031397e-05, |
| "loss": 0.5661, |
| "step": 198200 |
| }, |
| { |
| "epoch": 0.45735256488574255, |
| "grad_norm": 1.4363523721694946, |
| "learning_rate": 2.7132371755712877e-05, |
| "loss": 0.5565, |
| "step": 198400 |
| }, |
| { |
| "epoch": 0.4578136057777645, |
| "grad_norm": 1.766074776649475, |
| "learning_rate": 2.710931971111178e-05, |
| "loss": 0.5825, |
| "step": 198600 |
| }, |
| { |
| "epoch": 0.4582746466697864, |
| "grad_norm": 0.5402831435203552, |
| "learning_rate": 2.7086267666510685e-05, |
| "loss": 0.5039, |
| "step": 198800 |
| }, |
| { |
| "epoch": 0.45873568756180827, |
| "grad_norm": 1.0958600044250488, |
| "learning_rate": 2.7063215621909587e-05, |
| "loss": 0.5534, |
| "step": 199000 |
| }, |
| { |
| "epoch": 0.4591967284538302, |
| "grad_norm": 1.6260972023010254, |
| "learning_rate": 2.7040163577308493e-05, |
| "loss": 0.5222, |
| "step": 199200 |
| }, |
| { |
| "epoch": 0.4596577693458521, |
| "grad_norm": 1.382095217704773, |
| "learning_rate": 2.7017111532707395e-05, |
| "loss": 0.5278, |
| "step": 199400 |
| }, |
| { |
| "epoch": 0.46011881023787404, |
| "grad_norm": 1.0845330953598022, |
| "learning_rate": 2.6994059488106298e-05, |
| "loss": 0.5143, |
| "step": 199600 |
| }, |
| { |
| "epoch": 0.46057985112989597, |
| "grad_norm": 1.2804137468338013, |
| "learning_rate": 2.6971007443505203e-05, |
| "loss": 0.511, |
| "step": 199800 |
| }, |
| { |
| "epoch": 0.4610408920219179, |
| "grad_norm": 3.7605793476104736, |
| "learning_rate": 2.6947955398904106e-05, |
| "loss": 0.531, |
| "step": 200000 |
| }, |
| { |
| "epoch": 0.4610408920219179, |
| "eval_loss": 0.5235968232154846, |
| "eval_runtime": 144.1603, |
| "eval_samples_per_second": 30.397, |
| "eval_steps_per_second": 30.397, |
| "step": 200000 |
| }, |
| { |
| "epoch": 0.4615019329139398, |
| "grad_norm": 1.2853552103042603, |
| "learning_rate": 2.692490335430301e-05, |
| "loss": 0.52, |
| "step": 200200 |
| }, |
| { |
| "epoch": 0.46196297380596174, |
| "grad_norm": 0.8464341759681702, |
| "learning_rate": 2.6901851309701914e-05, |
| "loss": 0.5059, |
| "step": 200400 |
| }, |
| { |
| "epoch": 0.46242401469798367, |
| "grad_norm": 1.0232640504837036, |
| "learning_rate": 2.687879926510082e-05, |
| "loss": 0.6008, |
| "step": 200600 |
| }, |
| { |
| "epoch": 0.46288505559000553, |
| "grad_norm": 1.2209442853927612, |
| "learning_rate": 2.685574722049972e-05, |
| "loss": 0.5058, |
| "step": 200800 |
| }, |
| { |
| "epoch": 0.46334609648202746, |
| "grad_norm": 0.827387809753418, |
| "learning_rate": 2.6832695175898627e-05, |
| "loss": 0.5022, |
| "step": 201000 |
| }, |
| { |
| "epoch": 0.4638071373740494, |
| "grad_norm": 0.663145899772644, |
| "learning_rate": 2.680964313129753e-05, |
| "loss": 0.5287, |
| "step": 201200 |
| }, |
| { |
| "epoch": 0.4642681782660713, |
| "grad_norm": 1.2869213819503784, |
| "learning_rate": 2.6786591086696432e-05, |
| "loss": 0.588, |
| "step": 201400 |
| }, |
| { |
| "epoch": 0.46472921915809323, |
| "grad_norm": 0.9213125705718994, |
| "learning_rate": 2.676353904209534e-05, |
| "loss": 0.5375, |
| "step": 201600 |
| }, |
| { |
| "epoch": 0.46519026005011516, |
| "grad_norm": 0.9459083080291748, |
| "learning_rate": 2.6740486997494246e-05, |
| "loss": 0.539, |
| "step": 201800 |
| }, |
| { |
| "epoch": 0.4656513009421371, |
| "grad_norm": 0.9873161315917969, |
| "learning_rate": 2.671743495289315e-05, |
| "loss": 0.5549, |
| "step": 202000 |
| }, |
| { |
| "epoch": 0.466112341834159, |
| "grad_norm": 1.8117451667785645, |
| "learning_rate": 2.6694382908292054e-05, |
| "loss": 0.5255, |
| "step": 202200 |
| }, |
| { |
| "epoch": 0.4665733827261809, |
| "grad_norm": 1.219114899635315, |
| "learning_rate": 2.6671330863690957e-05, |
| "loss": 0.4845, |
| "step": 202400 |
| }, |
| { |
| "epoch": 0.4670344236182028, |
| "grad_norm": 2.0464797019958496, |
| "learning_rate": 2.6648278819089862e-05, |
| "loss": 0.5696, |
| "step": 202600 |
| }, |
| { |
| "epoch": 0.4674954645102247, |
| "grad_norm": 2.183873176574707, |
| "learning_rate": 2.6625226774488765e-05, |
| "loss": 0.5078, |
| "step": 202800 |
| }, |
| { |
| "epoch": 0.46795650540224665, |
| "grad_norm": 0.8037805557250977, |
| "learning_rate": 2.660217472988767e-05, |
| "loss": 0.5538, |
| "step": 203000 |
| }, |
| { |
| "epoch": 0.46841754629426857, |
| "grad_norm": 0.2990266978740692, |
| "learning_rate": 2.6579122685286573e-05, |
| "loss": 0.5458, |
| "step": 203200 |
| }, |
| { |
| "epoch": 0.4688785871862905, |
| "grad_norm": 1.854121446609497, |
| "learning_rate": 2.6556070640685475e-05, |
| "loss": 0.5138, |
| "step": 203400 |
| }, |
| { |
| "epoch": 0.4693396280783124, |
| "grad_norm": 2.2942981719970703, |
| "learning_rate": 2.653301859608438e-05, |
| "loss": 0.5268, |
| "step": 203600 |
| }, |
| { |
| "epoch": 0.46980066897033435, |
| "grad_norm": 1.3234660625457764, |
| "learning_rate": 2.6509966551483283e-05, |
| "loss": 0.4838, |
| "step": 203800 |
| }, |
| { |
| "epoch": 0.4702617098623562, |
| "grad_norm": 2.0463480949401855, |
| "learning_rate": 2.648691450688219e-05, |
| "loss": 0.5101, |
| "step": 204000 |
| }, |
| { |
| "epoch": 0.47072275075437814, |
| "grad_norm": 2.1210684776306152, |
| "learning_rate": 2.646386246228109e-05, |
| "loss": 0.5376, |
| "step": 204200 |
| }, |
| { |
| "epoch": 0.47118379164640006, |
| "grad_norm": 1.7364137172698975, |
| "learning_rate": 2.6440810417679997e-05, |
| "loss": 0.5649, |
| "step": 204400 |
| }, |
| { |
| "epoch": 0.471644832538422, |
| "grad_norm": 0.9832141399383545, |
| "learning_rate": 2.64177583730789e-05, |
| "loss": 0.5415, |
| "step": 204600 |
| }, |
| { |
| "epoch": 0.4721058734304439, |
| "grad_norm": 2.0210485458374023, |
| "learning_rate": 2.6394706328477804e-05, |
| "loss": 0.5323, |
| "step": 204800 |
| }, |
| { |
| "epoch": 0.47256691432246584, |
| "grad_norm": 1.7423853874206543, |
| "learning_rate": 2.6371654283876714e-05, |
| "loss": 0.5177, |
| "step": 205000 |
| }, |
| { |
| "epoch": 0.47302795521448776, |
| "grad_norm": 0.6872438788414001, |
| "learning_rate": 2.6348602239275616e-05, |
| "loss": 0.5507, |
| "step": 205200 |
| }, |
| { |
| "epoch": 0.4734889961065097, |
| "grad_norm": 1.3187884092330933, |
| "learning_rate": 2.6325550194674518e-05, |
| "loss": 0.5919, |
| "step": 205400 |
| }, |
| { |
| "epoch": 0.4739500369985316, |
| "grad_norm": 0.8862842321395874, |
| "learning_rate": 2.6302498150073424e-05, |
| "loss": 0.4935, |
| "step": 205600 |
| }, |
| { |
| "epoch": 0.4744110778905535, |
| "grad_norm": 1.1730307340621948, |
| "learning_rate": 2.6279446105472326e-05, |
| "loss": 0.5093, |
| "step": 205800 |
| }, |
| { |
| "epoch": 0.4748721187825754, |
| "grad_norm": 1.160568118095398, |
| "learning_rate": 2.6256394060871232e-05, |
| "loss": 0.5479, |
| "step": 206000 |
| }, |
| { |
| "epoch": 0.4753331596745973, |
| "grad_norm": 1.4531235694885254, |
| "learning_rate": 2.6233342016270134e-05, |
| "loss": 0.5399, |
| "step": 206200 |
| }, |
| { |
| "epoch": 0.47579420056661925, |
| "grad_norm": 2.6737730503082275, |
| "learning_rate": 2.621028997166904e-05, |
| "loss": 0.5246, |
| "step": 206400 |
| }, |
| { |
| "epoch": 0.4762552414586412, |
| "grad_norm": 1.8411715030670166, |
| "learning_rate": 2.6187237927067942e-05, |
| "loss": 0.548, |
| "step": 206600 |
| }, |
| { |
| "epoch": 0.4767162823506631, |
| "grad_norm": 1.6035988330841064, |
| "learning_rate": 2.6164185882466848e-05, |
| "loss": 0.4635, |
| "step": 206800 |
| }, |
| { |
| "epoch": 0.477177323242685, |
| "grad_norm": 0.9196053743362427, |
| "learning_rate": 2.614113383786575e-05, |
| "loss": 0.4865, |
| "step": 207000 |
| }, |
| { |
| "epoch": 0.47763836413470695, |
| "grad_norm": 1.3672767877578735, |
| "learning_rate": 2.6118081793264652e-05, |
| "loss": 0.5275, |
| "step": 207200 |
| }, |
| { |
| "epoch": 0.4780994050267288, |
| "grad_norm": 1.2003188133239746, |
| "learning_rate": 2.6095029748663558e-05, |
| "loss": 0.5181, |
| "step": 207400 |
| }, |
| { |
| "epoch": 0.47856044591875074, |
| "grad_norm": 0.8703144788742065, |
| "learning_rate": 2.607197770406246e-05, |
| "loss": 0.4987, |
| "step": 207600 |
| }, |
| { |
| "epoch": 0.47902148681077267, |
| "grad_norm": 1.2609108686447144, |
| "learning_rate": 2.6048925659461366e-05, |
| "loss": 0.5032, |
| "step": 207800 |
| }, |
| { |
| "epoch": 0.4794825277027946, |
| "grad_norm": 1.2695225477218628, |
| "learning_rate": 2.6025873614860268e-05, |
| "loss": 0.5221, |
| "step": 208000 |
| }, |
| { |
| "epoch": 0.4799435685948165, |
| "grad_norm": 1.1836507320404053, |
| "learning_rate": 2.6002821570259174e-05, |
| "loss": 0.5443, |
| "step": 208200 |
| }, |
| { |
| "epoch": 0.48040460948683844, |
| "grad_norm": 1.0860618352890015, |
| "learning_rate": 2.5979769525658083e-05, |
| "loss": 0.5391, |
| "step": 208400 |
| }, |
| { |
| "epoch": 0.48086565037886037, |
| "grad_norm": 2.6720314025878906, |
| "learning_rate": 2.5956717481056985e-05, |
| "loss": 0.5293, |
| "step": 208600 |
| }, |
| { |
| "epoch": 0.4813266912708823, |
| "grad_norm": 2.128580093383789, |
| "learning_rate": 2.593366543645589e-05, |
| "loss": 0.5426, |
| "step": 208800 |
| }, |
| { |
| "epoch": 0.4817877321629042, |
| "grad_norm": 1.0625451803207397, |
| "learning_rate": 2.5910613391854793e-05, |
| "loss": 0.5703, |
| "step": 209000 |
| }, |
| { |
| "epoch": 0.4822487730549261, |
| "grad_norm": 1.0436484813690186, |
| "learning_rate": 2.5887561347253695e-05, |
| "loss": 0.5036, |
| "step": 209200 |
| }, |
| { |
| "epoch": 0.482709813946948, |
| "grad_norm": 1.5313512086868286, |
| "learning_rate": 2.58645093026526e-05, |
| "loss": 0.4912, |
| "step": 209400 |
| }, |
| { |
| "epoch": 0.48317085483896993, |
| "grad_norm": 2.2933545112609863, |
| "learning_rate": 2.5841457258051503e-05, |
| "loss": 0.5143, |
| "step": 209600 |
| }, |
| { |
| "epoch": 0.48363189573099186, |
| "grad_norm": 0.9948174357414246, |
| "learning_rate": 2.581840521345041e-05, |
| "loss": 0.4997, |
| "step": 209800 |
| }, |
| { |
| "epoch": 0.4840929366230138, |
| "grad_norm": 0.6930698752403259, |
| "learning_rate": 2.579535316884931e-05, |
| "loss": 0.5701, |
| "step": 210000 |
| }, |
| { |
| "epoch": 0.4845539775150357, |
| "grad_norm": 2.551692247390747, |
| "learning_rate": 2.5772301124248217e-05, |
| "loss": 0.5026, |
| "step": 210200 |
| }, |
| { |
| "epoch": 0.48501501840705763, |
| "grad_norm": 0.6203674674034119, |
| "learning_rate": 2.574924907964712e-05, |
| "loss": 0.5457, |
| "step": 210400 |
| }, |
| { |
| "epoch": 0.48547605929907955, |
| "grad_norm": 0.8173620104789734, |
| "learning_rate": 2.5726197035046025e-05, |
| "loss": 0.5061, |
| "step": 210600 |
| }, |
| { |
| "epoch": 0.4859371001911014, |
| "grad_norm": 1.0083948373794556, |
| "learning_rate": 2.5703144990444927e-05, |
| "loss": 0.4877, |
| "step": 210800 |
| }, |
| { |
| "epoch": 0.48639814108312335, |
| "grad_norm": 0.48525819182395935, |
| "learning_rate": 2.568009294584383e-05, |
| "loss": 0.5158, |
| "step": 211000 |
| }, |
| { |
| "epoch": 0.48685918197514527, |
| "grad_norm": 2.381948709487915, |
| "learning_rate": 2.5657040901242735e-05, |
| "loss": 0.5087, |
| "step": 211200 |
| }, |
| { |
| "epoch": 0.4873202228671672, |
| "grad_norm": 1.283881425857544, |
| "learning_rate": 2.5633988856641638e-05, |
| "loss": 0.5529, |
| "step": 211400 |
| }, |
| { |
| "epoch": 0.4877812637591891, |
| "grad_norm": 1.0474011898040771, |
| "learning_rate": 2.5610936812040543e-05, |
| "loss": 0.4997, |
| "step": 211600 |
| }, |
| { |
| "epoch": 0.48824230465121105, |
| "grad_norm": 1.509234070777893, |
| "learning_rate": 2.5587884767439452e-05, |
| "loss": 0.5289, |
| "step": 211800 |
| }, |
| { |
| "epoch": 0.48870334554323297, |
| "grad_norm": 0.736985445022583, |
| "learning_rate": 2.5564832722838355e-05, |
| "loss": 0.56, |
| "step": 212000 |
| }, |
| { |
| "epoch": 0.4891643864352549, |
| "grad_norm": 0.5530835390090942, |
| "learning_rate": 2.554178067823726e-05, |
| "loss": 0.5385, |
| "step": 212200 |
| }, |
| { |
| "epoch": 0.4896254273272768, |
| "grad_norm": 1.0076507329940796, |
| "learning_rate": 2.5518728633636163e-05, |
| "loss": 0.5014, |
| "step": 212400 |
| }, |
| { |
| "epoch": 0.4900864682192987, |
| "grad_norm": 0.7996362447738647, |
| "learning_rate": 2.5495676589035068e-05, |
| "loss": 0.5417, |
| "step": 212600 |
| }, |
| { |
| "epoch": 0.4905475091113206, |
| "grad_norm": 1.1056005954742432, |
| "learning_rate": 2.547262454443397e-05, |
| "loss": 0.4684, |
| "step": 212800 |
| }, |
| { |
| "epoch": 0.49100855000334254, |
| "grad_norm": 1.4682406187057495, |
| "learning_rate": 2.5449572499832873e-05, |
| "loss": 0.5222, |
| "step": 213000 |
| }, |
| { |
| "epoch": 0.49146959089536446, |
| "grad_norm": 2.054387331008911, |
| "learning_rate": 2.542652045523178e-05, |
| "loss": 0.5487, |
| "step": 213200 |
| }, |
| { |
| "epoch": 0.4919306317873864, |
| "grad_norm": 1.1834423542022705, |
| "learning_rate": 2.540346841063068e-05, |
| "loss": 0.523, |
| "step": 213400 |
| }, |
| { |
| "epoch": 0.4923916726794083, |
| "grad_norm": 1.6938774585723877, |
| "learning_rate": 2.5380416366029586e-05, |
| "loss": 0.5807, |
| "step": 213600 |
| }, |
| { |
| "epoch": 0.49285271357143023, |
| "grad_norm": 1.825681209564209, |
| "learning_rate": 2.535736432142849e-05, |
| "loss": 0.5444, |
| "step": 213800 |
| }, |
| { |
| "epoch": 0.49331375446345216, |
| "grad_norm": 1.6016223430633545, |
| "learning_rate": 2.5334312276827394e-05, |
| "loss": 0.5095, |
| "step": 214000 |
| }, |
| { |
| "epoch": 0.493774795355474, |
| "grad_norm": 0.7464369535446167, |
| "learning_rate": 2.5311260232226297e-05, |
| "loss": 0.5111, |
| "step": 214200 |
| }, |
| { |
| "epoch": 0.49423583624749595, |
| "grad_norm": 1.6987085342407227, |
| "learning_rate": 2.5288208187625202e-05, |
| "loss": 0.4878, |
| "step": 214400 |
| }, |
| { |
| "epoch": 0.4946968771395179, |
| "grad_norm": 1.2027496099472046, |
| "learning_rate": 2.5265156143024105e-05, |
| "loss": 0.5734, |
| "step": 214600 |
| }, |
| { |
| "epoch": 0.4951579180315398, |
| "grad_norm": 1.1822620630264282, |
| "learning_rate": 2.5242104098423007e-05, |
| "loss": 0.5592, |
| "step": 214800 |
| }, |
| { |
| "epoch": 0.4956189589235617, |
| "grad_norm": 1.0884791612625122, |
| "learning_rate": 2.521905205382192e-05, |
| "loss": 0.5228, |
| "step": 215000 |
| }, |
| { |
| "epoch": 0.49607999981558365, |
| "grad_norm": 3.0900111198425293, |
| "learning_rate": 2.519600000922082e-05, |
| "loss": 0.5193, |
| "step": 215200 |
| }, |
| { |
| "epoch": 0.4965410407076056, |
| "grad_norm": 0.8263806104660034, |
| "learning_rate": 2.5172947964619724e-05, |
| "loss": 0.5436, |
| "step": 215400 |
| }, |
| { |
| "epoch": 0.4970020815996275, |
| "grad_norm": 0.9320021271705627, |
| "learning_rate": 2.514989592001863e-05, |
| "loss": 0.5525, |
| "step": 215600 |
| }, |
| { |
| "epoch": 0.4974631224916494, |
| "grad_norm": 1.8418340682983398, |
| "learning_rate": 2.5126843875417532e-05, |
| "loss": 0.5159, |
| "step": 215800 |
| }, |
| { |
| "epoch": 0.4979241633836713, |
| "grad_norm": 1.0613411664962769, |
| "learning_rate": 2.5103791830816438e-05, |
| "loss": 0.5222, |
| "step": 216000 |
| }, |
| { |
| "epoch": 0.4983852042756932, |
| "grad_norm": 0.9613930583000183, |
| "learning_rate": 2.508073978621534e-05, |
| "loss": 0.5506, |
| "step": 216200 |
| }, |
| { |
| "epoch": 0.49884624516771514, |
| "grad_norm": 1.2147666215896606, |
| "learning_rate": 2.5057687741614246e-05, |
| "loss": 0.5332, |
| "step": 216400 |
| }, |
| { |
| "epoch": 0.49930728605973707, |
| "grad_norm": 0.8295925259590149, |
| "learning_rate": 2.5034635697013148e-05, |
| "loss": 0.5083, |
| "step": 216600 |
| }, |
| { |
| "epoch": 0.499768326951759, |
| "grad_norm": 1.5370151996612549, |
| "learning_rate": 2.501158365241205e-05, |
| "loss": 0.5137, |
| "step": 216800 |
| }, |
| { |
| "epoch": 0.5002293678437809, |
| "grad_norm": 1.137407898902893, |
| "learning_rate": 2.4988531607810956e-05, |
| "loss": 0.5289, |
| "step": 217000 |
| }, |
| { |
| "epoch": 0.5006904087358028, |
| "grad_norm": 1.1642227172851562, |
| "learning_rate": 2.4965479563209858e-05, |
| "loss": 0.5223, |
| "step": 217200 |
| }, |
| { |
| "epoch": 0.5011514496278248, |
| "grad_norm": 1.7283347845077515, |
| "learning_rate": 2.4942427518608764e-05, |
| "loss": 0.5269, |
| "step": 217400 |
| }, |
| { |
| "epoch": 0.5016124905198467, |
| "grad_norm": 1.0114668607711792, |
| "learning_rate": 2.491937547400767e-05, |
| "loss": 0.5464, |
| "step": 217600 |
| }, |
| { |
| "epoch": 0.5020735314118686, |
| "grad_norm": 2.422441244125366, |
| "learning_rate": 2.4896323429406572e-05, |
| "loss": 0.5441, |
| "step": 217800 |
| }, |
| { |
| "epoch": 0.5025345723038905, |
| "grad_norm": 0.6557809710502625, |
| "learning_rate": 2.4873271384805477e-05, |
| "loss": 0.4985, |
| "step": 218000 |
| }, |
| { |
| "epoch": 0.5029956131959125, |
| "grad_norm": 1.6513997316360474, |
| "learning_rate": 2.485021934020438e-05, |
| "loss": 0.5022, |
| "step": 218200 |
| }, |
| { |
| "epoch": 0.5034566540879343, |
| "grad_norm": 0.7555482387542725, |
| "learning_rate": 2.4827167295603285e-05, |
| "loss": 0.5285, |
| "step": 218400 |
| }, |
| { |
| "epoch": 0.5039176949799562, |
| "grad_norm": 0.9121997356414795, |
| "learning_rate": 2.4804115251002188e-05, |
| "loss": 0.5312, |
| "step": 218600 |
| }, |
| { |
| "epoch": 0.5043787358719781, |
| "grad_norm": 0.36491402983665466, |
| "learning_rate": 2.4781063206401093e-05, |
| "loss": 0.5309, |
| "step": 218800 |
| }, |
| { |
| "epoch": 0.504839776764, |
| "grad_norm": 2.048449993133545, |
| "learning_rate": 2.4758011161799996e-05, |
| "loss": 0.5274, |
| "step": 219000 |
| }, |
| { |
| "epoch": 0.505300817656022, |
| "grad_norm": 2.769894599914551, |
| "learning_rate": 2.47349591171989e-05, |
| "loss": 0.5035, |
| "step": 219200 |
| }, |
| { |
| "epoch": 0.5057618585480439, |
| "grad_norm": 1.8023812770843506, |
| "learning_rate": 2.4711907072597807e-05, |
| "loss": 0.5071, |
| "step": 219400 |
| }, |
| { |
| "epoch": 0.5062228994400658, |
| "grad_norm": 0.6726931929588318, |
| "learning_rate": 2.468885502799671e-05, |
| "loss": 0.5223, |
| "step": 219600 |
| }, |
| { |
| "epoch": 0.5066839403320877, |
| "grad_norm": 9.744784355163574, |
| "learning_rate": 2.4665802983395615e-05, |
| "loss": 0.4931, |
| "step": 219800 |
| }, |
| { |
| "epoch": 0.5071449812241097, |
| "grad_norm": 1.1189628839492798, |
| "learning_rate": 2.4642750938794517e-05, |
| "loss": 0.5205, |
| "step": 220000 |
| }, |
| { |
| "epoch": 0.5076060221161316, |
| "grad_norm": 1.6368327140808105, |
| "learning_rate": 2.4619698894193423e-05, |
| "loss": 0.5169, |
| "step": 220200 |
| }, |
| { |
| "epoch": 0.5080670630081535, |
| "grad_norm": 1.834841012954712, |
| "learning_rate": 2.4596646849592325e-05, |
| "loss": 0.4931, |
| "step": 220400 |
| }, |
| { |
| "epoch": 0.5085281039001754, |
| "grad_norm": 1.0901039838790894, |
| "learning_rate": 2.4573594804991227e-05, |
| "loss": 0.5193, |
| "step": 220600 |
| }, |
| { |
| "epoch": 0.5089891447921974, |
| "grad_norm": 0.9557801485061646, |
| "learning_rate": 2.4550542760390133e-05, |
| "loss": 0.5249, |
| "step": 220800 |
| }, |
| { |
| "epoch": 0.5094501856842193, |
| "grad_norm": 1.0982486009597778, |
| "learning_rate": 2.452749071578904e-05, |
| "loss": 0.4845, |
| "step": 221000 |
| }, |
| { |
| "epoch": 0.5099112265762412, |
| "grad_norm": 1.3123830556869507, |
| "learning_rate": 2.4504438671187944e-05, |
| "loss": 0.4842, |
| "step": 221200 |
| }, |
| { |
| "epoch": 0.5103722674682631, |
| "grad_norm": 1.05722975730896, |
| "learning_rate": 2.4481386626586847e-05, |
| "loss": 0.5196, |
| "step": 221400 |
| }, |
| { |
| "epoch": 0.5108333083602851, |
| "grad_norm": 1.5994271039962769, |
| "learning_rate": 2.445833458198575e-05, |
| "loss": 0.4932, |
| "step": 221600 |
| }, |
| { |
| "epoch": 0.5112943492523069, |
| "grad_norm": 0.3710331916809082, |
| "learning_rate": 2.4435282537384655e-05, |
| "loss": 0.4854, |
| "step": 221800 |
| }, |
| { |
| "epoch": 0.5117553901443288, |
| "grad_norm": 1.2854666709899902, |
| "learning_rate": 2.4412230492783557e-05, |
| "loss": 0.5092, |
| "step": 222000 |
| }, |
| { |
| "epoch": 0.5122164310363507, |
| "grad_norm": 1.364815354347229, |
| "learning_rate": 2.4389178448182463e-05, |
| "loss": 0.4975, |
| "step": 222200 |
| }, |
| { |
| "epoch": 0.5126774719283727, |
| "grad_norm": 1.2252674102783203, |
| "learning_rate": 2.4366126403581365e-05, |
| "loss": 0.5075, |
| "step": 222400 |
| }, |
| { |
| "epoch": 0.5131385128203946, |
| "grad_norm": 0.9235671758651733, |
| "learning_rate": 2.434307435898027e-05, |
| "loss": 0.5051, |
| "step": 222600 |
| }, |
| { |
| "epoch": 0.5135995537124165, |
| "grad_norm": 1.0827833414077759, |
| "learning_rate": 2.4320022314379176e-05, |
| "loss": 0.498, |
| "step": 222800 |
| }, |
| { |
| "epoch": 0.5140605946044384, |
| "grad_norm": 1.4872461557388306, |
| "learning_rate": 2.429697026977808e-05, |
| "loss": 0.5253, |
| "step": 223000 |
| }, |
| { |
| "epoch": 0.5145216354964604, |
| "grad_norm": 0.5086209177970886, |
| "learning_rate": 2.4273918225176984e-05, |
| "loss": 0.4979, |
| "step": 223200 |
| }, |
| { |
| "epoch": 0.5149826763884823, |
| "grad_norm": 1.0882658958435059, |
| "learning_rate": 2.4250866180575887e-05, |
| "loss": 0.5244, |
| "step": 223400 |
| }, |
| { |
| "epoch": 0.5154437172805042, |
| "grad_norm": 1.3784066438674927, |
| "learning_rate": 2.4227814135974792e-05, |
| "loss": 0.5057, |
| "step": 223600 |
| }, |
| { |
| "epoch": 0.5159047581725261, |
| "grad_norm": 1.245423674583435, |
| "learning_rate": 2.4204762091373695e-05, |
| "loss": 0.5005, |
| "step": 223800 |
| }, |
| { |
| "epoch": 0.516365799064548, |
| "grad_norm": 2.1874382495880127, |
| "learning_rate": 2.41817100467726e-05, |
| "loss": 0.5206, |
| "step": 224000 |
| }, |
| { |
| "epoch": 0.51682683995657, |
| "grad_norm": 1.1349289417266846, |
| "learning_rate": 2.4158658002171503e-05, |
| "loss": 0.5547, |
| "step": 224200 |
| }, |
| { |
| "epoch": 0.5172878808485919, |
| "grad_norm": 0.9220569729804993, |
| "learning_rate": 2.4135605957570408e-05, |
| "loss": 0.5421, |
| "step": 224400 |
| }, |
| { |
| "epoch": 0.5177489217406138, |
| "grad_norm": 0.7660688757896423, |
| "learning_rate": 2.4112553912969314e-05, |
| "loss": 0.4737, |
| "step": 224600 |
| }, |
| { |
| "epoch": 0.5182099626326357, |
| "grad_norm": 1.1073906421661377, |
| "learning_rate": 2.4089501868368216e-05, |
| "loss": 0.5424, |
| "step": 224800 |
| }, |
| { |
| "epoch": 0.5186710035246577, |
| "grad_norm": 0.5724996328353882, |
| "learning_rate": 2.4066449823767122e-05, |
| "loss": 0.5261, |
| "step": 225000 |
| }, |
| { |
| "epoch": 0.5191320444166795, |
| "grad_norm": 0.3339095413684845, |
| "learning_rate": 2.4043397779166024e-05, |
| "loss": 0.5172, |
| "step": 225200 |
| }, |
| { |
| "epoch": 0.5195930853087014, |
| "grad_norm": 1.5384175777435303, |
| "learning_rate": 2.4020345734564926e-05, |
| "loss": 0.498, |
| "step": 225400 |
| }, |
| { |
| "epoch": 0.5200541262007233, |
| "grad_norm": 1.137721061706543, |
| "learning_rate": 2.3997293689963832e-05, |
| "loss": 0.5326, |
| "step": 225600 |
| }, |
| { |
| "epoch": 0.5205151670927453, |
| "grad_norm": 0.3401934504508972, |
| "learning_rate": 2.3974241645362734e-05, |
| "loss": 0.5264, |
| "step": 225800 |
| }, |
| { |
| "epoch": 0.5209762079847672, |
| "grad_norm": 0.9476338624954224, |
| "learning_rate": 2.3951189600761643e-05, |
| "loss": 0.4663, |
| "step": 226000 |
| }, |
| { |
| "epoch": 0.5214372488767891, |
| "grad_norm": 1.3103936910629272, |
| "learning_rate": 2.3928137556160546e-05, |
| "loss": 0.5328, |
| "step": 226200 |
| }, |
| { |
| "epoch": 0.521898289768811, |
| "grad_norm": 1.7903141975402832, |
| "learning_rate": 2.3905085511559448e-05, |
| "loss": 0.4953, |
| "step": 226400 |
| }, |
| { |
| "epoch": 0.522359330660833, |
| "grad_norm": 0.7507403492927551, |
| "learning_rate": 2.3882033466958354e-05, |
| "loss": 0.5132, |
| "step": 226600 |
| }, |
| { |
| "epoch": 0.5228203715528549, |
| "grad_norm": 1.1141492128372192, |
| "learning_rate": 2.3858981422357256e-05, |
| "loss": 0.4964, |
| "step": 226800 |
| }, |
| { |
| "epoch": 0.5232814124448768, |
| "grad_norm": 0.9881762862205505, |
| "learning_rate": 2.383592937775616e-05, |
| "loss": 0.5187, |
| "step": 227000 |
| }, |
| { |
| "epoch": 0.5237424533368987, |
| "grad_norm": 2.4193100929260254, |
| "learning_rate": 2.3812877333155064e-05, |
| "loss": 0.5324, |
| "step": 227200 |
| }, |
| { |
| "epoch": 0.5242034942289207, |
| "grad_norm": 0.5690718293190002, |
| "learning_rate": 2.378982528855397e-05, |
| "loss": 0.4951, |
| "step": 227400 |
| }, |
| { |
| "epoch": 0.5246645351209426, |
| "grad_norm": 1.6624326705932617, |
| "learning_rate": 2.3766773243952872e-05, |
| "loss": 0.5211, |
| "step": 227600 |
| }, |
| { |
| "epoch": 0.5251255760129645, |
| "grad_norm": 0.916460394859314, |
| "learning_rate": 2.3743721199351778e-05, |
| "loss": 0.5439, |
| "step": 227800 |
| }, |
| { |
| "epoch": 0.5255866169049864, |
| "grad_norm": 1.8242855072021484, |
| "learning_rate": 2.3720669154750683e-05, |
| "loss": 0.5436, |
| "step": 228000 |
| }, |
| { |
| "epoch": 0.5260476577970084, |
| "grad_norm": 1.3293455839157104, |
| "learning_rate": 2.3697617110149586e-05, |
| "loss": 0.5383, |
| "step": 228200 |
| }, |
| { |
| "epoch": 0.5265086986890303, |
| "grad_norm": 1.328596830368042, |
| "learning_rate": 2.367456506554849e-05, |
| "loss": 0.5401, |
| "step": 228400 |
| }, |
| { |
| "epoch": 0.5269697395810521, |
| "grad_norm": 0.9804822206497192, |
| "learning_rate": 2.3651513020947393e-05, |
| "loss": 0.5252, |
| "step": 228600 |
| }, |
| { |
| "epoch": 0.527430780473074, |
| "grad_norm": 1.9417587518692017, |
| "learning_rate": 2.36284609763463e-05, |
| "loss": 0.5389, |
| "step": 228800 |
| }, |
| { |
| "epoch": 0.5278918213650959, |
| "grad_norm": 1.445884346961975, |
| "learning_rate": 2.36054089317452e-05, |
| "loss": 0.5014, |
| "step": 229000 |
| }, |
| { |
| "epoch": 0.5283528622571179, |
| "grad_norm": 1.5352164506912231, |
| "learning_rate": 2.3582356887144104e-05, |
| "loss": 0.4702, |
| "step": 229200 |
| }, |
| { |
| "epoch": 0.5288139031491398, |
| "grad_norm": 0.47279122471809387, |
| "learning_rate": 2.3559304842543013e-05, |
| "loss": 0.5097, |
| "step": 229400 |
| }, |
| { |
| "epoch": 0.5292749440411617, |
| "grad_norm": 0.591940701007843, |
| "learning_rate": 2.3536252797941915e-05, |
| "loss": 0.4762, |
| "step": 229600 |
| }, |
| { |
| "epoch": 0.5297359849331836, |
| "grad_norm": 1.6824707984924316, |
| "learning_rate": 2.351320075334082e-05, |
| "loss": 0.4868, |
| "step": 229800 |
| }, |
| { |
| "epoch": 0.5301970258252056, |
| "grad_norm": 0.9410609602928162, |
| "learning_rate": 2.3490148708739723e-05, |
| "loss": 0.5622, |
| "step": 230000 |
| }, |
| { |
| "epoch": 0.5306580667172275, |
| "grad_norm": 1.2229105234146118, |
| "learning_rate": 2.3467096664138625e-05, |
| "loss": 0.5073, |
| "step": 230200 |
| }, |
| { |
| "epoch": 0.5311191076092494, |
| "grad_norm": 0.7156030535697937, |
| "learning_rate": 2.344404461953753e-05, |
| "loss": 0.4934, |
| "step": 230400 |
| }, |
| { |
| "epoch": 0.5315801485012713, |
| "grad_norm": 1.401571273803711, |
| "learning_rate": 2.3420992574936433e-05, |
| "loss": 0.4973, |
| "step": 230600 |
| }, |
| { |
| "epoch": 0.5320411893932933, |
| "grad_norm": 0.503180205821991, |
| "learning_rate": 2.339794053033534e-05, |
| "loss": 0.4983, |
| "step": 230800 |
| }, |
| { |
| "epoch": 0.5325022302853152, |
| "grad_norm": 1.6790913343429565, |
| "learning_rate": 2.337488848573424e-05, |
| "loss": 0.4945, |
| "step": 231000 |
| }, |
| { |
| "epoch": 0.5329632711773371, |
| "grad_norm": 1.007137417793274, |
| "learning_rate": 2.3351836441133147e-05, |
| "loss": 0.4822, |
| "step": 231200 |
| }, |
| { |
| "epoch": 0.533424312069359, |
| "grad_norm": 2.378171920776367, |
| "learning_rate": 2.3328784396532053e-05, |
| "loss": 0.5775, |
| "step": 231400 |
| }, |
| { |
| "epoch": 0.533885352961381, |
| "grad_norm": 1.203321099281311, |
| "learning_rate": 2.3305732351930955e-05, |
| "loss": 0.4724, |
| "step": 231600 |
| }, |
| { |
| "epoch": 0.5343463938534029, |
| "grad_norm": 1.0625741481781006, |
| "learning_rate": 2.328268030732986e-05, |
| "loss": 0.4916, |
| "step": 231800 |
| }, |
| { |
| "epoch": 0.5348074347454247, |
| "grad_norm": 1.0948866605758667, |
| "learning_rate": 2.3259628262728763e-05, |
| "loss": 0.5066, |
| "step": 232000 |
| }, |
| { |
| "epoch": 0.5352684756374466, |
| "grad_norm": 1.4360226392745972, |
| "learning_rate": 2.323657621812767e-05, |
| "loss": 0.4836, |
| "step": 232200 |
| }, |
| { |
| "epoch": 0.5357295165294685, |
| "grad_norm": 1.1512943506240845, |
| "learning_rate": 2.321352417352657e-05, |
| "loss": 0.5677, |
| "step": 232400 |
| }, |
| { |
| "epoch": 0.5361905574214905, |
| "grad_norm": 1.0096590518951416, |
| "learning_rate": 2.3190472128925477e-05, |
| "loss": 0.5734, |
| "step": 232600 |
| }, |
| { |
| "epoch": 0.5366515983135124, |
| "grad_norm": 1.4425885677337646, |
| "learning_rate": 2.3167420084324382e-05, |
| "loss": 0.4956, |
| "step": 232800 |
| }, |
| { |
| "epoch": 0.5371126392055343, |
| "grad_norm": 0.5548868775367737, |
| "learning_rate": 2.3144368039723284e-05, |
| "loss": 0.4904, |
| "step": 233000 |
| }, |
| { |
| "epoch": 0.5375736800975562, |
| "grad_norm": 1.1134722232818604, |
| "learning_rate": 2.312131599512219e-05, |
| "loss": 0.5376, |
| "step": 233200 |
| }, |
| { |
| "epoch": 0.5380347209895782, |
| "grad_norm": 0.9351561069488525, |
| "learning_rate": 2.3098263950521092e-05, |
| "loss": 0.5593, |
| "step": 233400 |
| }, |
| { |
| "epoch": 0.5384957618816001, |
| "grad_norm": 1.064975380897522, |
| "learning_rate": 2.3075211905919998e-05, |
| "loss": 0.5187, |
| "step": 233600 |
| }, |
| { |
| "epoch": 0.538956802773622, |
| "grad_norm": 1.065260648727417, |
| "learning_rate": 2.30521598613189e-05, |
| "loss": 0.5143, |
| "step": 233800 |
| }, |
| { |
| "epoch": 0.5394178436656439, |
| "grad_norm": 1.2114022970199585, |
| "learning_rate": 2.3029107816717803e-05, |
| "loss": 0.5338, |
| "step": 234000 |
| }, |
| { |
| "epoch": 0.5398788845576659, |
| "grad_norm": 0.8252068758010864, |
| "learning_rate": 2.300605577211671e-05, |
| "loss": 0.5506, |
| "step": 234200 |
| }, |
| { |
| "epoch": 0.5403399254496878, |
| "grad_norm": 1.3504903316497803, |
| "learning_rate": 2.298300372751561e-05, |
| "loss": 0.4864, |
| "step": 234400 |
| }, |
| { |
| "epoch": 0.5408009663417097, |
| "grad_norm": 1.2112751007080078, |
| "learning_rate": 2.295995168291452e-05, |
| "loss": 0.4996, |
| "step": 234600 |
| }, |
| { |
| "epoch": 0.5412620072337316, |
| "grad_norm": 0.6069416999816895, |
| "learning_rate": 2.2936899638313422e-05, |
| "loss": 0.5307, |
| "step": 234800 |
| }, |
| { |
| "epoch": 0.5417230481257536, |
| "grad_norm": 1.572514533996582, |
| "learning_rate": 2.2913847593712324e-05, |
| "loss": 0.5292, |
| "step": 235000 |
| }, |
| { |
| "epoch": 0.5421840890177755, |
| "grad_norm": 1.0099878311157227, |
| "learning_rate": 2.289079554911123e-05, |
| "loss": 0.5688, |
| "step": 235200 |
| }, |
| { |
| "epoch": 0.5426451299097973, |
| "grad_norm": 0.9012830853462219, |
| "learning_rate": 2.2867743504510132e-05, |
| "loss": 0.5366, |
| "step": 235400 |
| }, |
| { |
| "epoch": 0.5431061708018192, |
| "grad_norm": 1.135108232498169, |
| "learning_rate": 2.2844691459909038e-05, |
| "loss": 0.4941, |
| "step": 235600 |
| }, |
| { |
| "epoch": 0.5435672116938411, |
| "grad_norm": 0.9751501083374023, |
| "learning_rate": 2.282163941530794e-05, |
| "loss": 0.5217, |
| "step": 235800 |
| }, |
| { |
| "epoch": 0.5440282525858631, |
| "grad_norm": 1.2317419052124023, |
| "learning_rate": 2.2798587370706846e-05, |
| "loss": 0.5562, |
| "step": 236000 |
| }, |
| { |
| "epoch": 0.544489293477885, |
| "grad_norm": 1.3884457349777222, |
| "learning_rate": 2.277553532610575e-05, |
| "loss": 0.4626, |
| "step": 236200 |
| }, |
| { |
| "epoch": 0.5449503343699069, |
| "grad_norm": 0.9288251996040344, |
| "learning_rate": 2.2752483281504654e-05, |
| "loss": 0.5039, |
| "step": 236400 |
| }, |
| { |
| "epoch": 0.5454113752619288, |
| "grad_norm": 0.3665759563446045, |
| "learning_rate": 2.272943123690356e-05, |
| "loss": 0.5163, |
| "step": 236600 |
| }, |
| { |
| "epoch": 0.5458724161539508, |
| "grad_norm": 2.027440309524536, |
| "learning_rate": 2.2706379192302462e-05, |
| "loss": 0.5599, |
| "step": 236800 |
| }, |
| { |
| "epoch": 0.5463334570459727, |
| "grad_norm": 1.916327953338623, |
| "learning_rate": 2.2683327147701367e-05, |
| "loss": 0.482, |
| "step": 237000 |
| }, |
| { |
| "epoch": 0.5467944979379946, |
| "grad_norm": 1.4914941787719727, |
| "learning_rate": 2.266027510310027e-05, |
| "loss": 0.5569, |
| "step": 237200 |
| }, |
| { |
| "epoch": 0.5472555388300165, |
| "grad_norm": 1.7089998722076416, |
| "learning_rate": 2.2637223058499175e-05, |
| "loss": 0.4639, |
| "step": 237400 |
| }, |
| { |
| "epoch": 0.5477165797220385, |
| "grad_norm": 4.126305103302002, |
| "learning_rate": 2.2614171013898078e-05, |
| "loss": 0.519, |
| "step": 237600 |
| }, |
| { |
| "epoch": 0.5481776206140604, |
| "grad_norm": 1.5551437139511108, |
| "learning_rate": 2.259111896929698e-05, |
| "loss": 0.5204, |
| "step": 237800 |
| }, |
| { |
| "epoch": 0.5486386615060823, |
| "grad_norm": 0.7548621296882629, |
| "learning_rate": 2.256806692469589e-05, |
| "loss": 0.5807, |
| "step": 238000 |
| }, |
| { |
| "epoch": 0.5490997023981042, |
| "grad_norm": 0.2803627550601959, |
| "learning_rate": 2.254501488009479e-05, |
| "loss": 0.4846, |
| "step": 238200 |
| }, |
| { |
| "epoch": 0.5495607432901262, |
| "grad_norm": 0.9677246809005737, |
| "learning_rate": 2.2521962835493697e-05, |
| "loss": 0.4721, |
| "step": 238400 |
| }, |
| { |
| "epoch": 0.5500217841821481, |
| "grad_norm": 1.637499451637268, |
| "learning_rate": 2.24989107908926e-05, |
| "loss": 0.5269, |
| "step": 238600 |
| }, |
| { |
| "epoch": 0.5504828250741699, |
| "grad_norm": 2.227924346923828, |
| "learning_rate": 2.24758587462915e-05, |
| "loss": 0.5198, |
| "step": 238800 |
| }, |
| { |
| "epoch": 0.5509438659661918, |
| "grad_norm": 0.7341607213020325, |
| "learning_rate": 2.2452806701690407e-05, |
| "loss": 0.4917, |
| "step": 239000 |
| }, |
| { |
| "epoch": 0.5514049068582138, |
| "grad_norm": 0.4585340917110443, |
| "learning_rate": 2.242975465708931e-05, |
| "loss": 0.5, |
| "step": 239200 |
| }, |
| { |
| "epoch": 0.5518659477502357, |
| "grad_norm": 1.405619502067566, |
| "learning_rate": 2.2406702612488215e-05, |
| "loss": 0.5141, |
| "step": 239400 |
| }, |
| { |
| "epoch": 0.5523269886422576, |
| "grad_norm": 1.2896803617477417, |
| "learning_rate": 2.238365056788712e-05, |
| "loss": 0.4999, |
| "step": 239600 |
| }, |
| { |
| "epoch": 0.5527880295342795, |
| "grad_norm": 2.165039300918579, |
| "learning_rate": 2.2360598523286023e-05, |
| "loss": 0.5722, |
| "step": 239800 |
| }, |
| { |
| "epoch": 0.5532490704263014, |
| "grad_norm": 1.3514726161956787, |
| "learning_rate": 2.233754647868493e-05, |
| "loss": 0.5017, |
| "step": 240000 |
| }, |
| { |
| "epoch": 0.5537101113183234, |
| "grad_norm": 0.8125177621841431, |
| "learning_rate": 2.231449443408383e-05, |
| "loss": 0.5618, |
| "step": 240200 |
| }, |
| { |
| "epoch": 0.5541711522103453, |
| "grad_norm": 0.4262295961380005, |
| "learning_rate": 2.2291442389482737e-05, |
| "loss": 0.4865, |
| "step": 240400 |
| }, |
| { |
| "epoch": 0.5546321931023672, |
| "grad_norm": 2.328521966934204, |
| "learning_rate": 2.226839034488164e-05, |
| "loss": 0.5051, |
| "step": 240600 |
| }, |
| { |
| "epoch": 0.5550932339943891, |
| "grad_norm": 1.1261919736862183, |
| "learning_rate": 2.2245338300280545e-05, |
| "loss": 0.5119, |
| "step": 240800 |
| }, |
| { |
| "epoch": 0.5555542748864111, |
| "grad_norm": 1.1566516160964966, |
| "learning_rate": 2.2222286255679447e-05, |
| "loss": 0.5197, |
| "step": 241000 |
| }, |
| { |
| "epoch": 0.556015315778433, |
| "grad_norm": 1.7515827417373657, |
| "learning_rate": 2.2199234211078353e-05, |
| "loss": 0.5552, |
| "step": 241200 |
| }, |
| { |
| "epoch": 0.5564763566704549, |
| "grad_norm": 1.8269792795181274, |
| "learning_rate": 2.217618216647726e-05, |
| "loss": 0.4796, |
| "step": 241400 |
| }, |
| { |
| "epoch": 0.5569373975624768, |
| "grad_norm": 0.7790307402610779, |
| "learning_rate": 2.215313012187616e-05, |
| "loss": 0.5293, |
| "step": 241600 |
| }, |
| { |
| "epoch": 0.5573984384544988, |
| "grad_norm": 0.49990883469581604, |
| "learning_rate": 2.2130078077275066e-05, |
| "loss": 0.4879, |
| "step": 241800 |
| }, |
| { |
| "epoch": 0.5578594793465207, |
| "grad_norm": 1.0329365730285645, |
| "learning_rate": 2.210702603267397e-05, |
| "loss": 0.521, |
| "step": 242000 |
| }, |
| { |
| "epoch": 0.5583205202385425, |
| "grad_norm": 1.125595211982727, |
| "learning_rate": 2.2083973988072874e-05, |
| "loss": 0.4795, |
| "step": 242200 |
| }, |
| { |
| "epoch": 0.5587815611305644, |
| "grad_norm": 1.1356284618377686, |
| "learning_rate": 2.2060921943471777e-05, |
| "loss": 0.4882, |
| "step": 242400 |
| }, |
| { |
| "epoch": 0.5592426020225864, |
| "grad_norm": 0.7517489194869995, |
| "learning_rate": 2.203786989887068e-05, |
| "loss": 0.5531, |
| "step": 242600 |
| }, |
| { |
| "epoch": 0.5597036429146083, |
| "grad_norm": 1.4066451787948608, |
| "learning_rate": 2.2014817854269585e-05, |
| "loss": 0.5133, |
| "step": 242800 |
| }, |
| { |
| "epoch": 0.5601646838066302, |
| "grad_norm": 0.7683632373809814, |
| "learning_rate": 2.199176580966849e-05, |
| "loss": 0.5379, |
| "step": 243000 |
| }, |
| { |
| "epoch": 0.5606257246986521, |
| "grad_norm": 0.3758114278316498, |
| "learning_rate": 2.1968713765067396e-05, |
| "loss": 0.4681, |
| "step": 243200 |
| }, |
| { |
| "epoch": 0.561086765590674, |
| "grad_norm": 1.2410677671432495, |
| "learning_rate": 2.1945661720466298e-05, |
| "loss": 0.5409, |
| "step": 243400 |
| }, |
| { |
| "epoch": 0.561547806482696, |
| "grad_norm": 1.4236176013946533, |
| "learning_rate": 2.19226096758652e-05, |
| "loss": 0.4861, |
| "step": 243600 |
| }, |
| { |
| "epoch": 0.5620088473747179, |
| "grad_norm": 0.9534035325050354, |
| "learning_rate": 2.1899557631264106e-05, |
| "loss": 0.5307, |
| "step": 243800 |
| }, |
| { |
| "epoch": 0.5624698882667398, |
| "grad_norm": 0.711057186126709, |
| "learning_rate": 2.187650558666301e-05, |
| "loss": 0.4825, |
| "step": 244000 |
| }, |
| { |
| "epoch": 0.5629309291587618, |
| "grad_norm": 2.3626081943511963, |
| "learning_rate": 2.1853453542061914e-05, |
| "loss": 0.5344, |
| "step": 244200 |
| }, |
| { |
| "epoch": 0.5633919700507837, |
| "grad_norm": 0.23439358174800873, |
| "learning_rate": 2.1830401497460816e-05, |
| "loss": 0.5146, |
| "step": 244400 |
| }, |
| { |
| "epoch": 0.5638530109428056, |
| "grad_norm": 2.047996997833252, |
| "learning_rate": 2.1807349452859722e-05, |
| "loss": 0.4826, |
| "step": 244600 |
| }, |
| { |
| "epoch": 0.5643140518348275, |
| "grad_norm": 1.1761419773101807, |
| "learning_rate": 2.1784297408258628e-05, |
| "loss": 0.5096, |
| "step": 244800 |
| }, |
| { |
| "epoch": 0.5647750927268494, |
| "grad_norm": 1.0271129608154297, |
| "learning_rate": 2.176124536365753e-05, |
| "loss": 0.5182, |
| "step": 245000 |
| }, |
| { |
| "epoch": 0.5652361336188714, |
| "grad_norm": 1.1691869497299194, |
| "learning_rate": 2.1738193319056436e-05, |
| "loss": 0.4849, |
| "step": 245200 |
| }, |
| { |
| "epoch": 0.5656971745108933, |
| "grad_norm": 0.9857134222984314, |
| "learning_rate": 2.1715141274455338e-05, |
| "loss": 0.4795, |
| "step": 245400 |
| }, |
| { |
| "epoch": 0.5661582154029151, |
| "grad_norm": 0.6204602122306824, |
| "learning_rate": 2.1692089229854244e-05, |
| "loss": 0.5282, |
| "step": 245600 |
| }, |
| { |
| "epoch": 0.566619256294937, |
| "grad_norm": 2.198983669281006, |
| "learning_rate": 2.1669037185253146e-05, |
| "loss": 0.534, |
| "step": 245800 |
| }, |
| { |
| "epoch": 0.567080297186959, |
| "grad_norm": 0.9738652110099792, |
| "learning_rate": 2.1645985140652052e-05, |
| "loss": 0.5499, |
| "step": 246000 |
| }, |
| { |
| "epoch": 0.5675413380789809, |
| "grad_norm": 0.801446795463562, |
| "learning_rate": 2.1622933096050954e-05, |
| "loss": 0.5452, |
| "step": 246200 |
| }, |
| { |
| "epoch": 0.5680023789710028, |
| "grad_norm": 1.2199312448501587, |
| "learning_rate": 2.159988105144986e-05, |
| "loss": 0.5296, |
| "step": 246400 |
| }, |
| { |
| "epoch": 0.5684634198630247, |
| "grad_norm": 1.333871603012085, |
| "learning_rate": 2.1576829006848765e-05, |
| "loss": 0.5443, |
| "step": 246600 |
| }, |
| { |
| "epoch": 0.5689244607550467, |
| "grad_norm": 1.0577268600463867, |
| "learning_rate": 2.1553776962247668e-05, |
| "loss": 0.5119, |
| "step": 246800 |
| }, |
| { |
| "epoch": 0.5693855016470686, |
| "grad_norm": 1.1730480194091797, |
| "learning_rate": 2.1530724917646573e-05, |
| "loss": 0.5124, |
| "step": 247000 |
| }, |
| { |
| "epoch": 0.5698465425390905, |
| "grad_norm": 1.0999897718429565, |
| "learning_rate": 2.1507672873045476e-05, |
| "loss": 0.5589, |
| "step": 247200 |
| }, |
| { |
| "epoch": 0.5703075834311124, |
| "grad_norm": 1.2525196075439453, |
| "learning_rate": 2.1484620828444378e-05, |
| "loss": 0.5254, |
| "step": 247400 |
| }, |
| { |
| "epoch": 0.5707686243231344, |
| "grad_norm": 1.3364574909210205, |
| "learning_rate": 2.1461568783843284e-05, |
| "loss": 0.496, |
| "step": 247600 |
| }, |
| { |
| "epoch": 0.5712296652151563, |
| "grad_norm": 0.8777609467506409, |
| "learning_rate": 2.1438516739242186e-05, |
| "loss": 0.5314, |
| "step": 247800 |
| }, |
| { |
| "epoch": 0.5716907061071782, |
| "grad_norm": 0.9641389846801758, |
| "learning_rate": 2.141546469464109e-05, |
| "loss": 0.4883, |
| "step": 248000 |
| }, |
| { |
| "epoch": 0.5721517469992001, |
| "grad_norm": 0.8974488973617554, |
| "learning_rate": 2.1392412650039997e-05, |
| "loss": 0.4994, |
| "step": 248200 |
| }, |
| { |
| "epoch": 0.572612787891222, |
| "grad_norm": 1.1016892194747925, |
| "learning_rate": 2.13693606054389e-05, |
| "loss": 0.5206, |
| "step": 248400 |
| }, |
| { |
| "epoch": 0.573073828783244, |
| "grad_norm": 1.8941538333892822, |
| "learning_rate": 2.1346308560837805e-05, |
| "loss": 0.5283, |
| "step": 248600 |
| }, |
| { |
| "epoch": 0.5735348696752659, |
| "grad_norm": 0.882707417011261, |
| "learning_rate": 2.1323256516236707e-05, |
| "loss": 0.523, |
| "step": 248800 |
| }, |
| { |
| "epoch": 0.5739959105672877, |
| "grad_norm": 1.1047805547714233, |
| "learning_rate": 2.1300204471635613e-05, |
| "loss": 0.5199, |
| "step": 249000 |
| }, |
| { |
| "epoch": 0.5744569514593096, |
| "grad_norm": 0.9764407873153687, |
| "learning_rate": 2.1277152427034515e-05, |
| "loss": 0.4902, |
| "step": 249200 |
| }, |
| { |
| "epoch": 0.5749179923513316, |
| "grad_norm": 0.9825992584228516, |
| "learning_rate": 2.125410038243342e-05, |
| "loss": 0.5253, |
| "step": 249400 |
| }, |
| { |
| "epoch": 0.5753790332433535, |
| "grad_norm": 0.5447947978973389, |
| "learning_rate": 2.1231048337832323e-05, |
| "loss": 0.5162, |
| "step": 249600 |
| }, |
| { |
| "epoch": 0.5758400741353754, |
| "grad_norm": 1.0377503633499146, |
| "learning_rate": 2.120799629323123e-05, |
| "loss": 0.5193, |
| "step": 249800 |
| }, |
| { |
| "epoch": 0.5763011150273973, |
| "grad_norm": 0.5433443188667297, |
| "learning_rate": 2.1184944248630135e-05, |
| "loss": 0.5163, |
| "step": 250000 |
| }, |
| { |
| "epoch": 0.5763011150273973, |
| "eval_loss": 0.5065879821777344, |
| "eval_runtime": 144.2776, |
| "eval_samples_per_second": 30.372, |
| "eval_steps_per_second": 30.372, |
| "step": 250000 |
| }, |
| { |
| "epoch": 0.5767621559194193, |
| "grad_norm": 1.6914293766021729, |
| "learning_rate": 2.1161892204029037e-05, |
| "loss": 0.5304, |
| "step": 250200 |
| }, |
| { |
| "epoch": 0.5772231968114412, |
| "grad_norm": 1.1830875873565674, |
| "learning_rate": 2.1138840159427943e-05, |
| "loss": 0.518, |
| "step": 250400 |
| }, |
| { |
| "epoch": 0.5776842377034631, |
| "grad_norm": 1.4796136617660522, |
| "learning_rate": 2.1115788114826845e-05, |
| "loss": 0.525, |
| "step": 250600 |
| }, |
| { |
| "epoch": 0.578145278595485, |
| "grad_norm": 1.81144118309021, |
| "learning_rate": 2.109273607022575e-05, |
| "loss": 0.536, |
| "step": 250800 |
| }, |
| { |
| "epoch": 0.578606319487507, |
| "grad_norm": 1.3345705270767212, |
| "learning_rate": 2.1069684025624653e-05, |
| "loss": 0.4776, |
| "step": 251000 |
| }, |
| { |
| "epoch": 0.5790673603795289, |
| "grad_norm": 1.4617594480514526, |
| "learning_rate": 2.1046631981023555e-05, |
| "loss": 0.5112, |
| "step": 251200 |
| }, |
| { |
| "epoch": 0.5795284012715508, |
| "grad_norm": 1.4168286323547363, |
| "learning_rate": 2.1023579936422464e-05, |
| "loss": 0.5247, |
| "step": 251400 |
| }, |
| { |
| "epoch": 0.5799894421635727, |
| "grad_norm": 0.9052757024765015, |
| "learning_rate": 2.1000527891821367e-05, |
| "loss": 0.5189, |
| "step": 251600 |
| }, |
| { |
| "epoch": 0.5804504830555947, |
| "grad_norm": 1.7687321901321411, |
| "learning_rate": 2.0977475847220272e-05, |
| "loss": 0.4998, |
| "step": 251800 |
| }, |
| { |
| "epoch": 0.5809115239476166, |
| "grad_norm": 1.1558544635772705, |
| "learning_rate": 2.0954423802619175e-05, |
| "loss": 0.5648, |
| "step": 252000 |
| }, |
| { |
| "epoch": 0.5813725648396385, |
| "grad_norm": 1.4480737447738647, |
| "learning_rate": 2.0931371758018077e-05, |
| "loss": 0.5221, |
| "step": 252200 |
| }, |
| { |
| "epoch": 0.5818336057316603, |
| "grad_norm": 1.6768193244934082, |
| "learning_rate": 2.0908319713416982e-05, |
| "loss": 0.4758, |
| "step": 252400 |
| }, |
| { |
| "epoch": 0.5822946466236822, |
| "grad_norm": 1.9604754447937012, |
| "learning_rate": 2.0885267668815885e-05, |
| "loss": 0.5225, |
| "step": 252600 |
| }, |
| { |
| "epoch": 0.5827556875157042, |
| "grad_norm": 1.8727524280548096, |
| "learning_rate": 2.086221562421479e-05, |
| "loss": 0.5262, |
| "step": 252800 |
| }, |
| { |
| "epoch": 0.5832167284077261, |
| "grad_norm": 1.510044813156128, |
| "learning_rate": 2.0839163579613693e-05, |
| "loss": 0.5664, |
| "step": 253000 |
| }, |
| { |
| "epoch": 0.583677769299748, |
| "grad_norm": 1.9544621706008911, |
| "learning_rate": 2.0816111535012602e-05, |
| "loss": 0.5053, |
| "step": 253200 |
| }, |
| { |
| "epoch": 0.5841388101917699, |
| "grad_norm": 0.9827083349227905, |
| "learning_rate": 2.0793059490411504e-05, |
| "loss": 0.5479, |
| "step": 253400 |
| }, |
| { |
| "epoch": 0.5845998510837919, |
| "grad_norm": 2.2708816528320312, |
| "learning_rate": 2.0770007445810406e-05, |
| "loss": 0.5025, |
| "step": 253600 |
| }, |
| { |
| "epoch": 0.5850608919758138, |
| "grad_norm": 2.2587356567382812, |
| "learning_rate": 2.0746955401209312e-05, |
| "loss": 0.4923, |
| "step": 253800 |
| }, |
| { |
| "epoch": 0.5855219328678357, |
| "grad_norm": 1.3918339014053345, |
| "learning_rate": 2.0723903356608214e-05, |
| "loss": 0.4738, |
| "step": 254000 |
| }, |
| { |
| "epoch": 0.5859829737598576, |
| "grad_norm": 1.7613333463668823, |
| "learning_rate": 2.070085131200712e-05, |
| "loss": 0.4592, |
| "step": 254200 |
| }, |
| { |
| "epoch": 0.5864440146518796, |
| "grad_norm": 2.323390007019043, |
| "learning_rate": 2.0677799267406022e-05, |
| "loss": 0.4962, |
| "step": 254400 |
| }, |
| { |
| "epoch": 0.5869050555439015, |
| "grad_norm": 1.5669095516204834, |
| "learning_rate": 2.0654747222804928e-05, |
| "loss": 0.5616, |
| "step": 254600 |
| }, |
| { |
| "epoch": 0.5873660964359234, |
| "grad_norm": 1.5922577381134033, |
| "learning_rate": 2.0631695178203834e-05, |
| "loss": 0.494, |
| "step": 254800 |
| }, |
| { |
| "epoch": 0.5878271373279453, |
| "grad_norm": 1.2841917276382446, |
| "learning_rate": 2.0608643133602736e-05, |
| "loss": 0.4663, |
| "step": 255000 |
| }, |
| { |
| "epoch": 0.5882881782199673, |
| "grad_norm": 0.8427960872650146, |
| "learning_rate": 2.058559108900164e-05, |
| "loss": 0.5203, |
| "step": 255200 |
| }, |
| { |
| "epoch": 0.5887492191119892, |
| "grad_norm": 1.1014477014541626, |
| "learning_rate": 2.0562539044400544e-05, |
| "loss": 0.4983, |
| "step": 255400 |
| }, |
| { |
| "epoch": 0.5892102600040111, |
| "grad_norm": 0.7464996576309204, |
| "learning_rate": 2.053948699979945e-05, |
| "loss": 0.512, |
| "step": 255600 |
| }, |
| { |
| "epoch": 0.5896713008960329, |
| "grad_norm": 1.1050175428390503, |
| "learning_rate": 2.0516434955198352e-05, |
| "loss": 0.5039, |
| "step": 255800 |
| }, |
| { |
| "epoch": 0.5901323417880548, |
| "grad_norm": 1.4962995052337646, |
| "learning_rate": 2.0493382910597254e-05, |
| "loss": 0.4859, |
| "step": 256000 |
| }, |
| { |
| "epoch": 0.5905933826800768, |
| "grad_norm": 1.086658239364624, |
| "learning_rate": 2.047033086599616e-05, |
| "loss": 0.512, |
| "step": 256200 |
| }, |
| { |
| "epoch": 0.5910544235720987, |
| "grad_norm": 1.5740742683410645, |
| "learning_rate": 2.0447278821395062e-05, |
| "loss": 0.5017, |
| "step": 256400 |
| }, |
| { |
| "epoch": 0.5915154644641206, |
| "grad_norm": 1.2784602642059326, |
| "learning_rate": 2.042422677679397e-05, |
| "loss": 0.5347, |
| "step": 256600 |
| }, |
| { |
| "epoch": 0.5919765053561425, |
| "grad_norm": 1.1897175312042236, |
| "learning_rate": 2.0401174732192873e-05, |
| "loss": 0.5442, |
| "step": 256800 |
| }, |
| { |
| "epoch": 0.5924375462481645, |
| "grad_norm": 1.5644766092300415, |
| "learning_rate": 2.0378122687591776e-05, |
| "loss": 0.4957, |
| "step": 257000 |
| }, |
| { |
| "epoch": 0.5928985871401864, |
| "grad_norm": 1.350401520729065, |
| "learning_rate": 2.035507064299068e-05, |
| "loss": 0.4763, |
| "step": 257200 |
| }, |
| { |
| "epoch": 0.5933596280322083, |
| "grad_norm": 1.8206768035888672, |
| "learning_rate": 2.0332018598389584e-05, |
| "loss": 0.4756, |
| "step": 257400 |
| }, |
| { |
| "epoch": 0.5938206689242302, |
| "grad_norm": 1.9066009521484375, |
| "learning_rate": 2.030896655378849e-05, |
| "loss": 0.4968, |
| "step": 257600 |
| }, |
| { |
| "epoch": 0.5942817098162522, |
| "grad_norm": 0.9539717435836792, |
| "learning_rate": 2.028591450918739e-05, |
| "loss": 0.5454, |
| "step": 257800 |
| }, |
| { |
| "epoch": 0.5947427507082741, |
| "grad_norm": 1.8135906457901, |
| "learning_rate": 2.0262862464586297e-05, |
| "loss": 0.4961, |
| "step": 258000 |
| }, |
| { |
| "epoch": 0.595203791600296, |
| "grad_norm": 1.2675491571426392, |
| "learning_rate": 2.0239810419985203e-05, |
| "loss": 0.4997, |
| "step": 258200 |
| }, |
| { |
| "epoch": 0.5956648324923179, |
| "grad_norm": 0.6522994041442871, |
| "learning_rate": 2.0216758375384105e-05, |
| "loss": 0.5243, |
| "step": 258400 |
| }, |
| { |
| "epoch": 0.5961258733843399, |
| "grad_norm": 0.3235660791397095, |
| "learning_rate": 2.019370633078301e-05, |
| "loss": 0.4942, |
| "step": 258600 |
| }, |
| { |
| "epoch": 0.5965869142763618, |
| "grad_norm": 1.0544391870498657, |
| "learning_rate": 2.0170654286181913e-05, |
| "loss": 0.5452, |
| "step": 258800 |
| }, |
| { |
| "epoch": 0.5970479551683837, |
| "grad_norm": 2.637691020965576, |
| "learning_rate": 2.014760224158082e-05, |
| "loss": 0.5338, |
| "step": 259000 |
| }, |
| { |
| "epoch": 0.5975089960604055, |
| "grad_norm": 0.2857421934604645, |
| "learning_rate": 2.012455019697972e-05, |
| "loss": 0.5621, |
| "step": 259200 |
| }, |
| { |
| "epoch": 0.5979700369524275, |
| "grad_norm": 0.93863445520401, |
| "learning_rate": 2.0101498152378627e-05, |
| "loss": 0.5391, |
| "step": 259400 |
| }, |
| { |
| "epoch": 0.5984310778444494, |
| "grad_norm": 0.6566616892814636, |
| "learning_rate": 2.007844610777753e-05, |
| "loss": 0.5247, |
| "step": 259600 |
| }, |
| { |
| "epoch": 0.5988921187364713, |
| "grad_norm": 1.3079489469528198, |
| "learning_rate": 2.005539406317643e-05, |
| "loss": 0.5031, |
| "step": 259800 |
| }, |
| { |
| "epoch": 0.5993531596284932, |
| "grad_norm": 0.5705758333206177, |
| "learning_rate": 2.003234201857534e-05, |
| "loss": 0.5046, |
| "step": 260000 |
| }, |
| { |
| "epoch": 0.5998142005205152, |
| "grad_norm": 1.439122200012207, |
| "learning_rate": 2.0009289973974243e-05, |
| "loss": 0.4972, |
| "step": 260200 |
| }, |
| { |
| "epoch": 0.6002752414125371, |
| "grad_norm": 0.7958211302757263, |
| "learning_rate": 1.998623792937315e-05, |
| "loss": 0.5172, |
| "step": 260400 |
| }, |
| { |
| "epoch": 0.600736282304559, |
| "grad_norm": 1.4362818002700806, |
| "learning_rate": 1.996318588477205e-05, |
| "loss": 0.5031, |
| "step": 260600 |
| }, |
| { |
| "epoch": 0.6011973231965809, |
| "grad_norm": 1.128711462020874, |
| "learning_rate": 1.9940133840170953e-05, |
| "loss": 0.5035, |
| "step": 260800 |
| }, |
| { |
| "epoch": 0.6016583640886028, |
| "grad_norm": 0.9221576452255249, |
| "learning_rate": 1.991708179556986e-05, |
| "loss": 0.5039, |
| "step": 261000 |
| }, |
| { |
| "epoch": 0.6021194049806248, |
| "grad_norm": 1.0171575546264648, |
| "learning_rate": 1.989402975096876e-05, |
| "loss": 0.5009, |
| "step": 261200 |
| }, |
| { |
| "epoch": 0.6025804458726467, |
| "grad_norm": 1.2728921175003052, |
| "learning_rate": 1.9870977706367667e-05, |
| "loss": 0.56, |
| "step": 261400 |
| }, |
| { |
| "epoch": 0.6030414867646686, |
| "grad_norm": 0.6258471012115479, |
| "learning_rate": 1.9847925661766572e-05, |
| "loss": 0.5025, |
| "step": 261600 |
| }, |
| { |
| "epoch": 0.6035025276566905, |
| "grad_norm": 1.2376896142959595, |
| "learning_rate": 1.9824873617165478e-05, |
| "loss": 0.5488, |
| "step": 261800 |
| }, |
| { |
| "epoch": 0.6039635685487125, |
| "grad_norm": 1.5317405462265015, |
| "learning_rate": 1.980182157256438e-05, |
| "loss": 0.5412, |
| "step": 262000 |
| }, |
| { |
| "epoch": 0.6044246094407344, |
| "grad_norm": 2.4922080039978027, |
| "learning_rate": 1.9778769527963283e-05, |
| "loss": 0.5095, |
| "step": 262200 |
| }, |
| { |
| "epoch": 0.6048856503327562, |
| "grad_norm": 0.9650156497955322, |
| "learning_rate": 1.975571748336219e-05, |
| "loss": 0.5217, |
| "step": 262400 |
| }, |
| { |
| "epoch": 0.6053466912247781, |
| "grad_norm": 1.3613967895507812, |
| "learning_rate": 1.973266543876109e-05, |
| "loss": 0.5102, |
| "step": 262600 |
| }, |
| { |
| "epoch": 0.6058077321168001, |
| "grad_norm": 1.0593500137329102, |
| "learning_rate": 1.9709613394159996e-05, |
| "loss": 0.5723, |
| "step": 262800 |
| }, |
| { |
| "epoch": 0.606268773008822, |
| "grad_norm": 1.8354504108428955, |
| "learning_rate": 1.96865613495589e-05, |
| "loss": 0.5125, |
| "step": 263000 |
| }, |
| { |
| "epoch": 0.6067298139008439, |
| "grad_norm": 2.131420373916626, |
| "learning_rate": 1.9663509304957804e-05, |
| "loss": 0.5214, |
| "step": 263200 |
| }, |
| { |
| "epoch": 0.6071908547928658, |
| "grad_norm": 1.4709240198135376, |
| "learning_rate": 1.964045726035671e-05, |
| "loss": 0.4658, |
| "step": 263400 |
| }, |
| { |
| "epoch": 0.6076518956848878, |
| "grad_norm": 1.3069663047790527, |
| "learning_rate": 1.9617405215755612e-05, |
| "loss": 0.5497, |
| "step": 263600 |
| }, |
| { |
| "epoch": 0.6081129365769097, |
| "grad_norm": 0.6274604797363281, |
| "learning_rate": 1.9594353171154518e-05, |
| "loss": 0.5266, |
| "step": 263800 |
| }, |
| { |
| "epoch": 0.6085739774689316, |
| "grad_norm": 0.9188045263290405, |
| "learning_rate": 1.957130112655342e-05, |
| "loss": 0.5668, |
| "step": 264000 |
| }, |
| { |
| "epoch": 0.6090350183609535, |
| "grad_norm": 0.5703033804893494, |
| "learning_rate": 1.9548249081952326e-05, |
| "loss": 0.4844, |
| "step": 264200 |
| }, |
| { |
| "epoch": 0.6094960592529755, |
| "grad_norm": 2.1700258255004883, |
| "learning_rate": 1.9525197037351228e-05, |
| "loss": 0.5282, |
| "step": 264400 |
| }, |
| { |
| "epoch": 0.6099571001449974, |
| "grad_norm": 1.3549532890319824, |
| "learning_rate": 1.950214499275013e-05, |
| "loss": 0.4734, |
| "step": 264600 |
| }, |
| { |
| "epoch": 0.6104181410370193, |
| "grad_norm": 1.7705378532409668, |
| "learning_rate": 1.9479092948149036e-05, |
| "loss": 0.496, |
| "step": 264800 |
| }, |
| { |
| "epoch": 0.6108791819290412, |
| "grad_norm": 0.578196108341217, |
| "learning_rate": 1.9456040903547942e-05, |
| "loss": 0.5078, |
| "step": 265000 |
| }, |
| { |
| "epoch": 0.6113402228210632, |
| "grad_norm": 1.1500052213668823, |
| "learning_rate": 1.9432988858946847e-05, |
| "loss": 0.5196, |
| "step": 265200 |
| }, |
| { |
| "epoch": 0.6118012637130851, |
| "grad_norm": 1.3695541620254517, |
| "learning_rate": 1.940993681434575e-05, |
| "loss": 0.4915, |
| "step": 265400 |
| }, |
| { |
| "epoch": 0.612262304605107, |
| "grad_norm": 0.8905289173126221, |
| "learning_rate": 1.9386884769744655e-05, |
| "loss": 0.4662, |
| "step": 265600 |
| }, |
| { |
| "epoch": 0.6127233454971288, |
| "grad_norm": 2.054939031600952, |
| "learning_rate": 1.9363832725143558e-05, |
| "loss": 0.4665, |
| "step": 265800 |
| }, |
| { |
| "epoch": 0.6131843863891507, |
| "grad_norm": 1.421302080154419, |
| "learning_rate": 1.934078068054246e-05, |
| "loss": 0.5074, |
| "step": 266000 |
| }, |
| { |
| "epoch": 0.6136454272811727, |
| "grad_norm": 1.0554801225662231, |
| "learning_rate": 1.9317728635941366e-05, |
| "loss": 0.5406, |
| "step": 266200 |
| }, |
| { |
| "epoch": 0.6141064681731946, |
| "grad_norm": 1.5464704036712646, |
| "learning_rate": 1.9294676591340268e-05, |
| "loss": 0.5273, |
| "step": 266400 |
| }, |
| { |
| "epoch": 0.6145675090652165, |
| "grad_norm": 2.142878293991089, |
| "learning_rate": 1.9271624546739174e-05, |
| "loss": 0.5035, |
| "step": 266600 |
| }, |
| { |
| "epoch": 0.6150285499572384, |
| "grad_norm": 2.7854163646698, |
| "learning_rate": 1.924857250213808e-05, |
| "loss": 0.4915, |
| "step": 266800 |
| }, |
| { |
| "epoch": 0.6154895908492604, |
| "grad_norm": 1.0420928001403809, |
| "learning_rate": 1.922552045753698e-05, |
| "loss": 0.5025, |
| "step": 267000 |
| }, |
| { |
| "epoch": 0.6159506317412823, |
| "grad_norm": 1.2104905843734741, |
| "learning_rate": 1.9202468412935887e-05, |
| "loss": 0.5118, |
| "step": 267200 |
| }, |
| { |
| "epoch": 0.6164116726333042, |
| "grad_norm": 1.4268879890441895, |
| "learning_rate": 1.917941636833479e-05, |
| "loss": 0.5147, |
| "step": 267400 |
| }, |
| { |
| "epoch": 0.6168727135253261, |
| "grad_norm": 1.690464973449707, |
| "learning_rate": 1.9156364323733695e-05, |
| "loss": 0.4835, |
| "step": 267600 |
| }, |
| { |
| "epoch": 0.6173337544173481, |
| "grad_norm": 1.919801115989685, |
| "learning_rate": 1.9133312279132598e-05, |
| "loss": 0.5243, |
| "step": 267800 |
| }, |
| { |
| "epoch": 0.61779479530937, |
| "grad_norm": 0.6003401875495911, |
| "learning_rate": 1.9110260234531503e-05, |
| "loss": 0.4886, |
| "step": 268000 |
| }, |
| { |
| "epoch": 0.6182558362013919, |
| "grad_norm": 1.350727915763855, |
| "learning_rate": 1.9087208189930405e-05, |
| "loss": 0.5038, |
| "step": 268200 |
| }, |
| { |
| "epoch": 0.6187168770934138, |
| "grad_norm": 0.8154557347297668, |
| "learning_rate": 1.906415614532931e-05, |
| "loss": 0.5153, |
| "step": 268400 |
| }, |
| { |
| "epoch": 0.6191779179854358, |
| "grad_norm": 0.5474942326545715, |
| "learning_rate": 1.9041104100728217e-05, |
| "loss": 0.5569, |
| "step": 268600 |
| }, |
| { |
| "epoch": 0.6196389588774577, |
| "grad_norm": 0.8887852430343628, |
| "learning_rate": 1.901805205612712e-05, |
| "loss": 0.5289, |
| "step": 268800 |
| }, |
| { |
| "epoch": 0.6200999997694796, |
| "grad_norm": 0.9565109014511108, |
| "learning_rate": 1.8995000011526025e-05, |
| "loss": 0.5446, |
| "step": 269000 |
| }, |
| { |
| "epoch": 0.6205610406615014, |
| "grad_norm": 1.2200897932052612, |
| "learning_rate": 1.8971947966924927e-05, |
| "loss": 0.4689, |
| "step": 269200 |
| }, |
| { |
| "epoch": 0.6210220815535233, |
| "grad_norm": 0.5202858448028564, |
| "learning_rate": 1.894889592232383e-05, |
| "loss": 0.5117, |
| "step": 269400 |
| }, |
| { |
| "epoch": 0.6214831224455453, |
| "grad_norm": 1.1108614206314087, |
| "learning_rate": 1.8925843877722735e-05, |
| "loss": 0.5495, |
| "step": 269600 |
| }, |
| { |
| "epoch": 0.6219441633375672, |
| "grad_norm": 0.7820692658424377, |
| "learning_rate": 1.8902791833121637e-05, |
| "loss": 0.5489, |
| "step": 269800 |
| }, |
| { |
| "epoch": 0.6224052042295891, |
| "grad_norm": 0.5939005613327026, |
| "learning_rate": 1.8879739788520543e-05, |
| "loss": 0.5139, |
| "step": 270000 |
| }, |
| { |
| "epoch": 0.622866245121611, |
| "grad_norm": 0.809594452381134, |
| "learning_rate": 1.885668774391945e-05, |
| "loss": 0.5195, |
| "step": 270200 |
| }, |
| { |
| "epoch": 0.623327286013633, |
| "grad_norm": 1.638484001159668, |
| "learning_rate": 1.8833635699318354e-05, |
| "loss": 0.487, |
| "step": 270400 |
| }, |
| { |
| "epoch": 0.6237883269056549, |
| "grad_norm": 1.4749358892440796, |
| "learning_rate": 1.8810583654717257e-05, |
| "loss": 0.5058, |
| "step": 270600 |
| }, |
| { |
| "epoch": 0.6242493677976768, |
| "grad_norm": 0.8880025744438171, |
| "learning_rate": 1.878753161011616e-05, |
| "loss": 0.513, |
| "step": 270800 |
| }, |
| { |
| "epoch": 0.6247104086896987, |
| "grad_norm": 0.9958152174949646, |
| "learning_rate": 1.8764479565515065e-05, |
| "loss": 0.5261, |
| "step": 271000 |
| }, |
| { |
| "epoch": 0.6251714495817207, |
| "grad_norm": 1.6274564266204834, |
| "learning_rate": 1.8741427520913967e-05, |
| "loss": 0.5416, |
| "step": 271200 |
| }, |
| { |
| "epoch": 0.6256324904737426, |
| "grad_norm": 1.5362344980239868, |
| "learning_rate": 1.8718375476312873e-05, |
| "loss": 0.513, |
| "step": 271400 |
| }, |
| { |
| "epoch": 0.6260935313657645, |
| "grad_norm": 0.9581994414329529, |
| "learning_rate": 1.8695323431711775e-05, |
| "loss": 0.505, |
| "step": 271600 |
| }, |
| { |
| "epoch": 0.6265545722577864, |
| "grad_norm": 1.3298275470733643, |
| "learning_rate": 1.867227138711068e-05, |
| "loss": 0.4829, |
| "step": 271800 |
| }, |
| { |
| "epoch": 0.6270156131498084, |
| "grad_norm": 1.5617239475250244, |
| "learning_rate": 1.8649219342509586e-05, |
| "loss": 0.5669, |
| "step": 272000 |
| }, |
| { |
| "epoch": 0.6274766540418303, |
| "grad_norm": 1.6053404808044434, |
| "learning_rate": 1.862616729790849e-05, |
| "loss": 0.5203, |
| "step": 272200 |
| }, |
| { |
| "epoch": 0.6279376949338522, |
| "grad_norm": 1.7851396799087524, |
| "learning_rate": 1.8603115253307394e-05, |
| "loss": 0.512, |
| "step": 272400 |
| }, |
| { |
| "epoch": 0.628398735825874, |
| "grad_norm": 1.3142194747924805, |
| "learning_rate": 1.8580063208706296e-05, |
| "loss": 0.5205, |
| "step": 272600 |
| }, |
| { |
| "epoch": 0.628859776717896, |
| "grad_norm": 1.7642301321029663, |
| "learning_rate": 1.8557011164105202e-05, |
| "loss": 0.5199, |
| "step": 272800 |
| }, |
| { |
| "epoch": 0.6293208176099179, |
| "grad_norm": 1.0019512176513672, |
| "learning_rate": 1.8533959119504104e-05, |
| "loss": 0.5, |
| "step": 273000 |
| }, |
| { |
| "epoch": 0.6297818585019398, |
| "grad_norm": 1.3982213735580444, |
| "learning_rate": 1.8510907074903007e-05, |
| "loss": 0.4773, |
| "step": 273200 |
| }, |
| { |
| "epoch": 0.6302428993939617, |
| "grad_norm": 0.6312654614448547, |
| "learning_rate": 1.8487855030301912e-05, |
| "loss": 0.4853, |
| "step": 273400 |
| }, |
| { |
| "epoch": 0.6307039402859836, |
| "grad_norm": 1.554456353187561, |
| "learning_rate": 1.8464802985700818e-05, |
| "loss": 0.5074, |
| "step": 273600 |
| }, |
| { |
| "epoch": 0.6311649811780056, |
| "grad_norm": 1.26462984085083, |
| "learning_rate": 1.8441750941099724e-05, |
| "loss": 0.482, |
| "step": 273800 |
| }, |
| { |
| "epoch": 0.6316260220700275, |
| "grad_norm": 1.3933197259902954, |
| "learning_rate": 1.8418698896498626e-05, |
| "loss": 0.5149, |
| "step": 274000 |
| }, |
| { |
| "epoch": 0.6320870629620494, |
| "grad_norm": 1.4466843605041504, |
| "learning_rate": 1.839564685189753e-05, |
| "loss": 0.5609, |
| "step": 274200 |
| }, |
| { |
| "epoch": 0.6325481038540713, |
| "grad_norm": 0.9413987398147583, |
| "learning_rate": 1.8372594807296434e-05, |
| "loss": 0.5348, |
| "step": 274400 |
| }, |
| { |
| "epoch": 0.6330091447460933, |
| "grad_norm": 2.5217905044555664, |
| "learning_rate": 1.8349542762695336e-05, |
| "loss": 0.483, |
| "step": 274600 |
| }, |
| { |
| "epoch": 0.6334701856381152, |
| "grad_norm": 1.803232192993164, |
| "learning_rate": 1.8326490718094242e-05, |
| "loss": 0.5096, |
| "step": 274800 |
| }, |
| { |
| "epoch": 0.6339312265301371, |
| "grad_norm": 1.1358133554458618, |
| "learning_rate": 1.8303438673493144e-05, |
| "loss": 0.5148, |
| "step": 275000 |
| }, |
| { |
| "epoch": 0.634392267422159, |
| "grad_norm": 1.4829622507095337, |
| "learning_rate": 1.8280386628892053e-05, |
| "loss": 0.5048, |
| "step": 275200 |
| }, |
| { |
| "epoch": 0.634853308314181, |
| "grad_norm": 1.8766462802886963, |
| "learning_rate": 1.8257334584290956e-05, |
| "loss": 0.501, |
| "step": 275400 |
| }, |
| { |
| "epoch": 0.6353143492062029, |
| "grad_norm": 1.7556136846542358, |
| "learning_rate": 1.8234282539689858e-05, |
| "loss": 0.4652, |
| "step": 275600 |
| }, |
| { |
| "epoch": 0.6357753900982248, |
| "grad_norm": 1.6334820985794067, |
| "learning_rate": 1.8211230495088764e-05, |
| "loss": 0.5093, |
| "step": 275800 |
| }, |
| { |
| "epoch": 0.6362364309902466, |
| "grad_norm": 0.6144605875015259, |
| "learning_rate": 1.8188178450487666e-05, |
| "loss": 0.4899, |
| "step": 276000 |
| }, |
| { |
| "epoch": 0.6366974718822686, |
| "grad_norm": 0.49530643224716187, |
| "learning_rate": 1.816512640588657e-05, |
| "loss": 0.5037, |
| "step": 276200 |
| }, |
| { |
| "epoch": 0.6371585127742905, |
| "grad_norm": 0.8908922672271729, |
| "learning_rate": 1.8142074361285474e-05, |
| "loss": 0.4913, |
| "step": 276400 |
| }, |
| { |
| "epoch": 0.6376195536663124, |
| "grad_norm": 0.7277461290359497, |
| "learning_rate": 1.811902231668438e-05, |
| "loss": 0.509, |
| "step": 276600 |
| }, |
| { |
| "epoch": 0.6380805945583343, |
| "grad_norm": 1.4402283430099487, |
| "learning_rate": 1.8095970272083285e-05, |
| "loss": 0.5063, |
| "step": 276800 |
| }, |
| { |
| "epoch": 0.6385416354503562, |
| "grad_norm": 1.40396249294281, |
| "learning_rate": 1.8072918227482187e-05, |
| "loss": 0.5368, |
| "step": 277000 |
| }, |
| { |
| "epoch": 0.6390026763423782, |
| "grad_norm": 1.9143671989440918, |
| "learning_rate": 1.8049866182881093e-05, |
| "loss": 0.5159, |
| "step": 277200 |
| }, |
| { |
| "epoch": 0.6394637172344001, |
| "grad_norm": 1.0167429447174072, |
| "learning_rate": 1.8026814138279995e-05, |
| "loss": 0.4895, |
| "step": 277400 |
| }, |
| { |
| "epoch": 0.639924758126422, |
| "grad_norm": 1.2387683391571045, |
| "learning_rate": 1.80037620936789e-05, |
| "loss": 0.4982, |
| "step": 277600 |
| }, |
| { |
| "epoch": 0.640385799018444, |
| "grad_norm": 1.7970925569534302, |
| "learning_rate": 1.7980710049077803e-05, |
| "loss": 0.4749, |
| "step": 277800 |
| }, |
| { |
| "epoch": 0.6408468399104659, |
| "grad_norm": 1.2486504316329956, |
| "learning_rate": 1.7957658004476706e-05, |
| "loss": 0.5113, |
| "step": 278000 |
| }, |
| { |
| "epoch": 0.6413078808024878, |
| "grad_norm": 0.9315382838249207, |
| "learning_rate": 1.793460595987561e-05, |
| "loss": 0.5022, |
| "step": 278200 |
| }, |
| { |
| "epoch": 0.6417689216945097, |
| "grad_norm": 1.3397549390792847, |
| "learning_rate": 1.7911553915274514e-05, |
| "loss": 0.5411, |
| "step": 278400 |
| }, |
| { |
| "epoch": 0.6422299625865316, |
| "grad_norm": 1.5810282230377197, |
| "learning_rate": 1.7888501870673423e-05, |
| "loss": 0.5015, |
| "step": 278600 |
| }, |
| { |
| "epoch": 0.6426910034785536, |
| "grad_norm": 0.9700754284858704, |
| "learning_rate": 1.7865449826072325e-05, |
| "loss": 0.5014, |
| "step": 278800 |
| }, |
| { |
| "epoch": 0.6431520443705755, |
| "grad_norm": 1.5773003101348877, |
| "learning_rate": 1.784239778147123e-05, |
| "loss": 0.5001, |
| "step": 279000 |
| }, |
| { |
| "epoch": 0.6436130852625974, |
| "grad_norm": 1.5198345184326172, |
| "learning_rate": 1.7819345736870133e-05, |
| "loss": 0.4951, |
| "step": 279200 |
| }, |
| { |
| "epoch": 0.6440741261546192, |
| "grad_norm": 0.9884507060050964, |
| "learning_rate": 1.7796293692269035e-05, |
| "loss": 0.5342, |
| "step": 279400 |
| }, |
| { |
| "epoch": 0.6445351670466412, |
| "grad_norm": 0.6419351696968079, |
| "learning_rate": 1.777324164766794e-05, |
| "loss": 0.4904, |
| "step": 279600 |
| }, |
| { |
| "epoch": 0.6449962079386631, |
| "grad_norm": 1.171769618988037, |
| "learning_rate": 1.7750189603066843e-05, |
| "loss": 0.5071, |
| "step": 279800 |
| }, |
| { |
| "epoch": 0.645457248830685, |
| "grad_norm": 1.362993836402893, |
| "learning_rate": 1.772713755846575e-05, |
| "loss": 0.5205, |
| "step": 280000 |
| }, |
| { |
| "epoch": 0.6459182897227069, |
| "grad_norm": 1.8605279922485352, |
| "learning_rate": 1.7704085513864655e-05, |
| "loss": 0.5206, |
| "step": 280200 |
| }, |
| { |
| "epoch": 0.6463793306147289, |
| "grad_norm": 1.154487133026123, |
| "learning_rate": 1.7681033469263557e-05, |
| "loss": 0.5846, |
| "step": 280400 |
| }, |
| { |
| "epoch": 0.6468403715067508, |
| "grad_norm": 1.5201776027679443, |
| "learning_rate": 1.7657981424662462e-05, |
| "loss": 0.4864, |
| "step": 280600 |
| }, |
| { |
| "epoch": 0.6473014123987727, |
| "grad_norm": 1.0261558294296265, |
| "learning_rate": 1.7634929380061365e-05, |
| "loss": 0.4937, |
| "step": 280800 |
| }, |
| { |
| "epoch": 0.6477624532907946, |
| "grad_norm": 0.9769271612167358, |
| "learning_rate": 1.761187733546027e-05, |
| "loss": 0.4865, |
| "step": 281000 |
| }, |
| { |
| "epoch": 0.6482234941828166, |
| "grad_norm": 1.5987550020217896, |
| "learning_rate": 1.7588825290859173e-05, |
| "loss": 0.5231, |
| "step": 281200 |
| }, |
| { |
| "epoch": 0.6486845350748385, |
| "grad_norm": 1.0639326572418213, |
| "learning_rate": 1.756577324625808e-05, |
| "loss": 0.5197, |
| "step": 281400 |
| }, |
| { |
| "epoch": 0.6491455759668604, |
| "grad_norm": 2.6763956546783447, |
| "learning_rate": 1.754272120165698e-05, |
| "loss": 0.5459, |
| "step": 281600 |
| }, |
| { |
| "epoch": 0.6496066168588823, |
| "grad_norm": 0.49132779240608215, |
| "learning_rate": 1.7519669157055883e-05, |
| "loss": 0.518, |
| "step": 281800 |
| }, |
| { |
| "epoch": 0.6500676577509042, |
| "grad_norm": 1.8411035537719727, |
| "learning_rate": 1.7496617112454792e-05, |
| "loss": 0.4867, |
| "step": 282000 |
| }, |
| { |
| "epoch": 0.6505286986429262, |
| "grad_norm": 0.6566082835197449, |
| "learning_rate": 1.7473565067853694e-05, |
| "loss": 0.5135, |
| "step": 282200 |
| }, |
| { |
| "epoch": 0.6509897395349481, |
| "grad_norm": 1.3667335510253906, |
| "learning_rate": 1.74505130232526e-05, |
| "loss": 0.5306, |
| "step": 282400 |
| }, |
| { |
| "epoch": 0.65145078042697, |
| "grad_norm": 1.3689517974853516, |
| "learning_rate": 1.7427460978651502e-05, |
| "loss": 0.4903, |
| "step": 282600 |
| }, |
| { |
| "epoch": 0.6519118213189918, |
| "grad_norm": 1.0682365894317627, |
| "learning_rate": 1.7404408934050408e-05, |
| "loss": 0.5197, |
| "step": 282800 |
| }, |
| { |
| "epoch": 0.6523728622110138, |
| "grad_norm": 0.9434696435928345, |
| "learning_rate": 1.738135688944931e-05, |
| "loss": 0.5309, |
| "step": 283000 |
| }, |
| { |
| "epoch": 0.6528339031030357, |
| "grad_norm": 1.378448724746704, |
| "learning_rate": 1.7358304844848213e-05, |
| "loss": 0.4943, |
| "step": 283200 |
| }, |
| { |
| "epoch": 0.6532949439950576, |
| "grad_norm": 1.0012249946594238, |
| "learning_rate": 1.7335252800247118e-05, |
| "loss": 0.5066, |
| "step": 283400 |
| }, |
| { |
| "epoch": 0.6537559848870795, |
| "grad_norm": 2.5924713611602783, |
| "learning_rate": 1.7312200755646024e-05, |
| "loss": 0.4943, |
| "step": 283600 |
| }, |
| { |
| "epoch": 0.6542170257791015, |
| "grad_norm": 1.0362581014633179, |
| "learning_rate": 1.728914871104493e-05, |
| "loss": 0.5225, |
| "step": 283800 |
| }, |
| { |
| "epoch": 0.6546780666711234, |
| "grad_norm": 2.9695885181427, |
| "learning_rate": 1.7266096666443832e-05, |
| "loss": 0.516, |
| "step": 284000 |
| }, |
| { |
| "epoch": 0.6551391075631453, |
| "grad_norm": 1.1434212923049927, |
| "learning_rate": 1.7243044621842734e-05, |
| "loss": 0.51, |
| "step": 284200 |
| }, |
| { |
| "epoch": 0.6556001484551672, |
| "grad_norm": 0.8968667387962341, |
| "learning_rate": 1.721999257724164e-05, |
| "loss": 0.4848, |
| "step": 284400 |
| }, |
| { |
| "epoch": 0.6560611893471892, |
| "grad_norm": 6.086385726928711, |
| "learning_rate": 1.7196940532640542e-05, |
| "loss": 0.5094, |
| "step": 284600 |
| }, |
| { |
| "epoch": 0.6565222302392111, |
| "grad_norm": 1.7994771003723145, |
| "learning_rate": 1.7173888488039448e-05, |
| "loss": 0.5308, |
| "step": 284800 |
| }, |
| { |
| "epoch": 0.656983271131233, |
| "grad_norm": 1.471977949142456, |
| "learning_rate": 1.715083644343835e-05, |
| "loss": 0.4866, |
| "step": 285000 |
| }, |
| { |
| "epoch": 0.6574443120232549, |
| "grad_norm": 0.9150500893592834, |
| "learning_rate": 1.7127784398837256e-05, |
| "loss": 0.4983, |
| "step": 285200 |
| }, |
| { |
| "epoch": 0.6579053529152769, |
| "grad_norm": 1.0636359453201294, |
| "learning_rate": 1.710473235423616e-05, |
| "loss": 0.5032, |
| "step": 285400 |
| }, |
| { |
| "epoch": 0.6583663938072988, |
| "grad_norm": 1.005440354347229, |
| "learning_rate": 1.7081680309635064e-05, |
| "loss": 0.5163, |
| "step": 285600 |
| }, |
| { |
| "epoch": 0.6588274346993207, |
| "grad_norm": 0.7577878832817078, |
| "learning_rate": 1.705862826503397e-05, |
| "loss": 0.4763, |
| "step": 285800 |
| }, |
| { |
| "epoch": 0.6592884755913426, |
| "grad_norm": 1.632212519645691, |
| "learning_rate": 1.703557622043287e-05, |
| "loss": 0.477, |
| "step": 286000 |
| }, |
| { |
| "epoch": 0.6597495164833644, |
| "grad_norm": 0.42119720578193665, |
| "learning_rate": 1.7012524175831777e-05, |
| "loss": 0.5244, |
| "step": 286200 |
| }, |
| { |
| "epoch": 0.6602105573753864, |
| "grad_norm": 1.7082394361495972, |
| "learning_rate": 1.698947213123068e-05, |
| "loss": 0.4961, |
| "step": 286400 |
| }, |
| { |
| "epoch": 0.6606715982674083, |
| "grad_norm": 1.360280990600586, |
| "learning_rate": 1.6966420086629582e-05, |
| "loss": 0.5161, |
| "step": 286600 |
| }, |
| { |
| "epoch": 0.6611326391594302, |
| "grad_norm": 1.266839623451233, |
| "learning_rate": 1.6943368042028488e-05, |
| "loss": 0.4477, |
| "step": 286800 |
| }, |
| { |
| "epoch": 0.6615936800514521, |
| "grad_norm": 0.5453054308891296, |
| "learning_rate": 1.6920315997427393e-05, |
| "loss": 0.4772, |
| "step": 287000 |
| }, |
| { |
| "epoch": 0.6620547209434741, |
| "grad_norm": 1.4255741834640503, |
| "learning_rate": 1.68972639528263e-05, |
| "loss": 0.4471, |
| "step": 287200 |
| }, |
| { |
| "epoch": 0.662515761835496, |
| "grad_norm": 2.048753261566162, |
| "learning_rate": 1.68742119082252e-05, |
| "loss": 0.4945, |
| "step": 287400 |
| }, |
| { |
| "epoch": 0.6629768027275179, |
| "grad_norm": 1.00551176071167, |
| "learning_rate": 1.6851159863624107e-05, |
| "loss": 0.5258, |
| "step": 287600 |
| }, |
| { |
| "epoch": 0.6634378436195398, |
| "grad_norm": 1.403394103050232, |
| "learning_rate": 1.682810781902301e-05, |
| "loss": 0.515, |
| "step": 287800 |
| }, |
| { |
| "epoch": 0.6638988845115618, |
| "grad_norm": 1.374613881111145, |
| "learning_rate": 1.680505577442191e-05, |
| "loss": 0.504, |
| "step": 288000 |
| }, |
| { |
| "epoch": 0.6643599254035837, |
| "grad_norm": 0.9842983484268188, |
| "learning_rate": 1.6782003729820817e-05, |
| "loss": 0.536, |
| "step": 288200 |
| }, |
| { |
| "epoch": 0.6648209662956056, |
| "grad_norm": 1.1047396659851074, |
| "learning_rate": 1.675895168521972e-05, |
| "loss": 0.501, |
| "step": 288400 |
| }, |
| { |
| "epoch": 0.6652820071876275, |
| "grad_norm": 0.7167093753814697, |
| "learning_rate": 1.6735899640618625e-05, |
| "loss": 0.5139, |
| "step": 288600 |
| }, |
| { |
| "epoch": 0.6657430480796495, |
| "grad_norm": 2.0152106285095215, |
| "learning_rate": 1.671284759601753e-05, |
| "loss": 0.4873, |
| "step": 288800 |
| }, |
| { |
| "epoch": 0.6662040889716714, |
| "grad_norm": 2.2245209217071533, |
| "learning_rate": 1.6689795551416433e-05, |
| "loss": 0.5077, |
| "step": 289000 |
| }, |
| { |
| "epoch": 0.6666651298636933, |
| "grad_norm": 1.8077071905136108, |
| "learning_rate": 1.666674350681534e-05, |
| "loss": 0.5168, |
| "step": 289200 |
| }, |
| { |
| "epoch": 0.6671261707557152, |
| "grad_norm": 2.8042407035827637, |
| "learning_rate": 1.664369146221424e-05, |
| "loss": 0.5174, |
| "step": 289400 |
| }, |
| { |
| "epoch": 0.667587211647737, |
| "grad_norm": 0.7965187430381775, |
| "learning_rate": 1.6620639417613147e-05, |
| "loss": 0.4988, |
| "step": 289600 |
| }, |
| { |
| "epoch": 0.668048252539759, |
| "grad_norm": 0.6338868141174316, |
| "learning_rate": 1.659758737301205e-05, |
| "loss": 0.5782, |
| "step": 289800 |
| }, |
| { |
| "epoch": 0.6685092934317809, |
| "grad_norm": 1.7595531940460205, |
| "learning_rate": 1.6574535328410955e-05, |
| "loss": 0.4831, |
| "step": 290000 |
| }, |
| { |
| "epoch": 0.6689703343238028, |
| "grad_norm": 1.2702540159225464, |
| "learning_rate": 1.6551483283809857e-05, |
| "loss": 0.4689, |
| "step": 290200 |
| }, |
| { |
| "epoch": 0.6694313752158247, |
| "grad_norm": 0.9792807102203369, |
| "learning_rate": 1.6528431239208763e-05, |
| "loss": 0.5161, |
| "step": 290400 |
| }, |
| { |
| "epoch": 0.6698924161078467, |
| "grad_norm": 1.6363322734832764, |
| "learning_rate": 1.650537919460767e-05, |
| "loss": 0.5315, |
| "step": 290600 |
| }, |
| { |
| "epoch": 0.6703534569998686, |
| "grad_norm": 1.1259363889694214, |
| "learning_rate": 1.648232715000657e-05, |
| "loss": 0.5286, |
| "step": 290800 |
| }, |
| { |
| "epoch": 0.6708144978918905, |
| "grad_norm": 1.2707172632217407, |
| "learning_rate": 1.6459275105405476e-05, |
| "loss": 0.4925, |
| "step": 291000 |
| }, |
| { |
| "epoch": 0.6712755387839124, |
| "grad_norm": 1.0751131772994995, |
| "learning_rate": 1.643622306080438e-05, |
| "loss": 0.4835, |
| "step": 291200 |
| }, |
| { |
| "epoch": 0.6717365796759344, |
| "grad_norm": 0.9899608492851257, |
| "learning_rate": 1.6413171016203284e-05, |
| "loss": 0.4812, |
| "step": 291400 |
| }, |
| { |
| "epoch": 0.6721976205679563, |
| "grad_norm": 3.855407238006592, |
| "learning_rate": 1.6390118971602187e-05, |
| "loss": 0.5086, |
| "step": 291600 |
| }, |
| { |
| "epoch": 0.6726586614599782, |
| "grad_norm": 1.1831018924713135, |
| "learning_rate": 1.636706692700109e-05, |
| "loss": 0.5044, |
| "step": 291800 |
| }, |
| { |
| "epoch": 0.6731197023520001, |
| "grad_norm": 0.9542708396911621, |
| "learning_rate": 1.6344014882399994e-05, |
| "loss": 0.5374, |
| "step": 292000 |
| }, |
| { |
| "epoch": 0.6735807432440221, |
| "grad_norm": 1.1548891067504883, |
| "learning_rate": 1.63209628377989e-05, |
| "loss": 0.537, |
| "step": 292200 |
| }, |
| { |
| "epoch": 0.674041784136044, |
| "grad_norm": 0.7885655760765076, |
| "learning_rate": 1.6297910793197806e-05, |
| "loss": 0.4424, |
| "step": 292400 |
| }, |
| { |
| "epoch": 0.6745028250280659, |
| "grad_norm": 0.3185381293296814, |
| "learning_rate": 1.6274858748596708e-05, |
| "loss": 0.4631, |
| "step": 292600 |
| }, |
| { |
| "epoch": 0.6749638659200878, |
| "grad_norm": 1.5828882455825806, |
| "learning_rate": 1.625180670399561e-05, |
| "loss": 0.4709, |
| "step": 292800 |
| }, |
| { |
| "epoch": 0.6754249068121096, |
| "grad_norm": 1.0387425422668457, |
| "learning_rate": 1.6228754659394516e-05, |
| "loss": 0.5046, |
| "step": 293000 |
| }, |
| { |
| "epoch": 0.6758859477041316, |
| "grad_norm": 0.9464387893676758, |
| "learning_rate": 1.620570261479342e-05, |
| "loss": 0.4864, |
| "step": 293200 |
| }, |
| { |
| "epoch": 0.6763469885961535, |
| "grad_norm": 2.105416774749756, |
| "learning_rate": 1.6182650570192324e-05, |
| "loss": 0.4753, |
| "step": 293400 |
| }, |
| { |
| "epoch": 0.6768080294881754, |
| "grad_norm": 19.655559539794922, |
| "learning_rate": 1.6159598525591226e-05, |
| "loss": 0.5156, |
| "step": 293600 |
| }, |
| { |
| "epoch": 0.6772690703801973, |
| "grad_norm": 0.9485812187194824, |
| "learning_rate": 1.6136546480990132e-05, |
| "loss": 0.4566, |
| "step": 293800 |
| }, |
| { |
| "epoch": 0.6777301112722193, |
| "grad_norm": 2.1423091888427734, |
| "learning_rate": 1.6113494436389038e-05, |
| "loss": 0.4994, |
| "step": 294000 |
| }, |
| { |
| "epoch": 0.6781911521642412, |
| "grad_norm": 1.1267365217208862, |
| "learning_rate": 1.609044239178794e-05, |
| "loss": 0.4647, |
| "step": 294200 |
| }, |
| { |
| "epoch": 0.6786521930562631, |
| "grad_norm": 1.5974739789962769, |
| "learning_rate": 1.6067390347186846e-05, |
| "loss": 0.4851, |
| "step": 294400 |
| }, |
| { |
| "epoch": 0.679113233948285, |
| "grad_norm": 1.6099416017532349, |
| "learning_rate": 1.6044338302585748e-05, |
| "loss": 0.5076, |
| "step": 294600 |
| }, |
| { |
| "epoch": 0.679574274840307, |
| "grad_norm": 2.5845448970794678, |
| "learning_rate": 1.6021286257984654e-05, |
| "loss": 0.4898, |
| "step": 294800 |
| }, |
| { |
| "epoch": 0.6800353157323289, |
| "grad_norm": 2.4938390254974365, |
| "learning_rate": 1.5998234213383556e-05, |
| "loss": 0.5057, |
| "step": 295000 |
| }, |
| { |
| "epoch": 0.6804963566243508, |
| "grad_norm": 1.8456722497940063, |
| "learning_rate": 1.5975182168782458e-05, |
| "loss": 0.5114, |
| "step": 295200 |
| }, |
| { |
| "epoch": 0.6809573975163727, |
| "grad_norm": 1.0706640481948853, |
| "learning_rate": 1.5952130124181364e-05, |
| "loss": 0.5209, |
| "step": 295400 |
| }, |
| { |
| "epoch": 0.6814184384083947, |
| "grad_norm": 3.961984872817993, |
| "learning_rate": 1.592907807958027e-05, |
| "loss": 0.4766, |
| "step": 295600 |
| }, |
| { |
| "epoch": 0.6818794793004166, |
| "grad_norm": 1.8537254333496094, |
| "learning_rate": 1.5906026034979175e-05, |
| "loss": 0.5056, |
| "step": 295800 |
| }, |
| { |
| "epoch": 0.6823405201924385, |
| "grad_norm": 1.2177605628967285, |
| "learning_rate": 1.5882973990378077e-05, |
| "loss": 0.4694, |
| "step": 296000 |
| }, |
| { |
| "epoch": 0.6828015610844604, |
| "grad_norm": 4.802238464355469, |
| "learning_rate": 1.5859921945776983e-05, |
| "loss": 0.4912, |
| "step": 296200 |
| }, |
| { |
| "epoch": 0.6832626019764823, |
| "grad_norm": 1.457472801208496, |
| "learning_rate": 1.5836869901175885e-05, |
| "loss": 0.5113, |
| "step": 296400 |
| }, |
| { |
| "epoch": 0.6837236428685042, |
| "grad_norm": 1.4785571098327637, |
| "learning_rate": 1.5813817856574788e-05, |
| "loss": 0.5273, |
| "step": 296600 |
| }, |
| { |
| "epoch": 0.6841846837605261, |
| "grad_norm": 1.4524779319763184, |
| "learning_rate": 1.5790765811973693e-05, |
| "loss": 0.5218, |
| "step": 296800 |
| }, |
| { |
| "epoch": 0.684645724652548, |
| "grad_norm": 0.7074722051620483, |
| "learning_rate": 1.5767713767372596e-05, |
| "loss": 0.4772, |
| "step": 297000 |
| }, |
| { |
| "epoch": 0.68510676554457, |
| "grad_norm": 2.3584671020507812, |
| "learning_rate": 1.5744661722771505e-05, |
| "loss": 0.4854, |
| "step": 297200 |
| }, |
| { |
| "epoch": 0.6855678064365919, |
| "grad_norm": 0.7205916047096252, |
| "learning_rate": 1.5721609678170407e-05, |
| "loss": 0.5049, |
| "step": 297400 |
| }, |
| { |
| "epoch": 0.6860288473286138, |
| "grad_norm": 1.152288794517517, |
| "learning_rate": 1.569855763356931e-05, |
| "loss": 0.4726, |
| "step": 297600 |
| }, |
| { |
| "epoch": 0.6864898882206357, |
| "grad_norm": 1.2458863258361816, |
| "learning_rate": 1.5675505588968215e-05, |
| "loss": 0.5022, |
| "step": 297800 |
| }, |
| { |
| "epoch": 0.6869509291126576, |
| "grad_norm": 0.4532303214073181, |
| "learning_rate": 1.5652453544367117e-05, |
| "loss": 0.4986, |
| "step": 298000 |
| }, |
| { |
| "epoch": 0.6874119700046796, |
| "grad_norm": 1.452418327331543, |
| "learning_rate": 1.5629401499766023e-05, |
| "loss": 0.529, |
| "step": 298200 |
| }, |
| { |
| "epoch": 0.6878730108967015, |
| "grad_norm": 0.909852921962738, |
| "learning_rate": 1.5606349455164925e-05, |
| "loss": 0.4958, |
| "step": 298400 |
| }, |
| { |
| "epoch": 0.6883340517887234, |
| "grad_norm": 1.39362370967865, |
| "learning_rate": 1.558329741056383e-05, |
| "loss": 0.5138, |
| "step": 298600 |
| }, |
| { |
| "epoch": 0.6887950926807453, |
| "grad_norm": 1.186716914176941, |
| "learning_rate": 1.5560245365962737e-05, |
| "loss": 0.489, |
| "step": 298800 |
| }, |
| { |
| "epoch": 0.6892561335727673, |
| "grad_norm": 1.4374350309371948, |
| "learning_rate": 1.553719332136164e-05, |
| "loss": 0.476, |
| "step": 299000 |
| }, |
| { |
| "epoch": 0.6897171744647892, |
| "grad_norm": 1.2326973676681519, |
| "learning_rate": 1.5514141276760545e-05, |
| "loss": 0.5138, |
| "step": 299200 |
| }, |
| { |
| "epoch": 0.6901782153568111, |
| "grad_norm": 2.208893299102783, |
| "learning_rate": 1.5491089232159447e-05, |
| "loss": 0.5194, |
| "step": 299400 |
| }, |
| { |
| "epoch": 0.690639256248833, |
| "grad_norm": 2.6161091327667236, |
| "learning_rate": 1.5468037187558353e-05, |
| "loss": 0.5107, |
| "step": 299600 |
| }, |
| { |
| "epoch": 0.6911002971408549, |
| "grad_norm": 0.7406659126281738, |
| "learning_rate": 1.5444985142957255e-05, |
| "loss": 0.5295, |
| "step": 299800 |
| }, |
| { |
| "epoch": 0.6915613380328768, |
| "grad_norm": 0.9079631567001343, |
| "learning_rate": 1.542193309835616e-05, |
| "loss": 0.4774, |
| "step": 300000 |
| }, |
| { |
| "epoch": 0.6915613380328768, |
| "eval_loss": 0.49455514550209045, |
| "eval_runtime": 144.4178, |
| "eval_samples_per_second": 30.343, |
| "eval_steps_per_second": 30.343, |
| "step": 300000 |
| }, |
| { |
| "epoch": 0.6920223789248987, |
| "grad_norm": 1.3833597898483276, |
| "learning_rate": 1.5398881053755063e-05, |
| "loss": 0.5354, |
| "step": 300200 |
| }, |
| { |
| "epoch": 0.6924834198169206, |
| "grad_norm": 0.6728918552398682, |
| "learning_rate": 1.5375829009153965e-05, |
| "loss": 0.4536, |
| "step": 300400 |
| }, |
| { |
| "epoch": 0.6929444607089426, |
| "grad_norm": 1.655994176864624, |
| "learning_rate": 1.5352776964552874e-05, |
| "loss": 0.4603, |
| "step": 300600 |
| }, |
| { |
| "epoch": 0.6934055016009645, |
| "grad_norm": 1.8707417249679565, |
| "learning_rate": 1.5329724919951776e-05, |
| "loss": 0.5031, |
| "step": 300800 |
| }, |
| { |
| "epoch": 0.6938665424929864, |
| "grad_norm": 1.189855694770813, |
| "learning_rate": 1.5306672875350682e-05, |
| "loss": 0.4406, |
| "step": 301000 |
| }, |
| { |
| "epoch": 0.6943275833850083, |
| "grad_norm": 0.5549800395965576, |
| "learning_rate": 1.5283620830749584e-05, |
| "loss": 0.4955, |
| "step": 301200 |
| }, |
| { |
| "epoch": 0.6947886242770303, |
| "grad_norm": 1.3587613105773926, |
| "learning_rate": 1.5260568786148487e-05, |
| "loss": 0.4695, |
| "step": 301400 |
| }, |
| { |
| "epoch": 0.6952496651690522, |
| "grad_norm": 1.1256383657455444, |
| "learning_rate": 1.5237516741547392e-05, |
| "loss": 0.4928, |
| "step": 301600 |
| }, |
| { |
| "epoch": 0.6957107060610741, |
| "grad_norm": 1.0597585439682007, |
| "learning_rate": 1.5214464696946296e-05, |
| "loss": 0.5788, |
| "step": 301800 |
| }, |
| { |
| "epoch": 0.696171746953096, |
| "grad_norm": 1.196616768836975, |
| "learning_rate": 1.5191412652345199e-05, |
| "loss": 0.4771, |
| "step": 302000 |
| }, |
| { |
| "epoch": 0.696632787845118, |
| "grad_norm": 0.942761242389679, |
| "learning_rate": 1.5168360607744106e-05, |
| "loss": 0.5354, |
| "step": 302200 |
| }, |
| { |
| "epoch": 0.6970938287371399, |
| "grad_norm": 1.2657501697540283, |
| "learning_rate": 1.514530856314301e-05, |
| "loss": 0.4893, |
| "step": 302400 |
| }, |
| { |
| "epoch": 0.6975548696291618, |
| "grad_norm": 2.3571038246154785, |
| "learning_rate": 1.5122256518541914e-05, |
| "loss": 0.5137, |
| "step": 302600 |
| }, |
| { |
| "epoch": 0.6980159105211837, |
| "grad_norm": 0.39919519424438477, |
| "learning_rate": 1.5099204473940818e-05, |
| "loss": 0.4944, |
| "step": 302800 |
| }, |
| { |
| "epoch": 0.6984769514132056, |
| "grad_norm": 0.5027835965156555, |
| "learning_rate": 1.507615242933972e-05, |
| "loss": 0.5393, |
| "step": 303000 |
| }, |
| { |
| "epoch": 0.6989379923052275, |
| "grad_norm": 1.1620961427688599, |
| "learning_rate": 1.5053100384738624e-05, |
| "loss": 0.4845, |
| "step": 303200 |
| }, |
| { |
| "epoch": 0.6993990331972494, |
| "grad_norm": 1.5563163757324219, |
| "learning_rate": 1.5030048340137528e-05, |
| "loss": 0.5067, |
| "step": 303400 |
| }, |
| { |
| "epoch": 0.6998600740892713, |
| "grad_norm": 0.9374263882637024, |
| "learning_rate": 1.5006996295536432e-05, |
| "loss": 0.4745, |
| "step": 303600 |
| }, |
| { |
| "epoch": 0.7003211149812932, |
| "grad_norm": 1.7934794425964355, |
| "learning_rate": 1.4983944250935336e-05, |
| "loss": 0.469, |
| "step": 303800 |
| }, |
| { |
| "epoch": 0.7007821558733152, |
| "grad_norm": 1.6941883563995361, |
| "learning_rate": 1.4960892206334244e-05, |
| "loss": 0.4998, |
| "step": 304000 |
| }, |
| { |
| "epoch": 0.7012431967653371, |
| "grad_norm": 1.3214648962020874, |
| "learning_rate": 1.4937840161733146e-05, |
| "loss": 0.4831, |
| "step": 304200 |
| }, |
| { |
| "epoch": 0.701704237657359, |
| "grad_norm": 1.517357587814331, |
| "learning_rate": 1.491478811713205e-05, |
| "loss": 0.4715, |
| "step": 304400 |
| }, |
| { |
| "epoch": 0.7021652785493809, |
| "grad_norm": 0.819487988948822, |
| "learning_rate": 1.4891736072530954e-05, |
| "loss": 0.4914, |
| "step": 304600 |
| }, |
| { |
| "epoch": 0.7026263194414029, |
| "grad_norm": 1.0428346395492554, |
| "learning_rate": 1.4868684027929858e-05, |
| "loss": 0.5116, |
| "step": 304800 |
| }, |
| { |
| "epoch": 0.7030873603334248, |
| "grad_norm": 1.9063506126403809, |
| "learning_rate": 1.4845631983328762e-05, |
| "loss": 0.4993, |
| "step": 305000 |
| }, |
| { |
| "epoch": 0.7035484012254467, |
| "grad_norm": 2.997563600540161, |
| "learning_rate": 1.4822579938727666e-05, |
| "loss": 0.4698, |
| "step": 305200 |
| }, |
| { |
| "epoch": 0.7040094421174686, |
| "grad_norm": 1.612297534942627, |
| "learning_rate": 1.479952789412657e-05, |
| "loss": 0.5322, |
| "step": 305400 |
| }, |
| { |
| "epoch": 0.7044704830094906, |
| "grad_norm": 1.348860740661621, |
| "learning_rate": 1.4776475849525475e-05, |
| "loss": 0.5132, |
| "step": 305600 |
| }, |
| { |
| "epoch": 0.7049315239015125, |
| "grad_norm": 0.9498617649078369, |
| "learning_rate": 1.475342380492438e-05, |
| "loss": 0.5163, |
| "step": 305800 |
| }, |
| { |
| "epoch": 0.7053925647935344, |
| "grad_norm": 1.5654537677764893, |
| "learning_rate": 1.4730371760323283e-05, |
| "loss": 0.5524, |
| "step": 306000 |
| }, |
| { |
| "epoch": 0.7058536056855563, |
| "grad_norm": 1.3119844198226929, |
| "learning_rate": 1.4707319715722187e-05, |
| "loss": 0.5214, |
| "step": 306200 |
| }, |
| { |
| "epoch": 0.7063146465775783, |
| "grad_norm": 0.8046100735664368, |
| "learning_rate": 1.4684267671121091e-05, |
| "loss": 0.4921, |
| "step": 306400 |
| }, |
| { |
| "epoch": 0.7067756874696001, |
| "grad_norm": 0.5308769941329956, |
| "learning_rate": 1.4661215626519995e-05, |
| "loss": 0.4677, |
| "step": 306600 |
| }, |
| { |
| "epoch": 0.707236728361622, |
| "grad_norm": 1.8907235860824585, |
| "learning_rate": 1.4638163581918898e-05, |
| "loss": 0.5625, |
| "step": 306800 |
| }, |
| { |
| "epoch": 0.7076977692536439, |
| "grad_norm": 1.138887882232666, |
| "learning_rate": 1.4615111537317802e-05, |
| "loss": 0.4624, |
| "step": 307000 |
| }, |
| { |
| "epoch": 0.7081588101456658, |
| "grad_norm": 0.6800757646560669, |
| "learning_rate": 1.4592059492716706e-05, |
| "loss": 0.5375, |
| "step": 307200 |
| }, |
| { |
| "epoch": 0.7086198510376878, |
| "grad_norm": 1.3743557929992676, |
| "learning_rate": 1.4569007448115613e-05, |
| "loss": 0.548, |
| "step": 307400 |
| }, |
| { |
| "epoch": 0.7090808919297097, |
| "grad_norm": 1.4539231061935425, |
| "learning_rate": 1.4545955403514517e-05, |
| "loss": 0.5169, |
| "step": 307600 |
| }, |
| { |
| "epoch": 0.7095419328217316, |
| "grad_norm": 0.6173273324966431, |
| "learning_rate": 1.4522903358913421e-05, |
| "loss": 0.4933, |
| "step": 307800 |
| }, |
| { |
| "epoch": 0.7100029737137535, |
| "grad_norm": 1.401665210723877, |
| "learning_rate": 1.4499851314312323e-05, |
| "loss": 0.5009, |
| "step": 308000 |
| }, |
| { |
| "epoch": 0.7104640146057755, |
| "grad_norm": 1.782645344734192, |
| "learning_rate": 1.4476799269711227e-05, |
| "loss": 0.5133, |
| "step": 308200 |
| }, |
| { |
| "epoch": 0.7109250554977974, |
| "grad_norm": 1.1517479419708252, |
| "learning_rate": 1.4453747225110131e-05, |
| "loss": 0.4714, |
| "step": 308400 |
| }, |
| { |
| "epoch": 0.7113860963898193, |
| "grad_norm": 0.3535856604576111, |
| "learning_rate": 1.4430695180509035e-05, |
| "loss": 0.4667, |
| "step": 308600 |
| }, |
| { |
| "epoch": 0.7118471372818412, |
| "grad_norm": 1.6771602630615234, |
| "learning_rate": 1.4407643135907939e-05, |
| "loss": 0.4971, |
| "step": 308800 |
| }, |
| { |
| "epoch": 0.7123081781738632, |
| "grad_norm": 1.895080804824829, |
| "learning_rate": 1.4384591091306845e-05, |
| "loss": 0.4917, |
| "step": 309000 |
| }, |
| { |
| "epoch": 0.7127692190658851, |
| "grad_norm": 1.5443464517593384, |
| "learning_rate": 1.4361539046705749e-05, |
| "loss": 0.4998, |
| "step": 309200 |
| }, |
| { |
| "epoch": 0.713230259957907, |
| "grad_norm": 0.635612428188324, |
| "learning_rate": 1.4338487002104653e-05, |
| "loss": 0.5347, |
| "step": 309400 |
| }, |
| { |
| "epoch": 0.7136913008499289, |
| "grad_norm": 1.680080771446228, |
| "learning_rate": 1.4315434957503557e-05, |
| "loss": 0.5551, |
| "step": 309600 |
| }, |
| { |
| "epoch": 0.7141523417419507, |
| "grad_norm": 0.8438254594802856, |
| "learning_rate": 1.429238291290246e-05, |
| "loss": 0.5284, |
| "step": 309800 |
| }, |
| { |
| "epoch": 0.7146133826339727, |
| "grad_norm": 1.1309008598327637, |
| "learning_rate": 1.4269330868301365e-05, |
| "loss": 0.5249, |
| "step": 310000 |
| }, |
| { |
| "epoch": 0.7150744235259946, |
| "grad_norm": 0.8668766021728516, |
| "learning_rate": 1.4246278823700269e-05, |
| "loss": 0.4738, |
| "step": 310200 |
| }, |
| { |
| "epoch": 0.7155354644180165, |
| "grad_norm": 0.8339349627494812, |
| "learning_rate": 1.4223226779099173e-05, |
| "loss": 0.4969, |
| "step": 310400 |
| }, |
| { |
| "epoch": 0.7159965053100384, |
| "grad_norm": 1.1966744661331177, |
| "learning_rate": 1.4200174734498075e-05, |
| "loss": 0.5029, |
| "step": 310600 |
| }, |
| { |
| "epoch": 0.7164575462020604, |
| "grad_norm": 1.6723459959030151, |
| "learning_rate": 1.4177122689896982e-05, |
| "loss": 0.538, |
| "step": 310800 |
| }, |
| { |
| "epoch": 0.7169185870940823, |
| "grad_norm": 0.6843717694282532, |
| "learning_rate": 1.4154070645295886e-05, |
| "loss": 0.4896, |
| "step": 311000 |
| }, |
| { |
| "epoch": 0.7173796279861042, |
| "grad_norm": 2.2339181900024414, |
| "learning_rate": 1.413101860069479e-05, |
| "loss": 0.5173, |
| "step": 311200 |
| }, |
| { |
| "epoch": 0.7178406688781261, |
| "grad_norm": 1.8708288669586182, |
| "learning_rate": 1.4107966556093694e-05, |
| "loss": 0.4853, |
| "step": 311400 |
| }, |
| { |
| "epoch": 0.7183017097701481, |
| "grad_norm": 0.8902921080589294, |
| "learning_rate": 1.4084914511492597e-05, |
| "loss": 0.4688, |
| "step": 311600 |
| }, |
| { |
| "epoch": 0.71876275066217, |
| "grad_norm": 0.9172972440719604, |
| "learning_rate": 1.40618624668915e-05, |
| "loss": 0.4588, |
| "step": 311800 |
| }, |
| { |
| "epoch": 0.7192237915541919, |
| "grad_norm": 1.278566837310791, |
| "learning_rate": 1.4038810422290404e-05, |
| "loss": 0.5001, |
| "step": 312000 |
| }, |
| { |
| "epoch": 0.7196848324462138, |
| "grad_norm": 0.6410205364227295, |
| "learning_rate": 1.4015758377689308e-05, |
| "loss": 0.4772, |
| "step": 312200 |
| }, |
| { |
| "epoch": 0.7201458733382358, |
| "grad_norm": 1.300574541091919, |
| "learning_rate": 1.3992706333088216e-05, |
| "loss": 0.4829, |
| "step": 312400 |
| }, |
| { |
| "epoch": 0.7206069142302577, |
| "grad_norm": 1.1145926713943481, |
| "learning_rate": 1.396965428848712e-05, |
| "loss": 0.5403, |
| "step": 312600 |
| }, |
| { |
| "epoch": 0.7210679551222796, |
| "grad_norm": 2.115949869155884, |
| "learning_rate": 1.3946602243886022e-05, |
| "loss": 0.5284, |
| "step": 312800 |
| }, |
| { |
| "epoch": 0.7215289960143015, |
| "grad_norm": 1.5189509391784668, |
| "learning_rate": 1.3923550199284926e-05, |
| "loss": 0.4795, |
| "step": 313000 |
| }, |
| { |
| "epoch": 0.7219900369063234, |
| "grad_norm": 0.7120934724807739, |
| "learning_rate": 1.390049815468383e-05, |
| "loss": 0.4977, |
| "step": 313200 |
| }, |
| { |
| "epoch": 0.7224510777983453, |
| "grad_norm": 1.7092379331588745, |
| "learning_rate": 1.3877446110082734e-05, |
| "loss": 0.448, |
| "step": 313400 |
| }, |
| { |
| "epoch": 0.7229121186903672, |
| "grad_norm": 1.4430723190307617, |
| "learning_rate": 1.3854394065481638e-05, |
| "loss": 0.4991, |
| "step": 313600 |
| }, |
| { |
| "epoch": 0.7233731595823891, |
| "grad_norm": 0.8764591217041016, |
| "learning_rate": 1.3831342020880542e-05, |
| "loss": 0.5104, |
| "step": 313800 |
| }, |
| { |
| "epoch": 0.723834200474411, |
| "grad_norm": 1.5279911756515503, |
| "learning_rate": 1.3808289976279446e-05, |
| "loss": 0.4888, |
| "step": 314000 |
| }, |
| { |
| "epoch": 0.724295241366433, |
| "grad_norm": 1.9160465002059937, |
| "learning_rate": 1.3785237931678352e-05, |
| "loss": 0.5148, |
| "step": 314200 |
| }, |
| { |
| "epoch": 0.7247562822584549, |
| "grad_norm": 0.8003278374671936, |
| "learning_rate": 1.3762185887077256e-05, |
| "loss": 0.5243, |
| "step": 314400 |
| }, |
| { |
| "epoch": 0.7252173231504768, |
| "grad_norm": 1.049712061882019, |
| "learning_rate": 1.373913384247616e-05, |
| "loss": 0.4999, |
| "step": 314600 |
| }, |
| { |
| "epoch": 0.7256783640424987, |
| "grad_norm": 1.2144337892532349, |
| "learning_rate": 1.3716081797875064e-05, |
| "loss": 0.5561, |
| "step": 314800 |
| }, |
| { |
| "epoch": 0.7261394049345207, |
| "grad_norm": 2.1154098510742188, |
| "learning_rate": 1.3693029753273968e-05, |
| "loss": 0.4614, |
| "step": 315000 |
| }, |
| { |
| "epoch": 0.7266004458265426, |
| "grad_norm": 0.5475128889083862, |
| "learning_rate": 1.3669977708672872e-05, |
| "loss": 0.479, |
| "step": 315200 |
| }, |
| { |
| "epoch": 0.7270614867185645, |
| "grad_norm": 1.0177366733551025, |
| "learning_rate": 1.3646925664071774e-05, |
| "loss": 0.5073, |
| "step": 315400 |
| }, |
| { |
| "epoch": 0.7275225276105864, |
| "grad_norm": 3.217353105545044, |
| "learning_rate": 1.3623873619470678e-05, |
| "loss": 0.5399, |
| "step": 315600 |
| }, |
| { |
| "epoch": 0.7279835685026084, |
| "grad_norm": 2.1022963523864746, |
| "learning_rate": 1.3600821574869585e-05, |
| "loss": 0.5137, |
| "step": 315800 |
| }, |
| { |
| "epoch": 0.7284446093946303, |
| "grad_norm": 0.4113731384277344, |
| "learning_rate": 1.357776953026849e-05, |
| "loss": 0.4935, |
| "step": 316000 |
| }, |
| { |
| "epoch": 0.7289056502866522, |
| "grad_norm": 0.6860734224319458, |
| "learning_rate": 1.3554717485667393e-05, |
| "loss": 0.5092, |
| "step": 316200 |
| }, |
| { |
| "epoch": 0.7293666911786741, |
| "grad_norm": 1.0901679992675781, |
| "learning_rate": 1.3531665441066297e-05, |
| "loss": 0.5062, |
| "step": 316400 |
| }, |
| { |
| "epoch": 0.729827732070696, |
| "grad_norm": 1.102059006690979, |
| "learning_rate": 1.35086133964652e-05, |
| "loss": 0.5143, |
| "step": 316600 |
| }, |
| { |
| "epoch": 0.7302887729627179, |
| "grad_norm": 1.0236157178878784, |
| "learning_rate": 1.3485561351864103e-05, |
| "loss": 0.5476, |
| "step": 316800 |
| }, |
| { |
| "epoch": 0.7307498138547398, |
| "grad_norm": 1.4766557216644287, |
| "learning_rate": 1.3462509307263007e-05, |
| "loss": 0.4543, |
| "step": 317000 |
| }, |
| { |
| "epoch": 0.7312108547467617, |
| "grad_norm": 1.7664604187011719, |
| "learning_rate": 1.3439457262661911e-05, |
| "loss": 0.5531, |
| "step": 317200 |
| }, |
| { |
| "epoch": 0.7316718956387837, |
| "grad_norm": 1.5094674825668335, |
| "learning_rate": 1.3416405218060815e-05, |
| "loss": 0.4851, |
| "step": 317400 |
| }, |
| { |
| "epoch": 0.7321329365308056, |
| "grad_norm": 0.6211707592010498, |
| "learning_rate": 1.3393353173459721e-05, |
| "loss": 0.4945, |
| "step": 317600 |
| }, |
| { |
| "epoch": 0.7325939774228275, |
| "grad_norm": 0.9305445551872253, |
| "learning_rate": 1.3370301128858625e-05, |
| "loss": 0.5324, |
| "step": 317800 |
| }, |
| { |
| "epoch": 0.7330550183148494, |
| "grad_norm": 1.2025363445281982, |
| "learning_rate": 1.3347249084257529e-05, |
| "loss": 0.5128, |
| "step": 318000 |
| }, |
| { |
| "epoch": 0.7335160592068714, |
| "grad_norm": 1.1147645711898804, |
| "learning_rate": 1.3324197039656433e-05, |
| "loss": 0.4722, |
| "step": 318200 |
| }, |
| { |
| "epoch": 0.7339771000988933, |
| "grad_norm": 1.073165774345398, |
| "learning_rate": 1.3301144995055337e-05, |
| "loss": 0.5153, |
| "step": 318400 |
| }, |
| { |
| "epoch": 0.7344381409909152, |
| "grad_norm": 1.6959824562072754, |
| "learning_rate": 1.3278092950454241e-05, |
| "loss": 0.4795, |
| "step": 318600 |
| }, |
| { |
| "epoch": 0.7348991818829371, |
| "grad_norm": 0.850702702999115, |
| "learning_rate": 1.3255040905853145e-05, |
| "loss": 0.4875, |
| "step": 318800 |
| }, |
| { |
| "epoch": 0.735360222774959, |
| "grad_norm": 1.5950241088867188, |
| "learning_rate": 1.3231988861252049e-05, |
| "loss": 0.4988, |
| "step": 319000 |
| }, |
| { |
| "epoch": 0.735821263666981, |
| "grad_norm": 1.4513007402420044, |
| "learning_rate": 1.3208936816650955e-05, |
| "loss": 0.4947, |
| "step": 319200 |
| }, |
| { |
| "epoch": 0.7362823045590029, |
| "grad_norm": 2.507760524749756, |
| "learning_rate": 1.3185884772049859e-05, |
| "loss": 0.5527, |
| "step": 319400 |
| }, |
| { |
| "epoch": 0.7367433454510248, |
| "grad_norm": 0.49451202154159546, |
| "learning_rate": 1.3162832727448763e-05, |
| "loss": 0.5249, |
| "step": 319600 |
| }, |
| { |
| "epoch": 0.7372043863430467, |
| "grad_norm": 1.2579914331436157, |
| "learning_rate": 1.3139780682847667e-05, |
| "loss": 0.4911, |
| "step": 319800 |
| }, |
| { |
| "epoch": 0.7376654272350686, |
| "grad_norm": 0.30338361859321594, |
| "learning_rate": 1.311672863824657e-05, |
| "loss": 0.524, |
| "step": 320000 |
| }, |
| { |
| "epoch": 0.7381264681270905, |
| "grad_norm": 3.077241897583008, |
| "learning_rate": 1.3093676593645473e-05, |
| "loss": 0.5091, |
| "step": 320200 |
| }, |
| { |
| "epoch": 0.7385875090191124, |
| "grad_norm": 1.3362106084823608, |
| "learning_rate": 1.3070624549044377e-05, |
| "loss": 0.4827, |
| "step": 320400 |
| }, |
| { |
| "epoch": 0.7390485499111343, |
| "grad_norm": 1.2579853534698486, |
| "learning_rate": 1.304757250444328e-05, |
| "loss": 0.4945, |
| "step": 320600 |
| }, |
| { |
| "epoch": 0.7395095908031563, |
| "grad_norm": 1.0365217924118042, |
| "learning_rate": 1.3024520459842185e-05, |
| "loss": 0.5256, |
| "step": 320800 |
| }, |
| { |
| "epoch": 0.7399706316951782, |
| "grad_norm": 0.9613335132598877, |
| "learning_rate": 1.3001468415241092e-05, |
| "loss": 0.4811, |
| "step": 321000 |
| }, |
| { |
| "epoch": 0.7404316725872001, |
| "grad_norm": 1.111335039138794, |
| "learning_rate": 1.2978416370639996e-05, |
| "loss": 0.5011, |
| "step": 321200 |
| }, |
| { |
| "epoch": 0.740892713479222, |
| "grad_norm": 1.1504440307617188, |
| "learning_rate": 1.2955364326038898e-05, |
| "loss": 0.4916, |
| "step": 321400 |
| }, |
| { |
| "epoch": 0.741353754371244, |
| "grad_norm": 0.9241997599601746, |
| "learning_rate": 1.2932312281437802e-05, |
| "loss": 0.4507, |
| "step": 321600 |
| }, |
| { |
| "epoch": 0.7418147952632659, |
| "grad_norm": 1.1424815654754639, |
| "learning_rate": 1.2909260236836706e-05, |
| "loss": 0.5188, |
| "step": 321800 |
| }, |
| { |
| "epoch": 0.7422758361552878, |
| "grad_norm": 0.8069947957992554, |
| "learning_rate": 1.288620819223561e-05, |
| "loss": 0.4967, |
| "step": 322000 |
| }, |
| { |
| "epoch": 0.7427368770473097, |
| "grad_norm": 1.4160171747207642, |
| "learning_rate": 1.2863156147634514e-05, |
| "loss": 0.514, |
| "step": 322200 |
| }, |
| { |
| "epoch": 0.7431979179393317, |
| "grad_norm": 1.1542912721633911, |
| "learning_rate": 1.2840104103033418e-05, |
| "loss": 0.4799, |
| "step": 322400 |
| }, |
| { |
| "epoch": 0.7436589588313536, |
| "grad_norm": 1.112442970275879, |
| "learning_rate": 1.2817052058432324e-05, |
| "loss": 0.4787, |
| "step": 322600 |
| }, |
| { |
| "epoch": 0.7441199997233755, |
| "grad_norm": 1.970729112625122, |
| "learning_rate": 1.2794000013831228e-05, |
| "loss": 0.4734, |
| "step": 322800 |
| }, |
| { |
| "epoch": 0.7445810406153974, |
| "grad_norm": 0.7014828324317932, |
| "learning_rate": 1.2770947969230132e-05, |
| "loss": 0.5364, |
| "step": 323000 |
| }, |
| { |
| "epoch": 0.7450420815074194, |
| "grad_norm": 0.852289080619812, |
| "learning_rate": 1.2747895924629036e-05, |
| "loss": 0.5169, |
| "step": 323200 |
| }, |
| { |
| "epoch": 0.7455031223994412, |
| "grad_norm": 1.6365413665771484, |
| "learning_rate": 1.272484388002794e-05, |
| "loss": 0.4716, |
| "step": 323400 |
| }, |
| { |
| "epoch": 0.7459641632914631, |
| "grad_norm": 1.1326274871826172, |
| "learning_rate": 1.2701791835426844e-05, |
| "loss": 0.483, |
| "step": 323600 |
| }, |
| { |
| "epoch": 0.746425204183485, |
| "grad_norm": 1.7985695600509644, |
| "learning_rate": 1.2678739790825748e-05, |
| "loss": 0.5214, |
| "step": 323800 |
| }, |
| { |
| "epoch": 0.7468862450755069, |
| "grad_norm": 1.3214313983917236, |
| "learning_rate": 1.265568774622465e-05, |
| "loss": 0.5369, |
| "step": 324000 |
| }, |
| { |
| "epoch": 0.7473472859675289, |
| "grad_norm": 1.8575730323791504, |
| "learning_rate": 1.2632635701623557e-05, |
| "loss": 0.5292, |
| "step": 324200 |
| }, |
| { |
| "epoch": 0.7478083268595508, |
| "grad_norm": 0.62919682264328, |
| "learning_rate": 1.2609583657022461e-05, |
| "loss": 0.4887, |
| "step": 324400 |
| }, |
| { |
| "epoch": 0.7482693677515727, |
| "grad_norm": 2.681436777114868, |
| "learning_rate": 1.2586531612421365e-05, |
| "loss": 0.5284, |
| "step": 324600 |
| }, |
| { |
| "epoch": 0.7487304086435946, |
| "grad_norm": 1.6911917924880981, |
| "learning_rate": 1.256347956782027e-05, |
| "loss": 0.504, |
| "step": 324800 |
| }, |
| { |
| "epoch": 0.7491914495356166, |
| "grad_norm": 1.236039638519287, |
| "learning_rate": 1.2540427523219173e-05, |
| "loss": 0.5036, |
| "step": 325000 |
| }, |
| { |
| "epoch": 0.7496524904276385, |
| "grad_norm": 1.1618597507476807, |
| "learning_rate": 1.2517375478618076e-05, |
| "loss": 0.5154, |
| "step": 325200 |
| }, |
| { |
| "epoch": 0.7501135313196604, |
| "grad_norm": 1.5990595817565918, |
| "learning_rate": 1.249432343401698e-05, |
| "loss": 0.4939, |
| "step": 325400 |
| }, |
| { |
| "epoch": 0.7505745722116823, |
| "grad_norm": 1.3306795358657837, |
| "learning_rate": 1.2471271389415885e-05, |
| "loss": 0.5226, |
| "step": 325600 |
| }, |
| { |
| "epoch": 0.7510356131037043, |
| "grad_norm": 9.81534481048584, |
| "learning_rate": 1.244821934481479e-05, |
| "loss": 0.4952, |
| "step": 325800 |
| }, |
| { |
| "epoch": 0.7514966539957262, |
| "grad_norm": 1.0444341897964478, |
| "learning_rate": 1.2425167300213693e-05, |
| "loss": 0.4981, |
| "step": 326000 |
| }, |
| { |
| "epoch": 0.7519576948877481, |
| "grad_norm": 0.957382321357727, |
| "learning_rate": 1.2402115255612597e-05, |
| "loss": 0.4855, |
| "step": 326200 |
| }, |
| { |
| "epoch": 0.75241873577977, |
| "grad_norm": 1.7747009992599487, |
| "learning_rate": 1.2379063211011501e-05, |
| "loss": 0.4847, |
| "step": 326400 |
| }, |
| { |
| "epoch": 0.752879776671792, |
| "grad_norm": 0.8051755428314209, |
| "learning_rate": 1.2356011166410405e-05, |
| "loss": 0.4675, |
| "step": 326600 |
| }, |
| { |
| "epoch": 0.7533408175638138, |
| "grad_norm": 0.8562848567962646, |
| "learning_rate": 1.233295912180931e-05, |
| "loss": 0.5252, |
| "step": 326800 |
| }, |
| { |
| "epoch": 0.7538018584558357, |
| "grad_norm": 0.8655639886856079, |
| "learning_rate": 1.2309907077208213e-05, |
| "loss": 0.4904, |
| "step": 327000 |
| }, |
| { |
| "epoch": 0.7542628993478576, |
| "grad_norm": 2.3433034420013428, |
| "learning_rate": 1.2286855032607119e-05, |
| "loss": 0.4885, |
| "step": 327200 |
| }, |
| { |
| "epoch": 0.7547239402398795, |
| "grad_norm": 1.1155329942703247, |
| "learning_rate": 1.2263802988006023e-05, |
| "loss": 0.4917, |
| "step": 327400 |
| }, |
| { |
| "epoch": 0.7551849811319015, |
| "grad_norm": 1.4027127027511597, |
| "learning_rate": 1.2240750943404925e-05, |
| "loss": 0.4837, |
| "step": 327600 |
| }, |
| { |
| "epoch": 0.7556460220239234, |
| "grad_norm": 1.8373444080352783, |
| "learning_rate": 1.2217698898803829e-05, |
| "loss": 0.4972, |
| "step": 327800 |
| }, |
| { |
| "epoch": 0.7561070629159453, |
| "grad_norm": 1.7816424369812012, |
| "learning_rate": 1.2194646854202735e-05, |
| "loss": 0.526, |
| "step": 328000 |
| }, |
| { |
| "epoch": 0.7565681038079672, |
| "grad_norm": 1.9828554391860962, |
| "learning_rate": 1.2171594809601639e-05, |
| "loss": 0.4813, |
| "step": 328200 |
| }, |
| { |
| "epoch": 0.7570291446999892, |
| "grad_norm": 2.528639078140259, |
| "learning_rate": 1.2148542765000543e-05, |
| "loss": 0.4961, |
| "step": 328400 |
| }, |
| { |
| "epoch": 0.7574901855920111, |
| "grad_norm": 0.7348084449768066, |
| "learning_rate": 1.2125490720399447e-05, |
| "loss": 0.4763, |
| "step": 328600 |
| }, |
| { |
| "epoch": 0.757951226484033, |
| "grad_norm": 0.5879639983177185, |
| "learning_rate": 1.2102438675798349e-05, |
| "loss": 0.472, |
| "step": 328800 |
| }, |
| { |
| "epoch": 0.7584122673760549, |
| "grad_norm": 0.9352529048919678, |
| "learning_rate": 1.2079386631197255e-05, |
| "loss": 0.4944, |
| "step": 329000 |
| }, |
| { |
| "epoch": 0.7588733082680769, |
| "grad_norm": 1.5848828554153442, |
| "learning_rate": 1.2056334586596159e-05, |
| "loss": 0.5116, |
| "step": 329200 |
| }, |
| { |
| "epoch": 0.7593343491600988, |
| "grad_norm": 0.44051986932754517, |
| "learning_rate": 1.2033282541995063e-05, |
| "loss": 0.5375, |
| "step": 329400 |
| }, |
| { |
| "epoch": 0.7597953900521207, |
| "grad_norm": 2.127389907836914, |
| "learning_rate": 1.2010230497393967e-05, |
| "loss": 0.4606, |
| "step": 329600 |
| }, |
| { |
| "epoch": 0.7602564309441426, |
| "grad_norm": 1.7485988140106201, |
| "learning_rate": 1.1987178452792872e-05, |
| "loss": 0.4817, |
| "step": 329800 |
| }, |
| { |
| "epoch": 0.7607174718361646, |
| "grad_norm": 1.1227333545684814, |
| "learning_rate": 1.1964126408191775e-05, |
| "loss": 0.5069, |
| "step": 330000 |
| }, |
| { |
| "epoch": 0.7611785127281864, |
| "grad_norm": 0.8382754325866699, |
| "learning_rate": 1.1941074363590679e-05, |
| "loss": 0.5328, |
| "step": 330200 |
| }, |
| { |
| "epoch": 0.7616395536202083, |
| "grad_norm": 0.9372780323028564, |
| "learning_rate": 1.1918022318989583e-05, |
| "loss": 0.4781, |
| "step": 330400 |
| }, |
| { |
| "epoch": 0.7621005945122302, |
| "grad_norm": 1.3626426458358765, |
| "learning_rate": 1.1894970274388488e-05, |
| "loss": 0.4831, |
| "step": 330600 |
| }, |
| { |
| "epoch": 0.7625616354042521, |
| "grad_norm": 0.8523277044296265, |
| "learning_rate": 1.1871918229787392e-05, |
| "loss": 0.5254, |
| "step": 330800 |
| }, |
| { |
| "epoch": 0.7630226762962741, |
| "grad_norm": 1.5201365947723389, |
| "learning_rate": 1.1848866185186296e-05, |
| "loss": 0.5154, |
| "step": 331000 |
| }, |
| { |
| "epoch": 0.763483717188296, |
| "grad_norm": 0.46071958541870117, |
| "learning_rate": 1.18258141405852e-05, |
| "loss": 0.4999, |
| "step": 331200 |
| }, |
| { |
| "epoch": 0.7639447580803179, |
| "grad_norm": 1.4432693719863892, |
| "learning_rate": 1.1802762095984104e-05, |
| "loss": 0.4895, |
| "step": 331400 |
| }, |
| { |
| "epoch": 0.7644057989723398, |
| "grad_norm": 3.8710200786590576, |
| "learning_rate": 1.1779710051383008e-05, |
| "loss": 0.5162, |
| "step": 331600 |
| }, |
| { |
| "epoch": 0.7648668398643618, |
| "grad_norm": 1.2128450870513916, |
| "learning_rate": 1.1756658006781912e-05, |
| "loss": 0.482, |
| "step": 331800 |
| }, |
| { |
| "epoch": 0.7653278807563837, |
| "grad_norm": 1.517349123954773, |
| "learning_rate": 1.1733605962180816e-05, |
| "loss": 0.5071, |
| "step": 332000 |
| }, |
| { |
| "epoch": 0.7657889216484056, |
| "grad_norm": 1.6065720319747925, |
| "learning_rate": 1.171055391757972e-05, |
| "loss": 0.5092, |
| "step": 332200 |
| }, |
| { |
| "epoch": 0.7662499625404275, |
| "grad_norm": 2.150094747543335, |
| "learning_rate": 1.1687501872978624e-05, |
| "loss": 0.4952, |
| "step": 332400 |
| }, |
| { |
| "epoch": 0.7667110034324495, |
| "grad_norm": 0.7310593724250793, |
| "learning_rate": 1.1664449828377528e-05, |
| "loss": 0.4762, |
| "step": 332600 |
| }, |
| { |
| "epoch": 0.7671720443244714, |
| "grad_norm": 1.276360034942627, |
| "learning_rate": 1.1641397783776432e-05, |
| "loss": 0.481, |
| "step": 332800 |
| }, |
| { |
| "epoch": 0.7676330852164933, |
| "grad_norm": 0.42438310384750366, |
| "learning_rate": 1.1618345739175336e-05, |
| "loss": 0.4871, |
| "step": 333000 |
| }, |
| { |
| "epoch": 0.7680941261085152, |
| "grad_norm": 1.0823901891708374, |
| "learning_rate": 1.1595293694574242e-05, |
| "loss": 0.4841, |
| "step": 333200 |
| }, |
| { |
| "epoch": 0.7685551670005372, |
| "grad_norm": 1.3709418773651123, |
| "learning_rate": 1.1572241649973146e-05, |
| "loss": 0.4975, |
| "step": 333400 |
| }, |
| { |
| "epoch": 0.769016207892559, |
| "grad_norm": 1.654448390007019, |
| "learning_rate": 1.154918960537205e-05, |
| "loss": 0.4477, |
| "step": 333600 |
| }, |
| { |
| "epoch": 0.7694772487845809, |
| "grad_norm": 0.4724847078323364, |
| "learning_rate": 1.1526137560770952e-05, |
| "loss": 0.4991, |
| "step": 333800 |
| }, |
| { |
| "epoch": 0.7699382896766028, |
| "grad_norm": 1.3029577732086182, |
| "learning_rate": 1.1503085516169858e-05, |
| "loss": 0.5075, |
| "step": 334000 |
| }, |
| { |
| "epoch": 0.7703993305686248, |
| "grad_norm": 1.2783386707305908, |
| "learning_rate": 1.1480033471568762e-05, |
| "loss": 0.5014, |
| "step": 334200 |
| }, |
| { |
| "epoch": 0.7708603714606467, |
| "grad_norm": 1.8879179954528809, |
| "learning_rate": 1.1456981426967666e-05, |
| "loss": 0.4937, |
| "step": 334400 |
| }, |
| { |
| "epoch": 0.7713214123526686, |
| "grad_norm": 1.2683477401733398, |
| "learning_rate": 1.143392938236657e-05, |
| "loss": 0.4751, |
| "step": 334600 |
| }, |
| { |
| "epoch": 0.7717824532446905, |
| "grad_norm": 2.740619421005249, |
| "learning_rate": 1.1410877337765474e-05, |
| "loss": 0.5027, |
| "step": 334800 |
| }, |
| { |
| "epoch": 0.7722434941367124, |
| "grad_norm": 1.6804182529449463, |
| "learning_rate": 1.1387825293164378e-05, |
| "loss": 0.4677, |
| "step": 335000 |
| }, |
| { |
| "epoch": 0.7727045350287344, |
| "grad_norm": 2.2255728244781494, |
| "learning_rate": 1.1364773248563282e-05, |
| "loss": 0.4803, |
| "step": 335200 |
| }, |
| { |
| "epoch": 0.7731655759207563, |
| "grad_norm": 1.0658611059188843, |
| "learning_rate": 1.1341721203962186e-05, |
| "loss": 0.4537, |
| "step": 335400 |
| }, |
| { |
| "epoch": 0.7736266168127782, |
| "grad_norm": 1.3411928415298462, |
| "learning_rate": 1.131866915936109e-05, |
| "loss": 0.4775, |
| "step": 335600 |
| }, |
| { |
| "epoch": 0.7740876577048001, |
| "grad_norm": 1.467576265335083, |
| "learning_rate": 1.1295617114759995e-05, |
| "loss": 0.501, |
| "step": 335800 |
| }, |
| { |
| "epoch": 0.7745486985968221, |
| "grad_norm": 1.2459622621536255, |
| "learning_rate": 1.1272565070158899e-05, |
| "loss": 0.5128, |
| "step": 336000 |
| }, |
| { |
| "epoch": 0.775009739488844, |
| "grad_norm": 1.0791770219802856, |
| "learning_rate": 1.1249513025557801e-05, |
| "loss": 0.4476, |
| "step": 336200 |
| }, |
| { |
| "epoch": 0.7754707803808659, |
| "grad_norm": 1.271998643875122, |
| "learning_rate": 1.1226460980956705e-05, |
| "loss": 0.4701, |
| "step": 336400 |
| }, |
| { |
| "epoch": 0.7759318212728878, |
| "grad_norm": 1.7874670028686523, |
| "learning_rate": 1.1203408936355611e-05, |
| "loss": 0.5229, |
| "step": 336600 |
| }, |
| { |
| "epoch": 0.7763928621649098, |
| "grad_norm": 0.7723343968391418, |
| "learning_rate": 1.1180356891754515e-05, |
| "loss": 0.3966, |
| "step": 336800 |
| }, |
| { |
| "epoch": 0.7768539030569316, |
| "grad_norm": 1.4732195138931274, |
| "learning_rate": 1.1157304847153419e-05, |
| "loss": 0.4943, |
| "step": 337000 |
| }, |
| { |
| "epoch": 0.7773149439489535, |
| "grad_norm": 1.1352183818817139, |
| "learning_rate": 1.1134252802552323e-05, |
| "loss": 0.5189, |
| "step": 337200 |
| }, |
| { |
| "epoch": 0.7777759848409754, |
| "grad_norm": 1.1527478694915771, |
| "learning_rate": 1.1111200757951227e-05, |
| "loss": 0.5249, |
| "step": 337400 |
| }, |
| { |
| "epoch": 0.7782370257329974, |
| "grad_norm": 0.9301843643188477, |
| "learning_rate": 1.1088148713350131e-05, |
| "loss": 0.4648, |
| "step": 337600 |
| }, |
| { |
| "epoch": 0.7786980666250193, |
| "grad_norm": 1.1807146072387695, |
| "learning_rate": 1.1065096668749035e-05, |
| "loss": 0.4759, |
| "step": 337800 |
| }, |
| { |
| "epoch": 0.7791591075170412, |
| "grad_norm": 1.4340068101882935, |
| "learning_rate": 1.1042044624147939e-05, |
| "loss": 0.4719, |
| "step": 338000 |
| }, |
| { |
| "epoch": 0.7796201484090631, |
| "grad_norm": 1.1477597951889038, |
| "learning_rate": 1.1018992579546845e-05, |
| "loss": 0.5048, |
| "step": 338200 |
| }, |
| { |
| "epoch": 0.780081189301085, |
| "grad_norm": 1.487963318824768, |
| "learning_rate": 1.0995940534945749e-05, |
| "loss": 0.5077, |
| "step": 338400 |
| }, |
| { |
| "epoch": 0.780542230193107, |
| "grad_norm": 3.070131301879883, |
| "learning_rate": 1.0972888490344651e-05, |
| "loss": 0.4992, |
| "step": 338600 |
| }, |
| { |
| "epoch": 0.7810032710851289, |
| "grad_norm": 0.9652560949325562, |
| "learning_rate": 1.0949836445743555e-05, |
| "loss": 0.5147, |
| "step": 338800 |
| }, |
| { |
| "epoch": 0.7814643119771508, |
| "grad_norm": 1.0315585136413574, |
| "learning_rate": 1.092678440114246e-05, |
| "loss": 0.4721, |
| "step": 339000 |
| }, |
| { |
| "epoch": 0.7819253528691728, |
| "grad_norm": 1.015569806098938, |
| "learning_rate": 1.0903732356541365e-05, |
| "loss": 0.4365, |
| "step": 339200 |
| }, |
| { |
| "epoch": 0.7823863937611947, |
| "grad_norm": 0.49842461943626404, |
| "learning_rate": 1.0880680311940269e-05, |
| "loss": 0.4841, |
| "step": 339400 |
| }, |
| { |
| "epoch": 0.7828474346532166, |
| "grad_norm": 0.7842098474502563, |
| "learning_rate": 1.0857628267339173e-05, |
| "loss": 0.4589, |
| "step": 339600 |
| }, |
| { |
| "epoch": 0.7833084755452385, |
| "grad_norm": 1.2681951522827148, |
| "learning_rate": 1.0834576222738076e-05, |
| "loss": 0.4821, |
| "step": 339800 |
| }, |
| { |
| "epoch": 0.7837695164372604, |
| "grad_norm": 1.8472216129302979, |
| "learning_rate": 1.081152417813698e-05, |
| "loss": 0.4841, |
| "step": 340000 |
| }, |
| { |
| "epoch": 0.7842305573292824, |
| "grad_norm": 1.1875754594802856, |
| "learning_rate": 1.0788472133535884e-05, |
| "loss": 0.4509, |
| "step": 340200 |
| }, |
| { |
| "epoch": 0.7846915982213042, |
| "grad_norm": 1.493262529373169, |
| "learning_rate": 1.0765420088934788e-05, |
| "loss": 0.4853, |
| "step": 340400 |
| }, |
| { |
| "epoch": 0.7851526391133261, |
| "grad_norm": 1.0441592931747437, |
| "learning_rate": 1.0742368044333692e-05, |
| "loss": 0.5009, |
| "step": 340600 |
| }, |
| { |
| "epoch": 0.785613680005348, |
| "grad_norm": 1.7319620847702026, |
| "learning_rate": 1.0719315999732598e-05, |
| "loss": 0.5304, |
| "step": 340800 |
| }, |
| { |
| "epoch": 0.78607472089737, |
| "grad_norm": 1.3646876811981201, |
| "learning_rate": 1.06962639551315e-05, |
| "loss": 0.4885, |
| "step": 341000 |
| }, |
| { |
| "epoch": 0.7865357617893919, |
| "grad_norm": 1.5010404586791992, |
| "learning_rate": 1.0673211910530404e-05, |
| "loss": 0.4912, |
| "step": 341200 |
| }, |
| { |
| "epoch": 0.7869968026814138, |
| "grad_norm": 0.8283145427703857, |
| "learning_rate": 1.0650159865929308e-05, |
| "loss": 0.4941, |
| "step": 341400 |
| }, |
| { |
| "epoch": 0.7874578435734357, |
| "grad_norm": 0.6535471677780151, |
| "learning_rate": 1.0627107821328214e-05, |
| "loss": 0.522, |
| "step": 341600 |
| }, |
| { |
| "epoch": 0.7879188844654577, |
| "grad_norm": 1.1741523742675781, |
| "learning_rate": 1.0604055776727118e-05, |
| "loss": 0.5234, |
| "step": 341800 |
| }, |
| { |
| "epoch": 0.7883799253574796, |
| "grad_norm": 1.3052113056182861, |
| "learning_rate": 1.0581003732126022e-05, |
| "loss": 0.495, |
| "step": 342000 |
| }, |
| { |
| "epoch": 0.7888409662495015, |
| "grad_norm": 1.795502781867981, |
| "learning_rate": 1.0557951687524926e-05, |
| "loss": 0.4678, |
| "step": 342200 |
| }, |
| { |
| "epoch": 0.7893020071415234, |
| "grad_norm": 0.9580342769622803, |
| "learning_rate": 1.053489964292383e-05, |
| "loss": 0.5116, |
| "step": 342400 |
| }, |
| { |
| "epoch": 0.7897630480335454, |
| "grad_norm": 1.020665168762207, |
| "learning_rate": 1.0511847598322734e-05, |
| "loss": 0.4891, |
| "step": 342600 |
| }, |
| { |
| "epoch": 0.7902240889255673, |
| "grad_norm": 0.8749563694000244, |
| "learning_rate": 1.0488795553721638e-05, |
| "loss": 0.4898, |
| "step": 342800 |
| }, |
| { |
| "epoch": 0.7906851298175892, |
| "grad_norm": 0.8884357810020447, |
| "learning_rate": 1.0465743509120542e-05, |
| "loss": 0.4513, |
| "step": 343000 |
| }, |
| { |
| "epoch": 0.7911461707096111, |
| "grad_norm": 0.8629872798919678, |
| "learning_rate": 1.0442691464519446e-05, |
| "loss": 0.4825, |
| "step": 343200 |
| }, |
| { |
| "epoch": 0.791607211601633, |
| "grad_norm": 1.346708059310913, |
| "learning_rate": 1.041963941991835e-05, |
| "loss": 0.5254, |
| "step": 343400 |
| }, |
| { |
| "epoch": 0.792068252493655, |
| "grad_norm": 0.5898563265800476, |
| "learning_rate": 1.0396587375317254e-05, |
| "loss": 0.4761, |
| "step": 343600 |
| }, |
| { |
| "epoch": 0.7925292933856768, |
| "grad_norm": 0.49635791778564453, |
| "learning_rate": 1.0373535330716158e-05, |
| "loss": 0.4639, |
| "step": 343800 |
| }, |
| { |
| "epoch": 0.7929903342776987, |
| "grad_norm": 0.534585177898407, |
| "learning_rate": 1.0350483286115062e-05, |
| "loss": 0.5002, |
| "step": 344000 |
| }, |
| { |
| "epoch": 0.7934513751697206, |
| "grad_norm": 1.0430246591567993, |
| "learning_rate": 1.0327431241513967e-05, |
| "loss": 0.4492, |
| "step": 344200 |
| }, |
| { |
| "epoch": 0.7939124160617426, |
| "grad_norm": 0.9281976819038391, |
| "learning_rate": 1.0304379196912871e-05, |
| "loss": 0.4478, |
| "step": 344400 |
| }, |
| { |
| "epoch": 0.7943734569537645, |
| "grad_norm": 1.5951513051986694, |
| "learning_rate": 1.0281327152311775e-05, |
| "loss": 0.4651, |
| "step": 344600 |
| }, |
| { |
| "epoch": 0.7948344978457864, |
| "grad_norm": 1.9117207527160645, |
| "learning_rate": 1.0258275107710678e-05, |
| "loss": 0.4564, |
| "step": 344800 |
| }, |
| { |
| "epoch": 0.7952955387378083, |
| "grad_norm": 1.1856075525283813, |
| "learning_rate": 1.0235223063109583e-05, |
| "loss": 0.5218, |
| "step": 345000 |
| }, |
| { |
| "epoch": 0.7957565796298303, |
| "grad_norm": 1.4824328422546387, |
| "learning_rate": 1.0212171018508487e-05, |
| "loss": 0.5053, |
| "step": 345200 |
| }, |
| { |
| "epoch": 0.7962176205218522, |
| "grad_norm": 1.768130898475647, |
| "learning_rate": 1.0189118973907391e-05, |
| "loss": 0.5017, |
| "step": 345400 |
| }, |
| { |
| "epoch": 0.7966786614138741, |
| "grad_norm": 1.2414652109146118, |
| "learning_rate": 1.0166066929306295e-05, |
| "loss": 0.51, |
| "step": 345600 |
| }, |
| { |
| "epoch": 0.797139702305896, |
| "grad_norm": 2.830430507659912, |
| "learning_rate": 1.01430148847052e-05, |
| "loss": 0.4847, |
| "step": 345800 |
| }, |
| { |
| "epoch": 0.797600743197918, |
| "grad_norm": 1.8276104927062988, |
| "learning_rate": 1.0119962840104103e-05, |
| "loss": 0.5412, |
| "step": 346000 |
| }, |
| { |
| "epoch": 0.7980617840899399, |
| "grad_norm": 1.8435417413711548, |
| "learning_rate": 1.0096910795503007e-05, |
| "loss": 0.4832, |
| "step": 346200 |
| }, |
| { |
| "epoch": 0.7985228249819618, |
| "grad_norm": 1.2370027303695679, |
| "learning_rate": 1.0073858750901911e-05, |
| "loss": 0.437, |
| "step": 346400 |
| }, |
| { |
| "epoch": 0.7989838658739837, |
| "grad_norm": 0.6917985677719116, |
| "learning_rate": 1.0050806706300815e-05, |
| "loss": 0.5141, |
| "step": 346600 |
| }, |
| { |
| "epoch": 0.7994449067660057, |
| "grad_norm": 2.1598243713378906, |
| "learning_rate": 1.0027754661699721e-05, |
| "loss": 0.4865, |
| "step": 346800 |
| }, |
| { |
| "epoch": 0.7999059476580276, |
| "grad_norm": 1.8002493381500244, |
| "learning_rate": 1.0004702617098625e-05, |
| "loss": 0.4876, |
| "step": 347000 |
| }, |
| { |
| "epoch": 0.8003669885500494, |
| "grad_norm": 1.486546277999878, |
| "learning_rate": 9.981650572497527e-06, |
| "loss": 0.5157, |
| "step": 347200 |
| }, |
| { |
| "epoch": 0.8008280294420713, |
| "grad_norm": 1.7758817672729492, |
| "learning_rate": 9.958598527896431e-06, |
| "loss": 0.5249, |
| "step": 347400 |
| }, |
| { |
| "epoch": 0.8012890703340932, |
| "grad_norm": 0.8744950294494629, |
| "learning_rate": 9.935546483295337e-06, |
| "loss": 0.4575, |
| "step": 347600 |
| }, |
| { |
| "epoch": 0.8017501112261152, |
| "grad_norm": 1.4803967475891113, |
| "learning_rate": 9.91249443869424e-06, |
| "loss": 0.5104, |
| "step": 347800 |
| }, |
| { |
| "epoch": 0.8022111521181371, |
| "grad_norm": 2.251115560531616, |
| "learning_rate": 9.889442394093145e-06, |
| "loss": 0.471, |
| "step": 348000 |
| }, |
| { |
| "epoch": 0.802672193010159, |
| "grad_norm": 1.8598825931549072, |
| "learning_rate": 9.866390349492049e-06, |
| "loss": 0.5484, |
| "step": 348200 |
| }, |
| { |
| "epoch": 0.8031332339021809, |
| "grad_norm": 1.993989109992981, |
| "learning_rate": 9.843338304890953e-06, |
| "loss": 0.5437, |
| "step": 348400 |
| }, |
| { |
| "epoch": 0.8035942747942029, |
| "grad_norm": 1.425431251525879, |
| "learning_rate": 9.820286260289857e-06, |
| "loss": 0.4386, |
| "step": 348600 |
| }, |
| { |
| "epoch": 0.8040553156862248, |
| "grad_norm": 0.4540669620037079, |
| "learning_rate": 9.79723421568876e-06, |
| "loss": 0.4557, |
| "step": 348800 |
| }, |
| { |
| "epoch": 0.8045163565782467, |
| "grad_norm": 1.800315022468567, |
| "learning_rate": 9.774182171087665e-06, |
| "loss": 0.4771, |
| "step": 349000 |
| }, |
| { |
| "epoch": 0.8049773974702686, |
| "grad_norm": 0.8877231478691101, |
| "learning_rate": 9.75113012648657e-06, |
| "loss": 0.4811, |
| "step": 349200 |
| }, |
| { |
| "epoch": 0.8054384383622906, |
| "grad_norm": 1.3885689973831177, |
| "learning_rate": 9.728078081885474e-06, |
| "loss": 0.5492, |
| "step": 349400 |
| }, |
| { |
| "epoch": 0.8058994792543125, |
| "grad_norm": 1.6329267024993896, |
| "learning_rate": 9.705026037284377e-06, |
| "loss": 0.4933, |
| "step": 349600 |
| }, |
| { |
| "epoch": 0.8063605201463344, |
| "grad_norm": 1.2911161184310913, |
| "learning_rate": 9.68197399268328e-06, |
| "loss": 0.4724, |
| "step": 349800 |
| }, |
| { |
| "epoch": 0.8068215610383563, |
| "grad_norm": 1.7925668954849243, |
| "learning_rate": 9.658921948082185e-06, |
| "loss": 0.5562, |
| "step": 350000 |
| }, |
| { |
| "epoch": 0.8068215610383563, |
| "eval_loss": 0.48525139689445496, |
| "eval_runtime": 144.215, |
| "eval_samples_per_second": 30.385, |
| "eval_steps_per_second": 30.385, |
| "step": 350000 |
| }, |
| { |
| "epoch": 0.8072826019303783, |
| "grad_norm": 1.9523992538452148, |
| "learning_rate": 9.63586990348109e-06, |
| "loss": 0.4891, |
| "step": 350200 |
| }, |
| { |
| "epoch": 0.8077436428224002, |
| "grad_norm": 0.8594640493392944, |
| "learning_rate": 9.612817858879994e-06, |
| "loss": 0.5044, |
| "step": 350400 |
| }, |
| { |
| "epoch": 0.808204683714422, |
| "grad_norm": 0.9530147314071655, |
| "learning_rate": 9.589765814278898e-06, |
| "loss": 0.4518, |
| "step": 350600 |
| }, |
| { |
| "epoch": 0.8086657246064439, |
| "grad_norm": 1.8223358392715454, |
| "learning_rate": 9.566713769677802e-06, |
| "loss": 0.4809, |
| "step": 350800 |
| }, |
| { |
| "epoch": 0.8091267654984658, |
| "grad_norm": 4.091012477874756, |
| "learning_rate": 9.543661725076706e-06, |
| "loss": 0.4465, |
| "step": 351000 |
| }, |
| { |
| "epoch": 0.8095878063904878, |
| "grad_norm": 1.6293407678604126, |
| "learning_rate": 9.52060968047561e-06, |
| "loss": 0.4734, |
| "step": 351200 |
| }, |
| { |
| "epoch": 0.8100488472825097, |
| "grad_norm": 1.2203644514083862, |
| "learning_rate": 9.497557635874514e-06, |
| "loss": 0.5044, |
| "step": 351400 |
| }, |
| { |
| "epoch": 0.8105098881745316, |
| "grad_norm": 1.3531818389892578, |
| "learning_rate": 9.474505591273418e-06, |
| "loss": 0.4731, |
| "step": 351600 |
| }, |
| { |
| "epoch": 0.8109709290665535, |
| "grad_norm": 2.762836217880249, |
| "learning_rate": 9.451453546672324e-06, |
| "loss": 0.5298, |
| "step": 351800 |
| }, |
| { |
| "epoch": 0.8114319699585755, |
| "grad_norm": 1.708924651145935, |
| "learning_rate": 9.428401502071226e-06, |
| "loss": 0.5214, |
| "step": 352000 |
| }, |
| { |
| "epoch": 0.8118930108505974, |
| "grad_norm": 1.0070140361785889, |
| "learning_rate": 9.40534945747013e-06, |
| "loss": 0.55, |
| "step": 352200 |
| }, |
| { |
| "epoch": 0.8123540517426193, |
| "grad_norm": 1.6505459547042847, |
| "learning_rate": 9.382297412869034e-06, |
| "loss": 0.5069, |
| "step": 352400 |
| }, |
| { |
| "epoch": 0.8128150926346412, |
| "grad_norm": 1.5503573417663574, |
| "learning_rate": 9.35924536826794e-06, |
| "loss": 0.4478, |
| "step": 352600 |
| }, |
| { |
| "epoch": 0.8132761335266632, |
| "grad_norm": 1.1401780843734741, |
| "learning_rate": 9.336193323666844e-06, |
| "loss": 0.5148, |
| "step": 352800 |
| }, |
| { |
| "epoch": 0.8137371744186851, |
| "grad_norm": 1.4352729320526123, |
| "learning_rate": 9.313141279065748e-06, |
| "loss": 0.5326, |
| "step": 353000 |
| }, |
| { |
| "epoch": 0.814198215310707, |
| "grad_norm": 0.6954234838485718, |
| "learning_rate": 9.290089234464652e-06, |
| "loss": 0.5324, |
| "step": 353200 |
| }, |
| { |
| "epoch": 0.8146592562027289, |
| "grad_norm": 1.4972223043441772, |
| "learning_rate": 9.267037189863556e-06, |
| "loss": 0.4461, |
| "step": 353400 |
| }, |
| { |
| "epoch": 0.8151202970947509, |
| "grad_norm": 1.3123633861541748, |
| "learning_rate": 9.24398514526246e-06, |
| "loss": 0.4987, |
| "step": 353600 |
| }, |
| { |
| "epoch": 0.8155813379867728, |
| "grad_norm": 0.849063515663147, |
| "learning_rate": 9.220933100661364e-06, |
| "loss": 0.5218, |
| "step": 353800 |
| }, |
| { |
| "epoch": 0.8160423788787946, |
| "grad_norm": 0.5541665554046631, |
| "learning_rate": 9.197881056060268e-06, |
| "loss": 0.5017, |
| "step": 354000 |
| }, |
| { |
| "epoch": 0.8165034197708165, |
| "grad_norm": 2.253199577331543, |
| "learning_rate": 9.174829011459172e-06, |
| "loss": 0.5043, |
| "step": 354200 |
| }, |
| { |
| "epoch": 0.8169644606628385, |
| "grad_norm": 0.6903029680252075, |
| "learning_rate": 9.151776966858076e-06, |
| "loss": 0.494, |
| "step": 354400 |
| }, |
| { |
| "epoch": 0.8174255015548604, |
| "grad_norm": 0.6121809482574463, |
| "learning_rate": 9.12872492225698e-06, |
| "loss": 0.4946, |
| "step": 354600 |
| }, |
| { |
| "epoch": 0.8178865424468823, |
| "grad_norm": 1.0890499353408813, |
| "learning_rate": 9.105672877655884e-06, |
| "loss": 0.4798, |
| "step": 354800 |
| }, |
| { |
| "epoch": 0.8183475833389042, |
| "grad_norm": 1.7235876321792603, |
| "learning_rate": 9.082620833054788e-06, |
| "loss": 0.4504, |
| "step": 355000 |
| }, |
| { |
| "epoch": 0.8188086242309262, |
| "grad_norm": 0.649757444858551, |
| "learning_rate": 9.059568788453693e-06, |
| "loss": 0.5059, |
| "step": 355200 |
| }, |
| { |
| "epoch": 0.8192696651229481, |
| "grad_norm": 1.162328839302063, |
| "learning_rate": 9.036516743852597e-06, |
| "loss": 0.5443, |
| "step": 355400 |
| }, |
| { |
| "epoch": 0.81973070601497, |
| "grad_norm": 0.9448625445365906, |
| "learning_rate": 9.013464699251501e-06, |
| "loss": 0.4844, |
| "step": 355600 |
| }, |
| { |
| "epoch": 0.8201917469069919, |
| "grad_norm": 1.1881784200668335, |
| "learning_rate": 8.990412654650403e-06, |
| "loss": 0.5193, |
| "step": 355800 |
| }, |
| { |
| "epoch": 0.8206527877990138, |
| "grad_norm": 1.7445374727249146, |
| "learning_rate": 8.967360610049309e-06, |
| "loss": 0.4849, |
| "step": 356000 |
| }, |
| { |
| "epoch": 0.8211138286910358, |
| "grad_norm": 1.524045705795288, |
| "learning_rate": 8.944308565448213e-06, |
| "loss": 0.4695, |
| "step": 356200 |
| }, |
| { |
| "epoch": 0.8215748695830577, |
| "grad_norm": 1.7928262948989868, |
| "learning_rate": 8.921256520847117e-06, |
| "loss": 0.5043, |
| "step": 356400 |
| }, |
| { |
| "epoch": 0.8220359104750796, |
| "grad_norm": 1.1687183380126953, |
| "learning_rate": 8.898204476246021e-06, |
| "loss": 0.5196, |
| "step": 356600 |
| }, |
| { |
| "epoch": 0.8224969513671015, |
| "grad_norm": 0.9082534909248352, |
| "learning_rate": 8.875152431644925e-06, |
| "loss": 0.4634, |
| "step": 356800 |
| }, |
| { |
| "epoch": 0.8229579922591235, |
| "grad_norm": 1.261551022529602, |
| "learning_rate": 8.852100387043829e-06, |
| "loss": 0.4848, |
| "step": 357000 |
| }, |
| { |
| "epoch": 0.8234190331511453, |
| "grad_norm": 0.9897369146347046, |
| "learning_rate": 8.829048342442733e-06, |
| "loss": 0.4554, |
| "step": 357200 |
| }, |
| { |
| "epoch": 0.8238800740431672, |
| "grad_norm": 0.7321066856384277, |
| "learning_rate": 8.805996297841637e-06, |
| "loss": 0.4909, |
| "step": 357400 |
| }, |
| { |
| "epoch": 0.8243411149351891, |
| "grad_norm": 1.8298851251602173, |
| "learning_rate": 8.782944253240541e-06, |
| "loss": 0.4725, |
| "step": 357600 |
| }, |
| { |
| "epoch": 0.8248021558272111, |
| "grad_norm": 1.113755702972412, |
| "learning_rate": 8.759892208639447e-06, |
| "loss": 0.4988, |
| "step": 357800 |
| }, |
| { |
| "epoch": 0.825263196719233, |
| "grad_norm": 0.5906481742858887, |
| "learning_rate": 8.73684016403835e-06, |
| "loss": 0.4976, |
| "step": 358000 |
| }, |
| { |
| "epoch": 0.8257242376112549, |
| "grad_norm": 1.478716254234314, |
| "learning_rate": 8.713788119437253e-06, |
| "loss": 0.4813, |
| "step": 358200 |
| }, |
| { |
| "epoch": 0.8261852785032768, |
| "grad_norm": 1.8848345279693604, |
| "learning_rate": 8.690736074836157e-06, |
| "loss": 0.5385, |
| "step": 358400 |
| }, |
| { |
| "epoch": 0.8266463193952988, |
| "grad_norm": 2.71705961227417, |
| "learning_rate": 8.667684030235063e-06, |
| "loss": 0.4924, |
| "step": 358600 |
| }, |
| { |
| "epoch": 0.8271073602873207, |
| "grad_norm": 1.3063760995864868, |
| "learning_rate": 8.644631985633967e-06, |
| "loss": 0.5204, |
| "step": 358800 |
| }, |
| { |
| "epoch": 0.8275684011793426, |
| "grad_norm": 1.4281903505325317, |
| "learning_rate": 8.62157994103287e-06, |
| "loss": 0.4972, |
| "step": 359000 |
| }, |
| { |
| "epoch": 0.8280294420713645, |
| "grad_norm": 1.393025517463684, |
| "learning_rate": 8.598527896431775e-06, |
| "loss": 0.489, |
| "step": 359200 |
| }, |
| { |
| "epoch": 0.8284904829633865, |
| "grad_norm": 0.7618604302406311, |
| "learning_rate": 8.575475851830678e-06, |
| "loss": 0.4807, |
| "step": 359400 |
| }, |
| { |
| "epoch": 0.8289515238554084, |
| "grad_norm": 0.7368053197860718, |
| "learning_rate": 8.552423807229582e-06, |
| "loss": 0.4818, |
| "step": 359600 |
| }, |
| { |
| "epoch": 0.8294125647474303, |
| "grad_norm": 1.3130792379379272, |
| "learning_rate": 8.529371762628486e-06, |
| "loss": 0.4886, |
| "step": 359800 |
| }, |
| { |
| "epoch": 0.8298736056394522, |
| "grad_norm": 1.5593905448913574, |
| "learning_rate": 8.50631971802739e-06, |
| "loss": 0.5196, |
| "step": 360000 |
| }, |
| { |
| "epoch": 0.8303346465314742, |
| "grad_norm": 0.9520807266235352, |
| "learning_rate": 8.483267673426296e-06, |
| "loss": 0.4526, |
| "step": 360200 |
| }, |
| { |
| "epoch": 0.8307956874234961, |
| "grad_norm": 1.134156346321106, |
| "learning_rate": 8.4602156288252e-06, |
| "loss": 0.5142, |
| "step": 360400 |
| }, |
| { |
| "epoch": 0.8312567283155179, |
| "grad_norm": 0.47593235969543457, |
| "learning_rate": 8.437163584224102e-06, |
| "loss": 0.5145, |
| "step": 360600 |
| }, |
| { |
| "epoch": 0.8317177692075398, |
| "grad_norm": 1.2350735664367676, |
| "learning_rate": 8.414111539623006e-06, |
| "loss": 0.5239, |
| "step": 360800 |
| }, |
| { |
| "epoch": 0.8321788100995617, |
| "grad_norm": 1.0222281217575073, |
| "learning_rate": 8.39105949502191e-06, |
| "loss": 0.4508, |
| "step": 361000 |
| }, |
| { |
| "epoch": 0.8326398509915837, |
| "grad_norm": 1.2607372999191284, |
| "learning_rate": 8.368007450420816e-06, |
| "loss": 0.5243, |
| "step": 361200 |
| }, |
| { |
| "epoch": 0.8331008918836056, |
| "grad_norm": 1.2229344844818115, |
| "learning_rate": 8.34495540581972e-06, |
| "loss": 0.5201, |
| "step": 361400 |
| }, |
| { |
| "epoch": 0.8335619327756275, |
| "grad_norm": 1.4129853248596191, |
| "learning_rate": 8.321903361218624e-06, |
| "loss": 0.4407, |
| "step": 361600 |
| }, |
| { |
| "epoch": 0.8340229736676494, |
| "grad_norm": 0.8093553185462952, |
| "learning_rate": 8.298851316617528e-06, |
| "loss": 0.4439, |
| "step": 361800 |
| }, |
| { |
| "epoch": 0.8344840145596714, |
| "grad_norm": 0.9249831438064575, |
| "learning_rate": 8.275799272016432e-06, |
| "loss": 0.5359, |
| "step": 362000 |
| }, |
| { |
| "epoch": 0.8349450554516933, |
| "grad_norm": 1.773339867591858, |
| "learning_rate": 8.252747227415336e-06, |
| "loss": 0.4875, |
| "step": 362200 |
| }, |
| { |
| "epoch": 0.8354060963437152, |
| "grad_norm": 1.0773868560791016, |
| "learning_rate": 8.22969518281424e-06, |
| "loss": 0.4725, |
| "step": 362400 |
| }, |
| { |
| "epoch": 0.8358671372357371, |
| "grad_norm": 1.0181094408035278, |
| "learning_rate": 8.206643138213144e-06, |
| "loss": 0.4273, |
| "step": 362600 |
| }, |
| { |
| "epoch": 0.8363281781277591, |
| "grad_norm": 1.118444800376892, |
| "learning_rate": 8.18359109361205e-06, |
| "loss": 0.4468, |
| "step": 362800 |
| }, |
| { |
| "epoch": 0.836789219019781, |
| "grad_norm": 1.1972088813781738, |
| "learning_rate": 8.160539049010952e-06, |
| "loss": 0.4841, |
| "step": 363000 |
| }, |
| { |
| "epoch": 0.8372502599118029, |
| "grad_norm": 1.2389174699783325, |
| "learning_rate": 8.137487004409856e-06, |
| "loss": 0.4965, |
| "step": 363200 |
| }, |
| { |
| "epoch": 0.8377113008038248, |
| "grad_norm": 1.1917423009872437, |
| "learning_rate": 8.11443495980876e-06, |
| "loss": 0.4591, |
| "step": 363400 |
| }, |
| { |
| "epoch": 0.8381723416958468, |
| "grad_norm": 1.3053388595581055, |
| "learning_rate": 8.091382915207665e-06, |
| "loss": 0.473, |
| "step": 363600 |
| }, |
| { |
| "epoch": 0.8386333825878687, |
| "grad_norm": 1.1159336566925049, |
| "learning_rate": 8.06833087060657e-06, |
| "loss": 0.5112, |
| "step": 363800 |
| }, |
| { |
| "epoch": 0.8390944234798905, |
| "grad_norm": 1.8432027101516724, |
| "learning_rate": 8.045278826005473e-06, |
| "loss": 0.4769, |
| "step": 364000 |
| }, |
| { |
| "epoch": 0.8395554643719124, |
| "grad_norm": 1.2790404558181763, |
| "learning_rate": 8.022226781404377e-06, |
| "loss": 0.4743, |
| "step": 364200 |
| }, |
| { |
| "epoch": 0.8400165052639343, |
| "grad_norm": 1.2240092754364014, |
| "learning_rate": 7.999174736803281e-06, |
| "loss": 0.5003, |
| "step": 364400 |
| }, |
| { |
| "epoch": 0.8404775461559563, |
| "grad_norm": 1.5568150281906128, |
| "learning_rate": 7.976122692202185e-06, |
| "loss": 0.5212, |
| "step": 364600 |
| }, |
| { |
| "epoch": 0.8409385870479782, |
| "grad_norm": 1.0242736339569092, |
| "learning_rate": 7.95307064760109e-06, |
| "loss": 0.5117, |
| "step": 364800 |
| }, |
| { |
| "epoch": 0.8413996279400001, |
| "grad_norm": 1.5472807884216309, |
| "learning_rate": 7.930018602999993e-06, |
| "loss": 0.5041, |
| "step": 365000 |
| }, |
| { |
| "epoch": 0.841860668832022, |
| "grad_norm": 1.990938663482666, |
| "learning_rate": 7.906966558398897e-06, |
| "loss": 0.4807, |
| "step": 365200 |
| }, |
| { |
| "epoch": 0.842321709724044, |
| "grad_norm": 1.9151630401611328, |
| "learning_rate": 7.883914513797801e-06, |
| "loss": 0.4795, |
| "step": 365400 |
| }, |
| { |
| "epoch": 0.8427827506160659, |
| "grad_norm": 1.0808899402618408, |
| "learning_rate": 7.860862469196705e-06, |
| "loss": 0.513, |
| "step": 365600 |
| }, |
| { |
| "epoch": 0.8432437915080878, |
| "grad_norm": 0.6713162660598755, |
| "learning_rate": 7.83781042459561e-06, |
| "loss": 0.4933, |
| "step": 365800 |
| }, |
| { |
| "epoch": 0.8437048324001097, |
| "grad_norm": 1.5635173320770264, |
| "learning_rate": 7.814758379994513e-06, |
| "loss": 0.4533, |
| "step": 366000 |
| }, |
| { |
| "epoch": 0.8441658732921317, |
| "grad_norm": 0.9642801880836487, |
| "learning_rate": 7.791706335393419e-06, |
| "loss": 0.5032, |
| "step": 366200 |
| }, |
| { |
| "epoch": 0.8446269141841536, |
| "grad_norm": 2.7265806198120117, |
| "learning_rate": 7.768654290792323e-06, |
| "loss": 0.4832, |
| "step": 366400 |
| }, |
| { |
| "epoch": 0.8450879550761755, |
| "grad_norm": 1.7652499675750732, |
| "learning_rate": 7.745602246191227e-06, |
| "loss": 0.5327, |
| "step": 366600 |
| }, |
| { |
| "epoch": 0.8455489959681974, |
| "grad_norm": 1.3054319620132446, |
| "learning_rate": 7.72255020159013e-06, |
| "loss": 0.4696, |
| "step": 366800 |
| }, |
| { |
| "epoch": 0.8460100368602194, |
| "grad_norm": 1.4413760900497437, |
| "learning_rate": 7.699498156989035e-06, |
| "loss": 0.5115, |
| "step": 367000 |
| }, |
| { |
| "epoch": 0.8464710777522413, |
| "grad_norm": 1.7205134630203247, |
| "learning_rate": 7.676446112387939e-06, |
| "loss": 0.5295, |
| "step": 367200 |
| }, |
| { |
| "epoch": 0.8469321186442631, |
| "grad_norm": 0.7298296689987183, |
| "learning_rate": 7.653394067786843e-06, |
| "loss": 0.4637, |
| "step": 367400 |
| }, |
| { |
| "epoch": 0.847393159536285, |
| "grad_norm": 1.1865860223770142, |
| "learning_rate": 7.630342023185747e-06, |
| "loss": 0.4559, |
| "step": 367600 |
| }, |
| { |
| "epoch": 0.847854200428307, |
| "grad_norm": 4.188174247741699, |
| "learning_rate": 7.607289978584652e-06, |
| "loss": 0.4763, |
| "step": 367800 |
| }, |
| { |
| "epoch": 0.8483152413203289, |
| "grad_norm": 1.0749932527542114, |
| "learning_rate": 7.584237933983556e-06, |
| "loss": 0.4536, |
| "step": 368000 |
| }, |
| { |
| "epoch": 0.8487762822123508, |
| "grad_norm": 2.213075637817383, |
| "learning_rate": 7.561185889382459e-06, |
| "loss": 0.5016, |
| "step": 368200 |
| }, |
| { |
| "epoch": 0.8492373231043727, |
| "grad_norm": 2.0269930362701416, |
| "learning_rate": 7.538133844781363e-06, |
| "loss": 0.4591, |
| "step": 368400 |
| }, |
| { |
| "epoch": 0.8496983639963946, |
| "grad_norm": 1.625063180923462, |
| "learning_rate": 7.515081800180267e-06, |
| "loss": 0.4562, |
| "step": 368600 |
| }, |
| { |
| "epoch": 0.8501594048884166, |
| "grad_norm": 1.1130571365356445, |
| "learning_rate": 7.4920297555791715e-06, |
| "loss": 0.4971, |
| "step": 368800 |
| }, |
| { |
| "epoch": 0.8506204457804385, |
| "grad_norm": 1.0837411880493164, |
| "learning_rate": 7.4689777109780755e-06, |
| "loss": 0.52, |
| "step": 369000 |
| }, |
| { |
| "epoch": 0.8510814866724604, |
| "grad_norm": 1.0088603496551514, |
| "learning_rate": 7.4459256663769795e-06, |
| "loss": 0.4737, |
| "step": 369200 |
| }, |
| { |
| "epoch": 0.8515425275644823, |
| "grad_norm": 1.067406177520752, |
| "learning_rate": 7.4228736217758835e-06, |
| "loss": 0.5613, |
| "step": 369400 |
| }, |
| { |
| "epoch": 0.8520035684565043, |
| "grad_norm": 1.7936733961105347, |
| "learning_rate": 7.399821577174788e-06, |
| "loss": 0.5067, |
| "step": 369600 |
| }, |
| { |
| "epoch": 0.8524646093485262, |
| "grad_norm": 0.9910215139389038, |
| "learning_rate": 7.376769532573692e-06, |
| "loss": 0.473, |
| "step": 369800 |
| }, |
| { |
| "epoch": 0.8529256502405481, |
| "grad_norm": 1.1868542432785034, |
| "learning_rate": 7.353717487972596e-06, |
| "loss": 0.4962, |
| "step": 370000 |
| }, |
| { |
| "epoch": 0.85338669113257, |
| "grad_norm": 0.6035569906234741, |
| "learning_rate": 7.330665443371499e-06, |
| "loss": 0.5243, |
| "step": 370200 |
| }, |
| { |
| "epoch": 0.853847732024592, |
| "grad_norm": 1.3407708406448364, |
| "learning_rate": 7.307613398770405e-06, |
| "loss": 0.445, |
| "step": 370400 |
| }, |
| { |
| "epoch": 0.8543087729166139, |
| "grad_norm": 2.3808753490448, |
| "learning_rate": 7.284561354169308e-06, |
| "loss": 0.4929, |
| "step": 370600 |
| }, |
| { |
| "epoch": 0.8547698138086357, |
| "grad_norm": 1.6823943853378296, |
| "learning_rate": 7.261509309568212e-06, |
| "loss": 0.5004, |
| "step": 370800 |
| }, |
| { |
| "epoch": 0.8552308547006576, |
| "grad_norm": 0.6995494365692139, |
| "learning_rate": 7.238457264967116e-06, |
| "loss": 0.5143, |
| "step": 371000 |
| }, |
| { |
| "epoch": 0.8556918955926796, |
| "grad_norm": 0.914682924747467, |
| "learning_rate": 7.215405220366021e-06, |
| "loss": 0.4528, |
| "step": 371200 |
| }, |
| { |
| "epoch": 0.8561529364847015, |
| "grad_norm": 0.5527245402336121, |
| "learning_rate": 7.192353175764925e-06, |
| "loss": 0.5004, |
| "step": 371400 |
| }, |
| { |
| "epoch": 0.8566139773767234, |
| "grad_norm": 1.3169046640396118, |
| "learning_rate": 7.169301131163829e-06, |
| "loss": 0.4956, |
| "step": 371600 |
| }, |
| { |
| "epoch": 0.8570750182687453, |
| "grad_norm": 1.4355896711349487, |
| "learning_rate": 7.146249086562733e-06, |
| "loss": 0.4683, |
| "step": 371800 |
| }, |
| { |
| "epoch": 0.8575360591607673, |
| "grad_norm": 1.7638542652130127, |
| "learning_rate": 7.123197041961636e-06, |
| "loss": 0.4969, |
| "step": 372000 |
| }, |
| { |
| "epoch": 0.8579971000527892, |
| "grad_norm": 0.9192449450492859, |
| "learning_rate": 7.100144997360542e-06, |
| "loss": 0.5414, |
| "step": 372200 |
| }, |
| { |
| "epoch": 0.8584581409448111, |
| "grad_norm": 0.7934924960136414, |
| "learning_rate": 7.077092952759446e-06, |
| "loss": 0.4668, |
| "step": 372400 |
| }, |
| { |
| "epoch": 0.858919181836833, |
| "grad_norm": 1.7283356189727783, |
| "learning_rate": 7.054040908158349e-06, |
| "loss": 0.4944, |
| "step": 372600 |
| }, |
| { |
| "epoch": 0.859380222728855, |
| "grad_norm": 0.7687679529190063, |
| "learning_rate": 7.030988863557253e-06, |
| "loss": 0.51, |
| "step": 372800 |
| }, |
| { |
| "epoch": 0.8598412636208769, |
| "grad_norm": 1.0831148624420166, |
| "learning_rate": 7.0079368189561585e-06, |
| "loss": 0.531, |
| "step": 373000 |
| }, |
| { |
| "epoch": 0.8603023045128988, |
| "grad_norm": 1.0071626901626587, |
| "learning_rate": 6.984884774355062e-06, |
| "loss": 0.4789, |
| "step": 373200 |
| }, |
| { |
| "epoch": 0.8607633454049207, |
| "grad_norm": 0.7966915369033813, |
| "learning_rate": 6.961832729753966e-06, |
| "loss": 0.4438, |
| "step": 373400 |
| }, |
| { |
| "epoch": 0.8612243862969426, |
| "grad_norm": 0.544999897480011, |
| "learning_rate": 6.93878068515287e-06, |
| "loss": 0.5262, |
| "step": 373600 |
| }, |
| { |
| "epoch": 0.8616854271889646, |
| "grad_norm": 1.592140555381775, |
| "learning_rate": 6.9157286405517745e-06, |
| "loss": 0.5089, |
| "step": 373800 |
| }, |
| { |
| "epoch": 0.8621464680809865, |
| "grad_norm": 1.578158974647522, |
| "learning_rate": 6.8926765959506784e-06, |
| "loss": 0.5181, |
| "step": 374000 |
| }, |
| { |
| "epoch": 0.8626075089730083, |
| "grad_norm": 1.4605205059051514, |
| "learning_rate": 6.869624551349582e-06, |
| "loss": 0.5081, |
| "step": 374200 |
| }, |
| { |
| "epoch": 0.8630685498650302, |
| "grad_norm": 2.263418436050415, |
| "learning_rate": 6.8465725067484856e-06, |
| "loss": 0.467, |
| "step": 374400 |
| }, |
| { |
| "epoch": 0.8635295907570522, |
| "grad_norm": 1.5185531377792358, |
| "learning_rate": 6.823520462147391e-06, |
| "loss": 0.5348, |
| "step": 374600 |
| }, |
| { |
| "epoch": 0.8639906316490741, |
| "grad_norm": 1.1345553398132324, |
| "learning_rate": 6.800468417546295e-06, |
| "loss": 0.4811, |
| "step": 374800 |
| }, |
| { |
| "epoch": 0.864451672541096, |
| "grad_norm": 1.926391363143921, |
| "learning_rate": 6.777416372945198e-06, |
| "loss": 0.5368, |
| "step": 375000 |
| }, |
| { |
| "epoch": 0.8649127134331179, |
| "grad_norm": 0.6592217087745667, |
| "learning_rate": 6.754364328344102e-06, |
| "loss": 0.4902, |
| "step": 375200 |
| }, |
| { |
| "epoch": 0.8653737543251399, |
| "grad_norm": 1.7800625562667847, |
| "learning_rate": 6.731312283743008e-06, |
| "loss": 0.4957, |
| "step": 375400 |
| }, |
| { |
| "epoch": 0.8658347952171618, |
| "grad_norm": 0.7634375095367432, |
| "learning_rate": 6.708260239141911e-06, |
| "loss": 0.4961, |
| "step": 375600 |
| }, |
| { |
| "epoch": 0.8662958361091837, |
| "grad_norm": 1.417075514793396, |
| "learning_rate": 6.685208194540815e-06, |
| "loss": 0.4946, |
| "step": 375800 |
| }, |
| { |
| "epoch": 0.8667568770012056, |
| "grad_norm": 1.4515326023101807, |
| "learning_rate": 6.662156149939719e-06, |
| "loss": 0.4915, |
| "step": 376000 |
| }, |
| { |
| "epoch": 0.8672179178932276, |
| "grad_norm": 0.6862966418266296, |
| "learning_rate": 6.639104105338623e-06, |
| "loss": 0.4808, |
| "step": 376200 |
| }, |
| { |
| "epoch": 0.8676789587852495, |
| "grad_norm": 1.4989879131317139, |
| "learning_rate": 6.616052060737528e-06, |
| "loss": 0.4978, |
| "step": 376400 |
| }, |
| { |
| "epoch": 0.8681399996772714, |
| "grad_norm": 1.7666966915130615, |
| "learning_rate": 6.593000016136432e-06, |
| "loss": 0.4961, |
| "step": 376600 |
| }, |
| { |
| "epoch": 0.8686010405692933, |
| "grad_norm": 1.286030888557434, |
| "learning_rate": 6.569947971535335e-06, |
| "loss": 0.4874, |
| "step": 376800 |
| }, |
| { |
| "epoch": 0.8690620814613153, |
| "grad_norm": 1.1866004467010498, |
| "learning_rate": 6.546895926934239e-06, |
| "loss": 0.488, |
| "step": 377000 |
| }, |
| { |
| "epoch": 0.8695231223533372, |
| "grad_norm": 1.996006965637207, |
| "learning_rate": 6.523843882333145e-06, |
| "loss": 0.4987, |
| "step": 377200 |
| }, |
| { |
| "epoch": 0.8699841632453591, |
| "grad_norm": 3.1626696586608887, |
| "learning_rate": 6.500791837732048e-06, |
| "loss": 0.4398, |
| "step": 377400 |
| }, |
| { |
| "epoch": 0.8704452041373809, |
| "grad_norm": 2.171281337738037, |
| "learning_rate": 6.477739793130952e-06, |
| "loss": 0.4559, |
| "step": 377600 |
| }, |
| { |
| "epoch": 0.8709062450294028, |
| "grad_norm": 1.2836635112762451, |
| "learning_rate": 6.454687748529856e-06, |
| "loss": 0.4576, |
| "step": 377800 |
| }, |
| { |
| "epoch": 0.8713672859214248, |
| "grad_norm": 4.639097213745117, |
| "learning_rate": 6.431635703928761e-06, |
| "loss": 0.4969, |
| "step": 378000 |
| }, |
| { |
| "epoch": 0.8718283268134467, |
| "grad_norm": 1.5262006521224976, |
| "learning_rate": 6.408583659327665e-06, |
| "loss": 0.4538, |
| "step": 378200 |
| }, |
| { |
| "epoch": 0.8722893677054686, |
| "grad_norm": 2.327629566192627, |
| "learning_rate": 6.3855316147265686e-06, |
| "loss": 0.4854, |
| "step": 378400 |
| }, |
| { |
| "epoch": 0.8727504085974905, |
| "grad_norm": 2.366154909133911, |
| "learning_rate": 6.3624795701254725e-06, |
| "loss": 0.5611, |
| "step": 378600 |
| }, |
| { |
| "epoch": 0.8732114494895125, |
| "grad_norm": 1.4881547689437866, |
| "learning_rate": 6.339427525524377e-06, |
| "loss": 0.5075, |
| "step": 378800 |
| }, |
| { |
| "epoch": 0.8736724903815344, |
| "grad_norm": 1.2280333042144775, |
| "learning_rate": 6.316375480923281e-06, |
| "loss": 0.5045, |
| "step": 379000 |
| }, |
| { |
| "epoch": 0.8741335312735563, |
| "grad_norm": 4.236263751983643, |
| "learning_rate": 6.2933234363221845e-06, |
| "loss": 0.4608, |
| "step": 379200 |
| }, |
| { |
| "epoch": 0.8745945721655782, |
| "grad_norm": 1.3050642013549805, |
| "learning_rate": 6.2702713917210885e-06, |
| "loss": 0.4287, |
| "step": 379400 |
| }, |
| { |
| "epoch": 0.8750556130576002, |
| "grad_norm": 2.5533287525177, |
| "learning_rate": 6.247219347119993e-06, |
| "loss": 0.5086, |
| "step": 379600 |
| }, |
| { |
| "epoch": 0.8755166539496221, |
| "grad_norm": 1.513671875, |
| "learning_rate": 6.224167302518897e-06, |
| "loss": 0.4632, |
| "step": 379800 |
| }, |
| { |
| "epoch": 0.875977694841644, |
| "grad_norm": 1.573878288269043, |
| "learning_rate": 6.201115257917801e-06, |
| "loss": 0.4616, |
| "step": 380000 |
| }, |
| { |
| "epoch": 0.8764387357336659, |
| "grad_norm": 1.5972181558609009, |
| "learning_rate": 6.178063213316705e-06, |
| "loss": 0.5206, |
| "step": 380200 |
| }, |
| { |
| "epoch": 0.8768997766256879, |
| "grad_norm": 1.171190857887268, |
| "learning_rate": 6.15501116871561e-06, |
| "loss": 0.4504, |
| "step": 380400 |
| }, |
| { |
| "epoch": 0.8773608175177098, |
| "grad_norm": 2.334261178970337, |
| "learning_rate": 6.131959124114513e-06, |
| "loss": 0.4412, |
| "step": 380600 |
| }, |
| { |
| "epoch": 0.8778218584097317, |
| "grad_norm": 1.540120005607605, |
| "learning_rate": 6.108907079513418e-06, |
| "loss": 0.4997, |
| "step": 380800 |
| }, |
| { |
| "epoch": 0.8782828993017535, |
| "grad_norm": 1.3362219333648682, |
| "learning_rate": 6.085855034912322e-06, |
| "loss": 0.4924, |
| "step": 381000 |
| }, |
| { |
| "epoch": 0.8787439401937754, |
| "grad_norm": 1.4779139757156372, |
| "learning_rate": 6.062802990311226e-06, |
| "loss": 0.5206, |
| "step": 381200 |
| }, |
| { |
| "epoch": 0.8792049810857974, |
| "grad_norm": 2.28874135017395, |
| "learning_rate": 6.03975094571013e-06, |
| "loss": 0.4775, |
| "step": 381400 |
| }, |
| { |
| "epoch": 0.8796660219778193, |
| "grad_norm": 0.9095715880393982, |
| "learning_rate": 6.016698901109035e-06, |
| "loss": 0.452, |
| "step": 381600 |
| }, |
| { |
| "epoch": 0.8801270628698412, |
| "grad_norm": 2.00390887260437, |
| "learning_rate": 5.993646856507938e-06, |
| "loss": 0.5132, |
| "step": 381800 |
| }, |
| { |
| "epoch": 0.8805881037618631, |
| "grad_norm": 1.8259698152542114, |
| "learning_rate": 5.970594811906843e-06, |
| "loss": 0.4957, |
| "step": 382000 |
| }, |
| { |
| "epoch": 0.8810491446538851, |
| "grad_norm": 1.8643205165863037, |
| "learning_rate": 5.947542767305747e-06, |
| "loss": 0.4866, |
| "step": 382200 |
| }, |
| { |
| "epoch": 0.881510185545907, |
| "grad_norm": 1.181175708770752, |
| "learning_rate": 5.924490722704651e-06, |
| "loss": 0.4836, |
| "step": 382400 |
| }, |
| { |
| "epoch": 0.8819712264379289, |
| "grad_norm": 0.8782649040222168, |
| "learning_rate": 5.901438678103555e-06, |
| "loss": 0.4407, |
| "step": 382600 |
| }, |
| { |
| "epoch": 0.8824322673299508, |
| "grad_norm": 0.6341625452041626, |
| "learning_rate": 5.878386633502459e-06, |
| "loss": 0.4339, |
| "step": 382800 |
| }, |
| { |
| "epoch": 0.8828933082219728, |
| "grad_norm": 1.5220824480056763, |
| "learning_rate": 5.855334588901363e-06, |
| "loss": 0.4594, |
| "step": 383000 |
| }, |
| { |
| "epoch": 0.8833543491139947, |
| "grad_norm": 1.0653526782989502, |
| "learning_rate": 5.832282544300267e-06, |
| "loss": 0.4825, |
| "step": 383200 |
| }, |
| { |
| "epoch": 0.8838153900060166, |
| "grad_norm": 2.4460973739624023, |
| "learning_rate": 5.8092304996991715e-06, |
| "loss": 0.4756, |
| "step": 383400 |
| }, |
| { |
| "epoch": 0.8842764308980385, |
| "grad_norm": 2.1946046352386475, |
| "learning_rate": 5.786178455098075e-06, |
| "loss": 0.4698, |
| "step": 383600 |
| }, |
| { |
| "epoch": 0.8847374717900605, |
| "grad_norm": 0.8791565895080566, |
| "learning_rate": 5.7631264104969794e-06, |
| "loss": 0.4769, |
| "step": 383800 |
| }, |
| { |
| "epoch": 0.8851985126820824, |
| "grad_norm": 1.2844878435134888, |
| "learning_rate": 5.7400743658958834e-06, |
| "loss": 0.5021, |
| "step": 384000 |
| }, |
| { |
| "epoch": 0.8856595535741043, |
| "grad_norm": 1.0738441944122314, |
| "learning_rate": 5.717022321294787e-06, |
| "loss": 0.4685, |
| "step": 384200 |
| }, |
| { |
| "epoch": 0.8861205944661261, |
| "grad_norm": 1.2508662939071655, |
| "learning_rate": 5.693970276693691e-06, |
| "loss": 0.4716, |
| "step": 384400 |
| }, |
| { |
| "epoch": 0.886581635358148, |
| "grad_norm": 0.8982871174812317, |
| "learning_rate": 5.670918232092596e-06, |
| "loss": 0.509, |
| "step": 384600 |
| }, |
| { |
| "epoch": 0.88704267625017, |
| "grad_norm": 0.6702489852905273, |
| "learning_rate": 5.647866187491499e-06, |
| "loss": 0.5054, |
| "step": 384800 |
| }, |
| { |
| "epoch": 0.8875037171421919, |
| "grad_norm": 0.8494447469711304, |
| "learning_rate": 5.624814142890404e-06, |
| "loss": 0.4878, |
| "step": 385000 |
| }, |
| { |
| "epoch": 0.8879647580342138, |
| "grad_norm": 0.8795982599258423, |
| "learning_rate": 5.601762098289308e-06, |
| "loss": 0.4457, |
| "step": 385200 |
| }, |
| { |
| "epoch": 0.8884257989262357, |
| "grad_norm": 1.1568052768707275, |
| "learning_rate": 5.578710053688212e-06, |
| "loss": 0.4967, |
| "step": 385400 |
| }, |
| { |
| "epoch": 0.8888868398182577, |
| "grad_norm": 0.8400896191596985, |
| "learning_rate": 5.555658009087116e-06, |
| "loss": 0.4741, |
| "step": 385600 |
| }, |
| { |
| "epoch": 0.8893478807102796, |
| "grad_norm": 2.020911693572998, |
| "learning_rate": 5.532605964486021e-06, |
| "loss": 0.4592, |
| "step": 385800 |
| }, |
| { |
| "epoch": 0.8898089216023015, |
| "grad_norm": 1.735339641571045, |
| "learning_rate": 5.509553919884924e-06, |
| "loss": 0.4947, |
| "step": 386000 |
| }, |
| { |
| "epoch": 0.8902699624943234, |
| "grad_norm": 1.34779953956604, |
| "learning_rate": 5.486501875283829e-06, |
| "loss": 0.5204, |
| "step": 386200 |
| }, |
| { |
| "epoch": 0.8907310033863454, |
| "grad_norm": 1.5430375337600708, |
| "learning_rate": 5.463449830682733e-06, |
| "loss": 0.4742, |
| "step": 386400 |
| }, |
| { |
| "epoch": 0.8911920442783673, |
| "grad_norm": 1.2541803121566772, |
| "learning_rate": 5.440397786081636e-06, |
| "loss": 0.4613, |
| "step": 386600 |
| }, |
| { |
| "epoch": 0.8916530851703892, |
| "grad_norm": 1.007149577140808, |
| "learning_rate": 5.417345741480541e-06, |
| "loss": 0.4892, |
| "step": 386800 |
| }, |
| { |
| "epoch": 0.8921141260624111, |
| "grad_norm": 0.808237612247467, |
| "learning_rate": 5.394293696879445e-06, |
| "loss": 0.5208, |
| "step": 387000 |
| }, |
| { |
| "epoch": 0.8925751669544331, |
| "grad_norm": 0.8351776599884033, |
| "learning_rate": 5.371241652278349e-06, |
| "loss": 0.4799, |
| "step": 387200 |
| }, |
| { |
| "epoch": 0.893036207846455, |
| "grad_norm": 1.6771140098571777, |
| "learning_rate": 5.348189607677253e-06, |
| "loss": 0.4752, |
| "step": 387400 |
| }, |
| { |
| "epoch": 0.8934972487384769, |
| "grad_norm": 0.966846227645874, |
| "learning_rate": 5.325137563076158e-06, |
| "loss": 0.4927, |
| "step": 387600 |
| }, |
| { |
| "epoch": 0.8939582896304987, |
| "grad_norm": 1.701539397239685, |
| "learning_rate": 5.302085518475061e-06, |
| "loss": 0.524, |
| "step": 387800 |
| }, |
| { |
| "epoch": 0.8944193305225207, |
| "grad_norm": 0.4714783728122711, |
| "learning_rate": 5.279033473873966e-06, |
| "loss": 0.499, |
| "step": 388000 |
| }, |
| { |
| "epoch": 0.8948803714145426, |
| "grad_norm": 1.1191890239715576, |
| "learning_rate": 5.2559814292728696e-06, |
| "loss": 0.5277, |
| "step": 388200 |
| }, |
| { |
| "epoch": 0.8953414123065645, |
| "grad_norm": 1.3981695175170898, |
| "learning_rate": 5.2329293846717736e-06, |
| "loss": 0.4648, |
| "step": 388400 |
| }, |
| { |
| "epoch": 0.8958024531985864, |
| "grad_norm": 1.0515044927597046, |
| "learning_rate": 5.2098773400706775e-06, |
| "loss": 0.4638, |
| "step": 388600 |
| }, |
| { |
| "epoch": 0.8962634940906083, |
| "grad_norm": 0.9398881196975708, |
| "learning_rate": 5.186825295469582e-06, |
| "loss": 0.4993, |
| "step": 388800 |
| }, |
| { |
| "epoch": 0.8967245349826303, |
| "grad_norm": 0.9516793489456177, |
| "learning_rate": 5.163773250868486e-06, |
| "loss": 0.502, |
| "step": 389000 |
| }, |
| { |
| "epoch": 0.8971855758746522, |
| "grad_norm": 2.8746252059936523, |
| "learning_rate": 5.14072120626739e-06, |
| "loss": 0.4688, |
| "step": 389200 |
| }, |
| { |
| "epoch": 0.8976466167666741, |
| "grad_norm": 0.9749366641044617, |
| "learning_rate": 5.117669161666294e-06, |
| "loss": 0.4976, |
| "step": 389400 |
| }, |
| { |
| "epoch": 0.898107657658696, |
| "grad_norm": 1.4214197397232056, |
| "learning_rate": 5.094617117065198e-06, |
| "loss": 0.5034, |
| "step": 389600 |
| }, |
| { |
| "epoch": 0.898568698550718, |
| "grad_norm": 1.5979713201522827, |
| "learning_rate": 5.071565072464102e-06, |
| "loss": 0.4687, |
| "step": 389800 |
| }, |
| { |
| "epoch": 0.8990297394427399, |
| "grad_norm": 1.1002912521362305, |
| "learning_rate": 5.048513027863007e-06, |
| "loss": 0.4935, |
| "step": 390000 |
| }, |
| { |
| "epoch": 0.8994907803347618, |
| "grad_norm": 1.4925017356872559, |
| "learning_rate": 5.025460983261911e-06, |
| "loss": 0.4793, |
| "step": 390200 |
| }, |
| { |
| "epoch": 0.8999518212267837, |
| "grad_norm": 1.721877932548523, |
| "learning_rate": 5.002408938660814e-06, |
| "loss": 0.4216, |
| "step": 390400 |
| }, |
| { |
| "epoch": 0.9004128621188057, |
| "grad_norm": 1.0198794603347778, |
| "learning_rate": 4.979356894059719e-06, |
| "loss": 0.5106, |
| "step": 390600 |
| }, |
| { |
| "epoch": 0.9008739030108276, |
| "grad_norm": 1.2488328218460083, |
| "learning_rate": 4.956304849458623e-06, |
| "loss": 0.4419, |
| "step": 390800 |
| }, |
| { |
| "epoch": 0.9013349439028495, |
| "grad_norm": 1.1686707735061646, |
| "learning_rate": 4.933252804857527e-06, |
| "loss": 0.5004, |
| "step": 391000 |
| }, |
| { |
| "epoch": 0.9017959847948713, |
| "grad_norm": 2.1322028636932373, |
| "learning_rate": 4.910200760256431e-06, |
| "loss": 0.449, |
| "step": 391200 |
| }, |
| { |
| "epoch": 0.9022570256868933, |
| "grad_norm": 1.7106928825378418, |
| "learning_rate": 4.887148715655336e-06, |
| "loss": 0.4574, |
| "step": 391400 |
| }, |
| { |
| "epoch": 0.9027180665789152, |
| "grad_norm": 0.9263075590133667, |
| "learning_rate": 4.864096671054239e-06, |
| "loss": 0.4774, |
| "step": 391600 |
| }, |
| { |
| "epoch": 0.9031791074709371, |
| "grad_norm": 1.4855661392211914, |
| "learning_rate": 4.841044626453144e-06, |
| "loss": 0.4855, |
| "step": 391800 |
| }, |
| { |
| "epoch": 0.903640148362959, |
| "grad_norm": 1.2408193349838257, |
| "learning_rate": 4.817992581852048e-06, |
| "loss": 0.5039, |
| "step": 392000 |
| }, |
| { |
| "epoch": 0.904101189254981, |
| "grad_norm": 0.6845735311508179, |
| "learning_rate": 4.794940537250952e-06, |
| "loss": 0.5082, |
| "step": 392200 |
| }, |
| { |
| "epoch": 0.9045622301470029, |
| "grad_norm": 1.4098901748657227, |
| "learning_rate": 4.771888492649856e-06, |
| "loss": 0.5194, |
| "step": 392400 |
| }, |
| { |
| "epoch": 0.9050232710390248, |
| "grad_norm": 1.3616442680358887, |
| "learning_rate": 4.7488364480487605e-06, |
| "loss": 0.5032, |
| "step": 392600 |
| }, |
| { |
| "epoch": 0.9054843119310467, |
| "grad_norm": 1.0427989959716797, |
| "learning_rate": 4.725784403447664e-06, |
| "loss": 0.4969, |
| "step": 392800 |
| }, |
| { |
| "epoch": 0.9059453528230687, |
| "grad_norm": 1.2512778043746948, |
| "learning_rate": 4.7027323588465685e-06, |
| "loss": 0.4729, |
| "step": 393000 |
| }, |
| { |
| "epoch": 0.9064063937150906, |
| "grad_norm": 1.1229169368743896, |
| "learning_rate": 4.6796803142454725e-06, |
| "loss": 0.5122, |
| "step": 393200 |
| }, |
| { |
| "epoch": 0.9068674346071125, |
| "grad_norm": 1.5654805898666382, |
| "learning_rate": 4.6566282696443765e-06, |
| "loss": 0.4615, |
| "step": 393400 |
| }, |
| { |
| "epoch": 0.9073284754991344, |
| "grad_norm": 0.5866159796714783, |
| "learning_rate": 4.6335762250432805e-06, |
| "loss": 0.4916, |
| "step": 393600 |
| }, |
| { |
| "epoch": 0.9077895163911563, |
| "grad_norm": 1.5592825412750244, |
| "learning_rate": 4.6105241804421844e-06, |
| "loss": 0.4996, |
| "step": 393800 |
| }, |
| { |
| "epoch": 0.9082505572831783, |
| "grad_norm": 0.9208193421363831, |
| "learning_rate": 4.587472135841088e-06, |
| "loss": 0.4957, |
| "step": 394000 |
| }, |
| { |
| "epoch": 0.9087115981752002, |
| "grad_norm": 1.1779547929763794, |
| "learning_rate": 4.564420091239992e-06, |
| "loss": 0.4633, |
| "step": 394200 |
| }, |
| { |
| "epoch": 0.9091726390672221, |
| "grad_norm": 1.6727235317230225, |
| "learning_rate": 4.541368046638897e-06, |
| "loss": 0.4862, |
| "step": 394400 |
| }, |
| { |
| "epoch": 0.9096336799592439, |
| "grad_norm": 1.5873490571975708, |
| "learning_rate": 4.5183160020378e-06, |
| "loss": 0.4517, |
| "step": 394600 |
| }, |
| { |
| "epoch": 0.9100947208512659, |
| "grad_norm": 1.310510277748108, |
| "learning_rate": 4.495263957436705e-06, |
| "loss": 0.5331, |
| "step": 394800 |
| }, |
| { |
| "epoch": 0.9105557617432878, |
| "grad_norm": 0.5663114190101624, |
| "learning_rate": 4.472211912835609e-06, |
| "loss": 0.4823, |
| "step": 395000 |
| }, |
| { |
| "epoch": 0.9110168026353097, |
| "grad_norm": 1.231022596359253, |
| "learning_rate": 4.449159868234513e-06, |
| "loss": 0.4842, |
| "step": 395200 |
| }, |
| { |
| "epoch": 0.9114778435273316, |
| "grad_norm": 1.241389274597168, |
| "learning_rate": 4.426107823633417e-06, |
| "loss": 0.4604, |
| "step": 395400 |
| }, |
| { |
| "epoch": 0.9119388844193536, |
| "grad_norm": 0.6958038210868835, |
| "learning_rate": 4.403055779032322e-06, |
| "loss": 0.4894, |
| "step": 395600 |
| }, |
| { |
| "epoch": 0.9123999253113755, |
| "grad_norm": 0.7018533945083618, |
| "learning_rate": 4.380003734431225e-06, |
| "loss": 0.4328, |
| "step": 395800 |
| }, |
| { |
| "epoch": 0.9128609662033974, |
| "grad_norm": 1.3242965936660767, |
| "learning_rate": 4.35695168983013e-06, |
| "loss": 0.4759, |
| "step": 396000 |
| }, |
| { |
| "epoch": 0.9133220070954193, |
| "grad_norm": 1.1554487943649292, |
| "learning_rate": 4.333899645229034e-06, |
| "loss": 0.5244, |
| "step": 396200 |
| }, |
| { |
| "epoch": 0.9137830479874413, |
| "grad_norm": 1.8800641298294067, |
| "learning_rate": 4.310847600627938e-06, |
| "loss": 0.4965, |
| "step": 396400 |
| }, |
| { |
| "epoch": 0.9142440888794632, |
| "grad_norm": 4.930298328399658, |
| "learning_rate": 4.287795556026842e-06, |
| "loss": 0.4721, |
| "step": 396600 |
| }, |
| { |
| "epoch": 0.9147051297714851, |
| "grad_norm": 1.5765228271484375, |
| "learning_rate": 4.264743511425747e-06, |
| "loss": 0.5089, |
| "step": 396800 |
| }, |
| { |
| "epoch": 0.915166170663507, |
| "grad_norm": 0.436431348323822, |
| "learning_rate": 4.24169146682465e-06, |
| "loss": 0.4522, |
| "step": 397000 |
| }, |
| { |
| "epoch": 0.915627211555529, |
| "grad_norm": 1.2564866542816162, |
| "learning_rate": 4.218639422223555e-06, |
| "loss": 0.4901, |
| "step": 397200 |
| }, |
| { |
| "epoch": 0.9160882524475509, |
| "grad_norm": 1.2301688194274902, |
| "learning_rate": 4.195587377622459e-06, |
| "loss": 0.4858, |
| "step": 397400 |
| }, |
| { |
| "epoch": 0.9165492933395728, |
| "grad_norm": 3.086254358291626, |
| "learning_rate": 4.172535333021363e-06, |
| "loss": 0.4907, |
| "step": 397600 |
| }, |
| { |
| "epoch": 0.9170103342315947, |
| "grad_norm": 1.3928074836730957, |
| "learning_rate": 4.149483288420267e-06, |
| "loss": 0.5467, |
| "step": 397800 |
| }, |
| { |
| "epoch": 0.9174713751236165, |
| "grad_norm": 0.784092366695404, |
| "learning_rate": 4.126431243819171e-06, |
| "loss": 0.4641, |
| "step": 398000 |
| }, |
| { |
| "epoch": 0.9179324160156385, |
| "grad_norm": 0.6679478287696838, |
| "learning_rate": 4.1033791992180746e-06, |
| "loss": 0.5061, |
| "step": 398200 |
| }, |
| { |
| "epoch": 0.9183934569076604, |
| "grad_norm": 1.8378559350967407, |
| "learning_rate": 4.0803271546169785e-06, |
| "loss": 0.4525, |
| "step": 398400 |
| }, |
| { |
| "epoch": 0.9188544977996823, |
| "grad_norm": 1.1620184183120728, |
| "learning_rate": 4.057275110015883e-06, |
| "loss": 0.561, |
| "step": 398600 |
| }, |
| { |
| "epoch": 0.9193155386917042, |
| "grad_norm": 1.5776022672653198, |
| "learning_rate": 4.034223065414787e-06, |
| "loss": 0.4991, |
| "step": 398800 |
| }, |
| { |
| "epoch": 0.9197765795837262, |
| "grad_norm": 1.4153554439544678, |
| "learning_rate": 4.011171020813691e-06, |
| "loss": 0.5109, |
| "step": 399000 |
| }, |
| { |
| "epoch": 0.9202376204757481, |
| "grad_norm": 1.5178178548812866, |
| "learning_rate": 3.988118976212595e-06, |
| "loss": 0.4836, |
| "step": 399200 |
| }, |
| { |
| "epoch": 0.92069866136777, |
| "grad_norm": 0.7913076281547546, |
| "learning_rate": 3.965066931611499e-06, |
| "loss": 0.4699, |
| "step": 399400 |
| }, |
| { |
| "epoch": 0.9211597022597919, |
| "grad_norm": 0.7987996339797974, |
| "learning_rate": 3.942014887010403e-06, |
| "loss": 0.5013, |
| "step": 399600 |
| }, |
| { |
| "epoch": 0.9216207431518139, |
| "grad_norm": 0.8386745452880859, |
| "learning_rate": 3.918962842409308e-06, |
| "loss": 0.4956, |
| "step": 399800 |
| }, |
| { |
| "epoch": 0.9220817840438358, |
| "grad_norm": 0.8059350252151489, |
| "learning_rate": 3.895910797808212e-06, |
| "loss": 0.502, |
| "step": 400000 |
| }, |
| { |
| "epoch": 0.9220817840438358, |
| "eval_loss": 0.4786904454231262, |
| "eval_runtime": 144.2001, |
| "eval_samples_per_second": 30.388, |
| "eval_steps_per_second": 30.388, |
| "step": 400000 |
| }, |
| { |
| "epoch": 0.9225428249358577, |
| "grad_norm": 2.3382468223571777, |
| "learning_rate": 3.872858753207116e-06, |
| "loss": 0.5051, |
| "step": 400200 |
| }, |
| { |
| "epoch": 0.9230038658278796, |
| "grad_norm": 0.6655104756355286, |
| "learning_rate": 3.84980670860602e-06, |
| "loss": 0.4295, |
| "step": 400400 |
| }, |
| { |
| "epoch": 0.9234649067199016, |
| "grad_norm": 2.225646734237671, |
| "learning_rate": 3.826754664004924e-06, |
| "loss": 0.4922, |
| "step": 400600 |
| }, |
| { |
| "epoch": 0.9239259476119235, |
| "grad_norm": 1.4331623315811157, |
| "learning_rate": 3.803702619403828e-06, |
| "loss": 0.485, |
| "step": 400800 |
| }, |
| { |
| "epoch": 0.9243869885039454, |
| "grad_norm": 1.335250973701477, |
| "learning_rate": 3.780650574802732e-06, |
| "loss": 0.5001, |
| "step": 401000 |
| }, |
| { |
| "epoch": 0.9248480293959673, |
| "grad_norm": 1.107587218284607, |
| "learning_rate": 3.7575985302016364e-06, |
| "loss": 0.4745, |
| "step": 401200 |
| }, |
| { |
| "epoch": 0.9253090702879891, |
| "grad_norm": 1.2675089836120605, |
| "learning_rate": 3.7345464856005404e-06, |
| "loss": 0.4863, |
| "step": 401400 |
| }, |
| { |
| "epoch": 0.9257701111800111, |
| "grad_norm": 1.018123984336853, |
| "learning_rate": 3.7114944409994448e-06, |
| "loss": 0.4784, |
| "step": 401600 |
| }, |
| { |
| "epoch": 0.926231152072033, |
| "grad_norm": 0.6232244968414307, |
| "learning_rate": 3.6884423963983483e-06, |
| "loss": 0.4996, |
| "step": 401800 |
| }, |
| { |
| "epoch": 0.9266921929640549, |
| "grad_norm": 1.347090482711792, |
| "learning_rate": 3.6653903517972527e-06, |
| "loss": 0.4704, |
| "step": 402000 |
| }, |
| { |
| "epoch": 0.9271532338560768, |
| "grad_norm": 0.9146246314048767, |
| "learning_rate": 3.6423383071961567e-06, |
| "loss": 0.4572, |
| "step": 402200 |
| }, |
| { |
| "epoch": 0.9276142747480988, |
| "grad_norm": 1.380365014076233, |
| "learning_rate": 3.619286262595061e-06, |
| "loss": 0.5064, |
| "step": 402400 |
| }, |
| { |
| "epoch": 0.9280753156401207, |
| "grad_norm": 1.536133050918579, |
| "learning_rate": 3.596234217993965e-06, |
| "loss": 0.481, |
| "step": 402600 |
| }, |
| { |
| "epoch": 0.9285363565321426, |
| "grad_norm": 1.290397047996521, |
| "learning_rate": 3.5731821733928695e-06, |
| "loss": 0.4891, |
| "step": 402800 |
| }, |
| { |
| "epoch": 0.9289973974241645, |
| "grad_norm": 2.4600837230682373, |
| "learning_rate": 3.550130128791773e-06, |
| "loss": 0.4528, |
| "step": 403000 |
| }, |
| { |
| "epoch": 0.9294584383161865, |
| "grad_norm": 1.7255617380142212, |
| "learning_rate": 3.5270780841906775e-06, |
| "loss": 0.4647, |
| "step": 403200 |
| }, |
| { |
| "epoch": 0.9299194792082084, |
| "grad_norm": 1.0559278726577759, |
| "learning_rate": 3.5040260395895815e-06, |
| "loss": 0.5023, |
| "step": 403400 |
| }, |
| { |
| "epoch": 0.9303805201002303, |
| "grad_norm": 0.7714131474494934, |
| "learning_rate": 3.480973994988486e-06, |
| "loss": 0.449, |
| "step": 403600 |
| }, |
| { |
| "epoch": 0.9308415609922522, |
| "grad_norm": 1.1090224981307983, |
| "learning_rate": 3.45792195038739e-06, |
| "loss": 0.5151, |
| "step": 403800 |
| }, |
| { |
| "epoch": 0.9313026018842742, |
| "grad_norm": 1.1689685583114624, |
| "learning_rate": 3.4348699057862943e-06, |
| "loss": 0.4831, |
| "step": 404000 |
| }, |
| { |
| "epoch": 0.9317636427762961, |
| "grad_norm": 1.7004835605621338, |
| "learning_rate": 3.411817861185198e-06, |
| "loss": 0.4517, |
| "step": 404200 |
| }, |
| { |
| "epoch": 0.932224683668318, |
| "grad_norm": 1.8636317253112793, |
| "learning_rate": 3.3887658165841022e-06, |
| "loss": 0.4642, |
| "step": 404400 |
| }, |
| { |
| "epoch": 0.9326857245603398, |
| "grad_norm": 1.8215795755386353, |
| "learning_rate": 3.365713771983006e-06, |
| "loss": 0.4997, |
| "step": 404600 |
| }, |
| { |
| "epoch": 0.9331467654523617, |
| "grad_norm": 1.2667629718780518, |
| "learning_rate": 3.34266172738191e-06, |
| "loss": 0.475, |
| "step": 404800 |
| }, |
| { |
| "epoch": 0.9336078063443837, |
| "grad_norm": 1.1865830421447754, |
| "learning_rate": 3.3196096827808146e-06, |
| "loss": 0.495, |
| "step": 405000 |
| }, |
| { |
| "epoch": 0.9340688472364056, |
| "grad_norm": 0.7197660803794861, |
| "learning_rate": 3.296557638179718e-06, |
| "loss": 0.4726, |
| "step": 405200 |
| }, |
| { |
| "epoch": 0.9345298881284275, |
| "grad_norm": 0.5987845659255981, |
| "learning_rate": 3.2735055935786225e-06, |
| "loss": 0.4799, |
| "step": 405400 |
| }, |
| { |
| "epoch": 0.9349909290204494, |
| "grad_norm": 3.0414366722106934, |
| "learning_rate": 3.2504535489775265e-06, |
| "loss": 0.5096, |
| "step": 405600 |
| }, |
| { |
| "epoch": 0.9354519699124714, |
| "grad_norm": 1.372909426689148, |
| "learning_rate": 3.227401504376431e-06, |
| "loss": 0.4626, |
| "step": 405800 |
| }, |
| { |
| "epoch": 0.9359130108044933, |
| "grad_norm": 1.5821083784103394, |
| "learning_rate": 3.204349459775335e-06, |
| "loss": 0.4574, |
| "step": 406000 |
| }, |
| { |
| "epoch": 0.9363740516965152, |
| "grad_norm": 0.5546638369560242, |
| "learning_rate": 3.1812974151742393e-06, |
| "loss": 0.5013, |
| "step": 406200 |
| }, |
| { |
| "epoch": 0.9368350925885371, |
| "grad_norm": 1.4737298488616943, |
| "learning_rate": 3.158245370573143e-06, |
| "loss": 0.473, |
| "step": 406400 |
| }, |
| { |
| "epoch": 0.9372961334805591, |
| "grad_norm": 1.4075927734375, |
| "learning_rate": 3.1351933259720473e-06, |
| "loss": 0.4748, |
| "step": 406600 |
| }, |
| { |
| "epoch": 0.937757174372581, |
| "grad_norm": 1.2418146133422852, |
| "learning_rate": 3.1121412813709513e-06, |
| "loss": 0.5203, |
| "step": 406800 |
| }, |
| { |
| "epoch": 0.9382182152646029, |
| "grad_norm": 2.0183310508728027, |
| "learning_rate": 3.0890892367698552e-06, |
| "loss": 0.5137, |
| "step": 407000 |
| }, |
| { |
| "epoch": 0.9386792561566248, |
| "grad_norm": 0.8458141684532166, |
| "learning_rate": 3.0660371921687596e-06, |
| "loss": 0.4607, |
| "step": 407200 |
| }, |
| { |
| "epoch": 0.9391402970486468, |
| "grad_norm": 1.4068762063980103, |
| "learning_rate": 3.0429851475676636e-06, |
| "loss": 0.461, |
| "step": 407400 |
| }, |
| { |
| "epoch": 0.9396013379406687, |
| "grad_norm": 4.581197261810303, |
| "learning_rate": 3.0199331029665676e-06, |
| "loss": 0.4692, |
| "step": 407600 |
| }, |
| { |
| "epoch": 0.9400623788326906, |
| "grad_norm": 2.506011724472046, |
| "learning_rate": 2.996881058365472e-06, |
| "loss": 0.4364, |
| "step": 407800 |
| }, |
| { |
| "epoch": 0.9405234197247124, |
| "grad_norm": 1.3012163639068604, |
| "learning_rate": 2.973829013764376e-06, |
| "loss": 0.466, |
| "step": 408000 |
| }, |
| { |
| "epoch": 0.9409844606167344, |
| "grad_norm": 0.9710767269134521, |
| "learning_rate": 2.95077696916328e-06, |
| "loss": 0.4706, |
| "step": 408200 |
| }, |
| { |
| "epoch": 0.9414455015087563, |
| "grad_norm": 1.5749614238739014, |
| "learning_rate": 2.9277249245621844e-06, |
| "loss": 0.4507, |
| "step": 408400 |
| }, |
| { |
| "epoch": 0.9419065424007782, |
| "grad_norm": 1.1157305240631104, |
| "learning_rate": 2.9046728799610884e-06, |
| "loss": 0.4798, |
| "step": 408600 |
| }, |
| { |
| "epoch": 0.9423675832928001, |
| "grad_norm": 1.0349030494689941, |
| "learning_rate": 2.8816208353599923e-06, |
| "loss": 0.4385, |
| "step": 408800 |
| }, |
| { |
| "epoch": 0.942828624184822, |
| "grad_norm": 0.7431963682174683, |
| "learning_rate": 2.8585687907588967e-06, |
| "loss": 0.4558, |
| "step": 409000 |
| }, |
| { |
| "epoch": 0.943289665076844, |
| "grad_norm": 1.7582494020462036, |
| "learning_rate": 2.8355167461578007e-06, |
| "loss": 0.4805, |
| "step": 409200 |
| }, |
| { |
| "epoch": 0.9437507059688659, |
| "grad_norm": 1.688696026802063, |
| "learning_rate": 2.8124647015567047e-06, |
| "loss": 0.462, |
| "step": 409400 |
| }, |
| { |
| "epoch": 0.9442117468608878, |
| "grad_norm": 2.057497262954712, |
| "learning_rate": 2.789412656955609e-06, |
| "loss": 0.444, |
| "step": 409600 |
| }, |
| { |
| "epoch": 0.9446727877529097, |
| "grad_norm": 1.7381998300552368, |
| "learning_rate": 2.766360612354513e-06, |
| "loss": 0.4716, |
| "step": 409800 |
| }, |
| { |
| "epoch": 0.9451338286449317, |
| "grad_norm": 1.4783737659454346, |
| "learning_rate": 2.743308567753417e-06, |
| "loss": 0.507, |
| "step": 410000 |
| }, |
| { |
| "epoch": 0.9455948695369536, |
| "grad_norm": 2.4599671363830566, |
| "learning_rate": 2.720256523152321e-06, |
| "loss": 0.4724, |
| "step": 410200 |
| }, |
| { |
| "epoch": 0.9460559104289755, |
| "grad_norm": 1.6075626611709595, |
| "learning_rate": 2.697204478551225e-06, |
| "loss": 0.486, |
| "step": 410400 |
| }, |
| { |
| "epoch": 0.9465169513209974, |
| "grad_norm": 2.0719876289367676, |
| "learning_rate": 2.674152433950129e-06, |
| "loss": 0.4248, |
| "step": 410600 |
| }, |
| { |
| "epoch": 0.9469779922130194, |
| "grad_norm": 2.098074197769165, |
| "learning_rate": 2.6511003893490334e-06, |
| "loss": 0.4426, |
| "step": 410800 |
| }, |
| { |
| "epoch": 0.9474390331050413, |
| "grad_norm": 1.1049730777740479, |
| "learning_rate": 2.6280483447479374e-06, |
| "loss": 0.4627, |
| "step": 411000 |
| }, |
| { |
| "epoch": 0.9479000739970632, |
| "grad_norm": 0.8188923001289368, |
| "learning_rate": 2.6049963001468414e-06, |
| "loss": 0.4888, |
| "step": 411200 |
| }, |
| { |
| "epoch": 0.948361114889085, |
| "grad_norm": 0.9490247368812561, |
| "learning_rate": 2.581944255545746e-06, |
| "loss": 0.4942, |
| "step": 411400 |
| }, |
| { |
| "epoch": 0.948822155781107, |
| "grad_norm": 1.2527036666870117, |
| "learning_rate": 2.5588922109446498e-06, |
| "loss": 0.4867, |
| "step": 411600 |
| }, |
| { |
| "epoch": 0.9492831966731289, |
| "grad_norm": 0.47197577357292175, |
| "learning_rate": 2.5358401663435538e-06, |
| "loss": 0.464, |
| "step": 411800 |
| }, |
| { |
| "epoch": 0.9497442375651508, |
| "grad_norm": 1.5637418031692505, |
| "learning_rate": 2.512788121742458e-06, |
| "loss": 0.4771, |
| "step": 412000 |
| }, |
| { |
| "epoch": 0.9502052784571727, |
| "grad_norm": 0.9019871950149536, |
| "learning_rate": 2.489736077141362e-06, |
| "loss": 0.4814, |
| "step": 412200 |
| }, |
| { |
| "epoch": 0.9506663193491947, |
| "grad_norm": 0.7962387800216675, |
| "learning_rate": 2.466684032540266e-06, |
| "loss": 0.488, |
| "step": 412400 |
| }, |
| { |
| "epoch": 0.9511273602412166, |
| "grad_norm": 0.810796320438385, |
| "learning_rate": 2.4436319879391705e-06, |
| "loss": 0.4098, |
| "step": 412600 |
| }, |
| { |
| "epoch": 0.9515884011332385, |
| "grad_norm": 1.6829875707626343, |
| "learning_rate": 2.4205799433380745e-06, |
| "loss": 0.4659, |
| "step": 412800 |
| }, |
| { |
| "epoch": 0.9520494420252604, |
| "grad_norm": 0.9560777544975281, |
| "learning_rate": 2.3975278987369785e-06, |
| "loss": 0.4853, |
| "step": 413000 |
| }, |
| { |
| "epoch": 0.9525104829172824, |
| "grad_norm": 2.32140851020813, |
| "learning_rate": 2.374475854135883e-06, |
| "loss": 0.5007, |
| "step": 413200 |
| }, |
| { |
| "epoch": 0.9529715238093043, |
| "grad_norm": 1.2261013984680176, |
| "learning_rate": 2.351423809534787e-06, |
| "loss": 0.5207, |
| "step": 413400 |
| }, |
| { |
| "epoch": 0.9534325647013262, |
| "grad_norm": 1.996286153793335, |
| "learning_rate": 2.3283717649336913e-06, |
| "loss": 0.5164, |
| "step": 413600 |
| }, |
| { |
| "epoch": 0.9538936055933481, |
| "grad_norm": 1.2934073209762573, |
| "learning_rate": 2.305319720332595e-06, |
| "loss": 0.4809, |
| "step": 413800 |
| }, |
| { |
| "epoch": 0.95435464648537, |
| "grad_norm": 0.7042099833488464, |
| "learning_rate": 2.282267675731499e-06, |
| "loss": 0.4893, |
| "step": 414000 |
| }, |
| { |
| "epoch": 0.954815687377392, |
| "grad_norm": 1.0539119243621826, |
| "learning_rate": 2.2592156311304032e-06, |
| "loss": 0.5039, |
| "step": 414200 |
| }, |
| { |
| "epoch": 0.9552767282694139, |
| "grad_norm": 1.2834453582763672, |
| "learning_rate": 2.236163586529307e-06, |
| "loss": 0.5054, |
| "step": 414400 |
| }, |
| { |
| "epoch": 0.9557377691614358, |
| "grad_norm": 0.6034151911735535, |
| "learning_rate": 2.213111541928211e-06, |
| "loss": 0.4729, |
| "step": 414600 |
| }, |
| { |
| "epoch": 0.9561988100534576, |
| "grad_norm": 2.897521734237671, |
| "learning_rate": 2.1900594973271156e-06, |
| "loss": 0.4549, |
| "step": 414800 |
| }, |
| { |
| "epoch": 0.9566598509454796, |
| "grad_norm": 1.0604009628295898, |
| "learning_rate": 2.1670074527260196e-06, |
| "loss": 0.4573, |
| "step": 415000 |
| }, |
| { |
| "epoch": 0.9571208918375015, |
| "grad_norm": 0.8515986800193787, |
| "learning_rate": 2.1439554081249235e-06, |
| "loss": 0.4521, |
| "step": 415200 |
| }, |
| { |
| "epoch": 0.9575819327295234, |
| "grad_norm": 1.5794425010681152, |
| "learning_rate": 2.120903363523828e-06, |
| "loss": 0.5358, |
| "step": 415400 |
| }, |
| { |
| "epoch": 0.9580429736215453, |
| "grad_norm": 1.2372163534164429, |
| "learning_rate": 2.097851318922732e-06, |
| "loss": 0.4345, |
| "step": 415600 |
| }, |
| { |
| "epoch": 0.9585040145135673, |
| "grad_norm": 2.6105234622955322, |
| "learning_rate": 2.074799274321636e-06, |
| "loss": 0.5035, |
| "step": 415800 |
| }, |
| { |
| "epoch": 0.9589650554055892, |
| "grad_norm": 0.8453428149223328, |
| "learning_rate": 2.0517472297205403e-06, |
| "loss": 0.4723, |
| "step": 416000 |
| }, |
| { |
| "epoch": 0.9594260962976111, |
| "grad_norm": 1.2745046615600586, |
| "learning_rate": 2.0286951851194443e-06, |
| "loss": 0.4754, |
| "step": 416200 |
| }, |
| { |
| "epoch": 0.959887137189633, |
| "grad_norm": 1.6135262250900269, |
| "learning_rate": 2.0056431405183483e-06, |
| "loss": 0.4801, |
| "step": 416400 |
| }, |
| { |
| "epoch": 0.960348178081655, |
| "grad_norm": 1.6727254390716553, |
| "learning_rate": 1.9825910959172527e-06, |
| "loss": 0.4818, |
| "step": 416600 |
| }, |
| { |
| "epoch": 0.9608092189736769, |
| "grad_norm": 1.026893973350525, |
| "learning_rate": 1.9595390513161567e-06, |
| "loss": 0.4383, |
| "step": 416800 |
| }, |
| { |
| "epoch": 0.9612702598656988, |
| "grad_norm": 1.3765745162963867, |
| "learning_rate": 1.9364870067150607e-06, |
| "loss": 0.4625, |
| "step": 417000 |
| }, |
| { |
| "epoch": 0.9617313007577207, |
| "grad_norm": 1.6205723285675049, |
| "learning_rate": 1.913434962113965e-06, |
| "loss": 0.4705, |
| "step": 417200 |
| }, |
| { |
| "epoch": 0.9621923416497427, |
| "grad_norm": 1.5419261455535889, |
| "learning_rate": 1.8903829175128688e-06, |
| "loss": 0.5056, |
| "step": 417400 |
| }, |
| { |
| "epoch": 0.9626533825417646, |
| "grad_norm": 0.6733845472335815, |
| "learning_rate": 1.8673308729117728e-06, |
| "loss": 0.4988, |
| "step": 417600 |
| }, |
| { |
| "epoch": 0.9631144234337865, |
| "grad_norm": 1.0940847396850586, |
| "learning_rate": 1.844278828310677e-06, |
| "loss": 0.4654, |
| "step": 417800 |
| }, |
| { |
| "epoch": 0.9635754643258084, |
| "grad_norm": 1.1737462282180786, |
| "learning_rate": 1.8212267837095812e-06, |
| "loss": 0.4574, |
| "step": 418000 |
| }, |
| { |
| "epoch": 0.9640365052178302, |
| "grad_norm": 1.6984807252883911, |
| "learning_rate": 1.7981747391084852e-06, |
| "loss": 0.4525, |
| "step": 418200 |
| }, |
| { |
| "epoch": 0.9644975461098522, |
| "grad_norm": 1.8462785482406616, |
| "learning_rate": 1.7751226945073894e-06, |
| "loss": 0.4889, |
| "step": 418400 |
| }, |
| { |
| "epoch": 0.9649585870018741, |
| "grad_norm": 1.7319543361663818, |
| "learning_rate": 1.7520706499062936e-06, |
| "loss": 0.4821, |
| "step": 418600 |
| }, |
| { |
| "epoch": 0.965419627893896, |
| "grad_norm": 2.190124034881592, |
| "learning_rate": 1.7290186053051975e-06, |
| "loss": 0.5076, |
| "step": 418800 |
| }, |
| { |
| "epoch": 0.9658806687859179, |
| "grad_norm": 1.911737322807312, |
| "learning_rate": 1.7059665607041017e-06, |
| "loss": 0.4905, |
| "step": 419000 |
| }, |
| { |
| "epoch": 0.9663417096779399, |
| "grad_norm": 3.9163506031036377, |
| "learning_rate": 1.682914516103006e-06, |
| "loss": 0.469, |
| "step": 419200 |
| }, |
| { |
| "epoch": 0.9668027505699618, |
| "grad_norm": 1.2527137994766235, |
| "learning_rate": 1.65986247150191e-06, |
| "loss": 0.4247, |
| "step": 419400 |
| }, |
| { |
| "epoch": 0.9672637914619837, |
| "grad_norm": 0.49080777168273926, |
| "learning_rate": 1.636810426900814e-06, |
| "loss": 0.4783, |
| "step": 419600 |
| }, |
| { |
| "epoch": 0.9677248323540056, |
| "grad_norm": 1.7566986083984375, |
| "learning_rate": 1.6137583822997183e-06, |
| "loss": 0.5296, |
| "step": 419800 |
| }, |
| { |
| "epoch": 0.9681858732460276, |
| "grad_norm": 0.9041785597801208, |
| "learning_rate": 1.5907063376986223e-06, |
| "loss": 0.4821, |
| "step": 420000 |
| }, |
| { |
| "epoch": 0.9686469141380495, |
| "grad_norm": 1.486576795578003, |
| "learning_rate": 1.5676542930975265e-06, |
| "loss": 0.4653, |
| "step": 420200 |
| }, |
| { |
| "epoch": 0.9691079550300714, |
| "grad_norm": 1.5304393768310547, |
| "learning_rate": 1.5446022484964304e-06, |
| "loss": 0.4657, |
| "step": 420400 |
| }, |
| { |
| "epoch": 0.9695689959220933, |
| "grad_norm": 4.940136432647705, |
| "learning_rate": 1.5215502038953344e-06, |
| "loss": 0.4656, |
| "step": 420600 |
| }, |
| { |
| "epoch": 0.9700300368141153, |
| "grad_norm": 2.155877113342285, |
| "learning_rate": 1.4984981592942386e-06, |
| "loss": 0.4687, |
| "step": 420800 |
| }, |
| { |
| "epoch": 0.9704910777061372, |
| "grad_norm": 0.5753369927406311, |
| "learning_rate": 1.4754461146931428e-06, |
| "loss": 0.4809, |
| "step": 421000 |
| }, |
| { |
| "epoch": 0.9709521185981591, |
| "grad_norm": 1.4241207838058472, |
| "learning_rate": 1.4523940700920468e-06, |
| "loss": 0.4721, |
| "step": 421200 |
| }, |
| { |
| "epoch": 0.971413159490181, |
| "grad_norm": 0.4579220414161682, |
| "learning_rate": 1.429342025490951e-06, |
| "loss": 0.4865, |
| "step": 421400 |
| }, |
| { |
| "epoch": 0.9718742003822028, |
| "grad_norm": 2.118295669555664, |
| "learning_rate": 1.4062899808898552e-06, |
| "loss": 0.4461, |
| "step": 421600 |
| }, |
| { |
| "epoch": 0.9723352412742248, |
| "grad_norm": 2.08658766746521, |
| "learning_rate": 1.3832379362887594e-06, |
| "loss": 0.4564, |
| "step": 421800 |
| }, |
| { |
| "epoch": 0.9727962821662467, |
| "grad_norm": 1.8553085327148438, |
| "learning_rate": 1.3601858916876631e-06, |
| "loss": 0.5111, |
| "step": 422000 |
| }, |
| { |
| "epoch": 0.9732573230582686, |
| "grad_norm": 1.5697154998779297, |
| "learning_rate": 1.3371338470865673e-06, |
| "loss": 0.4776, |
| "step": 422200 |
| }, |
| { |
| "epoch": 0.9737183639502905, |
| "grad_norm": 0.5918155312538147, |
| "learning_rate": 1.3140818024854715e-06, |
| "loss": 0.4928, |
| "step": 422400 |
| }, |
| { |
| "epoch": 0.9741794048423125, |
| "grad_norm": 0.9090703725814819, |
| "learning_rate": 1.2910297578843755e-06, |
| "loss": 0.493, |
| "step": 422600 |
| }, |
| { |
| "epoch": 0.9746404457343344, |
| "grad_norm": 2.200510025024414, |
| "learning_rate": 1.2679777132832797e-06, |
| "loss": 0.4584, |
| "step": 422800 |
| }, |
| { |
| "epoch": 0.9751014866263563, |
| "grad_norm": 1.3335816860198975, |
| "learning_rate": 1.244925668682184e-06, |
| "loss": 0.4461, |
| "step": 423000 |
| }, |
| { |
| "epoch": 0.9755625275183782, |
| "grad_norm": 1.2546000480651855, |
| "learning_rate": 1.2218736240810879e-06, |
| "loss": 0.4431, |
| "step": 423200 |
| }, |
| { |
| "epoch": 0.9760235684104002, |
| "grad_norm": 1.394166350364685, |
| "learning_rate": 1.198821579479992e-06, |
| "loss": 0.452, |
| "step": 423400 |
| }, |
| { |
| "epoch": 0.9764846093024221, |
| "grad_norm": 1.7498624324798584, |
| "learning_rate": 1.1757695348788963e-06, |
| "loss": 0.5626, |
| "step": 423600 |
| }, |
| { |
| "epoch": 0.976945650194444, |
| "grad_norm": 1.2629833221435547, |
| "learning_rate": 1.1527174902778002e-06, |
| "loss": 0.4461, |
| "step": 423800 |
| }, |
| { |
| "epoch": 0.9774066910864659, |
| "grad_norm": 1.0957165956497192, |
| "learning_rate": 1.1296654456767042e-06, |
| "loss": 0.483, |
| "step": 424000 |
| }, |
| { |
| "epoch": 0.9778677319784879, |
| "grad_norm": 1.3717105388641357, |
| "learning_rate": 1.1066134010756084e-06, |
| "loss": 0.4599, |
| "step": 424200 |
| }, |
| { |
| "epoch": 0.9783287728705098, |
| "grad_norm": 1.0456079244613647, |
| "learning_rate": 1.0835613564745126e-06, |
| "loss": 0.4624, |
| "step": 424400 |
| }, |
| { |
| "epoch": 0.9787898137625317, |
| "grad_norm": 0.7499749660491943, |
| "learning_rate": 1.0605093118734166e-06, |
| "loss": 0.4567, |
| "step": 424600 |
| }, |
| { |
| "epoch": 0.9792508546545536, |
| "grad_norm": 1.2902302742004395, |
| "learning_rate": 1.0374572672723208e-06, |
| "loss": 0.4881, |
| "step": 424800 |
| }, |
| { |
| "epoch": 0.9797118955465755, |
| "grad_norm": 1.2328616380691528, |
| "learning_rate": 1.014405222671225e-06, |
| "loss": 0.4676, |
| "step": 425000 |
| }, |
| { |
| "epoch": 0.9801729364385974, |
| "grad_norm": 1.0173146724700928, |
| "learning_rate": 9.91353178070129e-07, |
| "loss": 0.4841, |
| "step": 425200 |
| }, |
| { |
| "epoch": 0.9806339773306193, |
| "grad_norm": 0.421296089887619, |
| "learning_rate": 9.683011334690332e-07, |
| "loss": 0.4964, |
| "step": 425400 |
| }, |
| { |
| "epoch": 0.9810950182226412, |
| "grad_norm": 0.7365984916687012, |
| "learning_rate": 9.452490888679371e-07, |
| "loss": 0.4944, |
| "step": 425600 |
| }, |
| { |
| "epoch": 0.9815560591146631, |
| "grad_norm": 1.2316726446151733, |
| "learning_rate": 9.221970442668412e-07, |
| "loss": 0.4656, |
| "step": 425800 |
| }, |
| { |
| "epoch": 0.9820171000066851, |
| "grad_norm": 0.8625339269638062, |
| "learning_rate": 8.991449996657453e-07, |
| "loss": 0.4667, |
| "step": 426000 |
| }, |
| { |
| "epoch": 0.982478140898707, |
| "grad_norm": 1.1301565170288086, |
| "learning_rate": 8.760929550646495e-07, |
| "loss": 0.4699, |
| "step": 426200 |
| }, |
| { |
| "epoch": 0.9829391817907289, |
| "grad_norm": 0.8868315815925598, |
| "learning_rate": 8.530409104635536e-07, |
| "loss": 0.5316, |
| "step": 426400 |
| }, |
| { |
| "epoch": 0.9834002226827508, |
| "grad_norm": 2.410291910171509, |
| "learning_rate": 8.299888658624577e-07, |
| "loss": 0.4651, |
| "step": 426600 |
| }, |
| { |
| "epoch": 0.9838612635747728, |
| "grad_norm": 3.955040693283081, |
| "learning_rate": 8.069368212613619e-07, |
| "loss": 0.4732, |
| "step": 426800 |
| }, |
| { |
| "epoch": 0.9843223044667947, |
| "grad_norm": 1.6138865947723389, |
| "learning_rate": 7.83884776660266e-07, |
| "loss": 0.5057, |
| "step": 427000 |
| }, |
| { |
| "epoch": 0.9847833453588166, |
| "grad_norm": 1.141384482383728, |
| "learning_rate": 7.6083273205917e-07, |
| "loss": 0.49, |
| "step": 427200 |
| }, |
| { |
| "epoch": 0.9852443862508385, |
| "grad_norm": 0.964368999004364, |
| "learning_rate": 7.377806874580741e-07, |
| "loss": 0.4702, |
| "step": 427400 |
| }, |
| { |
| "epoch": 0.9857054271428605, |
| "grad_norm": 1.7662829160690308, |
| "learning_rate": 7.147286428569782e-07, |
| "loss": 0.477, |
| "step": 427600 |
| }, |
| { |
| "epoch": 0.9861664680348824, |
| "grad_norm": 1.14377760887146, |
| "learning_rate": 6.916765982558824e-07, |
| "loss": 0.4613, |
| "step": 427800 |
| }, |
| { |
| "epoch": 0.9866275089269043, |
| "grad_norm": 1.1552037000656128, |
| "learning_rate": 6.686245536547864e-07, |
| "loss": 0.4659, |
| "step": 428000 |
| }, |
| { |
| "epoch": 0.9870885498189262, |
| "grad_norm": 1.6723991632461548, |
| "learning_rate": 6.455725090536906e-07, |
| "loss": 0.4614, |
| "step": 428200 |
| }, |
| { |
| "epoch": 0.987549590710948, |
| "grad_norm": 1.3214787244796753, |
| "learning_rate": 6.225204644525947e-07, |
| "loss": 0.4599, |
| "step": 428400 |
| }, |
| { |
| "epoch": 0.98801063160297, |
| "grad_norm": 0.9534615874290466, |
| "learning_rate": 5.994684198514988e-07, |
| "loss": 0.5336, |
| "step": 428600 |
| }, |
| { |
| "epoch": 0.9884716724949919, |
| "grad_norm": 1.9757567644119263, |
| "learning_rate": 5.764163752504028e-07, |
| "loss": 0.502, |
| "step": 428800 |
| }, |
| { |
| "epoch": 0.9889327133870138, |
| "grad_norm": 1.4372884035110474, |
| "learning_rate": 5.533643306493069e-07, |
| "loss": 0.4847, |
| "step": 429000 |
| }, |
| { |
| "epoch": 0.9893937542790358, |
| "grad_norm": 1.3356891870498657, |
| "learning_rate": 5.30312286048211e-07, |
| "loss": 0.5075, |
| "step": 429200 |
| }, |
| { |
| "epoch": 0.9898547951710577, |
| "grad_norm": 0.3389435410499573, |
| "learning_rate": 5.072602414471152e-07, |
| "loss": 0.4614, |
| "step": 429400 |
| }, |
| { |
| "epoch": 0.9903158360630796, |
| "grad_norm": 4.016057968139648, |
| "learning_rate": 4.842081968460193e-07, |
| "loss": 0.4765, |
| "step": 429600 |
| }, |
| { |
| "epoch": 0.9907768769551015, |
| "grad_norm": 1.7579454183578491, |
| "learning_rate": 4.6115615224492333e-07, |
| "loss": 0.4588, |
| "step": 429800 |
| }, |
| { |
| "epoch": 0.9912379178471235, |
| "grad_norm": 1.2797824144363403, |
| "learning_rate": 4.381041076438275e-07, |
| "loss": 0.511, |
| "step": 430000 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 433801, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.972456968192e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|