{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 11928, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00025150905432595576, "grad_norm": 5.952406406402588, "learning_rate": 8.382229673093043e-09, "loss": 0.8367, "step": 1 }, { "epoch": 0.0005030181086519115, "grad_norm": 6.153665542602539, "learning_rate": 1.6764459346186086e-08, "loss": 0.8394, "step": 2 }, { "epoch": 0.0007545271629778672, "grad_norm": 5.872622966766357, "learning_rate": 2.5146689019279132e-08, "loss": 0.8047, "step": 3 }, { "epoch": 0.001006036217303823, "grad_norm": 6.132771968841553, "learning_rate": 3.352891869237217e-08, "loss": 0.8308, "step": 4 }, { "epoch": 0.0012575452716297787, "grad_norm": 6.068148612976074, "learning_rate": 4.191114836546522e-08, "loss": 0.7871, "step": 5 }, { "epoch": 0.0015090543259557343, "grad_norm": 5.759831428527832, "learning_rate": 5.0293378038558264e-08, "loss": 0.8009, "step": 6 }, { "epoch": 0.0017605633802816902, "grad_norm": 6.06758975982666, "learning_rate": 5.8675607711651307e-08, "loss": 0.8295, "step": 7 }, { "epoch": 0.002012072434607646, "grad_norm": 6.002471923828125, "learning_rate": 6.705783738474434e-08, "loss": 0.8253, "step": 8 }, { "epoch": 0.0022635814889336017, "grad_norm": 6.0556440353393555, "learning_rate": 7.544006705783739e-08, "loss": 0.8259, "step": 9 }, { "epoch": 0.0025150905432595573, "grad_norm": 5.968860626220703, "learning_rate": 8.382229673093044e-08, "loss": 0.8368, "step": 10 }, { "epoch": 0.002766599597585513, "grad_norm": 5.972998142242432, "learning_rate": 9.220452640402346e-08, "loss": 0.8222, "step": 11 }, { "epoch": 0.0030181086519114686, "grad_norm": 5.935672760009766, "learning_rate": 1.0058675607711653e-07, "loss": 0.8099, "step": 12 }, { "epoch": 0.0032696177062374247, "grad_norm": 6.067246913909912, "learning_rate": 1.0896898575020955e-07, "loss": 0.832, "step": 13 }, { "epoch": 0.0035211267605633804, "grad_norm": 6.090888977050781, "learning_rate": 1.1735121542330261e-07, "loss": 0.8276, "step": 14 }, { "epoch": 0.003772635814889336, "grad_norm": 5.8601837158203125, "learning_rate": 1.2573344509639564e-07, "loss": 0.8273, "step": 15 }, { "epoch": 0.004024144869215292, "grad_norm": 6.011853218078613, "learning_rate": 1.3411567476948869e-07, "loss": 0.8194, "step": 16 }, { "epoch": 0.004275653923541248, "grad_norm": 5.5620808601379395, "learning_rate": 1.4249790444258174e-07, "loss": 0.791, "step": 17 }, { "epoch": 0.004527162977867203, "grad_norm": 5.906956672668457, "learning_rate": 1.5088013411567478e-07, "loss": 0.8452, "step": 18 }, { "epoch": 0.004778672032193159, "grad_norm": 5.789212226867676, "learning_rate": 1.592623637887678e-07, "loss": 0.8181, "step": 19 }, { "epoch": 0.005030181086519115, "grad_norm": 5.891463756561279, "learning_rate": 1.6764459346186088e-07, "loss": 0.8255, "step": 20 }, { "epoch": 0.00528169014084507, "grad_norm": 5.891812801361084, "learning_rate": 1.7602682313495393e-07, "loss": 0.8251, "step": 21 }, { "epoch": 0.005533199195171026, "grad_norm": 5.88001823425293, "learning_rate": 1.8440905280804693e-07, "loss": 0.8264, "step": 22 }, { "epoch": 0.005784708249496982, "grad_norm": 5.622618675231934, "learning_rate": 1.9279128248113998e-07, "loss": 0.7947, "step": 23 }, { "epoch": 0.006036217303822937, "grad_norm": 5.888658046722412, "learning_rate": 2.0117351215423305e-07, "loss": 0.843, "step": 24 }, { "epoch": 0.006287726358148894, "grad_norm": 5.512774467468262, "learning_rate": 2.095557418273261e-07, "loss": 0.7908, "step": 25 }, { "epoch": 0.006539235412474849, "grad_norm": 5.594388484954834, "learning_rate": 2.179379715004191e-07, "loss": 0.8071, "step": 26 }, { "epoch": 0.006790744466800805, "grad_norm": 5.4865617752075195, "learning_rate": 2.2632020117351218e-07, "loss": 0.7797, "step": 27 }, { "epoch": 0.007042253521126761, "grad_norm": 5.4088969230651855, "learning_rate": 2.3470243084660523e-07, "loss": 0.7925, "step": 28 }, { "epoch": 0.007293762575452716, "grad_norm": 5.587332248687744, "learning_rate": 2.430846605196983e-07, "loss": 0.7799, "step": 29 }, { "epoch": 0.007545271629778672, "grad_norm": 5.692927837371826, "learning_rate": 2.5146689019279127e-07, "loss": 0.8255, "step": 30 }, { "epoch": 0.007796780684104628, "grad_norm": 5.381591796875, "learning_rate": 2.598491198658843e-07, "loss": 0.8173, "step": 31 }, { "epoch": 0.008048289738430584, "grad_norm": 5.045378684997559, "learning_rate": 2.6823134953897737e-07, "loss": 0.7774, "step": 32 }, { "epoch": 0.00829979879275654, "grad_norm": 4.887575626373291, "learning_rate": 2.766135792120704e-07, "loss": 0.7928, "step": 33 }, { "epoch": 0.008551307847082495, "grad_norm": 4.738620281219482, "learning_rate": 2.8499580888516347e-07, "loss": 0.768, "step": 34 }, { "epoch": 0.008802816901408451, "grad_norm": 4.525857448577881, "learning_rate": 2.933780385582565e-07, "loss": 0.7588, "step": 35 }, { "epoch": 0.009054325955734407, "grad_norm": 4.540877342224121, "learning_rate": 3.0176026823134957e-07, "loss": 0.7746, "step": 36 }, { "epoch": 0.009305835010060362, "grad_norm": 4.293127059936523, "learning_rate": 3.101424979044426e-07, "loss": 0.7452, "step": 37 }, { "epoch": 0.009557344064386318, "grad_norm": 4.619227409362793, "learning_rate": 3.185247275775356e-07, "loss": 0.764, "step": 38 }, { "epoch": 0.009808853118712274, "grad_norm": 4.370224475860596, "learning_rate": 3.269069572506287e-07, "loss": 0.7549, "step": 39 }, { "epoch": 0.01006036217303823, "grad_norm": 4.469829082489014, "learning_rate": 3.3528918692372177e-07, "loss": 0.7507, "step": 40 }, { "epoch": 0.010311871227364185, "grad_norm": 4.509310722351074, "learning_rate": 3.4367141659681476e-07, "loss": 0.753, "step": 41 }, { "epoch": 0.01056338028169014, "grad_norm": 4.393963813781738, "learning_rate": 3.5205364626990787e-07, "loss": 0.7567, "step": 42 }, { "epoch": 0.010814889336016096, "grad_norm": 4.315878391265869, "learning_rate": 3.6043587594300086e-07, "loss": 0.7331, "step": 43 }, { "epoch": 0.011066398390342052, "grad_norm": 3.5298051834106445, "learning_rate": 3.6881810561609386e-07, "loss": 0.7346, "step": 44 }, { "epoch": 0.011317907444668008, "grad_norm": 2.8748891353607178, "learning_rate": 3.7720033528918696e-07, "loss": 0.6869, "step": 45 }, { "epoch": 0.011569416498993963, "grad_norm": 2.6105713844299316, "learning_rate": 3.8558256496227996e-07, "loss": 0.7293, "step": 46 }, { "epoch": 0.011820925553319919, "grad_norm": 2.637141704559326, "learning_rate": 3.9396479463537306e-07, "loss": 0.7522, "step": 47 }, { "epoch": 0.012072434607645875, "grad_norm": 2.4601848125457764, "learning_rate": 4.023470243084661e-07, "loss": 0.7251, "step": 48 }, { "epoch": 0.01232394366197183, "grad_norm": 2.4165666103363037, "learning_rate": 4.107292539815591e-07, "loss": 0.6959, "step": 49 }, { "epoch": 0.012575452716297788, "grad_norm": 2.4845943450927734, "learning_rate": 4.191114836546522e-07, "loss": 0.7036, "step": 50 }, { "epoch": 0.012826961770623743, "grad_norm": 2.3925764560699463, "learning_rate": 4.274937133277452e-07, "loss": 0.719, "step": 51 }, { "epoch": 0.013078470824949699, "grad_norm": 2.3824775218963623, "learning_rate": 4.358759430008382e-07, "loss": 0.673, "step": 52 }, { "epoch": 0.013329979879275655, "grad_norm": 2.228027820587158, "learning_rate": 4.442581726739313e-07, "loss": 0.7147, "step": 53 }, { "epoch": 0.01358148893360161, "grad_norm": 2.108171224594116, "learning_rate": 4.5264040234702435e-07, "loss": 0.712, "step": 54 }, { "epoch": 0.013832997987927566, "grad_norm": 1.9733554124832153, "learning_rate": 4.610226320201174e-07, "loss": 0.6986, "step": 55 }, { "epoch": 0.014084507042253521, "grad_norm": 1.9992328882217407, "learning_rate": 4.6940486169321045e-07, "loss": 0.6853, "step": 56 }, { "epoch": 0.014336016096579477, "grad_norm": 1.991721749305725, "learning_rate": 4.777870913663035e-07, "loss": 0.6935, "step": 57 }, { "epoch": 0.014587525150905433, "grad_norm": 1.8876029253005981, "learning_rate": 4.861693210393966e-07, "loss": 0.686, "step": 58 }, { "epoch": 0.014839034205231388, "grad_norm": 1.6275702714920044, "learning_rate": 4.945515507124896e-07, "loss": 0.6991, "step": 59 }, { "epoch": 0.015090543259557344, "grad_norm": 1.4490710496902466, "learning_rate": 5.029337803855825e-07, "loss": 0.7004, "step": 60 }, { "epoch": 0.0153420523138833, "grad_norm": 1.453107476234436, "learning_rate": 5.113160100586757e-07, "loss": 0.6786, "step": 61 }, { "epoch": 0.015593561368209255, "grad_norm": 1.5337774753570557, "learning_rate": 5.196982397317686e-07, "loss": 0.6438, "step": 62 }, { "epoch": 0.01584507042253521, "grad_norm": 1.4964603185653687, "learning_rate": 5.280804694048618e-07, "loss": 0.6416, "step": 63 }, { "epoch": 0.01609657947686117, "grad_norm": 1.6416826248168945, "learning_rate": 5.364626990779547e-07, "loss": 0.6863, "step": 64 }, { "epoch": 0.016348088531187122, "grad_norm": 1.6651757955551147, "learning_rate": 5.448449287510478e-07, "loss": 0.6805, "step": 65 }, { "epoch": 0.01659959758551308, "grad_norm": 1.5026111602783203, "learning_rate": 5.532271584241408e-07, "loss": 0.6329, "step": 66 }, { "epoch": 0.016851106639839034, "grad_norm": 1.5439717769622803, "learning_rate": 5.616093880972339e-07, "loss": 0.6221, "step": 67 }, { "epoch": 0.01710261569416499, "grad_norm": 1.5488256216049194, "learning_rate": 5.699916177703269e-07, "loss": 0.657, "step": 68 }, { "epoch": 0.017354124748490945, "grad_norm": 1.4593708515167236, "learning_rate": 5.7837384744342e-07, "loss": 0.6557, "step": 69 }, { "epoch": 0.017605633802816902, "grad_norm": 1.368078589439392, "learning_rate": 5.86756077116513e-07, "loss": 0.6378, "step": 70 }, { "epoch": 0.017857142857142856, "grad_norm": 1.2166882753372192, "learning_rate": 5.951383067896061e-07, "loss": 0.6182, "step": 71 }, { "epoch": 0.018108651911468814, "grad_norm": 1.1713978052139282, "learning_rate": 6.035205364626991e-07, "loss": 0.6465, "step": 72 }, { "epoch": 0.018360160965794767, "grad_norm": 1.0981969833374023, "learning_rate": 6.119027661357922e-07, "loss": 0.6062, "step": 73 }, { "epoch": 0.018611670020120725, "grad_norm": 1.0137531757354736, "learning_rate": 6.202849958088852e-07, "loss": 0.6123, "step": 74 }, { "epoch": 0.01886317907444668, "grad_norm": 1.0327494144439697, "learning_rate": 6.286672254819783e-07, "loss": 0.6315, "step": 75 }, { "epoch": 0.019114688128772636, "grad_norm": 0.9916748404502869, "learning_rate": 6.370494551550712e-07, "loss": 0.6504, "step": 76 }, { "epoch": 0.01936619718309859, "grad_norm": 0.9969479441642761, "learning_rate": 6.454316848281643e-07, "loss": 0.6598, "step": 77 }, { "epoch": 0.019617706237424547, "grad_norm": 0.9439221620559692, "learning_rate": 6.538139145012574e-07, "loss": 0.6136, "step": 78 }, { "epoch": 0.0198692152917505, "grad_norm": 0.9918829202651978, "learning_rate": 6.621961441743505e-07, "loss": 0.6273, "step": 79 }, { "epoch": 0.02012072434607646, "grad_norm": 0.8537020683288574, "learning_rate": 6.705783738474435e-07, "loss": 0.6015, "step": 80 }, { "epoch": 0.020372233400402416, "grad_norm": 0.8386716246604919, "learning_rate": 6.789606035205365e-07, "loss": 0.6091, "step": 81 }, { "epoch": 0.02062374245472837, "grad_norm": 0.8146318793296814, "learning_rate": 6.873428331936295e-07, "loss": 0.6225, "step": 82 }, { "epoch": 0.020875251509054327, "grad_norm": 0.7194071412086487, "learning_rate": 6.957250628667227e-07, "loss": 0.6023, "step": 83 }, { "epoch": 0.02112676056338028, "grad_norm": 0.672926127910614, "learning_rate": 7.041072925398157e-07, "loss": 0.572, "step": 84 }, { "epoch": 0.02137826961770624, "grad_norm": 0.729033887386322, "learning_rate": 7.124895222129087e-07, "loss": 0.6156, "step": 85 }, { "epoch": 0.021629778672032193, "grad_norm": 0.6650065779685974, "learning_rate": 7.208717518860017e-07, "loss": 0.5969, "step": 86 }, { "epoch": 0.02188128772635815, "grad_norm": 0.7430527806282043, "learning_rate": 7.292539815590948e-07, "loss": 0.583, "step": 87 }, { "epoch": 0.022132796780684104, "grad_norm": 0.7079744338989258, "learning_rate": 7.376362112321877e-07, "loss": 0.5836, "step": 88 }, { "epoch": 0.02238430583501006, "grad_norm": 0.6756497025489807, "learning_rate": 7.460184409052809e-07, "loss": 0.5745, "step": 89 }, { "epoch": 0.022635814889336015, "grad_norm": 0.655807614326477, "learning_rate": 7.544006705783739e-07, "loss": 0.5946, "step": 90 }, { "epoch": 0.022887323943661973, "grad_norm": 0.5830637216567993, "learning_rate": 7.62782900251467e-07, "loss": 0.5515, "step": 91 }, { "epoch": 0.023138832997987926, "grad_norm": 0.6083541512489319, "learning_rate": 7.711651299245599e-07, "loss": 0.6012, "step": 92 }, { "epoch": 0.023390342052313884, "grad_norm": 0.5965361595153809, "learning_rate": 7.79547359597653e-07, "loss": 0.6039, "step": 93 }, { "epoch": 0.023641851106639838, "grad_norm": 0.5302997827529907, "learning_rate": 7.879295892707461e-07, "loss": 0.5848, "step": 94 }, { "epoch": 0.023893360160965795, "grad_norm": 0.6046608090400696, "learning_rate": 7.963118189438392e-07, "loss": 0.5951, "step": 95 }, { "epoch": 0.02414486921529175, "grad_norm": 0.5611419677734375, "learning_rate": 8.046940486169322e-07, "loss": 0.545, "step": 96 }, { "epoch": 0.024396378269617706, "grad_norm": 0.5234445929527283, "learning_rate": 8.130762782900252e-07, "loss": 0.5226, "step": 97 }, { "epoch": 0.02464788732394366, "grad_norm": 0.564545750617981, "learning_rate": 8.214585079631182e-07, "loss": 0.5596, "step": 98 }, { "epoch": 0.024899396378269618, "grad_norm": 0.5597972273826599, "learning_rate": 8.298407376362114e-07, "loss": 0.5828, "step": 99 }, { "epoch": 0.025150905432595575, "grad_norm": 0.5264250040054321, "learning_rate": 8.382229673093044e-07, "loss": 0.5531, "step": 100 }, { "epoch": 0.02540241448692153, "grad_norm": 0.5365298390388489, "learning_rate": 8.466051969823974e-07, "loss": 0.5539, "step": 101 }, { "epoch": 0.025653923541247486, "grad_norm": 0.5420358180999756, "learning_rate": 8.549874266554904e-07, "loss": 0.5675, "step": 102 }, { "epoch": 0.02590543259557344, "grad_norm": 0.48730742931365967, "learning_rate": 8.633696563285835e-07, "loss": 0.5321, "step": 103 }, { "epoch": 0.026156941649899398, "grad_norm": 0.47938722372055054, "learning_rate": 8.717518860016764e-07, "loss": 0.5236, "step": 104 }, { "epoch": 0.02640845070422535, "grad_norm": 0.4556962549686432, "learning_rate": 8.801341156747697e-07, "loss": 0.5348, "step": 105 }, { "epoch": 0.02665995975855131, "grad_norm": 0.5305030941963196, "learning_rate": 8.885163453478626e-07, "loss": 0.5567, "step": 106 }, { "epoch": 0.026911468812877263, "grad_norm": 0.5030955672264099, "learning_rate": 8.968985750209557e-07, "loss": 0.5235, "step": 107 }, { "epoch": 0.02716297786720322, "grad_norm": 0.45537424087524414, "learning_rate": 9.052808046940487e-07, "loss": 0.5289, "step": 108 }, { "epoch": 0.027414486921529174, "grad_norm": 0.4804135859012604, "learning_rate": 9.136630343671417e-07, "loss": 0.5481, "step": 109 }, { "epoch": 0.02766599597585513, "grad_norm": 0.4467982351779938, "learning_rate": 9.220452640402348e-07, "loss": 0.5437, "step": 110 }, { "epoch": 0.027917505030181086, "grad_norm": 0.5055573582649231, "learning_rate": 9.304274937133279e-07, "loss": 0.5313, "step": 111 }, { "epoch": 0.028169014084507043, "grad_norm": 0.5437252521514893, "learning_rate": 9.388097233864209e-07, "loss": 0.538, "step": 112 }, { "epoch": 0.028420523138832997, "grad_norm": 0.44438403844833374, "learning_rate": 9.471919530595138e-07, "loss": 0.5469, "step": 113 }, { "epoch": 0.028672032193158954, "grad_norm": 0.45675307512283325, "learning_rate": 9.55574182732607e-07, "loss": 0.5007, "step": 114 }, { "epoch": 0.028923541247484908, "grad_norm": 0.44954171776771545, "learning_rate": 9.639564124056998e-07, "loss": 0.5704, "step": 115 }, { "epoch": 0.029175050301810865, "grad_norm": 0.4356625974178314, "learning_rate": 9.72338642078793e-07, "loss": 0.5339, "step": 116 }, { "epoch": 0.02942655935613682, "grad_norm": 0.44852906465530396, "learning_rate": 9.807208717518862e-07, "loss": 0.5344, "step": 117 }, { "epoch": 0.029678068410462777, "grad_norm": 0.5255141258239746, "learning_rate": 9.891031014249792e-07, "loss": 0.5457, "step": 118 }, { "epoch": 0.02992957746478873, "grad_norm": 0.4613071084022522, "learning_rate": 9.97485331098072e-07, "loss": 0.566, "step": 119 }, { "epoch": 0.030181086519114688, "grad_norm": 0.4343860149383545, "learning_rate": 1.005867560771165e-06, "loss": 0.5314, "step": 120 }, { "epoch": 0.030432595573440645, "grad_norm": 0.4519299864768982, "learning_rate": 1.0142497904442584e-06, "loss": 0.5543, "step": 121 }, { "epoch": 0.0306841046277666, "grad_norm": 0.45609790086746216, "learning_rate": 1.0226320201173514e-06, "loss": 0.5292, "step": 122 }, { "epoch": 0.030935613682092557, "grad_norm": 0.449398010969162, "learning_rate": 1.0310142497904444e-06, "loss": 0.5476, "step": 123 }, { "epoch": 0.03118712273641851, "grad_norm": 0.45544400811195374, "learning_rate": 1.0393964794635373e-06, "loss": 0.5264, "step": 124 }, { "epoch": 0.031438631790744465, "grad_norm": 0.4554864466190338, "learning_rate": 1.0477787091366303e-06, "loss": 0.5316, "step": 125 }, { "epoch": 0.03169014084507042, "grad_norm": 0.42402562499046326, "learning_rate": 1.0561609388097236e-06, "loss": 0.5422, "step": 126 }, { "epoch": 0.03194164989939638, "grad_norm": 0.47918954491615295, "learning_rate": 1.0645431684828166e-06, "loss": 0.5639, "step": 127 }, { "epoch": 0.03219315895372234, "grad_norm": 0.44865623116493225, "learning_rate": 1.0729253981559095e-06, "loss": 0.5292, "step": 128 }, { "epoch": 0.03244466800804829, "grad_norm": 0.4049345850944519, "learning_rate": 1.0813076278290025e-06, "loss": 0.5318, "step": 129 }, { "epoch": 0.032696177062374245, "grad_norm": 0.4720096290111542, "learning_rate": 1.0896898575020956e-06, "loss": 0.5059, "step": 130 }, { "epoch": 0.0329476861167002, "grad_norm": 0.4590936005115509, "learning_rate": 1.0980720871751886e-06, "loss": 0.4943, "step": 131 }, { "epoch": 0.03319919517102616, "grad_norm": 0.4278355538845062, "learning_rate": 1.1064543168482817e-06, "loss": 0.5273, "step": 132 }, { "epoch": 0.03345070422535211, "grad_norm": 0.4701170325279236, "learning_rate": 1.1148365465213747e-06, "loss": 0.5292, "step": 133 }, { "epoch": 0.03370221327967807, "grad_norm": 0.46045827865600586, "learning_rate": 1.1232187761944678e-06, "loss": 0.5208, "step": 134 }, { "epoch": 0.033953722334004025, "grad_norm": 0.4302027225494385, "learning_rate": 1.1316010058675608e-06, "loss": 0.5287, "step": 135 }, { "epoch": 0.03420523138832998, "grad_norm": 0.510970413684845, "learning_rate": 1.1399832355406539e-06, "loss": 0.5514, "step": 136 }, { "epoch": 0.03445674044265593, "grad_norm": 0.4021950364112854, "learning_rate": 1.148365465213747e-06, "loss": 0.5356, "step": 137 }, { "epoch": 0.03470824949698189, "grad_norm": 0.4260016083717346, "learning_rate": 1.15674769488684e-06, "loss": 0.5668, "step": 138 }, { "epoch": 0.03495975855130785, "grad_norm": 0.4197461009025574, "learning_rate": 1.165129924559933e-06, "loss": 0.5274, "step": 139 }, { "epoch": 0.035211267605633804, "grad_norm": 0.4209143817424774, "learning_rate": 1.173512154233026e-06, "loss": 0.5337, "step": 140 }, { "epoch": 0.03546277665995976, "grad_norm": 0.4232824742794037, "learning_rate": 1.1818943839061191e-06, "loss": 0.5264, "step": 141 }, { "epoch": 0.03571428571428571, "grad_norm": 0.45739907026290894, "learning_rate": 1.1902766135792122e-06, "loss": 0.5232, "step": 142 }, { "epoch": 0.03596579476861167, "grad_norm": 0.4441063702106476, "learning_rate": 1.1986588432523052e-06, "loss": 0.5409, "step": 143 }, { "epoch": 0.03621730382293763, "grad_norm": 0.43283092975616455, "learning_rate": 1.2070410729253983e-06, "loss": 0.5167, "step": 144 }, { "epoch": 0.036468812877263584, "grad_norm": 0.4424374997615814, "learning_rate": 1.2154233025984913e-06, "loss": 0.5366, "step": 145 }, { "epoch": 0.036720321931589535, "grad_norm": 0.5044413805007935, "learning_rate": 1.2238055322715844e-06, "loss": 0.5398, "step": 146 }, { "epoch": 0.03697183098591549, "grad_norm": 0.46940240263938904, "learning_rate": 1.2321877619446772e-06, "loss": 0.499, "step": 147 }, { "epoch": 0.03722334004024145, "grad_norm": 0.44987642765045166, "learning_rate": 1.2405699916177705e-06, "loss": 0.5116, "step": 148 }, { "epoch": 0.03747484909456741, "grad_norm": 0.4250459671020508, "learning_rate": 1.2489522212908635e-06, "loss": 0.5055, "step": 149 }, { "epoch": 0.03772635814889336, "grad_norm": 0.4617857038974762, "learning_rate": 1.2573344509639566e-06, "loss": 0.5347, "step": 150 }, { "epoch": 0.037977867203219315, "grad_norm": 0.48853859305381775, "learning_rate": 1.2657166806370496e-06, "loss": 0.5067, "step": 151 }, { "epoch": 0.03822937625754527, "grad_norm": 0.4387054145336151, "learning_rate": 1.2740989103101425e-06, "loss": 0.5091, "step": 152 }, { "epoch": 0.03848088531187123, "grad_norm": 0.41969773173332214, "learning_rate": 1.2824811399832357e-06, "loss": 0.495, "step": 153 }, { "epoch": 0.03873239436619718, "grad_norm": 0.43173882365226746, "learning_rate": 1.2908633696563286e-06, "loss": 0.4991, "step": 154 }, { "epoch": 0.03898390342052314, "grad_norm": 0.4111224412918091, "learning_rate": 1.2992455993294218e-06, "loss": 0.5335, "step": 155 }, { "epoch": 0.039235412474849095, "grad_norm": 0.40816769003868103, "learning_rate": 1.3076278290025149e-06, "loss": 0.5024, "step": 156 }, { "epoch": 0.03948692152917505, "grad_norm": 0.41955387592315674, "learning_rate": 1.3160100586756077e-06, "loss": 0.5378, "step": 157 }, { "epoch": 0.039738430583501, "grad_norm": 0.4666993021965027, "learning_rate": 1.324392288348701e-06, "loss": 0.5112, "step": 158 }, { "epoch": 0.03998993963782696, "grad_norm": 0.41977325081825256, "learning_rate": 1.3327745180217938e-06, "loss": 0.5198, "step": 159 }, { "epoch": 0.04024144869215292, "grad_norm": 0.4063010811805725, "learning_rate": 1.341156747694887e-06, "loss": 0.4893, "step": 160 }, { "epoch": 0.040492957746478875, "grad_norm": 0.42728182673454285, "learning_rate": 1.3495389773679801e-06, "loss": 0.5049, "step": 161 }, { "epoch": 0.04074446680080483, "grad_norm": 0.4450185298919678, "learning_rate": 1.357921207041073e-06, "loss": 0.5015, "step": 162 }, { "epoch": 0.04099597585513078, "grad_norm": 0.3846238851547241, "learning_rate": 1.3663034367141662e-06, "loss": 0.515, "step": 163 }, { "epoch": 0.04124748490945674, "grad_norm": 0.4138552248477936, "learning_rate": 1.374685666387259e-06, "loss": 0.5249, "step": 164 }, { "epoch": 0.0414989939637827, "grad_norm": 0.4613569378852844, "learning_rate": 1.383067896060352e-06, "loss": 0.5409, "step": 165 }, { "epoch": 0.041750503018108655, "grad_norm": 0.40616121888160706, "learning_rate": 1.3914501257334454e-06, "loss": 0.503, "step": 166 }, { "epoch": 0.042002012072434605, "grad_norm": 0.4463784098625183, "learning_rate": 1.3998323554065382e-06, "loss": 0.5177, "step": 167 }, { "epoch": 0.04225352112676056, "grad_norm": 0.40528833866119385, "learning_rate": 1.4082145850796315e-06, "loss": 0.4995, "step": 168 }, { "epoch": 0.04250503018108652, "grad_norm": 0.41246384382247925, "learning_rate": 1.4165968147527243e-06, "loss": 0.5055, "step": 169 }, { "epoch": 0.04275653923541248, "grad_norm": 0.4155355989933014, "learning_rate": 1.4249790444258174e-06, "loss": 0.5046, "step": 170 }, { "epoch": 0.04300804828973843, "grad_norm": 0.4172171354293823, "learning_rate": 1.4333612740989102e-06, "loss": 0.4889, "step": 171 }, { "epoch": 0.043259557344064385, "grad_norm": 0.40003055334091187, "learning_rate": 1.4417435037720035e-06, "loss": 0.4965, "step": 172 }, { "epoch": 0.04351106639839034, "grad_norm": 0.48593074083328247, "learning_rate": 1.4501257334450967e-06, "loss": 0.4937, "step": 173 }, { "epoch": 0.0437625754527163, "grad_norm": 0.41993674635887146, "learning_rate": 1.4585079631181895e-06, "loss": 0.4663, "step": 174 }, { "epoch": 0.04401408450704225, "grad_norm": 0.40402930974960327, "learning_rate": 1.4668901927912826e-06, "loss": 0.5113, "step": 175 }, { "epoch": 0.04426559356136821, "grad_norm": 0.4269852340221405, "learning_rate": 1.4752724224643754e-06, "loss": 0.5034, "step": 176 }, { "epoch": 0.044517102615694165, "grad_norm": 0.41009941697120667, "learning_rate": 1.4836546521374687e-06, "loss": 0.51, "step": 177 }, { "epoch": 0.04476861167002012, "grad_norm": 0.40047401189804077, "learning_rate": 1.4920368818105617e-06, "loss": 0.4719, "step": 178 }, { "epoch": 0.04502012072434608, "grad_norm": 0.389114648103714, "learning_rate": 1.5004191114836548e-06, "loss": 0.4971, "step": 179 }, { "epoch": 0.04527162977867203, "grad_norm": 0.41636624932289124, "learning_rate": 1.5088013411567478e-06, "loss": 0.4907, "step": 180 }, { "epoch": 0.04552313883299799, "grad_norm": 0.41309916973114014, "learning_rate": 1.5171835708298407e-06, "loss": 0.509, "step": 181 }, { "epoch": 0.045774647887323945, "grad_norm": 0.4215018153190613, "learning_rate": 1.525565800502934e-06, "loss": 0.4977, "step": 182 }, { "epoch": 0.0460261569416499, "grad_norm": 0.4160129427909851, "learning_rate": 1.533948030176027e-06, "loss": 0.5073, "step": 183 }, { "epoch": 0.04627766599597585, "grad_norm": 0.492434024810791, "learning_rate": 1.5423302598491198e-06, "loss": 0.4979, "step": 184 }, { "epoch": 0.04652917505030181, "grad_norm": 0.3985655605792999, "learning_rate": 1.550712489522213e-06, "loss": 0.5291, "step": 185 }, { "epoch": 0.04678068410462777, "grad_norm": 0.4607956111431122, "learning_rate": 1.559094719195306e-06, "loss": 0.5082, "step": 186 }, { "epoch": 0.047032193158953725, "grad_norm": 0.4652690887451172, "learning_rate": 1.5674769488683992e-06, "loss": 0.5108, "step": 187 }, { "epoch": 0.047283702213279676, "grad_norm": 0.43926411867141724, "learning_rate": 1.5758591785414922e-06, "loss": 0.5274, "step": 188 }, { "epoch": 0.04753521126760563, "grad_norm": 0.4484438896179199, "learning_rate": 1.584241408214585e-06, "loss": 0.5397, "step": 189 }, { "epoch": 0.04778672032193159, "grad_norm": 0.45970824360847473, "learning_rate": 1.5926236378876783e-06, "loss": 0.5249, "step": 190 }, { "epoch": 0.04803822937625755, "grad_norm": 0.44468578696250916, "learning_rate": 1.6010058675607712e-06, "loss": 0.4686, "step": 191 }, { "epoch": 0.0482897384305835, "grad_norm": 0.4506581127643585, "learning_rate": 1.6093880972338644e-06, "loss": 0.4777, "step": 192 }, { "epoch": 0.048541247484909456, "grad_norm": 0.39604270458221436, "learning_rate": 1.6177703269069575e-06, "loss": 0.4839, "step": 193 }, { "epoch": 0.04879275653923541, "grad_norm": 0.41182881593704224, "learning_rate": 1.6261525565800503e-06, "loss": 0.4888, "step": 194 }, { "epoch": 0.04904426559356137, "grad_norm": 0.44034087657928467, "learning_rate": 1.6345347862531436e-06, "loss": 0.547, "step": 195 }, { "epoch": 0.04929577464788732, "grad_norm": 0.453494131565094, "learning_rate": 1.6429170159262364e-06, "loss": 0.4995, "step": 196 }, { "epoch": 0.04954728370221328, "grad_norm": 0.41761818528175354, "learning_rate": 1.6512992455993297e-06, "loss": 0.4797, "step": 197 }, { "epoch": 0.049798792756539235, "grad_norm": 0.4482254385948181, "learning_rate": 1.6596814752724227e-06, "loss": 0.4607, "step": 198 }, { "epoch": 0.05005030181086519, "grad_norm": 0.41836273670196533, "learning_rate": 1.6680637049455156e-06, "loss": 0.4702, "step": 199 }, { "epoch": 0.05030181086519115, "grad_norm": 0.46722131967544556, "learning_rate": 1.6764459346186088e-06, "loss": 0.5276, "step": 200 }, { "epoch": 0.0505533199195171, "grad_norm": 0.43717604875564575, "learning_rate": 1.6848281642917017e-06, "loss": 0.4681, "step": 201 }, { "epoch": 0.05080482897384306, "grad_norm": 0.3926369249820709, "learning_rate": 1.6932103939647947e-06, "loss": 0.5017, "step": 202 }, { "epoch": 0.051056338028169015, "grad_norm": 0.4253596365451813, "learning_rate": 1.7015926236378878e-06, "loss": 0.507, "step": 203 }, { "epoch": 0.05130784708249497, "grad_norm": 0.4381323456764221, "learning_rate": 1.7099748533109808e-06, "loss": 0.4898, "step": 204 }, { "epoch": 0.05155935613682092, "grad_norm": 0.4161489009857178, "learning_rate": 1.718357082984074e-06, "loss": 0.4943, "step": 205 }, { "epoch": 0.05181086519114688, "grad_norm": 0.4436219036579132, "learning_rate": 1.726739312657167e-06, "loss": 0.4665, "step": 206 }, { "epoch": 0.05206237424547284, "grad_norm": 0.43012455105781555, "learning_rate": 1.73512154233026e-06, "loss": 0.515, "step": 207 }, { "epoch": 0.052313883299798795, "grad_norm": 0.4141829311847687, "learning_rate": 1.7435037720033528e-06, "loss": 0.5038, "step": 208 }, { "epoch": 0.052565392354124746, "grad_norm": 0.41762787103652954, "learning_rate": 1.751886001676446e-06, "loss": 0.5124, "step": 209 }, { "epoch": 0.0528169014084507, "grad_norm": 0.39720189571380615, "learning_rate": 1.7602682313495393e-06, "loss": 0.4578, "step": 210 }, { "epoch": 0.05306841046277666, "grad_norm": 0.41066601872444153, "learning_rate": 1.7686504610226322e-06, "loss": 0.4708, "step": 211 }, { "epoch": 0.05331991951710262, "grad_norm": 0.43149372935295105, "learning_rate": 1.7770326906957252e-06, "loss": 0.4894, "step": 212 }, { "epoch": 0.05357142857142857, "grad_norm": 0.42833876609802246, "learning_rate": 1.785414920368818e-06, "loss": 0.4971, "step": 213 }, { "epoch": 0.053822937625754526, "grad_norm": 0.43520480394363403, "learning_rate": 1.7937971500419113e-06, "loss": 0.518, "step": 214 }, { "epoch": 0.05407444668008048, "grad_norm": 0.432537317276001, "learning_rate": 1.8021793797150044e-06, "loss": 0.4551, "step": 215 }, { "epoch": 0.05432595573440644, "grad_norm": 0.4245152771472931, "learning_rate": 1.8105616093880974e-06, "loss": 0.4727, "step": 216 }, { "epoch": 0.05457746478873239, "grad_norm": 0.4602232873439789, "learning_rate": 1.8189438390611905e-06, "loss": 0.4905, "step": 217 }, { "epoch": 0.05482897384305835, "grad_norm": 0.4128243625164032, "learning_rate": 1.8273260687342833e-06, "loss": 0.4866, "step": 218 }, { "epoch": 0.055080482897384306, "grad_norm": 0.44259390234947205, "learning_rate": 1.8357082984073766e-06, "loss": 0.4894, "step": 219 }, { "epoch": 0.05533199195171026, "grad_norm": 0.46606069803237915, "learning_rate": 1.8440905280804696e-06, "loss": 0.5172, "step": 220 }, { "epoch": 0.05558350100603622, "grad_norm": 0.4518153667449951, "learning_rate": 1.8524727577535625e-06, "loss": 0.5033, "step": 221 }, { "epoch": 0.05583501006036217, "grad_norm": 0.4819628894329071, "learning_rate": 1.8608549874266557e-06, "loss": 0.488, "step": 222 }, { "epoch": 0.05608651911468813, "grad_norm": 0.44667211174964905, "learning_rate": 1.8692372170997485e-06, "loss": 0.5062, "step": 223 }, { "epoch": 0.056338028169014086, "grad_norm": 0.4287307560443878, "learning_rate": 1.8776194467728418e-06, "loss": 0.4432, "step": 224 }, { "epoch": 0.05658953722334004, "grad_norm": 0.46748512983322144, "learning_rate": 1.8860016764459349e-06, "loss": 0.4407, "step": 225 }, { "epoch": 0.056841046277665994, "grad_norm": 0.44293415546417236, "learning_rate": 1.8943839061190277e-06, "loss": 0.4634, "step": 226 }, { "epoch": 0.05709255533199195, "grad_norm": 0.41884273290634155, "learning_rate": 1.902766135792121e-06, "loss": 0.4978, "step": 227 }, { "epoch": 0.05734406438631791, "grad_norm": 0.4945652186870575, "learning_rate": 1.911148365465214e-06, "loss": 0.493, "step": 228 }, { "epoch": 0.057595573440643866, "grad_norm": 0.4238063097000122, "learning_rate": 1.919530595138307e-06, "loss": 0.4558, "step": 229 }, { "epoch": 0.057847082494969816, "grad_norm": 0.39406704902648926, "learning_rate": 1.9279128248113997e-06, "loss": 0.479, "step": 230 }, { "epoch": 0.058098591549295774, "grad_norm": 0.4454141855239868, "learning_rate": 1.936295054484493e-06, "loss": 0.4817, "step": 231 }, { "epoch": 0.05835010060362173, "grad_norm": 0.45866918563842773, "learning_rate": 1.944677284157586e-06, "loss": 0.461, "step": 232 }, { "epoch": 0.05860160965794769, "grad_norm": 0.41936853528022766, "learning_rate": 1.953059513830679e-06, "loss": 0.4745, "step": 233 }, { "epoch": 0.05885311871227364, "grad_norm": 0.4199956953525543, "learning_rate": 1.9614417435037723e-06, "loss": 0.486, "step": 234 }, { "epoch": 0.059104627766599596, "grad_norm": 0.4122017025947571, "learning_rate": 1.969823973176865e-06, "loss": 0.5059, "step": 235 }, { "epoch": 0.059356136820925554, "grad_norm": 0.4136897027492523, "learning_rate": 1.9782062028499584e-06, "loss": 0.4689, "step": 236 }, { "epoch": 0.05960764587525151, "grad_norm": 0.44036155939102173, "learning_rate": 1.9865884325230515e-06, "loss": 0.4885, "step": 237 }, { "epoch": 0.05985915492957746, "grad_norm": 0.39697256684303284, "learning_rate": 1.994970662196144e-06, "loss": 0.4923, "step": 238 }, { "epoch": 0.06011066398390342, "grad_norm": 0.3980274200439453, "learning_rate": 2.0033528918692376e-06, "loss": 0.4702, "step": 239 }, { "epoch": 0.060362173038229376, "grad_norm": 0.4443422257900238, "learning_rate": 2.01173512154233e-06, "loss": 0.4845, "step": 240 }, { "epoch": 0.060613682092555334, "grad_norm": 0.39754951000213623, "learning_rate": 2.0201173512154237e-06, "loss": 0.4652, "step": 241 }, { "epoch": 0.06086519114688129, "grad_norm": 0.41801464557647705, "learning_rate": 2.0284995808885167e-06, "loss": 0.4821, "step": 242 }, { "epoch": 0.06111670020120724, "grad_norm": 0.45718705654144287, "learning_rate": 2.0368818105616093e-06, "loss": 0.5005, "step": 243 }, { "epoch": 0.0613682092555332, "grad_norm": 0.4293982684612274, "learning_rate": 2.045264040234703e-06, "loss": 0.5131, "step": 244 }, { "epoch": 0.061619718309859156, "grad_norm": 0.4417547285556793, "learning_rate": 2.0536462699077954e-06, "loss": 0.4852, "step": 245 }, { "epoch": 0.061871227364185113, "grad_norm": 0.43375566601753235, "learning_rate": 2.062028499580889e-06, "loss": 0.474, "step": 246 }, { "epoch": 0.062122736418511064, "grad_norm": 0.4546149969100952, "learning_rate": 2.070410729253982e-06, "loss": 0.4667, "step": 247 }, { "epoch": 0.06237424547283702, "grad_norm": 0.4586324095726013, "learning_rate": 2.0787929589270746e-06, "loss": 0.514, "step": 248 }, { "epoch": 0.06262575452716297, "grad_norm": 0.40271812677383423, "learning_rate": 2.087175188600168e-06, "loss": 0.4615, "step": 249 }, { "epoch": 0.06287726358148893, "grad_norm": 0.44942378997802734, "learning_rate": 2.0955574182732607e-06, "loss": 0.4823, "step": 250 }, { "epoch": 0.06312877263581489, "grad_norm": 0.3924196660518646, "learning_rate": 2.1039396479463537e-06, "loss": 0.4828, "step": 251 }, { "epoch": 0.06338028169014084, "grad_norm": 0.4237384796142578, "learning_rate": 2.112321877619447e-06, "loss": 0.4931, "step": 252 }, { "epoch": 0.0636317907444668, "grad_norm": 0.40384024381637573, "learning_rate": 2.12070410729254e-06, "loss": 0.473, "step": 253 }, { "epoch": 0.06388329979879276, "grad_norm": 0.4171174466609955, "learning_rate": 2.1290863369656333e-06, "loss": 0.4971, "step": 254 }, { "epoch": 0.06413480885311872, "grad_norm": 0.43529245257377625, "learning_rate": 2.137468566638726e-06, "loss": 0.4433, "step": 255 }, { "epoch": 0.06438631790744467, "grad_norm": 0.46728262305259705, "learning_rate": 2.145850796311819e-06, "loss": 0.4561, "step": 256 }, { "epoch": 0.06463782696177062, "grad_norm": 0.5025231242179871, "learning_rate": 2.1542330259849124e-06, "loss": 0.4916, "step": 257 }, { "epoch": 0.06488933601609657, "grad_norm": 0.3811725378036499, "learning_rate": 2.162615255658005e-06, "loss": 0.4842, "step": 258 }, { "epoch": 0.06514084507042253, "grad_norm": 0.4305613040924072, "learning_rate": 2.1709974853310985e-06, "loss": 0.4609, "step": 259 }, { "epoch": 0.06539235412474849, "grad_norm": 0.42544025182724, "learning_rate": 2.179379715004191e-06, "loss": 0.4745, "step": 260 }, { "epoch": 0.06564386317907445, "grad_norm": 0.42461109161376953, "learning_rate": 2.1877619446772842e-06, "loss": 0.4945, "step": 261 }, { "epoch": 0.0658953722334004, "grad_norm": 0.42680662870407104, "learning_rate": 2.1961441743503773e-06, "loss": 0.4918, "step": 262 }, { "epoch": 0.06614688128772636, "grad_norm": 0.42169326543807983, "learning_rate": 2.2045264040234703e-06, "loss": 0.4724, "step": 263 }, { "epoch": 0.06639839034205232, "grad_norm": 0.4097406268119812, "learning_rate": 2.2129086336965634e-06, "loss": 0.4682, "step": 264 }, { "epoch": 0.06664989939637828, "grad_norm": 0.5153217911720276, "learning_rate": 2.2212908633696564e-06, "loss": 0.4742, "step": 265 }, { "epoch": 0.06690140845070422, "grad_norm": 0.4730076491832733, "learning_rate": 2.2296730930427495e-06, "loss": 0.4757, "step": 266 }, { "epoch": 0.06715291750503018, "grad_norm": 0.48586782813072205, "learning_rate": 2.2380553227158425e-06, "loss": 0.4851, "step": 267 }, { "epoch": 0.06740442655935613, "grad_norm": 0.4455511271953583, "learning_rate": 2.2464375523889356e-06, "loss": 0.4448, "step": 268 }, { "epoch": 0.06765593561368209, "grad_norm": 0.5135384798049927, "learning_rate": 2.2548197820620286e-06, "loss": 0.4944, "step": 269 }, { "epoch": 0.06790744466800805, "grad_norm": 0.4233526289463043, "learning_rate": 2.2632020117351217e-06, "loss": 0.4523, "step": 270 }, { "epoch": 0.068158953722334, "grad_norm": 0.4129721522331238, "learning_rate": 2.2715842414082147e-06, "loss": 0.4809, "step": 271 }, { "epoch": 0.06841046277665996, "grad_norm": 0.4507489800453186, "learning_rate": 2.2799664710813078e-06, "loss": 0.4521, "step": 272 }, { "epoch": 0.06866197183098592, "grad_norm": 0.5152677297592163, "learning_rate": 2.288348700754401e-06, "loss": 0.4878, "step": 273 }, { "epoch": 0.06891348088531186, "grad_norm": 0.4466034471988678, "learning_rate": 2.296730930427494e-06, "loss": 0.4493, "step": 274 }, { "epoch": 0.06916498993963782, "grad_norm": 0.4332417845726013, "learning_rate": 2.305113160100587e-06, "loss": 0.4904, "step": 275 }, { "epoch": 0.06941649899396378, "grad_norm": 0.4686703085899353, "learning_rate": 2.31349538977368e-06, "loss": 0.5085, "step": 276 }, { "epoch": 0.06966800804828974, "grad_norm": 0.46179959177970886, "learning_rate": 2.321877619446773e-06, "loss": 0.4841, "step": 277 }, { "epoch": 0.0699195171026157, "grad_norm": 0.43604379892349243, "learning_rate": 2.330259849119866e-06, "loss": 0.4666, "step": 278 }, { "epoch": 0.07017102615694165, "grad_norm": 0.4303692877292633, "learning_rate": 2.338642078792959e-06, "loss": 0.4745, "step": 279 }, { "epoch": 0.07042253521126761, "grad_norm": 0.46457597613334656, "learning_rate": 2.347024308466052e-06, "loss": 0.5055, "step": 280 }, { "epoch": 0.07067404426559357, "grad_norm": 0.3907102942466736, "learning_rate": 2.355406538139145e-06, "loss": 0.4719, "step": 281 }, { "epoch": 0.07092555331991952, "grad_norm": 0.4557718336582184, "learning_rate": 2.3637887678122383e-06, "loss": 0.4796, "step": 282 }, { "epoch": 0.07117706237424547, "grad_norm": 0.4310013949871063, "learning_rate": 2.3721709974853313e-06, "loss": 0.4753, "step": 283 }, { "epoch": 0.07142857142857142, "grad_norm": 0.4076017737388611, "learning_rate": 2.3805532271584244e-06, "loss": 0.4697, "step": 284 }, { "epoch": 0.07168008048289738, "grad_norm": 0.4143841564655304, "learning_rate": 2.3889354568315174e-06, "loss": 0.4839, "step": 285 }, { "epoch": 0.07193158953722334, "grad_norm": 0.4165581166744232, "learning_rate": 2.3973176865046105e-06, "loss": 0.4777, "step": 286 }, { "epoch": 0.0721830985915493, "grad_norm": 0.3928240239620209, "learning_rate": 2.4056999161777035e-06, "loss": 0.4581, "step": 287 }, { "epoch": 0.07243460764587525, "grad_norm": 0.45718878507614136, "learning_rate": 2.4140821458507966e-06, "loss": 0.4856, "step": 288 }, { "epoch": 0.07268611670020121, "grad_norm": 0.40149986743927, "learning_rate": 2.4224643755238896e-06, "loss": 0.4889, "step": 289 }, { "epoch": 0.07293762575452717, "grad_norm": 0.40798652172088623, "learning_rate": 2.4308466051969827e-06, "loss": 0.4864, "step": 290 }, { "epoch": 0.07318913480885311, "grad_norm": 0.4065083861351013, "learning_rate": 2.4392288348700757e-06, "loss": 0.4788, "step": 291 }, { "epoch": 0.07344064386317907, "grad_norm": 0.4597596824169159, "learning_rate": 2.4476110645431688e-06, "loss": 0.4578, "step": 292 }, { "epoch": 0.07369215291750503, "grad_norm": 0.4461594521999359, "learning_rate": 2.455993294216262e-06, "loss": 0.4879, "step": 293 }, { "epoch": 0.07394366197183098, "grad_norm": 0.3977876901626587, "learning_rate": 2.4643755238893544e-06, "loss": 0.4687, "step": 294 }, { "epoch": 0.07419517102615694, "grad_norm": 0.4070913791656494, "learning_rate": 2.472757753562448e-06, "loss": 0.4712, "step": 295 }, { "epoch": 0.0744466800804829, "grad_norm": 0.495626300573349, "learning_rate": 2.481139983235541e-06, "loss": 0.4682, "step": 296 }, { "epoch": 0.07469818913480886, "grad_norm": 0.42833268642425537, "learning_rate": 2.489522212908634e-06, "loss": 0.4579, "step": 297 }, { "epoch": 0.07494969818913481, "grad_norm": 0.4342649579048157, "learning_rate": 2.497904442581727e-06, "loss": 0.4299, "step": 298 }, { "epoch": 0.07520120724346077, "grad_norm": 0.48605650663375854, "learning_rate": 2.50628667225482e-06, "loss": 0.4821, "step": 299 }, { "epoch": 0.07545271629778671, "grad_norm": 0.45871275663375854, "learning_rate": 2.514668901927913e-06, "loss": 0.4783, "step": 300 }, { "epoch": 0.07570422535211267, "grad_norm": 0.42935168743133545, "learning_rate": 2.5230511316010058e-06, "loss": 0.4676, "step": 301 }, { "epoch": 0.07595573440643863, "grad_norm": 0.429830938577652, "learning_rate": 2.5314333612740992e-06, "loss": 0.4846, "step": 302 }, { "epoch": 0.07620724346076459, "grad_norm": 0.4596365988254547, "learning_rate": 2.5398155909471923e-06, "loss": 0.47, "step": 303 }, { "epoch": 0.07645875251509054, "grad_norm": 0.49432510137557983, "learning_rate": 2.548197820620285e-06, "loss": 0.4693, "step": 304 }, { "epoch": 0.0767102615694165, "grad_norm": 0.44141435623168945, "learning_rate": 2.5565800502933784e-06, "loss": 0.4631, "step": 305 }, { "epoch": 0.07696177062374246, "grad_norm": 0.43977904319763184, "learning_rate": 2.5649622799664714e-06, "loss": 0.4513, "step": 306 }, { "epoch": 0.07721327967806842, "grad_norm": 0.4588465988636017, "learning_rate": 2.573344509639564e-06, "loss": 0.4577, "step": 307 }, { "epoch": 0.07746478873239436, "grad_norm": 0.454364538192749, "learning_rate": 2.581726739312657e-06, "loss": 0.4682, "step": 308 }, { "epoch": 0.07771629778672032, "grad_norm": 0.4163168668746948, "learning_rate": 2.5901089689857506e-06, "loss": 0.4905, "step": 309 }, { "epoch": 0.07796780684104627, "grad_norm": 0.4823428988456726, "learning_rate": 2.5984911986588436e-06, "loss": 0.4737, "step": 310 }, { "epoch": 0.07821931589537223, "grad_norm": 0.46709927916526794, "learning_rate": 2.6068734283319363e-06, "loss": 0.4498, "step": 311 }, { "epoch": 0.07847082494969819, "grad_norm": 0.45121198892593384, "learning_rate": 2.6152556580050297e-06, "loss": 0.4373, "step": 312 }, { "epoch": 0.07872233400402415, "grad_norm": 0.41081297397613525, "learning_rate": 2.623637887678123e-06, "loss": 0.4669, "step": 313 }, { "epoch": 0.0789738430583501, "grad_norm": 0.4702714681625366, "learning_rate": 2.6320201173512154e-06, "loss": 0.45, "step": 314 }, { "epoch": 0.07922535211267606, "grad_norm": 0.4380156099796295, "learning_rate": 2.6404023470243085e-06, "loss": 0.4857, "step": 315 }, { "epoch": 0.079476861167002, "grad_norm": 0.4465760290622711, "learning_rate": 2.648784576697402e-06, "loss": 0.495, "step": 316 }, { "epoch": 0.07972837022132796, "grad_norm": 0.4858304262161255, "learning_rate": 2.6571668063704946e-06, "loss": 0.4662, "step": 317 }, { "epoch": 0.07997987927565392, "grad_norm": 0.44153428077697754, "learning_rate": 2.6655490360435876e-06, "loss": 0.4861, "step": 318 }, { "epoch": 0.08023138832997988, "grad_norm": 0.44172754883766174, "learning_rate": 2.673931265716681e-06, "loss": 0.4725, "step": 319 }, { "epoch": 0.08048289738430583, "grad_norm": 0.4511873126029968, "learning_rate": 2.682313495389774e-06, "loss": 0.4707, "step": 320 }, { "epoch": 0.08073440643863179, "grad_norm": 0.4447844326496124, "learning_rate": 2.6906957250628668e-06, "loss": 0.4294, "step": 321 }, { "epoch": 0.08098591549295775, "grad_norm": 0.4373539686203003, "learning_rate": 2.6990779547359602e-06, "loss": 0.4257, "step": 322 }, { "epoch": 0.08123742454728371, "grad_norm": 0.4205957055091858, "learning_rate": 2.7074601844090533e-06, "loss": 0.4782, "step": 323 }, { "epoch": 0.08148893360160966, "grad_norm": 0.4675666093826294, "learning_rate": 2.715842414082146e-06, "loss": 0.4852, "step": 324 }, { "epoch": 0.08174044265593561, "grad_norm": 0.4324727952480316, "learning_rate": 2.724224643755239e-06, "loss": 0.4666, "step": 325 }, { "epoch": 0.08199195171026157, "grad_norm": 0.49073663353919983, "learning_rate": 2.7326068734283324e-06, "loss": 0.4565, "step": 326 }, { "epoch": 0.08224346076458752, "grad_norm": 0.38460773229599, "learning_rate": 2.740989103101425e-06, "loss": 0.4608, "step": 327 }, { "epoch": 0.08249496981891348, "grad_norm": 0.4604051113128662, "learning_rate": 2.749371332774518e-06, "loss": 0.5168, "step": 328 }, { "epoch": 0.08274647887323944, "grad_norm": 0.4031774401664734, "learning_rate": 2.7577535624476116e-06, "loss": 0.4719, "step": 329 }, { "epoch": 0.0829979879275654, "grad_norm": 0.40108442306518555, "learning_rate": 2.766135792120704e-06, "loss": 0.4358, "step": 330 }, { "epoch": 0.08324949698189135, "grad_norm": 0.533218502998352, "learning_rate": 2.7745180217937973e-06, "loss": 0.4492, "step": 331 }, { "epoch": 0.08350100603621731, "grad_norm": 0.43500781059265137, "learning_rate": 2.7829002514668907e-06, "loss": 0.4561, "step": 332 }, { "epoch": 0.08375251509054325, "grad_norm": 0.47511211037635803, "learning_rate": 2.7912824811399838e-06, "loss": 0.4575, "step": 333 }, { "epoch": 0.08400402414486921, "grad_norm": 0.41863763332366943, "learning_rate": 2.7996647108130764e-06, "loss": 0.4525, "step": 334 }, { "epoch": 0.08425553319919517, "grad_norm": 0.4359985589981079, "learning_rate": 2.8080469404861695e-06, "loss": 0.4666, "step": 335 }, { "epoch": 0.08450704225352113, "grad_norm": 0.44898366928100586, "learning_rate": 2.816429170159263e-06, "loss": 0.4736, "step": 336 }, { "epoch": 0.08475855130784708, "grad_norm": 0.44528570771217346, "learning_rate": 2.8248113998323556e-06, "loss": 0.4477, "step": 337 }, { "epoch": 0.08501006036217304, "grad_norm": 0.4499231278896332, "learning_rate": 2.8331936295054486e-06, "loss": 0.4482, "step": 338 }, { "epoch": 0.085261569416499, "grad_norm": 0.4471151828765869, "learning_rate": 2.841575859178542e-06, "loss": 0.4732, "step": 339 }, { "epoch": 0.08551307847082495, "grad_norm": 0.42974570393562317, "learning_rate": 2.8499580888516347e-06, "loss": 0.4857, "step": 340 }, { "epoch": 0.08576458752515091, "grad_norm": 0.40349292755126953, "learning_rate": 2.8583403185247278e-06, "loss": 0.4637, "step": 341 }, { "epoch": 0.08601609657947686, "grad_norm": 0.38529425859451294, "learning_rate": 2.8667225481978204e-06, "loss": 0.4581, "step": 342 }, { "epoch": 0.08626760563380281, "grad_norm": 0.4391642212867737, "learning_rate": 2.875104777870914e-06, "loss": 0.4549, "step": 343 }, { "epoch": 0.08651911468812877, "grad_norm": 0.43910202383995056, "learning_rate": 2.883487007544007e-06, "loss": 0.45, "step": 344 }, { "epoch": 0.08677062374245473, "grad_norm": 0.4166280925273895, "learning_rate": 2.8918692372171e-06, "loss": 0.4512, "step": 345 }, { "epoch": 0.08702213279678069, "grad_norm": 0.40169304609298706, "learning_rate": 2.9002514668901934e-06, "loss": 0.4629, "step": 346 }, { "epoch": 0.08727364185110664, "grad_norm": 0.45463281869888306, "learning_rate": 2.908633696563286e-06, "loss": 0.4465, "step": 347 }, { "epoch": 0.0875251509054326, "grad_norm": 0.4328685700893402, "learning_rate": 2.917015926236379e-06, "loss": 0.4352, "step": 348 }, { "epoch": 0.08777665995975856, "grad_norm": 0.4192608892917633, "learning_rate": 2.9253981559094726e-06, "loss": 0.483, "step": 349 }, { "epoch": 0.0880281690140845, "grad_norm": 0.4864928126335144, "learning_rate": 2.933780385582565e-06, "loss": 0.464, "step": 350 }, { "epoch": 0.08827967806841046, "grad_norm": 0.4200999140739441, "learning_rate": 2.9421626152556582e-06, "loss": 0.4781, "step": 351 }, { "epoch": 0.08853118712273642, "grad_norm": 0.430940181016922, "learning_rate": 2.950544844928751e-06, "loss": 0.5017, "step": 352 }, { "epoch": 0.08878269617706237, "grad_norm": 0.4381304085254669, "learning_rate": 2.9589270746018443e-06, "loss": 0.4565, "step": 353 }, { "epoch": 0.08903420523138833, "grad_norm": 0.4690648019313812, "learning_rate": 2.9673093042749374e-06, "loss": 0.4408, "step": 354 }, { "epoch": 0.08928571428571429, "grad_norm": 0.45738598704338074, "learning_rate": 2.97569153394803e-06, "loss": 0.4732, "step": 355 }, { "epoch": 0.08953722334004025, "grad_norm": 0.4283290505409241, "learning_rate": 2.9840737636211235e-06, "loss": 0.4854, "step": 356 }, { "epoch": 0.0897887323943662, "grad_norm": 0.4096241891384125, "learning_rate": 2.9924559932942165e-06, "loss": 0.4874, "step": 357 }, { "epoch": 0.09004024144869216, "grad_norm": 0.47754043340682983, "learning_rate": 3.0008382229673096e-06, "loss": 0.4878, "step": 358 }, { "epoch": 0.0902917505030181, "grad_norm": 0.43061840534210205, "learning_rate": 3.009220452640403e-06, "loss": 0.4499, "step": 359 }, { "epoch": 0.09054325955734406, "grad_norm": 0.4733719825744629, "learning_rate": 3.0176026823134957e-06, "loss": 0.4817, "step": 360 }, { "epoch": 0.09079476861167002, "grad_norm": 0.44821831583976746, "learning_rate": 3.0259849119865887e-06, "loss": 0.4783, "step": 361 }, { "epoch": 0.09104627766599598, "grad_norm": 0.4314734935760498, "learning_rate": 3.0343671416596814e-06, "loss": 0.4892, "step": 362 }, { "epoch": 0.09129778672032193, "grad_norm": 0.4240245223045349, "learning_rate": 3.042749371332775e-06, "loss": 0.448, "step": 363 }, { "epoch": 0.09154929577464789, "grad_norm": 0.41133949160575867, "learning_rate": 3.051131601005868e-06, "loss": 0.4519, "step": 364 }, { "epoch": 0.09180080482897385, "grad_norm": 0.4324391782283783, "learning_rate": 3.0595138306789605e-06, "loss": 0.4609, "step": 365 }, { "epoch": 0.0920523138832998, "grad_norm": 0.3948531150817871, "learning_rate": 3.067896060352054e-06, "loss": 0.4525, "step": 366 }, { "epoch": 0.09230382293762575, "grad_norm": 0.4043181836605072, "learning_rate": 3.076278290025147e-06, "loss": 0.4684, "step": 367 }, { "epoch": 0.0925553319919517, "grad_norm": 0.49464017152786255, "learning_rate": 3.0846605196982397e-06, "loss": 0.4863, "step": 368 }, { "epoch": 0.09280684104627766, "grad_norm": 0.4050739109516144, "learning_rate": 3.0930427493713327e-06, "loss": 0.4399, "step": 369 }, { "epoch": 0.09305835010060362, "grad_norm": 0.46191224455833435, "learning_rate": 3.101424979044426e-06, "loss": 0.4729, "step": 370 }, { "epoch": 0.09330985915492958, "grad_norm": 0.4379456043243408, "learning_rate": 3.1098072087175192e-06, "loss": 0.4765, "step": 371 }, { "epoch": 0.09356136820925554, "grad_norm": 0.443185031414032, "learning_rate": 3.118189438390612e-06, "loss": 0.4497, "step": 372 }, { "epoch": 0.09381287726358149, "grad_norm": 0.4520972967147827, "learning_rate": 3.1265716680637053e-06, "loss": 0.4766, "step": 373 }, { "epoch": 0.09406438631790745, "grad_norm": 0.4627498984336853, "learning_rate": 3.1349538977367984e-06, "loss": 0.4559, "step": 374 }, { "epoch": 0.0943158953722334, "grad_norm": 0.42422908544540405, "learning_rate": 3.143336127409891e-06, "loss": 0.442, "step": 375 }, { "epoch": 0.09456740442655935, "grad_norm": 0.4159485399723053, "learning_rate": 3.1517183570829845e-06, "loss": 0.4841, "step": 376 }, { "epoch": 0.09481891348088531, "grad_norm": 0.3887682855129242, "learning_rate": 3.1601005867560775e-06, "loss": 0.4615, "step": 377 }, { "epoch": 0.09507042253521127, "grad_norm": 0.4549719989299774, "learning_rate": 3.16848281642917e-06, "loss": 0.4507, "step": 378 }, { "epoch": 0.09532193158953722, "grad_norm": 0.4081493020057678, "learning_rate": 3.176865046102263e-06, "loss": 0.4658, "step": 379 }, { "epoch": 0.09557344064386318, "grad_norm": 0.4539159834384918, "learning_rate": 3.1852472757753567e-06, "loss": 0.4618, "step": 380 }, { "epoch": 0.09582494969818914, "grad_norm": 0.4574434459209442, "learning_rate": 3.1936295054484493e-06, "loss": 0.4952, "step": 381 }, { "epoch": 0.0960764587525151, "grad_norm": 0.4301118552684784, "learning_rate": 3.2020117351215424e-06, "loss": 0.4533, "step": 382 }, { "epoch": 0.09632796780684105, "grad_norm": 0.422446072101593, "learning_rate": 3.210393964794636e-06, "loss": 0.4402, "step": 383 }, { "epoch": 0.096579476861167, "grad_norm": 0.43976905941963196, "learning_rate": 3.218776194467729e-06, "loss": 0.4586, "step": 384 }, { "epoch": 0.09683098591549295, "grad_norm": 0.4008273482322693, "learning_rate": 3.2271584241408215e-06, "loss": 0.4547, "step": 385 }, { "epoch": 0.09708249496981891, "grad_norm": 0.3887328803539276, "learning_rate": 3.235540653813915e-06, "loss": 0.4525, "step": 386 }, { "epoch": 0.09733400402414487, "grad_norm": 0.4399072229862213, "learning_rate": 3.243922883487008e-06, "loss": 0.4558, "step": 387 }, { "epoch": 0.09758551307847083, "grad_norm": 0.42582908272743225, "learning_rate": 3.2523051131601007e-06, "loss": 0.4602, "step": 388 }, { "epoch": 0.09783702213279678, "grad_norm": 0.41464588046073914, "learning_rate": 3.2606873428331937e-06, "loss": 0.48, "step": 389 }, { "epoch": 0.09808853118712274, "grad_norm": 0.4072147309780121, "learning_rate": 3.269069572506287e-06, "loss": 0.461, "step": 390 }, { "epoch": 0.0983400402414487, "grad_norm": 0.39760878682136536, "learning_rate": 3.27745180217938e-06, "loss": 0.4555, "step": 391 }, { "epoch": 0.09859154929577464, "grad_norm": 0.42994043231010437, "learning_rate": 3.285834031852473e-06, "loss": 0.4648, "step": 392 }, { "epoch": 0.0988430583501006, "grad_norm": 0.4006013870239258, "learning_rate": 3.2942162615255663e-06, "loss": 0.4379, "step": 393 }, { "epoch": 0.09909456740442656, "grad_norm": 0.44781169295310974, "learning_rate": 3.3025984911986594e-06, "loss": 0.4628, "step": 394 }, { "epoch": 0.09934607645875251, "grad_norm": 0.45750489830970764, "learning_rate": 3.310980720871752e-06, "loss": 0.4313, "step": 395 }, { "epoch": 0.09959758551307847, "grad_norm": 0.4136182963848114, "learning_rate": 3.3193629505448455e-06, "loss": 0.4463, "step": 396 }, { "epoch": 0.09984909456740443, "grad_norm": 0.45176711678504944, "learning_rate": 3.3277451802179385e-06, "loss": 0.4713, "step": 397 }, { "epoch": 0.10010060362173039, "grad_norm": 0.4363192021846771, "learning_rate": 3.336127409891031e-06, "loss": 0.4316, "step": 398 }, { "epoch": 0.10035211267605634, "grad_norm": 0.4392482340335846, "learning_rate": 3.344509639564124e-06, "loss": 0.4388, "step": 399 }, { "epoch": 0.1006036217303823, "grad_norm": 0.43276289105415344, "learning_rate": 3.3528918692372177e-06, "loss": 0.481, "step": 400 }, { "epoch": 0.10085513078470824, "grad_norm": 0.40245917439460754, "learning_rate": 3.3612740989103103e-06, "loss": 0.4321, "step": 401 }, { "epoch": 0.1011066398390342, "grad_norm": 0.4259881377220154, "learning_rate": 3.3696563285834033e-06, "loss": 0.4465, "step": 402 }, { "epoch": 0.10135814889336016, "grad_norm": 0.452953964471817, "learning_rate": 3.378038558256497e-06, "loss": 0.4421, "step": 403 }, { "epoch": 0.10160965794768612, "grad_norm": 0.39972519874572754, "learning_rate": 3.3864207879295894e-06, "loss": 0.431, "step": 404 }, { "epoch": 0.10186116700201207, "grad_norm": 0.48080217838287354, "learning_rate": 3.3948030176026825e-06, "loss": 0.4602, "step": 405 }, { "epoch": 0.10211267605633803, "grad_norm": 0.3960705101490021, "learning_rate": 3.4031852472757755e-06, "loss": 0.4666, "step": 406 }, { "epoch": 0.10236418511066399, "grad_norm": 0.40246954560279846, "learning_rate": 3.411567476948869e-06, "loss": 0.4266, "step": 407 }, { "epoch": 0.10261569416498995, "grad_norm": 0.43389272689819336, "learning_rate": 3.4199497066219616e-06, "loss": 0.4464, "step": 408 }, { "epoch": 0.10286720321931589, "grad_norm": 0.3892342746257782, "learning_rate": 3.4283319362950547e-06, "loss": 0.4403, "step": 409 }, { "epoch": 0.10311871227364185, "grad_norm": 0.45472291111946106, "learning_rate": 3.436714165968148e-06, "loss": 0.479, "step": 410 }, { "epoch": 0.1033702213279678, "grad_norm": 0.4327657222747803, "learning_rate": 3.445096395641241e-06, "loss": 0.4696, "step": 411 }, { "epoch": 0.10362173038229376, "grad_norm": 0.44970017671585083, "learning_rate": 3.453478625314334e-06, "loss": 0.4425, "step": 412 }, { "epoch": 0.10387323943661972, "grad_norm": 0.4211669862270355, "learning_rate": 3.4618608549874273e-06, "loss": 0.4674, "step": 413 }, { "epoch": 0.10412474849094568, "grad_norm": 0.3977251946926117, "learning_rate": 3.47024308466052e-06, "loss": 0.4427, "step": 414 }, { "epoch": 0.10437625754527163, "grad_norm": 0.41054680943489075, "learning_rate": 3.478625314333613e-06, "loss": 0.4539, "step": 415 }, { "epoch": 0.10462776659959759, "grad_norm": 0.42676669359207153, "learning_rate": 3.4870075440067056e-06, "loss": 0.4568, "step": 416 }, { "epoch": 0.10487927565392353, "grad_norm": 0.4475442171096802, "learning_rate": 3.495389773679799e-06, "loss": 0.4512, "step": 417 }, { "epoch": 0.10513078470824949, "grad_norm": 0.406112402677536, "learning_rate": 3.503772003352892e-06, "loss": 0.3903, "step": 418 }, { "epoch": 0.10538229376257545, "grad_norm": 0.4194413125514984, "learning_rate": 3.512154233025985e-06, "loss": 0.4447, "step": 419 }, { "epoch": 0.1056338028169014, "grad_norm": 0.4342004954814911, "learning_rate": 3.5205364626990787e-06, "loss": 0.4612, "step": 420 }, { "epoch": 0.10588531187122736, "grad_norm": 0.408413290977478, "learning_rate": 3.5289186923721713e-06, "loss": 0.4595, "step": 421 }, { "epoch": 0.10613682092555332, "grad_norm": 0.45207443833351135, "learning_rate": 3.5373009220452643e-06, "loss": 0.4918, "step": 422 }, { "epoch": 0.10638832997987928, "grad_norm": 0.4605708718299866, "learning_rate": 3.545683151718358e-06, "loss": 0.4277, "step": 423 }, { "epoch": 0.10663983903420524, "grad_norm": 0.45539170503616333, "learning_rate": 3.5540653813914504e-06, "loss": 0.4672, "step": 424 }, { "epoch": 0.1068913480885312, "grad_norm": 0.4451138973236084, "learning_rate": 3.5624476110645435e-06, "loss": 0.4436, "step": 425 }, { "epoch": 0.10714285714285714, "grad_norm": 0.4522078335285187, "learning_rate": 3.570829840737636e-06, "loss": 0.4035, "step": 426 }, { "epoch": 0.1073943661971831, "grad_norm": 0.48479580879211426, "learning_rate": 3.5792120704107296e-06, "loss": 0.4759, "step": 427 }, { "epoch": 0.10764587525150905, "grad_norm": 0.44372591376304626, "learning_rate": 3.5875943000838226e-06, "loss": 0.4588, "step": 428 }, { "epoch": 0.10789738430583501, "grad_norm": 0.42548373341560364, "learning_rate": 3.5959765297569153e-06, "loss": 0.4506, "step": 429 }, { "epoch": 0.10814889336016097, "grad_norm": 0.49597272276878357, "learning_rate": 3.6043587594300087e-06, "loss": 0.4831, "step": 430 }, { "epoch": 0.10840040241448692, "grad_norm": 0.44530630111694336, "learning_rate": 3.6127409891031018e-06, "loss": 0.4139, "step": 431 }, { "epoch": 0.10865191146881288, "grad_norm": 0.40483203530311584, "learning_rate": 3.621123218776195e-06, "loss": 0.4462, "step": 432 }, { "epoch": 0.10890342052313884, "grad_norm": 0.38956066966056824, "learning_rate": 3.6295054484492875e-06, "loss": 0.4374, "step": 433 }, { "epoch": 0.10915492957746478, "grad_norm": 0.46306437253952026, "learning_rate": 3.637887678122381e-06, "loss": 0.4532, "step": 434 }, { "epoch": 0.10940643863179074, "grad_norm": 0.43688806891441345, "learning_rate": 3.646269907795474e-06, "loss": 0.4223, "step": 435 }, { "epoch": 0.1096579476861167, "grad_norm": 0.43097516894340515, "learning_rate": 3.6546521374685666e-06, "loss": 0.4335, "step": 436 }, { "epoch": 0.10990945674044265, "grad_norm": 0.4253983199596405, "learning_rate": 3.66303436714166e-06, "loss": 0.439, "step": 437 }, { "epoch": 0.11016096579476861, "grad_norm": 0.4934920072555542, "learning_rate": 3.671416596814753e-06, "loss": 0.4308, "step": 438 }, { "epoch": 0.11041247484909457, "grad_norm": 0.42577728629112244, "learning_rate": 3.6797988264878458e-06, "loss": 0.4365, "step": 439 }, { "epoch": 0.11066398390342053, "grad_norm": 0.3953479826450348, "learning_rate": 3.6881810561609392e-06, "loss": 0.4392, "step": 440 }, { "epoch": 0.11091549295774648, "grad_norm": 0.4147731363773346, "learning_rate": 3.6965632858340323e-06, "loss": 0.4588, "step": 441 }, { "epoch": 0.11116700201207244, "grad_norm": 0.529985249042511, "learning_rate": 3.704945515507125e-06, "loss": 0.4484, "step": 442 }, { "epoch": 0.11141851106639838, "grad_norm": 0.4451605975627899, "learning_rate": 3.713327745180218e-06, "loss": 0.4692, "step": 443 }, { "epoch": 0.11167002012072434, "grad_norm": 0.46244457364082336, "learning_rate": 3.7217099748533114e-06, "loss": 0.4871, "step": 444 }, { "epoch": 0.1119215291750503, "grad_norm": 0.39799419045448303, "learning_rate": 3.7300922045264045e-06, "loss": 0.4153, "step": 445 }, { "epoch": 0.11217303822937626, "grad_norm": 0.48522502183914185, "learning_rate": 3.738474434199497e-06, "loss": 0.4595, "step": 446 }, { "epoch": 0.11242454728370221, "grad_norm": 0.4234901964664459, "learning_rate": 3.7468566638725906e-06, "loss": 0.4008, "step": 447 }, { "epoch": 0.11267605633802817, "grad_norm": 0.4186321198940277, "learning_rate": 3.7552388935456836e-06, "loss": 0.4629, "step": 448 }, { "epoch": 0.11292756539235413, "grad_norm": 0.4625905156135559, "learning_rate": 3.7636211232187762e-06, "loss": 0.4222, "step": 449 }, { "epoch": 0.11317907444668009, "grad_norm": 0.44527822732925415, "learning_rate": 3.7720033528918697e-06, "loss": 0.4633, "step": 450 }, { "epoch": 0.11343058350100603, "grad_norm": 0.47749683260917664, "learning_rate": 3.7803855825649628e-06, "loss": 0.4123, "step": 451 }, { "epoch": 0.11368209255533199, "grad_norm": 0.4436284601688385, "learning_rate": 3.7887678122380554e-06, "loss": 0.4425, "step": 452 }, { "epoch": 0.11393360160965794, "grad_norm": 0.41336312890052795, "learning_rate": 3.7971500419111484e-06, "loss": 0.4376, "step": 453 }, { "epoch": 0.1141851106639839, "grad_norm": 0.43879643082618713, "learning_rate": 3.805532271584242e-06, "loss": 0.4315, "step": 454 }, { "epoch": 0.11443661971830986, "grad_norm": 0.44419604539871216, "learning_rate": 3.8139145012573345e-06, "loss": 0.4351, "step": 455 }, { "epoch": 0.11468812877263582, "grad_norm": 0.46073997020721436, "learning_rate": 3.822296730930428e-06, "loss": 0.4417, "step": 456 }, { "epoch": 0.11493963782696177, "grad_norm": 0.538664698600769, "learning_rate": 3.8306789606035215e-06, "loss": 0.4817, "step": 457 }, { "epoch": 0.11519114688128773, "grad_norm": 0.49703216552734375, "learning_rate": 3.839061190276614e-06, "loss": 0.4625, "step": 458 }, { "epoch": 0.11544265593561369, "grad_norm": 0.4681941568851471, "learning_rate": 3.847443419949707e-06, "loss": 0.4454, "step": 459 }, { "epoch": 0.11569416498993963, "grad_norm": 0.512788712978363, "learning_rate": 3.855825649622799e-06, "loss": 0.4434, "step": 460 }, { "epoch": 0.11594567404426559, "grad_norm": 0.5022100210189819, "learning_rate": 3.864207879295893e-06, "loss": 0.4626, "step": 461 }, { "epoch": 0.11619718309859155, "grad_norm": 0.48100098967552185, "learning_rate": 3.872590108968986e-06, "loss": 0.4431, "step": 462 }, { "epoch": 0.1164486921529175, "grad_norm": 0.4972802698612213, "learning_rate": 3.880972338642079e-06, "loss": 0.4199, "step": 463 }, { "epoch": 0.11670020120724346, "grad_norm": 0.492680162191391, "learning_rate": 3.889354568315172e-06, "loss": 0.4546, "step": 464 }, { "epoch": 0.11695171026156942, "grad_norm": 0.4608025848865509, "learning_rate": 3.897736797988265e-06, "loss": 0.4149, "step": 465 }, { "epoch": 0.11720321931589538, "grad_norm": 0.4747462868690491, "learning_rate": 3.906119027661358e-06, "loss": 0.4254, "step": 466 }, { "epoch": 0.11745472837022133, "grad_norm": 0.5062370300292969, "learning_rate": 3.914501257334451e-06, "loss": 0.4295, "step": 467 }, { "epoch": 0.11770623742454728, "grad_norm": 0.4820232093334198, "learning_rate": 3.922883487007545e-06, "loss": 0.4812, "step": 468 }, { "epoch": 0.11795774647887323, "grad_norm": 0.5073891282081604, "learning_rate": 3.931265716680637e-06, "loss": 0.4456, "step": 469 }, { "epoch": 0.11820925553319919, "grad_norm": 0.4702168107032776, "learning_rate": 3.93964794635373e-06, "loss": 0.4479, "step": 470 }, { "epoch": 0.11846076458752515, "grad_norm": 0.4862816035747528, "learning_rate": 3.948030176026823e-06, "loss": 0.4485, "step": 471 }, { "epoch": 0.11871227364185111, "grad_norm": 0.49707740545272827, "learning_rate": 3.956412405699917e-06, "loss": 0.4292, "step": 472 }, { "epoch": 0.11896378269617706, "grad_norm": 0.4554164707660675, "learning_rate": 3.9647946353730094e-06, "loss": 0.4791, "step": 473 }, { "epoch": 0.11921529175050302, "grad_norm": 0.4359682500362396, "learning_rate": 3.973176865046103e-06, "loss": 0.465, "step": 474 }, { "epoch": 0.11946680080482898, "grad_norm": 0.47159573435783386, "learning_rate": 3.9815590947191955e-06, "loss": 0.46, "step": 475 }, { "epoch": 0.11971830985915492, "grad_norm": 0.47286924719810486, "learning_rate": 3.989941324392288e-06, "loss": 0.4639, "step": 476 }, { "epoch": 0.11996981891348088, "grad_norm": 0.46829286217689514, "learning_rate": 3.998323554065382e-06, "loss": 0.4678, "step": 477 }, { "epoch": 0.12022132796780684, "grad_norm": 0.44564419984817505, "learning_rate": 4.006705783738475e-06, "loss": 0.4401, "step": 478 }, { "epoch": 0.1204728370221328, "grad_norm": 0.4393386244773865, "learning_rate": 4.015088013411568e-06, "loss": 0.46, "step": 479 }, { "epoch": 0.12072434607645875, "grad_norm": 0.4447605311870575, "learning_rate": 4.02347024308466e-06, "loss": 0.4711, "step": 480 }, { "epoch": 0.12097585513078471, "grad_norm": 0.4248807430267334, "learning_rate": 4.031852472757754e-06, "loss": 0.4239, "step": 481 }, { "epoch": 0.12122736418511067, "grad_norm": 0.4450969398021698, "learning_rate": 4.040234702430847e-06, "loss": 0.4406, "step": 482 }, { "epoch": 0.12147887323943662, "grad_norm": 0.5100280046463013, "learning_rate": 4.04861693210394e-06, "loss": 0.4406, "step": 483 }, { "epoch": 0.12173038229376258, "grad_norm": 0.47610798478126526, "learning_rate": 4.056999161777033e-06, "loss": 0.4112, "step": 484 }, { "epoch": 0.12198189134808853, "grad_norm": 0.43737804889678955, "learning_rate": 4.065381391450126e-06, "loss": 0.4757, "step": 485 }, { "epoch": 0.12223340040241448, "grad_norm": 0.4593689441680908, "learning_rate": 4.073763621123219e-06, "loss": 0.4445, "step": 486 }, { "epoch": 0.12248490945674044, "grad_norm": 0.46989330649375916, "learning_rate": 4.082145850796312e-06, "loss": 0.4559, "step": 487 }, { "epoch": 0.1227364185110664, "grad_norm": 0.48121753334999084, "learning_rate": 4.090528080469406e-06, "loss": 0.4492, "step": 488 }, { "epoch": 0.12298792756539235, "grad_norm": 0.4634334444999695, "learning_rate": 4.098910310142498e-06, "loss": 0.4346, "step": 489 }, { "epoch": 0.12323943661971831, "grad_norm": 0.4351527988910675, "learning_rate": 4.107292539815591e-06, "loss": 0.4539, "step": 490 }, { "epoch": 0.12349094567404427, "grad_norm": 0.47944945096969604, "learning_rate": 4.115674769488684e-06, "loss": 0.4655, "step": 491 }, { "epoch": 0.12374245472837023, "grad_norm": 0.45442450046539307, "learning_rate": 4.124056999161778e-06, "loss": 0.4277, "step": 492 }, { "epoch": 0.12399396378269617, "grad_norm": 0.4729836583137512, "learning_rate": 4.1324392288348704e-06, "loss": 0.4299, "step": 493 }, { "epoch": 0.12424547283702213, "grad_norm": 0.40985509753227234, "learning_rate": 4.140821458507964e-06, "loss": 0.393, "step": 494 }, { "epoch": 0.12449698189134809, "grad_norm": 0.49194350838661194, "learning_rate": 4.1492036881810565e-06, "loss": 0.4523, "step": 495 }, { "epoch": 0.12474849094567404, "grad_norm": 0.47581902146339417, "learning_rate": 4.157585917854149e-06, "loss": 0.4494, "step": 496 }, { "epoch": 0.125, "grad_norm": 0.4293755888938904, "learning_rate": 4.165968147527243e-06, "loss": 0.4087, "step": 497 }, { "epoch": 0.12525150905432594, "grad_norm": 0.41463249921798706, "learning_rate": 4.174350377200336e-06, "loss": 0.4436, "step": 498 }, { "epoch": 0.12550301810865191, "grad_norm": 0.4227687120437622, "learning_rate": 4.182732606873429e-06, "loss": 0.4367, "step": 499 }, { "epoch": 0.12575452716297786, "grad_norm": 0.4326210916042328, "learning_rate": 4.191114836546521e-06, "loss": 0.4265, "step": 500 }, { "epoch": 0.12600603621730383, "grad_norm": 0.4279535412788391, "learning_rate": 4.199497066219615e-06, "loss": 0.4702, "step": 501 }, { "epoch": 0.12625754527162977, "grad_norm": 0.4595158100128174, "learning_rate": 4.2078792958927074e-06, "loss": 0.4425, "step": 502 }, { "epoch": 0.12650905432595574, "grad_norm": 0.41721752285957336, "learning_rate": 4.216261525565801e-06, "loss": 0.4491, "step": 503 }, { "epoch": 0.1267605633802817, "grad_norm": 0.43518510460853577, "learning_rate": 4.224643755238894e-06, "loss": 0.4387, "step": 504 }, { "epoch": 0.12701207243460766, "grad_norm": 0.4557619094848633, "learning_rate": 4.233025984911987e-06, "loss": 0.4852, "step": 505 }, { "epoch": 0.1272635814889336, "grad_norm": 0.4129253327846527, "learning_rate": 4.24140821458508e-06, "loss": 0.4359, "step": 506 }, { "epoch": 0.12751509054325955, "grad_norm": 0.4084147810935974, "learning_rate": 4.249790444258173e-06, "loss": 0.4316, "step": 507 }, { "epoch": 0.12776659959758552, "grad_norm": 0.4409344792366028, "learning_rate": 4.258172673931267e-06, "loss": 0.4375, "step": 508 }, { "epoch": 0.12801810865191146, "grad_norm": 0.4991019368171692, "learning_rate": 4.266554903604359e-06, "loss": 0.4551, "step": 509 }, { "epoch": 0.12826961770623743, "grad_norm": 0.4926038682460785, "learning_rate": 4.274937133277452e-06, "loss": 0.4643, "step": 510 }, { "epoch": 0.12852112676056338, "grad_norm": 0.46267032623291016, "learning_rate": 4.283319362950545e-06, "loss": 0.4648, "step": 511 }, { "epoch": 0.12877263581488935, "grad_norm": 0.45014211535453796, "learning_rate": 4.291701592623638e-06, "loss": 0.4574, "step": 512 }, { "epoch": 0.1290241448692153, "grad_norm": 0.5043582320213318, "learning_rate": 4.300083822296731e-06, "loss": 0.463, "step": 513 }, { "epoch": 0.12927565392354123, "grad_norm": 0.41695088148117065, "learning_rate": 4.308466051969825e-06, "loss": 0.4247, "step": 514 }, { "epoch": 0.1295271629778672, "grad_norm": 0.4031003713607788, "learning_rate": 4.3168482816429175e-06, "loss": 0.4563, "step": 515 }, { "epoch": 0.12977867203219315, "grad_norm": 0.4944654405117035, "learning_rate": 4.32523051131601e-06, "loss": 0.4323, "step": 516 }, { "epoch": 0.13003018108651912, "grad_norm": 0.5555236339569092, "learning_rate": 4.333612740989104e-06, "loss": 0.4263, "step": 517 }, { "epoch": 0.13028169014084506, "grad_norm": 0.4785892963409424, "learning_rate": 4.341994970662197e-06, "loss": 0.4331, "step": 518 }, { "epoch": 0.13053319919517103, "grad_norm": 0.4259514808654785, "learning_rate": 4.35037720033529e-06, "loss": 0.4495, "step": 519 }, { "epoch": 0.13078470824949698, "grad_norm": 0.4405849575996399, "learning_rate": 4.358759430008382e-06, "loss": 0.4482, "step": 520 }, { "epoch": 0.13103621730382295, "grad_norm": 0.4837403893470764, "learning_rate": 4.367141659681476e-06, "loss": 0.4452, "step": 521 }, { "epoch": 0.1312877263581489, "grad_norm": 0.44248178601264954, "learning_rate": 4.3755238893545684e-06, "loss": 0.4755, "step": 522 }, { "epoch": 0.13153923541247484, "grad_norm": 0.4511367678642273, "learning_rate": 4.383906119027662e-06, "loss": 0.4325, "step": 523 }, { "epoch": 0.1317907444668008, "grad_norm": 0.4366004168987274, "learning_rate": 4.3922883487007545e-06, "loss": 0.4221, "step": 524 }, { "epoch": 0.13204225352112675, "grad_norm": 0.4173247218132019, "learning_rate": 4.400670578373848e-06, "loss": 0.4719, "step": 525 }, { "epoch": 0.13229376257545272, "grad_norm": 0.4634314179420471, "learning_rate": 4.409052808046941e-06, "loss": 0.4307, "step": 526 }, { "epoch": 0.13254527162977867, "grad_norm": 0.464970201253891, "learning_rate": 4.417435037720033e-06, "loss": 0.4247, "step": 527 }, { "epoch": 0.13279678068410464, "grad_norm": 0.49737635254859924, "learning_rate": 4.425817267393127e-06, "loss": 0.4293, "step": 528 }, { "epoch": 0.13304828973843058, "grad_norm": 0.4311039447784424, "learning_rate": 4.43419949706622e-06, "loss": 0.4513, "step": 529 }, { "epoch": 0.13329979879275655, "grad_norm": 0.46590545773506165, "learning_rate": 4.442581726739313e-06, "loss": 0.4163, "step": 530 }, { "epoch": 0.1335513078470825, "grad_norm": 0.4511418640613556, "learning_rate": 4.450963956412406e-06, "loss": 0.4741, "step": 531 }, { "epoch": 0.13380281690140844, "grad_norm": 0.4536955952644348, "learning_rate": 4.459346186085499e-06, "loss": 0.4525, "step": 532 }, { "epoch": 0.1340543259557344, "grad_norm": 0.4565694332122803, "learning_rate": 4.467728415758592e-06, "loss": 0.4405, "step": 533 }, { "epoch": 0.13430583501006035, "grad_norm": 0.4305288791656494, "learning_rate": 4.476110645431685e-06, "loss": 0.4585, "step": 534 }, { "epoch": 0.13455734406438632, "grad_norm": 0.4427644610404968, "learning_rate": 4.4844928751047785e-06, "loss": 0.4338, "step": 535 }, { "epoch": 0.13480885311871227, "grad_norm": 0.4909707307815552, "learning_rate": 4.492875104777871e-06, "loss": 0.4331, "step": 536 }, { "epoch": 0.13506036217303824, "grad_norm": 0.5035997629165649, "learning_rate": 4.501257334450964e-06, "loss": 0.4287, "step": 537 }, { "epoch": 0.13531187122736418, "grad_norm": 0.45080435276031494, "learning_rate": 4.509639564124057e-06, "loss": 0.452, "step": 538 }, { "epoch": 0.13556338028169015, "grad_norm": 0.4062177836894989, "learning_rate": 4.518021793797151e-06, "loss": 0.421, "step": 539 }, { "epoch": 0.1358148893360161, "grad_norm": 0.47915151715278625, "learning_rate": 4.526404023470243e-06, "loss": 0.4678, "step": 540 }, { "epoch": 0.13606639839034204, "grad_norm": 0.4649512469768524, "learning_rate": 4.534786253143337e-06, "loss": 0.4532, "step": 541 }, { "epoch": 0.136317907444668, "grad_norm": 0.5145220756530762, "learning_rate": 4.5431684828164294e-06, "loss": 0.4529, "step": 542 }, { "epoch": 0.13656941649899396, "grad_norm": 0.39938247203826904, "learning_rate": 4.551550712489523e-06, "loss": 0.4457, "step": 543 }, { "epoch": 0.13682092555331993, "grad_norm": 0.5199249386787415, "learning_rate": 4.5599329421626155e-06, "loss": 0.442, "step": 544 }, { "epoch": 0.13707243460764587, "grad_norm": 0.43543845415115356, "learning_rate": 4.568315171835709e-06, "loss": 0.4312, "step": 545 }, { "epoch": 0.13732394366197184, "grad_norm": 0.44674405455589294, "learning_rate": 4.576697401508802e-06, "loss": 0.4456, "step": 546 }, { "epoch": 0.13757545271629779, "grad_norm": 0.45393526554107666, "learning_rate": 4.585079631181894e-06, "loss": 0.4537, "step": 547 }, { "epoch": 0.13782696177062373, "grad_norm": 0.43489089608192444, "learning_rate": 4.593461860854988e-06, "loss": 0.398, "step": 548 }, { "epoch": 0.1380784708249497, "grad_norm": 0.41286396980285645, "learning_rate": 4.601844090528081e-06, "loss": 0.4434, "step": 549 }, { "epoch": 0.13832997987927564, "grad_norm": 0.44615626335144043, "learning_rate": 4.610226320201174e-06, "loss": 0.4542, "step": 550 }, { "epoch": 0.13858148893360162, "grad_norm": 0.4532471001148224, "learning_rate": 4.6186085498742664e-06, "loss": 0.462, "step": 551 }, { "epoch": 0.13883299798792756, "grad_norm": 0.42889168858528137, "learning_rate": 4.62699077954736e-06, "loss": 0.4172, "step": 552 }, { "epoch": 0.13908450704225353, "grad_norm": 0.46256425976753235, "learning_rate": 4.6353730092204525e-06, "loss": 0.4216, "step": 553 }, { "epoch": 0.13933601609657947, "grad_norm": 0.4532136023044586, "learning_rate": 4.643755238893546e-06, "loss": 0.4269, "step": 554 }, { "epoch": 0.13958752515090544, "grad_norm": 0.43736207485198975, "learning_rate": 4.6521374685666395e-06, "loss": 0.4318, "step": 555 }, { "epoch": 0.1398390342052314, "grad_norm": 0.40762320160865784, "learning_rate": 4.660519698239732e-06, "loss": 0.4601, "step": 556 }, { "epoch": 0.14009054325955733, "grad_norm": 0.4910624027252197, "learning_rate": 4.668901927912825e-06, "loss": 0.4335, "step": 557 }, { "epoch": 0.1403420523138833, "grad_norm": 0.4497995972633362, "learning_rate": 4.677284157585918e-06, "loss": 0.4311, "step": 558 }, { "epoch": 0.14059356136820925, "grad_norm": 0.4066920876502991, "learning_rate": 4.685666387259012e-06, "loss": 0.4364, "step": 559 }, { "epoch": 0.14084507042253522, "grad_norm": 0.4641585350036621, "learning_rate": 4.694048616932104e-06, "loss": 0.4406, "step": 560 }, { "epoch": 0.14109657947686116, "grad_norm": 0.46958115696907043, "learning_rate": 4.702430846605197e-06, "loss": 0.4608, "step": 561 }, { "epoch": 0.14134808853118713, "grad_norm": 0.45457375049591064, "learning_rate": 4.71081307627829e-06, "loss": 0.4342, "step": 562 }, { "epoch": 0.14159959758551308, "grad_norm": 0.4222368597984314, "learning_rate": 4.719195305951383e-06, "loss": 0.4364, "step": 563 }, { "epoch": 0.14185110663983905, "grad_norm": 0.4284665882587433, "learning_rate": 4.7275775356244765e-06, "loss": 0.4101, "step": 564 }, { "epoch": 0.142102615694165, "grad_norm": 0.44402772188186646, "learning_rate": 4.73595976529757e-06, "loss": 0.3907, "step": 565 }, { "epoch": 0.14235412474849093, "grad_norm": 0.4303772747516632, "learning_rate": 4.744341994970663e-06, "loss": 0.4507, "step": 566 }, { "epoch": 0.1426056338028169, "grad_norm": 0.4649859070777893, "learning_rate": 4.752724224643755e-06, "loss": 0.4351, "step": 567 }, { "epoch": 0.14285714285714285, "grad_norm": 0.4507344365119934, "learning_rate": 4.761106454316849e-06, "loss": 0.4358, "step": 568 }, { "epoch": 0.14310865191146882, "grad_norm": 0.41727519035339355, "learning_rate": 4.769488683989942e-06, "loss": 0.4509, "step": 569 }, { "epoch": 0.14336016096579476, "grad_norm": 0.4347597658634186, "learning_rate": 4.777870913663035e-06, "loss": 0.4301, "step": 570 }, { "epoch": 0.14361167002012074, "grad_norm": 0.46765145659446716, "learning_rate": 4.7862531433361274e-06, "loss": 0.4384, "step": 571 }, { "epoch": 0.14386317907444668, "grad_norm": 0.46579083800315857, "learning_rate": 4.794635373009221e-06, "loss": 0.4452, "step": 572 }, { "epoch": 0.14411468812877262, "grad_norm": 0.4726623296737671, "learning_rate": 4.8030176026823135e-06, "loss": 0.4457, "step": 573 }, { "epoch": 0.1443661971830986, "grad_norm": 0.4563959240913391, "learning_rate": 4.811399832355407e-06, "loss": 0.4354, "step": 574 }, { "epoch": 0.14461770623742454, "grad_norm": 0.4445574879646301, "learning_rate": 4.8197820620285005e-06, "loss": 0.447, "step": 575 }, { "epoch": 0.1448692152917505, "grad_norm": 0.43714749813079834, "learning_rate": 4.828164291701593e-06, "loss": 0.4659, "step": 576 }, { "epoch": 0.14512072434607645, "grad_norm": 0.4672676920890808, "learning_rate": 4.836546521374686e-06, "loss": 0.3998, "step": 577 }, { "epoch": 0.14537223340040242, "grad_norm": 0.4701994061470032, "learning_rate": 4.844928751047779e-06, "loss": 0.4256, "step": 578 }, { "epoch": 0.14562374245472837, "grad_norm": 0.44352054595947266, "learning_rate": 4.853310980720873e-06, "loss": 0.4475, "step": 579 }, { "epoch": 0.14587525150905434, "grad_norm": 0.44086214900016785, "learning_rate": 4.861693210393965e-06, "loss": 0.4671, "step": 580 }, { "epoch": 0.14612676056338028, "grad_norm": 0.4606599509716034, "learning_rate": 4.870075440067058e-06, "loss": 0.4333, "step": 581 }, { "epoch": 0.14637826961770622, "grad_norm": 0.42309969663619995, "learning_rate": 4.878457669740151e-06, "loss": 0.4494, "step": 582 }, { "epoch": 0.1466297786720322, "grad_norm": 0.48204946517944336, "learning_rate": 4.886839899413244e-06, "loss": 0.4518, "step": 583 }, { "epoch": 0.14688128772635814, "grad_norm": 0.4570547044277191, "learning_rate": 4.8952221290863375e-06, "loss": 0.46, "step": 584 }, { "epoch": 0.1471327967806841, "grad_norm": 0.4400968551635742, "learning_rate": 4.903604358759431e-06, "loss": 0.415, "step": 585 }, { "epoch": 0.14738430583501005, "grad_norm": 0.48668766021728516, "learning_rate": 4.911986588432524e-06, "loss": 0.4209, "step": 586 }, { "epoch": 0.14763581488933603, "grad_norm": 0.4713693857192993, "learning_rate": 4.920368818105616e-06, "loss": 0.4527, "step": 587 }, { "epoch": 0.14788732394366197, "grad_norm": 0.4478401839733124, "learning_rate": 4.928751047778709e-06, "loss": 0.4484, "step": 588 }, { "epoch": 0.14813883299798794, "grad_norm": 0.46621429920196533, "learning_rate": 4.937133277451802e-06, "loss": 0.4337, "step": 589 }, { "epoch": 0.14839034205231388, "grad_norm": 0.4544297456741333, "learning_rate": 4.945515507124896e-06, "loss": 0.4596, "step": 590 }, { "epoch": 0.14864185110663983, "grad_norm": 0.4180624186992645, "learning_rate": 4.9538977367979884e-06, "loss": 0.4492, "step": 591 }, { "epoch": 0.1488933601609658, "grad_norm": 0.4281826615333557, "learning_rate": 4.962279966471082e-06, "loss": 0.4149, "step": 592 }, { "epoch": 0.14914486921529174, "grad_norm": 0.4696996808052063, "learning_rate": 4.9706621961441745e-06, "loss": 0.4426, "step": 593 }, { "epoch": 0.1493963782696177, "grad_norm": 0.42568251490592957, "learning_rate": 4.979044425817268e-06, "loss": 0.4312, "step": 594 }, { "epoch": 0.14964788732394366, "grad_norm": 0.45928239822387695, "learning_rate": 4.9874266554903615e-06, "loss": 0.4318, "step": 595 }, { "epoch": 0.14989939637826963, "grad_norm": 0.45567813515663147, "learning_rate": 4.995808885163454e-06, "loss": 0.457, "step": 596 }, { "epoch": 0.15015090543259557, "grad_norm": 0.42233970761299133, "learning_rate": 5.0041911148365476e-06, "loss": 0.4411, "step": 597 }, { "epoch": 0.15040241448692154, "grad_norm": 0.5426906943321228, "learning_rate": 5.01257334450964e-06, "loss": 0.4288, "step": 598 }, { "epoch": 0.1506539235412475, "grad_norm": 0.43617966771125793, "learning_rate": 5.020955574182733e-06, "loss": 0.4557, "step": 599 }, { "epoch": 0.15090543259557343, "grad_norm": 0.42684197425842285, "learning_rate": 5.029337803855826e-06, "loss": 0.4727, "step": 600 }, { "epoch": 0.1511569416498994, "grad_norm": 0.4799075126647949, "learning_rate": 5.037720033528919e-06, "loss": 0.4247, "step": 601 }, { "epoch": 0.15140845070422534, "grad_norm": 0.47558286786079407, "learning_rate": 5.0461022632020115e-06, "loss": 0.4396, "step": 602 }, { "epoch": 0.15165995975855132, "grad_norm": 0.6897980570793152, "learning_rate": 5.054484492875105e-06, "loss": 0.4226, "step": 603 }, { "epoch": 0.15191146881287726, "grad_norm": 0.49807223677635193, "learning_rate": 5.0628667225481985e-06, "loss": 0.4555, "step": 604 }, { "epoch": 0.15216297786720323, "grad_norm": 0.4609929025173187, "learning_rate": 5.071248952221292e-06, "loss": 0.44, "step": 605 }, { "epoch": 0.15241448692152917, "grad_norm": 0.42947277426719666, "learning_rate": 5.079631181894385e-06, "loss": 0.4162, "step": 606 }, { "epoch": 0.15266599597585512, "grad_norm": 0.4348846971988678, "learning_rate": 5.088013411567477e-06, "loss": 0.4073, "step": 607 }, { "epoch": 0.1529175050301811, "grad_norm": 0.5616949200630188, "learning_rate": 5.09639564124057e-06, "loss": 0.4516, "step": 608 }, { "epoch": 0.15316901408450703, "grad_norm": 0.4498026371002197, "learning_rate": 5.104777870913663e-06, "loss": 0.3877, "step": 609 }, { "epoch": 0.153420523138833, "grad_norm": 0.4951980412006378, "learning_rate": 5.113160100586757e-06, "loss": 0.4378, "step": 610 }, { "epoch": 0.15367203219315895, "grad_norm": 0.5059778094291687, "learning_rate": 5.12154233025985e-06, "loss": 0.4475, "step": 611 }, { "epoch": 0.15392354124748492, "grad_norm": 0.4149338901042938, "learning_rate": 5.129924559932943e-06, "loss": 0.451, "step": 612 }, { "epoch": 0.15417505030181086, "grad_norm": 0.4755608141422272, "learning_rate": 5.1383067896060355e-06, "loss": 0.4458, "step": 613 }, { "epoch": 0.15442655935613683, "grad_norm": 0.4765431582927704, "learning_rate": 5.146689019279128e-06, "loss": 0.4514, "step": 614 }, { "epoch": 0.15467806841046278, "grad_norm": 0.445700079202652, "learning_rate": 5.155071248952222e-06, "loss": 0.4362, "step": 615 }, { "epoch": 0.15492957746478872, "grad_norm": 0.46473878622055054, "learning_rate": 5.163453478625314e-06, "loss": 0.4453, "step": 616 }, { "epoch": 0.1551810865191147, "grad_norm": 0.471645712852478, "learning_rate": 5.1718357082984086e-06, "loss": 0.4708, "step": 617 }, { "epoch": 0.15543259557344064, "grad_norm": 0.4333302676677704, "learning_rate": 5.180217937971501e-06, "loss": 0.436, "step": 618 }, { "epoch": 0.1556841046277666, "grad_norm": 0.414048433303833, "learning_rate": 5.188600167644594e-06, "loss": 0.4313, "step": 619 }, { "epoch": 0.15593561368209255, "grad_norm": 0.4397493302822113, "learning_rate": 5.196982397317687e-06, "loss": 0.4555, "step": 620 }, { "epoch": 0.15618712273641852, "grad_norm": 0.43406859040260315, "learning_rate": 5.20536462699078e-06, "loss": 0.4276, "step": 621 }, { "epoch": 0.15643863179074446, "grad_norm": 0.39688730239868164, "learning_rate": 5.2137468566638725e-06, "loss": 0.4435, "step": 622 }, { "epoch": 0.15669014084507044, "grad_norm": 0.4431197941303253, "learning_rate": 5.222129086336965e-06, "loss": 0.4676, "step": 623 }, { "epoch": 0.15694164989939638, "grad_norm": 0.3907865881919861, "learning_rate": 5.2305113160100595e-06, "loss": 0.4142, "step": 624 }, { "epoch": 0.15719315895372232, "grad_norm": 0.48109179735183716, "learning_rate": 5.238893545683152e-06, "loss": 0.4503, "step": 625 }, { "epoch": 0.1574446680080483, "grad_norm": 0.42568618059158325, "learning_rate": 5.247275775356246e-06, "loss": 0.4298, "step": 626 }, { "epoch": 0.15769617706237424, "grad_norm": 0.4349900186061859, "learning_rate": 5.255658005029338e-06, "loss": 0.4306, "step": 627 }, { "epoch": 0.1579476861167002, "grad_norm": 0.45168644189834595, "learning_rate": 5.264040234702431e-06, "loss": 0.4328, "step": 628 }, { "epoch": 0.15819919517102615, "grad_norm": 0.4641101360321045, "learning_rate": 5.272422464375524e-06, "loss": 0.4293, "step": 629 }, { "epoch": 0.15845070422535212, "grad_norm": 0.450825035572052, "learning_rate": 5.280804694048617e-06, "loss": 0.4415, "step": 630 }, { "epoch": 0.15870221327967807, "grad_norm": 0.4262521266937256, "learning_rate": 5.289186923721711e-06, "loss": 0.4678, "step": 631 }, { "epoch": 0.158953722334004, "grad_norm": 0.4274531304836273, "learning_rate": 5.297569153394804e-06, "loss": 0.4443, "step": 632 }, { "epoch": 0.15920523138832998, "grad_norm": 0.46715742349624634, "learning_rate": 5.3059513830678965e-06, "loss": 0.4056, "step": 633 }, { "epoch": 0.15945674044265593, "grad_norm": 0.4867947995662689, "learning_rate": 5.314333612740989e-06, "loss": 0.4539, "step": 634 }, { "epoch": 0.1597082494969819, "grad_norm": 0.44424811005592346, "learning_rate": 5.322715842414083e-06, "loss": 0.4558, "step": 635 }, { "epoch": 0.15995975855130784, "grad_norm": 0.4673738181591034, "learning_rate": 5.331098072087175e-06, "loss": 0.4272, "step": 636 }, { "epoch": 0.1602112676056338, "grad_norm": 0.4130132794380188, "learning_rate": 5.3394803017602695e-06, "loss": 0.4446, "step": 637 }, { "epoch": 0.16046277665995975, "grad_norm": 0.43853166699409485, "learning_rate": 5.347862531433362e-06, "loss": 0.4472, "step": 638 }, { "epoch": 0.16071428571428573, "grad_norm": 0.4459279477596283, "learning_rate": 5.356244761106455e-06, "loss": 0.4636, "step": 639 }, { "epoch": 0.16096579476861167, "grad_norm": 0.4516228437423706, "learning_rate": 5.364626990779548e-06, "loss": 0.4081, "step": 640 }, { "epoch": 0.1612173038229376, "grad_norm": 0.41011399030685425, "learning_rate": 5.373009220452641e-06, "loss": 0.4401, "step": 641 }, { "epoch": 0.16146881287726358, "grad_norm": 0.3807559311389923, "learning_rate": 5.3813914501257335e-06, "loss": 0.4051, "step": 642 }, { "epoch": 0.16172032193158953, "grad_norm": 0.41934090852737427, "learning_rate": 5.389773679798826e-06, "loss": 0.4234, "step": 643 }, { "epoch": 0.1619718309859155, "grad_norm": 0.39974445104599, "learning_rate": 5.3981559094719205e-06, "loss": 0.421, "step": 644 }, { "epoch": 0.16222334004024144, "grad_norm": 0.4237842261791229, "learning_rate": 5.406538139145013e-06, "loss": 0.4301, "step": 645 }, { "epoch": 0.16247484909456741, "grad_norm": 0.4234157204627991, "learning_rate": 5.4149203688181066e-06, "loss": 0.414, "step": 646 }, { "epoch": 0.16272635814889336, "grad_norm": 0.4288012385368347, "learning_rate": 5.423302598491199e-06, "loss": 0.4019, "step": 647 }, { "epoch": 0.16297786720321933, "grad_norm": 0.4307977855205536, "learning_rate": 5.431684828164292e-06, "loss": 0.4164, "step": 648 }, { "epoch": 0.16322937625754527, "grad_norm": 0.4072883725166321, "learning_rate": 5.4400670578373844e-06, "loss": 0.4237, "step": 649 }, { "epoch": 0.16348088531187122, "grad_norm": 0.44802358746528625, "learning_rate": 5.448449287510478e-06, "loss": 0.4257, "step": 650 }, { "epoch": 0.1637323943661972, "grad_norm": 0.44640132784843445, "learning_rate": 5.456831517183571e-06, "loss": 0.4128, "step": 651 }, { "epoch": 0.16398390342052313, "grad_norm": 0.45102569460868835, "learning_rate": 5.465213746856665e-06, "loss": 0.4205, "step": 652 }, { "epoch": 0.1642354124748491, "grad_norm": 0.4356350004673004, "learning_rate": 5.4735959765297575e-06, "loss": 0.4444, "step": 653 }, { "epoch": 0.16448692152917505, "grad_norm": 0.4442964196205139, "learning_rate": 5.48197820620285e-06, "loss": 0.4406, "step": 654 }, { "epoch": 0.16473843058350102, "grad_norm": 0.5060700178146362, "learning_rate": 5.490360435875944e-06, "loss": 0.4425, "step": 655 }, { "epoch": 0.16498993963782696, "grad_norm": 0.45222505927085876, "learning_rate": 5.498742665549036e-06, "loss": 0.4296, "step": 656 }, { "epoch": 0.16524144869215293, "grad_norm": 0.47446000576019287, "learning_rate": 5.507124895222129e-06, "loss": 0.4272, "step": 657 }, { "epoch": 0.16549295774647887, "grad_norm": 0.4703046977519989, "learning_rate": 5.515507124895223e-06, "loss": 0.4174, "step": 658 }, { "epoch": 0.16574446680080482, "grad_norm": 0.48616233468055725, "learning_rate": 5.523889354568316e-06, "loss": 0.4248, "step": 659 }, { "epoch": 0.1659959758551308, "grad_norm": 0.4568127989768982, "learning_rate": 5.532271584241408e-06, "loss": 0.4402, "step": 660 }, { "epoch": 0.16624748490945673, "grad_norm": 0.44677838683128357, "learning_rate": 5.540653813914502e-06, "loss": 0.4516, "step": 661 }, { "epoch": 0.1664989939637827, "grad_norm": 0.42325931787490845, "learning_rate": 5.5490360435875945e-06, "loss": 0.4393, "step": 662 }, { "epoch": 0.16675050301810865, "grad_norm": 0.4119207561016083, "learning_rate": 5.557418273260687e-06, "loss": 0.3994, "step": 663 }, { "epoch": 0.16700201207243462, "grad_norm": 0.45278915762901306, "learning_rate": 5.5658005029337815e-06, "loss": 0.4034, "step": 664 }, { "epoch": 0.16725352112676056, "grad_norm": 0.5348659157752991, "learning_rate": 5.574182732606874e-06, "loss": 0.4471, "step": 665 }, { "epoch": 0.1675050301810865, "grad_norm": 0.44384071230888367, "learning_rate": 5.5825649622799676e-06, "loss": 0.4334, "step": 666 }, { "epoch": 0.16775653923541248, "grad_norm": 0.4300418496131897, "learning_rate": 5.59094719195306e-06, "loss": 0.4094, "step": 667 }, { "epoch": 0.16800804828973842, "grad_norm": 0.4564829170703888, "learning_rate": 5.599329421626153e-06, "loss": 0.4138, "step": 668 }, { "epoch": 0.1682595573440644, "grad_norm": 0.5108182430267334, "learning_rate": 5.6077116512992454e-06, "loss": 0.4135, "step": 669 }, { "epoch": 0.16851106639839034, "grad_norm": 0.4513753652572632, "learning_rate": 5.616093880972339e-06, "loss": 0.456, "step": 670 }, { "epoch": 0.1687625754527163, "grad_norm": 0.4487459659576416, "learning_rate": 5.624476110645432e-06, "loss": 0.4374, "step": 671 }, { "epoch": 0.16901408450704225, "grad_norm": 0.48467063903808594, "learning_rate": 5.632858340318526e-06, "loss": 0.4575, "step": 672 }, { "epoch": 0.16926559356136822, "grad_norm": 0.47948938608169556, "learning_rate": 5.6412405699916185e-06, "loss": 0.4399, "step": 673 }, { "epoch": 0.16951710261569417, "grad_norm": 0.4448109269142151, "learning_rate": 5.649622799664711e-06, "loss": 0.4395, "step": 674 }, { "epoch": 0.1697686116700201, "grad_norm": 0.44593971967697144, "learning_rate": 5.658005029337804e-06, "loss": 0.4382, "step": 675 }, { "epoch": 0.17002012072434608, "grad_norm": 0.4504484236240387, "learning_rate": 5.666387259010897e-06, "loss": 0.4252, "step": 676 }, { "epoch": 0.17027162977867202, "grad_norm": 0.5128602981567383, "learning_rate": 5.67476948868399e-06, "loss": 0.4444, "step": 677 }, { "epoch": 0.170523138832998, "grad_norm": 0.4349728226661682, "learning_rate": 5.683151718357084e-06, "loss": 0.4152, "step": 678 }, { "epoch": 0.17077464788732394, "grad_norm": 0.4326830804347992, "learning_rate": 5.691533948030177e-06, "loss": 0.4416, "step": 679 }, { "epoch": 0.1710261569416499, "grad_norm": 0.42152220010757446, "learning_rate": 5.699916177703269e-06, "loss": 0.4363, "step": 680 }, { "epoch": 0.17127766599597585, "grad_norm": 0.4485848844051361, "learning_rate": 5.708298407376363e-06, "loss": 0.4467, "step": 681 }, { "epoch": 0.17152917505030182, "grad_norm": 0.42400580644607544, "learning_rate": 5.7166806370494555e-06, "loss": 0.4293, "step": 682 }, { "epoch": 0.17178068410462777, "grad_norm": 0.4600382447242737, "learning_rate": 5.725062866722548e-06, "loss": 0.4293, "step": 683 }, { "epoch": 0.1720321931589537, "grad_norm": 0.5220768451690674, "learning_rate": 5.733445096395641e-06, "loss": 0.3961, "step": 684 }, { "epoch": 0.17228370221327968, "grad_norm": 0.5387700796127319, "learning_rate": 5.741827326068735e-06, "loss": 0.468, "step": 685 }, { "epoch": 0.17253521126760563, "grad_norm": 0.4986119270324707, "learning_rate": 5.750209555741828e-06, "loss": 0.4452, "step": 686 }, { "epoch": 0.1727867203219316, "grad_norm": 0.4661785066127777, "learning_rate": 5.758591785414921e-06, "loss": 0.4413, "step": 687 }, { "epoch": 0.17303822937625754, "grad_norm": 0.4665091335773468, "learning_rate": 5.766974015088014e-06, "loss": 0.4327, "step": 688 }, { "epoch": 0.1732897384305835, "grad_norm": 0.46862325072288513, "learning_rate": 5.7753562447611064e-06, "loss": 0.4422, "step": 689 }, { "epoch": 0.17354124748490946, "grad_norm": 0.47405147552490234, "learning_rate": 5.7837384744342e-06, "loss": 0.4377, "step": 690 }, { "epoch": 0.1737927565392354, "grad_norm": 0.49012836813926697, "learning_rate": 5.792120704107293e-06, "loss": 0.45, "step": 691 }, { "epoch": 0.17404426559356137, "grad_norm": 0.49610865116119385, "learning_rate": 5.800502933780387e-06, "loss": 0.405, "step": 692 }, { "epoch": 0.1742957746478873, "grad_norm": 0.5310260653495789, "learning_rate": 5.8088851634534795e-06, "loss": 0.4148, "step": 693 }, { "epoch": 0.17454728370221329, "grad_norm": 0.4385521411895752, "learning_rate": 5.817267393126572e-06, "loss": 0.3932, "step": 694 }, { "epoch": 0.17479879275653923, "grad_norm": 0.44616925716400146, "learning_rate": 5.825649622799665e-06, "loss": 0.4482, "step": 695 }, { "epoch": 0.1750503018108652, "grad_norm": 0.5454944372177124, "learning_rate": 5.834031852472758e-06, "loss": 0.4579, "step": 696 }, { "epoch": 0.17530181086519114, "grad_norm": 0.4732443690299988, "learning_rate": 5.842414082145851e-06, "loss": 0.4198, "step": 697 }, { "epoch": 0.17555331991951711, "grad_norm": 0.5170097947120667, "learning_rate": 5.850796311818945e-06, "loss": 0.4551, "step": 698 }, { "epoch": 0.17580482897384306, "grad_norm": 0.4894830584526062, "learning_rate": 5.859178541492038e-06, "loss": 0.411, "step": 699 }, { "epoch": 0.176056338028169, "grad_norm": 0.47865331172943115, "learning_rate": 5.86756077116513e-06, "loss": 0.4162, "step": 700 }, { "epoch": 0.17630784708249497, "grad_norm": 0.5109310150146484, "learning_rate": 5.875943000838223e-06, "loss": 0.4255, "step": 701 }, { "epoch": 0.17655935613682092, "grad_norm": 0.4590068459510803, "learning_rate": 5.8843252305113165e-06, "loss": 0.4096, "step": 702 }, { "epoch": 0.1768108651911469, "grad_norm": 0.5353825092315674, "learning_rate": 5.892707460184409e-06, "loss": 0.4477, "step": 703 }, { "epoch": 0.17706237424547283, "grad_norm": 0.4406241178512573, "learning_rate": 5.901089689857502e-06, "loss": 0.4037, "step": 704 }, { "epoch": 0.1773138832997988, "grad_norm": 0.4536992609500885, "learning_rate": 5.909471919530596e-06, "loss": 0.4397, "step": 705 }, { "epoch": 0.17756539235412475, "grad_norm": 0.4446880519390106, "learning_rate": 5.917854149203689e-06, "loss": 0.4454, "step": 706 }, { "epoch": 0.17781690140845072, "grad_norm": 0.4549560546875, "learning_rate": 5.926236378876782e-06, "loss": 0.4199, "step": 707 }, { "epoch": 0.17806841046277666, "grad_norm": 0.4807390570640564, "learning_rate": 5.934618608549875e-06, "loss": 0.4336, "step": 708 }, { "epoch": 0.1783199195171026, "grad_norm": 0.4760938882827759, "learning_rate": 5.943000838222967e-06, "loss": 0.4095, "step": 709 }, { "epoch": 0.17857142857142858, "grad_norm": 0.4646598696708679, "learning_rate": 5.95138306789606e-06, "loss": 0.3987, "step": 710 }, { "epoch": 0.17882293762575452, "grad_norm": 0.5018036961555481, "learning_rate": 5.9597652975691535e-06, "loss": 0.4004, "step": 711 }, { "epoch": 0.1790744466800805, "grad_norm": 0.4650225341320038, "learning_rate": 5.968147527242247e-06, "loss": 0.4507, "step": 712 }, { "epoch": 0.17932595573440643, "grad_norm": 0.44860950112342834, "learning_rate": 5.9765297569153405e-06, "loss": 0.44, "step": 713 }, { "epoch": 0.1795774647887324, "grad_norm": 0.46424388885498047, "learning_rate": 5.984911986588433e-06, "loss": 0.4355, "step": 714 }, { "epoch": 0.17982897384305835, "grad_norm": 0.49789562821388245, "learning_rate": 5.993294216261526e-06, "loss": 0.4256, "step": 715 }, { "epoch": 0.18008048289738432, "grad_norm": 0.4016691744327545, "learning_rate": 6.001676445934619e-06, "loss": 0.4454, "step": 716 }, { "epoch": 0.18033199195171026, "grad_norm": 0.47032442688941956, "learning_rate": 6.010058675607712e-06, "loss": 0.4023, "step": 717 }, { "epoch": 0.1805835010060362, "grad_norm": 0.4652996063232422, "learning_rate": 6.018440905280806e-06, "loss": 0.4393, "step": 718 }, { "epoch": 0.18083501006036218, "grad_norm": 0.4808005392551422, "learning_rate": 6.026823134953899e-06, "loss": 0.4102, "step": 719 }, { "epoch": 0.18108651911468812, "grad_norm": 0.42760953307151794, "learning_rate": 6.035205364626991e-06, "loss": 0.4416, "step": 720 }, { "epoch": 0.1813380281690141, "grad_norm": 0.4750773012638092, "learning_rate": 6.043587594300084e-06, "loss": 0.4321, "step": 721 }, { "epoch": 0.18158953722334004, "grad_norm": 0.4393182098865509, "learning_rate": 6.0519698239731775e-06, "loss": 0.4264, "step": 722 }, { "epoch": 0.181841046277666, "grad_norm": 0.47229018807411194, "learning_rate": 6.06035205364627e-06, "loss": 0.4255, "step": 723 }, { "epoch": 0.18209255533199195, "grad_norm": 0.4402623772621155, "learning_rate": 6.068734283319363e-06, "loss": 0.4308, "step": 724 }, { "epoch": 0.1823440643863179, "grad_norm": 0.4287785589694977, "learning_rate": 6.077116512992457e-06, "loss": 0.4413, "step": 725 }, { "epoch": 0.18259557344064387, "grad_norm": 0.49795329570770264, "learning_rate": 6.08549874266555e-06, "loss": 0.4398, "step": 726 }, { "epoch": 0.1828470824949698, "grad_norm": 0.4231499433517456, "learning_rate": 6.093880972338643e-06, "loss": 0.4207, "step": 727 }, { "epoch": 0.18309859154929578, "grad_norm": 0.4155465066432953, "learning_rate": 6.102263202011736e-06, "loss": 0.4195, "step": 728 }, { "epoch": 0.18335010060362172, "grad_norm": 0.4875541925430298, "learning_rate": 6.110645431684828e-06, "loss": 0.4675, "step": 729 }, { "epoch": 0.1836016096579477, "grad_norm": 0.46131083369255066, "learning_rate": 6.119027661357921e-06, "loss": 0.4296, "step": 730 }, { "epoch": 0.18385311871227364, "grad_norm": 0.46306151151657104, "learning_rate": 6.1274098910310145e-06, "loss": 0.4315, "step": 731 }, { "epoch": 0.1841046277665996, "grad_norm": 0.4583171308040619, "learning_rate": 6.135792120704108e-06, "loss": 0.3973, "step": 732 }, { "epoch": 0.18435613682092555, "grad_norm": 0.4396132528781891, "learning_rate": 6.1441743503772014e-06, "loss": 0.4166, "step": 733 }, { "epoch": 0.1846076458752515, "grad_norm": 0.45620885491371155, "learning_rate": 6.152556580050294e-06, "loss": 0.4134, "step": 734 }, { "epoch": 0.18485915492957747, "grad_norm": 0.48990169167518616, "learning_rate": 6.160938809723387e-06, "loss": 0.4615, "step": 735 }, { "epoch": 0.1851106639839034, "grad_norm": 0.4548690617084503, "learning_rate": 6.169321039396479e-06, "loss": 0.4019, "step": 736 }, { "epoch": 0.18536217303822938, "grad_norm": 0.4465884864330292, "learning_rate": 6.177703269069573e-06, "loss": 0.4115, "step": 737 }, { "epoch": 0.18561368209255533, "grad_norm": 0.5192622542381287, "learning_rate": 6.1860854987426654e-06, "loss": 0.4071, "step": 738 }, { "epoch": 0.1858651911468813, "grad_norm": 0.5081340074539185, "learning_rate": 6.19446772841576e-06, "loss": 0.4464, "step": 739 }, { "epoch": 0.18611670020120724, "grad_norm": 0.6187810301780701, "learning_rate": 6.202849958088852e-06, "loss": 0.4386, "step": 740 }, { "epoch": 0.1863682092555332, "grad_norm": 0.45214521884918213, "learning_rate": 6.211232187761945e-06, "loss": 0.4626, "step": 741 }, { "epoch": 0.18661971830985916, "grad_norm": 0.5023926496505737, "learning_rate": 6.2196144174350385e-06, "loss": 0.4768, "step": 742 }, { "epoch": 0.1868712273641851, "grad_norm": 0.49080634117126465, "learning_rate": 6.227996647108131e-06, "loss": 0.4381, "step": 743 }, { "epoch": 0.18712273641851107, "grad_norm": 0.5521237850189209, "learning_rate": 6.236378876781224e-06, "loss": 0.4458, "step": 744 }, { "epoch": 0.18737424547283701, "grad_norm": 0.46638408303260803, "learning_rate": 6.244761106454318e-06, "loss": 0.4433, "step": 745 }, { "epoch": 0.18762575452716299, "grad_norm": 0.5203446745872498, "learning_rate": 6.253143336127411e-06, "loss": 0.4255, "step": 746 }, { "epoch": 0.18787726358148893, "grad_norm": 0.5413115620613098, "learning_rate": 6.261525565800503e-06, "loss": 0.4302, "step": 747 }, { "epoch": 0.1881287726358149, "grad_norm": 0.5347134470939636, "learning_rate": 6.269907795473597e-06, "loss": 0.4414, "step": 748 }, { "epoch": 0.18838028169014084, "grad_norm": 0.43149664998054504, "learning_rate": 6.278290025146689e-06, "loss": 0.4167, "step": 749 }, { "epoch": 0.1886317907444668, "grad_norm": 0.522585391998291, "learning_rate": 6.286672254819782e-06, "loss": 0.4172, "step": 750 }, { "epoch": 0.18888329979879276, "grad_norm": 0.5557539463043213, "learning_rate": 6.2950544844928755e-06, "loss": 0.436, "step": 751 }, { "epoch": 0.1891348088531187, "grad_norm": 0.47711122035980225, "learning_rate": 6.303436714165969e-06, "loss": 0.4602, "step": 752 }, { "epoch": 0.18938631790744467, "grad_norm": 0.5225254893302917, "learning_rate": 6.3118189438390624e-06, "loss": 0.438, "step": 753 }, { "epoch": 0.18963782696177062, "grad_norm": 0.4202134907245636, "learning_rate": 6.320201173512155e-06, "loss": 0.4453, "step": 754 }, { "epoch": 0.1898893360160966, "grad_norm": 0.48253220319747925, "learning_rate": 6.328583403185248e-06, "loss": 0.4296, "step": 755 }, { "epoch": 0.19014084507042253, "grad_norm": 0.525970995426178, "learning_rate": 6.33696563285834e-06, "loss": 0.4248, "step": 756 }, { "epoch": 0.1903923541247485, "grad_norm": 0.49865517020225525, "learning_rate": 6.345347862531434e-06, "loss": 0.4083, "step": 757 }, { "epoch": 0.19064386317907445, "grad_norm": 0.5443746447563171, "learning_rate": 6.353730092204526e-06, "loss": 0.4326, "step": 758 }, { "epoch": 0.1908953722334004, "grad_norm": 0.5639643669128418, "learning_rate": 6.362112321877621e-06, "loss": 0.4212, "step": 759 }, { "epoch": 0.19114688128772636, "grad_norm": 0.4444926381111145, "learning_rate": 6.370494551550713e-06, "loss": 0.407, "step": 760 }, { "epoch": 0.1913983903420523, "grad_norm": 0.5601978898048401, "learning_rate": 6.378876781223806e-06, "loss": 0.4492, "step": 761 }, { "epoch": 0.19164989939637828, "grad_norm": 0.5076690912246704, "learning_rate": 6.387259010896899e-06, "loss": 0.4241, "step": 762 }, { "epoch": 0.19190140845070422, "grad_norm": 0.45043453574180603, "learning_rate": 6.395641240569992e-06, "loss": 0.4345, "step": 763 }, { "epoch": 0.1921529175050302, "grad_norm": 0.413898229598999, "learning_rate": 6.404023470243085e-06, "loss": 0.4396, "step": 764 }, { "epoch": 0.19240442655935613, "grad_norm": 0.43958157300949097, "learning_rate": 6.412405699916177e-06, "loss": 0.4294, "step": 765 }, { "epoch": 0.1926559356136821, "grad_norm": 0.46111080050468445, "learning_rate": 6.420787929589272e-06, "loss": 0.4508, "step": 766 }, { "epoch": 0.19290744466800805, "grad_norm": 0.4380728602409363, "learning_rate": 6.429170159262364e-06, "loss": 0.4308, "step": 767 }, { "epoch": 0.193158953722334, "grad_norm": 0.47174325585365295, "learning_rate": 6.437552388935458e-06, "loss": 0.4047, "step": 768 }, { "epoch": 0.19341046277665996, "grad_norm": 0.5069427490234375, "learning_rate": 6.44593461860855e-06, "loss": 0.4623, "step": 769 }, { "epoch": 0.1936619718309859, "grad_norm": 0.47728949785232544, "learning_rate": 6.454316848281643e-06, "loss": 0.4226, "step": 770 }, { "epoch": 0.19391348088531188, "grad_norm": 0.4369402229785919, "learning_rate": 6.462699077954736e-06, "loss": 0.4355, "step": 771 }, { "epoch": 0.19416498993963782, "grad_norm": 0.49592912197113037, "learning_rate": 6.47108130762783e-06, "loss": 0.4504, "step": 772 }, { "epoch": 0.1944164989939638, "grad_norm": 0.4935101270675659, "learning_rate": 6.479463537300923e-06, "loss": 0.4434, "step": 773 }, { "epoch": 0.19466800804828974, "grad_norm": 0.4729015827178955, "learning_rate": 6.487845766974016e-06, "loss": 0.4066, "step": 774 }, { "epoch": 0.19491951710261568, "grad_norm": 0.5029162168502808, "learning_rate": 6.496227996647109e-06, "loss": 0.4262, "step": 775 }, { "epoch": 0.19517102615694165, "grad_norm": 0.6650319695472717, "learning_rate": 6.504610226320201e-06, "loss": 0.434, "step": 776 }, { "epoch": 0.1954225352112676, "grad_norm": 0.464813768863678, "learning_rate": 6.512992455993295e-06, "loss": 0.4368, "step": 777 }, { "epoch": 0.19567404426559357, "grad_norm": 0.5334668755531311, "learning_rate": 6.521374685666387e-06, "loss": 0.4279, "step": 778 }, { "epoch": 0.1959255533199195, "grad_norm": 0.6014412641525269, "learning_rate": 6.529756915339482e-06, "loss": 0.4236, "step": 779 }, { "epoch": 0.19617706237424548, "grad_norm": 0.48810648918151855, "learning_rate": 6.538139145012574e-06, "loss": 0.4528, "step": 780 }, { "epoch": 0.19642857142857142, "grad_norm": 0.5004664063453674, "learning_rate": 6.546521374685667e-06, "loss": 0.4335, "step": 781 }, { "epoch": 0.1966800804828974, "grad_norm": 0.5810479521751404, "learning_rate": 6.55490360435876e-06, "loss": 0.4348, "step": 782 }, { "epoch": 0.19693158953722334, "grad_norm": 0.5041718482971191, "learning_rate": 6.563285834031853e-06, "loss": 0.4341, "step": 783 }, { "epoch": 0.19718309859154928, "grad_norm": 0.498522013425827, "learning_rate": 6.571668063704946e-06, "loss": 0.4269, "step": 784 }, { "epoch": 0.19743460764587525, "grad_norm": 0.45353996753692627, "learning_rate": 6.580050293378038e-06, "loss": 0.4199, "step": 785 }, { "epoch": 0.1976861167002012, "grad_norm": 0.45865288376808167, "learning_rate": 6.588432523051133e-06, "loss": 0.4335, "step": 786 }, { "epoch": 0.19793762575452717, "grad_norm": 0.4978802800178528, "learning_rate": 6.596814752724225e-06, "loss": 0.424, "step": 787 }, { "epoch": 0.1981891348088531, "grad_norm": 0.45598986744880676, "learning_rate": 6.605196982397319e-06, "loss": 0.4375, "step": 788 }, { "epoch": 0.19844064386317908, "grad_norm": 0.448933869600296, "learning_rate": 6.613579212070411e-06, "loss": 0.4242, "step": 789 }, { "epoch": 0.19869215291750503, "grad_norm": 0.4256827235221863, "learning_rate": 6.621961441743504e-06, "loss": 0.4396, "step": 790 }, { "epoch": 0.198943661971831, "grad_norm": 0.4087207317352295, "learning_rate": 6.630343671416597e-06, "loss": 0.3984, "step": 791 }, { "epoch": 0.19919517102615694, "grad_norm": 0.5045828819274902, "learning_rate": 6.638725901089691e-06, "loss": 0.4333, "step": 792 }, { "epoch": 0.19944668008048289, "grad_norm": 0.4137192368507385, "learning_rate": 6.6471081307627836e-06, "loss": 0.4048, "step": 793 }, { "epoch": 0.19969818913480886, "grad_norm": 0.43266329169273376, "learning_rate": 6.655490360435877e-06, "loss": 0.4228, "step": 794 }, { "epoch": 0.1999496981891348, "grad_norm": 0.43893131613731384, "learning_rate": 6.66387259010897e-06, "loss": 0.4284, "step": 795 }, { "epoch": 0.20020120724346077, "grad_norm": 0.4277397096157074, "learning_rate": 6.672254819782062e-06, "loss": 0.4379, "step": 796 }, { "epoch": 0.20045271629778671, "grad_norm": 0.4732038974761963, "learning_rate": 6.680637049455155e-06, "loss": 0.4393, "step": 797 }, { "epoch": 0.2007042253521127, "grad_norm": 0.40471675992012024, "learning_rate": 6.689019279128248e-06, "loss": 0.4128, "step": 798 }, { "epoch": 0.20095573440643863, "grad_norm": 0.47634950280189514, "learning_rate": 6.697401508801342e-06, "loss": 0.4163, "step": 799 }, { "epoch": 0.2012072434607646, "grad_norm": 0.44929447770118713, "learning_rate": 6.705783738474435e-06, "loss": 0.4569, "step": 800 }, { "epoch": 0.20145875251509054, "grad_norm": 0.40945762395858765, "learning_rate": 6.714165968147528e-06, "loss": 0.4021, "step": 801 }, { "epoch": 0.2017102615694165, "grad_norm": 0.4612343907356262, "learning_rate": 6.722548197820621e-06, "loss": 0.4204, "step": 802 }, { "epoch": 0.20196177062374246, "grad_norm": 0.40259966254234314, "learning_rate": 6.730930427493714e-06, "loss": 0.4317, "step": 803 }, { "epoch": 0.2022132796780684, "grad_norm": 0.4034963548183441, "learning_rate": 6.739312657166807e-06, "loss": 0.4112, "step": 804 }, { "epoch": 0.20246478873239437, "grad_norm": 0.4619353413581848, "learning_rate": 6.747694886839899e-06, "loss": 0.4341, "step": 805 }, { "epoch": 0.20271629778672032, "grad_norm": 0.40814584493637085, "learning_rate": 6.756077116512994e-06, "loss": 0.4438, "step": 806 }, { "epoch": 0.2029678068410463, "grad_norm": 0.47560909390449524, "learning_rate": 6.764459346186086e-06, "loss": 0.4254, "step": 807 }, { "epoch": 0.20321931589537223, "grad_norm": 0.43871134519577026, "learning_rate": 6.772841575859179e-06, "loss": 0.4295, "step": 808 }, { "epoch": 0.20347082494969818, "grad_norm": 0.49143895506858826, "learning_rate": 6.781223805532272e-06, "loss": 0.4294, "step": 809 }, { "epoch": 0.20372233400402415, "grad_norm": 0.4383901059627533, "learning_rate": 6.789606035205365e-06, "loss": 0.4183, "step": 810 }, { "epoch": 0.2039738430583501, "grad_norm": 0.4648225009441376, "learning_rate": 6.797988264878458e-06, "loss": 0.4105, "step": 811 }, { "epoch": 0.20422535211267606, "grad_norm": 0.4313584566116333, "learning_rate": 6.806370494551551e-06, "loss": 0.4132, "step": 812 }, { "epoch": 0.204476861167002, "grad_norm": 0.4867895543575287, "learning_rate": 6.8147527242246446e-06, "loss": 0.4601, "step": 813 }, { "epoch": 0.20472837022132798, "grad_norm": 0.4108380675315857, "learning_rate": 6.823134953897738e-06, "loss": 0.4437, "step": 814 }, { "epoch": 0.20497987927565392, "grad_norm": 0.46014007925987244, "learning_rate": 6.831517183570831e-06, "loss": 0.4335, "step": 815 }, { "epoch": 0.2052313883299799, "grad_norm": 0.42462071776390076, "learning_rate": 6.839899413243923e-06, "loss": 0.4316, "step": 816 }, { "epoch": 0.20548289738430583, "grad_norm": 0.4779759645462036, "learning_rate": 6.848281642917016e-06, "loss": 0.3952, "step": 817 }, { "epoch": 0.20573440643863178, "grad_norm": 0.4665524363517761, "learning_rate": 6.856663872590109e-06, "loss": 0.4114, "step": 818 }, { "epoch": 0.20598591549295775, "grad_norm": 0.5013046264648438, "learning_rate": 6.865046102263203e-06, "loss": 0.4086, "step": 819 }, { "epoch": 0.2062374245472837, "grad_norm": 0.4999219477176666, "learning_rate": 6.873428331936296e-06, "loss": 0.433, "step": 820 }, { "epoch": 0.20648893360160966, "grad_norm": 0.46619290113449097, "learning_rate": 6.881810561609389e-06, "loss": 0.4102, "step": 821 }, { "epoch": 0.2067404426559356, "grad_norm": 0.4653915464878082, "learning_rate": 6.890192791282482e-06, "loss": 0.42, "step": 822 }, { "epoch": 0.20699195171026158, "grad_norm": 0.4532758295536041, "learning_rate": 6.898575020955574e-06, "loss": 0.4216, "step": 823 }, { "epoch": 0.20724346076458752, "grad_norm": 0.45343294739723206, "learning_rate": 6.906957250628668e-06, "loss": 0.4259, "step": 824 }, { "epoch": 0.2074949698189135, "grad_norm": 0.4827394485473633, "learning_rate": 6.91533948030176e-06, "loss": 0.4252, "step": 825 }, { "epoch": 0.20774647887323944, "grad_norm": 0.48318737745285034, "learning_rate": 6.923721709974855e-06, "loss": 0.4241, "step": 826 }, { "epoch": 0.20799798792756538, "grad_norm": 0.4679957330226898, "learning_rate": 6.932103939647947e-06, "loss": 0.4154, "step": 827 }, { "epoch": 0.20824949698189135, "grad_norm": 0.43381795287132263, "learning_rate": 6.94048616932104e-06, "loss": 0.4172, "step": 828 }, { "epoch": 0.2085010060362173, "grad_norm": 0.45988166332244873, "learning_rate": 6.948868398994133e-06, "loss": 0.4147, "step": 829 }, { "epoch": 0.20875251509054327, "grad_norm": 0.400395005941391, "learning_rate": 6.957250628667226e-06, "loss": 0.4308, "step": 830 }, { "epoch": 0.2090040241448692, "grad_norm": 0.4453587532043457, "learning_rate": 6.965632858340319e-06, "loss": 0.4018, "step": 831 }, { "epoch": 0.20925553319919518, "grad_norm": 0.4165091812610626, "learning_rate": 6.974015088013411e-06, "loss": 0.4022, "step": 832 }, { "epoch": 0.20950704225352113, "grad_norm": 0.49082446098327637, "learning_rate": 6.9823973176865055e-06, "loss": 0.4378, "step": 833 }, { "epoch": 0.20975855130784707, "grad_norm": 0.4576517641544342, "learning_rate": 6.990779547359598e-06, "loss": 0.4107, "step": 834 }, { "epoch": 0.21001006036217304, "grad_norm": 0.43755435943603516, "learning_rate": 6.999161777032692e-06, "loss": 0.4226, "step": 835 }, { "epoch": 0.21026156941649898, "grad_norm": 0.45865172147750854, "learning_rate": 7.007544006705784e-06, "loss": 0.4089, "step": 836 }, { "epoch": 0.21051307847082495, "grad_norm": 0.5721135139465332, "learning_rate": 7.015926236378877e-06, "loss": 0.4173, "step": 837 }, { "epoch": 0.2107645875251509, "grad_norm": 0.43587735295295715, "learning_rate": 7.02430846605197e-06, "loss": 0.4121, "step": 838 }, { "epoch": 0.21101609657947687, "grad_norm": 0.5028086304664612, "learning_rate": 7.032690695725063e-06, "loss": 0.4339, "step": 839 }, { "epoch": 0.2112676056338028, "grad_norm": 0.5480897426605225, "learning_rate": 7.041072925398157e-06, "loss": 0.422, "step": 840 }, { "epoch": 0.21151911468812878, "grad_norm": 0.4621444642543793, "learning_rate": 7.04945515507125e-06, "loss": 0.396, "step": 841 }, { "epoch": 0.21177062374245473, "grad_norm": 0.4964749813079834, "learning_rate": 7.0578373847443426e-06, "loss": 0.4597, "step": 842 }, { "epoch": 0.21202213279678067, "grad_norm": 0.48463791608810425, "learning_rate": 7.066219614417435e-06, "loss": 0.421, "step": 843 }, { "epoch": 0.21227364185110664, "grad_norm": 0.4578920900821686, "learning_rate": 7.074601844090529e-06, "loss": 0.4552, "step": 844 }, { "epoch": 0.21252515090543259, "grad_norm": 0.5604377388954163, "learning_rate": 7.082984073763621e-06, "loss": 0.4275, "step": 845 }, { "epoch": 0.21277665995975856, "grad_norm": 0.424990177154541, "learning_rate": 7.091366303436716e-06, "loss": 0.4237, "step": 846 }, { "epoch": 0.2130281690140845, "grad_norm": 0.5051470398902893, "learning_rate": 7.099748533109808e-06, "loss": 0.4197, "step": 847 }, { "epoch": 0.21327967806841047, "grad_norm": 0.5595173239707947, "learning_rate": 7.108130762782901e-06, "loss": 0.4045, "step": 848 }, { "epoch": 0.21353118712273642, "grad_norm": 0.43428587913513184, "learning_rate": 7.116512992455994e-06, "loss": 0.416, "step": 849 }, { "epoch": 0.2137826961770624, "grad_norm": 0.5479362607002258, "learning_rate": 7.124895222129087e-06, "loss": 0.4212, "step": 850 }, { "epoch": 0.21403420523138833, "grad_norm": 0.4589390754699707, "learning_rate": 7.13327745180218e-06, "loss": 0.4336, "step": 851 }, { "epoch": 0.21428571428571427, "grad_norm": 0.4126336872577667, "learning_rate": 7.141659681475272e-06, "loss": 0.4356, "step": 852 }, { "epoch": 0.21453722334004025, "grad_norm": 0.5102934241294861, "learning_rate": 7.1500419111483665e-06, "loss": 0.4416, "step": 853 }, { "epoch": 0.2147887323943662, "grad_norm": 0.4380166232585907, "learning_rate": 7.158424140821459e-06, "loss": 0.4087, "step": 854 }, { "epoch": 0.21504024144869216, "grad_norm": 0.4341869652271271, "learning_rate": 7.166806370494553e-06, "loss": 0.4436, "step": 855 }, { "epoch": 0.2152917505030181, "grad_norm": 0.42801982164382935, "learning_rate": 7.175188600167645e-06, "loss": 0.4048, "step": 856 }, { "epoch": 0.21554325955734407, "grad_norm": 0.46920889616012573, "learning_rate": 7.183570829840738e-06, "loss": 0.4304, "step": 857 }, { "epoch": 0.21579476861167002, "grad_norm": 0.4114355444908142, "learning_rate": 7.1919530595138305e-06, "loss": 0.4258, "step": 858 }, { "epoch": 0.216046277665996, "grad_norm": 0.47151094675064087, "learning_rate": 7.200335289186924e-06, "loss": 0.3901, "step": 859 }, { "epoch": 0.21629778672032193, "grad_norm": 0.4899015724658966, "learning_rate": 7.2087175188600175e-06, "loss": 0.4144, "step": 860 }, { "epoch": 0.21654929577464788, "grad_norm": 0.4297754764556885, "learning_rate": 7.217099748533111e-06, "loss": 0.4041, "step": 861 }, { "epoch": 0.21680080482897385, "grad_norm": 0.5144082307815552, "learning_rate": 7.2254819782062036e-06, "loss": 0.4448, "step": 862 }, { "epoch": 0.2170523138832998, "grad_norm": 0.4766099750995636, "learning_rate": 7.233864207879296e-06, "loss": 0.3988, "step": 863 }, { "epoch": 0.21730382293762576, "grad_norm": 0.46279922127723694, "learning_rate": 7.24224643755239e-06, "loss": 0.3987, "step": 864 }, { "epoch": 0.2175553319919517, "grad_norm": 0.5055639743804932, "learning_rate": 7.250628667225482e-06, "loss": 0.4175, "step": 865 }, { "epoch": 0.21780684104627768, "grad_norm": 0.5239527821540833, "learning_rate": 7.259010896898575e-06, "loss": 0.4355, "step": 866 }, { "epoch": 0.21805835010060362, "grad_norm": 0.4184260964393616, "learning_rate": 7.267393126571669e-06, "loss": 0.4454, "step": 867 }, { "epoch": 0.21830985915492956, "grad_norm": 0.5631728768348694, "learning_rate": 7.275775356244762e-06, "loss": 0.4466, "step": 868 }, { "epoch": 0.21856136820925554, "grad_norm": 0.4764256775379181, "learning_rate": 7.2841575859178545e-06, "loss": 0.4278, "step": 869 }, { "epoch": 0.21881287726358148, "grad_norm": 0.4316968619823456, "learning_rate": 7.292539815590948e-06, "loss": 0.4642, "step": 870 }, { "epoch": 0.21906438631790745, "grad_norm": 0.4983559846878052, "learning_rate": 7.300922045264041e-06, "loss": 0.4296, "step": 871 }, { "epoch": 0.2193158953722334, "grad_norm": 0.49155017733573914, "learning_rate": 7.309304274937133e-06, "loss": 0.452, "step": 872 }, { "epoch": 0.21956740442655936, "grad_norm": 0.5096904635429382, "learning_rate": 7.3176865046102275e-06, "loss": 0.4112, "step": 873 }, { "epoch": 0.2198189134808853, "grad_norm": 0.41947141289711, "learning_rate": 7.32606873428332e-06, "loss": 0.426, "step": 874 }, { "epoch": 0.22007042253521128, "grad_norm": 0.49421125650405884, "learning_rate": 7.334450963956414e-06, "loss": 0.4014, "step": 875 }, { "epoch": 0.22032193158953722, "grad_norm": 0.532922089099884, "learning_rate": 7.342833193629506e-06, "loss": 0.4217, "step": 876 }, { "epoch": 0.22057344064386317, "grad_norm": 0.4467262923717499, "learning_rate": 7.351215423302599e-06, "loss": 0.4348, "step": 877 }, { "epoch": 0.22082494969818914, "grad_norm": 0.4734607934951782, "learning_rate": 7.3595976529756915e-06, "loss": 0.432, "step": 878 }, { "epoch": 0.22107645875251508, "grad_norm": 0.4447082579135895, "learning_rate": 7.367979882648785e-06, "loss": 0.4272, "step": 879 }, { "epoch": 0.22132796780684105, "grad_norm": 0.4721175730228424, "learning_rate": 7.3763621123218785e-06, "loss": 0.4091, "step": 880 }, { "epoch": 0.221579476861167, "grad_norm": 0.45364779233932495, "learning_rate": 7.384744341994972e-06, "loss": 0.422, "step": 881 }, { "epoch": 0.22183098591549297, "grad_norm": 0.5278758406639099, "learning_rate": 7.3931265716680646e-06, "loss": 0.4253, "step": 882 }, { "epoch": 0.2220824949698189, "grad_norm": 0.4425363540649414, "learning_rate": 7.401508801341157e-06, "loss": 0.4227, "step": 883 }, { "epoch": 0.22233400402414488, "grad_norm": 0.4397623836994171, "learning_rate": 7.40989103101425e-06, "loss": 0.4274, "step": 884 }, { "epoch": 0.22258551307847083, "grad_norm": 0.5451933741569519, "learning_rate": 7.418273260687343e-06, "loss": 0.4258, "step": 885 }, { "epoch": 0.22283702213279677, "grad_norm": 0.5051050186157227, "learning_rate": 7.426655490360436e-06, "loss": 0.4037, "step": 886 }, { "epoch": 0.22308853118712274, "grad_norm": 0.4557664096355438, "learning_rate": 7.43503772003353e-06, "loss": 0.4222, "step": 887 }, { "epoch": 0.22334004024144868, "grad_norm": 0.5401166677474976, "learning_rate": 7.443419949706623e-06, "loss": 0.4399, "step": 888 }, { "epoch": 0.22359154929577466, "grad_norm": 0.48601412773132324, "learning_rate": 7.4518021793797155e-06, "loss": 0.4292, "step": 889 }, { "epoch": 0.2238430583501006, "grad_norm": 0.5143569707870483, "learning_rate": 7.460184409052809e-06, "loss": 0.4331, "step": 890 }, { "epoch": 0.22409456740442657, "grad_norm": 0.4917342960834503, "learning_rate": 7.4685666387259016e-06, "loss": 0.4079, "step": 891 }, { "epoch": 0.2243460764587525, "grad_norm": 0.5398588180541992, "learning_rate": 7.476948868398994e-06, "loss": 0.4244, "step": 892 }, { "epoch": 0.22459758551307846, "grad_norm": 0.4740265905857086, "learning_rate": 7.485331098072087e-06, "loss": 0.4131, "step": 893 }, { "epoch": 0.22484909456740443, "grad_norm": 0.5333375334739685, "learning_rate": 7.493713327745181e-06, "loss": 0.3979, "step": 894 }, { "epoch": 0.22510060362173037, "grad_norm": 0.47554007172584534, "learning_rate": 7.502095557418274e-06, "loss": 0.4184, "step": 895 }, { "epoch": 0.22535211267605634, "grad_norm": 0.5044271945953369, "learning_rate": 7.510477787091367e-06, "loss": 0.4374, "step": 896 }, { "epoch": 0.2256036217303823, "grad_norm": 0.4042213261127472, "learning_rate": 7.51886001676446e-06, "loss": 0.4121, "step": 897 }, { "epoch": 0.22585513078470826, "grad_norm": 0.47659948468208313, "learning_rate": 7.5272422464375525e-06, "loss": 0.411, "step": 898 }, { "epoch": 0.2261066398390342, "grad_norm": 0.48039042949676514, "learning_rate": 7.535624476110646e-06, "loss": 0.4294, "step": 899 }, { "epoch": 0.22635814889336017, "grad_norm": 0.468188613653183, "learning_rate": 7.5440067057837394e-06, "loss": 0.4076, "step": 900 }, { "epoch": 0.22660965794768612, "grad_norm": 0.47702232003211975, "learning_rate": 7.552388935456833e-06, "loss": 0.423, "step": 901 }, { "epoch": 0.22686116700201206, "grad_norm": 0.4733107089996338, "learning_rate": 7.5607711651299255e-06, "loss": 0.4356, "step": 902 }, { "epoch": 0.22711267605633803, "grad_norm": 0.4408296048641205, "learning_rate": 7.569153394803018e-06, "loss": 0.4132, "step": 903 }, { "epoch": 0.22736418511066397, "grad_norm": 0.4489647150039673, "learning_rate": 7.577535624476111e-06, "loss": 0.4259, "step": 904 }, { "epoch": 0.22761569416498995, "grad_norm": 0.42070379853248596, "learning_rate": 7.585917854149204e-06, "loss": 0.4326, "step": 905 }, { "epoch": 0.2278672032193159, "grad_norm": 0.43029969930648804, "learning_rate": 7.594300083822297e-06, "loss": 0.4053, "step": 906 }, { "epoch": 0.22811871227364186, "grad_norm": 0.38450947403907776, "learning_rate": 7.602682313495391e-06, "loss": 0.4256, "step": 907 }, { "epoch": 0.2283702213279678, "grad_norm": 0.45817065238952637, "learning_rate": 7.611064543168484e-06, "loss": 0.4427, "step": 908 }, { "epoch": 0.22862173038229378, "grad_norm": 0.44824421405792236, "learning_rate": 7.6194467728415765e-06, "loss": 0.4002, "step": 909 }, { "epoch": 0.22887323943661972, "grad_norm": 0.4120684862136841, "learning_rate": 7.627829002514669e-06, "loss": 0.4054, "step": 910 }, { "epoch": 0.22912474849094566, "grad_norm": 0.4775068759918213, "learning_rate": 7.636211232187762e-06, "loss": 0.4149, "step": 911 }, { "epoch": 0.22937625754527163, "grad_norm": 0.5041594505310059, "learning_rate": 7.644593461860856e-06, "loss": 0.4529, "step": 912 }, { "epoch": 0.22962776659959758, "grad_norm": 0.4362272620201111, "learning_rate": 7.652975691533949e-06, "loss": 0.4144, "step": 913 }, { "epoch": 0.22987927565392355, "grad_norm": 0.4660468101501465, "learning_rate": 7.661357921207043e-06, "loss": 0.4065, "step": 914 }, { "epoch": 0.2301307847082495, "grad_norm": 0.4543038606643677, "learning_rate": 7.669740150880136e-06, "loss": 0.4356, "step": 915 }, { "epoch": 0.23038229376257546, "grad_norm": 0.4837052524089813, "learning_rate": 7.678122380553228e-06, "loss": 0.4341, "step": 916 }, { "epoch": 0.2306338028169014, "grad_norm": 0.47712114453315735, "learning_rate": 7.686504610226321e-06, "loss": 0.4127, "step": 917 }, { "epoch": 0.23088531187122738, "grad_norm": 0.44308313727378845, "learning_rate": 7.694886839899413e-06, "loss": 0.4173, "step": 918 }, { "epoch": 0.23113682092555332, "grad_norm": 0.4565160274505615, "learning_rate": 7.703269069572506e-06, "loss": 0.4009, "step": 919 }, { "epoch": 0.23138832997987926, "grad_norm": 0.4511704742908478, "learning_rate": 7.711651299245599e-06, "loss": 0.4337, "step": 920 }, { "epoch": 0.23163983903420524, "grad_norm": 0.46274664998054504, "learning_rate": 7.720033528918693e-06, "loss": 0.4184, "step": 921 }, { "epoch": 0.23189134808853118, "grad_norm": 0.4714046120643616, "learning_rate": 7.728415758591786e-06, "loss": 0.4313, "step": 922 }, { "epoch": 0.23214285714285715, "grad_norm": 0.46814224123954773, "learning_rate": 7.73679798826488e-06, "loss": 0.4318, "step": 923 }, { "epoch": 0.2323943661971831, "grad_norm": 0.47005271911621094, "learning_rate": 7.745180217937973e-06, "loss": 0.4237, "step": 924 }, { "epoch": 0.23264587525150907, "grad_norm": 0.48916304111480713, "learning_rate": 7.753562447611065e-06, "loss": 0.3893, "step": 925 }, { "epoch": 0.232897384305835, "grad_norm": 0.4641698896884918, "learning_rate": 7.761944677284158e-06, "loss": 0.4322, "step": 926 }, { "epoch": 0.23314889336016095, "grad_norm": 0.44672325253486633, "learning_rate": 7.770326906957252e-06, "loss": 0.4581, "step": 927 }, { "epoch": 0.23340040241448692, "grad_norm": 0.43729367852211, "learning_rate": 7.778709136630345e-06, "loss": 0.4139, "step": 928 }, { "epoch": 0.23365191146881287, "grad_norm": 0.45290911197662354, "learning_rate": 7.787091366303437e-06, "loss": 0.418, "step": 929 }, { "epoch": 0.23390342052313884, "grad_norm": 0.42791497707366943, "learning_rate": 7.79547359597653e-06, "loss": 0.4188, "step": 930 }, { "epoch": 0.23415492957746478, "grad_norm": 0.5161230564117432, "learning_rate": 7.803855825649623e-06, "loss": 0.4236, "step": 931 }, { "epoch": 0.23440643863179075, "grad_norm": 0.4312325119972229, "learning_rate": 7.812238055322715e-06, "loss": 0.4313, "step": 932 }, { "epoch": 0.2346579476861167, "grad_norm": 0.47641661763191223, "learning_rate": 7.82062028499581e-06, "loss": 0.4152, "step": 933 }, { "epoch": 0.23490945674044267, "grad_norm": 0.4813452363014221, "learning_rate": 7.829002514668902e-06, "loss": 0.4111, "step": 934 }, { "epoch": 0.2351609657947686, "grad_norm": 0.49906906485557556, "learning_rate": 7.837384744341997e-06, "loss": 0.3846, "step": 935 }, { "epoch": 0.23541247484909456, "grad_norm": 0.48748451471328735, "learning_rate": 7.84576697401509e-06, "loss": 0.4329, "step": 936 }, { "epoch": 0.23566398390342053, "grad_norm": 0.5034842491149902, "learning_rate": 7.854149203688182e-06, "loss": 0.4054, "step": 937 }, { "epoch": 0.23591549295774647, "grad_norm": 0.45687928795814514, "learning_rate": 7.862531433361274e-06, "loss": 0.4466, "step": 938 }, { "epoch": 0.23616700201207244, "grad_norm": 0.4433618485927582, "learning_rate": 7.870913663034367e-06, "loss": 0.3867, "step": 939 }, { "epoch": 0.23641851106639838, "grad_norm": 0.4428368806838989, "learning_rate": 7.87929589270746e-06, "loss": 0.3938, "step": 940 }, { "epoch": 0.23667002012072436, "grad_norm": 0.49069783091545105, "learning_rate": 7.887678122380554e-06, "loss": 0.3964, "step": 941 }, { "epoch": 0.2369215291750503, "grad_norm": 0.45439690351486206, "learning_rate": 7.896060352053647e-06, "loss": 0.4115, "step": 942 }, { "epoch": 0.23717303822937627, "grad_norm": 0.4886208772659302, "learning_rate": 7.90444258172674e-06, "loss": 0.4415, "step": 943 }, { "epoch": 0.23742454728370221, "grad_norm": 0.47720471024513245, "learning_rate": 7.912824811399834e-06, "loss": 0.3896, "step": 944 }, { "epoch": 0.23767605633802816, "grad_norm": 0.47643229365348816, "learning_rate": 7.921207041072926e-06, "loss": 0.4226, "step": 945 }, { "epoch": 0.23792756539235413, "grad_norm": 0.4700299799442291, "learning_rate": 7.929589270746019e-06, "loss": 0.4165, "step": 946 }, { "epoch": 0.23817907444668007, "grad_norm": 0.41711124777793884, "learning_rate": 7.937971500419113e-06, "loss": 0.4345, "step": 947 }, { "epoch": 0.23843058350100604, "grad_norm": 0.45945024490356445, "learning_rate": 7.946353730092206e-06, "loss": 0.4379, "step": 948 }, { "epoch": 0.238682092555332, "grad_norm": 0.4340158998966217, "learning_rate": 7.954735959765298e-06, "loss": 0.391, "step": 949 }, { "epoch": 0.23893360160965796, "grad_norm": 0.4647141396999359, "learning_rate": 7.963118189438391e-06, "loss": 0.4351, "step": 950 }, { "epoch": 0.2391851106639839, "grad_norm": 0.43337324261665344, "learning_rate": 7.971500419111484e-06, "loss": 0.4201, "step": 951 }, { "epoch": 0.23943661971830985, "grad_norm": 0.45250189304351807, "learning_rate": 7.979882648784576e-06, "loss": 0.4328, "step": 952 }, { "epoch": 0.23968812877263582, "grad_norm": 0.47799962759017944, "learning_rate": 7.98826487845767e-06, "loss": 0.4089, "step": 953 }, { "epoch": 0.23993963782696176, "grad_norm": 0.4971179962158203, "learning_rate": 7.996647108130763e-06, "loss": 0.4312, "step": 954 }, { "epoch": 0.24019114688128773, "grad_norm": 0.4583699703216553, "learning_rate": 8.005029337803858e-06, "loss": 0.4179, "step": 955 }, { "epoch": 0.24044265593561368, "grad_norm": 0.3827430009841919, "learning_rate": 8.01341156747695e-06, "loss": 0.4026, "step": 956 }, { "epoch": 0.24069416498993965, "grad_norm": 0.4374185800552368, "learning_rate": 8.021793797150043e-06, "loss": 0.4196, "step": 957 }, { "epoch": 0.2409456740442656, "grad_norm": 0.4754588305950165, "learning_rate": 8.030176026823135e-06, "loss": 0.4214, "step": 958 }, { "epoch": 0.24119718309859156, "grad_norm": 0.44250696897506714, "learning_rate": 8.038558256496228e-06, "loss": 0.3991, "step": 959 }, { "epoch": 0.2414486921529175, "grad_norm": 0.5119554400444031, "learning_rate": 8.04694048616932e-06, "loss": 0.428, "step": 960 }, { "epoch": 0.24170020120724345, "grad_norm": 0.5002796649932861, "learning_rate": 8.055322715842415e-06, "loss": 0.4342, "step": 961 }, { "epoch": 0.24195171026156942, "grad_norm": 0.470553457736969, "learning_rate": 8.063704945515508e-06, "loss": 0.3983, "step": 962 }, { "epoch": 0.24220321931589536, "grad_norm": 0.4792780876159668, "learning_rate": 8.0720871751886e-06, "loss": 0.4156, "step": 963 }, { "epoch": 0.24245472837022133, "grad_norm": 0.44463014602661133, "learning_rate": 8.080469404861695e-06, "loss": 0.4143, "step": 964 }, { "epoch": 0.24270623742454728, "grad_norm": 0.4440299868583679, "learning_rate": 8.088851634534787e-06, "loss": 0.3994, "step": 965 }, { "epoch": 0.24295774647887325, "grad_norm": 0.45312440395355225, "learning_rate": 8.09723386420788e-06, "loss": 0.4236, "step": 966 }, { "epoch": 0.2432092555331992, "grad_norm": 0.4270833134651184, "learning_rate": 8.105616093880972e-06, "loss": 0.4166, "step": 967 }, { "epoch": 0.24346076458752516, "grad_norm": 0.46889373660087585, "learning_rate": 8.113998323554067e-06, "loss": 0.4467, "step": 968 }, { "epoch": 0.2437122736418511, "grad_norm": 0.43110471963882446, "learning_rate": 8.12238055322716e-06, "loss": 0.4207, "step": 969 }, { "epoch": 0.24396378269617705, "grad_norm": 0.500971257686615, "learning_rate": 8.130762782900252e-06, "loss": 0.4096, "step": 970 }, { "epoch": 0.24421529175050302, "grad_norm": 0.43988287448883057, "learning_rate": 8.139145012573345e-06, "loss": 0.4236, "step": 971 }, { "epoch": 0.24446680080482897, "grad_norm": 0.43207594752311707, "learning_rate": 8.147527242246437e-06, "loss": 0.4139, "step": 972 }, { "epoch": 0.24471830985915494, "grad_norm": 0.6875976920127869, "learning_rate": 8.155909471919532e-06, "loss": 0.4108, "step": 973 }, { "epoch": 0.24496981891348088, "grad_norm": 0.4743395447731018, "learning_rate": 8.164291701592624e-06, "loss": 0.423, "step": 974 }, { "epoch": 0.24522132796780685, "grad_norm": 0.45225653052330017, "learning_rate": 8.172673931265719e-06, "loss": 0.4037, "step": 975 }, { "epoch": 0.2454728370221328, "grad_norm": 0.45756688714027405, "learning_rate": 8.181056160938811e-06, "loss": 0.4419, "step": 976 }, { "epoch": 0.24572434607645877, "grad_norm": 0.48021066188812256, "learning_rate": 8.189438390611904e-06, "loss": 0.4449, "step": 977 }, { "epoch": 0.2459758551307847, "grad_norm": 0.44351431727409363, "learning_rate": 8.197820620284996e-06, "loss": 0.4226, "step": 978 }, { "epoch": 0.24622736418511065, "grad_norm": 0.4529029428958893, "learning_rate": 8.206202849958089e-06, "loss": 0.4384, "step": 979 }, { "epoch": 0.24647887323943662, "grad_norm": 0.5040020942687988, "learning_rate": 8.214585079631182e-06, "loss": 0.4464, "step": 980 }, { "epoch": 0.24673038229376257, "grad_norm": 0.45902392268180847, "learning_rate": 8.222967309304276e-06, "loss": 0.3818, "step": 981 }, { "epoch": 0.24698189134808854, "grad_norm": 0.4509660601615906, "learning_rate": 8.231349538977369e-06, "loss": 0.3941, "step": 982 }, { "epoch": 0.24723340040241448, "grad_norm": 0.38173332810401917, "learning_rate": 8.239731768650461e-06, "loss": 0.4172, "step": 983 }, { "epoch": 0.24748490945674045, "grad_norm": 0.45830610394477844, "learning_rate": 8.248113998323556e-06, "loss": 0.407, "step": 984 }, { "epoch": 0.2477364185110664, "grad_norm": 0.49054500460624695, "learning_rate": 8.256496227996648e-06, "loss": 0.4212, "step": 985 }, { "epoch": 0.24798792756539234, "grad_norm": 0.4405127763748169, "learning_rate": 8.264878457669741e-06, "loss": 0.4381, "step": 986 }, { "epoch": 0.2482394366197183, "grad_norm": 0.4861133098602295, "learning_rate": 8.273260687342833e-06, "loss": 0.4282, "step": 987 }, { "epoch": 0.24849094567404426, "grad_norm": 0.39729857444763184, "learning_rate": 8.281642917015928e-06, "loss": 0.4062, "step": 988 }, { "epoch": 0.24874245472837023, "grad_norm": 0.48022037744522095, "learning_rate": 8.29002514668902e-06, "loss": 0.425, "step": 989 }, { "epoch": 0.24899396378269617, "grad_norm": 0.46410584449768066, "learning_rate": 8.298407376362113e-06, "loss": 0.3949, "step": 990 }, { "epoch": 0.24924547283702214, "grad_norm": 0.48868152499198914, "learning_rate": 8.306789606035206e-06, "loss": 0.4166, "step": 991 }, { "epoch": 0.24949698189134809, "grad_norm": 0.46738943457603455, "learning_rate": 8.315171835708298e-06, "loss": 0.4221, "step": 992 }, { "epoch": 0.24974849094567406, "grad_norm": 0.42502716183662415, "learning_rate": 8.323554065381391e-06, "loss": 0.4006, "step": 993 }, { "epoch": 0.25, "grad_norm": 0.41025519371032715, "learning_rate": 8.331936295054485e-06, "loss": 0.4027, "step": 994 }, { "epoch": 0.25025150905432597, "grad_norm": 0.46180248260498047, "learning_rate": 8.340318524727578e-06, "loss": 0.4375, "step": 995 }, { "epoch": 0.2505030181086519, "grad_norm": 0.4456992447376251, "learning_rate": 8.348700754400672e-06, "loss": 0.4348, "step": 996 }, { "epoch": 0.25075452716297786, "grad_norm": 0.4408442974090576, "learning_rate": 8.357082984073765e-06, "loss": 0.4062, "step": 997 }, { "epoch": 0.25100603621730383, "grad_norm": 0.49061262607574463, "learning_rate": 8.365465213746857e-06, "loss": 0.4509, "step": 998 }, { "epoch": 0.2512575452716298, "grad_norm": 0.5399837493896484, "learning_rate": 8.37384744341995e-06, "loss": 0.4313, "step": 999 }, { "epoch": 0.2515090543259557, "grad_norm": 0.5015750527381897, "learning_rate": 8.382229673093043e-06, "loss": 0.4199, "step": 1000 }, { "epoch": 0.2517605633802817, "grad_norm": 0.550068199634552, "learning_rate": 8.390611902766137e-06, "loss": 0.4691, "step": 1001 }, { "epoch": 0.25201207243460766, "grad_norm": 0.4467526376247406, "learning_rate": 8.39899413243923e-06, "loss": 0.4382, "step": 1002 }, { "epoch": 0.2522635814889336, "grad_norm": 0.5115616321563721, "learning_rate": 8.407376362112322e-06, "loss": 0.4377, "step": 1003 }, { "epoch": 0.25251509054325955, "grad_norm": 0.4908027946949005, "learning_rate": 8.415758591785415e-06, "loss": 0.4417, "step": 1004 }, { "epoch": 0.2527665995975855, "grad_norm": 0.5211309790611267, "learning_rate": 8.42414082145851e-06, "loss": 0.4143, "step": 1005 }, { "epoch": 0.2530181086519115, "grad_norm": 0.5352211594581604, "learning_rate": 8.432523051131602e-06, "loss": 0.3923, "step": 1006 }, { "epoch": 0.2532696177062374, "grad_norm": 0.511669397354126, "learning_rate": 8.440905280804694e-06, "loss": 0.4113, "step": 1007 }, { "epoch": 0.2535211267605634, "grad_norm": 0.5156270861625671, "learning_rate": 8.449287510477789e-06, "loss": 0.4233, "step": 1008 }, { "epoch": 0.25377263581488935, "grad_norm": 0.49817922711372375, "learning_rate": 8.457669740150881e-06, "loss": 0.3877, "step": 1009 }, { "epoch": 0.2540241448692153, "grad_norm": 0.4637084901332855, "learning_rate": 8.466051969823974e-06, "loss": 0.4413, "step": 1010 }, { "epoch": 0.25427565392354123, "grad_norm": 0.5281439423561096, "learning_rate": 8.474434199497067e-06, "loss": 0.4063, "step": 1011 }, { "epoch": 0.2545271629778672, "grad_norm": 0.45103931427001953, "learning_rate": 8.48281642917016e-06, "loss": 0.4049, "step": 1012 }, { "epoch": 0.2547786720321932, "grad_norm": 0.47929713129997253, "learning_rate": 8.491198658843252e-06, "loss": 0.413, "step": 1013 }, { "epoch": 0.2550301810865191, "grad_norm": 0.4654606580734253, "learning_rate": 8.499580888516346e-06, "loss": 0.4246, "step": 1014 }, { "epoch": 0.25528169014084506, "grad_norm": 0.4492524266242981, "learning_rate": 8.507963118189439e-06, "loss": 0.4328, "step": 1015 }, { "epoch": 0.25553319919517103, "grad_norm": 0.5223946571350098, "learning_rate": 8.516345347862533e-06, "loss": 0.3964, "step": 1016 }, { "epoch": 0.255784708249497, "grad_norm": 0.5574830770492554, "learning_rate": 8.524727577535626e-06, "loss": 0.4041, "step": 1017 }, { "epoch": 0.2560362173038229, "grad_norm": 0.5438414216041565, "learning_rate": 8.533109807208718e-06, "loss": 0.422, "step": 1018 }, { "epoch": 0.2562877263581489, "grad_norm": 0.49950259923934937, "learning_rate": 8.541492036881811e-06, "loss": 0.4054, "step": 1019 }, { "epoch": 0.25653923541247486, "grad_norm": 0.5470491051673889, "learning_rate": 8.549874266554904e-06, "loss": 0.4237, "step": 1020 }, { "epoch": 0.2567907444668008, "grad_norm": 0.4732471704483032, "learning_rate": 8.558256496227996e-06, "loss": 0.4189, "step": 1021 }, { "epoch": 0.25704225352112675, "grad_norm": 0.4920424818992615, "learning_rate": 8.56663872590109e-06, "loss": 0.4443, "step": 1022 }, { "epoch": 0.2572937625754527, "grad_norm": 0.4606372117996216, "learning_rate": 8.575020955574183e-06, "loss": 0.4187, "step": 1023 }, { "epoch": 0.2575452716297787, "grad_norm": 0.5176842212677002, "learning_rate": 8.583403185247276e-06, "loss": 0.4096, "step": 1024 }, { "epoch": 0.2577967806841046, "grad_norm": 0.4598933458328247, "learning_rate": 8.59178541492037e-06, "loss": 0.4443, "step": 1025 }, { "epoch": 0.2580482897384306, "grad_norm": 0.4918026030063629, "learning_rate": 8.600167644593463e-06, "loss": 0.4208, "step": 1026 }, { "epoch": 0.25829979879275655, "grad_norm": 0.46681898832321167, "learning_rate": 8.608549874266555e-06, "loss": 0.4168, "step": 1027 }, { "epoch": 0.25855130784708247, "grad_norm": 0.4654182493686676, "learning_rate": 8.61693210393965e-06, "loss": 0.4186, "step": 1028 }, { "epoch": 0.25880281690140844, "grad_norm": 0.5116965770721436, "learning_rate": 8.625314333612742e-06, "loss": 0.43, "step": 1029 }, { "epoch": 0.2590543259557344, "grad_norm": 0.4793979823589325, "learning_rate": 8.633696563285835e-06, "loss": 0.4371, "step": 1030 }, { "epoch": 0.2593058350100604, "grad_norm": 0.49715420603752136, "learning_rate": 8.642078792958928e-06, "loss": 0.4197, "step": 1031 }, { "epoch": 0.2595573440643863, "grad_norm": 0.43482688069343567, "learning_rate": 8.65046102263202e-06, "loss": 0.4, "step": 1032 }, { "epoch": 0.25980885311871227, "grad_norm": 0.5626974701881409, "learning_rate": 8.658843252305113e-06, "loss": 0.4498, "step": 1033 }, { "epoch": 0.26006036217303824, "grad_norm": 0.4670769274234772, "learning_rate": 8.667225481978207e-06, "loss": 0.4038, "step": 1034 }, { "epoch": 0.2603118712273642, "grad_norm": 0.4788265824317932, "learning_rate": 8.6756077116513e-06, "loss": 0.4186, "step": 1035 }, { "epoch": 0.2605633802816901, "grad_norm": 0.5014175772666931, "learning_rate": 8.683989941324394e-06, "loss": 0.4175, "step": 1036 }, { "epoch": 0.2608148893360161, "grad_norm": 0.4646727740764618, "learning_rate": 8.692372170997487e-06, "loss": 0.4149, "step": 1037 }, { "epoch": 0.26106639839034207, "grad_norm": 0.4624409079551697, "learning_rate": 8.70075440067058e-06, "loss": 0.376, "step": 1038 }, { "epoch": 0.261317907444668, "grad_norm": 0.4742669463157654, "learning_rate": 8.709136630343672e-06, "loss": 0.4228, "step": 1039 }, { "epoch": 0.26156941649899396, "grad_norm": 0.551210880279541, "learning_rate": 8.717518860016765e-06, "loss": 0.4292, "step": 1040 }, { "epoch": 0.2618209255533199, "grad_norm": 0.48132550716400146, "learning_rate": 8.725901089689857e-06, "loss": 0.4313, "step": 1041 }, { "epoch": 0.2620724346076459, "grad_norm": 0.5049678683280945, "learning_rate": 8.734283319362952e-06, "loss": 0.4193, "step": 1042 }, { "epoch": 0.2623239436619718, "grad_norm": 0.5476779341697693, "learning_rate": 8.742665549036044e-06, "loss": 0.4106, "step": 1043 }, { "epoch": 0.2625754527162978, "grad_norm": 0.4799102246761322, "learning_rate": 8.751047778709137e-06, "loss": 0.4198, "step": 1044 }, { "epoch": 0.26282696177062376, "grad_norm": 0.46002301573753357, "learning_rate": 8.75943000838223e-06, "loss": 0.4007, "step": 1045 }, { "epoch": 0.2630784708249497, "grad_norm": 0.5139147043228149, "learning_rate": 8.767812238055324e-06, "loss": 0.399, "step": 1046 }, { "epoch": 0.26332997987927564, "grad_norm": 0.4995320439338684, "learning_rate": 8.776194467728416e-06, "loss": 0.4379, "step": 1047 }, { "epoch": 0.2635814889336016, "grad_norm": 0.4521893560886383, "learning_rate": 8.784576697401509e-06, "loss": 0.4338, "step": 1048 }, { "epoch": 0.2638329979879276, "grad_norm": 0.4362622797489166, "learning_rate": 8.792958927074603e-06, "loss": 0.4217, "step": 1049 }, { "epoch": 0.2640845070422535, "grad_norm": 0.4953138828277588, "learning_rate": 8.801341156747696e-06, "loss": 0.4167, "step": 1050 }, { "epoch": 0.2643360160965795, "grad_norm": 0.4723548889160156, "learning_rate": 8.809723386420789e-06, "loss": 0.4537, "step": 1051 }, { "epoch": 0.26458752515090544, "grad_norm": 0.5056288242340088, "learning_rate": 8.818105616093881e-06, "loss": 0.4412, "step": 1052 }, { "epoch": 0.26483903420523136, "grad_norm": 0.5119395852088928, "learning_rate": 8.826487845766974e-06, "loss": 0.4184, "step": 1053 }, { "epoch": 0.26509054325955733, "grad_norm": 0.45162543654441833, "learning_rate": 8.834870075440067e-06, "loss": 0.4135, "step": 1054 }, { "epoch": 0.2653420523138833, "grad_norm": 0.5046412348747253, "learning_rate": 8.84325230511316e-06, "loss": 0.3995, "step": 1055 }, { "epoch": 0.2655935613682093, "grad_norm": 0.42225155234336853, "learning_rate": 8.851634534786253e-06, "loss": 0.4316, "step": 1056 }, { "epoch": 0.2658450704225352, "grad_norm": 0.5072396397590637, "learning_rate": 8.860016764459348e-06, "loss": 0.4268, "step": 1057 }, { "epoch": 0.26609657947686116, "grad_norm": 0.4895479083061218, "learning_rate": 8.86839899413244e-06, "loss": 0.4227, "step": 1058 }, { "epoch": 0.26634808853118713, "grad_norm": 0.42159146070480347, "learning_rate": 8.876781223805533e-06, "loss": 0.4053, "step": 1059 }, { "epoch": 0.2665995975855131, "grad_norm": 0.4871593713760376, "learning_rate": 8.885163453478626e-06, "loss": 0.4027, "step": 1060 }, { "epoch": 0.266851106639839, "grad_norm": 0.4512350559234619, "learning_rate": 8.893545683151718e-06, "loss": 0.4113, "step": 1061 }, { "epoch": 0.267102615694165, "grad_norm": 0.4652332365512848, "learning_rate": 8.901927912824813e-06, "loss": 0.4308, "step": 1062 }, { "epoch": 0.26735412474849096, "grad_norm": 0.4900280833244324, "learning_rate": 8.910310142497905e-06, "loss": 0.399, "step": 1063 }, { "epoch": 0.2676056338028169, "grad_norm": 0.42869365215301514, "learning_rate": 8.918692372170998e-06, "loss": 0.4127, "step": 1064 }, { "epoch": 0.26785714285714285, "grad_norm": 0.4814034700393677, "learning_rate": 8.92707460184409e-06, "loss": 0.4181, "step": 1065 }, { "epoch": 0.2681086519114688, "grad_norm": 0.47565895318984985, "learning_rate": 8.935456831517185e-06, "loss": 0.4279, "step": 1066 }, { "epoch": 0.2683601609657948, "grad_norm": 0.4539470374584198, "learning_rate": 8.943839061190277e-06, "loss": 0.4044, "step": 1067 }, { "epoch": 0.2686116700201207, "grad_norm": 0.44740232825279236, "learning_rate": 8.95222129086337e-06, "loss": 0.4206, "step": 1068 }, { "epoch": 0.2688631790744467, "grad_norm": 0.46885591745376587, "learning_rate": 8.960603520536464e-06, "loss": 0.3829, "step": 1069 }, { "epoch": 0.26911468812877265, "grad_norm": 0.5142282247543335, "learning_rate": 8.968985750209557e-06, "loss": 0.4, "step": 1070 }, { "epoch": 0.26936619718309857, "grad_norm": 0.406148225069046, "learning_rate": 8.97736797988265e-06, "loss": 0.3931, "step": 1071 }, { "epoch": 0.26961770623742454, "grad_norm": 0.449266254901886, "learning_rate": 8.985750209555742e-06, "loss": 0.3975, "step": 1072 }, { "epoch": 0.2698692152917505, "grad_norm": 0.530730664730072, "learning_rate": 8.994132439228835e-06, "loss": 0.4193, "step": 1073 }, { "epoch": 0.2701207243460765, "grad_norm": 0.495766282081604, "learning_rate": 9.002514668901928e-06, "loss": 0.4453, "step": 1074 }, { "epoch": 0.2703722334004024, "grad_norm": 0.43788525462150574, "learning_rate": 9.010896898575022e-06, "loss": 0.4432, "step": 1075 }, { "epoch": 0.27062374245472837, "grad_norm": 0.5969604253768921, "learning_rate": 9.019279128248114e-06, "loss": 0.4155, "step": 1076 }, { "epoch": 0.27087525150905434, "grad_norm": 0.5051907896995544, "learning_rate": 9.027661357921209e-06, "loss": 0.4363, "step": 1077 }, { "epoch": 0.2711267605633803, "grad_norm": 0.48711901903152466, "learning_rate": 9.036043587594301e-06, "loss": 0.4054, "step": 1078 }, { "epoch": 0.2713782696177062, "grad_norm": 0.745858907699585, "learning_rate": 9.044425817267394e-06, "loss": 0.436, "step": 1079 }, { "epoch": 0.2716297786720322, "grad_norm": 0.558805525302887, "learning_rate": 9.052808046940487e-06, "loss": 0.3979, "step": 1080 }, { "epoch": 0.27188128772635817, "grad_norm": 0.5273854732513428, "learning_rate": 9.06119027661358e-06, "loss": 0.4198, "step": 1081 }, { "epoch": 0.2721327967806841, "grad_norm": 0.5657176375389099, "learning_rate": 9.069572506286674e-06, "loss": 0.4124, "step": 1082 }, { "epoch": 0.27238430583501005, "grad_norm": 0.46423089504241943, "learning_rate": 9.077954735959766e-06, "loss": 0.426, "step": 1083 }, { "epoch": 0.272635814889336, "grad_norm": 0.5339906811714172, "learning_rate": 9.086336965632859e-06, "loss": 0.4062, "step": 1084 }, { "epoch": 0.272887323943662, "grad_norm": 0.46117010712623596, "learning_rate": 9.094719195305951e-06, "loss": 0.4059, "step": 1085 }, { "epoch": 0.2731388329979879, "grad_norm": 0.4512513279914856, "learning_rate": 9.103101424979046e-06, "loss": 0.4119, "step": 1086 }, { "epoch": 0.2733903420523139, "grad_norm": 0.5174707770347595, "learning_rate": 9.111483654652138e-06, "loss": 0.436, "step": 1087 }, { "epoch": 0.27364185110663986, "grad_norm": 0.4645453989505768, "learning_rate": 9.119865884325231e-06, "loss": 0.4332, "step": 1088 }, { "epoch": 0.27389336016096577, "grad_norm": 0.5541778206825256, "learning_rate": 9.128248113998325e-06, "loss": 0.4114, "step": 1089 }, { "epoch": 0.27414486921529174, "grad_norm": 0.47235891222953796, "learning_rate": 9.136630343671418e-06, "loss": 0.4452, "step": 1090 }, { "epoch": 0.2743963782696177, "grad_norm": 0.4705214500427246, "learning_rate": 9.14501257334451e-06, "loss": 0.4028, "step": 1091 }, { "epoch": 0.2746478873239437, "grad_norm": 0.4457109272480011, "learning_rate": 9.153394803017603e-06, "loss": 0.3873, "step": 1092 }, { "epoch": 0.2748993963782696, "grad_norm": 0.4580100178718567, "learning_rate": 9.161777032690696e-06, "loss": 0.4188, "step": 1093 }, { "epoch": 0.27515090543259557, "grad_norm": 0.50628662109375, "learning_rate": 9.170159262363788e-06, "loss": 0.4683, "step": 1094 }, { "epoch": 0.27540241448692154, "grad_norm": 0.5623934864997864, "learning_rate": 9.178541492036883e-06, "loss": 0.4115, "step": 1095 }, { "epoch": 0.27565392354124746, "grad_norm": 0.45169952511787415, "learning_rate": 9.186923721709975e-06, "loss": 0.4382, "step": 1096 }, { "epoch": 0.27590543259557343, "grad_norm": 0.4219382107257843, "learning_rate": 9.19530595138307e-06, "loss": 0.3918, "step": 1097 }, { "epoch": 0.2761569416498994, "grad_norm": 0.47983744740486145, "learning_rate": 9.203688181056162e-06, "loss": 0.4257, "step": 1098 }, { "epoch": 0.2764084507042254, "grad_norm": 0.462584525346756, "learning_rate": 9.212070410729255e-06, "loss": 0.4289, "step": 1099 }, { "epoch": 0.2766599597585513, "grad_norm": 0.4568127393722534, "learning_rate": 9.220452640402348e-06, "loss": 0.4181, "step": 1100 }, { "epoch": 0.27691146881287726, "grad_norm": 0.42234259843826294, "learning_rate": 9.22883487007544e-06, "loss": 0.4076, "step": 1101 }, { "epoch": 0.27716297786720323, "grad_norm": 0.5175244212150574, "learning_rate": 9.237217099748533e-06, "loss": 0.4207, "step": 1102 }, { "epoch": 0.2774144869215292, "grad_norm": 0.4390292763710022, "learning_rate": 9.245599329421627e-06, "loss": 0.4413, "step": 1103 }, { "epoch": 0.2776659959758551, "grad_norm": 0.5344337821006775, "learning_rate": 9.25398155909472e-06, "loss": 0.4002, "step": 1104 }, { "epoch": 0.2779175050301811, "grad_norm": 0.5546042919158936, "learning_rate": 9.262363788767812e-06, "loss": 0.3978, "step": 1105 }, { "epoch": 0.27816901408450706, "grad_norm": 0.45198217034339905, "learning_rate": 9.270746018440905e-06, "loss": 0.4228, "step": 1106 }, { "epoch": 0.278420523138833, "grad_norm": 0.5244150757789612, "learning_rate": 9.279128248114e-06, "loss": 0.4018, "step": 1107 }, { "epoch": 0.27867203219315895, "grad_norm": 0.44966578483581543, "learning_rate": 9.287510477787092e-06, "loss": 0.388, "step": 1108 }, { "epoch": 0.2789235412474849, "grad_norm": 0.5236296653747559, "learning_rate": 9.295892707460186e-06, "loss": 0.4182, "step": 1109 }, { "epoch": 0.2791750503018109, "grad_norm": 0.46590518951416016, "learning_rate": 9.304274937133279e-06, "loss": 0.4121, "step": 1110 }, { "epoch": 0.2794265593561368, "grad_norm": 0.4703758955001831, "learning_rate": 9.312657166806372e-06, "loss": 0.4208, "step": 1111 }, { "epoch": 0.2796780684104628, "grad_norm": 0.431341290473938, "learning_rate": 9.321039396479464e-06, "loss": 0.4101, "step": 1112 }, { "epoch": 0.27992957746478875, "grad_norm": 0.5059375762939453, "learning_rate": 9.329421626152557e-06, "loss": 0.4478, "step": 1113 }, { "epoch": 0.28018108651911466, "grad_norm": 0.4744166433811188, "learning_rate": 9.33780385582565e-06, "loss": 0.4076, "step": 1114 }, { "epoch": 0.28043259557344064, "grad_norm": 0.4455539584159851, "learning_rate": 9.346186085498742e-06, "loss": 0.4235, "step": 1115 }, { "epoch": 0.2806841046277666, "grad_norm": 0.5029807090759277, "learning_rate": 9.354568315171836e-06, "loss": 0.4215, "step": 1116 }, { "epoch": 0.2809356136820926, "grad_norm": 0.4714094400405884, "learning_rate": 9.362950544844929e-06, "loss": 0.3958, "step": 1117 }, { "epoch": 0.2811871227364185, "grad_norm": 0.4298758804798126, "learning_rate": 9.371332774518023e-06, "loss": 0.4283, "step": 1118 }, { "epoch": 0.28143863179074446, "grad_norm": 0.4799964427947998, "learning_rate": 9.379715004191116e-06, "loss": 0.4207, "step": 1119 }, { "epoch": 0.28169014084507044, "grad_norm": 0.5128260850906372, "learning_rate": 9.388097233864209e-06, "loss": 0.4336, "step": 1120 }, { "epoch": 0.28194164989939635, "grad_norm": 0.44608020782470703, "learning_rate": 9.396479463537301e-06, "loss": 0.4154, "step": 1121 }, { "epoch": 0.2821931589537223, "grad_norm": 0.4927178621292114, "learning_rate": 9.404861693210394e-06, "loss": 0.4369, "step": 1122 }, { "epoch": 0.2824446680080483, "grad_norm": 0.42505180835723877, "learning_rate": 9.413243922883488e-06, "loss": 0.4089, "step": 1123 }, { "epoch": 0.28269617706237427, "grad_norm": 0.4519820213317871, "learning_rate": 9.42162615255658e-06, "loss": 0.4242, "step": 1124 }, { "epoch": 0.2829476861167002, "grad_norm": 0.4712681770324707, "learning_rate": 9.430008382229673e-06, "loss": 0.4362, "step": 1125 }, { "epoch": 0.28319919517102615, "grad_norm": 0.45173585414886475, "learning_rate": 9.438390611902766e-06, "loss": 0.4332, "step": 1126 }, { "epoch": 0.2834507042253521, "grad_norm": 0.4184447228908539, "learning_rate": 9.44677284157586e-06, "loss": 0.4312, "step": 1127 }, { "epoch": 0.2837022132796781, "grad_norm": 0.4533083736896515, "learning_rate": 9.455155071248953e-06, "loss": 0.4127, "step": 1128 }, { "epoch": 0.283953722334004, "grad_norm": 0.4317009449005127, "learning_rate": 9.463537300922047e-06, "loss": 0.4158, "step": 1129 }, { "epoch": 0.28420523138833, "grad_norm": 0.4305717945098877, "learning_rate": 9.47191953059514e-06, "loss": 0.4266, "step": 1130 }, { "epoch": 0.28445674044265595, "grad_norm": 0.4272315204143524, "learning_rate": 9.480301760268233e-06, "loss": 0.4434, "step": 1131 }, { "epoch": 0.28470824949698187, "grad_norm": 0.4827929735183716, "learning_rate": 9.488683989941325e-06, "loss": 0.4404, "step": 1132 }, { "epoch": 0.28495975855130784, "grad_norm": 0.4054642617702484, "learning_rate": 9.497066219614418e-06, "loss": 0.41, "step": 1133 }, { "epoch": 0.2852112676056338, "grad_norm": 0.4518846869468689, "learning_rate": 9.50544844928751e-06, "loss": 0.4361, "step": 1134 }, { "epoch": 0.2854627766599598, "grad_norm": 0.4325503706932068, "learning_rate": 9.513830678960603e-06, "loss": 0.399, "step": 1135 }, { "epoch": 0.2857142857142857, "grad_norm": 0.4354625642299652, "learning_rate": 9.522212908633697e-06, "loss": 0.4186, "step": 1136 }, { "epoch": 0.28596579476861167, "grad_norm": 0.43451640009880066, "learning_rate": 9.53059513830679e-06, "loss": 0.4315, "step": 1137 }, { "epoch": 0.28621730382293764, "grad_norm": 0.4460737109184265, "learning_rate": 9.538977367979884e-06, "loss": 0.3852, "step": 1138 }, { "epoch": 0.28646881287726356, "grad_norm": 0.44246113300323486, "learning_rate": 9.547359597652977e-06, "loss": 0.4176, "step": 1139 }, { "epoch": 0.28672032193158953, "grad_norm": 0.4565284550189972, "learning_rate": 9.55574182732607e-06, "loss": 0.4202, "step": 1140 }, { "epoch": 0.2869718309859155, "grad_norm": 0.46833330392837524, "learning_rate": 9.564124056999162e-06, "loss": 0.4173, "step": 1141 }, { "epoch": 0.28722334004024147, "grad_norm": 0.5017228722572327, "learning_rate": 9.572506286672255e-06, "loss": 0.3843, "step": 1142 }, { "epoch": 0.2874748490945674, "grad_norm": 0.5037913918495178, "learning_rate": 9.58088851634535e-06, "loss": 0.4185, "step": 1143 }, { "epoch": 0.28772635814889336, "grad_norm": 0.5422639846801758, "learning_rate": 9.589270746018442e-06, "loss": 0.4084, "step": 1144 }, { "epoch": 0.28797786720321933, "grad_norm": 0.4788282513618469, "learning_rate": 9.597652975691534e-06, "loss": 0.3975, "step": 1145 }, { "epoch": 0.28822937625754524, "grad_norm": 0.5363534092903137, "learning_rate": 9.606035205364627e-06, "loss": 0.4217, "step": 1146 }, { "epoch": 0.2884808853118712, "grad_norm": 0.44976770877838135, "learning_rate": 9.614417435037721e-06, "loss": 0.3822, "step": 1147 }, { "epoch": 0.2887323943661972, "grad_norm": 0.49459314346313477, "learning_rate": 9.622799664710814e-06, "loss": 0.4358, "step": 1148 }, { "epoch": 0.28898390342052316, "grad_norm": 0.45928260684013367, "learning_rate": 9.631181894383907e-06, "loss": 0.4268, "step": 1149 }, { "epoch": 0.2892354124748491, "grad_norm": 0.43904656171798706, "learning_rate": 9.639564124057001e-06, "loss": 0.4202, "step": 1150 }, { "epoch": 0.28948692152917505, "grad_norm": 0.45102381706237793, "learning_rate": 9.647946353730094e-06, "loss": 0.4087, "step": 1151 }, { "epoch": 0.289738430583501, "grad_norm": 0.4579281210899353, "learning_rate": 9.656328583403186e-06, "loss": 0.4153, "step": 1152 }, { "epoch": 0.289989939637827, "grad_norm": 0.4361988604068756, "learning_rate": 9.664710813076279e-06, "loss": 0.4393, "step": 1153 }, { "epoch": 0.2902414486921529, "grad_norm": 0.43042805790901184, "learning_rate": 9.673093042749371e-06, "loss": 0.4031, "step": 1154 }, { "epoch": 0.2904929577464789, "grad_norm": 0.5227105021476746, "learning_rate": 9.681475272422464e-06, "loss": 0.433, "step": 1155 }, { "epoch": 0.29074446680080485, "grad_norm": 0.48663586378097534, "learning_rate": 9.689857502095558e-06, "loss": 0.3878, "step": 1156 }, { "epoch": 0.29099597585513076, "grad_norm": 0.5375685691833496, "learning_rate": 9.698239731768651e-06, "loss": 0.4254, "step": 1157 }, { "epoch": 0.29124748490945673, "grad_norm": 0.5156546235084534, "learning_rate": 9.706621961441745e-06, "loss": 0.414, "step": 1158 }, { "epoch": 0.2914989939637827, "grad_norm": 0.4819703698158264, "learning_rate": 9.715004191114838e-06, "loss": 0.4194, "step": 1159 }, { "epoch": 0.2917505030181087, "grad_norm": 0.4525262117385864, "learning_rate": 9.72338642078793e-06, "loss": 0.421, "step": 1160 }, { "epoch": 0.2920020120724346, "grad_norm": 0.5090615153312683, "learning_rate": 9.731768650461023e-06, "loss": 0.4238, "step": 1161 }, { "epoch": 0.29225352112676056, "grad_norm": 0.42741072177886963, "learning_rate": 9.740150880134116e-06, "loss": 0.3865, "step": 1162 }, { "epoch": 0.29250503018108653, "grad_norm": 0.5380824208259583, "learning_rate": 9.74853310980721e-06, "loss": 0.4206, "step": 1163 }, { "epoch": 0.29275653923541245, "grad_norm": 0.44021573662757874, "learning_rate": 9.756915339480303e-06, "loss": 0.4082, "step": 1164 }, { "epoch": 0.2930080482897384, "grad_norm": 0.4981013834476471, "learning_rate": 9.765297569153395e-06, "loss": 0.4137, "step": 1165 }, { "epoch": 0.2932595573440644, "grad_norm": 0.5061078071594238, "learning_rate": 9.773679798826488e-06, "loss": 0.404, "step": 1166 }, { "epoch": 0.29351106639839036, "grad_norm": 0.4524628818035126, "learning_rate": 9.78206202849958e-06, "loss": 0.4101, "step": 1167 }, { "epoch": 0.2937625754527163, "grad_norm": 0.5405965447425842, "learning_rate": 9.790444258172675e-06, "loss": 0.4407, "step": 1168 }, { "epoch": 0.29401408450704225, "grad_norm": 0.43946367502212524, "learning_rate": 9.798826487845768e-06, "loss": 0.4234, "step": 1169 }, { "epoch": 0.2942655935613682, "grad_norm": 0.52604079246521, "learning_rate": 9.807208717518862e-06, "loss": 0.4164, "step": 1170 }, { "epoch": 0.29451710261569414, "grad_norm": 0.46760255098342896, "learning_rate": 9.815590947191955e-06, "loss": 0.4186, "step": 1171 }, { "epoch": 0.2947686116700201, "grad_norm": 0.5770349502563477, "learning_rate": 9.823973176865047e-06, "loss": 0.3987, "step": 1172 }, { "epoch": 0.2950201207243461, "grad_norm": 0.5591742992401123, "learning_rate": 9.83235540653814e-06, "loss": 0.4338, "step": 1173 }, { "epoch": 0.29527162977867205, "grad_norm": 0.5872971415519714, "learning_rate": 9.840737636211232e-06, "loss": 0.4077, "step": 1174 }, { "epoch": 0.29552313883299797, "grad_norm": 0.5050575733184814, "learning_rate": 9.849119865884325e-06, "loss": 0.4163, "step": 1175 }, { "epoch": 0.29577464788732394, "grad_norm": 0.5279958844184875, "learning_rate": 9.857502095557418e-06, "loss": 0.4493, "step": 1176 }, { "epoch": 0.2960261569416499, "grad_norm": 0.4451628625392914, "learning_rate": 9.865884325230512e-06, "loss": 0.4077, "step": 1177 }, { "epoch": 0.2962776659959759, "grad_norm": 0.42817094922065735, "learning_rate": 9.874266554903605e-06, "loss": 0.3958, "step": 1178 }, { "epoch": 0.2965291750503018, "grad_norm": 0.5152884125709534, "learning_rate": 9.882648784576699e-06, "loss": 0.3644, "step": 1179 }, { "epoch": 0.29678068410462777, "grad_norm": 0.5255314707756042, "learning_rate": 9.891031014249792e-06, "loss": 0.4299, "step": 1180 }, { "epoch": 0.29703219315895374, "grad_norm": 0.45311322808265686, "learning_rate": 9.899413243922884e-06, "loss": 0.4234, "step": 1181 }, { "epoch": 0.29728370221327965, "grad_norm": 0.5598365068435669, "learning_rate": 9.907795473595977e-06, "loss": 0.4224, "step": 1182 }, { "epoch": 0.2975352112676056, "grad_norm": 0.45655763149261475, "learning_rate": 9.916177703269071e-06, "loss": 0.4283, "step": 1183 }, { "epoch": 0.2977867203219316, "grad_norm": 0.5361334085464478, "learning_rate": 9.924559932942164e-06, "loss": 0.4216, "step": 1184 }, { "epoch": 0.29803822937625757, "grad_norm": 0.45937347412109375, "learning_rate": 9.932942162615256e-06, "loss": 0.4372, "step": 1185 }, { "epoch": 0.2982897384305835, "grad_norm": 0.5063474178314209, "learning_rate": 9.941324392288349e-06, "loss": 0.434, "step": 1186 }, { "epoch": 0.29854124748490946, "grad_norm": 0.5321951508522034, "learning_rate": 9.949706621961442e-06, "loss": 0.4464, "step": 1187 }, { "epoch": 0.2987927565392354, "grad_norm": 0.4343951344490051, "learning_rate": 9.958088851634536e-06, "loss": 0.4008, "step": 1188 }, { "epoch": 0.29904426559356134, "grad_norm": 0.48022282123565674, "learning_rate": 9.966471081307629e-06, "loss": 0.4137, "step": 1189 }, { "epoch": 0.2992957746478873, "grad_norm": 0.4919980466365814, "learning_rate": 9.974853310980723e-06, "loss": 0.4176, "step": 1190 }, { "epoch": 0.2995472837022133, "grad_norm": 0.4395059049129486, "learning_rate": 9.983235540653816e-06, "loss": 0.3917, "step": 1191 }, { "epoch": 0.29979879275653926, "grad_norm": 0.4927704930305481, "learning_rate": 9.991617770326908e-06, "loss": 0.3653, "step": 1192 }, { "epoch": 0.30005030181086517, "grad_norm": 0.458484411239624, "learning_rate": 1e-05, "loss": 0.4349, "step": 1193 }, { "epoch": 0.30030181086519114, "grad_norm": 0.44261494278907776, "learning_rate": 9.999999785890641e-06, "loss": 0.4293, "step": 1194 }, { "epoch": 0.3005533199195171, "grad_norm": 0.5019251108169556, "learning_rate": 9.999999143562579e-06, "loss": 0.423, "step": 1195 }, { "epoch": 0.3008048289738431, "grad_norm": 0.42558160424232483, "learning_rate": 9.99999807301587e-06, "loss": 0.3981, "step": 1196 }, { "epoch": 0.301056338028169, "grad_norm": 0.44284799695014954, "learning_rate": 9.999996574250606e-06, "loss": 0.4336, "step": 1197 }, { "epoch": 0.301307847082495, "grad_norm": 0.45541757345199585, "learning_rate": 9.999994647266916e-06, "loss": 0.4049, "step": 1198 }, { "epoch": 0.30155935613682094, "grad_norm": 0.5015101432800293, "learning_rate": 9.999992292064964e-06, "loss": 0.3797, "step": 1199 }, { "epoch": 0.30181086519114686, "grad_norm": 0.4215429723262787, "learning_rate": 9.999989508644953e-06, "loss": 0.4165, "step": 1200 }, { "epoch": 0.30206237424547283, "grad_norm": 0.43915969133377075, "learning_rate": 9.999986297007118e-06, "loss": 0.4326, "step": 1201 }, { "epoch": 0.3023138832997988, "grad_norm": 0.46269261837005615, "learning_rate": 9.999982657151738e-06, "loss": 0.4243, "step": 1202 }, { "epoch": 0.3025653923541248, "grad_norm": 0.4217459261417389, "learning_rate": 9.999978589079125e-06, "loss": 0.4055, "step": 1203 }, { "epoch": 0.3028169014084507, "grad_norm": 0.5113700032234192, "learning_rate": 9.999974092789623e-06, "loss": 0.4091, "step": 1204 }, { "epoch": 0.30306841046277666, "grad_norm": 0.4101250171661377, "learning_rate": 9.999969168283621e-06, "loss": 0.401, "step": 1205 }, { "epoch": 0.30331991951710263, "grad_norm": 0.5615075826644897, "learning_rate": 9.999963815561538e-06, "loss": 0.4226, "step": 1206 }, { "epoch": 0.30357142857142855, "grad_norm": 0.4130154550075531, "learning_rate": 9.999958034623836e-06, "loss": 0.4115, "step": 1207 }, { "epoch": 0.3038229376257545, "grad_norm": 0.4956052899360657, "learning_rate": 9.999951825471005e-06, "loss": 0.4305, "step": 1208 }, { "epoch": 0.3040744466800805, "grad_norm": 0.5325097441673279, "learning_rate": 9.999945188103582e-06, "loss": 0.4063, "step": 1209 }, { "epoch": 0.30432595573440646, "grad_norm": 0.45363694429397583, "learning_rate": 9.999938122522133e-06, "loss": 0.4244, "step": 1210 }, { "epoch": 0.3045774647887324, "grad_norm": 0.41715431213378906, "learning_rate": 9.999930628727264e-06, "loss": 0.3687, "step": 1211 }, { "epoch": 0.30482897384305835, "grad_norm": 0.49758753180503845, "learning_rate": 9.999922706719614e-06, "loss": 0.3923, "step": 1212 }, { "epoch": 0.3050804828973843, "grad_norm": 0.4848545491695404, "learning_rate": 9.999914356499864e-06, "loss": 0.4266, "step": 1213 }, { "epoch": 0.30533199195171024, "grad_norm": 0.3956749439239502, "learning_rate": 9.99990557806873e-06, "loss": 0.3876, "step": 1214 }, { "epoch": 0.3055835010060362, "grad_norm": 0.4752289354801178, "learning_rate": 9.999896371426962e-06, "loss": 0.439, "step": 1215 }, { "epoch": 0.3058350100603622, "grad_norm": 0.4210512936115265, "learning_rate": 9.999886736575349e-06, "loss": 0.4215, "step": 1216 }, { "epoch": 0.30608651911468815, "grad_norm": 0.44492197036743164, "learning_rate": 9.999876673514718e-06, "loss": 0.436, "step": 1217 }, { "epoch": 0.30633802816901406, "grad_norm": 0.41508495807647705, "learning_rate": 9.999866182245926e-06, "loss": 0.3962, "step": 1218 }, { "epoch": 0.30658953722334004, "grad_norm": 0.5030407905578613, "learning_rate": 9.999855262769875e-06, "loss": 0.4369, "step": 1219 }, { "epoch": 0.306841046277666, "grad_norm": 0.43382754921913147, "learning_rate": 9.9998439150875e-06, "loss": 0.4056, "step": 1220 }, { "epoch": 0.307092555331992, "grad_norm": 0.4502340853214264, "learning_rate": 9.999832139199775e-06, "loss": 0.4028, "step": 1221 }, { "epoch": 0.3073440643863179, "grad_norm": 0.46800076961517334, "learning_rate": 9.999819935107705e-06, "loss": 0.401, "step": 1222 }, { "epoch": 0.30759557344064387, "grad_norm": 0.47623687982559204, "learning_rate": 9.999807302812335e-06, "loss": 0.394, "step": 1223 }, { "epoch": 0.30784708249496984, "grad_norm": 0.4112638831138611, "learning_rate": 9.99979424231475e-06, "loss": 0.407, "step": 1224 }, { "epoch": 0.30809859154929575, "grad_norm": 0.5141077041625977, "learning_rate": 9.999780753616064e-06, "loss": 0.4244, "step": 1225 }, { "epoch": 0.3083501006036217, "grad_norm": 0.4799872636795044, "learning_rate": 9.999766836717437e-06, "loss": 0.4207, "step": 1226 }, { "epoch": 0.3086016096579477, "grad_norm": 0.5113214254379272, "learning_rate": 9.999752491620058e-06, "loss": 0.4062, "step": 1227 }, { "epoch": 0.30885311871227367, "grad_norm": 0.5039216876029968, "learning_rate": 9.999737718325157e-06, "loss": 0.42, "step": 1228 }, { "epoch": 0.3091046277665996, "grad_norm": 0.44573184847831726, "learning_rate": 9.999722516833999e-06, "loss": 0.4212, "step": 1229 }, { "epoch": 0.30935613682092555, "grad_norm": 0.4311496615409851, "learning_rate": 9.999706887147884e-06, "loss": 0.4031, "step": 1230 }, { "epoch": 0.3096076458752515, "grad_norm": 0.5166363716125488, "learning_rate": 9.999690829268154e-06, "loss": 0.4214, "step": 1231 }, { "epoch": 0.30985915492957744, "grad_norm": 0.44504567980766296, "learning_rate": 9.999674343196182e-06, "loss": 0.3918, "step": 1232 }, { "epoch": 0.3101106639839034, "grad_norm": 0.44314002990722656, "learning_rate": 9.99965742893338e-06, "loss": 0.4428, "step": 1233 }, { "epoch": 0.3103621730382294, "grad_norm": 0.4878178536891937, "learning_rate": 9.999640086481198e-06, "loss": 0.3875, "step": 1234 }, { "epoch": 0.31061368209255535, "grad_norm": 0.45717740058898926, "learning_rate": 9.99962231584112e-06, "loss": 0.3977, "step": 1235 }, { "epoch": 0.31086519114688127, "grad_norm": 0.4271807372570038, "learning_rate": 9.999604117014667e-06, "loss": 0.4027, "step": 1236 }, { "epoch": 0.31111670020120724, "grad_norm": 0.4877811074256897, "learning_rate": 9.999585490003399e-06, "loss": 0.3856, "step": 1237 }, { "epoch": 0.3113682092555332, "grad_norm": 0.5068530440330505, "learning_rate": 9.999566434808912e-06, "loss": 0.3895, "step": 1238 }, { "epoch": 0.31161971830985913, "grad_norm": 0.4381490647792816, "learning_rate": 9.999546951432837e-06, "loss": 0.4222, "step": 1239 }, { "epoch": 0.3118712273641851, "grad_norm": 0.44648122787475586, "learning_rate": 9.999527039876843e-06, "loss": 0.4092, "step": 1240 }, { "epoch": 0.31212273641851107, "grad_norm": 0.5271798968315125, "learning_rate": 9.999506700142633e-06, "loss": 0.4265, "step": 1241 }, { "epoch": 0.31237424547283704, "grad_norm": 0.467507928609848, "learning_rate": 9.999485932231951e-06, "loss": 0.4255, "step": 1242 }, { "epoch": 0.31262575452716296, "grad_norm": 0.43331214785575867, "learning_rate": 9.999464736146578e-06, "loss": 0.4022, "step": 1243 }, { "epoch": 0.31287726358148893, "grad_norm": 0.511960506439209, "learning_rate": 9.999443111888325e-06, "loss": 0.4229, "step": 1244 }, { "epoch": 0.3131287726358149, "grad_norm": 0.4495149850845337, "learning_rate": 9.999421059459047e-06, "loss": 0.4109, "step": 1245 }, { "epoch": 0.31338028169014087, "grad_norm": 0.4889768362045288, "learning_rate": 9.999398578860631e-06, "loss": 0.3932, "step": 1246 }, { "epoch": 0.3136317907444668, "grad_norm": 0.4641020596027374, "learning_rate": 9.999375670095003e-06, "loss": 0.4236, "step": 1247 }, { "epoch": 0.31388329979879276, "grad_norm": 0.47060105204582214, "learning_rate": 9.999352333164125e-06, "loss": 0.418, "step": 1248 }, { "epoch": 0.31413480885311873, "grad_norm": 0.4797796308994293, "learning_rate": 9.999328568069994e-06, "loss": 0.4168, "step": 1249 }, { "epoch": 0.31438631790744465, "grad_norm": 0.474357932806015, "learning_rate": 9.99930437481465e-06, "loss": 0.4113, "step": 1250 }, { "epoch": 0.3146378269617706, "grad_norm": 0.4692329466342926, "learning_rate": 9.99927975340016e-06, "loss": 0.3967, "step": 1251 }, { "epoch": 0.3148893360160966, "grad_norm": 0.5016288161277771, "learning_rate": 9.999254703828634e-06, "loss": 0.427, "step": 1252 }, { "epoch": 0.31514084507042256, "grad_norm": 0.42063063383102417, "learning_rate": 9.999229226102218e-06, "loss": 0.423, "step": 1253 }, { "epoch": 0.3153923541247485, "grad_norm": 0.4207392632961273, "learning_rate": 9.999203320223095e-06, "loss": 0.4237, "step": 1254 }, { "epoch": 0.31564386317907445, "grad_norm": 0.5132213830947876, "learning_rate": 9.999176986193481e-06, "loss": 0.3826, "step": 1255 }, { "epoch": 0.3158953722334004, "grad_norm": 0.4629036784172058, "learning_rate": 9.999150224015634e-06, "loss": 0.3976, "step": 1256 }, { "epoch": 0.31614688128772633, "grad_norm": 0.45483675599098206, "learning_rate": 9.999123033691844e-06, "loss": 0.4291, "step": 1257 }, { "epoch": 0.3163983903420523, "grad_norm": 0.45758309960365295, "learning_rate": 9.999095415224443e-06, "loss": 0.4088, "step": 1258 }, { "epoch": 0.3166498993963783, "grad_norm": 0.49029773473739624, "learning_rate": 9.999067368615791e-06, "loss": 0.4092, "step": 1259 }, { "epoch": 0.31690140845070425, "grad_norm": 0.4661732017993927, "learning_rate": 9.999038893868293e-06, "loss": 0.4094, "step": 1260 }, { "epoch": 0.31715291750503016, "grad_norm": 0.43760454654693604, "learning_rate": 9.999009990984389e-06, "loss": 0.3962, "step": 1261 }, { "epoch": 0.31740442655935613, "grad_norm": 0.46413469314575195, "learning_rate": 9.998980659966553e-06, "loss": 0.4161, "step": 1262 }, { "epoch": 0.3176559356136821, "grad_norm": 0.46015673875808716, "learning_rate": 9.998950900817297e-06, "loss": 0.3893, "step": 1263 }, { "epoch": 0.317907444668008, "grad_norm": 0.44548308849334717, "learning_rate": 9.998920713539169e-06, "loss": 0.4252, "step": 1264 }, { "epoch": 0.318158953722334, "grad_norm": 0.452181875705719, "learning_rate": 9.998890098134757e-06, "loss": 0.4226, "step": 1265 }, { "epoch": 0.31841046277665996, "grad_norm": 0.5392069220542908, "learning_rate": 9.998859054606677e-06, "loss": 0.4124, "step": 1266 }, { "epoch": 0.31866197183098594, "grad_norm": 0.43742406368255615, "learning_rate": 9.998827582957596e-06, "loss": 0.4125, "step": 1267 }, { "epoch": 0.31891348088531185, "grad_norm": 0.4227537512779236, "learning_rate": 9.998795683190202e-06, "loss": 0.4211, "step": 1268 }, { "epoch": 0.3191649899396378, "grad_norm": 0.4836387038230896, "learning_rate": 9.998763355307232e-06, "loss": 0.4342, "step": 1269 }, { "epoch": 0.3194164989939638, "grad_norm": 0.48729145526885986, "learning_rate": 9.998730599311452e-06, "loss": 0.4045, "step": 1270 }, { "epoch": 0.31966800804828976, "grad_norm": 0.46449029445648193, "learning_rate": 9.998697415205667e-06, "loss": 0.4175, "step": 1271 }, { "epoch": 0.3199195171026157, "grad_norm": 0.4990949332714081, "learning_rate": 9.998663802992723e-06, "loss": 0.435, "step": 1272 }, { "epoch": 0.32017102615694165, "grad_norm": 0.42860570549964905, "learning_rate": 9.998629762675493e-06, "loss": 0.4114, "step": 1273 }, { "epoch": 0.3204225352112676, "grad_norm": 0.5099340081214905, "learning_rate": 9.998595294256897e-06, "loss": 0.4395, "step": 1274 }, { "epoch": 0.32067404426559354, "grad_norm": 0.5020102262496948, "learning_rate": 9.998560397739885e-06, "loss": 0.3879, "step": 1275 }, { "epoch": 0.3209255533199195, "grad_norm": 0.41706791520118713, "learning_rate": 9.998525073127445e-06, "loss": 0.4166, "step": 1276 }, { "epoch": 0.3211770623742455, "grad_norm": 0.43367356061935425, "learning_rate": 9.998489320422604e-06, "loss": 0.4152, "step": 1277 }, { "epoch": 0.32142857142857145, "grad_norm": 0.3915824294090271, "learning_rate": 9.998453139628422e-06, "loss": 0.393, "step": 1278 }, { "epoch": 0.32168008048289737, "grad_norm": 0.49054497480392456, "learning_rate": 9.998416530748e-06, "loss": 0.4235, "step": 1279 }, { "epoch": 0.32193158953722334, "grad_norm": 0.4301794469356537, "learning_rate": 9.998379493784472e-06, "loss": 0.4096, "step": 1280 }, { "epoch": 0.3221830985915493, "grad_norm": 0.5517282485961914, "learning_rate": 9.998342028741009e-06, "loss": 0.395, "step": 1281 }, { "epoch": 0.3224346076458752, "grad_norm": 0.47454625368118286, "learning_rate": 9.998304135620822e-06, "loss": 0.4091, "step": 1282 }, { "epoch": 0.3226861167002012, "grad_norm": 0.44431060552597046, "learning_rate": 9.998265814427156e-06, "loss": 0.4341, "step": 1283 }, { "epoch": 0.32293762575452717, "grad_norm": 0.5420903563499451, "learning_rate": 9.998227065163289e-06, "loss": 0.3937, "step": 1284 }, { "epoch": 0.32318913480885314, "grad_norm": 0.49979931116104126, "learning_rate": 9.998187887832544e-06, "loss": 0.4236, "step": 1285 }, { "epoch": 0.32344064386317906, "grad_norm": 0.488714337348938, "learning_rate": 9.998148282438276e-06, "loss": 0.3862, "step": 1286 }, { "epoch": 0.323692152917505, "grad_norm": 0.5104209184646606, "learning_rate": 9.998108248983875e-06, "loss": 0.4263, "step": 1287 }, { "epoch": 0.323943661971831, "grad_norm": 0.5148292779922485, "learning_rate": 9.998067787472772e-06, "loss": 0.3773, "step": 1288 }, { "epoch": 0.3241951710261569, "grad_norm": 0.46651163697242737, "learning_rate": 9.998026897908429e-06, "loss": 0.4054, "step": 1289 }, { "epoch": 0.3244466800804829, "grad_norm": 0.5032572150230408, "learning_rate": 9.99798558029435e-06, "loss": 0.3935, "step": 1290 }, { "epoch": 0.32469818913480886, "grad_norm": 0.4591807425022125, "learning_rate": 9.997943834634074e-06, "loss": 0.3915, "step": 1291 }, { "epoch": 0.32494969818913483, "grad_norm": 0.4753274917602539, "learning_rate": 9.997901660931175e-06, "loss": 0.4095, "step": 1292 }, { "epoch": 0.32520120724346074, "grad_norm": 0.4749658703804016, "learning_rate": 9.997859059189266e-06, "loss": 0.384, "step": 1293 }, { "epoch": 0.3254527162977867, "grad_norm": 0.4379113018512726, "learning_rate": 9.997816029411996e-06, "loss": 0.4158, "step": 1294 }, { "epoch": 0.3257042253521127, "grad_norm": 0.5521072745323181, "learning_rate": 9.997772571603047e-06, "loss": 0.4081, "step": 1295 }, { "epoch": 0.32595573440643866, "grad_norm": 0.45863693952560425, "learning_rate": 9.997728685766144e-06, "loss": 0.4219, "step": 1296 }, { "epoch": 0.3262072434607646, "grad_norm": 0.5103247165679932, "learning_rate": 9.997684371905046e-06, "loss": 0.428, "step": 1297 }, { "epoch": 0.32645875251509054, "grad_norm": 0.4744894802570343, "learning_rate": 9.997639630023546e-06, "loss": 0.4249, "step": 1298 }, { "epoch": 0.3267102615694165, "grad_norm": 0.47997725009918213, "learning_rate": 9.997594460125477e-06, "loss": 0.4097, "step": 1299 }, { "epoch": 0.32696177062374243, "grad_norm": 0.48586708307266235, "learning_rate": 9.997548862214708e-06, "loss": 0.4036, "step": 1300 }, { "epoch": 0.3272132796780684, "grad_norm": 0.4447525441646576, "learning_rate": 9.99750283629514e-06, "loss": 0.3932, "step": 1301 }, { "epoch": 0.3274647887323944, "grad_norm": 0.4103879928588867, "learning_rate": 9.997456382370723e-06, "loss": 0.4181, "step": 1302 }, { "epoch": 0.32771629778672035, "grad_norm": 0.4686889052391052, "learning_rate": 9.99740950044543e-06, "loss": 0.4132, "step": 1303 }, { "epoch": 0.32796780684104626, "grad_norm": 0.41833746433258057, "learning_rate": 9.997362190523275e-06, "loss": 0.4269, "step": 1304 }, { "epoch": 0.32821931589537223, "grad_norm": 0.4362890124320984, "learning_rate": 9.997314452608313e-06, "loss": 0.4028, "step": 1305 }, { "epoch": 0.3284708249496982, "grad_norm": 0.47095364332199097, "learning_rate": 9.99726628670463e-06, "loss": 0.4157, "step": 1306 }, { "epoch": 0.3287223340040241, "grad_norm": 0.44647061824798584, "learning_rate": 9.997217692816355e-06, "loss": 0.3963, "step": 1307 }, { "epoch": 0.3289738430583501, "grad_norm": 0.44647330045700073, "learning_rate": 9.997168670947644e-06, "loss": 0.3993, "step": 1308 }, { "epoch": 0.32922535211267606, "grad_norm": 0.46429726481437683, "learning_rate": 9.9971192211027e-06, "loss": 0.4067, "step": 1309 }, { "epoch": 0.32947686116700203, "grad_norm": 0.465006560087204, "learning_rate": 9.997069343285757e-06, "loss": 0.3886, "step": 1310 }, { "epoch": 0.32972837022132795, "grad_norm": 0.4865345358848572, "learning_rate": 9.997019037501086e-06, "loss": 0.4055, "step": 1311 }, { "epoch": 0.3299798792756539, "grad_norm": 0.46681225299835205, "learning_rate": 9.996968303752996e-06, "loss": 0.4287, "step": 1312 }, { "epoch": 0.3302313883299799, "grad_norm": 0.5172387361526489, "learning_rate": 9.996917142045832e-06, "loss": 0.4436, "step": 1313 }, { "epoch": 0.33048289738430586, "grad_norm": 0.3964807689189911, "learning_rate": 9.996865552383975e-06, "loss": 0.4051, "step": 1314 }, { "epoch": 0.3307344064386318, "grad_norm": 0.41110795736312866, "learning_rate": 9.996813534771843e-06, "loss": 0.3752, "step": 1315 }, { "epoch": 0.33098591549295775, "grad_norm": 0.4638274908065796, "learning_rate": 9.996761089213891e-06, "loss": 0.4208, "step": 1316 }, { "epoch": 0.3312374245472837, "grad_norm": 0.4156520962715149, "learning_rate": 9.996708215714613e-06, "loss": 0.4042, "step": 1317 }, { "epoch": 0.33148893360160964, "grad_norm": 0.42104753851890564, "learning_rate": 9.996654914278535e-06, "loss": 0.4499, "step": 1318 }, { "epoch": 0.3317404426559356, "grad_norm": 0.42806974053382874, "learning_rate": 9.996601184910223e-06, "loss": 0.3979, "step": 1319 }, { "epoch": 0.3319919517102616, "grad_norm": 0.38782554864883423, "learning_rate": 9.996547027614279e-06, "loss": 0.392, "step": 1320 }, { "epoch": 0.33224346076458755, "grad_norm": 0.4238336682319641, "learning_rate": 9.996492442395338e-06, "loss": 0.4193, "step": 1321 }, { "epoch": 0.33249496981891347, "grad_norm": 0.4696759879589081, "learning_rate": 9.996437429258079e-06, "loss": 0.4137, "step": 1322 }, { "epoch": 0.33274647887323944, "grad_norm": 0.4905252158641815, "learning_rate": 9.996381988207211e-06, "loss": 0.4354, "step": 1323 }, { "epoch": 0.3329979879275654, "grad_norm": 0.4017488360404968, "learning_rate": 9.996326119247484e-06, "loss": 0.4061, "step": 1324 }, { "epoch": 0.3332494969818913, "grad_norm": 0.4732036292552948, "learning_rate": 9.99626982238368e-06, "loss": 0.4296, "step": 1325 }, { "epoch": 0.3335010060362173, "grad_norm": 0.4144299626350403, "learning_rate": 9.996213097620623e-06, "loss": 0.4169, "step": 1326 }, { "epoch": 0.33375251509054327, "grad_norm": 0.45625752210617065, "learning_rate": 9.996155944963173e-06, "loss": 0.4019, "step": 1327 }, { "epoch": 0.33400402414486924, "grad_norm": 0.3766085207462311, "learning_rate": 9.996098364416219e-06, "loss": 0.3802, "step": 1328 }, { "epoch": 0.33425553319919515, "grad_norm": 0.40613994002342224, "learning_rate": 9.996040355984697e-06, "loss": 0.3963, "step": 1329 }, { "epoch": 0.3345070422535211, "grad_norm": 0.4915297031402588, "learning_rate": 9.995981919673571e-06, "loss": 0.4075, "step": 1330 }, { "epoch": 0.3347585513078471, "grad_norm": 0.40623193979263306, "learning_rate": 9.995923055487853e-06, "loss": 0.392, "step": 1331 }, { "epoch": 0.335010060362173, "grad_norm": 0.47242090106010437, "learning_rate": 9.995863763432577e-06, "loss": 0.3792, "step": 1332 }, { "epoch": 0.335261569416499, "grad_norm": 0.39874276518821716, "learning_rate": 9.995804043512824e-06, "loss": 0.3989, "step": 1333 }, { "epoch": 0.33551307847082495, "grad_norm": 0.4230663776397705, "learning_rate": 9.995743895733707e-06, "loss": 0.4004, "step": 1334 }, { "epoch": 0.3357645875251509, "grad_norm": 0.4566534161567688, "learning_rate": 9.99568332010038e-06, "loss": 0.4478, "step": 1335 }, { "epoch": 0.33601609657947684, "grad_norm": 0.48702409863471985, "learning_rate": 9.99562231661803e-06, "loss": 0.4014, "step": 1336 }, { "epoch": 0.3362676056338028, "grad_norm": 0.4124425947666168, "learning_rate": 9.995560885291879e-06, "loss": 0.4075, "step": 1337 }, { "epoch": 0.3365191146881288, "grad_norm": 0.4736267626285553, "learning_rate": 9.995499026127194e-06, "loss": 0.381, "step": 1338 }, { "epoch": 0.33677062374245476, "grad_norm": 0.41389134526252747, "learning_rate": 9.995436739129267e-06, "loss": 0.4299, "step": 1339 }, { "epoch": 0.33702213279678067, "grad_norm": 0.49148082733154297, "learning_rate": 9.995374024303433e-06, "loss": 0.4016, "step": 1340 }, { "epoch": 0.33727364185110664, "grad_norm": 0.4507843554019928, "learning_rate": 9.995310881655066e-06, "loss": 0.4305, "step": 1341 }, { "epoch": 0.3375251509054326, "grad_norm": 0.41378965973854065, "learning_rate": 9.995247311189573e-06, "loss": 0.3896, "step": 1342 }, { "epoch": 0.33777665995975853, "grad_norm": 0.511529803276062, "learning_rate": 9.9951833129124e-06, "loss": 0.4367, "step": 1343 }, { "epoch": 0.3380281690140845, "grad_norm": 0.4993535578250885, "learning_rate": 9.995118886829023e-06, "loss": 0.4217, "step": 1344 }, { "epoch": 0.33827967806841047, "grad_norm": 0.4538334012031555, "learning_rate": 9.995054032944963e-06, "loss": 0.4088, "step": 1345 }, { "epoch": 0.33853118712273644, "grad_norm": 0.42592087388038635, "learning_rate": 9.994988751265775e-06, "loss": 0.4142, "step": 1346 }, { "epoch": 0.33878269617706236, "grad_norm": 0.4514321982860565, "learning_rate": 9.994923041797049e-06, "loss": 0.4318, "step": 1347 }, { "epoch": 0.33903420523138833, "grad_norm": 0.43856939673423767, "learning_rate": 9.994856904544412e-06, "loss": 0.4195, "step": 1348 }, { "epoch": 0.3392857142857143, "grad_norm": 0.4376979470252991, "learning_rate": 9.99479033951353e-06, "loss": 0.391, "step": 1349 }, { "epoch": 0.3395372233400402, "grad_norm": 0.43217065930366516, "learning_rate": 9.9947233467101e-06, "loss": 0.4241, "step": 1350 }, { "epoch": 0.3397887323943662, "grad_norm": 0.44194620847702026, "learning_rate": 9.994655926139864e-06, "loss": 0.401, "step": 1351 }, { "epoch": 0.34004024144869216, "grad_norm": 0.439430296421051, "learning_rate": 9.994588077808595e-06, "loss": 0.4185, "step": 1352 }, { "epoch": 0.34029175050301813, "grad_norm": 0.4721353054046631, "learning_rate": 9.994519801722103e-06, "loss": 0.4053, "step": 1353 }, { "epoch": 0.34054325955734405, "grad_norm": 0.4690324068069458, "learning_rate": 9.994451097886236e-06, "loss": 0.4294, "step": 1354 }, { "epoch": 0.34079476861167, "grad_norm": 0.4453498125076294, "learning_rate": 9.994381966306877e-06, "loss": 0.4371, "step": 1355 }, { "epoch": 0.341046277665996, "grad_norm": 0.43481093645095825, "learning_rate": 9.994312406989947e-06, "loss": 0.4068, "step": 1356 }, { "epoch": 0.3412977867203219, "grad_norm": 0.45696157217025757, "learning_rate": 9.994242419941403e-06, "loss": 0.4137, "step": 1357 }, { "epoch": 0.3415492957746479, "grad_norm": 0.5058369636535645, "learning_rate": 9.994172005167242e-06, "loss": 0.4273, "step": 1358 }, { "epoch": 0.34180080482897385, "grad_norm": 0.43768513202667236, "learning_rate": 9.994101162673491e-06, "loss": 0.3701, "step": 1359 }, { "epoch": 0.3420523138832998, "grad_norm": 0.447859525680542, "learning_rate": 9.994029892466218e-06, "loss": 0.4028, "step": 1360 }, { "epoch": 0.34230382293762573, "grad_norm": 0.5294939279556274, "learning_rate": 9.993958194551528e-06, "loss": 0.4043, "step": 1361 }, { "epoch": 0.3425553319919517, "grad_norm": 0.4057393968105316, "learning_rate": 9.993886068935559e-06, "loss": 0.4308, "step": 1362 }, { "epoch": 0.3428068410462777, "grad_norm": 0.4801323413848877, "learning_rate": 9.993813515624491e-06, "loss": 0.3747, "step": 1363 }, { "epoch": 0.34305835010060365, "grad_norm": 0.519101083278656, "learning_rate": 9.993740534624536e-06, "loss": 0.3993, "step": 1364 }, { "epoch": 0.34330985915492956, "grad_norm": 0.44538334012031555, "learning_rate": 9.993667125941946e-06, "loss": 0.3992, "step": 1365 }, { "epoch": 0.34356136820925554, "grad_norm": 0.5548309087753296, "learning_rate": 9.993593289583005e-06, "loss": 0.4186, "step": 1366 }, { "epoch": 0.3438128772635815, "grad_norm": 0.46345213055610657, "learning_rate": 9.993519025554041e-06, "loss": 0.3935, "step": 1367 }, { "epoch": 0.3440643863179074, "grad_norm": 0.5251536965370178, "learning_rate": 9.993444333861411e-06, "loss": 0.4056, "step": 1368 }, { "epoch": 0.3443158953722334, "grad_norm": 0.5084334015846252, "learning_rate": 9.993369214511512e-06, "loss": 0.4039, "step": 1369 }, { "epoch": 0.34456740442655936, "grad_norm": 0.5444664359092712, "learning_rate": 9.99329366751078e-06, "loss": 0.3857, "step": 1370 }, { "epoch": 0.34481891348088534, "grad_norm": 0.4715754985809326, "learning_rate": 9.993217692865683e-06, "loss": 0.4328, "step": 1371 }, { "epoch": 0.34507042253521125, "grad_norm": 0.5068243741989136, "learning_rate": 9.993141290582726e-06, "loss": 0.379, "step": 1372 }, { "epoch": 0.3453219315895372, "grad_norm": 0.5259790420532227, "learning_rate": 9.993064460668456e-06, "loss": 0.4116, "step": 1373 }, { "epoch": 0.3455734406438632, "grad_norm": 0.46770498156547546, "learning_rate": 9.992987203129451e-06, "loss": 0.4302, "step": 1374 }, { "epoch": 0.3458249496981891, "grad_norm": 0.47269558906555176, "learning_rate": 9.99290951797233e-06, "loss": 0.4102, "step": 1375 }, { "epoch": 0.3460764587525151, "grad_norm": 0.43284550309181213, "learning_rate": 9.99283140520374e-06, "loss": 0.4029, "step": 1376 }, { "epoch": 0.34632796780684105, "grad_norm": 0.49771684408187866, "learning_rate": 9.992752864830379e-06, "loss": 0.414, "step": 1377 }, { "epoch": 0.346579476861167, "grad_norm": 0.4438658356666565, "learning_rate": 9.992673896858969e-06, "loss": 0.4105, "step": 1378 }, { "epoch": 0.34683098591549294, "grad_norm": 0.46066081523895264, "learning_rate": 9.992594501296272e-06, "loss": 0.4012, "step": 1379 }, { "epoch": 0.3470824949698189, "grad_norm": 0.562817394733429, "learning_rate": 9.992514678149092e-06, "loss": 0.424, "step": 1380 }, { "epoch": 0.3473340040241449, "grad_norm": 0.5239977240562439, "learning_rate": 9.992434427424261e-06, "loss": 0.3936, "step": 1381 }, { "epoch": 0.3475855130784708, "grad_norm": 0.4289341866970062, "learning_rate": 9.992353749128653e-06, "loss": 0.4092, "step": 1382 }, { "epoch": 0.34783702213279677, "grad_norm": 0.565528154373169, "learning_rate": 9.992272643269181e-06, "loss": 0.4239, "step": 1383 }, { "epoch": 0.34808853118712274, "grad_norm": 0.44149690866470337, "learning_rate": 9.992191109852788e-06, "loss": 0.4068, "step": 1384 }, { "epoch": 0.3483400402414487, "grad_norm": 0.4838801920413971, "learning_rate": 9.992109148886457e-06, "loss": 0.4237, "step": 1385 }, { "epoch": 0.3485915492957746, "grad_norm": 0.5003640651702881, "learning_rate": 9.992026760377207e-06, "loss": 0.3963, "step": 1386 }, { "epoch": 0.3488430583501006, "grad_norm": 0.4707315266132355, "learning_rate": 9.991943944332097e-06, "loss": 0.4227, "step": 1387 }, { "epoch": 0.34909456740442657, "grad_norm": 0.4761320650577545, "learning_rate": 9.991860700758217e-06, "loss": 0.4195, "step": 1388 }, { "epoch": 0.34934607645875254, "grad_norm": 0.39643487334251404, "learning_rate": 9.991777029662698e-06, "loss": 0.4183, "step": 1389 }, { "epoch": 0.34959758551307846, "grad_norm": 0.4800592064857483, "learning_rate": 9.991692931052703e-06, "loss": 0.4301, "step": 1390 }, { "epoch": 0.34984909456740443, "grad_norm": 0.42438507080078125, "learning_rate": 9.991608404935435e-06, "loss": 0.4011, "step": 1391 }, { "epoch": 0.3501006036217304, "grad_norm": 0.3981070816516876, "learning_rate": 9.991523451318137e-06, "loss": 0.4184, "step": 1392 }, { "epoch": 0.3503521126760563, "grad_norm": 0.4492408037185669, "learning_rate": 9.991438070208082e-06, "loss": 0.4009, "step": 1393 }, { "epoch": 0.3506036217303823, "grad_norm": 0.41902977228164673, "learning_rate": 9.991352261612583e-06, "loss": 0.4174, "step": 1394 }, { "epoch": 0.35085513078470826, "grad_norm": 0.4707157611846924, "learning_rate": 9.991266025538988e-06, "loss": 0.4226, "step": 1395 }, { "epoch": 0.35110663983903423, "grad_norm": 0.4216676950454712, "learning_rate": 9.991179361994683e-06, "loss": 0.4397, "step": 1396 }, { "epoch": 0.35135814889336014, "grad_norm": 0.47134819626808167, "learning_rate": 9.99109227098709e-06, "loss": 0.3967, "step": 1397 }, { "epoch": 0.3516096579476861, "grad_norm": 0.4079870283603668, "learning_rate": 9.991004752523668e-06, "loss": 0.394, "step": 1398 }, { "epoch": 0.3518611670020121, "grad_norm": 0.46213197708129883, "learning_rate": 9.990916806611915e-06, "loss": 0.413, "step": 1399 }, { "epoch": 0.352112676056338, "grad_norm": 0.40814223885536194, "learning_rate": 9.99082843325936e-06, "loss": 0.4259, "step": 1400 }, { "epoch": 0.352364185110664, "grad_norm": 0.39051973819732666, "learning_rate": 9.99073963247357e-06, "loss": 0.4197, "step": 1401 }, { "epoch": 0.35261569416498995, "grad_norm": 0.4494209885597229, "learning_rate": 9.990650404262152e-06, "loss": 0.3972, "step": 1402 }, { "epoch": 0.3528672032193159, "grad_norm": 0.43199771642684937, "learning_rate": 9.99056074863275e-06, "loss": 0.4034, "step": 1403 }, { "epoch": 0.35311871227364183, "grad_norm": 0.46416175365448, "learning_rate": 9.99047066559304e-06, "loss": 0.4253, "step": 1404 }, { "epoch": 0.3533702213279678, "grad_norm": 0.49905675649642944, "learning_rate": 9.990380155150739e-06, "loss": 0.4184, "step": 1405 }, { "epoch": 0.3536217303822938, "grad_norm": 0.41556206345558167, "learning_rate": 9.990289217313597e-06, "loss": 0.3867, "step": 1406 }, { "epoch": 0.3538732394366197, "grad_norm": 0.47178298234939575, "learning_rate": 9.990197852089403e-06, "loss": 0.4356, "step": 1407 }, { "epoch": 0.35412474849094566, "grad_norm": 0.4404408037662506, "learning_rate": 9.99010605948598e-06, "loss": 0.4127, "step": 1408 }, { "epoch": 0.35437625754527163, "grad_norm": 0.4431535601615906, "learning_rate": 9.990013839511193e-06, "loss": 0.3965, "step": 1409 }, { "epoch": 0.3546277665995976, "grad_norm": 0.4630604684352875, "learning_rate": 9.989921192172936e-06, "loss": 0.4251, "step": 1410 }, { "epoch": 0.3548792756539235, "grad_norm": 0.4784621298313141, "learning_rate": 9.989828117479149e-06, "loss": 0.4314, "step": 1411 }, { "epoch": 0.3551307847082495, "grad_norm": 0.4682464301586151, "learning_rate": 9.989734615437797e-06, "loss": 0.3869, "step": 1412 }, { "epoch": 0.35538229376257546, "grad_norm": 0.4220459759235382, "learning_rate": 9.989640686056891e-06, "loss": 0.4142, "step": 1413 }, { "epoch": 0.35563380281690143, "grad_norm": 0.4825068712234497, "learning_rate": 9.989546329344474e-06, "loss": 0.3999, "step": 1414 }, { "epoch": 0.35588531187122735, "grad_norm": 0.47478923201560974, "learning_rate": 9.989451545308633e-06, "loss": 0.4274, "step": 1415 }, { "epoch": 0.3561368209255533, "grad_norm": 0.37347909808158875, "learning_rate": 9.989356333957477e-06, "loss": 0.4146, "step": 1416 }, { "epoch": 0.3563883299798793, "grad_norm": 0.484246164560318, "learning_rate": 9.989260695299165e-06, "loss": 0.3937, "step": 1417 }, { "epoch": 0.3566398390342052, "grad_norm": 0.47754985094070435, "learning_rate": 9.989164629341889e-06, "loss": 0.4186, "step": 1418 }, { "epoch": 0.3568913480885312, "grad_norm": 0.45537054538726807, "learning_rate": 9.989068136093873e-06, "loss": 0.4436, "step": 1419 }, { "epoch": 0.35714285714285715, "grad_norm": 0.4763503074645996, "learning_rate": 9.988971215563383e-06, "loss": 0.4262, "step": 1420 }, { "epoch": 0.3573943661971831, "grad_norm": 0.41354265809059143, "learning_rate": 9.98887386775872e-06, "loss": 0.4016, "step": 1421 }, { "epoch": 0.35764587525150904, "grad_norm": 0.4630916118621826, "learning_rate": 9.988776092688221e-06, "loss": 0.438, "step": 1422 }, { "epoch": 0.357897384305835, "grad_norm": 0.40816864371299744, "learning_rate": 9.988677890360258e-06, "loss": 0.4204, "step": 1423 }, { "epoch": 0.358148893360161, "grad_norm": 0.4877932071685791, "learning_rate": 9.988579260783242e-06, "loss": 0.3938, "step": 1424 }, { "epoch": 0.3584004024144869, "grad_norm": 0.4669157862663269, "learning_rate": 9.988480203965623e-06, "loss": 0.4211, "step": 1425 }, { "epoch": 0.35865191146881287, "grad_norm": 0.40130600333213806, "learning_rate": 9.988380719915881e-06, "loss": 0.4003, "step": 1426 }, { "epoch": 0.35890342052313884, "grad_norm": 0.4261634647846222, "learning_rate": 9.988280808642538e-06, "loss": 0.4073, "step": 1427 }, { "epoch": 0.3591549295774648, "grad_norm": 0.4852161705493927, "learning_rate": 9.988180470154149e-06, "loss": 0.4191, "step": 1428 }, { "epoch": 0.3594064386317907, "grad_norm": 0.4650234878063202, "learning_rate": 9.98807970445931e-06, "loss": 0.4394, "step": 1429 }, { "epoch": 0.3596579476861167, "grad_norm": 0.4383230209350586, "learning_rate": 9.987978511566651e-06, "loss": 0.3868, "step": 1430 }, { "epoch": 0.35990945674044267, "grad_norm": 0.4849243760108948, "learning_rate": 9.987876891484836e-06, "loss": 0.4096, "step": 1431 }, { "epoch": 0.36016096579476864, "grad_norm": 0.40414345264434814, "learning_rate": 9.987774844222568e-06, "loss": 0.4105, "step": 1432 }, { "epoch": 0.36041247484909456, "grad_norm": 0.4532756507396698, "learning_rate": 9.987672369788589e-06, "loss": 0.4123, "step": 1433 }, { "epoch": 0.3606639839034205, "grad_norm": 0.46058785915374756, "learning_rate": 9.987569468191674e-06, "loss": 0.4172, "step": 1434 }, { "epoch": 0.3609154929577465, "grad_norm": 0.448282390832901, "learning_rate": 9.987466139440636e-06, "loss": 0.393, "step": 1435 }, { "epoch": 0.3611670020120724, "grad_norm": 0.4629223644733429, "learning_rate": 9.987362383544326e-06, "loss": 0.4136, "step": 1436 }, { "epoch": 0.3614185110663984, "grad_norm": 0.44559571146965027, "learning_rate": 9.987258200511627e-06, "loss": 0.4002, "step": 1437 }, { "epoch": 0.36167002012072436, "grad_norm": 0.42130130529403687, "learning_rate": 9.987153590351466e-06, "loss": 0.4134, "step": 1438 }, { "epoch": 0.3619215291750503, "grad_norm": 0.4000697731971741, "learning_rate": 9.987048553072796e-06, "loss": 0.3989, "step": 1439 }, { "epoch": 0.36217303822937624, "grad_norm": 0.42225921154022217, "learning_rate": 9.986943088684619e-06, "loss": 0.4023, "step": 1440 }, { "epoch": 0.3624245472837022, "grad_norm": 0.39401090145111084, "learning_rate": 9.986837197195964e-06, "loss": 0.4141, "step": 1441 }, { "epoch": 0.3626760563380282, "grad_norm": 0.41057607531547546, "learning_rate": 9.9867308786159e-06, "loss": 0.4282, "step": 1442 }, { "epoch": 0.3629275653923541, "grad_norm": 0.40181800723075867, "learning_rate": 9.986624132953533e-06, "loss": 0.3689, "step": 1443 }, { "epoch": 0.3631790744466801, "grad_norm": 0.4301682710647583, "learning_rate": 9.986516960218005e-06, "loss": 0.4181, "step": 1444 }, { "epoch": 0.36343058350100604, "grad_norm": 0.40248462557792664, "learning_rate": 9.986409360418497e-06, "loss": 0.4252, "step": 1445 }, { "epoch": 0.363682092555332, "grad_norm": 0.451557993888855, "learning_rate": 9.98630133356422e-06, "loss": 0.4201, "step": 1446 }, { "epoch": 0.36393360160965793, "grad_norm": 0.4034324586391449, "learning_rate": 9.986192879664428e-06, "loss": 0.4109, "step": 1447 }, { "epoch": 0.3641851106639839, "grad_norm": 0.41685450077056885, "learning_rate": 9.98608399872841e-06, "loss": 0.4119, "step": 1448 }, { "epoch": 0.3644366197183099, "grad_norm": 0.4460361897945404, "learning_rate": 9.985974690765492e-06, "loss": 0.4037, "step": 1449 }, { "epoch": 0.3646881287726358, "grad_norm": 0.42670106887817383, "learning_rate": 9.985864955785032e-06, "loss": 0.3833, "step": 1450 }, { "epoch": 0.36493963782696176, "grad_norm": 0.4803377091884613, "learning_rate": 9.98575479379643e-06, "loss": 0.3959, "step": 1451 }, { "epoch": 0.36519114688128773, "grad_norm": 0.4145619869232178, "learning_rate": 9.98564420480912e-06, "loss": 0.3964, "step": 1452 }, { "epoch": 0.3654426559356137, "grad_norm": 0.45921090245246887, "learning_rate": 9.985533188832575e-06, "loss": 0.3993, "step": 1453 }, { "epoch": 0.3656941649899396, "grad_norm": 0.39491912722587585, "learning_rate": 9.985421745876302e-06, "loss": 0.3956, "step": 1454 }, { "epoch": 0.3659456740442656, "grad_norm": 0.4830361306667328, "learning_rate": 9.985309875949844e-06, "loss": 0.4028, "step": 1455 }, { "epoch": 0.36619718309859156, "grad_norm": 0.42572900652885437, "learning_rate": 9.985197579062784e-06, "loss": 0.422, "step": 1456 }, { "epoch": 0.36644869215291753, "grad_norm": 0.4604795277118683, "learning_rate": 9.98508485522474e-06, "loss": 0.4295, "step": 1457 }, { "epoch": 0.36670020120724345, "grad_norm": 0.4539150297641754, "learning_rate": 9.984971704445363e-06, "loss": 0.3922, "step": 1458 }, { "epoch": 0.3669517102615694, "grad_norm": 0.4076423645019531, "learning_rate": 9.984858126734345e-06, "loss": 0.412, "step": 1459 }, { "epoch": 0.3672032193158954, "grad_norm": 0.4578339457511902, "learning_rate": 9.984744122101415e-06, "loss": 0.4124, "step": 1460 }, { "epoch": 0.3674547283702213, "grad_norm": 0.43359407782554626, "learning_rate": 9.984629690556336e-06, "loss": 0.4002, "step": 1461 }, { "epoch": 0.3677062374245473, "grad_norm": 0.4007089138031006, "learning_rate": 9.984514832108905e-06, "loss": 0.413, "step": 1462 }, { "epoch": 0.36795774647887325, "grad_norm": 0.4894416928291321, "learning_rate": 9.984399546768964e-06, "loss": 0.4383, "step": 1463 }, { "epoch": 0.3682092555331992, "grad_norm": 0.45550626516342163, "learning_rate": 9.984283834546383e-06, "loss": 0.4012, "step": 1464 }, { "epoch": 0.36846076458752514, "grad_norm": 0.4249536991119385, "learning_rate": 9.984167695451075e-06, "loss": 0.4238, "step": 1465 }, { "epoch": 0.3687122736418511, "grad_norm": 0.43250423669815063, "learning_rate": 9.984051129492982e-06, "loss": 0.4172, "step": 1466 }, { "epoch": 0.3689637826961771, "grad_norm": 0.42905688285827637, "learning_rate": 9.983934136682092e-06, "loss": 0.4134, "step": 1467 }, { "epoch": 0.369215291750503, "grad_norm": 0.4656728208065033, "learning_rate": 9.983816717028421e-06, "loss": 0.4207, "step": 1468 }, { "epoch": 0.36946680080482897, "grad_norm": 0.3909688889980316, "learning_rate": 9.983698870542028e-06, "loss": 0.4257, "step": 1469 }, { "epoch": 0.36971830985915494, "grad_norm": 0.49250149726867676, "learning_rate": 9.983580597233005e-06, "loss": 0.3796, "step": 1470 }, { "epoch": 0.3699698189134809, "grad_norm": 0.42419931292533875, "learning_rate": 9.98346189711148e-06, "loss": 0.4042, "step": 1471 }, { "epoch": 0.3702213279678068, "grad_norm": 0.41211193799972534, "learning_rate": 9.98334277018762e-06, "loss": 0.3932, "step": 1472 }, { "epoch": 0.3704728370221328, "grad_norm": 0.41742807626724243, "learning_rate": 9.983223216471627e-06, "loss": 0.3787, "step": 1473 }, { "epoch": 0.37072434607645877, "grad_norm": 0.42407408356666565, "learning_rate": 9.98310323597374e-06, "loss": 0.4117, "step": 1474 }, { "epoch": 0.3709758551307847, "grad_norm": 0.469777375459671, "learning_rate": 9.982982828704237e-06, "loss": 0.4397, "step": 1475 }, { "epoch": 0.37122736418511065, "grad_norm": 0.4012710154056549, "learning_rate": 9.982861994673427e-06, "loss": 0.3935, "step": 1476 }, { "epoch": 0.3714788732394366, "grad_norm": 0.5193198323249817, "learning_rate": 9.982740733891661e-06, "loss": 0.3994, "step": 1477 }, { "epoch": 0.3717303822937626, "grad_norm": 0.4547984004020691, "learning_rate": 9.982619046369321e-06, "loss": 0.3832, "step": 1478 }, { "epoch": 0.3719818913480885, "grad_norm": 0.43693944811820984, "learning_rate": 9.982496932116835e-06, "loss": 0.3778, "step": 1479 }, { "epoch": 0.3722334004024145, "grad_norm": 0.4877944588661194, "learning_rate": 9.982374391144653e-06, "loss": 0.4425, "step": 1480 }, { "epoch": 0.37248490945674045, "grad_norm": 0.4946407973766327, "learning_rate": 9.982251423463275e-06, "loss": 0.407, "step": 1481 }, { "epoch": 0.3727364185110664, "grad_norm": 0.4377669394016266, "learning_rate": 9.98212802908323e-06, "loss": 0.3936, "step": 1482 }, { "epoch": 0.37298792756539234, "grad_norm": 0.42440077662467957, "learning_rate": 9.982004208015091e-06, "loss": 0.4186, "step": 1483 }, { "epoch": 0.3732394366197183, "grad_norm": 0.5275130867958069, "learning_rate": 9.981879960269458e-06, "loss": 0.4257, "step": 1484 }, { "epoch": 0.3734909456740443, "grad_norm": 0.5148937106132507, "learning_rate": 9.98175528585697e-06, "loss": 0.398, "step": 1485 }, { "epoch": 0.3737424547283702, "grad_norm": 0.44241297245025635, "learning_rate": 9.981630184788311e-06, "loss": 0.4011, "step": 1486 }, { "epoch": 0.37399396378269617, "grad_norm": 0.4566717743873596, "learning_rate": 9.98150465707419e-06, "loss": 0.4122, "step": 1487 }, { "epoch": 0.37424547283702214, "grad_norm": 0.5034930109977722, "learning_rate": 9.981378702725359e-06, "loss": 0.3888, "step": 1488 }, { "epoch": 0.3744969818913481, "grad_norm": 0.41092514991760254, "learning_rate": 9.981252321752606e-06, "loss": 0.4067, "step": 1489 }, { "epoch": 0.37474849094567403, "grad_norm": 0.47386881709098816, "learning_rate": 9.981125514166755e-06, "loss": 0.4153, "step": 1490 }, { "epoch": 0.375, "grad_norm": 0.4686683714389801, "learning_rate": 9.980998279978664e-06, "loss": 0.4279, "step": 1491 }, { "epoch": 0.37525150905432597, "grad_norm": 0.4030835032463074, "learning_rate": 9.980870619199232e-06, "loss": 0.4043, "step": 1492 }, { "epoch": 0.3755030181086519, "grad_norm": 0.39192453026771545, "learning_rate": 9.980742531839393e-06, "loss": 0.3521, "step": 1493 }, { "epoch": 0.37575452716297786, "grad_norm": 0.42179104685783386, "learning_rate": 9.980614017910112e-06, "loss": 0.3976, "step": 1494 }, { "epoch": 0.37600603621730383, "grad_norm": 0.4499700665473938, "learning_rate": 9.980485077422404e-06, "loss": 0.4033, "step": 1495 }, { "epoch": 0.3762575452716298, "grad_norm": 0.40622663497924805, "learning_rate": 9.980355710387304e-06, "loss": 0.3914, "step": 1496 }, { "epoch": 0.3765090543259557, "grad_norm": 0.441628098487854, "learning_rate": 9.980225916815894e-06, "loss": 0.41, "step": 1497 }, { "epoch": 0.3767605633802817, "grad_norm": 0.4532158672809601, "learning_rate": 9.980095696719291e-06, "loss": 0.4123, "step": 1498 }, { "epoch": 0.37701207243460766, "grad_norm": 0.3944168984889984, "learning_rate": 9.979965050108648e-06, "loss": 0.3929, "step": 1499 }, { "epoch": 0.3772635814889336, "grad_norm": 0.41683652997016907, "learning_rate": 9.979833976995153e-06, "loss": 0.3946, "step": 1500 }, { "epoch": 0.37751509054325955, "grad_norm": 0.4310116469860077, "learning_rate": 9.97970247739003e-06, "loss": 0.4022, "step": 1501 }, { "epoch": 0.3777665995975855, "grad_norm": 0.44122573733329773, "learning_rate": 9.979570551304543e-06, "loss": 0.403, "step": 1502 }, { "epoch": 0.3780181086519115, "grad_norm": 0.4259469211101532, "learning_rate": 9.979438198749991e-06, "loss": 0.3935, "step": 1503 }, { "epoch": 0.3782696177062374, "grad_norm": 0.4257745146751404, "learning_rate": 9.979305419737709e-06, "loss": 0.4151, "step": 1504 }, { "epoch": 0.3785211267605634, "grad_norm": 0.44491034746170044, "learning_rate": 9.979172214279067e-06, "loss": 0.4164, "step": 1505 }, { "epoch": 0.37877263581488935, "grad_norm": 0.40862441062927246, "learning_rate": 9.979038582385475e-06, "loss": 0.3876, "step": 1506 }, { "epoch": 0.3790241448692153, "grad_norm": 0.3930971026420593, "learning_rate": 9.978904524068378e-06, "loss": 0.3938, "step": 1507 }, { "epoch": 0.37927565392354123, "grad_norm": 0.3884839117527008, "learning_rate": 9.978770039339256e-06, "loss": 0.3696, "step": 1508 }, { "epoch": 0.3795271629778672, "grad_norm": 0.43522050976753235, "learning_rate": 9.978635128209626e-06, "loss": 0.4352, "step": 1509 }, { "epoch": 0.3797786720321932, "grad_norm": 0.3984873294830322, "learning_rate": 9.978499790691045e-06, "loss": 0.3988, "step": 1510 }, { "epoch": 0.3800301810865191, "grad_norm": 0.4236599802970886, "learning_rate": 9.978364026795102e-06, "loss": 0.4121, "step": 1511 }, { "epoch": 0.38028169014084506, "grad_norm": 0.40510380268096924, "learning_rate": 9.978227836533424e-06, "loss": 0.416, "step": 1512 }, { "epoch": 0.38053319919517103, "grad_norm": 0.5215635299682617, "learning_rate": 9.978091219917675e-06, "loss": 0.419, "step": 1513 }, { "epoch": 0.380784708249497, "grad_norm": 0.4424278140068054, "learning_rate": 9.977954176959558e-06, "loss": 0.4188, "step": 1514 }, { "epoch": 0.3810362173038229, "grad_norm": 0.40322160720825195, "learning_rate": 9.977816707670806e-06, "loss": 0.4067, "step": 1515 }, { "epoch": 0.3812877263581489, "grad_norm": 0.4065972864627838, "learning_rate": 9.977678812063195e-06, "loss": 0.3698, "step": 1516 }, { "epoch": 0.38153923541247486, "grad_norm": 0.47318071126937866, "learning_rate": 9.977540490148534e-06, "loss": 0.3954, "step": 1517 }, { "epoch": 0.3817907444668008, "grad_norm": 0.4260292947292328, "learning_rate": 9.977401741938667e-06, "loss": 0.4078, "step": 1518 }, { "epoch": 0.38204225352112675, "grad_norm": 0.5069632530212402, "learning_rate": 9.977262567445482e-06, "loss": 0.4108, "step": 1519 }, { "epoch": 0.3822937625754527, "grad_norm": 0.42806974053382874, "learning_rate": 9.977122966680896e-06, "loss": 0.4037, "step": 1520 }, { "epoch": 0.3825452716297787, "grad_norm": 0.42634284496307373, "learning_rate": 9.976982939656866e-06, "loss": 0.3896, "step": 1521 }, { "epoch": 0.3827967806841046, "grad_norm": 0.4124469757080078, "learning_rate": 9.976842486385379e-06, "loss": 0.3939, "step": 1522 }, { "epoch": 0.3830482897384306, "grad_norm": 0.43972286581993103, "learning_rate": 9.976701606878471e-06, "loss": 0.4143, "step": 1523 }, { "epoch": 0.38329979879275655, "grad_norm": 0.44509053230285645, "learning_rate": 9.976560301148203e-06, "loss": 0.4016, "step": 1524 }, { "epoch": 0.38355130784708247, "grad_norm": 0.4027416408061981, "learning_rate": 9.976418569206678e-06, "loss": 0.3824, "step": 1525 }, { "epoch": 0.38380281690140844, "grad_norm": 0.4699016511440277, "learning_rate": 9.976276411066037e-06, "loss": 0.391, "step": 1526 }, { "epoch": 0.3840543259557344, "grad_norm": 0.44244781136512756, "learning_rate": 9.976133826738452e-06, "loss": 0.3873, "step": 1527 }, { "epoch": 0.3843058350100604, "grad_norm": 0.4619574546813965, "learning_rate": 9.975990816236135e-06, "loss": 0.4172, "step": 1528 }, { "epoch": 0.3845573440643863, "grad_norm": 0.45410341024398804, "learning_rate": 9.975847379571336e-06, "loss": 0.4099, "step": 1529 }, { "epoch": 0.38480885311871227, "grad_norm": 0.4313986003398895, "learning_rate": 9.975703516756334e-06, "loss": 0.4118, "step": 1530 }, { "epoch": 0.38506036217303824, "grad_norm": 0.4216327965259552, "learning_rate": 9.975559227803458e-06, "loss": 0.3965, "step": 1531 }, { "epoch": 0.3853118712273642, "grad_norm": 0.46033933758735657, "learning_rate": 9.975414512725058e-06, "loss": 0.3729, "step": 1532 }, { "epoch": 0.3855633802816901, "grad_norm": 0.42646700143814087, "learning_rate": 9.97526937153353e-06, "loss": 0.3818, "step": 1533 }, { "epoch": 0.3858148893360161, "grad_norm": 0.4267144203186035, "learning_rate": 9.975123804241309e-06, "loss": 0.4088, "step": 1534 }, { "epoch": 0.38606639839034207, "grad_norm": 0.46120572090148926, "learning_rate": 9.974977810860858e-06, "loss": 0.4, "step": 1535 }, { "epoch": 0.386317907444668, "grad_norm": 0.41420215368270874, "learning_rate": 9.97483139140468e-06, "loss": 0.4145, "step": 1536 }, { "epoch": 0.38656941649899396, "grad_norm": 0.514680802822113, "learning_rate": 9.974684545885315e-06, "loss": 0.4021, "step": 1537 }, { "epoch": 0.3868209255533199, "grad_norm": 0.4189431369304657, "learning_rate": 9.97453727431534e-06, "loss": 0.3973, "step": 1538 }, { "epoch": 0.3870724346076459, "grad_norm": 0.47960418462753296, "learning_rate": 9.97438957670737e-06, "loss": 0.3942, "step": 1539 }, { "epoch": 0.3873239436619718, "grad_norm": 0.49763473868370056, "learning_rate": 9.974241453074051e-06, "loss": 0.3891, "step": 1540 }, { "epoch": 0.3875754527162978, "grad_norm": 0.48309236764907837, "learning_rate": 9.974092903428072e-06, "loss": 0.416, "step": 1541 }, { "epoch": 0.38782696177062376, "grad_norm": 0.48040205240249634, "learning_rate": 9.973943927782152e-06, "loss": 0.3942, "step": 1542 }, { "epoch": 0.3880784708249497, "grad_norm": 0.4268017113208771, "learning_rate": 9.973794526149051e-06, "loss": 0.4178, "step": 1543 }, { "epoch": 0.38832997987927564, "grad_norm": 0.4728141725063324, "learning_rate": 9.973644698541567e-06, "loss": 0.3855, "step": 1544 }, { "epoch": 0.3885814889336016, "grad_norm": 0.46177420020103455, "learning_rate": 9.973494444972527e-06, "loss": 0.4203, "step": 1545 }, { "epoch": 0.3888329979879276, "grad_norm": 0.47535744309425354, "learning_rate": 9.973343765454803e-06, "loss": 0.4344, "step": 1546 }, { "epoch": 0.3890845070422535, "grad_norm": 0.4919487535953522, "learning_rate": 9.973192660001299e-06, "loss": 0.429, "step": 1547 }, { "epoch": 0.3893360160965795, "grad_norm": 0.40322598814964294, "learning_rate": 9.973041128624956e-06, "loss": 0.4181, "step": 1548 }, { "epoch": 0.38958752515090544, "grad_norm": 0.538712739944458, "learning_rate": 9.972889171338752e-06, "loss": 0.3997, "step": 1549 }, { "epoch": 0.38983903420523136, "grad_norm": 0.43118852376937866, "learning_rate": 9.9727367881557e-06, "loss": 0.4037, "step": 1550 }, { "epoch": 0.39009054325955733, "grad_norm": 0.4399496912956238, "learning_rate": 9.97258397908885e-06, "loss": 0.4109, "step": 1551 }, { "epoch": 0.3903420523138833, "grad_norm": 0.42758503556251526, "learning_rate": 9.972430744151292e-06, "loss": 0.401, "step": 1552 }, { "epoch": 0.3905935613682093, "grad_norm": 0.4662076234817505, "learning_rate": 9.97227708335615e-06, "loss": 0.4129, "step": 1553 }, { "epoch": 0.3908450704225352, "grad_norm": 0.38940244913101196, "learning_rate": 9.97212299671658e-06, "loss": 0.3821, "step": 1554 }, { "epoch": 0.39109657947686116, "grad_norm": 0.4017338156700134, "learning_rate": 9.97196848424578e-06, "loss": 0.4161, "step": 1555 }, { "epoch": 0.39134808853118713, "grad_norm": 0.4648374319076538, "learning_rate": 9.971813545956986e-06, "loss": 0.3979, "step": 1556 }, { "epoch": 0.3915995975855131, "grad_norm": 0.3893650472164154, "learning_rate": 9.971658181863464e-06, "loss": 0.3949, "step": 1557 }, { "epoch": 0.391851106639839, "grad_norm": 0.47235551476478577, "learning_rate": 9.971502391978523e-06, "loss": 0.4114, "step": 1558 }, { "epoch": 0.392102615694165, "grad_norm": 0.5151132941246033, "learning_rate": 9.971346176315501e-06, "loss": 0.4216, "step": 1559 }, { "epoch": 0.39235412474849096, "grad_norm": 0.36058980226516724, "learning_rate": 9.971189534887781e-06, "loss": 0.3828, "step": 1560 }, { "epoch": 0.3926056338028169, "grad_norm": 0.5667259693145752, "learning_rate": 9.971032467708779e-06, "loss": 0.3984, "step": 1561 }, { "epoch": 0.39285714285714285, "grad_norm": 0.4725882112979889, "learning_rate": 9.970874974791942e-06, "loss": 0.4021, "step": 1562 }, { "epoch": 0.3931086519114688, "grad_norm": 0.4235588014125824, "learning_rate": 9.970717056150764e-06, "loss": 0.3842, "step": 1563 }, { "epoch": 0.3933601609657948, "grad_norm": 0.5390638709068298, "learning_rate": 9.970558711798763e-06, "loss": 0.4046, "step": 1564 }, { "epoch": 0.3936116700201207, "grad_norm": 0.5337787866592407, "learning_rate": 9.970399941749507e-06, "loss": 0.3996, "step": 1565 }, { "epoch": 0.3938631790744467, "grad_norm": 0.4832894504070282, "learning_rate": 9.970240746016588e-06, "loss": 0.3957, "step": 1566 }, { "epoch": 0.39411468812877265, "grad_norm": 0.5507562160491943, "learning_rate": 9.970081124613647e-06, "loss": 0.395, "step": 1567 }, { "epoch": 0.39436619718309857, "grad_norm": 0.4837891459465027, "learning_rate": 9.969921077554347e-06, "loss": 0.3878, "step": 1568 }, { "epoch": 0.39461770623742454, "grad_norm": 0.4195319414138794, "learning_rate": 9.969760604852399e-06, "loss": 0.4046, "step": 1569 }, { "epoch": 0.3948692152917505, "grad_norm": 0.5062696933746338, "learning_rate": 9.969599706521546e-06, "loss": 0.4181, "step": 1570 }, { "epoch": 0.3951207243460765, "grad_norm": 0.5001570582389832, "learning_rate": 9.969438382575569e-06, "loss": 0.3919, "step": 1571 }, { "epoch": 0.3953722334004024, "grad_norm": 0.47448521852493286, "learning_rate": 9.969276633028281e-06, "loss": 0.3904, "step": 1572 }, { "epoch": 0.39562374245472837, "grad_norm": 0.5753097534179688, "learning_rate": 9.96911445789354e-06, "loss": 0.4031, "step": 1573 }, { "epoch": 0.39587525150905434, "grad_norm": 0.4995958209037781, "learning_rate": 9.96895185718523e-06, "loss": 0.4172, "step": 1574 }, { "epoch": 0.3961267605633803, "grad_norm": 0.42421847581863403, "learning_rate": 9.96878883091728e-06, "loss": 0.3958, "step": 1575 }, { "epoch": 0.3963782696177062, "grad_norm": 0.42944973707199097, "learning_rate": 9.968625379103651e-06, "loss": 0.4098, "step": 1576 }, { "epoch": 0.3966297786720322, "grad_norm": 0.5947927832603455, "learning_rate": 9.968461501758343e-06, "loss": 0.4095, "step": 1577 }, { "epoch": 0.39688128772635817, "grad_norm": 0.43222859501838684, "learning_rate": 9.968297198895388e-06, "loss": 0.4088, "step": 1578 }, { "epoch": 0.3971327967806841, "grad_norm": 0.4837366044521332, "learning_rate": 9.968132470528862e-06, "loss": 0.3992, "step": 1579 }, { "epoch": 0.39738430583501005, "grad_norm": 0.46640220284461975, "learning_rate": 9.967967316672869e-06, "loss": 0.3845, "step": 1580 }, { "epoch": 0.397635814889336, "grad_norm": 0.39366641640663147, "learning_rate": 9.967801737341556e-06, "loss": 0.4074, "step": 1581 }, { "epoch": 0.397887323943662, "grad_norm": 0.4720052182674408, "learning_rate": 9.9676357325491e-06, "loss": 0.4037, "step": 1582 }, { "epoch": 0.3981388329979879, "grad_norm": 0.43749743700027466, "learning_rate": 9.967469302309722e-06, "loss": 0.4145, "step": 1583 }, { "epoch": 0.3983903420523139, "grad_norm": 0.39652055501937866, "learning_rate": 9.967302446637677e-06, "loss": 0.4018, "step": 1584 }, { "epoch": 0.39864185110663986, "grad_norm": 0.4925057291984558, "learning_rate": 9.96713516554725e-06, "loss": 0.4028, "step": 1585 }, { "epoch": 0.39889336016096577, "grad_norm": 0.448711633682251, "learning_rate": 9.966967459052771e-06, "loss": 0.4107, "step": 1586 }, { "epoch": 0.39914486921529174, "grad_norm": 0.3823001980781555, "learning_rate": 9.966799327168603e-06, "loss": 0.4128, "step": 1587 }, { "epoch": 0.3993963782696177, "grad_norm": 0.47937512397766113, "learning_rate": 9.966630769909145e-06, "loss": 0.4312, "step": 1588 }, { "epoch": 0.3996478873239437, "grad_norm": 0.4800701439380646, "learning_rate": 9.966461787288832e-06, "loss": 0.4116, "step": 1589 }, { "epoch": 0.3998993963782696, "grad_norm": 0.4551905691623688, "learning_rate": 9.966292379322138e-06, "loss": 0.4165, "step": 1590 }, { "epoch": 0.40015090543259557, "grad_norm": 0.4783093333244324, "learning_rate": 9.966122546023568e-06, "loss": 0.3833, "step": 1591 }, { "epoch": 0.40040241448692154, "grad_norm": 0.4122217893600464, "learning_rate": 9.965952287407674e-06, "loss": 0.3951, "step": 1592 }, { "epoch": 0.40065392354124746, "grad_norm": 0.4596366584300995, "learning_rate": 9.965781603489032e-06, "loss": 0.3803, "step": 1593 }, { "epoch": 0.40090543259557343, "grad_norm": 0.4768621027469635, "learning_rate": 9.96561049428226e-06, "loss": 0.3958, "step": 1594 }, { "epoch": 0.4011569416498994, "grad_norm": 0.45015203952789307, "learning_rate": 9.965438959802015e-06, "loss": 0.3932, "step": 1595 }, { "epoch": 0.4014084507042254, "grad_norm": 0.44677606225013733, "learning_rate": 9.965267000062986e-06, "loss": 0.4062, "step": 1596 }, { "epoch": 0.4016599597585513, "grad_norm": 0.45594921708106995, "learning_rate": 9.965094615079902e-06, "loss": 0.3921, "step": 1597 }, { "epoch": 0.40191146881287726, "grad_norm": 0.44499877095222473, "learning_rate": 9.964921804867524e-06, "loss": 0.4086, "step": 1598 }, { "epoch": 0.40216297786720323, "grad_norm": 0.45410072803497314, "learning_rate": 9.964748569440656e-06, "loss": 0.4115, "step": 1599 }, { "epoch": 0.4024144869215292, "grad_norm": 0.42579391598701477, "learning_rate": 9.964574908814131e-06, "loss": 0.4072, "step": 1600 }, { "epoch": 0.4026659959758551, "grad_norm": 0.42585766315460205, "learning_rate": 9.964400823002825e-06, "loss": 0.3703, "step": 1601 }, { "epoch": 0.4029175050301811, "grad_norm": 0.4478361904621124, "learning_rate": 9.964226312021645e-06, "loss": 0.4314, "step": 1602 }, { "epoch": 0.40316901408450706, "grad_norm": 0.3969224691390991, "learning_rate": 9.964051375885537e-06, "loss": 0.3911, "step": 1603 }, { "epoch": 0.403420523138833, "grad_norm": 0.4182923138141632, "learning_rate": 9.963876014609484e-06, "loss": 0.4009, "step": 1604 }, { "epoch": 0.40367203219315895, "grad_norm": 0.43487611413002014, "learning_rate": 9.963700228208503e-06, "loss": 0.3828, "step": 1605 }, { "epoch": 0.4039235412474849, "grad_norm": 0.4310348927974701, "learning_rate": 9.963524016697651e-06, "loss": 0.4142, "step": 1606 }, { "epoch": 0.4041750503018109, "grad_norm": 0.4118841588497162, "learning_rate": 9.96334738009202e-06, "loss": 0.4051, "step": 1607 }, { "epoch": 0.4044265593561368, "grad_norm": 0.4341716170310974, "learning_rate": 9.963170318406737e-06, "loss": 0.4171, "step": 1608 }, { "epoch": 0.4046780684104628, "grad_norm": 0.4583278298377991, "learning_rate": 9.962992831656964e-06, "loss": 0.4026, "step": 1609 }, { "epoch": 0.40492957746478875, "grad_norm": 0.38871103525161743, "learning_rate": 9.962814919857903e-06, "loss": 0.3959, "step": 1610 }, { "epoch": 0.40518108651911466, "grad_norm": 0.4146328866481781, "learning_rate": 9.962636583024792e-06, "loss": 0.3994, "step": 1611 }, { "epoch": 0.40543259557344064, "grad_norm": 0.371489554643631, "learning_rate": 9.962457821172903e-06, "loss": 0.3671, "step": 1612 }, { "epoch": 0.4056841046277666, "grad_norm": 0.4303867816925049, "learning_rate": 9.962278634317549e-06, "loss": 0.3833, "step": 1613 }, { "epoch": 0.4059356136820926, "grad_norm": 0.3755389451980591, "learning_rate": 9.962099022474072e-06, "loss": 0.4061, "step": 1614 }, { "epoch": 0.4061871227364185, "grad_norm": 0.4392898678779602, "learning_rate": 9.961918985657857e-06, "loss": 0.4042, "step": 1615 }, { "epoch": 0.40643863179074446, "grad_norm": 0.4400614798069, "learning_rate": 9.961738523884322e-06, "loss": 0.4054, "step": 1616 }, { "epoch": 0.40669014084507044, "grad_norm": 0.4476352035999298, "learning_rate": 9.961557637168924e-06, "loss": 0.416, "step": 1617 }, { "epoch": 0.40694164989939635, "grad_norm": 0.4568561613559723, "learning_rate": 9.961376325527152e-06, "loss": 0.3866, "step": 1618 }, { "epoch": 0.4071931589537223, "grad_norm": 0.40012219548225403, "learning_rate": 9.961194588974538e-06, "loss": 0.3919, "step": 1619 }, { "epoch": 0.4074446680080483, "grad_norm": 0.4288276135921478, "learning_rate": 9.961012427526644e-06, "loss": 0.3801, "step": 1620 }, { "epoch": 0.40769617706237427, "grad_norm": 0.4316503703594208, "learning_rate": 9.960829841199071e-06, "loss": 0.4102, "step": 1621 }, { "epoch": 0.4079476861167002, "grad_norm": 0.4526152014732361, "learning_rate": 9.960646830007456e-06, "loss": 0.4437, "step": 1622 }, { "epoch": 0.40819919517102615, "grad_norm": 0.4108959138393402, "learning_rate": 9.960463393967476e-06, "loss": 0.3752, "step": 1623 }, { "epoch": 0.4084507042253521, "grad_norm": 0.4516206383705139, "learning_rate": 9.960279533094838e-06, "loss": 0.4061, "step": 1624 }, { "epoch": 0.4087022132796781, "grad_norm": 0.46021711826324463, "learning_rate": 9.96009524740529e-06, "loss": 0.4302, "step": 1625 }, { "epoch": 0.408953722334004, "grad_norm": 0.3884812295436859, "learning_rate": 9.959910536914614e-06, "loss": 0.429, "step": 1626 }, { "epoch": 0.40920523138833, "grad_norm": 0.4301206171512604, "learning_rate": 9.95972540163863e-06, "loss": 0.4074, "step": 1627 }, { "epoch": 0.40945674044265595, "grad_norm": 0.4203178286552429, "learning_rate": 9.959539841593192e-06, "loss": 0.3939, "step": 1628 }, { "epoch": 0.40970824949698187, "grad_norm": 0.4511706233024597, "learning_rate": 9.959353856794194e-06, "loss": 0.4382, "step": 1629 }, { "epoch": 0.40995975855130784, "grad_norm": 0.42685285210609436, "learning_rate": 9.959167447257563e-06, "loss": 0.3805, "step": 1630 }, { "epoch": 0.4102112676056338, "grad_norm": 0.4132843315601349, "learning_rate": 9.958980612999265e-06, "loss": 0.3986, "step": 1631 }, { "epoch": 0.4104627766599598, "grad_norm": 0.3776272237300873, "learning_rate": 9.9587933540353e-06, "loss": 0.3944, "step": 1632 }, { "epoch": 0.4107142857142857, "grad_norm": 0.5198211073875427, "learning_rate": 9.958605670381709e-06, "loss": 0.3953, "step": 1633 }, { "epoch": 0.41096579476861167, "grad_norm": 0.4831434488296509, "learning_rate": 9.958417562054561e-06, "loss": 0.4026, "step": 1634 }, { "epoch": 0.41121730382293764, "grad_norm": 0.43184301257133484, "learning_rate": 9.958229029069969e-06, "loss": 0.4047, "step": 1635 }, { "epoch": 0.41146881287726356, "grad_norm": 0.46622034907341003, "learning_rate": 9.95804007144408e-06, "loss": 0.3992, "step": 1636 }, { "epoch": 0.41172032193158953, "grad_norm": 0.4247199296951294, "learning_rate": 9.957850689193075e-06, "loss": 0.4125, "step": 1637 }, { "epoch": 0.4119718309859155, "grad_norm": 0.4305685758590698, "learning_rate": 9.957660882333176e-06, "loss": 0.3811, "step": 1638 }, { "epoch": 0.41222334004024147, "grad_norm": 0.45092910528182983, "learning_rate": 9.957470650880636e-06, "loss": 0.3988, "step": 1639 }, { "epoch": 0.4124748490945674, "grad_norm": 0.40285438299179077, "learning_rate": 9.957279994851751e-06, "loss": 0.3985, "step": 1640 }, { "epoch": 0.41272635814889336, "grad_norm": 0.3911204934120178, "learning_rate": 9.957088914262844e-06, "loss": 0.4334, "step": 1641 }, { "epoch": 0.41297786720321933, "grad_norm": 0.4495960474014282, "learning_rate": 9.956897409130286e-06, "loss": 0.4241, "step": 1642 }, { "epoch": 0.41322937625754524, "grad_norm": 0.4449693560600281, "learning_rate": 9.956705479470473e-06, "loss": 0.4062, "step": 1643 }, { "epoch": 0.4134808853118712, "grad_norm": 0.4287528097629547, "learning_rate": 9.956513125299847e-06, "loss": 0.3907, "step": 1644 }, { "epoch": 0.4137323943661972, "grad_norm": 0.40359970927238464, "learning_rate": 9.956320346634877e-06, "loss": 0.4134, "step": 1645 }, { "epoch": 0.41398390342052316, "grad_norm": 0.48502546548843384, "learning_rate": 9.956127143492077e-06, "loss": 0.3796, "step": 1646 }, { "epoch": 0.4142354124748491, "grad_norm": 0.4318941533565521, "learning_rate": 9.955933515887992e-06, "loss": 0.4273, "step": 1647 }, { "epoch": 0.41448692152917505, "grad_norm": 0.4548565447330475, "learning_rate": 9.955739463839206e-06, "loss": 0.4136, "step": 1648 }, { "epoch": 0.414738430583501, "grad_norm": 0.4548591077327728, "learning_rate": 9.955544987362339e-06, "loss": 0.4114, "step": 1649 }, { "epoch": 0.414989939637827, "grad_norm": 0.4239116311073303, "learning_rate": 9.955350086474045e-06, "loss": 0.4106, "step": 1650 }, { "epoch": 0.4152414486921529, "grad_norm": 0.40276509523391724, "learning_rate": 9.955154761191017e-06, "loss": 0.3762, "step": 1651 }, { "epoch": 0.4154929577464789, "grad_norm": 0.513320803642273, "learning_rate": 9.954959011529982e-06, "loss": 0.4079, "step": 1652 }, { "epoch": 0.41574446680080485, "grad_norm": 0.42079389095306396, "learning_rate": 9.954762837507705e-06, "loss": 0.3759, "step": 1653 }, { "epoch": 0.41599597585513076, "grad_norm": 0.4646183252334595, "learning_rate": 9.95456623914099e-06, "loss": 0.4014, "step": 1654 }, { "epoch": 0.41624748490945673, "grad_norm": 0.49453848600387573, "learning_rate": 9.954369216446672e-06, "loss": 0.4085, "step": 1655 }, { "epoch": 0.4164989939637827, "grad_norm": 0.4750482738018036, "learning_rate": 9.954171769441625e-06, "loss": 0.3955, "step": 1656 }, { "epoch": 0.4167505030181087, "grad_norm": 0.4437958598136902, "learning_rate": 9.953973898142759e-06, "loss": 0.3681, "step": 1657 }, { "epoch": 0.4170020120724346, "grad_norm": 0.4792766571044922, "learning_rate": 9.953775602567019e-06, "loss": 0.3918, "step": 1658 }, { "epoch": 0.41725352112676056, "grad_norm": 0.5086737275123596, "learning_rate": 9.95357688273139e-06, "loss": 0.3897, "step": 1659 }, { "epoch": 0.41750503018108653, "grad_norm": 0.4676213264465332, "learning_rate": 9.953377738652892e-06, "loss": 0.3902, "step": 1660 }, { "epoch": 0.41775653923541245, "grad_norm": 0.5371580719947815, "learning_rate": 9.953178170348578e-06, "loss": 0.4179, "step": 1661 }, { "epoch": 0.4180080482897384, "grad_norm": 0.4958730638027191, "learning_rate": 9.952978177835542e-06, "loss": 0.3896, "step": 1662 }, { "epoch": 0.4182595573440644, "grad_norm": 0.43639904260635376, "learning_rate": 9.952777761130909e-06, "loss": 0.3949, "step": 1663 }, { "epoch": 0.41851106639839036, "grad_norm": 0.43316468596458435, "learning_rate": 9.952576920251845e-06, "loss": 0.3852, "step": 1664 }, { "epoch": 0.4187625754527163, "grad_norm": 0.3840535879135132, "learning_rate": 9.952375655215551e-06, "loss": 0.3757, "step": 1665 }, { "epoch": 0.41901408450704225, "grad_norm": 0.4622785151004791, "learning_rate": 9.952173966039266e-06, "loss": 0.3856, "step": 1666 }, { "epoch": 0.4192655935613682, "grad_norm": 0.4158206880092621, "learning_rate": 9.951971852740262e-06, "loss": 0.3824, "step": 1667 }, { "epoch": 0.41951710261569414, "grad_norm": 0.42178934812545776, "learning_rate": 9.951769315335843e-06, "loss": 0.3969, "step": 1668 }, { "epoch": 0.4197686116700201, "grad_norm": 0.4934524595737457, "learning_rate": 9.951566353843365e-06, "loss": 0.3818, "step": 1669 }, { "epoch": 0.4200201207243461, "grad_norm": 0.4420875608921051, "learning_rate": 9.951362968280205e-06, "loss": 0.4343, "step": 1670 }, { "epoch": 0.42027162977867205, "grad_norm": 0.42826712131500244, "learning_rate": 9.951159158663782e-06, "loss": 0.3899, "step": 1671 }, { "epoch": 0.42052313883299797, "grad_norm": 0.4212586283683777, "learning_rate": 9.950954925011552e-06, "loss": 0.3819, "step": 1672 }, { "epoch": 0.42077464788732394, "grad_norm": 0.5084717869758606, "learning_rate": 9.950750267341004e-06, "loss": 0.4106, "step": 1673 }, { "epoch": 0.4210261569416499, "grad_norm": 0.4205119013786316, "learning_rate": 9.950545185669668e-06, "loss": 0.3926, "step": 1674 }, { "epoch": 0.4212776659959759, "grad_norm": 0.4944150447845459, "learning_rate": 9.95033968001511e-06, "loss": 0.4076, "step": 1675 }, { "epoch": 0.4215291750503018, "grad_norm": 0.4203665852546692, "learning_rate": 9.950133750394926e-06, "loss": 0.3952, "step": 1676 }, { "epoch": 0.42178068410462777, "grad_norm": 0.41072648763656616, "learning_rate": 9.949927396826753e-06, "loss": 0.3923, "step": 1677 }, { "epoch": 0.42203219315895374, "grad_norm": 0.4945848882198334, "learning_rate": 9.949720619328265e-06, "loss": 0.4096, "step": 1678 }, { "epoch": 0.42228370221327965, "grad_norm": 0.3855612576007843, "learning_rate": 9.949513417917173e-06, "loss": 0.3916, "step": 1679 }, { "epoch": 0.4225352112676056, "grad_norm": 0.38849636912345886, "learning_rate": 9.94930579261122e-06, "loss": 0.3744, "step": 1680 }, { "epoch": 0.4227867203219316, "grad_norm": 0.45562243461608887, "learning_rate": 9.949097743428188e-06, "loss": 0.3882, "step": 1681 }, { "epoch": 0.42303822937625757, "grad_norm": 0.38708576560020447, "learning_rate": 9.948889270385897e-06, "loss": 0.4077, "step": 1682 }, { "epoch": 0.4232897384305835, "grad_norm": 0.4899725317955017, "learning_rate": 9.948680373502199e-06, "loss": 0.4271, "step": 1683 }, { "epoch": 0.42354124748490946, "grad_norm": 0.4218765199184418, "learning_rate": 9.948471052794988e-06, "loss": 0.4052, "step": 1684 }, { "epoch": 0.4237927565392354, "grad_norm": 0.45039162039756775, "learning_rate": 9.948261308282187e-06, "loss": 0.398, "step": 1685 }, { "epoch": 0.42404426559356134, "grad_norm": 0.3818122148513794, "learning_rate": 9.94805113998176e-06, "loss": 0.4196, "step": 1686 }, { "epoch": 0.4242957746478873, "grad_norm": 0.4184150695800781, "learning_rate": 9.94784054791171e-06, "loss": 0.3955, "step": 1687 }, { "epoch": 0.4245472837022133, "grad_norm": 0.4112408459186554, "learning_rate": 9.94762953209007e-06, "loss": 0.4208, "step": 1688 }, { "epoch": 0.42479879275653926, "grad_norm": 0.44503697752952576, "learning_rate": 9.947418092534912e-06, "loss": 0.3858, "step": 1689 }, { "epoch": 0.42505030181086517, "grad_norm": 0.42923927307128906, "learning_rate": 9.947206229264346e-06, "loss": 0.3939, "step": 1690 }, { "epoch": 0.42530181086519114, "grad_norm": 0.4138977825641632, "learning_rate": 9.946993942296517e-06, "loss": 0.3967, "step": 1691 }, { "epoch": 0.4255533199195171, "grad_norm": 0.4376116991043091, "learning_rate": 9.946781231649605e-06, "loss": 0.3883, "step": 1692 }, { "epoch": 0.4258048289738431, "grad_norm": 0.3793240487575531, "learning_rate": 9.946568097341827e-06, "loss": 0.388, "step": 1693 }, { "epoch": 0.426056338028169, "grad_norm": 0.4271707832813263, "learning_rate": 9.946354539391436e-06, "loss": 0.3985, "step": 1694 }, { "epoch": 0.426307847082495, "grad_norm": 0.396963894367218, "learning_rate": 9.946140557816724e-06, "loss": 0.3966, "step": 1695 }, { "epoch": 0.42655935613682094, "grad_norm": 0.39994847774505615, "learning_rate": 9.945926152636017e-06, "loss": 0.3922, "step": 1696 }, { "epoch": 0.42681086519114686, "grad_norm": 0.38306140899658203, "learning_rate": 9.945711323867674e-06, "loss": 0.406, "step": 1697 }, { "epoch": 0.42706237424547283, "grad_norm": 0.4517843723297119, "learning_rate": 9.945496071530098e-06, "loss": 0.3871, "step": 1698 }, { "epoch": 0.4273138832997988, "grad_norm": 0.40096959471702576, "learning_rate": 9.945280395641724e-06, "loss": 0.4098, "step": 1699 }, { "epoch": 0.4275653923541248, "grad_norm": 0.42314156889915466, "learning_rate": 9.945064296221019e-06, "loss": 0.3858, "step": 1700 }, { "epoch": 0.4278169014084507, "grad_norm": 0.4481252431869507, "learning_rate": 9.944847773286495e-06, "loss": 0.4273, "step": 1701 }, { "epoch": 0.42806841046277666, "grad_norm": 0.4047831892967224, "learning_rate": 9.944630826856694e-06, "loss": 0.3892, "step": 1702 }, { "epoch": 0.42831991951710263, "grad_norm": 0.40450939536094666, "learning_rate": 9.944413456950195e-06, "loss": 0.4199, "step": 1703 }, { "epoch": 0.42857142857142855, "grad_norm": 0.3987005352973938, "learning_rate": 9.944195663585616e-06, "loss": 0.4254, "step": 1704 }, { "epoch": 0.4288229376257545, "grad_norm": 0.426527738571167, "learning_rate": 9.94397744678161e-06, "loss": 0.3992, "step": 1705 }, { "epoch": 0.4290744466800805, "grad_norm": 0.4253356158733368, "learning_rate": 9.943758806556864e-06, "loss": 0.3876, "step": 1706 }, { "epoch": 0.42932595573440646, "grad_norm": 0.4413727819919586, "learning_rate": 9.943539742930105e-06, "loss": 0.4213, "step": 1707 }, { "epoch": 0.4295774647887324, "grad_norm": 0.4230106472969055, "learning_rate": 9.943320255920093e-06, "loss": 0.4092, "step": 1708 }, { "epoch": 0.42982897384305835, "grad_norm": 0.4080696702003479, "learning_rate": 9.943100345545627e-06, "loss": 0.3789, "step": 1709 }, { "epoch": 0.4300804828973843, "grad_norm": 0.4206973612308502, "learning_rate": 9.94288001182554e-06, "loss": 0.4128, "step": 1710 }, { "epoch": 0.43033199195171024, "grad_norm": 0.41314277052879333, "learning_rate": 9.942659254778703e-06, "loss": 0.3883, "step": 1711 }, { "epoch": 0.4305835010060362, "grad_norm": 0.42901384830474854, "learning_rate": 9.942438074424024e-06, "loss": 0.3917, "step": 1712 }, { "epoch": 0.4308350100603622, "grad_norm": 0.39158564805984497, "learning_rate": 9.942216470780441e-06, "loss": 0.3778, "step": 1713 }, { "epoch": 0.43108651911468815, "grad_norm": 0.423117995262146, "learning_rate": 9.941994443866936e-06, "loss": 0.408, "step": 1714 }, { "epoch": 0.43133802816901406, "grad_norm": 0.44455990195274353, "learning_rate": 9.941771993702524e-06, "loss": 0.3849, "step": 1715 }, { "epoch": 0.43158953722334004, "grad_norm": 0.41482892632484436, "learning_rate": 9.941549120306257e-06, "loss": 0.4121, "step": 1716 }, { "epoch": 0.431841046277666, "grad_norm": 0.47499921917915344, "learning_rate": 9.941325823697221e-06, "loss": 0.3987, "step": 1717 }, { "epoch": 0.432092555331992, "grad_norm": 0.40756213665008545, "learning_rate": 9.941102103894541e-06, "loss": 0.4043, "step": 1718 }, { "epoch": 0.4323440643863179, "grad_norm": 0.42999786138534546, "learning_rate": 9.94087796091738e-06, "loss": 0.4134, "step": 1719 }, { "epoch": 0.43259557344064387, "grad_norm": 0.469237744808197, "learning_rate": 9.940653394784929e-06, "loss": 0.3982, "step": 1720 }, { "epoch": 0.43284708249496984, "grad_norm": 0.3991852402687073, "learning_rate": 9.940428405516423e-06, "loss": 0.3709, "step": 1721 }, { "epoch": 0.43309859154929575, "grad_norm": 0.42819303274154663, "learning_rate": 9.940202993131132e-06, "loss": 0.3955, "step": 1722 }, { "epoch": 0.4333501006036217, "grad_norm": 0.40518656373023987, "learning_rate": 9.939977157648361e-06, "loss": 0.3797, "step": 1723 }, { "epoch": 0.4336016096579477, "grad_norm": 0.38709619641304016, "learning_rate": 9.939750899087451e-06, "loss": 0.4136, "step": 1724 }, { "epoch": 0.43385311871227367, "grad_norm": 0.400952011346817, "learning_rate": 9.939524217467779e-06, "loss": 0.3917, "step": 1725 }, { "epoch": 0.4341046277665996, "grad_norm": 0.4235305190086365, "learning_rate": 9.93929711280876e-06, "loss": 0.3936, "step": 1726 }, { "epoch": 0.43435613682092555, "grad_norm": 0.4117744266986847, "learning_rate": 9.939069585129841e-06, "loss": 0.4083, "step": 1727 }, { "epoch": 0.4346076458752515, "grad_norm": 0.37095022201538086, "learning_rate": 9.938841634450513e-06, "loss": 0.3841, "step": 1728 }, { "epoch": 0.43485915492957744, "grad_norm": 0.4193072021007538, "learning_rate": 9.938613260790295e-06, "loss": 0.4061, "step": 1729 }, { "epoch": 0.4351106639839034, "grad_norm": 0.45826607942581177, "learning_rate": 9.938384464168748e-06, "loss": 0.3937, "step": 1730 }, { "epoch": 0.4353621730382294, "grad_norm": 0.3635403513908386, "learning_rate": 9.938155244605467e-06, "loss": 0.4176, "step": 1731 }, { "epoch": 0.43561368209255535, "grad_norm": 0.47581997513771057, "learning_rate": 9.937925602120083e-06, "loss": 0.4053, "step": 1732 }, { "epoch": 0.43586519114688127, "grad_norm": 0.4900721311569214, "learning_rate": 9.937695536732259e-06, "loss": 0.3965, "step": 1733 }, { "epoch": 0.43611670020120724, "grad_norm": 0.4279528856277466, "learning_rate": 9.937465048461705e-06, "loss": 0.4019, "step": 1734 }, { "epoch": 0.4363682092555332, "grad_norm": 0.42261025309562683, "learning_rate": 9.937234137328157e-06, "loss": 0.4054, "step": 1735 }, { "epoch": 0.43661971830985913, "grad_norm": 0.4319506287574768, "learning_rate": 9.937002803351394e-06, "loss": 0.3924, "step": 1736 }, { "epoch": 0.4368712273641851, "grad_norm": 0.4078814387321472, "learning_rate": 9.936771046551225e-06, "loss": 0.3855, "step": 1737 }, { "epoch": 0.43712273641851107, "grad_norm": 0.4161885678768158, "learning_rate": 9.936538866947501e-06, "loss": 0.4115, "step": 1738 }, { "epoch": 0.43737424547283704, "grad_norm": 0.3567904233932495, "learning_rate": 9.936306264560107e-06, "loss": 0.3882, "step": 1739 }, { "epoch": 0.43762575452716296, "grad_norm": 0.43193182349205017, "learning_rate": 9.93607323940896e-06, "loss": 0.3699, "step": 1740 }, { "epoch": 0.43787726358148893, "grad_norm": 0.3843153715133667, "learning_rate": 9.935839791514024e-06, "loss": 0.4166, "step": 1741 }, { "epoch": 0.4381287726358149, "grad_norm": 0.3638061583042145, "learning_rate": 9.935605920895286e-06, "loss": 0.3891, "step": 1742 }, { "epoch": 0.43838028169014087, "grad_norm": 0.3796941041946411, "learning_rate": 9.93537162757278e-06, "loss": 0.3806, "step": 1743 }, { "epoch": 0.4386317907444668, "grad_norm": 0.3639376163482666, "learning_rate": 9.935136911566566e-06, "loss": 0.3785, "step": 1744 }, { "epoch": 0.43888329979879276, "grad_norm": 0.4310847520828247, "learning_rate": 9.93490177289675e-06, "loss": 0.4118, "step": 1745 }, { "epoch": 0.43913480885311873, "grad_norm": 0.4827233552932739, "learning_rate": 9.934666211583472e-06, "loss": 0.4192, "step": 1746 }, { "epoch": 0.43938631790744465, "grad_norm": 0.36181917786598206, "learning_rate": 9.934430227646904e-06, "loss": 0.4225, "step": 1747 }, { "epoch": 0.4396378269617706, "grad_norm": 0.4338870346546173, "learning_rate": 9.934193821107256e-06, "loss": 0.3857, "step": 1748 }, { "epoch": 0.4398893360160966, "grad_norm": 0.454122930765152, "learning_rate": 9.933956991984775e-06, "loss": 0.4203, "step": 1749 }, { "epoch": 0.44014084507042256, "grad_norm": 0.440476655960083, "learning_rate": 9.933719740299745e-06, "loss": 0.411, "step": 1750 }, { "epoch": 0.4403923541247485, "grad_norm": 0.3782346546649933, "learning_rate": 9.933482066072485e-06, "loss": 0.383, "step": 1751 }, { "epoch": 0.44064386317907445, "grad_norm": 0.4182790219783783, "learning_rate": 9.93324396932335e-06, "loss": 0.3782, "step": 1752 }, { "epoch": 0.4408953722334004, "grad_norm": 0.40436455607414246, "learning_rate": 9.93300545007273e-06, "loss": 0.3773, "step": 1753 }, { "epoch": 0.44114688128772633, "grad_norm": 0.448367714881897, "learning_rate": 9.932766508341054e-06, "loss": 0.4067, "step": 1754 }, { "epoch": 0.4413983903420523, "grad_norm": 0.39983075857162476, "learning_rate": 9.932527144148788e-06, "loss": 0.3907, "step": 1755 }, { "epoch": 0.4416498993963783, "grad_norm": 0.4192653000354767, "learning_rate": 9.932287357516428e-06, "loss": 0.3743, "step": 1756 }, { "epoch": 0.44190140845070425, "grad_norm": 0.4609915614128113, "learning_rate": 9.932047148464512e-06, "loss": 0.3671, "step": 1757 }, { "epoch": 0.44215291750503016, "grad_norm": 0.4244169294834137, "learning_rate": 9.931806517013612e-06, "loss": 0.3848, "step": 1758 }, { "epoch": 0.44240442655935613, "grad_norm": 0.4056912064552307, "learning_rate": 9.931565463184339e-06, "loss": 0.4049, "step": 1759 }, { "epoch": 0.4426559356136821, "grad_norm": 0.5338891744613647, "learning_rate": 9.931323986997334e-06, "loss": 0.3858, "step": 1760 }, { "epoch": 0.442907444668008, "grad_norm": 0.4530712962150574, "learning_rate": 9.93108208847328e-06, "loss": 0.4024, "step": 1761 }, { "epoch": 0.443158953722334, "grad_norm": 0.43134480714797974, "learning_rate": 9.930839767632895e-06, "loss": 0.4158, "step": 1762 }, { "epoch": 0.44341046277665996, "grad_norm": 0.475333571434021, "learning_rate": 9.930597024496933e-06, "loss": 0.4289, "step": 1763 }, { "epoch": 0.44366197183098594, "grad_norm": 0.46597325801849365, "learning_rate": 9.930353859086177e-06, "loss": 0.409, "step": 1764 }, { "epoch": 0.44391348088531185, "grad_norm": 0.3584541976451874, "learning_rate": 9.93011027142146e-06, "loss": 0.3936, "step": 1765 }, { "epoch": 0.4441649899396378, "grad_norm": 0.4492575526237488, "learning_rate": 9.92986626152364e-06, "loss": 0.4067, "step": 1766 }, { "epoch": 0.4444164989939638, "grad_norm": 0.4518819749355316, "learning_rate": 9.929621829413616e-06, "loss": 0.3997, "step": 1767 }, { "epoch": 0.44466800804828976, "grad_norm": 0.39923927187919617, "learning_rate": 9.929376975112324e-06, "loss": 0.4011, "step": 1768 }, { "epoch": 0.4449195171026157, "grad_norm": 0.4502282440662384, "learning_rate": 9.92913169864073e-06, "loss": 0.4245, "step": 1769 }, { "epoch": 0.44517102615694165, "grad_norm": 0.49698328971862793, "learning_rate": 9.928886000019842e-06, "loss": 0.409, "step": 1770 }, { "epoch": 0.4454225352112676, "grad_norm": 0.43445685505867004, "learning_rate": 9.928639879270705e-06, "loss": 0.3875, "step": 1771 }, { "epoch": 0.44567404426559354, "grad_norm": 0.43353793025016785, "learning_rate": 9.928393336414394e-06, "loss": 0.4041, "step": 1772 }, { "epoch": 0.4459255533199195, "grad_norm": 0.5283091068267822, "learning_rate": 9.928146371472027e-06, "loss": 0.409, "step": 1773 }, { "epoch": 0.4461770623742455, "grad_norm": 0.3890385925769806, "learning_rate": 9.927898984464753e-06, "loss": 0.3992, "step": 1774 }, { "epoch": 0.44642857142857145, "grad_norm": 0.36789190769195557, "learning_rate": 9.92765117541376e-06, "loss": 0.3825, "step": 1775 }, { "epoch": 0.44668008048289737, "grad_norm": 0.4484959840774536, "learning_rate": 9.927402944340271e-06, "loss": 0.3804, "step": 1776 }, { "epoch": 0.44693158953722334, "grad_norm": 0.4116009473800659, "learning_rate": 9.927154291265546e-06, "loss": 0.4022, "step": 1777 }, { "epoch": 0.4471830985915493, "grad_norm": 0.4052729308605194, "learning_rate": 9.92690521621088e-06, "loss": 0.4163, "step": 1778 }, { "epoch": 0.4474346076458752, "grad_norm": 0.4312780797481537, "learning_rate": 9.926655719197604e-06, "loss": 0.4167, "step": 1779 }, { "epoch": 0.4476861167002012, "grad_norm": 0.38401511311531067, "learning_rate": 9.926405800247088e-06, "loss": 0.3918, "step": 1780 }, { "epoch": 0.44793762575452717, "grad_norm": 0.4729456305503845, "learning_rate": 9.926155459380733e-06, "loss": 0.416, "step": 1781 }, { "epoch": 0.44818913480885314, "grad_norm": 0.43422985076904297, "learning_rate": 9.925904696619983e-06, "loss": 0.3793, "step": 1782 }, { "epoch": 0.44844064386317906, "grad_norm": 0.3729407489299774, "learning_rate": 9.92565351198631e-06, "loss": 0.369, "step": 1783 }, { "epoch": 0.448692152917505, "grad_norm": 0.3949672281742096, "learning_rate": 9.92540190550123e-06, "loss": 0.3661, "step": 1784 }, { "epoch": 0.448943661971831, "grad_norm": 0.5043959617614746, "learning_rate": 9.92514987718629e-06, "loss": 0.388, "step": 1785 }, { "epoch": 0.4491951710261569, "grad_norm": 0.4054137170314789, "learning_rate": 9.924897427063074e-06, "loss": 0.3962, "step": 1786 }, { "epoch": 0.4494466800804829, "grad_norm": 0.426899790763855, "learning_rate": 9.924644555153203e-06, "loss": 0.4045, "step": 1787 }, { "epoch": 0.44969818913480886, "grad_norm": 0.4646405577659607, "learning_rate": 9.924391261478334e-06, "loss": 0.3934, "step": 1788 }, { "epoch": 0.44994969818913483, "grad_norm": 0.3873588442802429, "learning_rate": 9.924137546060162e-06, "loss": 0.4043, "step": 1789 }, { "epoch": 0.45020120724346074, "grad_norm": 0.5013946890830994, "learning_rate": 9.923883408920414e-06, "loss": 0.3877, "step": 1790 }, { "epoch": 0.4504527162977867, "grad_norm": 0.4803258776664734, "learning_rate": 9.923628850080856e-06, "loss": 0.4154, "step": 1791 }, { "epoch": 0.4507042253521127, "grad_norm": 0.47052329778671265, "learning_rate": 9.923373869563288e-06, "loss": 0.4065, "step": 1792 }, { "epoch": 0.45095573440643866, "grad_norm": 0.4442056119441986, "learning_rate": 9.92311846738955e-06, "loss": 0.3974, "step": 1793 }, { "epoch": 0.4512072434607646, "grad_norm": 0.4476431906223297, "learning_rate": 9.922862643581512e-06, "loss": 0.4098, "step": 1794 }, { "epoch": 0.45145875251509054, "grad_norm": 0.41717463731765747, "learning_rate": 9.922606398161088e-06, "loss": 0.4118, "step": 1795 }, { "epoch": 0.4517102615694165, "grad_norm": 0.4191329777240753, "learning_rate": 9.922349731150221e-06, "loss": 0.403, "step": 1796 }, { "epoch": 0.45196177062374243, "grad_norm": 0.41227200627326965, "learning_rate": 9.922092642570894e-06, "loss": 0.4162, "step": 1797 }, { "epoch": 0.4522132796780684, "grad_norm": 0.3862850069999695, "learning_rate": 9.921835132445124e-06, "loss": 0.4071, "step": 1798 }, { "epoch": 0.4524647887323944, "grad_norm": 0.42549851536750793, "learning_rate": 9.921577200794968e-06, "loss": 0.4289, "step": 1799 }, { "epoch": 0.45271629778672035, "grad_norm": 0.40862515568733215, "learning_rate": 9.921318847642511e-06, "loss": 0.4009, "step": 1800 }, { "epoch": 0.45296780684104626, "grad_norm": 0.3994438350200653, "learning_rate": 9.921060073009884e-06, "loss": 0.3995, "step": 1801 }, { "epoch": 0.45321931589537223, "grad_norm": 0.45960313081741333, "learning_rate": 9.920800876919248e-06, "loss": 0.4031, "step": 1802 }, { "epoch": 0.4534708249496982, "grad_norm": 0.42091697454452515, "learning_rate": 9.9205412593928e-06, "loss": 0.398, "step": 1803 }, { "epoch": 0.4537223340040241, "grad_norm": 0.45224541425704956, "learning_rate": 9.920281220452776e-06, "loss": 0.3865, "step": 1804 }, { "epoch": 0.4539738430583501, "grad_norm": 0.45991694927215576, "learning_rate": 9.920020760121447e-06, "loss": 0.3965, "step": 1805 }, { "epoch": 0.45422535211267606, "grad_norm": 0.433249294757843, "learning_rate": 9.919759878421121e-06, "loss": 0.405, "step": 1806 }, { "epoch": 0.45447686116700203, "grad_norm": 0.43937093019485474, "learning_rate": 9.919498575374138e-06, "loss": 0.4068, "step": 1807 }, { "epoch": 0.45472837022132795, "grad_norm": 0.5110582709312439, "learning_rate": 9.919236851002879e-06, "loss": 0.4061, "step": 1808 }, { "epoch": 0.4549798792756539, "grad_norm": 0.4061850607395172, "learning_rate": 9.918974705329756e-06, "loss": 0.3686, "step": 1809 }, { "epoch": 0.4552313883299799, "grad_norm": 0.49449342489242554, "learning_rate": 9.918712138377226e-06, "loss": 0.3984, "step": 1810 }, { "epoch": 0.45548289738430586, "grad_norm": 0.44511350989341736, "learning_rate": 9.91844915016777e-06, "loss": 0.3846, "step": 1811 }, { "epoch": 0.4557344064386318, "grad_norm": 0.3825312852859497, "learning_rate": 9.918185740723916e-06, "loss": 0.4109, "step": 1812 }, { "epoch": 0.45598591549295775, "grad_norm": 0.44606679677963257, "learning_rate": 9.91792191006822e-06, "loss": 0.3899, "step": 1813 }, { "epoch": 0.4562374245472837, "grad_norm": 0.42780601978302, "learning_rate": 9.917657658223278e-06, "loss": 0.409, "step": 1814 }, { "epoch": 0.45648893360160964, "grad_norm": 0.44149789214134216, "learning_rate": 9.917392985211725e-06, "loss": 0.4126, "step": 1815 }, { "epoch": 0.4567404426559356, "grad_norm": 0.40314993262290955, "learning_rate": 9.917127891056223e-06, "loss": 0.3824, "step": 1816 }, { "epoch": 0.4569919517102616, "grad_norm": 0.42890751361846924, "learning_rate": 9.916862375779482e-06, "loss": 0.4023, "step": 1817 }, { "epoch": 0.45724346076458755, "grad_norm": 0.39626947045326233, "learning_rate": 9.916596439404235e-06, "loss": 0.4113, "step": 1818 }, { "epoch": 0.45749496981891347, "grad_norm": 0.5269667506217957, "learning_rate": 9.916330081953262e-06, "loss": 0.3958, "step": 1819 }, { "epoch": 0.45774647887323944, "grad_norm": 0.4348817765712738, "learning_rate": 9.916063303449374e-06, "loss": 0.373, "step": 1820 }, { "epoch": 0.4579979879275654, "grad_norm": 0.36105191707611084, "learning_rate": 9.91579610391542e-06, "loss": 0.3845, "step": 1821 }, { "epoch": 0.4582494969818913, "grad_norm": 0.5355477333068848, "learning_rate": 9.915528483374283e-06, "loss": 0.4226, "step": 1822 }, { "epoch": 0.4585010060362173, "grad_norm": 0.45753616094589233, "learning_rate": 9.915260441848883e-06, "loss": 0.4067, "step": 1823 }, { "epoch": 0.45875251509054327, "grad_norm": 0.41938281059265137, "learning_rate": 9.914991979362173e-06, "loss": 0.4038, "step": 1824 }, { "epoch": 0.45900402414486924, "grad_norm": 0.41249436140060425, "learning_rate": 9.91472309593715e-06, "loss": 0.3903, "step": 1825 }, { "epoch": 0.45925553319919515, "grad_norm": 0.40385809540748596, "learning_rate": 9.914453791596841e-06, "loss": 0.4094, "step": 1826 }, { "epoch": 0.4595070422535211, "grad_norm": 0.4012143909931183, "learning_rate": 9.914184066364308e-06, "loss": 0.3886, "step": 1827 }, { "epoch": 0.4597585513078471, "grad_norm": 0.41770312190055847, "learning_rate": 9.913913920262654e-06, "loss": 0.3926, "step": 1828 }, { "epoch": 0.460010060362173, "grad_norm": 0.3697265684604645, "learning_rate": 9.913643353315015e-06, "loss": 0.3827, "step": 1829 }, { "epoch": 0.460261569416499, "grad_norm": 0.4404751658439636, "learning_rate": 9.91337236554456e-06, "loss": 0.3875, "step": 1830 }, { "epoch": 0.46051307847082495, "grad_norm": 0.43419405817985535, "learning_rate": 9.9131009569745e-06, "loss": 0.4164, "step": 1831 }, { "epoch": 0.4607645875251509, "grad_norm": 0.4106455445289612, "learning_rate": 9.91282912762808e-06, "loss": 0.4161, "step": 1832 }, { "epoch": 0.46101609657947684, "grad_norm": 0.48274555802345276, "learning_rate": 9.912556877528582e-06, "loss": 0.3943, "step": 1833 }, { "epoch": 0.4612676056338028, "grad_norm": 0.39638254046440125, "learning_rate": 9.912284206699317e-06, "loss": 0.4098, "step": 1834 }, { "epoch": 0.4615191146881288, "grad_norm": 0.4625331461429596, "learning_rate": 9.912011115163642e-06, "loss": 0.3682, "step": 1835 }, { "epoch": 0.46177062374245476, "grad_norm": 0.4894983172416687, "learning_rate": 9.911737602944943e-06, "loss": 0.3913, "step": 1836 }, { "epoch": 0.46202213279678067, "grad_norm": 0.3602517247200012, "learning_rate": 9.911463670066648e-06, "loss": 0.3814, "step": 1837 }, { "epoch": 0.46227364185110664, "grad_norm": 0.4734443128108978, "learning_rate": 9.911189316552217e-06, "loss": 0.3926, "step": 1838 }, { "epoch": 0.4625251509054326, "grad_norm": 0.4669021964073181, "learning_rate": 9.910914542425143e-06, "loss": 0.4003, "step": 1839 }, { "epoch": 0.46277665995975853, "grad_norm": 0.3793289363384247, "learning_rate": 9.91063934770896e-06, "loss": 0.3894, "step": 1840 }, { "epoch": 0.4630281690140845, "grad_norm": 0.4388173818588257, "learning_rate": 9.910363732427241e-06, "loss": 0.4081, "step": 1841 }, { "epoch": 0.46327967806841047, "grad_norm": 0.44316378235816956, "learning_rate": 9.910087696603585e-06, "loss": 0.3768, "step": 1842 }, { "epoch": 0.46353118712273644, "grad_norm": 0.4063831567764282, "learning_rate": 9.909811240261635e-06, "loss": 0.3986, "step": 1843 }, { "epoch": 0.46378269617706236, "grad_norm": 0.40213075280189514, "learning_rate": 9.90953436342507e-06, "loss": 0.4181, "step": 1844 }, { "epoch": 0.46403420523138833, "grad_norm": 0.38924410939216614, "learning_rate": 9.909257066117599e-06, "loss": 0.4157, "step": 1845 }, { "epoch": 0.4642857142857143, "grad_norm": 0.3829781115055084, "learning_rate": 9.908979348362974e-06, "loss": 0.3814, "step": 1846 }, { "epoch": 0.4645372233400402, "grad_norm": 0.3905456066131592, "learning_rate": 9.908701210184976e-06, "loss": 0.3843, "step": 1847 }, { "epoch": 0.4647887323943662, "grad_norm": 0.3960774838924408, "learning_rate": 9.90842265160743e-06, "loss": 0.409, "step": 1848 }, { "epoch": 0.46504024144869216, "grad_norm": 0.39505699276924133, "learning_rate": 9.908143672654192e-06, "loss": 0.3942, "step": 1849 }, { "epoch": 0.46529175050301813, "grad_norm": 0.4073687791824341, "learning_rate": 9.907864273349152e-06, "loss": 0.4076, "step": 1850 }, { "epoch": 0.46554325955734405, "grad_norm": 0.4546225368976593, "learning_rate": 9.907584453716238e-06, "loss": 0.3739, "step": 1851 }, { "epoch": 0.46579476861167, "grad_norm": 0.4456266760826111, "learning_rate": 9.907304213779422e-06, "loss": 0.3742, "step": 1852 }, { "epoch": 0.466046277665996, "grad_norm": 0.4133337438106537, "learning_rate": 9.907023553562699e-06, "loss": 0.3993, "step": 1853 }, { "epoch": 0.4662977867203219, "grad_norm": 0.4463947117328644, "learning_rate": 9.906742473090105e-06, "loss": 0.3906, "step": 1854 }, { "epoch": 0.4665492957746479, "grad_norm": 0.4527071714401245, "learning_rate": 9.906460972385715e-06, "loss": 0.3838, "step": 1855 }, { "epoch": 0.46680080482897385, "grad_norm": 0.42571839690208435, "learning_rate": 9.906179051473638e-06, "loss": 0.3812, "step": 1856 }, { "epoch": 0.4670523138832998, "grad_norm": 0.5096774697303772, "learning_rate": 9.905896710378019e-06, "loss": 0.4192, "step": 1857 }, { "epoch": 0.46730382293762573, "grad_norm": 0.43383294343948364, "learning_rate": 9.905613949123036e-06, "loss": 0.3769, "step": 1858 }, { "epoch": 0.4675553319919517, "grad_norm": 0.45289137959480286, "learning_rate": 9.90533076773291e-06, "loss": 0.4123, "step": 1859 }, { "epoch": 0.4678068410462777, "grad_norm": 0.4052902162075043, "learning_rate": 9.905047166231889e-06, "loss": 0.4346, "step": 1860 }, { "epoch": 0.46805835010060365, "grad_norm": 0.41848742961883545, "learning_rate": 9.904763144644265e-06, "loss": 0.4022, "step": 1861 }, { "epoch": 0.46830985915492956, "grad_norm": 0.4163335859775543, "learning_rate": 9.904478702994362e-06, "loss": 0.4254, "step": 1862 }, { "epoch": 0.46856136820925554, "grad_norm": 0.36754798889160156, "learning_rate": 9.90419384130654e-06, "loss": 0.4084, "step": 1863 }, { "epoch": 0.4688128772635815, "grad_norm": 0.4322851002216339, "learning_rate": 9.903908559605197e-06, "loss": 0.3982, "step": 1864 }, { "epoch": 0.4690643863179074, "grad_norm": 0.41940197348594666, "learning_rate": 9.903622857914766e-06, "loss": 0.4128, "step": 1865 }, { "epoch": 0.4693158953722334, "grad_norm": 0.3755689859390259, "learning_rate": 9.90333673625971e-06, "loss": 0.4054, "step": 1866 }, { "epoch": 0.46956740442655936, "grad_norm": 0.422498881816864, "learning_rate": 9.903050194664541e-06, "loss": 0.4145, "step": 1867 }, { "epoch": 0.46981891348088534, "grad_norm": 0.3807189166545868, "learning_rate": 9.902763233153796e-06, "loss": 0.4154, "step": 1868 }, { "epoch": 0.47007042253521125, "grad_norm": 0.4063322842121124, "learning_rate": 9.90247585175205e-06, "loss": 0.3964, "step": 1869 }, { "epoch": 0.4703219315895372, "grad_norm": 0.42195388674736023, "learning_rate": 9.902188050483918e-06, "loss": 0.4153, "step": 1870 }, { "epoch": 0.4705734406438632, "grad_norm": 0.44243067502975464, "learning_rate": 9.901899829374048e-06, "loss": 0.4107, "step": 1871 }, { "epoch": 0.4708249496981891, "grad_norm": 0.39202213287353516, "learning_rate": 9.901611188447123e-06, "loss": 0.3929, "step": 1872 }, { "epoch": 0.4710764587525151, "grad_norm": 0.4214573800563812, "learning_rate": 9.901322127727864e-06, "loss": 0.4043, "step": 1873 }, { "epoch": 0.47132796780684105, "grad_norm": 0.4241698980331421, "learning_rate": 9.901032647241028e-06, "loss": 0.4463, "step": 1874 }, { "epoch": 0.471579476861167, "grad_norm": 0.3987068831920624, "learning_rate": 9.900742747011405e-06, "loss": 0.4225, "step": 1875 }, { "epoch": 0.47183098591549294, "grad_norm": 0.38595229387283325, "learning_rate": 9.900452427063827e-06, "loss": 0.401, "step": 1876 }, { "epoch": 0.4720824949698189, "grad_norm": 0.4303046464920044, "learning_rate": 9.900161687423155e-06, "loss": 0.424, "step": 1877 }, { "epoch": 0.4723340040241449, "grad_norm": 0.453784704208374, "learning_rate": 9.89987052811429e-06, "loss": 0.4054, "step": 1878 }, { "epoch": 0.4725855130784708, "grad_norm": 0.41478443145751953, "learning_rate": 9.899578949162167e-06, "loss": 0.3934, "step": 1879 }, { "epoch": 0.47283702213279677, "grad_norm": 0.4205837547779083, "learning_rate": 9.899286950591758e-06, "loss": 0.379, "step": 1880 }, { "epoch": 0.47308853118712274, "grad_norm": 0.4482855498790741, "learning_rate": 9.898994532428071e-06, "loss": 0.3874, "step": 1881 }, { "epoch": 0.4733400402414487, "grad_norm": 0.3712460994720459, "learning_rate": 9.898701694696154e-06, "loss": 0.423, "step": 1882 }, { "epoch": 0.4735915492957746, "grad_norm": 0.4586082100868225, "learning_rate": 9.89840843742108e-06, "loss": 0.39, "step": 1883 }, { "epoch": 0.4738430583501006, "grad_norm": 0.3899551033973694, "learning_rate": 9.898114760627968e-06, "loss": 0.4005, "step": 1884 }, { "epoch": 0.47409456740442657, "grad_norm": 0.39340347051620483, "learning_rate": 9.89782066434197e-06, "loss": 0.4229, "step": 1885 }, { "epoch": 0.47434607645875254, "grad_norm": 0.4018033742904663, "learning_rate": 9.897526148588272e-06, "loss": 0.3884, "step": 1886 }, { "epoch": 0.47459758551307846, "grad_norm": 0.38298699259757996, "learning_rate": 9.8972312133921e-06, "loss": 0.414, "step": 1887 }, { "epoch": 0.47484909456740443, "grad_norm": 0.36675944924354553, "learning_rate": 9.896935858778708e-06, "loss": 0.3942, "step": 1888 }, { "epoch": 0.4751006036217304, "grad_norm": 0.4059602916240692, "learning_rate": 9.896640084773399e-06, "loss": 0.3728, "step": 1889 }, { "epoch": 0.4753521126760563, "grad_norm": 0.4235600233078003, "learning_rate": 9.896343891401498e-06, "loss": 0.3886, "step": 1890 }, { "epoch": 0.4756036217303823, "grad_norm": 0.3745618760585785, "learning_rate": 9.896047278688375e-06, "loss": 0.3832, "step": 1891 }, { "epoch": 0.47585513078470826, "grad_norm": 0.41866034269332886, "learning_rate": 9.89575024665943e-06, "loss": 0.3678, "step": 1892 }, { "epoch": 0.47610663983903423, "grad_norm": 0.39261144399642944, "learning_rate": 9.895452795340106e-06, "loss": 0.4036, "step": 1893 }, { "epoch": 0.47635814889336014, "grad_norm": 0.43027186393737793, "learning_rate": 9.895154924755875e-06, "loss": 0.3915, "step": 1894 }, { "epoch": 0.4766096579476861, "grad_norm": 0.42222538590431213, "learning_rate": 9.894856634932249e-06, "loss": 0.3949, "step": 1895 }, { "epoch": 0.4768611670020121, "grad_norm": 0.3839363157749176, "learning_rate": 9.894557925894775e-06, "loss": 0.4005, "step": 1896 }, { "epoch": 0.477112676056338, "grad_norm": 0.39426472783088684, "learning_rate": 9.894258797669034e-06, "loss": 0.3867, "step": 1897 }, { "epoch": 0.477364185110664, "grad_norm": 0.3634321391582489, "learning_rate": 9.893959250280646e-06, "loss": 0.3862, "step": 1898 }, { "epoch": 0.47761569416498995, "grad_norm": 0.4207385182380676, "learning_rate": 9.893659283755264e-06, "loss": 0.3887, "step": 1899 }, { "epoch": 0.4778672032193159, "grad_norm": 0.3956140875816345, "learning_rate": 9.89335889811858e-06, "loss": 0.397, "step": 1900 }, { "epoch": 0.47811871227364183, "grad_norm": 0.37039822340011597, "learning_rate": 9.893058093396318e-06, "loss": 0.386, "step": 1901 }, { "epoch": 0.4783702213279678, "grad_norm": 0.38195037841796875, "learning_rate": 9.892756869614242e-06, "loss": 0.4047, "step": 1902 }, { "epoch": 0.4786217303822938, "grad_norm": 0.39862626791000366, "learning_rate": 9.892455226798148e-06, "loss": 0.4128, "step": 1903 }, { "epoch": 0.4788732394366197, "grad_norm": 0.3864774703979492, "learning_rate": 9.892153164973873e-06, "loss": 0.3959, "step": 1904 }, { "epoch": 0.47912474849094566, "grad_norm": 0.40958353877067566, "learning_rate": 9.891850684167284e-06, "loss": 0.4151, "step": 1905 }, { "epoch": 0.47937625754527163, "grad_norm": 0.3836343586444855, "learning_rate": 9.891547784404285e-06, "loss": 0.3945, "step": 1906 }, { "epoch": 0.4796277665995976, "grad_norm": 0.37921276688575745, "learning_rate": 9.891244465710822e-06, "loss": 0.3856, "step": 1907 }, { "epoch": 0.4798792756539235, "grad_norm": 0.39044657349586487, "learning_rate": 9.890940728112869e-06, "loss": 0.3913, "step": 1908 }, { "epoch": 0.4801307847082495, "grad_norm": 0.38842642307281494, "learning_rate": 9.89063657163644e-06, "loss": 0.378, "step": 1909 }, { "epoch": 0.48038229376257546, "grad_norm": 0.4392659068107605, "learning_rate": 9.890331996307585e-06, "loss": 0.3937, "step": 1910 }, { "epoch": 0.48063380281690143, "grad_norm": 0.41104656457901, "learning_rate": 9.89002700215239e-06, "loss": 0.3849, "step": 1911 }, { "epoch": 0.48088531187122735, "grad_norm": 0.3936188519001007, "learning_rate": 9.88972158919697e-06, "loss": 0.4185, "step": 1912 }, { "epoch": 0.4811368209255533, "grad_norm": 0.39013659954071045, "learning_rate": 9.88941575746749e-06, "loss": 0.353, "step": 1913 }, { "epoch": 0.4813883299798793, "grad_norm": 0.3972854018211365, "learning_rate": 9.889109506990137e-06, "loss": 0.3933, "step": 1914 }, { "epoch": 0.4816398390342052, "grad_norm": 0.46189555525779724, "learning_rate": 9.88880283779114e-06, "loss": 0.4109, "step": 1915 }, { "epoch": 0.4818913480885312, "grad_norm": 0.391872763633728, "learning_rate": 9.888495749896766e-06, "loss": 0.4003, "step": 1916 }, { "epoch": 0.48214285714285715, "grad_norm": 0.49157020449638367, "learning_rate": 9.888188243333313e-06, "loss": 0.3938, "step": 1917 }, { "epoch": 0.4823943661971831, "grad_norm": 0.6632839441299438, "learning_rate": 9.887880318127116e-06, "loss": 0.3952, "step": 1918 }, { "epoch": 0.48264587525150904, "grad_norm": 0.42987990379333496, "learning_rate": 9.88757197430455e-06, "loss": 0.4032, "step": 1919 }, { "epoch": 0.482897384305835, "grad_norm": 0.43732520937919617, "learning_rate": 9.887263211892017e-06, "loss": 0.4166, "step": 1920 }, { "epoch": 0.483148893360161, "grad_norm": 0.45760130882263184, "learning_rate": 9.886954030915969e-06, "loss": 0.3951, "step": 1921 }, { "epoch": 0.4834004024144869, "grad_norm": 0.4097820520401001, "learning_rate": 9.886644431402879e-06, "loss": 0.422, "step": 1922 }, { "epoch": 0.48365191146881287, "grad_norm": 0.47169190645217896, "learning_rate": 9.886334413379263e-06, "loss": 0.4127, "step": 1923 }, { "epoch": 0.48390342052313884, "grad_norm": 0.45416510105133057, "learning_rate": 9.886023976871678e-06, "loss": 0.3942, "step": 1924 }, { "epoch": 0.4841549295774648, "grad_norm": 0.4500854015350342, "learning_rate": 9.885713121906701e-06, "loss": 0.3861, "step": 1925 }, { "epoch": 0.4844064386317907, "grad_norm": 0.49403610825538635, "learning_rate": 9.885401848510962e-06, "loss": 0.4131, "step": 1926 }, { "epoch": 0.4846579476861167, "grad_norm": 0.43086037039756775, "learning_rate": 9.88509015671112e-06, "loss": 0.3868, "step": 1927 }, { "epoch": 0.48490945674044267, "grad_norm": 0.4836970269680023, "learning_rate": 9.884778046533863e-06, "loss": 0.3939, "step": 1928 }, { "epoch": 0.48516096579476864, "grad_norm": 0.41725337505340576, "learning_rate": 9.884465518005927e-06, "loss": 0.4104, "step": 1929 }, { "epoch": 0.48541247484909456, "grad_norm": 0.43721911311149597, "learning_rate": 9.884152571154077e-06, "loss": 0.4114, "step": 1930 }, { "epoch": 0.4856639839034205, "grad_norm": 0.39998695254325867, "learning_rate": 9.883839206005115e-06, "loss": 0.386, "step": 1931 }, { "epoch": 0.4859154929577465, "grad_norm": 0.41072046756744385, "learning_rate": 9.883525422585877e-06, "loss": 0.4145, "step": 1932 }, { "epoch": 0.4861670020120724, "grad_norm": 0.40265727043151855, "learning_rate": 9.883211220923237e-06, "loss": 0.4205, "step": 1933 }, { "epoch": 0.4864185110663984, "grad_norm": 0.4055761396884918, "learning_rate": 9.882896601044107e-06, "loss": 0.4011, "step": 1934 }, { "epoch": 0.48667002012072436, "grad_norm": 0.43374502658843994, "learning_rate": 9.882581562975431e-06, "loss": 0.4031, "step": 1935 }, { "epoch": 0.4869215291750503, "grad_norm": 0.3941003978252411, "learning_rate": 9.882266106744187e-06, "loss": 0.3767, "step": 1936 }, { "epoch": 0.48717303822937624, "grad_norm": 0.41482222080230713, "learning_rate": 9.881950232377397e-06, "loss": 0.3977, "step": 1937 }, { "epoch": 0.4874245472837022, "grad_norm": 0.39503028988838196, "learning_rate": 9.88163393990211e-06, "loss": 0.3972, "step": 1938 }, { "epoch": 0.4876760563380282, "grad_norm": 0.43991711735725403, "learning_rate": 9.881317229345414e-06, "loss": 0.375, "step": 1939 }, { "epoch": 0.4879275653923541, "grad_norm": 0.41871747374534607, "learning_rate": 9.881000100734436e-06, "loss": 0.4188, "step": 1940 }, { "epoch": 0.4881790744466801, "grad_norm": 0.4454898238182068, "learning_rate": 9.880682554096334e-06, "loss": 0.3924, "step": 1941 }, { "epoch": 0.48843058350100604, "grad_norm": 0.4203794598579407, "learning_rate": 9.880364589458306e-06, "loss": 0.3826, "step": 1942 }, { "epoch": 0.488682092555332, "grad_norm": 0.4890785813331604, "learning_rate": 9.880046206847581e-06, "loss": 0.4006, "step": 1943 }, { "epoch": 0.48893360160965793, "grad_norm": 0.9945582151412964, "learning_rate": 9.879727406291429e-06, "loss": 0.3835, "step": 1944 }, { "epoch": 0.4891851106639839, "grad_norm": 0.44654008746147156, "learning_rate": 9.879408187817153e-06, "loss": 0.3997, "step": 1945 }, { "epoch": 0.4894366197183099, "grad_norm": 0.5195226669311523, "learning_rate": 9.879088551452088e-06, "loss": 0.3954, "step": 1946 }, { "epoch": 0.4896881287726358, "grad_norm": 0.4593251645565033, "learning_rate": 9.878768497223614e-06, "loss": 0.4029, "step": 1947 }, { "epoch": 0.48993963782696176, "grad_norm": 0.5262354612350464, "learning_rate": 9.87844802515914e-06, "loss": 0.3896, "step": 1948 }, { "epoch": 0.49019114688128773, "grad_norm": 0.5133786797523499, "learning_rate": 9.878127135286112e-06, "loss": 0.3685, "step": 1949 }, { "epoch": 0.4904426559356137, "grad_norm": 0.43951818346977234, "learning_rate": 9.877805827632013e-06, "loss": 0.4082, "step": 1950 }, { "epoch": 0.4906941649899396, "grad_norm": 0.5165718197822571, "learning_rate": 9.877484102224359e-06, "loss": 0.4198, "step": 1951 }, { "epoch": 0.4909456740442656, "grad_norm": 0.47304123640060425, "learning_rate": 9.877161959090704e-06, "loss": 0.4279, "step": 1952 }, { "epoch": 0.49119718309859156, "grad_norm": 0.41676169633865356, "learning_rate": 9.87683939825864e-06, "loss": 0.3885, "step": 1953 }, { "epoch": 0.49144869215291753, "grad_norm": 0.4127172529697418, "learning_rate": 9.876516419755793e-06, "loss": 0.3738, "step": 1954 }, { "epoch": 0.49170020120724345, "grad_norm": 0.4324229061603546, "learning_rate": 9.87619302360982e-06, "loss": 0.3703, "step": 1955 }, { "epoch": 0.4919517102615694, "grad_norm": 0.3876967430114746, "learning_rate": 9.875869209848418e-06, "loss": 0.4076, "step": 1956 }, { "epoch": 0.4922032193158954, "grad_norm": 0.4635095000267029, "learning_rate": 9.875544978499326e-06, "loss": 0.4025, "step": 1957 }, { "epoch": 0.4924547283702213, "grad_norm": 0.410577654838562, "learning_rate": 9.875220329590304e-06, "loss": 0.4046, "step": 1958 }, { "epoch": 0.4927062374245473, "grad_norm": 0.3948517441749573, "learning_rate": 9.874895263149163e-06, "loss": 0.4266, "step": 1959 }, { "epoch": 0.49295774647887325, "grad_norm": 0.3900180757045746, "learning_rate": 9.874569779203737e-06, "loss": 0.3899, "step": 1960 }, { "epoch": 0.4932092555331992, "grad_norm": 0.3953598737716675, "learning_rate": 9.874243877781906e-06, "loss": 0.4044, "step": 1961 }, { "epoch": 0.49346076458752514, "grad_norm": 0.3972851037979126, "learning_rate": 9.87391755891158e-06, "loss": 0.4096, "step": 1962 }, { "epoch": 0.4937122736418511, "grad_norm": 0.3685133457183838, "learning_rate": 9.873590822620706e-06, "loss": 0.4009, "step": 1963 }, { "epoch": 0.4939637826961771, "grad_norm": 0.5181507468223572, "learning_rate": 9.873263668937268e-06, "loss": 0.4115, "step": 1964 }, { "epoch": 0.494215291750503, "grad_norm": 0.38723844289779663, "learning_rate": 9.872936097889284e-06, "loss": 0.3957, "step": 1965 }, { "epoch": 0.49446680080482897, "grad_norm": 0.42989009618759155, "learning_rate": 9.872608109504807e-06, "loss": 0.402, "step": 1966 }, { "epoch": 0.49471830985915494, "grad_norm": 0.4205054044723511, "learning_rate": 9.872279703811929e-06, "loss": 0.4242, "step": 1967 }, { "epoch": 0.4949698189134809, "grad_norm": 0.369878888130188, "learning_rate": 9.871950880838774e-06, "loss": 0.4199, "step": 1968 }, { "epoch": 0.4952213279678068, "grad_norm": 0.3752254843711853, "learning_rate": 9.871621640613506e-06, "loss": 0.3765, "step": 1969 }, { "epoch": 0.4954728370221328, "grad_norm": 0.4001399278640747, "learning_rate": 9.871291983164322e-06, "loss": 0.3697, "step": 1970 }, { "epoch": 0.49572434607645877, "grad_norm": 0.38128310441970825, "learning_rate": 9.870961908519454e-06, "loss": 0.4113, "step": 1971 }, { "epoch": 0.4959758551307847, "grad_norm": 0.3907400071620941, "learning_rate": 9.87063141670717e-06, "loss": 0.404, "step": 1972 }, { "epoch": 0.49622736418511065, "grad_norm": 0.3958117961883545, "learning_rate": 9.870300507755777e-06, "loss": 0.3943, "step": 1973 }, { "epoch": 0.4964788732394366, "grad_norm": 0.412076473236084, "learning_rate": 9.869969181693613e-06, "loss": 0.4207, "step": 1974 }, { "epoch": 0.4967303822937626, "grad_norm": 0.3858659863471985, "learning_rate": 9.869637438549056e-06, "loss": 0.396, "step": 1975 }, { "epoch": 0.4969818913480885, "grad_norm": 0.44061511754989624, "learning_rate": 9.869305278350516e-06, "loss": 0.4096, "step": 1976 }, { "epoch": 0.4972334004024145, "grad_norm": 0.400973379611969, "learning_rate": 9.868972701126442e-06, "loss": 0.382, "step": 1977 }, { "epoch": 0.49748490945674045, "grad_norm": 0.430945485830307, "learning_rate": 9.868639706905314e-06, "loss": 0.3994, "step": 1978 }, { "epoch": 0.4977364185110664, "grad_norm": 0.36873745918273926, "learning_rate": 9.868306295715656e-06, "loss": 0.3846, "step": 1979 }, { "epoch": 0.49798792756539234, "grad_norm": 0.3921675682067871, "learning_rate": 9.86797246758602e-06, "loss": 0.3977, "step": 1980 }, { "epoch": 0.4982394366197183, "grad_norm": 0.3892073333263397, "learning_rate": 9.867638222544994e-06, "loss": 0.383, "step": 1981 }, { "epoch": 0.4984909456740443, "grad_norm": 0.3832413852214813, "learning_rate": 9.867303560621207e-06, "loss": 0.3954, "step": 1982 }, { "epoch": 0.4987424547283702, "grad_norm": 0.3576008677482605, "learning_rate": 9.86696848184332e-06, "loss": 0.3985, "step": 1983 }, { "epoch": 0.49899396378269617, "grad_norm": 0.44481638073921204, "learning_rate": 9.86663298624003e-06, "loss": 0.3919, "step": 1984 }, { "epoch": 0.49924547283702214, "grad_norm": 0.37005627155303955, "learning_rate": 9.86629707384007e-06, "loss": 0.3772, "step": 1985 }, { "epoch": 0.4994969818913481, "grad_norm": 0.39425691962242126, "learning_rate": 9.86596074467221e-06, "loss": 0.4188, "step": 1986 }, { "epoch": 0.49974849094567403, "grad_norm": 0.39733070135116577, "learning_rate": 9.865623998765253e-06, "loss": 0.4111, "step": 1987 }, { "epoch": 0.5, "grad_norm": 0.40979063510894775, "learning_rate": 9.865286836148039e-06, "loss": 0.3874, "step": 1988 }, { "epoch": 0.5002515090543259, "grad_norm": 0.361208438873291, "learning_rate": 9.864949256849445e-06, "loss": 0.4006, "step": 1989 }, { "epoch": 0.5005030181086519, "grad_norm": 0.42786529660224915, "learning_rate": 9.864611260898383e-06, "loss": 0.397, "step": 1990 }, { "epoch": 0.5007545271629779, "grad_norm": 0.40797531604766846, "learning_rate": 9.8642728483238e-06, "loss": 0.4219, "step": 1991 }, { "epoch": 0.5010060362173038, "grad_norm": 0.3617155849933624, "learning_rate": 9.863934019154676e-06, "loss": 0.4213, "step": 1992 }, { "epoch": 0.5012575452716298, "grad_norm": 0.39363011717796326, "learning_rate": 9.863594773420033e-06, "loss": 0.391, "step": 1993 }, { "epoch": 0.5015090543259557, "grad_norm": 0.36764830350875854, "learning_rate": 9.863255111148925e-06, "loss": 0.393, "step": 1994 }, { "epoch": 0.5017605633802817, "grad_norm": 0.377768337726593, "learning_rate": 9.862915032370441e-06, "loss": 0.4024, "step": 1995 }, { "epoch": 0.5020120724346077, "grad_norm": 0.3849732577800751, "learning_rate": 9.862574537113705e-06, "loss": 0.3818, "step": 1996 }, { "epoch": 0.5022635814889336, "grad_norm": 0.399882048368454, "learning_rate": 9.862233625407882e-06, "loss": 0.3862, "step": 1997 }, { "epoch": 0.5025150905432596, "grad_norm": 0.439528226852417, "learning_rate": 9.861892297282167e-06, "loss": 0.3957, "step": 1998 }, { "epoch": 0.5027665995975855, "grad_norm": 0.40875646471977234, "learning_rate": 9.86155055276579e-06, "loss": 0.3947, "step": 1999 }, { "epoch": 0.5030181086519114, "grad_norm": 0.41643694043159485, "learning_rate": 9.861208391888024e-06, "loss": 0.4071, "step": 2000 }, { "epoch": 0.5032696177062375, "grad_norm": 0.47003522515296936, "learning_rate": 9.860865814678172e-06, "loss": 0.3874, "step": 2001 }, { "epoch": 0.5035211267605634, "grad_norm": 0.3645997643470764, "learning_rate": 9.860522821165572e-06, "loss": 0.3991, "step": 2002 }, { "epoch": 0.5037726358148893, "grad_norm": 0.3944624960422516, "learning_rate": 9.860179411379598e-06, "loss": 0.3663, "step": 2003 }, { "epoch": 0.5040241448692153, "grad_norm": 0.41858211159706116, "learning_rate": 9.859835585349664e-06, "loss": 0.4056, "step": 2004 }, { "epoch": 0.5042756539235412, "grad_norm": 0.42395514249801636, "learning_rate": 9.859491343105215e-06, "loss": 0.4058, "step": 2005 }, { "epoch": 0.5045271629778671, "grad_norm": 0.41400447487831116, "learning_rate": 9.859146684675733e-06, "loss": 0.4235, "step": 2006 }, { "epoch": 0.5047786720321932, "grad_norm": 0.3939945101737976, "learning_rate": 9.858801610090736e-06, "loss": 0.3848, "step": 2007 }, { "epoch": 0.5050301810865191, "grad_norm": 0.4896427094936371, "learning_rate": 9.858456119379779e-06, "loss": 0.3592, "step": 2008 }, { "epoch": 0.5052816901408451, "grad_norm": 0.43367308378219604, "learning_rate": 9.858110212572448e-06, "loss": 0.4206, "step": 2009 }, { "epoch": 0.505533199195171, "grad_norm": 0.36704427003860474, "learning_rate": 9.85776388969837e-06, "loss": 0.4041, "step": 2010 }, { "epoch": 0.505784708249497, "grad_norm": 0.5114539265632629, "learning_rate": 9.857417150787206e-06, "loss": 0.3805, "step": 2011 }, { "epoch": 0.506036217303823, "grad_norm": 0.4381665885448456, "learning_rate": 9.857069995868648e-06, "loss": 0.4056, "step": 2012 }, { "epoch": 0.5062877263581489, "grad_norm": 0.450435072183609, "learning_rate": 9.856722424972434e-06, "loss": 0.4096, "step": 2013 }, { "epoch": 0.5065392354124748, "grad_norm": 0.44745296239852905, "learning_rate": 9.856374438128327e-06, "loss": 0.4042, "step": 2014 }, { "epoch": 0.5067907444668008, "grad_norm": 0.4272843301296234, "learning_rate": 9.85602603536613e-06, "loss": 0.3735, "step": 2015 }, { "epoch": 0.5070422535211268, "grad_norm": 0.3728364109992981, "learning_rate": 9.855677216715682e-06, "loss": 0.4043, "step": 2016 }, { "epoch": 0.5072937625754527, "grad_norm": 0.4467531740665436, "learning_rate": 9.855327982206859e-06, "loss": 0.3911, "step": 2017 }, { "epoch": 0.5075452716297787, "grad_norm": 0.4462815225124359, "learning_rate": 9.854978331869568e-06, "loss": 0.4317, "step": 2018 }, { "epoch": 0.5077967806841046, "grad_norm": 0.38220712542533875, "learning_rate": 9.854628265733755e-06, "loss": 0.4053, "step": 2019 }, { "epoch": 0.5080482897384306, "grad_norm": 0.3900271952152252, "learning_rate": 9.854277783829402e-06, "loss": 0.3861, "step": 2020 }, { "epoch": 0.5082997987927566, "grad_norm": 0.4110308587551117, "learning_rate": 9.853926886186527e-06, "loss": 0.3917, "step": 2021 }, { "epoch": 0.5085513078470825, "grad_norm": 0.40754181146621704, "learning_rate": 9.853575572835179e-06, "loss": 0.4176, "step": 2022 }, { "epoch": 0.5088028169014085, "grad_norm": 0.4448073208332062, "learning_rate": 9.853223843805445e-06, "loss": 0.3992, "step": 2023 }, { "epoch": 0.5090543259557344, "grad_norm": 0.40429484844207764, "learning_rate": 9.852871699127453e-06, "loss": 0.4232, "step": 2024 }, { "epoch": 0.5093058350100603, "grad_norm": 0.4180743992328644, "learning_rate": 9.852519138831358e-06, "loss": 0.4074, "step": 2025 }, { "epoch": 0.5095573440643864, "grad_norm": 0.46965914964675903, "learning_rate": 9.852166162947356e-06, "loss": 0.3685, "step": 2026 }, { "epoch": 0.5098088531187123, "grad_norm": 0.400019109249115, "learning_rate": 9.851812771505678e-06, "loss": 0.3915, "step": 2027 }, { "epoch": 0.5100603621730382, "grad_norm": 0.39726099371910095, "learning_rate": 9.851458964536589e-06, "loss": 0.3435, "step": 2028 }, { "epoch": 0.5103118712273642, "grad_norm": 0.41831856966018677, "learning_rate": 9.85110474207039e-06, "loss": 0.382, "step": 2029 }, { "epoch": 0.5105633802816901, "grad_norm": 0.41029006242752075, "learning_rate": 9.85075010413742e-06, "loss": 0.4017, "step": 2030 }, { "epoch": 0.510814889336016, "grad_norm": 0.42589080333709717, "learning_rate": 9.850395050768047e-06, "loss": 0.3784, "step": 2031 }, { "epoch": 0.5110663983903421, "grad_norm": 0.4287776052951813, "learning_rate": 9.850039581992683e-06, "loss": 0.4288, "step": 2032 }, { "epoch": 0.511317907444668, "grad_norm": 0.38227567076683044, "learning_rate": 9.84968369784177e-06, "loss": 0.4244, "step": 2033 }, { "epoch": 0.511569416498994, "grad_norm": 0.40670591592788696, "learning_rate": 9.849327398345788e-06, "loss": 0.414, "step": 2034 }, { "epoch": 0.5118209255533199, "grad_norm": 0.4272759258747101, "learning_rate": 9.848970683535253e-06, "loss": 0.3923, "step": 2035 }, { "epoch": 0.5120724346076458, "grad_norm": 0.5142744779586792, "learning_rate": 9.84861355344071e-06, "loss": 0.3805, "step": 2036 }, { "epoch": 0.5123239436619719, "grad_norm": 0.38634923100471497, "learning_rate": 9.848256008092754e-06, "loss": 0.3946, "step": 2037 }, { "epoch": 0.5125754527162978, "grad_norm": 0.4425947964191437, "learning_rate": 9.847898047522e-06, "loss": 0.3706, "step": 2038 }, { "epoch": 0.5128269617706237, "grad_norm": 0.4305839240550995, "learning_rate": 9.847539671759105e-06, "loss": 0.3941, "step": 2039 }, { "epoch": 0.5130784708249497, "grad_norm": 0.4277026653289795, "learning_rate": 9.847180880834764e-06, "loss": 0.3712, "step": 2040 }, { "epoch": 0.5133299798792756, "grad_norm": 0.3730026185512543, "learning_rate": 9.846821674779705e-06, "loss": 0.3905, "step": 2041 }, { "epoch": 0.5135814889336016, "grad_norm": 0.39186757802963257, "learning_rate": 9.846462053624691e-06, "loss": 0.4005, "step": 2042 }, { "epoch": 0.5138329979879276, "grad_norm": 0.39070603251457214, "learning_rate": 9.846102017400523e-06, "loss": 0.3957, "step": 2043 }, { "epoch": 0.5140845070422535, "grad_norm": 0.3925558924674988, "learning_rate": 9.845741566138031e-06, "loss": 0.3974, "step": 2044 }, { "epoch": 0.5143360160965795, "grad_norm": 0.4226996600627899, "learning_rate": 9.84538069986809e-06, "loss": 0.4011, "step": 2045 }, { "epoch": 0.5145875251509054, "grad_norm": 0.39153674244880676, "learning_rate": 9.845019418621606e-06, "loss": 0.3668, "step": 2046 }, { "epoch": 0.5148390342052314, "grad_norm": 0.4205423891544342, "learning_rate": 9.844657722429518e-06, "loss": 0.402, "step": 2047 }, { "epoch": 0.5150905432595574, "grad_norm": 0.4090724289417267, "learning_rate": 9.844295611322804e-06, "loss": 0.4084, "step": 2048 }, { "epoch": 0.5153420523138833, "grad_norm": 0.38471901416778564, "learning_rate": 9.843933085332477e-06, "loss": 0.3758, "step": 2049 }, { "epoch": 0.5155935613682092, "grad_norm": 0.3938809037208557, "learning_rate": 9.843570144489585e-06, "loss": 0.3913, "step": 2050 }, { "epoch": 0.5158450704225352, "grad_norm": 0.36798256635665894, "learning_rate": 9.843206788825211e-06, "loss": 0.4112, "step": 2051 }, { "epoch": 0.5160965794768612, "grad_norm": 0.3941897749900818, "learning_rate": 9.842843018370475e-06, "loss": 0.4002, "step": 2052 }, { "epoch": 0.5163480885311871, "grad_norm": 0.4066377580165863, "learning_rate": 9.84247883315653e-06, "loss": 0.4136, "step": 2053 }, { "epoch": 0.5165995975855131, "grad_norm": 0.4132152795791626, "learning_rate": 9.84211423321457e-06, "loss": 0.4202, "step": 2054 }, { "epoch": 0.516851106639839, "grad_norm": 0.3840596377849579, "learning_rate": 9.841749218575815e-06, "loss": 0.4002, "step": 2055 }, { "epoch": 0.5171026156941649, "grad_norm": 0.4097898304462433, "learning_rate": 9.841383789271533e-06, "loss": 0.4072, "step": 2056 }, { "epoch": 0.517354124748491, "grad_norm": 0.39788705110549927, "learning_rate": 9.841017945333014e-06, "loss": 0.4225, "step": 2057 }, { "epoch": 0.5176056338028169, "grad_norm": 0.3664261996746063, "learning_rate": 9.840651686791593e-06, "loss": 0.4118, "step": 2058 }, { "epoch": 0.5178571428571429, "grad_norm": 0.31850335001945496, "learning_rate": 9.84028501367864e-06, "loss": 0.3815, "step": 2059 }, { "epoch": 0.5181086519114688, "grad_norm": 0.4274592697620392, "learning_rate": 9.839917926025555e-06, "loss": 0.4198, "step": 2060 }, { "epoch": 0.5183601609657947, "grad_norm": 0.3651498854160309, "learning_rate": 9.839550423863779e-06, "loss": 0.3958, "step": 2061 }, { "epoch": 0.5186116700201208, "grad_norm": 0.35910671949386597, "learning_rate": 9.839182507224786e-06, "loss": 0.3888, "step": 2062 }, { "epoch": 0.5188631790744467, "grad_norm": 0.39101144671440125, "learning_rate": 9.838814176140084e-06, "loss": 0.4144, "step": 2063 }, { "epoch": 0.5191146881287726, "grad_norm": 0.34476083517074585, "learning_rate": 9.838445430641219e-06, "loss": 0.4075, "step": 2064 }, { "epoch": 0.5193661971830986, "grad_norm": 0.3631290793418884, "learning_rate": 9.838076270759771e-06, "loss": 0.3737, "step": 2065 }, { "epoch": 0.5196177062374245, "grad_norm": 0.38996070623397827, "learning_rate": 9.83770669652736e-06, "loss": 0.3986, "step": 2066 }, { "epoch": 0.5198692152917505, "grad_norm": 0.35395678877830505, "learning_rate": 9.837336707975633e-06, "loss": 0.384, "step": 2067 }, { "epoch": 0.5201207243460765, "grad_norm": 0.3823234438896179, "learning_rate": 9.83696630513628e-06, "loss": 0.3907, "step": 2068 }, { "epoch": 0.5203722334004024, "grad_norm": 0.40023064613342285, "learning_rate": 9.836595488041022e-06, "loss": 0.3846, "step": 2069 }, { "epoch": 0.5206237424547284, "grad_norm": 0.4110654592514038, "learning_rate": 9.83622425672162e-06, "loss": 0.4082, "step": 2070 }, { "epoch": 0.5208752515090543, "grad_norm": 0.3667696416378021, "learning_rate": 9.835852611209865e-06, "loss": 0.4005, "step": 2071 }, { "epoch": 0.5211267605633803, "grad_norm": 0.39130160212516785, "learning_rate": 9.835480551537587e-06, "loss": 0.412, "step": 2072 }, { "epoch": 0.5213782696177063, "grad_norm": 0.4086337685585022, "learning_rate": 9.83510807773665e-06, "loss": 0.4241, "step": 2073 }, { "epoch": 0.5216297786720322, "grad_norm": 0.3477274179458618, "learning_rate": 9.834735189838954e-06, "loss": 0.4019, "step": 2074 }, { "epoch": 0.5218812877263581, "grad_norm": 0.4180651605129242, "learning_rate": 9.834361887876436e-06, "loss": 0.4283, "step": 2075 }, { "epoch": 0.5221327967806841, "grad_norm": 0.4306004047393799, "learning_rate": 9.833988171881066e-06, "loss": 0.4239, "step": 2076 }, { "epoch": 0.52238430583501, "grad_norm": 0.38910388946533203, "learning_rate": 9.83361404188485e-06, "loss": 0.3697, "step": 2077 }, { "epoch": 0.522635814889336, "grad_norm": 0.49108606576919556, "learning_rate": 9.83323949791983e-06, "loss": 0.4168, "step": 2078 }, { "epoch": 0.522887323943662, "grad_norm": 0.42532941699028015, "learning_rate": 9.832864540018083e-06, "loss": 0.4159, "step": 2079 }, { "epoch": 0.5231388329979879, "grad_norm": 0.3561785817146301, "learning_rate": 9.832489168211723e-06, "loss": 0.3945, "step": 2080 }, { "epoch": 0.5233903420523138, "grad_norm": 0.3978264629840851, "learning_rate": 9.832113382532899e-06, "loss": 0.4115, "step": 2081 }, { "epoch": 0.5236418511066399, "grad_norm": 0.38779211044311523, "learning_rate": 9.831737183013792e-06, "loss": 0.3627, "step": 2082 }, { "epoch": 0.5238933601609658, "grad_norm": 0.3900914192199707, "learning_rate": 9.831360569686623e-06, "loss": 0.4184, "step": 2083 }, { "epoch": 0.5241448692152918, "grad_norm": 0.44345182180404663, "learning_rate": 9.830983542583647e-06, "loss": 0.4029, "step": 2084 }, { "epoch": 0.5243963782696177, "grad_norm": 0.3975575864315033, "learning_rate": 9.830606101737153e-06, "loss": 0.3758, "step": 2085 }, { "epoch": 0.5246478873239436, "grad_norm": 0.4029177129268646, "learning_rate": 9.830228247179465e-06, "loss": 0.4035, "step": 2086 }, { "epoch": 0.5248993963782697, "grad_norm": 0.44459268450737, "learning_rate": 9.829849978942948e-06, "loss": 0.4014, "step": 2087 }, { "epoch": 0.5251509054325956, "grad_norm": 0.39327511191368103, "learning_rate": 9.829471297059991e-06, "loss": 0.391, "step": 2088 }, { "epoch": 0.5254024144869215, "grad_norm": 0.37261277437210083, "learning_rate": 9.829092201563035e-06, "loss": 0.4013, "step": 2089 }, { "epoch": 0.5256539235412475, "grad_norm": 0.4421737790107727, "learning_rate": 9.828712692484541e-06, "loss": 0.3947, "step": 2090 }, { "epoch": 0.5259054325955734, "grad_norm": 0.35906025767326355, "learning_rate": 9.828332769857014e-06, "loss": 0.3556, "step": 2091 }, { "epoch": 0.5261569416498993, "grad_norm": 0.3702889084815979, "learning_rate": 9.82795243371299e-06, "loss": 0.4143, "step": 2092 }, { "epoch": 0.5264084507042254, "grad_norm": 0.41510313749313354, "learning_rate": 9.827571684085045e-06, "loss": 0.4048, "step": 2093 }, { "epoch": 0.5266599597585513, "grad_norm": 0.3920029103755951, "learning_rate": 9.827190521005786e-06, "loss": 0.4078, "step": 2094 }, { "epoch": 0.5269114688128773, "grad_norm": 0.4151366651058197, "learning_rate": 9.826808944507855e-06, "loss": 0.4112, "step": 2095 }, { "epoch": 0.5271629778672032, "grad_norm": 0.40691617131233215, "learning_rate": 9.826426954623937e-06, "loss": 0.3947, "step": 2096 }, { "epoch": 0.5274144869215291, "grad_norm": 0.3902684152126312, "learning_rate": 9.826044551386743e-06, "loss": 0.3756, "step": 2097 }, { "epoch": 0.5276659959758552, "grad_norm": 0.436042457818985, "learning_rate": 9.825661734829027e-06, "loss": 0.3787, "step": 2098 }, { "epoch": 0.5279175050301811, "grad_norm": 0.40818825364112854, "learning_rate": 9.825278504983571e-06, "loss": 0.408, "step": 2099 }, { "epoch": 0.528169014084507, "grad_norm": 0.4207872152328491, "learning_rate": 9.824894861883198e-06, "loss": 0.3696, "step": 2100 }, { "epoch": 0.528420523138833, "grad_norm": 0.40610066056251526, "learning_rate": 9.824510805560765e-06, "loss": 0.3853, "step": 2101 }, { "epoch": 0.528672032193159, "grad_norm": 0.4684671461582184, "learning_rate": 9.824126336049164e-06, "loss": 0.3864, "step": 2102 }, { "epoch": 0.5289235412474849, "grad_norm": 0.41243529319763184, "learning_rate": 9.823741453381322e-06, "loss": 0.4081, "step": 2103 }, { "epoch": 0.5291750503018109, "grad_norm": 0.3892309069633484, "learning_rate": 9.8233561575902e-06, "loss": 0.3777, "step": 2104 }, { "epoch": 0.5294265593561368, "grad_norm": 0.4003976285457611, "learning_rate": 9.822970448708799e-06, "loss": 0.3686, "step": 2105 }, { "epoch": 0.5296780684104627, "grad_norm": 0.38374191522598267, "learning_rate": 9.822584326770152e-06, "loss": 0.3977, "step": 2106 }, { "epoch": 0.5299295774647887, "grad_norm": 0.4218411147594452, "learning_rate": 9.822197791807328e-06, "loss": 0.3836, "step": 2107 }, { "epoch": 0.5301810865191147, "grad_norm": 0.4624592959880829, "learning_rate": 9.821810843853428e-06, "loss": 0.4268, "step": 2108 }, { "epoch": 0.5304325955734407, "grad_norm": 0.3895992040634155, "learning_rate": 9.821423482941597e-06, "loss": 0.3982, "step": 2109 }, { "epoch": 0.5306841046277666, "grad_norm": 0.4353783428668976, "learning_rate": 9.821035709105006e-06, "loss": 0.4131, "step": 2110 }, { "epoch": 0.5309356136820925, "grad_norm": 0.4408473074436188, "learning_rate": 9.820647522376868e-06, "loss": 0.3785, "step": 2111 }, { "epoch": 0.5311871227364185, "grad_norm": 0.39889922738075256, "learning_rate": 9.820258922790427e-06, "loss": 0.4074, "step": 2112 }, { "epoch": 0.5314386317907445, "grad_norm": 0.43087536096572876, "learning_rate": 9.819869910378964e-06, "loss": 0.3995, "step": 2113 }, { "epoch": 0.5316901408450704, "grad_norm": 0.3881208002567291, "learning_rate": 9.819480485175797e-06, "loss": 0.4023, "step": 2114 }, { "epoch": 0.5319416498993964, "grad_norm": 0.39766907691955566, "learning_rate": 9.819090647214277e-06, "loss": 0.42, "step": 2115 }, { "epoch": 0.5321931589537223, "grad_norm": 0.42352211475372314, "learning_rate": 9.818700396527791e-06, "loss": 0.3988, "step": 2116 }, { "epoch": 0.5324446680080482, "grad_norm": 0.48607903718948364, "learning_rate": 9.818309733149762e-06, "loss": 0.384, "step": 2117 }, { "epoch": 0.5326961770623743, "grad_norm": 0.38936519622802734, "learning_rate": 9.817918657113648e-06, "loss": 0.4082, "step": 2118 }, { "epoch": 0.5329476861167002, "grad_norm": 0.4536868929862976, "learning_rate": 9.81752716845294e-06, "loss": 0.399, "step": 2119 }, { "epoch": 0.5331991951710262, "grad_norm": 0.42506319284439087, "learning_rate": 9.81713526720117e-06, "loss": 0.3987, "step": 2120 }, { "epoch": 0.5334507042253521, "grad_norm": 0.42283493280410767, "learning_rate": 9.8167429533919e-06, "loss": 0.3804, "step": 2121 }, { "epoch": 0.533702213279678, "grad_norm": 0.4490463137626648, "learning_rate": 9.816350227058728e-06, "loss": 0.4026, "step": 2122 }, { "epoch": 0.5339537223340041, "grad_norm": 0.4041505455970764, "learning_rate": 9.815957088235293e-06, "loss": 0.3712, "step": 2123 }, { "epoch": 0.53420523138833, "grad_norm": 0.3451807200908661, "learning_rate": 9.81556353695526e-06, "loss": 0.376, "step": 2124 }, { "epoch": 0.5344567404426559, "grad_norm": 0.3851028382778168, "learning_rate": 9.815169573252336e-06, "loss": 0.3902, "step": 2125 }, { "epoch": 0.5347082494969819, "grad_norm": 0.38843706250190735, "learning_rate": 9.814775197160262e-06, "loss": 0.4208, "step": 2126 }, { "epoch": 0.5349597585513078, "grad_norm": 0.3881021738052368, "learning_rate": 9.814380408712813e-06, "loss": 0.4059, "step": 2127 }, { "epoch": 0.5352112676056338, "grad_norm": 0.33903956413269043, "learning_rate": 9.813985207943802e-06, "loss": 0.3739, "step": 2128 }, { "epoch": 0.5354627766599598, "grad_norm": 0.36936119198799133, "learning_rate": 9.813589594887074e-06, "loss": 0.416, "step": 2129 }, { "epoch": 0.5357142857142857, "grad_norm": 0.38683557510375977, "learning_rate": 9.81319356957651e-06, "loss": 0.3924, "step": 2130 }, { "epoch": 0.5359657947686117, "grad_norm": 0.3721410036087036, "learning_rate": 9.812797132046028e-06, "loss": 0.4107, "step": 2131 }, { "epoch": 0.5362173038229376, "grad_norm": 0.34902095794677734, "learning_rate": 9.812400282329579e-06, "loss": 0.3693, "step": 2132 }, { "epoch": 0.5364688128772636, "grad_norm": 0.39966878294944763, "learning_rate": 9.812003020461155e-06, "loss": 0.3925, "step": 2133 }, { "epoch": 0.5367203219315896, "grad_norm": 0.36834654211997986, "learning_rate": 9.811605346474775e-06, "loss": 0.3942, "step": 2134 }, { "epoch": 0.5369718309859155, "grad_norm": 0.37292787432670593, "learning_rate": 9.811207260404499e-06, "loss": 0.4042, "step": 2135 }, { "epoch": 0.5372233400402414, "grad_norm": 0.35834237933158875, "learning_rate": 9.810808762284419e-06, "loss": 0.3952, "step": 2136 }, { "epoch": 0.5374748490945674, "grad_norm": 0.40317603945732117, "learning_rate": 9.810409852148665e-06, "loss": 0.4013, "step": 2137 }, { "epoch": 0.5377263581488934, "grad_norm": 0.4264310598373413, "learning_rate": 9.8100105300314e-06, "loss": 0.4076, "step": 2138 }, { "epoch": 0.5379778672032193, "grad_norm": 0.42888686060905457, "learning_rate": 9.809610795966826e-06, "loss": 0.3646, "step": 2139 }, { "epoch": 0.5382293762575453, "grad_norm": 0.42549654841423035, "learning_rate": 9.809210649989175e-06, "loss": 0.4074, "step": 2140 }, { "epoch": 0.5384808853118712, "grad_norm": 0.4057452976703644, "learning_rate": 9.80881009213272e-06, "loss": 0.3669, "step": 2141 }, { "epoch": 0.5387323943661971, "grad_norm": 0.41238072514533997, "learning_rate": 9.808409122431764e-06, "loss": 0.401, "step": 2142 }, { "epoch": 0.5389839034205232, "grad_norm": 0.42542120814323425, "learning_rate": 9.808007740920647e-06, "loss": 0.3928, "step": 2143 }, { "epoch": 0.5392354124748491, "grad_norm": 0.37695184350013733, "learning_rate": 9.807605947633745e-06, "loss": 0.3742, "step": 2144 }, { "epoch": 0.5394869215291751, "grad_norm": 0.4471421539783478, "learning_rate": 9.807203742605472e-06, "loss": 0.3786, "step": 2145 }, { "epoch": 0.539738430583501, "grad_norm": 0.45084428787231445, "learning_rate": 9.80680112587027e-06, "loss": 0.3869, "step": 2146 }, { "epoch": 0.5399899396378269, "grad_norm": 0.37542101740837097, "learning_rate": 9.806398097462624e-06, "loss": 0.3566, "step": 2147 }, { "epoch": 0.540241448692153, "grad_norm": 0.4376560151576996, "learning_rate": 9.805994657417049e-06, "loss": 0.4074, "step": 2148 }, { "epoch": 0.5404929577464789, "grad_norm": 0.5008549094200134, "learning_rate": 9.8055908057681e-06, "loss": 0.3917, "step": 2149 }, { "epoch": 0.5407444668008048, "grad_norm": 0.37436121702194214, "learning_rate": 9.80518654255036e-06, "loss": 0.3788, "step": 2150 }, { "epoch": 0.5409959758551308, "grad_norm": 0.3661990463733673, "learning_rate": 9.804781867798454e-06, "loss": 0.3834, "step": 2151 }, { "epoch": 0.5412474849094567, "grad_norm": 0.45389556884765625, "learning_rate": 9.804376781547041e-06, "loss": 0.3688, "step": 2152 }, { "epoch": 0.5414989939637826, "grad_norm": 0.3825472593307495, "learning_rate": 9.80397128383081e-06, "loss": 0.3752, "step": 2153 }, { "epoch": 0.5417505030181087, "grad_norm": 0.3918491005897522, "learning_rate": 9.803565374684494e-06, "loss": 0.391, "step": 2154 }, { "epoch": 0.5420020120724346, "grad_norm": 0.3703051507472992, "learning_rate": 9.803159054142855e-06, "loss": 0.3866, "step": 2155 }, { "epoch": 0.5422535211267606, "grad_norm": 0.4313543140888214, "learning_rate": 9.802752322240692e-06, "loss": 0.4013, "step": 2156 }, { "epoch": 0.5425050301810865, "grad_norm": 0.3883597254753113, "learning_rate": 9.802345179012837e-06, "loss": 0.4049, "step": 2157 }, { "epoch": 0.5427565392354124, "grad_norm": 0.37119433283805847, "learning_rate": 9.801937624494161e-06, "loss": 0.3798, "step": 2158 }, { "epoch": 0.5430080482897385, "grad_norm": 0.3962958753108978, "learning_rate": 9.801529658719568e-06, "loss": 0.4036, "step": 2159 }, { "epoch": 0.5432595573440644, "grad_norm": 0.37826666235923767, "learning_rate": 9.801121281724e-06, "loss": 0.3931, "step": 2160 }, { "epoch": 0.5435110663983903, "grad_norm": 0.41150638461112976, "learning_rate": 9.800712493542428e-06, "loss": 0.3994, "step": 2161 }, { "epoch": 0.5437625754527163, "grad_norm": 0.3833552896976471, "learning_rate": 9.800303294209865e-06, "loss": 0.4038, "step": 2162 }, { "epoch": 0.5440140845070423, "grad_norm": 0.35805314779281616, "learning_rate": 9.799893683761355e-06, "loss": 0.3862, "step": 2163 }, { "epoch": 0.5442655935613682, "grad_norm": 0.39856329560279846, "learning_rate": 9.79948366223198e-06, "loss": 0.3888, "step": 2164 }, { "epoch": 0.5445171026156942, "grad_norm": 0.42184582352638245, "learning_rate": 9.799073229656853e-06, "loss": 0.3901, "step": 2165 }, { "epoch": 0.5447686116700201, "grad_norm": 0.36999794840812683, "learning_rate": 9.798662386071127e-06, "loss": 0.3794, "step": 2166 }, { "epoch": 0.545020120724346, "grad_norm": 0.36188700795173645, "learning_rate": 9.79825113150999e-06, "loss": 0.4028, "step": 2167 }, { "epoch": 0.545271629778672, "grad_norm": 0.39296042919158936, "learning_rate": 9.797839466008659e-06, "loss": 0.3667, "step": 2168 }, { "epoch": 0.545523138832998, "grad_norm": 0.38612136244773865, "learning_rate": 9.797427389602393e-06, "loss": 0.4027, "step": 2169 }, { "epoch": 0.545774647887324, "grad_norm": 0.3653174042701721, "learning_rate": 9.797014902326487e-06, "loss": 0.3901, "step": 2170 }, { "epoch": 0.5460261569416499, "grad_norm": 0.37336331605911255, "learning_rate": 9.796602004216261e-06, "loss": 0.3656, "step": 2171 }, { "epoch": 0.5462776659959758, "grad_norm": 0.3571634888648987, "learning_rate": 9.796188695307083e-06, "loss": 0.3877, "step": 2172 }, { "epoch": 0.5465291750503019, "grad_norm": 0.3366375267505646, "learning_rate": 9.795774975634347e-06, "loss": 0.3793, "step": 2173 }, { "epoch": 0.5467806841046278, "grad_norm": 0.41412922739982605, "learning_rate": 9.795360845233485e-06, "loss": 0.395, "step": 2174 }, { "epoch": 0.5470321931589537, "grad_norm": 0.3812969923019409, "learning_rate": 9.794946304139969e-06, "loss": 0.4144, "step": 2175 }, { "epoch": 0.5472837022132797, "grad_norm": 0.3604353368282318, "learning_rate": 9.794531352389298e-06, "loss": 0.3636, "step": 2176 }, { "epoch": 0.5475352112676056, "grad_norm": 0.3878379166126251, "learning_rate": 9.794115990017012e-06, "loss": 0.4151, "step": 2177 }, { "epoch": 0.5477867203219315, "grad_norm": 0.3887823820114136, "learning_rate": 9.793700217058683e-06, "loss": 0.4009, "step": 2178 }, { "epoch": 0.5480382293762576, "grad_norm": 0.37774476408958435, "learning_rate": 9.793284033549919e-06, "loss": 0.4105, "step": 2179 }, { "epoch": 0.5482897384305835, "grad_norm": 0.37441837787628174, "learning_rate": 9.792867439526366e-06, "loss": 0.4019, "step": 2180 }, { "epoch": 0.5485412474849095, "grad_norm": 0.37095755338668823, "learning_rate": 9.792450435023699e-06, "loss": 0.3687, "step": 2181 }, { "epoch": 0.5487927565392354, "grad_norm": 0.40394505858421326, "learning_rate": 9.792033020077634e-06, "loss": 0.4008, "step": 2182 }, { "epoch": 0.5490442655935613, "grad_norm": 0.38541585206985474, "learning_rate": 9.79161519472392e-06, "loss": 0.4145, "step": 2183 }, { "epoch": 0.5492957746478874, "grad_norm": 0.3427443504333496, "learning_rate": 9.79119695899834e-06, "loss": 0.3838, "step": 2184 }, { "epoch": 0.5495472837022133, "grad_norm": 0.3911786675453186, "learning_rate": 9.790778312936715e-06, "loss": 0.3893, "step": 2185 }, { "epoch": 0.5497987927565392, "grad_norm": 0.36838027834892273, "learning_rate": 9.790359256574899e-06, "loss": 0.3763, "step": 2186 }, { "epoch": 0.5500503018108652, "grad_norm": 0.3471270501613617, "learning_rate": 9.78993978994878e-06, "loss": 0.3924, "step": 2187 }, { "epoch": 0.5503018108651911, "grad_norm": 0.33406156301498413, "learning_rate": 9.789519913094286e-06, "loss": 0.3775, "step": 2188 }, { "epoch": 0.5505533199195171, "grad_norm": 0.36162400245666504, "learning_rate": 9.789099626047372e-06, "loss": 0.3834, "step": 2189 }, { "epoch": 0.5508048289738431, "grad_norm": 0.4071558713912964, "learning_rate": 9.788678928844036e-06, "loss": 0.3924, "step": 2190 }, { "epoch": 0.551056338028169, "grad_norm": 0.3942394256591797, "learning_rate": 9.788257821520308e-06, "loss": 0.3962, "step": 2191 }, { "epoch": 0.5513078470824949, "grad_norm": 0.41246795654296875, "learning_rate": 9.787836304112253e-06, "loss": 0.3822, "step": 2192 }, { "epoch": 0.5515593561368209, "grad_norm": 0.3739457130432129, "learning_rate": 9.78741437665597e-06, "loss": 0.3988, "step": 2193 }, { "epoch": 0.5518108651911469, "grad_norm": 0.3873920440673828, "learning_rate": 9.786992039187598e-06, "loss": 0.4142, "step": 2194 }, { "epoch": 0.5520623742454729, "grad_norm": 0.3712387979030609, "learning_rate": 9.786569291743305e-06, "loss": 0.3964, "step": 2195 }, { "epoch": 0.5523138832997988, "grad_norm": 0.4185509979724884, "learning_rate": 9.786146134359294e-06, "loss": 0.4275, "step": 2196 }, { "epoch": 0.5525653923541247, "grad_norm": 0.3870210647583008, "learning_rate": 9.785722567071811e-06, "loss": 0.4037, "step": 2197 }, { "epoch": 0.5528169014084507, "grad_norm": 0.39058247208595276, "learning_rate": 9.785298589917128e-06, "loss": 0.4133, "step": 2198 }, { "epoch": 0.5530684104627767, "grad_norm": 0.39365655183792114, "learning_rate": 9.784874202931558e-06, "loss": 0.4017, "step": 2199 }, { "epoch": 0.5533199195171026, "grad_norm": 0.44448891282081604, "learning_rate": 9.784449406151448e-06, "loss": 0.3863, "step": 2200 }, { "epoch": 0.5535714285714286, "grad_norm": 0.3912818133831024, "learning_rate": 9.784024199613176e-06, "loss": 0.3809, "step": 2201 }, { "epoch": 0.5538229376257545, "grad_norm": 0.3555707633495331, "learning_rate": 9.783598583353161e-06, "loss": 0.4174, "step": 2202 }, { "epoch": 0.5540744466800804, "grad_norm": 0.3489442765712738, "learning_rate": 9.783172557407852e-06, "loss": 0.4051, "step": 2203 }, { "epoch": 0.5543259557344065, "grad_norm": 0.4021318554878235, "learning_rate": 9.78274612181374e-06, "loss": 0.3971, "step": 2204 }, { "epoch": 0.5545774647887324, "grad_norm": 0.3958930969238281, "learning_rate": 9.78231927660734e-06, "loss": 0.408, "step": 2205 }, { "epoch": 0.5548289738430584, "grad_norm": 0.38725799322128296, "learning_rate": 9.781892021825215e-06, "loss": 0.4053, "step": 2206 }, { "epoch": 0.5550804828973843, "grad_norm": 0.3717680275440216, "learning_rate": 9.781464357503951e-06, "loss": 0.4101, "step": 2207 }, { "epoch": 0.5553319919517102, "grad_norm": 0.42192167043685913, "learning_rate": 9.781036283680179e-06, "loss": 0.3914, "step": 2208 }, { "epoch": 0.5555835010060363, "grad_norm": 0.3718080520629883, "learning_rate": 9.78060780039056e-06, "loss": 0.3633, "step": 2209 }, { "epoch": 0.5558350100603622, "grad_norm": 0.38646015524864197, "learning_rate": 9.780178907671788e-06, "loss": 0.3974, "step": 2210 }, { "epoch": 0.5560865191146881, "grad_norm": 0.36778998374938965, "learning_rate": 9.7797496055606e-06, "loss": 0.4049, "step": 2211 }, { "epoch": 0.5563380281690141, "grad_norm": 0.3933306336402893, "learning_rate": 9.779319894093759e-06, "loss": 0.4053, "step": 2212 }, { "epoch": 0.55658953722334, "grad_norm": 0.39641377329826355, "learning_rate": 9.778889773308069e-06, "loss": 0.4054, "step": 2213 }, { "epoch": 0.556841046277666, "grad_norm": 0.44454824924468994, "learning_rate": 9.778459243240365e-06, "loss": 0.4066, "step": 2214 }, { "epoch": 0.557092555331992, "grad_norm": 0.3420102894306183, "learning_rate": 9.778028303927522e-06, "loss": 0.3604, "step": 2215 }, { "epoch": 0.5573440643863179, "grad_norm": 0.4014604091644287, "learning_rate": 9.777596955406446e-06, "loss": 0.4214, "step": 2216 }, { "epoch": 0.5575955734406438, "grad_norm": 0.416327565908432, "learning_rate": 9.77716519771408e-06, "loss": 0.4317, "step": 2217 }, { "epoch": 0.5578470824949698, "grad_norm": 0.43000099062919617, "learning_rate": 9.7767330308874e-06, "loss": 0.4061, "step": 2218 }, { "epoch": 0.5580985915492958, "grad_norm": 0.3848433494567871, "learning_rate": 9.776300454963417e-06, "loss": 0.3885, "step": 2219 }, { "epoch": 0.5583501006036218, "grad_norm": 0.3921235203742981, "learning_rate": 9.775867469979184e-06, "loss": 0.4042, "step": 2220 }, { "epoch": 0.5586016096579477, "grad_norm": 0.3721983730792999, "learning_rate": 9.775434075971777e-06, "loss": 0.3859, "step": 2221 }, { "epoch": 0.5588531187122736, "grad_norm": 0.38795045018196106, "learning_rate": 9.77500027297832e-06, "loss": 0.3851, "step": 2222 }, { "epoch": 0.5591046277665996, "grad_norm": 0.3403348922729492, "learning_rate": 9.774566061035957e-06, "loss": 0.3788, "step": 2223 }, { "epoch": 0.5593561368209256, "grad_norm": 0.4166750907897949, "learning_rate": 9.774131440181884e-06, "loss": 0.3765, "step": 2224 }, { "epoch": 0.5596076458752515, "grad_norm": 0.4601471722126007, "learning_rate": 9.77369641045332e-06, "loss": 0.4047, "step": 2225 }, { "epoch": 0.5598591549295775, "grad_norm": 0.3640338182449341, "learning_rate": 9.77326097188752e-06, "loss": 0.4057, "step": 2226 }, { "epoch": 0.5601106639839034, "grad_norm": 0.49202394485473633, "learning_rate": 9.77282512452178e-06, "loss": 0.3838, "step": 2227 }, { "epoch": 0.5603621730382293, "grad_norm": 0.4419405162334442, "learning_rate": 9.77238886839343e-06, "loss": 0.4005, "step": 2228 }, { "epoch": 0.5606136820925554, "grad_norm": 0.43457552790641785, "learning_rate": 9.771952203539826e-06, "loss": 0.3764, "step": 2229 }, { "epoch": 0.5608651911468813, "grad_norm": 0.41830188035964966, "learning_rate": 9.77151512999837e-06, "loss": 0.3886, "step": 2230 }, { "epoch": 0.5611167002012073, "grad_norm": 0.42050424218177795, "learning_rate": 9.771077647806494e-06, "loss": 0.4007, "step": 2231 }, { "epoch": 0.5613682092555332, "grad_norm": 0.3640553057193756, "learning_rate": 9.770639757001665e-06, "loss": 0.4145, "step": 2232 }, { "epoch": 0.5616197183098591, "grad_norm": 0.3788280487060547, "learning_rate": 9.770201457621386e-06, "loss": 0.3933, "step": 2233 }, { "epoch": 0.5618712273641852, "grad_norm": 0.4044826924800873, "learning_rate": 9.769762749703194e-06, "loss": 0.3897, "step": 2234 }, { "epoch": 0.5621227364185111, "grad_norm": 0.3986685574054718, "learning_rate": 9.769323633284662e-06, "loss": 0.3978, "step": 2235 }, { "epoch": 0.562374245472837, "grad_norm": 0.3790046274662018, "learning_rate": 9.768884108403399e-06, "loss": 0.3759, "step": 2236 }, { "epoch": 0.562625754527163, "grad_norm": 0.382379412651062, "learning_rate": 9.768444175097043e-06, "loss": 0.3848, "step": 2237 }, { "epoch": 0.5628772635814889, "grad_norm": 0.3763681650161743, "learning_rate": 9.768003833403278e-06, "loss": 0.3983, "step": 2238 }, { "epoch": 0.5631287726358148, "grad_norm": 0.3796675503253937, "learning_rate": 9.767563083359812e-06, "loss": 0.4085, "step": 2239 }, { "epoch": 0.5633802816901409, "grad_norm": 0.40289655327796936, "learning_rate": 9.767121925004393e-06, "loss": 0.4021, "step": 2240 }, { "epoch": 0.5636317907444668, "grad_norm": 0.3628096878528595, "learning_rate": 9.766680358374805e-06, "loss": 0.3883, "step": 2241 }, { "epoch": 0.5638832997987927, "grad_norm": 0.3843015730381012, "learning_rate": 9.766238383508863e-06, "loss": 0.4002, "step": 2242 }, { "epoch": 0.5641348088531187, "grad_norm": 0.3902144432067871, "learning_rate": 9.765796000444423e-06, "loss": 0.3985, "step": 2243 }, { "epoch": 0.5643863179074446, "grad_norm": 0.3879857063293457, "learning_rate": 9.76535320921937e-06, "loss": 0.396, "step": 2244 }, { "epoch": 0.5646378269617707, "grad_norm": 0.38419634103775024, "learning_rate": 9.764910009871626e-06, "loss": 0.3882, "step": 2245 }, { "epoch": 0.5648893360160966, "grad_norm": 0.4009936451911926, "learning_rate": 9.76446640243915e-06, "loss": 0.3931, "step": 2246 }, { "epoch": 0.5651408450704225, "grad_norm": 0.35781916975975037, "learning_rate": 9.764022386959931e-06, "loss": 0.3602, "step": 2247 }, { "epoch": 0.5653923541247485, "grad_norm": 0.3593212366104126, "learning_rate": 9.763577963472e-06, "loss": 0.3984, "step": 2248 }, { "epoch": 0.5656438631790744, "grad_norm": 0.41306471824645996, "learning_rate": 9.763133132013415e-06, "loss": 0.4013, "step": 2249 }, { "epoch": 0.5658953722334004, "grad_norm": 0.4159875810146332, "learning_rate": 9.762687892622278e-06, "loss": 0.3889, "step": 2250 }, { "epoch": 0.5661468812877264, "grad_norm": 0.4286075532436371, "learning_rate": 9.762242245336718e-06, "loss": 0.418, "step": 2251 }, { "epoch": 0.5663983903420523, "grad_norm": 0.41156652569770813, "learning_rate": 9.7617961901949e-06, "loss": 0.4001, "step": 2252 }, { "epoch": 0.5666498993963782, "grad_norm": 0.4073083698749542, "learning_rate": 9.76134972723503e-06, "loss": 0.3782, "step": 2253 }, { "epoch": 0.5669014084507042, "grad_norm": 0.43342074751853943, "learning_rate": 9.76090285649534e-06, "loss": 0.4144, "step": 2254 }, { "epoch": 0.5671529175050302, "grad_norm": 0.37153980135917664, "learning_rate": 9.760455578014107e-06, "loss": 0.4146, "step": 2255 }, { "epoch": 0.5674044265593562, "grad_norm": 0.4138156473636627, "learning_rate": 9.760007891829635e-06, "loss": 0.411, "step": 2256 }, { "epoch": 0.5676559356136821, "grad_norm": 0.39025819301605225, "learning_rate": 9.759559797980265e-06, "loss": 0.3999, "step": 2257 }, { "epoch": 0.567907444668008, "grad_norm": 0.3810308873653412, "learning_rate": 9.759111296504374e-06, "loss": 0.3973, "step": 2258 }, { "epoch": 0.568158953722334, "grad_norm": 0.38749489188194275, "learning_rate": 9.758662387440374e-06, "loss": 0.3751, "step": 2259 }, { "epoch": 0.56841046277666, "grad_norm": 0.42117783427238464, "learning_rate": 9.75821307082671e-06, "loss": 0.3943, "step": 2260 }, { "epoch": 0.5686619718309859, "grad_norm": 0.38864991068840027, "learning_rate": 9.757763346701863e-06, "loss": 0.3931, "step": 2261 }, { "epoch": 0.5689134808853119, "grad_norm": 0.4100189805030823, "learning_rate": 9.757313215104352e-06, "loss": 0.4236, "step": 2262 }, { "epoch": 0.5691649899396378, "grad_norm": 0.36280521750450134, "learning_rate": 9.756862676072724e-06, "loss": 0.3773, "step": 2263 }, { "epoch": 0.5694164989939637, "grad_norm": 0.4067562222480774, "learning_rate": 9.756411729645567e-06, "loss": 0.388, "step": 2264 }, { "epoch": 0.5696680080482898, "grad_norm": 0.3898506164550781, "learning_rate": 9.755960375861502e-06, "loss": 0.3675, "step": 2265 }, { "epoch": 0.5699195171026157, "grad_norm": 0.38783085346221924, "learning_rate": 9.755508614759183e-06, "loss": 0.4199, "step": 2266 }, { "epoch": 0.5701710261569416, "grad_norm": 0.3513777256011963, "learning_rate": 9.755056446377302e-06, "loss": 0.3937, "step": 2267 }, { "epoch": 0.5704225352112676, "grad_norm": 0.39856287837028503, "learning_rate": 9.754603870754584e-06, "loss": 0.407, "step": 2268 }, { "epoch": 0.5706740442655935, "grad_norm": 0.39470675587654114, "learning_rate": 9.754150887929789e-06, "loss": 0.3876, "step": 2269 }, { "epoch": 0.5709255533199196, "grad_norm": 0.43347692489624023, "learning_rate": 9.753697497941713e-06, "loss": 0.4072, "step": 2270 }, { "epoch": 0.5711770623742455, "grad_norm": 0.41265973448753357, "learning_rate": 9.753243700829185e-06, "loss": 0.3898, "step": 2271 }, { "epoch": 0.5714285714285714, "grad_norm": 0.4033988416194916, "learning_rate": 9.75278949663107e-06, "loss": 0.3958, "step": 2272 }, { "epoch": 0.5716800804828974, "grad_norm": 0.4230313301086426, "learning_rate": 9.752334885386268e-06, "loss": 0.3657, "step": 2273 }, { "epoch": 0.5719315895372233, "grad_norm": 0.4359712600708008, "learning_rate": 9.751879867133713e-06, "loss": 0.4302, "step": 2274 }, { "epoch": 0.5721830985915493, "grad_norm": 0.3776133060455322, "learning_rate": 9.751424441912376e-06, "loss": 0.3786, "step": 2275 }, { "epoch": 0.5724346076458753, "grad_norm": 0.39019957184791565, "learning_rate": 9.75096860976126e-06, "loss": 0.3824, "step": 2276 }, { "epoch": 0.5726861167002012, "grad_norm": 0.38191524147987366, "learning_rate": 9.750512370719404e-06, "loss": 0.4106, "step": 2277 }, { "epoch": 0.5729376257545271, "grad_norm": 0.35566771030426025, "learning_rate": 9.750055724825885e-06, "loss": 0.387, "step": 2278 }, { "epoch": 0.5731891348088531, "grad_norm": 0.37126556038856506, "learning_rate": 9.749598672119807e-06, "loss": 0.4093, "step": 2279 }, { "epoch": 0.5734406438631791, "grad_norm": 0.3403175473213196, "learning_rate": 9.749141212640317e-06, "loss": 0.3993, "step": 2280 }, { "epoch": 0.5736921529175051, "grad_norm": 0.37190091609954834, "learning_rate": 9.748683346426591e-06, "loss": 0.3956, "step": 2281 }, { "epoch": 0.573943661971831, "grad_norm": 0.3574543595314026, "learning_rate": 9.748225073517845e-06, "loss": 0.4014, "step": 2282 }, { "epoch": 0.5741951710261569, "grad_norm": 0.4083276689052582, "learning_rate": 9.747766393953327e-06, "loss": 0.424, "step": 2283 }, { "epoch": 0.5744466800804829, "grad_norm": 0.35851186513900757, "learning_rate": 9.74730730777232e-06, "loss": 0.3974, "step": 2284 }, { "epoch": 0.5746981891348089, "grad_norm": 0.3459572494029999, "learning_rate": 9.74684781501414e-06, "loss": 0.384, "step": 2285 }, { "epoch": 0.5749496981891348, "grad_norm": 0.38862690329551697, "learning_rate": 9.746387915718139e-06, "loss": 0.4025, "step": 2286 }, { "epoch": 0.5752012072434608, "grad_norm": 0.3741775155067444, "learning_rate": 9.745927609923709e-06, "loss": 0.3985, "step": 2287 }, { "epoch": 0.5754527162977867, "grad_norm": 0.33961108326911926, "learning_rate": 9.745466897670268e-06, "loss": 0.3606, "step": 2288 }, { "epoch": 0.5757042253521126, "grad_norm": 0.3567553460597992, "learning_rate": 9.745005778997277e-06, "loss": 0.3874, "step": 2289 }, { "epoch": 0.5759557344064387, "grad_norm": 0.4063310921192169, "learning_rate": 9.744544253944223e-06, "loss": 0.385, "step": 2290 }, { "epoch": 0.5762072434607646, "grad_norm": 0.3446536660194397, "learning_rate": 9.744082322550637e-06, "loss": 0.4224, "step": 2291 }, { "epoch": 0.5764587525150905, "grad_norm": 0.3623557984828949, "learning_rate": 9.743619984856078e-06, "loss": 0.3903, "step": 2292 }, { "epoch": 0.5767102615694165, "grad_norm": 0.37968283891677856, "learning_rate": 9.743157240900145e-06, "loss": 0.404, "step": 2293 }, { "epoch": 0.5769617706237424, "grad_norm": 0.37783414125442505, "learning_rate": 9.742694090722466e-06, "loss": 0.3811, "step": 2294 }, { "epoch": 0.5772132796780685, "grad_norm": 0.36868754029273987, "learning_rate": 9.742230534362708e-06, "loss": 0.3869, "step": 2295 }, { "epoch": 0.5774647887323944, "grad_norm": 0.4327180087566376, "learning_rate": 9.741766571860573e-06, "loss": 0.399, "step": 2296 }, { "epoch": 0.5777162977867203, "grad_norm": 0.4020644724369049, "learning_rate": 9.741302203255796e-06, "loss": 0.4062, "step": 2297 }, { "epoch": 0.5779678068410463, "grad_norm": 0.3767627477645874, "learning_rate": 9.740837428588147e-06, "loss": 0.3712, "step": 2298 }, { "epoch": 0.5782193158953722, "grad_norm": 0.428296834230423, "learning_rate": 9.740372247897429e-06, "loss": 0.3948, "step": 2299 }, { "epoch": 0.5784708249496981, "grad_norm": 0.3837886154651642, "learning_rate": 9.739906661223485e-06, "loss": 0.3894, "step": 2300 }, { "epoch": 0.5787223340040242, "grad_norm": 0.38341113924980164, "learning_rate": 9.739440668606188e-06, "loss": 0.3709, "step": 2301 }, { "epoch": 0.5789738430583501, "grad_norm": 0.4215813875198364, "learning_rate": 9.738974270085447e-06, "loss": 0.3978, "step": 2302 }, { "epoch": 0.579225352112676, "grad_norm": 0.4352063834667206, "learning_rate": 9.738507465701207e-06, "loss": 0.3826, "step": 2303 }, { "epoch": 0.579476861167002, "grad_norm": 0.36459967494010925, "learning_rate": 9.738040255493446e-06, "loss": 0.3813, "step": 2304 }, { "epoch": 0.579728370221328, "grad_norm": 0.37163200974464417, "learning_rate": 9.737572639502179e-06, "loss": 0.3872, "step": 2305 }, { "epoch": 0.579979879275654, "grad_norm": 0.4496228098869324, "learning_rate": 9.737104617767454e-06, "loss": 0.4004, "step": 2306 }, { "epoch": 0.5802313883299799, "grad_norm": 0.4434552490711212, "learning_rate": 9.736636190329355e-06, "loss": 0.3779, "step": 2307 }, { "epoch": 0.5804828973843058, "grad_norm": 0.3943467438220978, "learning_rate": 9.736167357227995e-06, "loss": 0.3992, "step": 2308 }, { "epoch": 0.5807344064386318, "grad_norm": 0.41185152530670166, "learning_rate": 9.735698118503531e-06, "loss": 0.3785, "step": 2309 }, { "epoch": 0.5809859154929577, "grad_norm": 0.4015391170978546, "learning_rate": 9.735228474196152e-06, "loss": 0.399, "step": 2310 }, { "epoch": 0.5812374245472837, "grad_norm": 0.43395721912384033, "learning_rate": 9.734758424346075e-06, "loss": 0.3948, "step": 2311 }, { "epoch": 0.5814889336016097, "grad_norm": 0.41467130184173584, "learning_rate": 9.734287968993561e-06, "loss": 0.4115, "step": 2312 }, { "epoch": 0.5817404426559356, "grad_norm": 0.4236811697483063, "learning_rate": 9.7338171081789e-06, "loss": 0.4043, "step": 2313 }, { "epoch": 0.5819919517102615, "grad_norm": 0.38736096024513245, "learning_rate": 9.733345841942418e-06, "loss": 0.3729, "step": 2314 }, { "epoch": 0.5822434607645876, "grad_norm": 0.37845367193222046, "learning_rate": 9.732874170324479e-06, "loss": 0.4106, "step": 2315 }, { "epoch": 0.5824949698189135, "grad_norm": 0.35245969891548157, "learning_rate": 9.732402093365471e-06, "loss": 0.4144, "step": 2316 }, { "epoch": 0.5827464788732394, "grad_norm": 0.39204463362693787, "learning_rate": 9.731929611105833e-06, "loss": 0.4053, "step": 2317 }, { "epoch": 0.5829979879275654, "grad_norm": 0.389548122882843, "learning_rate": 9.731456723586026e-06, "loss": 0.4192, "step": 2318 }, { "epoch": 0.5832494969818913, "grad_norm": 0.38718488812446594, "learning_rate": 9.73098343084655e-06, "loss": 0.3687, "step": 2319 }, { "epoch": 0.5835010060362174, "grad_norm": 0.409236341714859, "learning_rate": 9.73050973292794e-06, "loss": 0.3934, "step": 2320 }, { "epoch": 0.5837525150905433, "grad_norm": 0.41710254549980164, "learning_rate": 9.730035629870766e-06, "loss": 0.375, "step": 2321 }, { "epoch": 0.5840040241448692, "grad_norm": 0.421811580657959, "learning_rate": 9.729561121715632e-06, "loss": 0.3895, "step": 2322 }, { "epoch": 0.5842555331991952, "grad_norm": 0.39268958568573, "learning_rate": 9.729086208503174e-06, "loss": 0.3992, "step": 2323 }, { "epoch": 0.5845070422535211, "grad_norm": 0.3878178298473358, "learning_rate": 9.728610890274068e-06, "loss": 0.3971, "step": 2324 }, { "epoch": 0.584758551307847, "grad_norm": 0.4158124029636383, "learning_rate": 9.728135167069022e-06, "loss": 0.3639, "step": 2325 }, { "epoch": 0.5850100603621731, "grad_norm": 0.3816595673561096, "learning_rate": 9.727659038928778e-06, "loss": 0.3709, "step": 2326 }, { "epoch": 0.585261569416499, "grad_norm": 0.40008431673049927, "learning_rate": 9.727182505894112e-06, "loss": 0.3762, "step": 2327 }, { "epoch": 0.5855130784708249, "grad_norm": 0.40834906697273254, "learning_rate": 9.72670556800584e-06, "loss": 0.3712, "step": 2328 }, { "epoch": 0.5857645875251509, "grad_norm": 0.37313729524612427, "learning_rate": 9.726228225304806e-06, "loss": 0.3801, "step": 2329 }, { "epoch": 0.5860160965794768, "grad_norm": 0.4533138871192932, "learning_rate": 9.72575047783189e-06, "loss": 0.4047, "step": 2330 }, { "epoch": 0.5862676056338029, "grad_norm": 0.3437679708003998, "learning_rate": 9.72527232562801e-06, "loss": 0.4038, "step": 2331 }, { "epoch": 0.5865191146881288, "grad_norm": 0.45910757780075073, "learning_rate": 9.724793768734117e-06, "loss": 0.4086, "step": 2332 }, { "epoch": 0.5867706237424547, "grad_norm": 0.41133296489715576, "learning_rate": 9.724314807191197e-06, "loss": 0.386, "step": 2333 }, { "epoch": 0.5870221327967807, "grad_norm": 0.39628586173057556, "learning_rate": 9.723835441040268e-06, "loss": 0.4064, "step": 2334 }, { "epoch": 0.5872736418511066, "grad_norm": 0.4965059459209442, "learning_rate": 9.723355670322385e-06, "loss": 0.416, "step": 2335 }, { "epoch": 0.5875251509054326, "grad_norm": 0.4447380304336548, "learning_rate": 9.722875495078638e-06, "loss": 0.4007, "step": 2336 }, { "epoch": 0.5877766599597586, "grad_norm": 0.3534230887889862, "learning_rate": 9.722394915350153e-06, "loss": 0.3665, "step": 2337 }, { "epoch": 0.5880281690140845, "grad_norm": 0.5113065838813782, "learning_rate": 9.721913931178084e-06, "loss": 0.3697, "step": 2338 }, { "epoch": 0.5882796780684104, "grad_norm": 0.37465915083885193, "learning_rate": 9.72143254260363e-06, "loss": 0.3943, "step": 2339 }, { "epoch": 0.5885311871227364, "grad_norm": 0.4188118278980255, "learning_rate": 9.720950749668013e-06, "loss": 0.3736, "step": 2340 }, { "epoch": 0.5887826961770624, "grad_norm": 0.4283120036125183, "learning_rate": 9.720468552412501e-06, "loss": 0.3916, "step": 2341 }, { "epoch": 0.5890342052313883, "grad_norm": 0.4219818115234375, "learning_rate": 9.719985950878386e-06, "loss": 0.37, "step": 2342 }, { "epoch": 0.5892857142857143, "grad_norm": 0.41337502002716064, "learning_rate": 9.719502945107004e-06, "loss": 0.3874, "step": 2343 }, { "epoch": 0.5895372233400402, "grad_norm": 0.3993411660194397, "learning_rate": 9.71901953513972e-06, "loss": 0.3988, "step": 2344 }, { "epoch": 0.5897887323943662, "grad_norm": 0.4108680188655853, "learning_rate": 9.718535721017936e-06, "loss": 0.4016, "step": 2345 }, { "epoch": 0.5900402414486922, "grad_norm": 0.4107624888420105, "learning_rate": 9.718051502783084e-06, "loss": 0.3762, "step": 2346 }, { "epoch": 0.5902917505030181, "grad_norm": 0.4297145903110504, "learning_rate": 9.717566880476639e-06, "loss": 0.3873, "step": 2347 }, { "epoch": 0.5905432595573441, "grad_norm": 0.42795687913894653, "learning_rate": 9.717081854140103e-06, "loss": 0.4027, "step": 2348 }, { "epoch": 0.59079476861167, "grad_norm": 0.42112451791763306, "learning_rate": 9.716596423815016e-06, "loss": 0.4161, "step": 2349 }, { "epoch": 0.5910462776659959, "grad_norm": 0.46127063035964966, "learning_rate": 9.716110589542952e-06, "loss": 0.3833, "step": 2350 }, { "epoch": 0.591297786720322, "grad_norm": 0.4687446355819702, "learning_rate": 9.71562435136552e-06, "loss": 0.391, "step": 2351 }, { "epoch": 0.5915492957746479, "grad_norm": 0.4273424446582794, "learning_rate": 9.715137709324363e-06, "loss": 0.3899, "step": 2352 }, { "epoch": 0.5918008048289738, "grad_norm": 0.4016727805137634, "learning_rate": 9.71465066346116e-06, "loss": 0.3874, "step": 2353 }, { "epoch": 0.5920523138832998, "grad_norm": 0.4089299440383911, "learning_rate": 9.714163213817621e-06, "loss": 0.3946, "step": 2354 }, { "epoch": 0.5923038229376257, "grad_norm": 0.3954598903656006, "learning_rate": 9.713675360435495e-06, "loss": 0.393, "step": 2355 }, { "epoch": 0.5925553319919518, "grad_norm": 0.42446476221084595, "learning_rate": 9.713187103356563e-06, "loss": 0.3604, "step": 2356 }, { "epoch": 0.5928068410462777, "grad_norm": 0.461434543132782, "learning_rate": 9.71269844262264e-06, "loss": 0.3915, "step": 2357 }, { "epoch": 0.5930583501006036, "grad_norm": 0.3812006711959839, "learning_rate": 9.712209378275581e-06, "loss": 0.3874, "step": 2358 }, { "epoch": 0.5933098591549296, "grad_norm": 0.44694066047668457, "learning_rate": 9.711719910357267e-06, "loss": 0.3899, "step": 2359 }, { "epoch": 0.5935613682092555, "grad_norm": 0.39847058057785034, "learning_rate": 9.711230038909619e-06, "loss": 0.4042, "step": 2360 }, { "epoch": 0.5938128772635815, "grad_norm": 0.3936542570590973, "learning_rate": 9.71073976397459e-06, "loss": 0.4044, "step": 2361 }, { "epoch": 0.5940643863179075, "grad_norm": 0.38293659687042236, "learning_rate": 9.710249085594171e-06, "loss": 0.3467, "step": 2362 }, { "epoch": 0.5943158953722334, "grad_norm": 0.4120056927204132, "learning_rate": 9.709758003810388e-06, "loss": 0.4156, "step": 2363 }, { "epoch": 0.5945674044265593, "grad_norm": 0.40175196528434753, "learning_rate": 9.709266518665293e-06, "loss": 0.3832, "step": 2364 }, { "epoch": 0.5948189134808853, "grad_norm": 0.3934299051761627, "learning_rate": 9.708774630200983e-06, "loss": 0.3808, "step": 2365 }, { "epoch": 0.5950704225352113, "grad_norm": 0.3721473515033722, "learning_rate": 9.708282338459582e-06, "loss": 0.3742, "step": 2366 }, { "epoch": 0.5953219315895373, "grad_norm": 0.34081289172172546, "learning_rate": 9.707789643483256e-06, "loss": 0.4101, "step": 2367 }, { "epoch": 0.5955734406438632, "grad_norm": 0.36988210678100586, "learning_rate": 9.707296545314197e-06, "loss": 0.4075, "step": 2368 }, { "epoch": 0.5958249496981891, "grad_norm": 0.3822750747203827, "learning_rate": 9.70680304399464e-06, "loss": 0.3974, "step": 2369 }, { "epoch": 0.5960764587525151, "grad_norm": 0.3905336856842041, "learning_rate": 9.706309139566847e-06, "loss": 0.3861, "step": 2370 }, { "epoch": 0.596327967806841, "grad_norm": 0.3887244760990143, "learning_rate": 9.705814832073118e-06, "loss": 0.3831, "step": 2371 }, { "epoch": 0.596579476861167, "grad_norm": 0.38636234402656555, "learning_rate": 9.705320121555789e-06, "loss": 0.3969, "step": 2372 }, { "epoch": 0.596830985915493, "grad_norm": 0.3817340135574341, "learning_rate": 9.704825008057229e-06, "loss": 0.3942, "step": 2373 }, { "epoch": 0.5970824949698189, "grad_norm": 0.41212648153305054, "learning_rate": 9.704329491619837e-06, "loss": 0.3865, "step": 2374 }, { "epoch": 0.5973340040241448, "grad_norm": 0.3645521402359009, "learning_rate": 9.703833572286056e-06, "loss": 0.389, "step": 2375 }, { "epoch": 0.5975855130784709, "grad_norm": 0.4224928319454193, "learning_rate": 9.703337250098357e-06, "loss": 0.3855, "step": 2376 }, { "epoch": 0.5978370221327968, "grad_norm": 0.40865787863731384, "learning_rate": 9.702840525099247e-06, "loss": 0.3671, "step": 2377 }, { "epoch": 0.5980885311871227, "grad_norm": 0.348753422498703, "learning_rate": 9.702343397331266e-06, "loss": 0.3635, "step": 2378 }, { "epoch": 0.5983400402414487, "grad_norm": 0.4527108669281006, "learning_rate": 9.701845866836992e-06, "loss": 0.4217, "step": 2379 }, { "epoch": 0.5985915492957746, "grad_norm": 0.4157269597053528, "learning_rate": 9.70134793365903e-06, "loss": 0.3824, "step": 2380 }, { "epoch": 0.5988430583501007, "grad_norm": 0.35614490509033203, "learning_rate": 9.700849597840035e-06, "loss": 0.4004, "step": 2381 }, { "epoch": 0.5990945674044266, "grad_norm": 0.41033607721328735, "learning_rate": 9.700350859422675e-06, "loss": 0.3927, "step": 2382 }, { "epoch": 0.5993460764587525, "grad_norm": 0.380023717880249, "learning_rate": 9.699851718449672e-06, "loss": 0.4076, "step": 2383 }, { "epoch": 0.5995975855130785, "grad_norm": 0.3968260586261749, "learning_rate": 9.699352174963772e-06, "loss": 0.3977, "step": 2384 }, { "epoch": 0.5998490945674044, "grad_norm": 0.3838002383708954, "learning_rate": 9.698852229007756e-06, "loss": 0.401, "step": 2385 }, { "epoch": 0.6001006036217303, "grad_norm": 0.4010978043079376, "learning_rate": 9.698351880624444e-06, "loss": 0.3975, "step": 2386 }, { "epoch": 0.6003521126760564, "grad_norm": 0.4017295241355896, "learning_rate": 9.697851129856687e-06, "loss": 0.3984, "step": 2387 }, { "epoch": 0.6006036217303823, "grad_norm": 0.4557097554206848, "learning_rate": 9.697349976747366e-06, "loss": 0.388, "step": 2388 }, { "epoch": 0.6008551307847082, "grad_norm": 0.3855472803115845, "learning_rate": 9.696848421339409e-06, "loss": 0.4105, "step": 2389 }, { "epoch": 0.6011066398390342, "grad_norm": 0.37822774052619934, "learning_rate": 9.696346463675767e-06, "loss": 0.3676, "step": 2390 }, { "epoch": 0.6013581488933601, "grad_norm": 0.41425397992134094, "learning_rate": 9.695844103799432e-06, "loss": 0.3996, "step": 2391 }, { "epoch": 0.6016096579476862, "grad_norm": 0.3729078769683838, "learning_rate": 9.695341341753426e-06, "loss": 0.3881, "step": 2392 }, { "epoch": 0.6018611670020121, "grad_norm": 0.41532474756240845, "learning_rate": 9.69483817758081e-06, "loss": 0.397, "step": 2393 }, { "epoch": 0.602112676056338, "grad_norm": 0.389249324798584, "learning_rate": 9.694334611324672e-06, "loss": 0.3902, "step": 2394 }, { "epoch": 0.602364185110664, "grad_norm": 0.3921426236629486, "learning_rate": 9.693830643028142e-06, "loss": 0.4204, "step": 2395 }, { "epoch": 0.60261569416499, "grad_norm": 0.3730478584766388, "learning_rate": 9.693326272734384e-06, "loss": 0.384, "step": 2396 }, { "epoch": 0.6028672032193159, "grad_norm": 0.3994380831718445, "learning_rate": 9.692821500486592e-06, "loss": 0.4142, "step": 2397 }, { "epoch": 0.6031187122736419, "grad_norm": 0.3791671395301819, "learning_rate": 9.692316326327995e-06, "loss": 0.3905, "step": 2398 }, { "epoch": 0.6033702213279678, "grad_norm": 0.3484344780445099, "learning_rate": 9.69181075030186e-06, "loss": 0.4205, "step": 2399 }, { "epoch": 0.6036217303822937, "grad_norm": 0.3879513144493103, "learning_rate": 9.691304772451487e-06, "loss": 0.3693, "step": 2400 }, { "epoch": 0.6038732394366197, "grad_norm": 0.4011459946632385, "learning_rate": 9.690798392820208e-06, "loss": 0.4037, "step": 2401 }, { "epoch": 0.6041247484909457, "grad_norm": 0.3635562062263489, "learning_rate": 9.690291611451394e-06, "loss": 0.391, "step": 2402 }, { "epoch": 0.6043762575452716, "grad_norm": 0.37556272745132446, "learning_rate": 9.689784428388444e-06, "loss": 0.4034, "step": 2403 }, { "epoch": 0.6046277665995976, "grad_norm": 0.38869237899780273, "learning_rate": 9.689276843674797e-06, "loss": 0.3951, "step": 2404 }, { "epoch": 0.6048792756539235, "grad_norm": 0.41964560747146606, "learning_rate": 9.688768857353925e-06, "loss": 0.4044, "step": 2405 }, { "epoch": 0.6051307847082495, "grad_norm": 0.40390586853027344, "learning_rate": 9.688260469469333e-06, "loss": 0.4318, "step": 2406 }, { "epoch": 0.6053822937625755, "grad_norm": 0.38943222165107727, "learning_rate": 9.687751680064562e-06, "loss": 0.4171, "step": 2407 }, { "epoch": 0.6056338028169014, "grad_norm": 0.37889039516448975, "learning_rate": 9.687242489183187e-06, "loss": 0.3723, "step": 2408 }, { "epoch": 0.6058853118712274, "grad_norm": 0.3625709116458893, "learning_rate": 9.686732896868814e-06, "loss": 0.3664, "step": 2409 }, { "epoch": 0.6061368209255533, "grad_norm": 0.37902170419692993, "learning_rate": 9.68622290316509e-06, "loss": 0.3782, "step": 2410 }, { "epoch": 0.6063883299798792, "grad_norm": 0.35425660014152527, "learning_rate": 9.68571250811569e-06, "loss": 0.3889, "step": 2411 }, { "epoch": 0.6066398390342053, "grad_norm": 0.3910084068775177, "learning_rate": 9.685201711764328e-06, "loss": 0.3994, "step": 2412 }, { "epoch": 0.6068913480885312, "grad_norm": 0.3474578261375427, "learning_rate": 9.68469051415475e-06, "loss": 0.3665, "step": 2413 }, { "epoch": 0.6071428571428571, "grad_norm": 0.37786930799484253, "learning_rate": 9.684178915330736e-06, "loss": 0.4192, "step": 2414 }, { "epoch": 0.6073943661971831, "grad_norm": 0.34964871406555176, "learning_rate": 9.683666915336102e-06, "loss": 0.3848, "step": 2415 }, { "epoch": 0.607645875251509, "grad_norm": 0.37684234976768494, "learning_rate": 9.683154514214698e-06, "loss": 0.3607, "step": 2416 }, { "epoch": 0.6078973843058351, "grad_norm": 0.41027969121932983, "learning_rate": 9.68264171201041e-06, "loss": 0.4109, "step": 2417 }, { "epoch": 0.608148893360161, "grad_norm": 0.3731859028339386, "learning_rate": 9.682128508767151e-06, "loss": 0.4011, "step": 2418 }, { "epoch": 0.6084004024144869, "grad_norm": 0.3846946954727173, "learning_rate": 9.681614904528877e-06, "loss": 0.3984, "step": 2419 }, { "epoch": 0.6086519114688129, "grad_norm": 0.378128319978714, "learning_rate": 9.681100899339574e-06, "loss": 0.396, "step": 2420 }, { "epoch": 0.6089034205231388, "grad_norm": 0.39976274967193604, "learning_rate": 9.680586493243265e-06, "loss": 0.3737, "step": 2421 }, { "epoch": 0.6091549295774648, "grad_norm": 0.38559094071388245, "learning_rate": 9.680071686284005e-06, "loss": 0.385, "step": 2422 }, { "epoch": 0.6094064386317908, "grad_norm": 0.43395066261291504, "learning_rate": 9.679556478505882e-06, "loss": 0.3969, "step": 2423 }, { "epoch": 0.6096579476861167, "grad_norm": 0.3740188181400299, "learning_rate": 9.679040869953023e-06, "loss": 0.3783, "step": 2424 }, { "epoch": 0.6099094567404426, "grad_norm": 0.38674283027648926, "learning_rate": 9.678524860669584e-06, "loss": 0.4026, "step": 2425 }, { "epoch": 0.6101609657947686, "grad_norm": 0.399495929479599, "learning_rate": 9.678008450699761e-06, "loss": 0.3948, "step": 2426 }, { "epoch": 0.6104124748490946, "grad_norm": 0.390152245759964, "learning_rate": 9.677491640087779e-06, "loss": 0.3892, "step": 2427 }, { "epoch": 0.6106639839034205, "grad_norm": 0.36457139253616333, "learning_rate": 9.6769744288779e-06, "loss": 0.3923, "step": 2428 }, { "epoch": 0.6109154929577465, "grad_norm": 0.3784310519695282, "learning_rate": 9.676456817114423e-06, "loss": 0.3828, "step": 2429 }, { "epoch": 0.6111670020120724, "grad_norm": 0.41220057010650635, "learning_rate": 9.675938804841673e-06, "loss": 0.4132, "step": 2430 }, { "epoch": 0.6114185110663984, "grad_norm": 0.3676017224788666, "learning_rate": 9.675420392104016e-06, "loss": 0.3921, "step": 2431 }, { "epoch": 0.6116700201207244, "grad_norm": 0.3908953070640564, "learning_rate": 9.674901578945853e-06, "loss": 0.3877, "step": 2432 }, { "epoch": 0.6119215291750503, "grad_norm": 0.38477277755737305, "learning_rate": 9.674382365411617e-06, "loss": 0.3855, "step": 2433 }, { "epoch": 0.6121730382293763, "grad_norm": 0.40748539566993713, "learning_rate": 9.673862751545773e-06, "loss": 0.3879, "step": 2434 }, { "epoch": 0.6124245472837022, "grad_norm": 0.389136403799057, "learning_rate": 9.673342737392824e-06, "loss": 0.4344, "step": 2435 }, { "epoch": 0.6126760563380281, "grad_norm": 0.3874034583568573, "learning_rate": 9.672822322997305e-06, "loss": 0.4105, "step": 2436 }, { "epoch": 0.6129275653923542, "grad_norm": 0.3671362102031708, "learning_rate": 9.672301508403788e-06, "loss": 0.383, "step": 2437 }, { "epoch": 0.6131790744466801, "grad_norm": 0.35450032353401184, "learning_rate": 9.671780293656876e-06, "loss": 0.3807, "step": 2438 }, { "epoch": 0.613430583501006, "grad_norm": 0.4165121018886566, "learning_rate": 9.67125867880121e-06, "loss": 0.3878, "step": 2439 }, { "epoch": 0.613682092555332, "grad_norm": 0.40714791417121887, "learning_rate": 9.67073666388146e-06, "loss": 0.4076, "step": 2440 }, { "epoch": 0.6139336016096579, "grad_norm": 0.42955923080444336, "learning_rate": 9.670214248942335e-06, "loss": 0.4108, "step": 2441 }, { "epoch": 0.614185110663984, "grad_norm": 0.38127392530441284, "learning_rate": 9.669691434028576e-06, "loss": 0.4032, "step": 2442 }, { "epoch": 0.6144366197183099, "grad_norm": 0.4305511713027954, "learning_rate": 9.66916821918496e-06, "loss": 0.3918, "step": 2443 }, { "epoch": 0.6146881287726358, "grad_norm": 0.45002248883247375, "learning_rate": 9.668644604456297e-06, "loss": 0.3847, "step": 2444 }, { "epoch": 0.6149396378269618, "grad_norm": 0.34926843643188477, "learning_rate": 9.668120589887429e-06, "loss": 0.3919, "step": 2445 }, { "epoch": 0.6151911468812877, "grad_norm": 0.39543116092681885, "learning_rate": 9.667596175523237e-06, "loss": 0.3568, "step": 2446 }, { "epoch": 0.6154426559356136, "grad_norm": 0.3891063630580902, "learning_rate": 9.667071361408633e-06, "loss": 0.3759, "step": 2447 }, { "epoch": 0.6156941649899397, "grad_norm": 0.3981350064277649, "learning_rate": 9.666546147588563e-06, "loss": 0.3784, "step": 2448 }, { "epoch": 0.6159456740442656, "grad_norm": 0.3683044910430908, "learning_rate": 9.666020534108009e-06, "loss": 0.3846, "step": 2449 }, { "epoch": 0.6161971830985915, "grad_norm": 0.423318088054657, "learning_rate": 9.665494521011988e-06, "loss": 0.3747, "step": 2450 }, { "epoch": 0.6164486921529175, "grad_norm": 0.3727003037929535, "learning_rate": 9.664968108345549e-06, "loss": 0.3517, "step": 2451 }, { "epoch": 0.6167002012072434, "grad_norm": 0.3899252414703369, "learning_rate": 9.664441296153775e-06, "loss": 0.3964, "step": 2452 }, { "epoch": 0.6169517102615694, "grad_norm": 0.415444940328598, "learning_rate": 9.663914084481784e-06, "loss": 0.3811, "step": 2453 }, { "epoch": 0.6172032193158954, "grad_norm": 0.4385893940925598, "learning_rate": 9.66338647337473e-06, "loss": 0.4241, "step": 2454 }, { "epoch": 0.6174547283702213, "grad_norm": 0.3797210454940796, "learning_rate": 9.662858462877797e-06, "loss": 0.3835, "step": 2455 }, { "epoch": 0.6177062374245473, "grad_norm": 0.4100744426250458, "learning_rate": 9.662330053036208e-06, "loss": 0.408, "step": 2456 }, { "epoch": 0.6179577464788732, "grad_norm": 0.3938503563404083, "learning_rate": 9.66180124389522e-06, "loss": 0.4077, "step": 2457 }, { "epoch": 0.6182092555331992, "grad_norm": 0.3984260857105255, "learning_rate": 9.661272035500115e-06, "loss": 0.3775, "step": 2458 }, { "epoch": 0.6184607645875252, "grad_norm": 0.39625781774520874, "learning_rate": 9.660742427896224e-06, "loss": 0.3969, "step": 2459 }, { "epoch": 0.6187122736418511, "grad_norm": 0.4334189295768738, "learning_rate": 9.6602124211289e-06, "loss": 0.3946, "step": 2460 }, { "epoch": 0.618963782696177, "grad_norm": 0.38942191004753113, "learning_rate": 9.65968201524354e-06, "loss": 0.3788, "step": 2461 }, { "epoch": 0.619215291750503, "grad_norm": 0.36435267329216003, "learning_rate": 9.659151210285562e-06, "loss": 0.3685, "step": 2462 }, { "epoch": 0.619466800804829, "grad_norm": 0.4146406650543213, "learning_rate": 9.658620006300432e-06, "loss": 0.3682, "step": 2463 }, { "epoch": 0.6197183098591549, "grad_norm": 0.41353920102119446, "learning_rate": 9.658088403333642e-06, "loss": 0.3967, "step": 2464 }, { "epoch": 0.6199698189134809, "grad_norm": 0.41589221358299255, "learning_rate": 9.657556401430723e-06, "loss": 0.4066, "step": 2465 }, { "epoch": 0.6202213279678068, "grad_norm": 0.47424760460853577, "learning_rate": 9.657024000637235e-06, "loss": 0.3937, "step": 2466 }, { "epoch": 0.6204728370221329, "grad_norm": 0.3641650080680847, "learning_rate": 9.656491200998774e-06, "loss": 0.3926, "step": 2467 }, { "epoch": 0.6207243460764588, "grad_norm": 0.4509759247303009, "learning_rate": 9.655958002560974e-06, "loss": 0.4217, "step": 2468 }, { "epoch": 0.6209758551307847, "grad_norm": 0.442841112613678, "learning_rate": 9.655424405369497e-06, "loss": 0.3757, "step": 2469 }, { "epoch": 0.6212273641851107, "grad_norm": 0.3849235475063324, "learning_rate": 9.654890409470047e-06, "loss": 0.4041, "step": 2470 }, { "epoch": 0.6214788732394366, "grad_norm": 0.39690494537353516, "learning_rate": 9.654356014908352e-06, "loss": 0.3915, "step": 2471 }, { "epoch": 0.6217303822937625, "grad_norm": 0.4252414107322693, "learning_rate": 9.653821221730183e-06, "loss": 0.3808, "step": 2472 }, { "epoch": 0.6219818913480886, "grad_norm": 0.38544967770576477, "learning_rate": 9.65328602998134e-06, "loss": 0.3856, "step": 2473 }, { "epoch": 0.6222334004024145, "grad_norm": 0.3949087858200073, "learning_rate": 9.65275043970766e-06, "loss": 0.3938, "step": 2474 }, { "epoch": 0.6224849094567404, "grad_norm": 0.3579958975315094, "learning_rate": 9.65221445095501e-06, "loss": 0.4041, "step": 2475 }, { "epoch": 0.6227364185110664, "grad_norm": 0.40078824758529663, "learning_rate": 9.6516780637693e-06, "loss": 0.367, "step": 2476 }, { "epoch": 0.6229879275653923, "grad_norm": 0.39681002497673035, "learning_rate": 9.651141278196462e-06, "loss": 0.4074, "step": 2477 }, { "epoch": 0.6232394366197183, "grad_norm": 0.37867873907089233, "learning_rate": 9.650604094282471e-06, "loss": 0.3701, "step": 2478 }, { "epoch": 0.6234909456740443, "grad_norm": 0.43801194429397583, "learning_rate": 9.650066512073336e-06, "loss": 0.3802, "step": 2479 }, { "epoch": 0.6237424547283702, "grad_norm": 0.34844502806663513, "learning_rate": 9.649528531615094e-06, "loss": 0.3914, "step": 2480 }, { "epoch": 0.6239939637826962, "grad_norm": 0.3765834867954254, "learning_rate": 9.64899015295382e-06, "loss": 0.3964, "step": 2481 }, { "epoch": 0.6242454728370221, "grad_norm": 0.4189784526824951, "learning_rate": 9.648451376135624e-06, "loss": 0.3936, "step": 2482 }, { "epoch": 0.6244969818913481, "grad_norm": 0.3877945840358734, "learning_rate": 9.647912201206646e-06, "loss": 0.3809, "step": 2483 }, { "epoch": 0.6247484909456741, "grad_norm": 0.34489884972572327, "learning_rate": 9.647372628213068e-06, "loss": 0.3805, "step": 2484 }, { "epoch": 0.625, "grad_norm": 0.3784612715244293, "learning_rate": 9.646832657201097e-06, "loss": 0.3824, "step": 2485 }, { "epoch": 0.6252515090543259, "grad_norm": 0.3295539319515228, "learning_rate": 9.646292288216978e-06, "loss": 0.3783, "step": 2486 }, { "epoch": 0.6255030181086519, "grad_norm": 0.3725329339504242, "learning_rate": 9.645751521306994e-06, "loss": 0.4002, "step": 2487 }, { "epoch": 0.6257545271629779, "grad_norm": 0.386722207069397, "learning_rate": 9.645210356517454e-06, "loss": 0.3934, "step": 2488 }, { "epoch": 0.6260060362173038, "grad_norm": 0.3421211838722229, "learning_rate": 9.64466879389471e-06, "loss": 0.3854, "step": 2489 }, { "epoch": 0.6262575452716298, "grad_norm": 0.42979565262794495, "learning_rate": 9.644126833485139e-06, "loss": 0.3659, "step": 2490 }, { "epoch": 0.6265090543259557, "grad_norm": 0.36490967869758606, "learning_rate": 9.643584475335157e-06, "loss": 0.384, "step": 2491 }, { "epoch": 0.6267605633802817, "grad_norm": 0.4025133550167084, "learning_rate": 9.643041719491218e-06, "loss": 0.3844, "step": 2492 }, { "epoch": 0.6270120724346077, "grad_norm": 0.36443737149238586, "learning_rate": 9.6424985659998e-06, "loss": 0.3851, "step": 2493 }, { "epoch": 0.6272635814889336, "grad_norm": 0.33622828125953674, "learning_rate": 9.641955014907425e-06, "loss": 0.3756, "step": 2494 }, { "epoch": 0.6275150905432596, "grad_norm": 0.3600637912750244, "learning_rate": 9.64141106626064e-06, "loss": 0.3833, "step": 2495 }, { "epoch": 0.6277665995975855, "grad_norm": 0.37680599093437195, "learning_rate": 9.640866720106037e-06, "loss": 0.3976, "step": 2496 }, { "epoch": 0.6280181086519114, "grad_norm": 0.380525141954422, "learning_rate": 9.64032197649023e-06, "loss": 0.3714, "step": 2497 }, { "epoch": 0.6282696177062375, "grad_norm": 0.3835989534854889, "learning_rate": 9.639776835459878e-06, "loss": 0.4056, "step": 2498 }, { "epoch": 0.6285211267605634, "grad_norm": 0.46692678332328796, "learning_rate": 9.639231297061663e-06, "loss": 0.4207, "step": 2499 }, { "epoch": 0.6287726358148893, "grad_norm": 0.3480624258518219, "learning_rate": 9.638685361342314e-06, "loss": 0.4059, "step": 2500 }, { "epoch": 0.6290241448692153, "grad_norm": 0.4090098440647125, "learning_rate": 9.63813902834858e-06, "loss": 0.3965, "step": 2501 }, { "epoch": 0.6292756539235412, "grad_norm": 0.37328529357910156, "learning_rate": 9.637592298127258e-06, "loss": 0.3768, "step": 2502 }, { "epoch": 0.6295271629778671, "grad_norm": 0.43410345911979675, "learning_rate": 9.637045170725165e-06, "loss": 0.3961, "step": 2503 }, { "epoch": 0.6297786720321932, "grad_norm": 0.3536240756511688, "learning_rate": 9.636497646189165e-06, "loss": 0.3811, "step": 2504 }, { "epoch": 0.6300301810865191, "grad_norm": 0.35403186082839966, "learning_rate": 9.635949724566147e-06, "loss": 0.3764, "step": 2505 }, { "epoch": 0.6302816901408451, "grad_norm": 0.3755878508090973, "learning_rate": 9.635401405903037e-06, "loss": 0.378, "step": 2506 }, { "epoch": 0.630533199195171, "grad_norm": 0.36756351590156555, "learning_rate": 9.634852690246795e-06, "loss": 0.387, "step": 2507 }, { "epoch": 0.630784708249497, "grad_norm": 0.3477321267127991, "learning_rate": 9.634303577644415e-06, "loss": 0.3545, "step": 2508 }, { "epoch": 0.631036217303823, "grad_norm": 0.33334577083587646, "learning_rate": 9.633754068142928e-06, "loss": 0.3817, "step": 2509 }, { "epoch": 0.6312877263581489, "grad_norm": 0.34079161286354065, "learning_rate": 9.633204161789392e-06, "loss": 0.367, "step": 2510 }, { "epoch": 0.6315392354124748, "grad_norm": 0.4175172746181488, "learning_rate": 9.632653858630905e-06, "loss": 0.3825, "step": 2511 }, { "epoch": 0.6317907444668008, "grad_norm": 0.3560580015182495, "learning_rate": 9.632103158714596e-06, "loss": 0.3525, "step": 2512 }, { "epoch": 0.6320422535211268, "grad_norm": 0.3939068913459778, "learning_rate": 9.631552062087632e-06, "loss": 0.4022, "step": 2513 }, { "epoch": 0.6322937625754527, "grad_norm": 0.3403138518333435, "learning_rate": 9.631000568797208e-06, "loss": 0.4065, "step": 2514 }, { "epoch": 0.6325452716297787, "grad_norm": 0.3796042799949646, "learning_rate": 9.630448678890556e-06, "loss": 0.4052, "step": 2515 }, { "epoch": 0.6327967806841046, "grad_norm": 0.3852931559085846, "learning_rate": 9.629896392414943e-06, "loss": 0.3769, "step": 2516 }, { "epoch": 0.6330482897384306, "grad_norm": 0.3552147448062897, "learning_rate": 9.62934370941767e-06, "loss": 0.3913, "step": 2517 }, { "epoch": 0.6332997987927566, "grad_norm": 0.37440934777259827, "learning_rate": 9.628790629946066e-06, "loss": 0.387, "step": 2518 }, { "epoch": 0.6335513078470825, "grad_norm": 0.4127063751220703, "learning_rate": 9.628237154047504e-06, "loss": 0.3924, "step": 2519 }, { "epoch": 0.6338028169014085, "grad_norm": 0.3693302273750305, "learning_rate": 9.627683281769384e-06, "loss": 0.3824, "step": 2520 }, { "epoch": 0.6340543259557344, "grad_norm": 0.3535366654396057, "learning_rate": 9.627129013159142e-06, "loss": 0.3505, "step": 2521 }, { "epoch": 0.6343058350100603, "grad_norm": 0.43735992908477783, "learning_rate": 9.626574348264246e-06, "loss": 0.3739, "step": 2522 }, { "epoch": 0.6345573440643864, "grad_norm": 0.41980883479118347, "learning_rate": 9.626019287132202e-06, "loss": 0.3887, "step": 2523 }, { "epoch": 0.6348088531187123, "grad_norm": 0.3785690665245056, "learning_rate": 9.625463829810547e-06, "loss": 0.4176, "step": 2524 }, { "epoch": 0.6350603621730382, "grad_norm": 0.42933017015457153, "learning_rate": 9.62490797634685e-06, "loss": 0.3994, "step": 2525 }, { "epoch": 0.6353118712273642, "grad_norm": 0.3780410885810852, "learning_rate": 9.624351726788719e-06, "loss": 0.379, "step": 2526 }, { "epoch": 0.6355633802816901, "grad_norm": 0.36872223019599915, "learning_rate": 9.623795081183794e-06, "loss": 0.4074, "step": 2527 }, { "epoch": 0.635814889336016, "grad_norm": 0.3785404562950134, "learning_rate": 9.623238039579742e-06, "loss": 0.3602, "step": 2528 }, { "epoch": 0.6360663983903421, "grad_norm": 0.3856281638145447, "learning_rate": 9.622680602024278e-06, "loss": 0.3694, "step": 2529 }, { "epoch": 0.636317907444668, "grad_norm": 0.3590550124645233, "learning_rate": 9.62212276856514e-06, "loss": 0.3768, "step": 2530 }, { "epoch": 0.636569416498994, "grad_norm": 0.4134266972541809, "learning_rate": 9.621564539250103e-06, "loss": 0.3803, "step": 2531 }, { "epoch": 0.6368209255533199, "grad_norm": 0.3633298873901367, "learning_rate": 9.621005914126974e-06, "loss": 0.3774, "step": 2532 }, { "epoch": 0.6370724346076458, "grad_norm": 0.4221361577510834, "learning_rate": 9.6204468932436e-06, "loss": 0.3943, "step": 2533 }, { "epoch": 0.6373239436619719, "grad_norm": 0.3500104546546936, "learning_rate": 9.619887476647854e-06, "loss": 0.3893, "step": 2534 }, { "epoch": 0.6375754527162978, "grad_norm": 0.365842342376709, "learning_rate": 9.619327664387648e-06, "loss": 0.3697, "step": 2535 }, { "epoch": 0.6378269617706237, "grad_norm": 0.36636513471603394, "learning_rate": 9.618767456510924e-06, "loss": 0.3848, "step": 2536 }, { "epoch": 0.6380784708249497, "grad_norm": 0.37152019143104553, "learning_rate": 9.618206853065664e-06, "loss": 0.377, "step": 2537 }, { "epoch": 0.6383299798792756, "grad_norm": 0.37324413657188416, "learning_rate": 9.617645854099878e-06, "loss": 0.3837, "step": 2538 }, { "epoch": 0.6385814889336016, "grad_norm": 0.3649263083934784, "learning_rate": 9.617084459661612e-06, "loss": 0.3921, "step": 2539 }, { "epoch": 0.6388329979879276, "grad_norm": 0.4074450135231018, "learning_rate": 9.616522669798947e-06, "loss": 0.4087, "step": 2540 }, { "epoch": 0.6390845070422535, "grad_norm": 0.3745105564594269, "learning_rate": 9.615960484559995e-06, "loss": 0.3926, "step": 2541 }, { "epoch": 0.6393360160965795, "grad_norm": 0.42575785517692566, "learning_rate": 9.615397903992906e-06, "loss": 0.3892, "step": 2542 }, { "epoch": 0.6395875251509054, "grad_norm": 0.3537648916244507, "learning_rate": 9.61483492814586e-06, "loss": 0.3679, "step": 2543 }, { "epoch": 0.6398390342052314, "grad_norm": 0.37703290581703186, "learning_rate": 9.614271557067072e-06, "loss": 0.3567, "step": 2544 }, { "epoch": 0.6400905432595574, "grad_norm": 0.40531283617019653, "learning_rate": 9.613707790804794e-06, "loss": 0.3992, "step": 2545 }, { "epoch": 0.6403420523138833, "grad_norm": 0.37500283122062683, "learning_rate": 9.613143629407305e-06, "loss": 0.3933, "step": 2546 }, { "epoch": 0.6405935613682092, "grad_norm": 0.41981109976768494, "learning_rate": 9.612579072922926e-06, "loss": 0.3811, "step": 2547 }, { "epoch": 0.6408450704225352, "grad_norm": 0.416825532913208, "learning_rate": 9.612014121400003e-06, "loss": 0.3978, "step": 2548 }, { "epoch": 0.6410965794768612, "grad_norm": 0.40258339047431946, "learning_rate": 9.611448774886925e-06, "loss": 0.3916, "step": 2549 }, { "epoch": 0.6413480885311871, "grad_norm": 0.44783157110214233, "learning_rate": 9.610883033432107e-06, "loss": 0.3962, "step": 2550 }, { "epoch": 0.6415995975855131, "grad_norm": 0.3728145360946655, "learning_rate": 9.610316897084004e-06, "loss": 0.3778, "step": 2551 }, { "epoch": 0.641851106639839, "grad_norm": 0.3508928418159485, "learning_rate": 9.6097503658911e-06, "loss": 0.3749, "step": 2552 }, { "epoch": 0.6421026156941649, "grad_norm": 0.38925930857658386, "learning_rate": 9.609183439901917e-06, "loss": 0.3726, "step": 2553 }, { "epoch": 0.642354124748491, "grad_norm": 0.3719974458217621, "learning_rate": 9.608616119165007e-06, "loss": 0.3923, "step": 2554 }, { "epoch": 0.6426056338028169, "grad_norm": 0.3723568618297577, "learning_rate": 9.608048403728957e-06, "loss": 0.3739, "step": 2555 }, { "epoch": 0.6428571428571429, "grad_norm": 0.3961268365383148, "learning_rate": 9.60748029364239e-06, "loss": 0.3812, "step": 2556 }, { "epoch": 0.6431086519114688, "grad_norm": 0.38643985986709595, "learning_rate": 9.60691178895396e-06, "loss": 0.4085, "step": 2557 }, { "epoch": 0.6433601609657947, "grad_norm": 0.3590714931488037, "learning_rate": 9.606342889712354e-06, "loss": 0.3862, "step": 2558 }, { "epoch": 0.6436116700201208, "grad_norm": 0.35285237431526184, "learning_rate": 9.605773595966298e-06, "loss": 0.4006, "step": 2559 }, { "epoch": 0.6438631790744467, "grad_norm": 0.36502790451049805, "learning_rate": 9.605203907764548e-06, "loss": 0.3655, "step": 2560 }, { "epoch": 0.6441146881287726, "grad_norm": 0.3753395676612854, "learning_rate": 9.604633825155894e-06, "loss": 0.4009, "step": 2561 }, { "epoch": 0.6443661971830986, "grad_norm": 0.39320510625839233, "learning_rate": 9.604063348189158e-06, "loss": 0.3895, "step": 2562 }, { "epoch": 0.6446177062374245, "grad_norm": 0.320444256067276, "learning_rate": 9.603492476913199e-06, "loss": 0.3756, "step": 2563 }, { "epoch": 0.6448692152917505, "grad_norm": 0.349065899848938, "learning_rate": 9.60292121137691e-06, "loss": 0.4084, "step": 2564 }, { "epoch": 0.6451207243460765, "grad_norm": 0.34628018736839294, "learning_rate": 9.602349551629213e-06, "loss": 0.4003, "step": 2565 }, { "epoch": 0.6453722334004024, "grad_norm": 0.35905465483665466, "learning_rate": 9.601777497719071e-06, "loss": 0.3999, "step": 2566 }, { "epoch": 0.6456237424547284, "grad_norm": 0.3582354187965393, "learning_rate": 9.601205049695473e-06, "loss": 0.3676, "step": 2567 }, { "epoch": 0.6458752515090543, "grad_norm": 0.35198500752449036, "learning_rate": 9.60063220760745e-06, "loss": 0.4059, "step": 2568 }, { "epoch": 0.6461267605633803, "grad_norm": 0.3506782352924347, "learning_rate": 9.600058971504058e-06, "loss": 0.3826, "step": 2569 }, { "epoch": 0.6463782696177063, "grad_norm": 0.3575206398963928, "learning_rate": 9.599485341434394e-06, "loss": 0.3791, "step": 2570 }, { "epoch": 0.6466297786720322, "grad_norm": 0.3570882976055145, "learning_rate": 9.598911317447583e-06, "loss": 0.3798, "step": 2571 }, { "epoch": 0.6468812877263581, "grad_norm": 0.36667707562446594, "learning_rate": 9.598336899592791e-06, "loss": 0.3967, "step": 2572 }, { "epoch": 0.6471327967806841, "grad_norm": 0.31375589966773987, "learning_rate": 9.597762087919209e-06, "loss": 0.3925, "step": 2573 }, { "epoch": 0.64738430583501, "grad_norm": 0.3781939446926117, "learning_rate": 9.597186882476069e-06, "loss": 0.3934, "step": 2574 }, { "epoch": 0.647635814889336, "grad_norm": 0.36152127385139465, "learning_rate": 9.59661128331263e-06, "loss": 0.4066, "step": 2575 }, { "epoch": 0.647887323943662, "grad_norm": 0.3301715552806854, "learning_rate": 9.596035290478192e-06, "loss": 0.3804, "step": 2576 }, { "epoch": 0.6481388329979879, "grad_norm": 0.4257327914237976, "learning_rate": 9.595458904022086e-06, "loss": 0.3964, "step": 2577 }, { "epoch": 0.6483903420523138, "grad_norm": 0.3644980490207672, "learning_rate": 9.594882123993671e-06, "loss": 0.3798, "step": 2578 }, { "epoch": 0.6486418511066399, "grad_norm": 0.3696765601634979, "learning_rate": 9.59430495044235e-06, "loss": 0.3466, "step": 2579 }, { "epoch": 0.6488933601609658, "grad_norm": 0.450061172246933, "learning_rate": 9.593727383417551e-06, "loss": 0.3997, "step": 2580 }, { "epoch": 0.6491448692152918, "grad_norm": 0.4249400794506073, "learning_rate": 9.59314942296874e-06, "loss": 0.3988, "step": 2581 }, { "epoch": 0.6493963782696177, "grad_norm": 0.44383350014686584, "learning_rate": 9.592571069145415e-06, "loss": 0.3948, "step": 2582 }, { "epoch": 0.6496478873239436, "grad_norm": 0.3797934353351593, "learning_rate": 9.591992321997107e-06, "loss": 0.3807, "step": 2583 }, { "epoch": 0.6498993963782697, "grad_norm": 0.40666332840919495, "learning_rate": 9.591413181573388e-06, "loss": 0.4152, "step": 2584 }, { "epoch": 0.6501509054325956, "grad_norm": 0.36471301317214966, "learning_rate": 9.590833647923852e-06, "loss": 0.3952, "step": 2585 }, { "epoch": 0.6504024144869215, "grad_norm": 0.40786394476890564, "learning_rate": 9.590253721098135e-06, "loss": 0.3957, "step": 2586 }, { "epoch": 0.6506539235412475, "grad_norm": 0.3736938238143921, "learning_rate": 9.589673401145902e-06, "loss": 0.3554, "step": 2587 }, { "epoch": 0.6509054325955734, "grad_norm": 0.3855222761631012, "learning_rate": 9.589092688116855e-06, "loss": 0.4014, "step": 2588 }, { "epoch": 0.6511569416498993, "grad_norm": 0.41750627756118774, "learning_rate": 9.58851158206073e-06, "loss": 0.3998, "step": 2589 }, { "epoch": 0.6514084507042254, "grad_norm": 0.4044592082500458, "learning_rate": 9.587930083027293e-06, "loss": 0.3571, "step": 2590 }, { "epoch": 0.6516599597585513, "grad_norm": 0.3685952126979828, "learning_rate": 9.587348191066345e-06, "loss": 0.3984, "step": 2591 }, { "epoch": 0.6519114688128773, "grad_norm": 0.3659170866012573, "learning_rate": 9.586765906227727e-06, "loss": 0.3794, "step": 2592 }, { "epoch": 0.6521629778672032, "grad_norm": 0.3769875466823578, "learning_rate": 9.586183228561299e-06, "loss": 0.3803, "step": 2593 }, { "epoch": 0.6524144869215291, "grad_norm": 0.40973907709121704, "learning_rate": 9.58560015811697e-06, "loss": 0.3876, "step": 2594 }, { "epoch": 0.6526659959758552, "grad_norm": 0.377444326877594, "learning_rate": 9.585016694944676e-06, "loss": 0.402, "step": 2595 }, { "epoch": 0.6529175050301811, "grad_norm": 0.3654913902282715, "learning_rate": 9.584432839094387e-06, "loss": 0.3613, "step": 2596 }, { "epoch": 0.653169014084507, "grad_norm": 0.3855876326560974, "learning_rate": 9.583848590616102e-06, "loss": 0.3833, "step": 2597 }, { "epoch": 0.653420523138833, "grad_norm": 0.40650826692581177, "learning_rate": 9.583263949559864e-06, "loss": 0.3993, "step": 2598 }, { "epoch": 0.653672032193159, "grad_norm": 0.4067690670490265, "learning_rate": 9.582678915975741e-06, "loss": 0.3818, "step": 2599 }, { "epoch": 0.6539235412474849, "grad_norm": 0.38773733377456665, "learning_rate": 9.582093489913838e-06, "loss": 0.39, "step": 2600 }, { "epoch": 0.6541750503018109, "grad_norm": 0.3709178864955902, "learning_rate": 9.581507671424293e-06, "loss": 0.3899, "step": 2601 }, { "epoch": 0.6544265593561368, "grad_norm": 0.4474826157093048, "learning_rate": 9.580921460557278e-06, "loss": 0.406, "step": 2602 }, { "epoch": 0.6546780684104627, "grad_norm": 0.3681783974170685, "learning_rate": 9.580334857362997e-06, "loss": 0.393, "step": 2603 }, { "epoch": 0.6549295774647887, "grad_norm": 0.4004930555820465, "learning_rate": 9.57974786189169e-06, "loss": 0.4031, "step": 2604 }, { "epoch": 0.6551810865191147, "grad_norm": 0.48355597257614136, "learning_rate": 9.579160474193632e-06, "loss": 0.3869, "step": 2605 }, { "epoch": 0.6554325955734407, "grad_norm": 0.3602335751056671, "learning_rate": 9.578572694319124e-06, "loss": 0.4049, "step": 2606 }, { "epoch": 0.6556841046277666, "grad_norm": 0.40569397807121277, "learning_rate": 9.577984522318508e-06, "loss": 0.3723, "step": 2607 }, { "epoch": 0.6559356136820925, "grad_norm": 0.41683316230773926, "learning_rate": 9.57739595824216e-06, "loss": 0.3858, "step": 2608 }, { "epoch": 0.6561871227364185, "grad_norm": 0.3983708322048187, "learning_rate": 9.576807002140483e-06, "loss": 0.4024, "step": 2609 }, { "epoch": 0.6564386317907445, "grad_norm": 0.3469032347202301, "learning_rate": 9.576217654063917e-06, "loss": 0.3775, "step": 2610 }, { "epoch": 0.6566901408450704, "grad_norm": 0.4117089509963989, "learning_rate": 9.57562791406294e-06, "loss": 0.4051, "step": 2611 }, { "epoch": 0.6569416498993964, "grad_norm": 0.37733131647109985, "learning_rate": 9.575037782188054e-06, "loss": 0.3771, "step": 2612 }, { "epoch": 0.6571931589537223, "grad_norm": 0.3928118944168091, "learning_rate": 9.574447258489808e-06, "loss": 0.3604, "step": 2613 }, { "epoch": 0.6574446680080482, "grad_norm": 0.3928970396518707, "learning_rate": 9.573856343018768e-06, "loss": 0.3788, "step": 2614 }, { "epoch": 0.6576961770623743, "grad_norm": 0.402646541595459, "learning_rate": 9.573265035825548e-06, "loss": 0.3922, "step": 2615 }, { "epoch": 0.6579476861167002, "grad_norm": 0.3756962716579437, "learning_rate": 9.572673336960787e-06, "loss": 0.3839, "step": 2616 }, { "epoch": 0.6581991951710262, "grad_norm": 0.3633159399032593, "learning_rate": 9.572081246475162e-06, "loss": 0.3558, "step": 2617 }, { "epoch": 0.6584507042253521, "grad_norm": 0.39355260133743286, "learning_rate": 9.571488764419381e-06, "loss": 0.3855, "step": 2618 }, { "epoch": 0.658702213279678, "grad_norm": 0.3599531054496765, "learning_rate": 9.570895890844188e-06, "loss": 0.3917, "step": 2619 }, { "epoch": 0.6589537223340041, "grad_norm": 0.3619001507759094, "learning_rate": 9.570302625800353e-06, "loss": 0.3906, "step": 2620 }, { "epoch": 0.65920523138833, "grad_norm": 0.35282886028289795, "learning_rate": 9.569708969338694e-06, "loss": 0.3902, "step": 2621 }, { "epoch": 0.6594567404426559, "grad_norm": 0.36944806575775146, "learning_rate": 9.569114921510048e-06, "loss": 0.3758, "step": 2622 }, { "epoch": 0.6597082494969819, "grad_norm": 0.3732403516769409, "learning_rate": 9.568520482365293e-06, "loss": 0.3795, "step": 2623 }, { "epoch": 0.6599597585513078, "grad_norm": 0.35911738872528076, "learning_rate": 9.56792565195534e-06, "loss": 0.4018, "step": 2624 }, { "epoch": 0.6602112676056338, "grad_norm": 0.385262131690979, "learning_rate": 9.567330430331133e-06, "loss": 0.3562, "step": 2625 }, { "epoch": 0.6604627766599598, "grad_norm": 0.3803010582923889, "learning_rate": 9.566734817543645e-06, "loss": 0.3979, "step": 2626 }, { "epoch": 0.6607142857142857, "grad_norm": 0.4101577401161194, "learning_rate": 9.566138813643891e-06, "loss": 0.4253, "step": 2627 }, { "epoch": 0.6609657947686117, "grad_norm": 0.39751216769218445, "learning_rate": 9.565542418682914e-06, "loss": 0.3937, "step": 2628 }, { "epoch": 0.6612173038229376, "grad_norm": 0.3846282958984375, "learning_rate": 9.564945632711789e-06, "loss": 0.403, "step": 2629 }, { "epoch": 0.6614688128772636, "grad_norm": 0.3686662018299103, "learning_rate": 9.56434845578163e-06, "loss": 0.37, "step": 2630 }, { "epoch": 0.6617203219315896, "grad_norm": 0.43031734228134155, "learning_rate": 9.56375088794358e-06, "loss": 0.381, "step": 2631 }, { "epoch": 0.6619718309859155, "grad_norm": 0.36381101608276367, "learning_rate": 9.563152929248817e-06, "loss": 0.4052, "step": 2632 }, { "epoch": 0.6622233400402414, "grad_norm": 0.3557662069797516, "learning_rate": 9.562554579748553e-06, "loss": 0.4099, "step": 2633 }, { "epoch": 0.6624748490945674, "grad_norm": 0.3922789692878723, "learning_rate": 9.561955839494032e-06, "loss": 0.3999, "step": 2634 }, { "epoch": 0.6627263581488934, "grad_norm": 0.39800694584846497, "learning_rate": 9.561356708536532e-06, "loss": 0.4047, "step": 2635 }, { "epoch": 0.6629778672032193, "grad_norm": 0.35587430000305176, "learning_rate": 9.560757186927367e-06, "loss": 0.3867, "step": 2636 }, { "epoch": 0.6632293762575453, "grad_norm": 0.4054100811481476, "learning_rate": 9.56015727471788e-06, "loss": 0.3606, "step": 2637 }, { "epoch": 0.6634808853118712, "grad_norm": 0.3700953423976898, "learning_rate": 9.559556971959452e-06, "loss": 0.3743, "step": 2638 }, { "epoch": 0.6637323943661971, "grad_norm": 0.3898264169692993, "learning_rate": 9.558956278703493e-06, "loss": 0.3565, "step": 2639 }, { "epoch": 0.6639839034205232, "grad_norm": 0.4122966527938843, "learning_rate": 9.55835519500145e-06, "loss": 0.4195, "step": 2640 }, { "epoch": 0.6642354124748491, "grad_norm": 0.375590443611145, "learning_rate": 9.557753720904801e-06, "loss": 0.409, "step": 2641 }, { "epoch": 0.6644869215291751, "grad_norm": 0.3992207944393158, "learning_rate": 9.557151856465059e-06, "loss": 0.3778, "step": 2642 }, { "epoch": 0.664738430583501, "grad_norm": 0.36784839630126953, "learning_rate": 9.556549601733769e-06, "loss": 0.3846, "step": 2643 }, { "epoch": 0.6649899396378269, "grad_norm": 0.3585883677005768, "learning_rate": 9.555946956762513e-06, "loss": 0.3747, "step": 2644 }, { "epoch": 0.665241448692153, "grad_norm": 0.4158109128475189, "learning_rate": 9.555343921602901e-06, "loss": 0.3686, "step": 2645 }, { "epoch": 0.6654929577464789, "grad_norm": 0.41617828607559204, "learning_rate": 9.55474049630658e-06, "loss": 0.3826, "step": 2646 }, { "epoch": 0.6657444668008048, "grad_norm": 0.3927265703678131, "learning_rate": 9.554136680925232e-06, "loss": 0.4062, "step": 2647 }, { "epoch": 0.6659959758551308, "grad_norm": 0.3728967607021332, "learning_rate": 9.553532475510565e-06, "loss": 0.3673, "step": 2648 }, { "epoch": 0.6662474849094567, "grad_norm": 0.3700278103351593, "learning_rate": 9.55292788011433e-06, "loss": 0.3934, "step": 2649 }, { "epoch": 0.6664989939637826, "grad_norm": 0.4425957500934601, "learning_rate": 9.552322894788306e-06, "loss": 0.3672, "step": 2650 }, { "epoch": 0.6667505030181087, "grad_norm": 0.36857491731643677, "learning_rate": 9.551717519584303e-06, "loss": 0.3733, "step": 2651 }, { "epoch": 0.6670020120724346, "grad_norm": 0.4787845313549042, "learning_rate": 9.551111754554172e-06, "loss": 0.3793, "step": 2652 }, { "epoch": 0.6672535211267606, "grad_norm": 0.3845168948173523, "learning_rate": 9.55050559974979e-06, "loss": 0.3796, "step": 2653 }, { "epoch": 0.6675050301810865, "grad_norm": 0.38348764181137085, "learning_rate": 9.54989905522307e-06, "loss": 0.3925, "step": 2654 }, { "epoch": 0.6677565392354124, "grad_norm": 0.41745656728744507, "learning_rate": 9.549292121025961e-06, "loss": 0.3836, "step": 2655 }, { "epoch": 0.6680080482897385, "grad_norm": 0.37018293142318726, "learning_rate": 9.548684797210444e-06, "loss": 0.3806, "step": 2656 }, { "epoch": 0.6682595573440644, "grad_norm": 0.3973758816719055, "learning_rate": 9.548077083828528e-06, "loss": 0.3908, "step": 2657 }, { "epoch": 0.6685110663983903, "grad_norm": 0.3895440101623535, "learning_rate": 9.547468980932263e-06, "loss": 0.3907, "step": 2658 }, { "epoch": 0.6687625754527163, "grad_norm": 0.32289016246795654, "learning_rate": 9.546860488573729e-06, "loss": 0.3738, "step": 2659 }, { "epoch": 0.6690140845070423, "grad_norm": 0.4137352705001831, "learning_rate": 9.54625160680504e-06, "loss": 0.3909, "step": 2660 }, { "epoch": 0.6692655935613682, "grad_norm": 0.34959176182746887, "learning_rate": 9.545642335678341e-06, "loss": 0.3639, "step": 2661 }, { "epoch": 0.6695171026156942, "grad_norm": 0.36586570739746094, "learning_rate": 9.545032675245814e-06, "loss": 0.3729, "step": 2662 }, { "epoch": 0.6697686116700201, "grad_norm": 0.39214131236076355, "learning_rate": 9.544422625559671e-06, "loss": 0.3859, "step": 2663 }, { "epoch": 0.670020120724346, "grad_norm": 0.35369038581848145, "learning_rate": 9.543812186672161e-06, "loss": 0.3796, "step": 2664 }, { "epoch": 0.670271629778672, "grad_norm": 0.40630146861076355, "learning_rate": 9.543201358635564e-06, "loss": 0.3971, "step": 2665 }, { "epoch": 0.670523138832998, "grad_norm": 0.34926897287368774, "learning_rate": 9.54259014150219e-06, "loss": 0.3889, "step": 2666 }, { "epoch": 0.670774647887324, "grad_norm": 0.4089818298816681, "learning_rate": 9.54197853532439e-06, "loss": 0.392, "step": 2667 }, { "epoch": 0.6710261569416499, "grad_norm": 0.3891892433166504, "learning_rate": 9.541366540154544e-06, "loss": 0.4018, "step": 2668 }, { "epoch": 0.6712776659959758, "grad_norm": 0.3339185118675232, "learning_rate": 9.540754156045064e-06, "loss": 0.3712, "step": 2669 }, { "epoch": 0.6715291750503019, "grad_norm": 0.40440425276756287, "learning_rate": 9.540141383048398e-06, "loss": 0.4362, "step": 2670 }, { "epoch": 0.6717806841046278, "grad_norm": 0.39212754368782043, "learning_rate": 9.539528221217026e-06, "loss": 0.4054, "step": 2671 }, { "epoch": 0.6720321931589537, "grad_norm": 0.3525048494338989, "learning_rate": 9.538914670603458e-06, "loss": 0.3754, "step": 2672 }, { "epoch": 0.6722837022132797, "grad_norm": 0.37717682123184204, "learning_rate": 9.538300731260247e-06, "loss": 0.4042, "step": 2673 }, { "epoch": 0.6725352112676056, "grad_norm": 0.4224480986595154, "learning_rate": 9.537686403239967e-06, "loss": 0.359, "step": 2674 }, { "epoch": 0.6727867203219315, "grad_norm": 0.4100128412246704, "learning_rate": 9.537071686595237e-06, "loss": 0.3911, "step": 2675 }, { "epoch": 0.6730382293762576, "grad_norm": 0.34391623735427856, "learning_rate": 9.536456581378699e-06, "loss": 0.3899, "step": 2676 }, { "epoch": 0.6732897384305835, "grad_norm": 0.40277257561683655, "learning_rate": 9.535841087643036e-06, "loss": 0.3973, "step": 2677 }, { "epoch": 0.6735412474849095, "grad_norm": 0.3905733823776245, "learning_rate": 9.535225205440958e-06, "loss": 0.3931, "step": 2678 }, { "epoch": 0.6737927565392354, "grad_norm": 0.3948303461074829, "learning_rate": 9.534608934825217e-06, "loss": 0.3773, "step": 2679 }, { "epoch": 0.6740442655935613, "grad_norm": 0.4447704255580902, "learning_rate": 9.533992275848587e-06, "loss": 0.4014, "step": 2680 }, { "epoch": 0.6742957746478874, "grad_norm": 0.3908079266548157, "learning_rate": 9.533375228563883e-06, "loss": 0.394, "step": 2681 }, { "epoch": 0.6745472837022133, "grad_norm": 0.354306697845459, "learning_rate": 9.532757793023952e-06, "loss": 0.3703, "step": 2682 }, { "epoch": 0.6747987927565392, "grad_norm": 0.3971775472164154, "learning_rate": 9.532139969281673e-06, "loss": 0.3872, "step": 2683 }, { "epoch": 0.6750503018108652, "grad_norm": 0.3223625421524048, "learning_rate": 9.531521757389957e-06, "loss": 0.3702, "step": 2684 }, { "epoch": 0.6753018108651911, "grad_norm": 0.4071231186389923, "learning_rate": 9.530903157401755e-06, "loss": 0.4038, "step": 2685 }, { "epoch": 0.6755533199195171, "grad_norm": 0.3665800094604492, "learning_rate": 9.530284169370039e-06, "loss": 0.3836, "step": 2686 }, { "epoch": 0.6758048289738431, "grad_norm": 0.3697899878025055, "learning_rate": 9.529664793347827e-06, "loss": 0.383, "step": 2687 }, { "epoch": 0.676056338028169, "grad_norm": 0.4246238172054291, "learning_rate": 9.529045029388162e-06, "loss": 0.4008, "step": 2688 }, { "epoch": 0.6763078470824949, "grad_norm": 0.3734142482280731, "learning_rate": 9.528424877544125e-06, "loss": 0.3812, "step": 2689 }, { "epoch": 0.6765593561368209, "grad_norm": 0.35906723141670227, "learning_rate": 9.527804337868827e-06, "loss": 0.3821, "step": 2690 }, { "epoch": 0.6768108651911469, "grad_norm": 0.42187240719795227, "learning_rate": 9.527183410415413e-06, "loss": 0.3884, "step": 2691 }, { "epoch": 0.6770623742454729, "grad_norm": 0.3730980455875397, "learning_rate": 9.526562095237061e-06, "loss": 0.3747, "step": 2692 }, { "epoch": 0.6773138832997988, "grad_norm": 0.44730526208877563, "learning_rate": 9.525940392386985e-06, "loss": 0.3753, "step": 2693 }, { "epoch": 0.6775653923541247, "grad_norm": 0.38479888439178467, "learning_rate": 9.525318301918427e-06, "loss": 0.402, "step": 2694 }, { "epoch": 0.6778169014084507, "grad_norm": 0.40130650997161865, "learning_rate": 9.524695823884669e-06, "loss": 0.3775, "step": 2695 }, { "epoch": 0.6780684104627767, "grad_norm": 0.41748368740081787, "learning_rate": 9.524072958339019e-06, "loss": 0.4146, "step": 2696 }, { "epoch": 0.6783199195171026, "grad_norm": 0.3546696901321411, "learning_rate": 9.523449705334821e-06, "loss": 0.3782, "step": 2697 }, { "epoch": 0.6785714285714286, "grad_norm": 0.4278590679168701, "learning_rate": 9.522826064925457e-06, "loss": 0.4017, "step": 2698 }, { "epoch": 0.6788229376257545, "grad_norm": 0.3383933901786804, "learning_rate": 9.522202037164333e-06, "loss": 0.3774, "step": 2699 }, { "epoch": 0.6790744466800804, "grad_norm": 0.3658550977706909, "learning_rate": 9.521577622104897e-06, "loss": 0.3783, "step": 2700 }, { "epoch": 0.6793259557344065, "grad_norm": 0.40239429473876953, "learning_rate": 9.520952819800624e-06, "loss": 0.3896, "step": 2701 }, { "epoch": 0.6795774647887324, "grad_norm": 0.3836422562599182, "learning_rate": 9.520327630305026e-06, "loss": 0.3932, "step": 2702 }, { "epoch": 0.6798289738430584, "grad_norm": 0.3793487548828125, "learning_rate": 9.519702053671643e-06, "loss": 0.3851, "step": 2703 }, { "epoch": 0.6800804828973843, "grad_norm": 0.38546112179756165, "learning_rate": 9.519076089954056e-06, "loss": 0.3839, "step": 2704 }, { "epoch": 0.6803319919517102, "grad_norm": 0.38321325182914734, "learning_rate": 9.518449739205873e-06, "loss": 0.3793, "step": 2705 }, { "epoch": 0.6805835010060363, "grad_norm": 0.35967323184013367, "learning_rate": 9.517823001480737e-06, "loss": 0.3805, "step": 2706 }, { "epoch": 0.6808350100603622, "grad_norm": 0.3868929445743561, "learning_rate": 9.517195876832324e-06, "loss": 0.3666, "step": 2707 }, { "epoch": 0.6810865191146881, "grad_norm": 0.33470460772514343, "learning_rate": 9.516568365314345e-06, "loss": 0.3674, "step": 2708 }, { "epoch": 0.6813380281690141, "grad_norm": 0.37914395332336426, "learning_rate": 9.51594046698054e-06, "loss": 0.3757, "step": 2709 }, { "epoch": 0.68158953722334, "grad_norm": 0.3739543557167053, "learning_rate": 9.515312181884685e-06, "loss": 0.3861, "step": 2710 }, { "epoch": 0.681841046277666, "grad_norm": 0.3794519007205963, "learning_rate": 9.514683510080592e-06, "loss": 0.3961, "step": 2711 }, { "epoch": 0.682092555331992, "grad_norm": 0.39454448223114014, "learning_rate": 9.514054451622098e-06, "loss": 0.4207, "step": 2712 }, { "epoch": 0.6823440643863179, "grad_norm": 0.38329020142555237, "learning_rate": 9.51342500656308e-06, "loss": 0.3901, "step": 2713 }, { "epoch": 0.6825955734406438, "grad_norm": 0.40806901454925537, "learning_rate": 9.512795174957445e-06, "loss": 0.3576, "step": 2714 }, { "epoch": 0.6828470824949698, "grad_norm": 0.4223353862762451, "learning_rate": 9.512164956859138e-06, "loss": 0.366, "step": 2715 }, { "epoch": 0.6830985915492958, "grad_norm": 0.4174785614013672, "learning_rate": 9.511534352322128e-06, "loss": 0.3953, "step": 2716 }, { "epoch": 0.6833501006036218, "grad_norm": 0.390242338180542, "learning_rate": 9.510903361400426e-06, "loss": 0.3757, "step": 2717 }, { "epoch": 0.6836016096579477, "grad_norm": 0.45529499650001526, "learning_rate": 9.510271984148071e-06, "loss": 0.3877, "step": 2718 }, { "epoch": 0.6838531187122736, "grad_norm": 0.38153916597366333, "learning_rate": 9.509640220619136e-06, "loss": 0.4038, "step": 2719 }, { "epoch": 0.6841046277665996, "grad_norm": 0.4252956211566925, "learning_rate": 9.50900807086773e-06, "loss": 0.3811, "step": 2720 }, { "epoch": 0.6843561368209256, "grad_norm": 0.3555954098701477, "learning_rate": 9.50837553494799e-06, "loss": 0.3916, "step": 2721 }, { "epoch": 0.6846076458752515, "grad_norm": 0.3798430562019348, "learning_rate": 9.50774261291409e-06, "loss": 0.3667, "step": 2722 }, { "epoch": 0.6848591549295775, "grad_norm": 0.3840332627296448, "learning_rate": 9.507109304820234e-06, "loss": 0.3989, "step": 2723 }, { "epoch": 0.6851106639839034, "grad_norm": 0.38104766607284546, "learning_rate": 9.506475610720665e-06, "loss": 0.3793, "step": 2724 }, { "epoch": 0.6853621730382293, "grad_norm": 0.4474826455116272, "learning_rate": 9.505841530669652e-06, "loss": 0.3764, "step": 2725 }, { "epoch": 0.6856136820925554, "grad_norm": 0.37356293201446533, "learning_rate": 9.505207064721499e-06, "loss": 0.4024, "step": 2726 }, { "epoch": 0.6858651911468813, "grad_norm": 0.4107537269592285, "learning_rate": 9.504572212930544e-06, "loss": 0.3951, "step": 2727 }, { "epoch": 0.6861167002012073, "grad_norm": 0.40174853801727295, "learning_rate": 9.503936975351164e-06, "loss": 0.3964, "step": 2728 }, { "epoch": 0.6863682092555332, "grad_norm": 0.3748626708984375, "learning_rate": 9.503301352037756e-06, "loss": 0.3891, "step": 2729 }, { "epoch": 0.6866197183098591, "grad_norm": 0.357077032327652, "learning_rate": 9.50266534304476e-06, "loss": 0.3828, "step": 2730 }, { "epoch": 0.6868712273641852, "grad_norm": 0.40127694606781006, "learning_rate": 9.502028948426645e-06, "loss": 0.3847, "step": 2731 }, { "epoch": 0.6871227364185111, "grad_norm": 0.3611276149749756, "learning_rate": 9.501392168237914e-06, "loss": 0.3901, "step": 2732 }, { "epoch": 0.687374245472837, "grad_norm": 0.35712650418281555, "learning_rate": 9.500755002533109e-06, "loss": 0.3725, "step": 2733 }, { "epoch": 0.687625754527163, "grad_norm": 0.38140082359313965, "learning_rate": 9.50011745136679e-06, "loss": 0.4035, "step": 2734 }, { "epoch": 0.6878772635814889, "grad_norm": 0.375500351190567, "learning_rate": 9.499479514793568e-06, "loss": 0.3949, "step": 2735 }, { "epoch": 0.6881287726358148, "grad_norm": 0.3318289816379547, "learning_rate": 9.49884119286807e-06, "loss": 0.3663, "step": 2736 }, { "epoch": 0.6883802816901409, "grad_norm": 0.4133455753326416, "learning_rate": 9.498202485644972e-06, "loss": 0.3892, "step": 2737 }, { "epoch": 0.6886317907444668, "grad_norm": 0.34787869453430176, "learning_rate": 9.49756339317897e-06, "loss": 0.4052, "step": 2738 }, { "epoch": 0.6888832997987927, "grad_norm": 0.3496398627758026, "learning_rate": 9.4969239155248e-06, "loss": 0.3995, "step": 2739 }, { "epoch": 0.6891348088531187, "grad_norm": 0.3664000630378723, "learning_rate": 9.49628405273723e-06, "loss": 0.3926, "step": 2740 }, { "epoch": 0.6893863179074446, "grad_norm": 0.38853469491004944, "learning_rate": 9.49564380487106e-06, "loss": 0.3909, "step": 2741 }, { "epoch": 0.6896378269617707, "grad_norm": 0.3786064684391022, "learning_rate": 9.495003171981122e-06, "loss": 0.392, "step": 2742 }, { "epoch": 0.6898893360160966, "grad_norm": 0.3976394534111023, "learning_rate": 9.494362154122283e-06, "loss": 0.3786, "step": 2743 }, { "epoch": 0.6901408450704225, "grad_norm": 0.36655136942863464, "learning_rate": 9.493720751349442e-06, "loss": 0.3733, "step": 2744 }, { "epoch": 0.6903923541247485, "grad_norm": 0.32457855343818665, "learning_rate": 9.493078963717533e-06, "loss": 0.3873, "step": 2745 }, { "epoch": 0.6906438631790744, "grad_norm": 0.3550087511539459, "learning_rate": 9.492436791281516e-06, "loss": 0.3992, "step": 2746 }, { "epoch": 0.6908953722334004, "grad_norm": 0.357652872800827, "learning_rate": 9.491794234096396e-06, "loss": 0.3859, "step": 2747 }, { "epoch": 0.6911468812877264, "grad_norm": 0.3854414224624634, "learning_rate": 9.491151292217198e-06, "loss": 0.4151, "step": 2748 }, { "epoch": 0.6913983903420523, "grad_norm": 0.34755367040634155, "learning_rate": 9.490507965698988e-06, "loss": 0.3522, "step": 2749 }, { "epoch": 0.6916498993963782, "grad_norm": 0.35060808062553406, "learning_rate": 9.489864254596866e-06, "loss": 0.3856, "step": 2750 }, { "epoch": 0.6919014084507042, "grad_norm": 0.3610481023788452, "learning_rate": 9.489220158965957e-06, "loss": 0.393, "step": 2751 }, { "epoch": 0.6921529175050302, "grad_norm": 0.32515424489974976, "learning_rate": 9.488575678861426e-06, "loss": 0.3973, "step": 2752 }, { "epoch": 0.6924044265593562, "grad_norm": 0.3888111412525177, "learning_rate": 9.487930814338468e-06, "loss": 0.3946, "step": 2753 }, { "epoch": 0.6926559356136821, "grad_norm": 0.3749738931655884, "learning_rate": 9.487285565452313e-06, "loss": 0.3777, "step": 2754 }, { "epoch": 0.692907444668008, "grad_norm": 0.41394150257110596, "learning_rate": 9.486639932258223e-06, "loss": 0.3777, "step": 2755 }, { "epoch": 0.693158953722334, "grad_norm": 0.35159727931022644, "learning_rate": 9.485993914811488e-06, "loss": 0.3798, "step": 2756 }, { "epoch": 0.69341046277666, "grad_norm": 0.36780303716659546, "learning_rate": 9.485347513167443e-06, "loss": 0.382, "step": 2757 }, { "epoch": 0.6936619718309859, "grad_norm": 0.3663868308067322, "learning_rate": 9.48470072738144e-06, "loss": 0.3858, "step": 2758 }, { "epoch": 0.6939134808853119, "grad_norm": 0.3358771502971649, "learning_rate": 9.484053557508876e-06, "loss": 0.4216, "step": 2759 }, { "epoch": 0.6941649899396378, "grad_norm": 0.3996930718421936, "learning_rate": 9.483406003605178e-06, "loss": 0.3943, "step": 2760 }, { "epoch": 0.6944164989939637, "grad_norm": 0.41368430852890015, "learning_rate": 9.482758065725805e-06, "loss": 0.3836, "step": 2761 }, { "epoch": 0.6946680080482898, "grad_norm": 0.4018966555595398, "learning_rate": 9.482109743926247e-06, "loss": 0.3969, "step": 2762 }, { "epoch": 0.6949195171026157, "grad_norm": 0.3682190477848053, "learning_rate": 9.481461038262027e-06, "loss": 0.3671, "step": 2763 }, { "epoch": 0.6951710261569416, "grad_norm": 0.36615151166915894, "learning_rate": 9.480811948788708e-06, "loss": 0.378, "step": 2764 }, { "epoch": 0.6954225352112676, "grad_norm": 0.3735343813896179, "learning_rate": 9.480162475561877e-06, "loss": 0.3756, "step": 2765 }, { "epoch": 0.6956740442655935, "grad_norm": 0.37882131338119507, "learning_rate": 9.479512618637156e-06, "loss": 0.3736, "step": 2766 }, { "epoch": 0.6959255533199196, "grad_norm": 0.355418860912323, "learning_rate": 9.478862378070204e-06, "loss": 0.3856, "step": 2767 }, { "epoch": 0.6961770623742455, "grad_norm": 0.34449321031570435, "learning_rate": 9.47821175391671e-06, "loss": 0.3713, "step": 2768 }, { "epoch": 0.6964285714285714, "grad_norm": 0.39640021324157715, "learning_rate": 9.477560746232394e-06, "loss": 0.379, "step": 2769 }, { "epoch": 0.6966800804828974, "grad_norm": 0.3537764251232147, "learning_rate": 9.476909355073012e-06, "loss": 0.3901, "step": 2770 }, { "epoch": 0.6969315895372233, "grad_norm": 0.34988686442375183, "learning_rate": 9.47625758049435e-06, "loss": 0.3643, "step": 2771 }, { "epoch": 0.6971830985915493, "grad_norm": 0.39470940828323364, "learning_rate": 9.47560542255223e-06, "loss": 0.3841, "step": 2772 }, { "epoch": 0.6974346076458753, "grad_norm": 0.3538375496864319, "learning_rate": 9.474952881302506e-06, "loss": 0.3895, "step": 2773 }, { "epoch": 0.6976861167002012, "grad_norm": 0.34231036901474, "learning_rate": 9.474299956801062e-06, "loss": 0.385, "step": 2774 }, { "epoch": 0.6979376257545271, "grad_norm": 0.3452184796333313, "learning_rate": 9.473646649103819e-06, "loss": 0.3913, "step": 2775 }, { "epoch": 0.6981891348088531, "grad_norm": 0.3894374966621399, "learning_rate": 9.472992958266725e-06, "loss": 0.3922, "step": 2776 }, { "epoch": 0.6984406438631791, "grad_norm": 0.35543131828308105, "learning_rate": 9.47233888434577e-06, "loss": 0.373, "step": 2777 }, { "epoch": 0.6986921529175051, "grad_norm": 0.3584343194961548, "learning_rate": 9.471684427396966e-06, "loss": 0.3948, "step": 2778 }, { "epoch": 0.698943661971831, "grad_norm": 0.35712918639183044, "learning_rate": 9.471029587476367e-06, "loss": 0.4008, "step": 2779 }, { "epoch": 0.6991951710261569, "grad_norm": 0.36257344484329224, "learning_rate": 9.470374364640054e-06, "loss": 0.4058, "step": 2780 }, { "epoch": 0.6994466800804829, "grad_norm": 0.37731629610061646, "learning_rate": 9.469718758944144e-06, "loss": 0.4053, "step": 2781 }, { "epoch": 0.6996981891348089, "grad_norm": 0.3586489260196686, "learning_rate": 9.469062770444784e-06, "loss": 0.402, "step": 2782 }, { "epoch": 0.6999496981891348, "grad_norm": 0.36410945653915405, "learning_rate": 9.468406399198156e-06, "loss": 0.3954, "step": 2783 }, { "epoch": 0.7002012072434608, "grad_norm": 0.40461426973342896, "learning_rate": 9.467749645260475e-06, "loss": 0.406, "step": 2784 }, { "epoch": 0.7004527162977867, "grad_norm": 0.36193108558654785, "learning_rate": 9.467092508687987e-06, "loss": 0.3755, "step": 2785 }, { "epoch": 0.7007042253521126, "grad_norm": 0.32516640424728394, "learning_rate": 9.46643498953697e-06, "loss": 0.3889, "step": 2786 }, { "epoch": 0.7009557344064387, "grad_norm": 0.3632727861404419, "learning_rate": 9.46577708786374e-06, "loss": 0.4103, "step": 2787 }, { "epoch": 0.7012072434607646, "grad_norm": 0.3422258496284485, "learning_rate": 9.46511880372464e-06, "loss": 0.3833, "step": 2788 }, { "epoch": 0.7014587525150905, "grad_norm": 0.386264830827713, "learning_rate": 9.464460137176047e-06, "loss": 0.3842, "step": 2789 }, { "epoch": 0.7017102615694165, "grad_norm": 0.33963924646377563, "learning_rate": 9.463801088274374e-06, "loss": 0.3707, "step": 2790 }, { "epoch": 0.7019617706237424, "grad_norm": 0.3681434094905853, "learning_rate": 9.463141657076063e-06, "loss": 0.3914, "step": 2791 }, { "epoch": 0.7022132796780685, "grad_norm": 0.3675888180732727, "learning_rate": 9.46248184363759e-06, "loss": 0.3785, "step": 2792 }, { "epoch": 0.7024647887323944, "grad_norm": 0.3703523278236389, "learning_rate": 9.461821648015464e-06, "loss": 0.4011, "step": 2793 }, { "epoch": 0.7027162977867203, "grad_norm": 0.34886541962623596, "learning_rate": 9.461161070266226e-06, "loss": 0.3953, "step": 2794 }, { "epoch": 0.7029678068410463, "grad_norm": 0.34929558634757996, "learning_rate": 9.460500110446453e-06, "loss": 0.4049, "step": 2795 }, { "epoch": 0.7032193158953722, "grad_norm": 0.3256225883960724, "learning_rate": 9.459838768612751e-06, "loss": 0.3611, "step": 2796 }, { "epoch": 0.7034708249496981, "grad_norm": 0.34201568365097046, "learning_rate": 9.459177044821758e-06, "loss": 0.3878, "step": 2797 }, { "epoch": 0.7037223340040242, "grad_norm": 0.32223188877105713, "learning_rate": 9.458514939130148e-06, "loss": 0.3879, "step": 2798 }, { "epoch": 0.7039738430583501, "grad_norm": 0.38301658630371094, "learning_rate": 9.457852451594625e-06, "loss": 0.3776, "step": 2799 }, { "epoch": 0.704225352112676, "grad_norm": 0.3359746038913727, "learning_rate": 9.457189582271928e-06, "loss": 0.3815, "step": 2800 }, { "epoch": 0.704476861167002, "grad_norm": 0.42329642176628113, "learning_rate": 9.456526331218827e-06, "loss": 0.3806, "step": 2801 }, { "epoch": 0.704728370221328, "grad_norm": 0.43376052379608154, "learning_rate": 9.455862698492127e-06, "loss": 0.3921, "step": 2802 }, { "epoch": 0.704979879275654, "grad_norm": 0.3609297275543213, "learning_rate": 9.455198684148662e-06, "loss": 0.3749, "step": 2803 }, { "epoch": 0.7052313883299799, "grad_norm": 0.3790986239910126, "learning_rate": 9.454534288245302e-06, "loss": 0.3872, "step": 2804 }, { "epoch": 0.7054828973843058, "grad_norm": 0.36633041501045227, "learning_rate": 9.453869510838946e-06, "loss": 0.4007, "step": 2805 }, { "epoch": 0.7057344064386318, "grad_norm": 0.4222126305103302, "learning_rate": 9.45320435198653e-06, "loss": 0.3949, "step": 2806 }, { "epoch": 0.7059859154929577, "grad_norm": 0.34727922081947327, "learning_rate": 9.452538811745023e-06, "loss": 0.3738, "step": 2807 }, { "epoch": 0.7062374245472837, "grad_norm": 0.371955543756485, "learning_rate": 9.451872890171419e-06, "loss": 0.3913, "step": 2808 }, { "epoch": 0.7064889336016097, "grad_norm": 0.3855418860912323, "learning_rate": 9.451206587322754e-06, "loss": 0.367, "step": 2809 }, { "epoch": 0.7067404426559356, "grad_norm": 0.3608242869377136, "learning_rate": 9.450539903256091e-06, "loss": 0.3805, "step": 2810 }, { "epoch": 0.7069919517102615, "grad_norm": 0.3822399079799652, "learning_rate": 9.449872838028529e-06, "loss": 0.3969, "step": 2811 }, { "epoch": 0.7072434607645876, "grad_norm": 0.35486891865730286, "learning_rate": 9.449205391697196e-06, "loss": 0.3947, "step": 2812 }, { "epoch": 0.7074949698189135, "grad_norm": 0.3677852749824524, "learning_rate": 9.448537564319254e-06, "loss": 0.3856, "step": 2813 }, { "epoch": 0.7077464788732394, "grad_norm": 0.36130937933921814, "learning_rate": 9.447869355951901e-06, "loss": 0.401, "step": 2814 }, { "epoch": 0.7079979879275654, "grad_norm": 0.35179662704467773, "learning_rate": 9.447200766652363e-06, "loss": 0.3734, "step": 2815 }, { "epoch": 0.7082494969818913, "grad_norm": 0.3460598289966583, "learning_rate": 9.446531796477901e-06, "loss": 0.3591, "step": 2816 }, { "epoch": 0.7085010060362174, "grad_norm": 0.34915661811828613, "learning_rate": 9.445862445485808e-06, "loss": 0.4048, "step": 2817 }, { "epoch": 0.7087525150905433, "grad_norm": 0.38304591178894043, "learning_rate": 9.44519271373341e-06, "loss": 0.3861, "step": 2818 }, { "epoch": 0.7090040241448692, "grad_norm": 0.4027135968208313, "learning_rate": 9.444522601278065e-06, "loss": 0.4149, "step": 2819 }, { "epoch": 0.7092555331991952, "grad_norm": 0.3455667793750763, "learning_rate": 9.443852108177164e-06, "loss": 0.3671, "step": 2820 }, { "epoch": 0.7095070422535211, "grad_norm": 0.4052219092845917, "learning_rate": 9.44318123448813e-06, "loss": 0.3654, "step": 2821 }, { "epoch": 0.709758551307847, "grad_norm": 0.36053210496902466, "learning_rate": 9.44250998026842e-06, "loss": 0.4084, "step": 2822 }, { "epoch": 0.7100100603621731, "grad_norm": 0.3680732846260071, "learning_rate": 9.441838345575523e-06, "loss": 0.3848, "step": 2823 }, { "epoch": 0.710261569416499, "grad_norm": 0.36795133352279663, "learning_rate": 9.441166330466959e-06, "loss": 0.3666, "step": 2824 }, { "epoch": 0.7105130784708249, "grad_norm": 0.3703378438949585, "learning_rate": 9.440493935000283e-06, "loss": 0.3959, "step": 2825 }, { "epoch": 0.7107645875251509, "grad_norm": 0.37262654304504395, "learning_rate": 9.439821159233083e-06, "loss": 0.3584, "step": 2826 }, { "epoch": 0.7110160965794768, "grad_norm": 0.35950973629951477, "learning_rate": 9.439148003222973e-06, "loss": 0.373, "step": 2827 }, { "epoch": 0.7112676056338029, "grad_norm": 0.3705592751502991, "learning_rate": 9.43847446702761e-06, "loss": 0.4044, "step": 2828 }, { "epoch": 0.7115191146881288, "grad_norm": 0.3991115987300873, "learning_rate": 9.437800550704674e-06, "loss": 0.3979, "step": 2829 }, { "epoch": 0.7117706237424547, "grad_norm": 0.37263038754463196, "learning_rate": 9.437126254311886e-06, "loss": 0.3845, "step": 2830 }, { "epoch": 0.7120221327967807, "grad_norm": 0.389017790555954, "learning_rate": 9.436451577906991e-06, "loss": 0.392, "step": 2831 }, { "epoch": 0.7122736418511066, "grad_norm": 0.33222588896751404, "learning_rate": 9.435776521547772e-06, "loss": 0.3847, "step": 2832 }, { "epoch": 0.7125251509054326, "grad_norm": 0.342219740152359, "learning_rate": 9.435101085292047e-06, "loss": 0.3885, "step": 2833 }, { "epoch": 0.7127766599597586, "grad_norm": 0.3644763231277466, "learning_rate": 9.434425269197658e-06, "loss": 0.3635, "step": 2834 }, { "epoch": 0.7130281690140845, "grad_norm": 0.37079551815986633, "learning_rate": 9.433749073322487e-06, "loss": 0.4046, "step": 2835 }, { "epoch": 0.7132796780684104, "grad_norm": 0.39011722803115845, "learning_rate": 9.433072497724445e-06, "loss": 0.3589, "step": 2836 }, { "epoch": 0.7135311871227364, "grad_norm": 0.36313149333000183, "learning_rate": 9.432395542461476e-06, "loss": 0.3964, "step": 2837 }, { "epoch": 0.7137826961770624, "grad_norm": 0.35538873076438904, "learning_rate": 9.431718207591559e-06, "loss": 0.3914, "step": 2838 }, { "epoch": 0.7140342052313883, "grad_norm": 0.3488622307777405, "learning_rate": 9.431040493172702e-06, "loss": 0.3841, "step": 2839 }, { "epoch": 0.7142857142857143, "grad_norm": 0.4144222140312195, "learning_rate": 9.430362399262947e-06, "loss": 0.3963, "step": 2840 }, { "epoch": 0.7145372233400402, "grad_norm": 0.34440502524375916, "learning_rate": 9.429683925920369e-06, "loss": 0.3667, "step": 2841 }, { "epoch": 0.7147887323943662, "grad_norm": 0.33740341663360596, "learning_rate": 9.429005073203075e-06, "loss": 0.3964, "step": 2842 }, { "epoch": 0.7150402414486922, "grad_norm": 0.38044601678848267, "learning_rate": 9.428325841169203e-06, "loss": 0.4139, "step": 2843 }, { "epoch": 0.7152917505030181, "grad_norm": 0.39153364300727844, "learning_rate": 9.427646229876927e-06, "loss": 0.3849, "step": 2844 }, { "epoch": 0.7155432595573441, "grad_norm": 0.3845551908016205, "learning_rate": 9.42696623938445e-06, "loss": 0.4071, "step": 2845 }, { "epoch": 0.71579476861167, "grad_norm": 0.32743361592292786, "learning_rate": 9.426285869750012e-06, "loss": 0.3933, "step": 2846 }, { "epoch": 0.7160462776659959, "grad_norm": 0.48608964681625366, "learning_rate": 9.425605121031878e-06, "loss": 0.4126, "step": 2847 }, { "epoch": 0.716297786720322, "grad_norm": 0.42301592230796814, "learning_rate": 9.424923993288352e-06, "loss": 0.3774, "step": 2848 }, { "epoch": 0.7165492957746479, "grad_norm": 0.3551022708415985, "learning_rate": 9.424242486577768e-06, "loss": 0.3884, "step": 2849 }, { "epoch": 0.7168008048289738, "grad_norm": 0.4087948799133301, "learning_rate": 9.423560600958493e-06, "loss": 0.3816, "step": 2850 }, { "epoch": 0.7170523138832998, "grad_norm": 0.37452781200408936, "learning_rate": 9.422878336488928e-06, "loss": 0.3793, "step": 2851 }, { "epoch": 0.7173038229376257, "grad_norm": 0.35576769709587097, "learning_rate": 9.422195693227501e-06, "loss": 0.3753, "step": 2852 }, { "epoch": 0.7175553319919518, "grad_norm": 0.3551032841205597, "learning_rate": 9.42151267123268e-06, "loss": 0.3923, "step": 2853 }, { "epoch": 0.7178068410462777, "grad_norm": 0.37760961055755615, "learning_rate": 9.420829270562956e-06, "loss": 0.4031, "step": 2854 }, { "epoch": 0.7180583501006036, "grad_norm": 0.3662315011024475, "learning_rate": 9.420145491276864e-06, "loss": 0.3539, "step": 2855 }, { "epoch": 0.7183098591549296, "grad_norm": 0.3553292751312256, "learning_rate": 9.419461333432965e-06, "loss": 0.3812, "step": 2856 }, { "epoch": 0.7185613682092555, "grad_norm": 0.4659683108329773, "learning_rate": 9.418776797089848e-06, "loss": 0.3933, "step": 2857 }, { "epoch": 0.7188128772635815, "grad_norm": 0.37304118275642395, "learning_rate": 9.418091882306141e-06, "loss": 0.3758, "step": 2858 }, { "epoch": 0.7190643863179075, "grad_norm": 0.36666375398635864, "learning_rate": 9.417406589140507e-06, "loss": 0.4104, "step": 2859 }, { "epoch": 0.7193158953722334, "grad_norm": 0.42461302876472473, "learning_rate": 9.416720917651631e-06, "loss": 0.3893, "step": 2860 }, { "epoch": 0.7195674044265593, "grad_norm": 0.4254009425640106, "learning_rate": 9.416034867898243e-06, "loss": 0.3885, "step": 2861 }, { "epoch": 0.7198189134808853, "grad_norm": 0.3596349060535431, "learning_rate": 9.415348439939091e-06, "loss": 0.3596, "step": 2862 }, { "epoch": 0.7200704225352113, "grad_norm": 0.3977017104625702, "learning_rate": 9.41466163383297e-06, "loss": 0.3848, "step": 2863 }, { "epoch": 0.7203219315895373, "grad_norm": 0.40862926840782166, "learning_rate": 9.4139744496387e-06, "loss": 0.4016, "step": 2864 }, { "epoch": 0.7205734406438632, "grad_norm": 0.3802616596221924, "learning_rate": 9.413286887415128e-06, "loss": 0.3909, "step": 2865 }, { "epoch": 0.7208249496981891, "grad_norm": 0.34951651096343994, "learning_rate": 9.412598947221146e-06, "loss": 0.3707, "step": 2866 }, { "epoch": 0.7210764587525151, "grad_norm": 0.36474528908729553, "learning_rate": 9.411910629115667e-06, "loss": 0.4124, "step": 2867 }, { "epoch": 0.721327967806841, "grad_norm": 0.3925243020057678, "learning_rate": 9.411221933157646e-06, "loss": 0.3872, "step": 2868 }, { "epoch": 0.721579476861167, "grad_norm": 0.3429969251155853, "learning_rate": 9.41053285940606e-06, "loss": 0.3867, "step": 2869 }, { "epoch": 0.721830985915493, "grad_norm": 0.35117414593696594, "learning_rate": 9.409843407919929e-06, "loss": 0.3853, "step": 2870 }, { "epoch": 0.7220824949698189, "grad_norm": 0.35899850726127625, "learning_rate": 9.409153578758298e-06, "loss": 0.3932, "step": 2871 }, { "epoch": 0.7223340040241448, "grad_norm": 0.37349846959114075, "learning_rate": 9.408463371980248e-06, "loss": 0.3779, "step": 2872 }, { "epoch": 0.7225855130784709, "grad_norm": 0.3280394375324249, "learning_rate": 9.407772787644887e-06, "loss": 0.3697, "step": 2873 }, { "epoch": 0.7228370221327968, "grad_norm": 0.36341550946235657, "learning_rate": 9.407081825811362e-06, "loss": 0.3828, "step": 2874 }, { "epoch": 0.7230885311871227, "grad_norm": 0.36753761768341064, "learning_rate": 9.40639048653885e-06, "loss": 0.384, "step": 2875 }, { "epoch": 0.7233400402414487, "grad_norm": 0.3492330014705658, "learning_rate": 9.405698769886557e-06, "loss": 0.3842, "step": 2876 }, { "epoch": 0.7235915492957746, "grad_norm": 0.40695223212242126, "learning_rate": 9.405006675913729e-06, "loss": 0.4184, "step": 2877 }, { "epoch": 0.7238430583501007, "grad_norm": 0.342252641916275, "learning_rate": 9.404314204679636e-06, "loss": 0.3739, "step": 2878 }, { "epoch": 0.7240945674044266, "grad_norm": 0.39723700284957886, "learning_rate": 9.403621356243584e-06, "loss": 0.4288, "step": 2879 }, { "epoch": 0.7243460764587525, "grad_norm": 0.3767862021923065, "learning_rate": 9.402928130664913e-06, "loss": 0.3883, "step": 2880 }, { "epoch": 0.7245975855130785, "grad_norm": 0.37945619225502014, "learning_rate": 9.402234528002991e-06, "loss": 0.406, "step": 2881 }, { "epoch": 0.7248490945674044, "grad_norm": 0.3551163375377655, "learning_rate": 9.401540548317223e-06, "loss": 0.3808, "step": 2882 }, { "epoch": 0.7251006036217303, "grad_norm": 0.3756506145000458, "learning_rate": 9.400846191667043e-06, "loss": 0.3665, "step": 2883 }, { "epoch": 0.7253521126760564, "grad_norm": 0.38300275802612305, "learning_rate": 9.400151458111918e-06, "loss": 0.3968, "step": 2884 }, { "epoch": 0.7256036217303823, "grad_norm": 0.37950319051742554, "learning_rate": 9.399456347711348e-06, "loss": 0.3843, "step": 2885 }, { "epoch": 0.7258551307847082, "grad_norm": 0.3413698673248291, "learning_rate": 9.398760860524865e-06, "loss": 0.3965, "step": 2886 }, { "epoch": 0.7261066398390342, "grad_norm": 0.37162476778030396, "learning_rate": 9.398064996612032e-06, "loss": 0.3786, "step": 2887 }, { "epoch": 0.7263581488933601, "grad_norm": 0.3777274787425995, "learning_rate": 9.397368756032445e-06, "loss": 0.4001, "step": 2888 }, { "epoch": 0.7266096579476862, "grad_norm": 0.3456476032733917, "learning_rate": 9.396672138845737e-06, "loss": 0.3708, "step": 2889 }, { "epoch": 0.7268611670020121, "grad_norm": 0.41536620259284973, "learning_rate": 9.395975145111565e-06, "loss": 0.3886, "step": 2890 }, { "epoch": 0.727112676056338, "grad_norm": 0.3782283067703247, "learning_rate": 9.395277774889621e-06, "loss": 0.3711, "step": 2891 }, { "epoch": 0.727364185110664, "grad_norm": 0.31303125619888306, "learning_rate": 9.394580028239633e-06, "loss": 0.366, "step": 2892 }, { "epoch": 0.72761569416499, "grad_norm": 0.3784593641757965, "learning_rate": 9.39388190522136e-06, "loss": 0.4001, "step": 2893 }, { "epoch": 0.7278672032193159, "grad_norm": 0.43708619475364685, "learning_rate": 9.393183405894589e-06, "loss": 0.3977, "step": 2894 }, { "epoch": 0.7281187122736419, "grad_norm": 0.34523478150367737, "learning_rate": 9.39248453031914e-06, "loss": 0.4209, "step": 2895 }, { "epoch": 0.7283702213279678, "grad_norm": 0.37533217668533325, "learning_rate": 9.391785278554875e-06, "loss": 0.3937, "step": 2896 }, { "epoch": 0.7286217303822937, "grad_norm": 0.42460107803344727, "learning_rate": 9.391085650661672e-06, "loss": 0.3885, "step": 2897 }, { "epoch": 0.7288732394366197, "grad_norm": 0.3811191916465759, "learning_rate": 9.390385646699457e-06, "loss": 0.3899, "step": 2898 }, { "epoch": 0.7291247484909457, "grad_norm": 0.41132891178131104, "learning_rate": 9.389685266728175e-06, "loss": 0.3725, "step": 2899 }, { "epoch": 0.7293762575452716, "grad_norm": 0.4103216230869293, "learning_rate": 9.388984510807812e-06, "loss": 0.3838, "step": 2900 }, { "epoch": 0.7296277665995976, "grad_norm": 0.399691641330719, "learning_rate": 9.388283378998382e-06, "loss": 0.3846, "step": 2901 }, { "epoch": 0.7298792756539235, "grad_norm": 0.4203183650970459, "learning_rate": 9.387581871359936e-06, "loss": 0.3557, "step": 2902 }, { "epoch": 0.7301307847082495, "grad_norm": 0.3951526880264282, "learning_rate": 9.386879987952549e-06, "loss": 0.3983, "step": 2903 }, { "epoch": 0.7303822937625755, "grad_norm": 0.3411506414413452, "learning_rate": 9.386177728836337e-06, "loss": 0.3993, "step": 2904 }, { "epoch": 0.7306338028169014, "grad_norm": 0.3646346628665924, "learning_rate": 9.385475094071442e-06, "loss": 0.3693, "step": 2905 }, { "epoch": 0.7308853118712274, "grad_norm": 0.39517220854759216, "learning_rate": 9.384772083718042e-06, "loss": 0.3887, "step": 2906 }, { "epoch": 0.7311368209255533, "grad_norm": 0.36498531699180603, "learning_rate": 9.384068697836342e-06, "loss": 0.3819, "step": 2907 }, { "epoch": 0.7313883299798792, "grad_norm": 0.36910420656204224, "learning_rate": 9.383364936486585e-06, "loss": 0.3941, "step": 2908 }, { "epoch": 0.7316398390342053, "grad_norm": 0.3762173652648926, "learning_rate": 9.382660799729044e-06, "loss": 0.3762, "step": 2909 }, { "epoch": 0.7318913480885312, "grad_norm": 0.40564772486686707, "learning_rate": 9.381956287624024e-06, "loss": 0.3928, "step": 2910 }, { "epoch": 0.7321428571428571, "grad_norm": 0.354449063539505, "learning_rate": 9.381251400231859e-06, "loss": 0.4233, "step": 2911 }, { "epoch": 0.7323943661971831, "grad_norm": 0.3970174789428711, "learning_rate": 9.380546137612922e-06, "loss": 0.4486, "step": 2912 }, { "epoch": 0.732645875251509, "grad_norm": 0.3722879886627197, "learning_rate": 9.379840499827612e-06, "loss": 0.3981, "step": 2913 }, { "epoch": 0.7328973843058351, "grad_norm": 0.3905206024646759, "learning_rate": 9.379134486936366e-06, "loss": 0.3734, "step": 2914 }, { "epoch": 0.733148893360161, "grad_norm": 0.3671495020389557, "learning_rate": 9.378428098999645e-06, "loss": 0.3537, "step": 2915 }, { "epoch": 0.7334004024144869, "grad_norm": 0.3310393691062927, "learning_rate": 9.37772133607795e-06, "loss": 0.3933, "step": 2916 }, { "epoch": 0.7336519114688129, "grad_norm": 0.3759323060512543, "learning_rate": 9.377014198231807e-06, "loss": 0.3828, "step": 2917 }, { "epoch": 0.7339034205231388, "grad_norm": 0.36852389574050903, "learning_rate": 9.376306685521784e-06, "loss": 0.3723, "step": 2918 }, { "epoch": 0.7341549295774648, "grad_norm": 0.3421872854232788, "learning_rate": 9.375598798008468e-06, "loss": 0.3778, "step": 2919 }, { "epoch": 0.7344064386317908, "grad_norm": 0.37425142526626587, "learning_rate": 9.37489053575249e-06, "loss": 0.3804, "step": 2920 }, { "epoch": 0.7346579476861167, "grad_norm": 0.3890654444694519, "learning_rate": 9.374181898814508e-06, "loss": 0.3768, "step": 2921 }, { "epoch": 0.7349094567404426, "grad_norm": 0.35745400190353394, "learning_rate": 9.373472887255209e-06, "loss": 0.3843, "step": 2922 }, { "epoch": 0.7351609657947686, "grad_norm": 0.3883761465549469, "learning_rate": 9.372763501135319e-06, "loss": 0.3916, "step": 2923 }, { "epoch": 0.7354124748490946, "grad_norm": 0.393292635679245, "learning_rate": 9.37205374051559e-06, "loss": 0.3977, "step": 2924 }, { "epoch": 0.7356639839034205, "grad_norm": 0.34699979424476624, "learning_rate": 9.37134360545681e-06, "loss": 0.383, "step": 2925 }, { "epoch": 0.7359154929577465, "grad_norm": 0.40010884404182434, "learning_rate": 9.370633096019799e-06, "loss": 0.3855, "step": 2926 }, { "epoch": 0.7361670020120724, "grad_norm": 0.36946627497673035, "learning_rate": 9.369922212265403e-06, "loss": 0.3852, "step": 2927 }, { "epoch": 0.7364185110663984, "grad_norm": 0.4117673635482788, "learning_rate": 9.36921095425451e-06, "loss": 0.3849, "step": 2928 }, { "epoch": 0.7366700201207244, "grad_norm": 0.36381223797798157, "learning_rate": 9.368499322048031e-06, "loss": 0.3921, "step": 2929 }, { "epoch": 0.7369215291750503, "grad_norm": 0.3884536027908325, "learning_rate": 9.367787315706916e-06, "loss": 0.3812, "step": 2930 }, { "epoch": 0.7371730382293763, "grad_norm": 0.35373517870903015, "learning_rate": 9.36707493529214e-06, "loss": 0.3656, "step": 2931 }, { "epoch": 0.7374245472837022, "grad_norm": 0.3612503111362457, "learning_rate": 9.366362180864718e-06, "loss": 0.3782, "step": 2932 }, { "epoch": 0.7376760563380281, "grad_norm": 0.37395453453063965, "learning_rate": 9.36564905248569e-06, "loss": 0.389, "step": 2933 }, { "epoch": 0.7379275653923542, "grad_norm": 0.3890708088874817, "learning_rate": 9.364935550216133e-06, "loss": 0.3922, "step": 2934 }, { "epoch": 0.7381790744466801, "grad_norm": 0.38554948568344116, "learning_rate": 9.364221674117151e-06, "loss": 0.4182, "step": 2935 }, { "epoch": 0.738430583501006, "grad_norm": 0.3420090079307556, "learning_rate": 9.363507424249887e-06, "loss": 0.4097, "step": 2936 }, { "epoch": 0.738682092555332, "grad_norm": 0.3459598124027252, "learning_rate": 9.362792800675511e-06, "loss": 0.3826, "step": 2937 }, { "epoch": 0.7389336016096579, "grad_norm": 0.35301342606544495, "learning_rate": 9.362077803455223e-06, "loss": 0.3986, "step": 2938 }, { "epoch": 0.739185110663984, "grad_norm": 0.37958618998527527, "learning_rate": 9.361362432650261e-06, "loss": 0.4021, "step": 2939 }, { "epoch": 0.7394366197183099, "grad_norm": 0.362627238035202, "learning_rate": 9.360646688321891e-06, "loss": 0.3895, "step": 2940 }, { "epoch": 0.7396881287726358, "grad_norm": 0.37005671858787537, "learning_rate": 9.359930570531412e-06, "loss": 0.3918, "step": 2941 }, { "epoch": 0.7399396378269618, "grad_norm": 0.34973013401031494, "learning_rate": 9.359214079340158e-06, "loss": 0.3559, "step": 2942 }, { "epoch": 0.7401911468812877, "grad_norm": 0.4032422602176666, "learning_rate": 9.358497214809485e-06, "loss": 0.3907, "step": 2943 }, { "epoch": 0.7404426559356136, "grad_norm": 0.36071479320526123, "learning_rate": 9.357779977000796e-06, "loss": 0.4044, "step": 2944 }, { "epoch": 0.7406941649899397, "grad_norm": 0.4488549828529358, "learning_rate": 9.357062365975511e-06, "loss": 0.3727, "step": 2945 }, { "epoch": 0.7409456740442656, "grad_norm": 0.37173953652381897, "learning_rate": 9.356344381795094e-06, "loss": 0.3777, "step": 2946 }, { "epoch": 0.7411971830985915, "grad_norm": 0.3299272954463959, "learning_rate": 9.355626024521035e-06, "loss": 0.3997, "step": 2947 }, { "epoch": 0.7414486921529175, "grad_norm": 0.33907902240753174, "learning_rate": 9.354907294214853e-06, "loss": 0.3877, "step": 2948 }, { "epoch": 0.7417002012072434, "grad_norm": 0.3761814832687378, "learning_rate": 9.354188190938108e-06, "loss": 0.3763, "step": 2949 }, { "epoch": 0.7419517102615694, "grad_norm": 0.37312766909599304, "learning_rate": 9.353468714752381e-06, "loss": 0.3922, "step": 2950 }, { "epoch": 0.7422032193158954, "grad_norm": 0.3855811059474945, "learning_rate": 9.352748865719296e-06, "loss": 0.3519, "step": 2951 }, { "epoch": 0.7424547283702213, "grad_norm": 0.3510308861732483, "learning_rate": 9.352028643900502e-06, "loss": 0.3622, "step": 2952 }, { "epoch": 0.7427062374245473, "grad_norm": 0.36363258957862854, "learning_rate": 9.351308049357679e-06, "loss": 0.403, "step": 2953 }, { "epoch": 0.7429577464788732, "grad_norm": 0.35569775104522705, "learning_rate": 9.350587082152544e-06, "loss": 0.3811, "step": 2954 }, { "epoch": 0.7432092555331992, "grad_norm": 0.32615721225738525, "learning_rate": 9.349865742346842e-06, "loss": 0.3984, "step": 2955 }, { "epoch": 0.7434607645875252, "grad_norm": 0.3774353563785553, "learning_rate": 9.349144030002353e-06, "loss": 0.379, "step": 2956 }, { "epoch": 0.7437122736418511, "grad_norm": 0.350462943315506, "learning_rate": 9.348421945180885e-06, "loss": 0.3865, "step": 2957 }, { "epoch": 0.743963782696177, "grad_norm": 0.36871710419654846, "learning_rate": 9.347699487944282e-06, "loss": 0.3823, "step": 2958 }, { "epoch": 0.744215291750503, "grad_norm": 0.35802212357521057, "learning_rate": 9.346976658354417e-06, "loss": 0.3924, "step": 2959 }, { "epoch": 0.744466800804829, "grad_norm": 0.3417361080646515, "learning_rate": 9.346253456473196e-06, "loss": 0.3814, "step": 2960 }, { "epoch": 0.7447183098591549, "grad_norm": 0.38555172085762024, "learning_rate": 9.345529882362554e-06, "loss": 0.3986, "step": 2961 }, { "epoch": 0.7449698189134809, "grad_norm": 0.35595080256462097, "learning_rate": 9.344805936084466e-06, "loss": 0.3729, "step": 2962 }, { "epoch": 0.7452213279678068, "grad_norm": 0.36647212505340576, "learning_rate": 9.344081617700929e-06, "loss": 0.3817, "step": 2963 }, { "epoch": 0.7454728370221329, "grad_norm": 0.38703134655952454, "learning_rate": 9.343356927273978e-06, "loss": 0.3971, "step": 2964 }, { "epoch": 0.7457243460764588, "grad_norm": 0.3738643229007721, "learning_rate": 9.342631864865678e-06, "loss": 0.3807, "step": 2965 }, { "epoch": 0.7459758551307847, "grad_norm": 0.3684743046760559, "learning_rate": 9.341906430538129e-06, "loss": 0.3829, "step": 2966 }, { "epoch": 0.7462273641851107, "grad_norm": 0.37854307889938354, "learning_rate": 9.341180624353454e-06, "loss": 0.3737, "step": 2967 }, { "epoch": 0.7464788732394366, "grad_norm": 0.3924698829650879, "learning_rate": 9.34045444637382e-06, "loss": 0.3751, "step": 2968 }, { "epoch": 0.7467303822937625, "grad_norm": 0.3787640333175659, "learning_rate": 9.339727896661413e-06, "loss": 0.3736, "step": 2969 }, { "epoch": 0.7469818913480886, "grad_norm": 0.351458877325058, "learning_rate": 9.339000975278463e-06, "loss": 0.3986, "step": 2970 }, { "epoch": 0.7472334004024145, "grad_norm": 0.3286757469177246, "learning_rate": 9.338273682287222e-06, "loss": 0.3831, "step": 2971 }, { "epoch": 0.7474849094567404, "grad_norm": 0.3744126558303833, "learning_rate": 9.337546017749981e-06, "loss": 0.3868, "step": 2972 }, { "epoch": 0.7477364185110664, "grad_norm": 0.3369508981704712, "learning_rate": 9.33681798172906e-06, "loss": 0.3785, "step": 2973 }, { "epoch": 0.7479879275653923, "grad_norm": 0.3627220690250397, "learning_rate": 9.33608957428681e-06, "loss": 0.3718, "step": 2974 }, { "epoch": 0.7482394366197183, "grad_norm": 0.36162272095680237, "learning_rate": 9.335360795485615e-06, "loss": 0.3863, "step": 2975 }, { "epoch": 0.7484909456740443, "grad_norm": 0.3735608160495758, "learning_rate": 9.334631645387888e-06, "loss": 0.3625, "step": 2976 }, { "epoch": 0.7487424547283702, "grad_norm": 0.34616851806640625, "learning_rate": 9.33390212405608e-06, "loss": 0.3572, "step": 2977 }, { "epoch": 0.7489939637826962, "grad_norm": 0.35577431321144104, "learning_rate": 9.333172231552666e-06, "loss": 0.3677, "step": 2978 }, { "epoch": 0.7492454728370221, "grad_norm": 0.3937419652938843, "learning_rate": 9.332441967940161e-06, "loss": 0.3848, "step": 2979 }, { "epoch": 0.7494969818913481, "grad_norm": 0.3335328996181488, "learning_rate": 9.331711333281101e-06, "loss": 0.3998, "step": 2980 }, { "epoch": 0.7497484909456741, "grad_norm": 0.37114816904067993, "learning_rate": 9.330980327638068e-06, "loss": 0.3803, "step": 2981 }, { "epoch": 0.75, "grad_norm": 0.4506068229675293, "learning_rate": 9.330248951073664e-06, "loss": 0.4137, "step": 2982 }, { "epoch": 0.7502515090543259, "grad_norm": 0.3454316556453705, "learning_rate": 9.329517203650526e-06, "loss": 0.3696, "step": 2983 }, { "epoch": 0.7505030181086519, "grad_norm": 0.38480788469314575, "learning_rate": 9.328785085431326e-06, "loss": 0.3937, "step": 2984 }, { "epoch": 0.7507545271629779, "grad_norm": 0.4474145472049713, "learning_rate": 9.328052596478763e-06, "loss": 0.3948, "step": 2985 }, { "epoch": 0.7510060362173038, "grad_norm": 0.3649021089076996, "learning_rate": 9.327319736855574e-06, "loss": 0.3645, "step": 2986 }, { "epoch": 0.7512575452716298, "grad_norm": 0.37888169288635254, "learning_rate": 9.326586506624517e-06, "loss": 0.3737, "step": 2987 }, { "epoch": 0.7515090543259557, "grad_norm": 0.38091379404067993, "learning_rate": 9.325852905848396e-06, "loss": 0.421, "step": 2988 }, { "epoch": 0.7517605633802817, "grad_norm": 0.38274237513542175, "learning_rate": 9.325118934590036e-06, "loss": 0.3658, "step": 2989 }, { "epoch": 0.7520120724346077, "grad_norm": 0.38675278425216675, "learning_rate": 9.324384592912295e-06, "loss": 0.3835, "step": 2990 }, { "epoch": 0.7522635814889336, "grad_norm": 0.3240179717540741, "learning_rate": 9.323649880878069e-06, "loss": 0.3706, "step": 2991 }, { "epoch": 0.7525150905432596, "grad_norm": 0.4843953549861908, "learning_rate": 9.322914798550277e-06, "loss": 0.4072, "step": 2992 }, { "epoch": 0.7527665995975855, "grad_norm": 0.40057262778282166, "learning_rate": 9.32217934599188e-06, "loss": 0.3835, "step": 2993 }, { "epoch": 0.7530181086519114, "grad_norm": 0.3491225838661194, "learning_rate": 9.321443523265858e-06, "loss": 0.3848, "step": 2994 }, { "epoch": 0.7532696177062375, "grad_norm": 0.4448799192905426, "learning_rate": 9.320707330435235e-06, "loss": 0.3585, "step": 2995 }, { "epoch": 0.7535211267605634, "grad_norm": 0.3571915328502655, "learning_rate": 9.319970767563061e-06, "loss": 0.4124, "step": 2996 }, { "epoch": 0.7537726358148893, "grad_norm": 0.3818497657775879, "learning_rate": 9.319233834712413e-06, "loss": 0.3861, "step": 2997 }, { "epoch": 0.7540241448692153, "grad_norm": 0.4860544204711914, "learning_rate": 9.318496531946411e-06, "loss": 0.3887, "step": 2998 }, { "epoch": 0.7542756539235412, "grad_norm": 0.3781794011592865, "learning_rate": 9.317758859328194e-06, "loss": 0.3826, "step": 2999 }, { "epoch": 0.7545271629778671, "grad_norm": 0.3716043531894684, "learning_rate": 9.317020816920945e-06, "loss": 0.365, "step": 3000 }, { "epoch": 0.7547786720321932, "grad_norm": 0.39104846119880676, "learning_rate": 9.31628240478787e-06, "loss": 0.3894, "step": 3001 }, { "epoch": 0.7550301810865191, "grad_norm": 0.3662312924861908, "learning_rate": 9.31554362299221e-06, "loss": 0.3853, "step": 3002 }, { "epoch": 0.7552816901408451, "grad_norm": 0.4046803414821625, "learning_rate": 9.314804471597235e-06, "loss": 0.3735, "step": 3003 }, { "epoch": 0.755533199195171, "grad_norm": 0.36208009719848633, "learning_rate": 9.314064950666252e-06, "loss": 0.4042, "step": 3004 }, { "epoch": 0.755784708249497, "grad_norm": 0.33379390835762024, "learning_rate": 9.313325060262594e-06, "loss": 0.3717, "step": 3005 }, { "epoch": 0.756036217303823, "grad_norm": 0.3620927035808563, "learning_rate": 9.312584800449629e-06, "loss": 0.4094, "step": 3006 }, { "epoch": 0.7562877263581489, "grad_norm": 0.3750810921192169, "learning_rate": 9.311844171290755e-06, "loss": 0.3692, "step": 3007 }, { "epoch": 0.7565392354124748, "grad_norm": 0.35461145639419556, "learning_rate": 9.311103172849404e-06, "loss": 0.4094, "step": 3008 }, { "epoch": 0.7567907444668008, "grad_norm": 0.37598538398742676, "learning_rate": 9.310361805189033e-06, "loss": 0.3806, "step": 3009 }, { "epoch": 0.7570422535211268, "grad_norm": 0.4294974207878113, "learning_rate": 9.309620068373143e-06, "loss": 0.4, "step": 3010 }, { "epoch": 0.7572937625754527, "grad_norm": 0.3354276716709137, "learning_rate": 9.308877962465251e-06, "loss": 0.3908, "step": 3011 }, { "epoch": 0.7575452716297787, "grad_norm": 0.38359084725379944, "learning_rate": 9.308135487528919e-06, "loss": 0.3721, "step": 3012 }, { "epoch": 0.7577967806841046, "grad_norm": 0.37038716673851013, "learning_rate": 9.307392643627736e-06, "loss": 0.3704, "step": 3013 }, { "epoch": 0.7580482897384306, "grad_norm": 0.34647151827812195, "learning_rate": 9.306649430825318e-06, "loss": 0.3985, "step": 3014 }, { "epoch": 0.7582997987927566, "grad_norm": 0.3945516347885132, "learning_rate": 9.30590584918532e-06, "loss": 0.3832, "step": 3015 }, { "epoch": 0.7585513078470825, "grad_norm": 0.3935578763484955, "learning_rate": 9.305161898771422e-06, "loss": 0.3768, "step": 3016 }, { "epoch": 0.7588028169014085, "grad_norm": 0.37610089778900146, "learning_rate": 9.304417579647343e-06, "loss": 0.3763, "step": 3017 }, { "epoch": 0.7590543259557344, "grad_norm": 0.3979266881942749, "learning_rate": 9.303672891876825e-06, "loss": 0.4074, "step": 3018 }, { "epoch": 0.7593058350100603, "grad_norm": 0.36932116746902466, "learning_rate": 9.302927835523647e-06, "loss": 0.4137, "step": 3019 }, { "epoch": 0.7595573440643864, "grad_norm": 0.3783572018146515, "learning_rate": 9.302182410651618e-06, "loss": 0.3826, "step": 3020 }, { "epoch": 0.7598088531187123, "grad_norm": 0.34661227464675903, "learning_rate": 9.301436617324584e-06, "loss": 0.3745, "step": 3021 }, { "epoch": 0.7600603621730382, "grad_norm": 0.3569898307323456, "learning_rate": 9.30069045560641e-06, "loss": 0.3919, "step": 3022 }, { "epoch": 0.7603118712273642, "grad_norm": 0.43195685744285583, "learning_rate": 9.299943925561004e-06, "loss": 0.3983, "step": 3023 }, { "epoch": 0.7605633802816901, "grad_norm": 0.35552507638931274, "learning_rate": 9.299197027252302e-06, "loss": 0.3962, "step": 3024 }, { "epoch": 0.760814889336016, "grad_norm": 0.3416507840156555, "learning_rate": 9.29844976074427e-06, "loss": 0.3906, "step": 3025 }, { "epoch": 0.7610663983903421, "grad_norm": 0.3881553113460541, "learning_rate": 9.297702126100906e-06, "loss": 0.3863, "step": 3026 }, { "epoch": 0.761317907444668, "grad_norm": 0.3564288914203644, "learning_rate": 9.296954123386243e-06, "loss": 0.364, "step": 3027 }, { "epoch": 0.761569416498994, "grad_norm": 0.3937084674835205, "learning_rate": 9.29620575266434e-06, "loss": 0.3769, "step": 3028 }, { "epoch": 0.7618209255533199, "grad_norm": 0.3818730115890503, "learning_rate": 9.295457013999291e-06, "loss": 0.3923, "step": 3029 }, { "epoch": 0.7620724346076458, "grad_norm": 0.4041476547718048, "learning_rate": 9.294707907455223e-06, "loss": 0.3904, "step": 3030 }, { "epoch": 0.7623239436619719, "grad_norm": 0.36864545941352844, "learning_rate": 9.293958433096289e-06, "loss": 0.419, "step": 3031 }, { "epoch": 0.7625754527162978, "grad_norm": 0.37678250670433044, "learning_rate": 9.293208590986676e-06, "loss": 0.389, "step": 3032 }, { "epoch": 0.7628269617706237, "grad_norm": 0.35496804118156433, "learning_rate": 9.292458381190608e-06, "loss": 0.3996, "step": 3033 }, { "epoch": 0.7630784708249497, "grad_norm": 0.3487403392791748, "learning_rate": 9.291707803772332e-06, "loss": 0.3565, "step": 3034 }, { "epoch": 0.7633299798792756, "grad_norm": 0.3654000461101532, "learning_rate": 9.290956858796132e-06, "loss": 0.3849, "step": 3035 }, { "epoch": 0.7635814889336016, "grad_norm": 0.3704080879688263, "learning_rate": 9.29020554632632e-06, "loss": 0.37, "step": 3036 }, { "epoch": 0.7638329979879276, "grad_norm": 0.3763080835342407, "learning_rate": 9.289453866427245e-06, "loss": 0.4109, "step": 3037 }, { "epoch": 0.7640845070422535, "grad_norm": 0.38172775506973267, "learning_rate": 9.288701819163279e-06, "loss": 0.3791, "step": 3038 }, { "epoch": 0.7643360160965795, "grad_norm": 0.34394824504852295, "learning_rate": 9.287949404598833e-06, "loss": 0.3799, "step": 3039 }, { "epoch": 0.7645875251509054, "grad_norm": 0.34752610325813293, "learning_rate": 9.287196622798346e-06, "loss": 0.374, "step": 3040 }, { "epoch": 0.7648390342052314, "grad_norm": 0.34505367279052734, "learning_rate": 9.286443473826288e-06, "loss": 0.3854, "step": 3041 }, { "epoch": 0.7650905432595574, "grad_norm": 0.3505760431289673, "learning_rate": 9.285689957747163e-06, "loss": 0.3881, "step": 3042 }, { "epoch": 0.7653420523138833, "grad_norm": 0.36895132064819336, "learning_rate": 9.284936074625503e-06, "loss": 0.3623, "step": 3043 }, { "epoch": 0.7655935613682092, "grad_norm": 0.3329405188560486, "learning_rate": 9.284181824525877e-06, "loss": 0.3676, "step": 3044 }, { "epoch": 0.7658450704225352, "grad_norm": 0.3532065749168396, "learning_rate": 9.283427207512878e-06, "loss": 0.3853, "step": 3045 }, { "epoch": 0.7660965794768612, "grad_norm": 0.3908108174800873, "learning_rate": 9.282672223651137e-06, "loss": 0.3951, "step": 3046 }, { "epoch": 0.7663480885311871, "grad_norm": 0.34189653396606445, "learning_rate": 9.28191687300531e-06, "loss": 0.3953, "step": 3047 }, { "epoch": 0.7665995975855131, "grad_norm": 0.357767790555954, "learning_rate": 9.281161155640093e-06, "loss": 0.402, "step": 3048 }, { "epoch": 0.766851106639839, "grad_norm": 0.361261248588562, "learning_rate": 9.280405071620204e-06, "loss": 0.3652, "step": 3049 }, { "epoch": 0.7671026156941649, "grad_norm": 0.34182727336883545, "learning_rate": 9.2796486210104e-06, "loss": 0.3679, "step": 3050 }, { "epoch": 0.767354124748491, "grad_norm": 0.3388752341270447, "learning_rate": 9.278891803875466e-06, "loss": 0.4031, "step": 3051 }, { "epoch": 0.7676056338028169, "grad_norm": 0.3388610780239105, "learning_rate": 9.278134620280215e-06, "loss": 0.3881, "step": 3052 }, { "epoch": 0.7678571428571429, "grad_norm": 0.32106414437294006, "learning_rate": 9.277377070289498e-06, "loss": 0.3708, "step": 3053 }, { "epoch": 0.7681086519114688, "grad_norm": 0.402169406414032, "learning_rate": 9.276619153968197e-06, "loss": 0.3991, "step": 3054 }, { "epoch": 0.7683601609657947, "grad_norm": 0.32417601346969604, "learning_rate": 9.275860871381217e-06, "loss": 0.3865, "step": 3055 }, { "epoch": 0.7686116700201208, "grad_norm": 0.3850818872451782, "learning_rate": 9.275102222593503e-06, "loss": 0.3899, "step": 3056 }, { "epoch": 0.7688631790744467, "grad_norm": 0.36075320839881897, "learning_rate": 9.27434320767003e-06, "loss": 0.3824, "step": 3057 }, { "epoch": 0.7691146881287726, "grad_norm": 0.37227320671081543, "learning_rate": 9.2735838266758e-06, "loss": 0.3926, "step": 3058 }, { "epoch": 0.7693661971830986, "grad_norm": 0.395997554063797, "learning_rate": 9.272824079675854e-06, "loss": 0.3692, "step": 3059 }, { "epoch": 0.7696177062374245, "grad_norm": 0.3681703507900238, "learning_rate": 9.272063966735253e-06, "loss": 0.4038, "step": 3060 }, { "epoch": 0.7698692152917505, "grad_norm": 0.3632868230342865, "learning_rate": 9.2713034879191e-06, "loss": 0.4239, "step": 3061 }, { "epoch": 0.7701207243460765, "grad_norm": 0.38830870389938354, "learning_rate": 9.270542643292523e-06, "loss": 0.3763, "step": 3062 }, { "epoch": 0.7703722334004024, "grad_norm": 0.35282665491104126, "learning_rate": 9.269781432920688e-06, "loss": 0.3827, "step": 3063 }, { "epoch": 0.7706237424547284, "grad_norm": 0.3645845949649811, "learning_rate": 9.269019856868784e-06, "loss": 0.3984, "step": 3064 }, { "epoch": 0.7708752515090543, "grad_norm": 0.35338932275772095, "learning_rate": 9.268257915202037e-06, "loss": 0.3992, "step": 3065 }, { "epoch": 0.7711267605633803, "grad_norm": 0.3768799304962158, "learning_rate": 9.2674956079857e-06, "loss": 0.3858, "step": 3066 }, { "epoch": 0.7713782696177063, "grad_norm": 0.3796723484992981, "learning_rate": 9.26673293528506e-06, "loss": 0.3858, "step": 3067 }, { "epoch": 0.7716297786720322, "grad_norm": 0.3658340275287628, "learning_rate": 9.26596989716544e-06, "loss": 0.3844, "step": 3068 }, { "epoch": 0.7718812877263581, "grad_norm": 0.41284385323524475, "learning_rate": 9.265206493692185e-06, "loss": 0.3697, "step": 3069 }, { "epoch": 0.7721327967806841, "grad_norm": 0.4010927379131317, "learning_rate": 9.264442724930675e-06, "loss": 0.3509, "step": 3070 }, { "epoch": 0.77238430583501, "grad_norm": 0.37611401081085205, "learning_rate": 9.263678590946326e-06, "loss": 0.3877, "step": 3071 }, { "epoch": 0.772635814889336, "grad_norm": 0.4208700656890869, "learning_rate": 9.26291409180458e-06, "loss": 0.3599, "step": 3072 }, { "epoch": 0.772887323943662, "grad_norm": 0.3623879849910736, "learning_rate": 9.262149227570908e-06, "loss": 0.4031, "step": 3073 }, { "epoch": 0.7731388329979879, "grad_norm": 0.3639488220214844, "learning_rate": 9.261383998310822e-06, "loss": 0.3963, "step": 3074 }, { "epoch": 0.7733903420523138, "grad_norm": 0.4167322814464569, "learning_rate": 9.260618404089853e-06, "loss": 0.376, "step": 3075 }, { "epoch": 0.7736418511066399, "grad_norm": 0.33790600299835205, "learning_rate": 9.259852444973573e-06, "loss": 0.3803, "step": 3076 }, { "epoch": 0.7738933601609658, "grad_norm": 0.4101136028766632, "learning_rate": 9.25908612102758e-06, "loss": 0.3665, "step": 3077 }, { "epoch": 0.7741448692152918, "grad_norm": 0.3587418794631958, "learning_rate": 9.258319432317506e-06, "loss": 0.3875, "step": 3078 }, { "epoch": 0.7743963782696177, "grad_norm": 0.36111775040626526, "learning_rate": 9.257552378909013e-06, "loss": 0.3797, "step": 3079 }, { "epoch": 0.7746478873239436, "grad_norm": 0.4025454521179199, "learning_rate": 9.256784960867793e-06, "loss": 0.3968, "step": 3080 }, { "epoch": 0.7748993963782697, "grad_norm": 0.37682798504829407, "learning_rate": 9.256017178259572e-06, "loss": 0.3583, "step": 3081 }, { "epoch": 0.7751509054325956, "grad_norm": 0.40718135237693787, "learning_rate": 9.255249031150106e-06, "loss": 0.4151, "step": 3082 }, { "epoch": 0.7754024144869215, "grad_norm": 0.45130354166030884, "learning_rate": 9.25448051960518e-06, "loss": 0.3853, "step": 3083 }, { "epoch": 0.7756539235412475, "grad_norm": 0.3748539090156555, "learning_rate": 9.253711643690612e-06, "loss": 0.3902, "step": 3084 }, { "epoch": 0.7759054325955734, "grad_norm": 0.4091399610042572, "learning_rate": 9.252942403472256e-06, "loss": 0.3685, "step": 3085 }, { "epoch": 0.7761569416498993, "grad_norm": 0.5184744000434875, "learning_rate": 9.252172799015989e-06, "loss": 0.4028, "step": 3086 }, { "epoch": 0.7764084507042254, "grad_norm": 0.3658330738544464, "learning_rate": 9.251402830387721e-06, "loss": 0.398, "step": 3087 }, { "epoch": 0.7766599597585513, "grad_norm": 0.4008294641971588, "learning_rate": 9.250632497653398e-06, "loss": 0.392, "step": 3088 }, { "epoch": 0.7769114688128773, "grad_norm": 0.42256030440330505, "learning_rate": 9.249861800878995e-06, "loss": 0.3954, "step": 3089 }, { "epoch": 0.7771629778672032, "grad_norm": 0.38519713282585144, "learning_rate": 9.249090740130515e-06, "loss": 0.3897, "step": 3090 }, { "epoch": 0.7774144869215291, "grad_norm": 0.41861796379089355, "learning_rate": 9.248319315473995e-06, "loss": 0.3863, "step": 3091 }, { "epoch": 0.7776659959758552, "grad_norm": 0.41915813088417053, "learning_rate": 9.247547526975505e-06, "loss": 0.4048, "step": 3092 }, { "epoch": 0.7779175050301811, "grad_norm": 0.34398457407951355, "learning_rate": 9.246775374701139e-06, "loss": 0.3496, "step": 3093 }, { "epoch": 0.778169014084507, "grad_norm": 0.4545997679233551, "learning_rate": 9.246002858717031e-06, "loss": 0.3996, "step": 3094 }, { "epoch": 0.778420523138833, "grad_norm": 0.34864526987075806, "learning_rate": 9.245229979089341e-06, "loss": 0.3708, "step": 3095 }, { "epoch": 0.778672032193159, "grad_norm": 0.32429444789886475, "learning_rate": 9.244456735884261e-06, "loss": 0.3773, "step": 3096 }, { "epoch": 0.7789235412474849, "grad_norm": 0.3772585093975067, "learning_rate": 9.243683129168016e-06, "loss": 0.3615, "step": 3097 }, { "epoch": 0.7791750503018109, "grad_norm": 0.4083939492702484, "learning_rate": 9.242909159006858e-06, "loss": 0.4064, "step": 3098 }, { "epoch": 0.7794265593561368, "grad_norm": 0.3335086703300476, "learning_rate": 9.242134825467076e-06, "loss": 0.346, "step": 3099 }, { "epoch": 0.7796780684104627, "grad_norm": 0.3571106493473053, "learning_rate": 9.241360128614984e-06, "loss": 0.3438, "step": 3100 }, { "epoch": 0.7799295774647887, "grad_norm": 0.3634457588195801, "learning_rate": 9.24058506851693e-06, "loss": 0.3714, "step": 3101 }, { "epoch": 0.7801810865191147, "grad_norm": 0.36663031578063965, "learning_rate": 9.239809645239295e-06, "loss": 0.4045, "step": 3102 }, { "epoch": 0.7804325955734407, "grad_norm": 0.35045841336250305, "learning_rate": 9.239033858848487e-06, "loss": 0.3919, "step": 3103 }, { "epoch": 0.7806841046277666, "grad_norm": 0.4044874310493469, "learning_rate": 9.238257709410949e-06, "loss": 0.3807, "step": 3104 }, { "epoch": 0.7809356136820925, "grad_norm": 0.3510379493236542, "learning_rate": 9.237481196993152e-06, "loss": 0.3711, "step": 3105 }, { "epoch": 0.7811871227364185, "grad_norm": 0.39423373341560364, "learning_rate": 9.2367043216616e-06, "loss": 0.3912, "step": 3106 }, { "epoch": 0.7814386317907445, "grad_norm": 0.3713517189025879, "learning_rate": 9.23592708348283e-06, "loss": 0.3774, "step": 3107 }, { "epoch": 0.7816901408450704, "grad_norm": 0.3517390787601471, "learning_rate": 9.235149482523402e-06, "loss": 0.3597, "step": 3108 }, { "epoch": 0.7819416498993964, "grad_norm": 0.38510939478874207, "learning_rate": 9.234371518849918e-06, "loss": 0.3465, "step": 3109 }, { "epoch": 0.7821931589537223, "grad_norm": 0.3652108609676361, "learning_rate": 9.233593192529002e-06, "loss": 0.3745, "step": 3110 }, { "epoch": 0.7824446680080482, "grad_norm": 0.39064309000968933, "learning_rate": 9.232814503627316e-06, "loss": 0.3825, "step": 3111 }, { "epoch": 0.7826961770623743, "grad_norm": 0.34675848484039307, "learning_rate": 9.232035452211546e-06, "loss": 0.3928, "step": 3112 }, { "epoch": 0.7829476861167002, "grad_norm": 0.36757782101631165, "learning_rate": 9.231256038348418e-06, "loss": 0.3966, "step": 3113 }, { "epoch": 0.7831991951710262, "grad_norm": 0.36838653683662415, "learning_rate": 9.230476262104678e-06, "loss": 0.3835, "step": 3114 }, { "epoch": 0.7834507042253521, "grad_norm": 0.33910053968429565, "learning_rate": 9.229696123547114e-06, "loss": 0.4, "step": 3115 }, { "epoch": 0.783702213279678, "grad_norm": 0.3448108434677124, "learning_rate": 9.228915622742536e-06, "loss": 0.3819, "step": 3116 }, { "epoch": 0.7839537223340041, "grad_norm": 0.3653436303138733, "learning_rate": 9.228134759757791e-06, "loss": 0.3806, "step": 3117 }, { "epoch": 0.78420523138833, "grad_norm": 0.33516430854797363, "learning_rate": 9.227353534659758e-06, "loss": 0.3677, "step": 3118 }, { "epoch": 0.7844567404426559, "grad_norm": 0.36953532695770264, "learning_rate": 9.226571947515339e-06, "loss": 0.3824, "step": 3119 }, { "epoch": 0.7847082494969819, "grad_norm": 0.37254080176353455, "learning_rate": 9.225789998391473e-06, "loss": 0.3841, "step": 3120 }, { "epoch": 0.7849597585513078, "grad_norm": 0.34900060296058655, "learning_rate": 9.225007687355132e-06, "loss": 0.3699, "step": 3121 }, { "epoch": 0.7852112676056338, "grad_norm": 0.42499294877052307, "learning_rate": 9.224225014473312e-06, "loss": 0.377, "step": 3122 }, { "epoch": 0.7854627766599598, "grad_norm": 0.4103010296821594, "learning_rate": 9.223441979813049e-06, "loss": 0.3935, "step": 3123 }, { "epoch": 0.7857142857142857, "grad_norm": 0.34033411741256714, "learning_rate": 9.222658583441399e-06, "loss": 0.3798, "step": 3124 }, { "epoch": 0.7859657947686117, "grad_norm": 0.45133984088897705, "learning_rate": 9.221874825425461e-06, "loss": 0.3996, "step": 3125 }, { "epoch": 0.7862173038229376, "grad_norm": 0.3784712255001068, "learning_rate": 9.221090705832353e-06, "loss": 0.3937, "step": 3126 }, { "epoch": 0.7864688128772636, "grad_norm": 0.36274659633636475, "learning_rate": 9.220306224729237e-06, "loss": 0.3899, "step": 3127 }, { "epoch": 0.7867203219315896, "grad_norm": 0.36185523867607117, "learning_rate": 9.219521382183291e-06, "loss": 0.38, "step": 3128 }, { "epoch": 0.7869718309859155, "grad_norm": 0.3780527710914612, "learning_rate": 9.21873617826174e-06, "loss": 0.3743, "step": 3129 }, { "epoch": 0.7872233400402414, "grad_norm": 0.3563750088214874, "learning_rate": 9.217950613031826e-06, "loss": 0.3779, "step": 3130 }, { "epoch": 0.7874748490945674, "grad_norm": 0.33744367957115173, "learning_rate": 9.21716468656083e-06, "loss": 0.3847, "step": 3131 }, { "epoch": 0.7877263581488934, "grad_norm": 0.4224430322647095, "learning_rate": 9.216378398916059e-06, "loss": 0.4001, "step": 3132 }, { "epoch": 0.7879778672032193, "grad_norm": 0.3875563144683838, "learning_rate": 9.215591750164856e-06, "loss": 0.3813, "step": 3133 }, { "epoch": 0.7882293762575453, "grad_norm": 0.41393423080444336, "learning_rate": 9.214804740374594e-06, "loss": 0.3688, "step": 3134 }, { "epoch": 0.7884808853118712, "grad_norm": 0.39912310242652893, "learning_rate": 9.214017369612672e-06, "loss": 0.3786, "step": 3135 }, { "epoch": 0.7887323943661971, "grad_norm": 0.3480337858200073, "learning_rate": 9.213229637946526e-06, "loss": 0.3602, "step": 3136 }, { "epoch": 0.7889839034205232, "grad_norm": 0.3411673605442047, "learning_rate": 9.21244154544362e-06, "loss": 0.3987, "step": 3137 }, { "epoch": 0.7892354124748491, "grad_norm": 0.3513990342617035, "learning_rate": 9.211653092171447e-06, "loss": 0.3716, "step": 3138 }, { "epoch": 0.7894869215291751, "grad_norm": 0.3667123317718506, "learning_rate": 9.210864278197536e-06, "loss": 0.3672, "step": 3139 }, { "epoch": 0.789738430583501, "grad_norm": 0.3660683333873749, "learning_rate": 9.210075103589443e-06, "loss": 0.4099, "step": 3140 }, { "epoch": 0.7899899396378269, "grad_norm": 0.3497997224330902, "learning_rate": 9.209285568414755e-06, "loss": 0.3661, "step": 3141 }, { "epoch": 0.790241448692153, "grad_norm": 0.3466975688934326, "learning_rate": 9.20849567274109e-06, "loss": 0.3909, "step": 3142 }, { "epoch": 0.7904929577464789, "grad_norm": 0.3633776009082794, "learning_rate": 9.2077054166361e-06, "loss": 0.3701, "step": 3143 }, { "epoch": 0.7907444668008048, "grad_norm": 0.36884036660194397, "learning_rate": 9.206914800167463e-06, "loss": 0.3917, "step": 3144 }, { "epoch": 0.7909959758551308, "grad_norm": 0.34670379757881165, "learning_rate": 9.206123823402894e-06, "loss": 0.4399, "step": 3145 }, { "epoch": 0.7912474849094567, "grad_norm": 0.3583228588104248, "learning_rate": 9.205332486410133e-06, "loss": 0.4055, "step": 3146 }, { "epoch": 0.7914989939637826, "grad_norm": 0.38067370653152466, "learning_rate": 9.204540789256951e-06, "loss": 0.4284, "step": 3147 }, { "epoch": 0.7917505030181087, "grad_norm": 0.3454825282096863, "learning_rate": 9.203748732011154e-06, "loss": 0.4033, "step": 3148 }, { "epoch": 0.7920020120724346, "grad_norm": 0.3998292088508606, "learning_rate": 9.202956314740578e-06, "loss": 0.3992, "step": 3149 }, { "epoch": 0.7922535211267606, "grad_norm": 0.3713396489620209, "learning_rate": 9.202163537513088e-06, "loss": 0.3693, "step": 3150 }, { "epoch": 0.7925050301810865, "grad_norm": 0.393930047750473, "learning_rate": 9.201370400396578e-06, "loss": 0.3837, "step": 3151 }, { "epoch": 0.7927565392354124, "grad_norm": 0.388494610786438, "learning_rate": 9.200576903458978e-06, "loss": 0.3694, "step": 3152 }, { "epoch": 0.7930080482897385, "grad_norm": 0.4377411901950836, "learning_rate": 9.199783046768245e-06, "loss": 0.4008, "step": 3153 }, { "epoch": 0.7932595573440644, "grad_norm": 0.3396592438220978, "learning_rate": 9.198988830392365e-06, "loss": 0.3754, "step": 3154 }, { "epoch": 0.7935110663983903, "grad_norm": 0.3519555926322937, "learning_rate": 9.198194254399364e-06, "loss": 0.3816, "step": 3155 }, { "epoch": 0.7937625754527163, "grad_norm": 0.368377149105072, "learning_rate": 9.197399318857288e-06, "loss": 0.371, "step": 3156 }, { "epoch": 0.7940140845070423, "grad_norm": 0.3477652370929718, "learning_rate": 9.19660402383422e-06, "loss": 0.3841, "step": 3157 }, { "epoch": 0.7942655935613682, "grad_norm": 0.37322860956192017, "learning_rate": 9.19580836939827e-06, "loss": 0.3886, "step": 3158 }, { "epoch": 0.7945171026156942, "grad_norm": 0.39691033959388733, "learning_rate": 9.195012355617581e-06, "loss": 0.373, "step": 3159 }, { "epoch": 0.7947686116700201, "grad_norm": 0.355160117149353, "learning_rate": 9.194215982560328e-06, "loss": 0.3933, "step": 3160 }, { "epoch": 0.795020120724346, "grad_norm": 0.36236023902893066, "learning_rate": 9.193419250294717e-06, "loss": 0.4166, "step": 3161 }, { "epoch": 0.795271629778672, "grad_norm": 0.35770562291145325, "learning_rate": 9.192622158888979e-06, "loss": 0.3911, "step": 3162 }, { "epoch": 0.795523138832998, "grad_norm": 0.36751723289489746, "learning_rate": 9.191824708411384e-06, "loss": 0.3973, "step": 3163 }, { "epoch": 0.795774647887324, "grad_norm": 0.365925133228302, "learning_rate": 9.191026898930224e-06, "loss": 0.3763, "step": 3164 }, { "epoch": 0.7960261569416499, "grad_norm": 0.3750317394733429, "learning_rate": 9.190228730513832e-06, "loss": 0.3783, "step": 3165 }, { "epoch": 0.7962776659959758, "grad_norm": 0.343177855014801, "learning_rate": 9.189430203230562e-06, "loss": 0.3744, "step": 3166 }, { "epoch": 0.7965291750503019, "grad_norm": 0.3460996150970459, "learning_rate": 9.188631317148804e-06, "loss": 0.4047, "step": 3167 }, { "epoch": 0.7967806841046278, "grad_norm": 0.38344576954841614, "learning_rate": 9.187832072336977e-06, "loss": 0.3893, "step": 3168 }, { "epoch": 0.7970321931589537, "grad_norm": 0.36154693365097046, "learning_rate": 9.187032468863532e-06, "loss": 0.3512, "step": 3169 }, { "epoch": 0.7972837022132797, "grad_norm": 0.38037383556365967, "learning_rate": 9.186232506796952e-06, "loss": 0.382, "step": 3170 }, { "epoch": 0.7975352112676056, "grad_norm": 0.33756163716316223, "learning_rate": 9.185432186205744e-06, "loss": 0.3945, "step": 3171 }, { "epoch": 0.7977867203219315, "grad_norm": 0.394203245639801, "learning_rate": 9.184631507158456e-06, "loss": 0.3817, "step": 3172 }, { "epoch": 0.7980382293762576, "grad_norm": 0.37978580594062805, "learning_rate": 9.183830469723658e-06, "loss": 0.3803, "step": 3173 }, { "epoch": 0.7982897384305835, "grad_norm": 0.36637356877326965, "learning_rate": 9.183029073969953e-06, "loss": 0.3997, "step": 3174 }, { "epoch": 0.7985412474849095, "grad_norm": 0.40284034609794617, "learning_rate": 9.182227319965978e-06, "loss": 0.3895, "step": 3175 }, { "epoch": 0.7987927565392354, "grad_norm": 0.34037232398986816, "learning_rate": 9.181425207780396e-06, "loss": 0.3971, "step": 3176 }, { "epoch": 0.7990442655935613, "grad_norm": 0.3634487986564636, "learning_rate": 9.180622737481904e-06, "loss": 0.3883, "step": 3177 }, { "epoch": 0.7992957746478874, "grad_norm": 0.3719540238380432, "learning_rate": 9.179819909139228e-06, "loss": 0.3946, "step": 3178 }, { "epoch": 0.7995472837022133, "grad_norm": 0.34387439489364624, "learning_rate": 9.179016722821126e-06, "loss": 0.3701, "step": 3179 }, { "epoch": 0.7997987927565392, "grad_norm": 0.41140124201774597, "learning_rate": 9.178213178596386e-06, "loss": 0.3822, "step": 3180 }, { "epoch": 0.8000503018108652, "grad_norm": 0.3771357238292694, "learning_rate": 9.177409276533825e-06, "loss": 0.4017, "step": 3181 }, { "epoch": 0.8003018108651911, "grad_norm": 0.37716689705848694, "learning_rate": 9.176605016702294e-06, "loss": 0.3574, "step": 3182 }, { "epoch": 0.8005533199195171, "grad_norm": 0.357952356338501, "learning_rate": 9.175800399170673e-06, "loss": 0.3653, "step": 3183 }, { "epoch": 0.8008048289738431, "grad_norm": 0.3895781636238098, "learning_rate": 9.17499542400787e-06, "loss": 0.3879, "step": 3184 }, { "epoch": 0.801056338028169, "grad_norm": 0.37393030524253845, "learning_rate": 9.174190091282828e-06, "loss": 0.3834, "step": 3185 }, { "epoch": 0.8013078470824949, "grad_norm": 0.4014933407306671, "learning_rate": 9.173384401064519e-06, "loss": 0.3581, "step": 3186 }, { "epoch": 0.8015593561368209, "grad_norm": 0.39869409799575806, "learning_rate": 9.172578353421943e-06, "loss": 0.3896, "step": 3187 }, { "epoch": 0.8018108651911469, "grad_norm": 0.45484480261802673, "learning_rate": 9.171771948424138e-06, "loss": 0.3831, "step": 3188 }, { "epoch": 0.8020623742454729, "grad_norm": 0.4879845380783081, "learning_rate": 9.17096518614016e-06, "loss": 0.3998, "step": 3189 }, { "epoch": 0.8023138832997988, "grad_norm": 0.37346404790878296, "learning_rate": 9.17015806663911e-06, "loss": 0.3768, "step": 3190 }, { "epoch": 0.8025653923541247, "grad_norm": 0.38394322991371155, "learning_rate": 9.169350589990109e-06, "loss": 0.37, "step": 3191 }, { "epoch": 0.8028169014084507, "grad_norm": 0.42078500986099243, "learning_rate": 9.168542756262313e-06, "loss": 0.373, "step": 3192 }, { "epoch": 0.8030684104627767, "grad_norm": 0.37299197912216187, "learning_rate": 9.16773456552491e-06, "loss": 0.3727, "step": 3193 }, { "epoch": 0.8033199195171026, "grad_norm": 0.36001479625701904, "learning_rate": 9.166926017847113e-06, "loss": 0.3708, "step": 3194 }, { "epoch": 0.8035714285714286, "grad_norm": 0.4601292014122009, "learning_rate": 9.166117113298172e-06, "loss": 0.3842, "step": 3195 }, { "epoch": 0.8038229376257545, "grad_norm": 0.3911070227622986, "learning_rate": 9.165307851947362e-06, "loss": 0.3644, "step": 3196 }, { "epoch": 0.8040744466800804, "grad_norm": 0.3885655701160431, "learning_rate": 9.164498233863994e-06, "loss": 0.3674, "step": 3197 }, { "epoch": 0.8043259557344065, "grad_norm": 0.46075260639190674, "learning_rate": 9.163688259117405e-06, "loss": 0.3889, "step": 3198 }, { "epoch": 0.8045774647887324, "grad_norm": 0.4196132719516754, "learning_rate": 9.162877927776963e-06, "loss": 0.428, "step": 3199 }, { "epoch": 0.8048289738430584, "grad_norm": 0.38232311606407166, "learning_rate": 9.162067239912072e-06, "loss": 0.4044, "step": 3200 }, { "epoch": 0.8050804828973843, "grad_norm": 0.39964160323143005, "learning_rate": 9.161256195592157e-06, "loss": 0.3878, "step": 3201 }, { "epoch": 0.8053319919517102, "grad_norm": 0.3779206871986389, "learning_rate": 9.160444794886682e-06, "loss": 0.3822, "step": 3202 }, { "epoch": 0.8055835010060363, "grad_norm": 0.3651765286922455, "learning_rate": 9.15963303786514e-06, "loss": 0.3952, "step": 3203 }, { "epoch": 0.8058350100603622, "grad_norm": 0.3684598207473755, "learning_rate": 9.15882092459705e-06, "loss": 0.3693, "step": 3204 }, { "epoch": 0.8060865191146881, "grad_norm": 0.35791802406311035, "learning_rate": 9.158008455151965e-06, "loss": 0.4058, "step": 3205 }, { "epoch": 0.8063380281690141, "grad_norm": 0.3659406900405884, "learning_rate": 9.157195629599468e-06, "loss": 0.3879, "step": 3206 }, { "epoch": 0.80658953722334, "grad_norm": 0.3828873932361603, "learning_rate": 9.156382448009173e-06, "loss": 0.3876, "step": 3207 }, { "epoch": 0.806841046277666, "grad_norm": 0.33085519075393677, "learning_rate": 9.155568910450722e-06, "loss": 0.3745, "step": 3208 }, { "epoch": 0.807092555331992, "grad_norm": 0.35317882895469666, "learning_rate": 9.154755016993794e-06, "loss": 0.3587, "step": 3209 }, { "epoch": 0.8073440643863179, "grad_norm": 0.37781715393066406, "learning_rate": 9.15394076770809e-06, "loss": 0.4005, "step": 3210 }, { "epoch": 0.8075955734406438, "grad_norm": 0.33460843563079834, "learning_rate": 9.153126162663343e-06, "loss": 0.3969, "step": 3211 }, { "epoch": 0.8078470824949698, "grad_norm": 0.4079720377922058, "learning_rate": 9.152311201929326e-06, "loss": 0.3821, "step": 3212 }, { "epoch": 0.8080985915492958, "grad_norm": 0.3783186972141266, "learning_rate": 9.15149588557583e-06, "loss": 0.3985, "step": 3213 }, { "epoch": 0.8083501006036218, "grad_norm": 0.33954814076423645, "learning_rate": 9.150680213672683e-06, "loss": 0.4062, "step": 3214 }, { "epoch": 0.8086016096579477, "grad_norm": 0.4284415543079376, "learning_rate": 9.149864186289743e-06, "loss": 0.3759, "step": 3215 }, { "epoch": 0.8088531187122736, "grad_norm": 0.349122554063797, "learning_rate": 9.149047803496896e-06, "loss": 0.3752, "step": 3216 }, { "epoch": 0.8091046277665996, "grad_norm": 0.3756011426448822, "learning_rate": 9.148231065364062e-06, "loss": 0.3826, "step": 3217 }, { "epoch": 0.8093561368209256, "grad_norm": 0.42645543813705444, "learning_rate": 9.147413971961187e-06, "loss": 0.404, "step": 3218 }, { "epoch": 0.8096076458752515, "grad_norm": 0.35553669929504395, "learning_rate": 9.146596523358252e-06, "loss": 0.3921, "step": 3219 }, { "epoch": 0.8098591549295775, "grad_norm": 0.38775989413261414, "learning_rate": 9.145778719625266e-06, "loss": 0.3986, "step": 3220 }, { "epoch": 0.8101106639839034, "grad_norm": 0.3600097596645355, "learning_rate": 9.144960560832268e-06, "loss": 0.385, "step": 3221 }, { "epoch": 0.8103621730382293, "grad_norm": 0.3468279540538788, "learning_rate": 9.144142047049329e-06, "loss": 0.3786, "step": 3222 }, { "epoch": 0.8106136820925554, "grad_norm": 0.3529747426509857, "learning_rate": 9.14332317834655e-06, "loss": 0.3729, "step": 3223 }, { "epoch": 0.8108651911468813, "grad_norm": 0.31644824147224426, "learning_rate": 9.14250395479406e-06, "loss": 0.3905, "step": 3224 }, { "epoch": 0.8111167002012073, "grad_norm": 0.3824257254600525, "learning_rate": 9.141684376462024e-06, "loss": 0.389, "step": 3225 }, { "epoch": 0.8113682092555332, "grad_norm": 0.3192039132118225, "learning_rate": 9.140864443420629e-06, "loss": 0.3734, "step": 3226 }, { "epoch": 0.8116197183098591, "grad_norm": 0.3341809809207916, "learning_rate": 9.140044155740102e-06, "loss": 0.383, "step": 3227 }, { "epoch": 0.8118712273641852, "grad_norm": 0.33594390749931335, "learning_rate": 9.139223513490692e-06, "loss": 0.3706, "step": 3228 }, { "epoch": 0.8121227364185111, "grad_norm": 0.34297066926956177, "learning_rate": 9.138402516742681e-06, "loss": 0.3923, "step": 3229 }, { "epoch": 0.812374245472837, "grad_norm": 0.32909277081489563, "learning_rate": 9.137581165566388e-06, "loss": 0.3715, "step": 3230 }, { "epoch": 0.812625754527163, "grad_norm": 0.35775429010391235, "learning_rate": 9.13675946003215e-06, "loss": 0.3597, "step": 3231 }, { "epoch": 0.8128772635814889, "grad_norm": 0.3310528099536896, "learning_rate": 9.135937400210345e-06, "loss": 0.3929, "step": 3232 }, { "epoch": 0.8131287726358148, "grad_norm": 0.35094767808914185, "learning_rate": 9.135114986171373e-06, "loss": 0.4054, "step": 3233 }, { "epoch": 0.8133802816901409, "grad_norm": 0.38536086678504944, "learning_rate": 9.134292217985675e-06, "loss": 0.4061, "step": 3234 }, { "epoch": 0.8136317907444668, "grad_norm": 0.35083386301994324, "learning_rate": 9.133469095723712e-06, "loss": 0.3607, "step": 3235 }, { "epoch": 0.8138832997987927, "grad_norm": 0.34941115975379944, "learning_rate": 9.13264561945598e-06, "loss": 0.3522, "step": 3236 }, { "epoch": 0.8141348088531187, "grad_norm": 0.3351636826992035, "learning_rate": 9.131821789253003e-06, "loss": 0.3788, "step": 3237 }, { "epoch": 0.8143863179074446, "grad_norm": 0.3241204023361206, "learning_rate": 9.130997605185338e-06, "loss": 0.3929, "step": 3238 }, { "epoch": 0.8146378269617707, "grad_norm": 0.35494330525398254, "learning_rate": 9.130173067323575e-06, "loss": 0.4019, "step": 3239 }, { "epoch": 0.8148893360160966, "grad_norm": 0.3341788351535797, "learning_rate": 9.129348175738324e-06, "loss": 0.3821, "step": 3240 }, { "epoch": 0.8151408450704225, "grad_norm": 0.34449446201324463, "learning_rate": 9.128522930500237e-06, "loss": 0.3671, "step": 3241 }, { "epoch": 0.8153923541247485, "grad_norm": 0.31433650851249695, "learning_rate": 9.127697331679988e-06, "loss": 0.3803, "step": 3242 }, { "epoch": 0.8156438631790744, "grad_norm": 0.3512547016143799, "learning_rate": 9.126871379348284e-06, "loss": 0.3744, "step": 3243 }, { "epoch": 0.8158953722334004, "grad_norm": 0.3199286162853241, "learning_rate": 9.126045073575865e-06, "loss": 0.4147, "step": 3244 }, { "epoch": 0.8161468812877264, "grad_norm": 0.37143221497535706, "learning_rate": 9.125218414433498e-06, "loss": 0.3738, "step": 3245 }, { "epoch": 0.8163983903420523, "grad_norm": 0.3939398229122162, "learning_rate": 9.124391401991981e-06, "loss": 0.3829, "step": 3246 }, { "epoch": 0.8166498993963782, "grad_norm": 0.34580913186073303, "learning_rate": 9.123564036322143e-06, "loss": 0.3956, "step": 3247 }, { "epoch": 0.8169014084507042, "grad_norm": 0.32879477739334106, "learning_rate": 9.122736317494842e-06, "loss": 0.3746, "step": 3248 }, { "epoch": 0.8171529175050302, "grad_norm": 0.375439316034317, "learning_rate": 9.121908245580967e-06, "loss": 0.3988, "step": 3249 }, { "epoch": 0.8174044265593562, "grad_norm": 0.3741564452648163, "learning_rate": 9.121079820651438e-06, "loss": 0.4051, "step": 3250 }, { "epoch": 0.8176559356136821, "grad_norm": 0.3896329700946808, "learning_rate": 9.120251042777203e-06, "loss": 0.38, "step": 3251 }, { "epoch": 0.817907444668008, "grad_norm": 0.38490694761276245, "learning_rate": 9.119421912029243e-06, "loss": 0.352, "step": 3252 }, { "epoch": 0.818158953722334, "grad_norm": 0.3421596884727478, "learning_rate": 9.118592428478565e-06, "loss": 0.3867, "step": 3253 }, { "epoch": 0.81841046277666, "grad_norm": 0.36744779348373413, "learning_rate": 9.117762592196214e-06, "loss": 0.3598, "step": 3254 }, { "epoch": 0.8186619718309859, "grad_norm": 0.33967792987823486, "learning_rate": 9.116932403253257e-06, "loss": 0.4086, "step": 3255 }, { "epoch": 0.8189134808853119, "grad_norm": 0.35060232877731323, "learning_rate": 9.116101861720793e-06, "loss": 0.3724, "step": 3256 }, { "epoch": 0.8191649899396378, "grad_norm": 0.3623863458633423, "learning_rate": 9.115270967669958e-06, "loss": 0.3924, "step": 3257 }, { "epoch": 0.8194164989939637, "grad_norm": 0.3579019606113434, "learning_rate": 9.114439721171909e-06, "loss": 0.4078, "step": 3258 }, { "epoch": 0.8196680080482898, "grad_norm": 0.3488744795322418, "learning_rate": 9.113608122297836e-06, "loss": 0.4064, "step": 3259 }, { "epoch": 0.8199195171026157, "grad_norm": 0.38803979754447937, "learning_rate": 9.112776171118964e-06, "loss": 0.4151, "step": 3260 }, { "epoch": 0.8201710261569416, "grad_norm": 0.3529983162879944, "learning_rate": 9.11194386770654e-06, "loss": 0.3722, "step": 3261 }, { "epoch": 0.8204225352112676, "grad_norm": 0.39066073298454285, "learning_rate": 9.111111212131851e-06, "loss": 0.4216, "step": 3262 }, { "epoch": 0.8206740442655935, "grad_norm": 0.34698736667633057, "learning_rate": 9.110278204466203e-06, "loss": 0.3725, "step": 3263 }, { "epoch": 0.8209255533199196, "grad_norm": 0.337932288646698, "learning_rate": 9.109444844780942e-06, "loss": 0.4123, "step": 3264 }, { "epoch": 0.8211770623742455, "grad_norm": 0.3363993465900421, "learning_rate": 9.108611133147438e-06, "loss": 0.3716, "step": 3265 }, { "epoch": 0.8214285714285714, "grad_norm": 0.3734219968318939, "learning_rate": 9.107777069637094e-06, "loss": 0.3874, "step": 3266 }, { "epoch": 0.8216800804828974, "grad_norm": 0.32347917556762695, "learning_rate": 9.106942654321343e-06, "loss": 0.358, "step": 3267 }, { "epoch": 0.8219315895372233, "grad_norm": 0.3709772229194641, "learning_rate": 9.106107887271647e-06, "loss": 0.3634, "step": 3268 }, { "epoch": 0.8221830985915493, "grad_norm": 0.3945581018924713, "learning_rate": 9.105272768559496e-06, "loss": 0.379, "step": 3269 }, { "epoch": 0.8224346076458753, "grad_norm": 0.33245036005973816, "learning_rate": 9.104437298256416e-06, "loss": 0.3605, "step": 3270 }, { "epoch": 0.8226861167002012, "grad_norm": 0.37672650814056396, "learning_rate": 9.103601476433959e-06, "loss": 0.389, "step": 3271 }, { "epoch": 0.8229376257545271, "grad_norm": 0.37281307578086853, "learning_rate": 9.102765303163708e-06, "loss": 0.3872, "step": 3272 }, { "epoch": 0.8231891348088531, "grad_norm": 0.3488701581954956, "learning_rate": 9.101928778517275e-06, "loss": 0.3543, "step": 3273 }, { "epoch": 0.8234406438631791, "grad_norm": 0.33811309933662415, "learning_rate": 9.101091902566303e-06, "loss": 0.3833, "step": 3274 }, { "epoch": 0.8236921529175051, "grad_norm": 0.40357568860054016, "learning_rate": 9.100254675382467e-06, "loss": 0.3806, "step": 3275 }, { "epoch": 0.823943661971831, "grad_norm": 0.39533787965774536, "learning_rate": 9.099417097037468e-06, "loss": 0.3767, "step": 3276 }, { "epoch": 0.8241951710261569, "grad_norm": 0.38113778829574585, "learning_rate": 9.098579167603042e-06, "loss": 0.3862, "step": 3277 }, { "epoch": 0.8244466800804829, "grad_norm": 0.38164108991622925, "learning_rate": 9.09774088715095e-06, "loss": 0.3707, "step": 3278 }, { "epoch": 0.8246981891348089, "grad_norm": 0.4421541094779968, "learning_rate": 9.096902255752986e-06, "loss": 0.3845, "step": 3279 }, { "epoch": 0.8249496981891348, "grad_norm": 0.34088340401649475, "learning_rate": 9.096063273480975e-06, "loss": 0.3685, "step": 3280 }, { "epoch": 0.8252012072434608, "grad_norm": 0.44785863161087036, "learning_rate": 9.09522394040677e-06, "loss": 0.3757, "step": 3281 }, { "epoch": 0.8254527162977867, "grad_norm": 0.4285958409309387, "learning_rate": 9.094384256602252e-06, "loss": 0.3549, "step": 3282 }, { "epoch": 0.8257042253521126, "grad_norm": 0.35675740242004395, "learning_rate": 9.093544222139338e-06, "loss": 0.364, "step": 3283 }, { "epoch": 0.8259557344064387, "grad_norm": 0.3988295793533325, "learning_rate": 9.09270383708997e-06, "loss": 0.4011, "step": 3284 }, { "epoch": 0.8262072434607646, "grad_norm": 0.4526250958442688, "learning_rate": 9.091863101526124e-06, "loss": 0.3882, "step": 3285 }, { "epoch": 0.8264587525150905, "grad_norm": 0.416526734828949, "learning_rate": 9.091022015519798e-06, "loss": 0.4004, "step": 3286 }, { "epoch": 0.8267102615694165, "grad_norm": 0.3291431665420532, "learning_rate": 9.090180579143033e-06, "loss": 0.3499, "step": 3287 }, { "epoch": 0.8269617706237424, "grad_norm": 0.34957194328308105, "learning_rate": 9.08933879246789e-06, "loss": 0.36, "step": 3288 }, { "epoch": 0.8272132796780685, "grad_norm": 0.35774147510528564, "learning_rate": 9.08849665556646e-06, "loss": 0.3683, "step": 3289 }, { "epoch": 0.8274647887323944, "grad_norm": 0.34094980359077454, "learning_rate": 9.087654168510871e-06, "loss": 0.3622, "step": 3290 }, { "epoch": 0.8277162977867203, "grad_norm": 0.3316769599914551, "learning_rate": 9.086811331373273e-06, "loss": 0.3855, "step": 3291 }, { "epoch": 0.8279678068410463, "grad_norm": 0.35548505187034607, "learning_rate": 9.085968144225853e-06, "loss": 0.3918, "step": 3292 }, { "epoch": 0.8282193158953722, "grad_norm": 0.38808438181877136, "learning_rate": 9.085124607140822e-06, "loss": 0.3791, "step": 3293 }, { "epoch": 0.8284708249496981, "grad_norm": 0.37164920568466187, "learning_rate": 9.084280720190426e-06, "loss": 0.4012, "step": 3294 }, { "epoch": 0.8287223340040242, "grad_norm": 0.35741862654685974, "learning_rate": 9.083436483446937e-06, "loss": 0.3659, "step": 3295 }, { "epoch": 0.8289738430583501, "grad_norm": 0.3638781011104584, "learning_rate": 9.082591896982658e-06, "loss": 0.3967, "step": 3296 }, { "epoch": 0.829225352112676, "grad_norm": 0.42987996339797974, "learning_rate": 9.081746960869926e-06, "loss": 0.418, "step": 3297 }, { "epoch": 0.829476861167002, "grad_norm": 0.36449959874153137, "learning_rate": 9.0809016751811e-06, "loss": 0.3814, "step": 3298 }, { "epoch": 0.829728370221328, "grad_norm": 0.37069931626319885, "learning_rate": 9.080056039988576e-06, "loss": 0.3668, "step": 3299 }, { "epoch": 0.829979879275654, "grad_norm": 0.3759606182575226, "learning_rate": 9.079210055364777e-06, "loss": 0.3879, "step": 3300 }, { "epoch": 0.8302313883299799, "grad_norm": 0.3514502942562103, "learning_rate": 9.078363721382157e-06, "loss": 0.3783, "step": 3301 }, { "epoch": 0.8304828973843058, "grad_norm": 0.3794296979904175, "learning_rate": 9.077517038113197e-06, "loss": 0.3802, "step": 3302 }, { "epoch": 0.8307344064386318, "grad_norm": 0.3764783442020416, "learning_rate": 9.076670005630413e-06, "loss": 0.3802, "step": 3303 }, { "epoch": 0.8309859154929577, "grad_norm": 0.31016576290130615, "learning_rate": 9.075822624006345e-06, "loss": 0.3664, "step": 3304 }, { "epoch": 0.8312374245472837, "grad_norm": 0.3612141013145447, "learning_rate": 9.074974893313571e-06, "loss": 0.3909, "step": 3305 }, { "epoch": 0.8314889336016097, "grad_norm": 0.34043174982070923, "learning_rate": 9.074126813624687e-06, "loss": 0.3947, "step": 3306 }, { "epoch": 0.8317404426559356, "grad_norm": 0.3125540614128113, "learning_rate": 9.07327838501233e-06, "loss": 0.3821, "step": 3307 }, { "epoch": 0.8319919517102615, "grad_norm": 0.34392425417900085, "learning_rate": 9.072429607549161e-06, "loss": 0.3785, "step": 3308 }, { "epoch": 0.8322434607645876, "grad_norm": 0.3548860251903534, "learning_rate": 9.071580481307875e-06, "loss": 0.394, "step": 3309 }, { "epoch": 0.8324949698189135, "grad_norm": 0.34213536977767944, "learning_rate": 9.070731006361191e-06, "loss": 0.3685, "step": 3310 }, { "epoch": 0.8327464788732394, "grad_norm": 0.40426772832870483, "learning_rate": 9.069881182781864e-06, "loss": 0.3613, "step": 3311 }, { "epoch": 0.8329979879275654, "grad_norm": 0.34350600838661194, "learning_rate": 9.069031010642673e-06, "loss": 0.3672, "step": 3312 }, { "epoch": 0.8332494969818913, "grad_norm": 0.3403952717781067, "learning_rate": 9.068180490016432e-06, "loss": 0.3847, "step": 3313 }, { "epoch": 0.8335010060362174, "grad_norm": 0.4108610153198242, "learning_rate": 9.067329620975983e-06, "loss": 0.4024, "step": 3314 }, { "epoch": 0.8337525150905433, "grad_norm": 0.38602137565612793, "learning_rate": 9.066478403594196e-06, "loss": 0.3723, "step": 3315 }, { "epoch": 0.8340040241448692, "grad_norm": 0.36774957180023193, "learning_rate": 9.065626837943977e-06, "loss": 0.3747, "step": 3316 }, { "epoch": 0.8342555331991952, "grad_norm": 0.39249497652053833, "learning_rate": 9.06477492409825e-06, "loss": 0.3835, "step": 3317 }, { "epoch": 0.8345070422535211, "grad_norm": 0.3368522524833679, "learning_rate": 9.063922662129981e-06, "loss": 0.4076, "step": 3318 }, { "epoch": 0.834758551307847, "grad_norm": 0.37408214807510376, "learning_rate": 9.063070052112161e-06, "loss": 0.398, "step": 3319 }, { "epoch": 0.8350100603621731, "grad_norm": 0.35431167483329773, "learning_rate": 9.06221709411781e-06, "loss": 0.3663, "step": 3320 }, { "epoch": 0.835261569416499, "grad_norm": 0.3219449818134308, "learning_rate": 9.061363788219975e-06, "loss": 0.3794, "step": 3321 }, { "epoch": 0.8355130784708249, "grad_norm": 0.36822405457496643, "learning_rate": 9.060510134491742e-06, "loss": 0.3678, "step": 3322 }, { "epoch": 0.8357645875251509, "grad_norm": 0.32559409737586975, "learning_rate": 9.059656133006216e-06, "loss": 0.3461, "step": 3323 }, { "epoch": 0.8360160965794768, "grad_norm": 0.3630576431751251, "learning_rate": 9.058801783836542e-06, "loss": 0.3485, "step": 3324 }, { "epoch": 0.8362676056338029, "grad_norm": 0.3687560260295868, "learning_rate": 9.057947087055885e-06, "loss": 0.3806, "step": 3325 }, { "epoch": 0.8365191146881288, "grad_norm": 0.3402329981327057, "learning_rate": 9.057092042737447e-06, "loss": 0.3662, "step": 3326 }, { "epoch": 0.8367706237424547, "grad_norm": 0.34224575757980347, "learning_rate": 9.056236650954457e-06, "loss": 0.3971, "step": 3327 }, { "epoch": 0.8370221327967807, "grad_norm": 0.3538998067378998, "learning_rate": 9.055380911780175e-06, "loss": 0.3753, "step": 3328 }, { "epoch": 0.8372736418511066, "grad_norm": 0.3665867745876312, "learning_rate": 9.054524825287885e-06, "loss": 0.3894, "step": 3329 }, { "epoch": 0.8375251509054326, "grad_norm": 0.3492240309715271, "learning_rate": 9.053668391550912e-06, "loss": 0.385, "step": 3330 }, { "epoch": 0.8377766599597586, "grad_norm": 0.3423280417919159, "learning_rate": 9.052811610642599e-06, "loss": 0.3453, "step": 3331 }, { "epoch": 0.8380281690140845, "grad_norm": 0.4070049226284027, "learning_rate": 9.051954482636327e-06, "loss": 0.3987, "step": 3332 }, { "epoch": 0.8382796780684104, "grad_norm": 0.3565402328968048, "learning_rate": 9.051097007605501e-06, "loss": 0.3752, "step": 3333 }, { "epoch": 0.8385311871227364, "grad_norm": 0.31762558221817017, "learning_rate": 9.050239185623562e-06, "loss": 0.356, "step": 3334 }, { "epoch": 0.8387826961770624, "grad_norm": 0.36530783772468567, "learning_rate": 9.049381016763973e-06, "loss": 0.4123, "step": 3335 }, { "epoch": 0.8390342052313883, "grad_norm": 0.33472946286201477, "learning_rate": 9.048522501100233e-06, "loss": 0.3735, "step": 3336 }, { "epoch": 0.8392857142857143, "grad_norm": 0.3728182315826416, "learning_rate": 9.047663638705868e-06, "loss": 0.3716, "step": 3337 }, { "epoch": 0.8395372233400402, "grad_norm": 0.394871324300766, "learning_rate": 9.046804429654437e-06, "loss": 0.4137, "step": 3338 }, { "epoch": 0.8397887323943662, "grad_norm": 0.3549323081970215, "learning_rate": 9.045944874019522e-06, "loss": 0.4062, "step": 3339 }, { "epoch": 0.8400402414486922, "grad_norm": 0.34717488288879395, "learning_rate": 9.045084971874738e-06, "loss": 0.3685, "step": 3340 }, { "epoch": 0.8402917505030181, "grad_norm": 0.34687772393226624, "learning_rate": 9.044224723293734e-06, "loss": 0.3554, "step": 3341 }, { "epoch": 0.8405432595573441, "grad_norm": 0.4031140208244324, "learning_rate": 9.043364128350183e-06, "loss": 0.4139, "step": 3342 }, { "epoch": 0.84079476861167, "grad_norm": 0.33681127429008484, "learning_rate": 9.042503187117788e-06, "loss": 0.3659, "step": 3343 }, { "epoch": 0.8410462776659959, "grad_norm": 0.31947392225265503, "learning_rate": 9.041641899670286e-06, "loss": 0.3752, "step": 3344 }, { "epoch": 0.841297786720322, "grad_norm": 0.4157194197177887, "learning_rate": 9.04078026608144e-06, "loss": 0.4241, "step": 3345 }, { "epoch": 0.8415492957746479, "grad_norm": 0.340215802192688, "learning_rate": 9.039918286425042e-06, "loss": 0.3933, "step": 3346 }, { "epoch": 0.8418008048289738, "grad_norm": 0.3351587951183319, "learning_rate": 9.039055960774918e-06, "loss": 0.3608, "step": 3347 }, { "epoch": 0.8420523138832998, "grad_norm": 0.3640073537826538, "learning_rate": 9.038193289204919e-06, "loss": 0.3826, "step": 3348 }, { "epoch": 0.8423038229376257, "grad_norm": 0.37294724583625793, "learning_rate": 9.037330271788927e-06, "loss": 0.3644, "step": 3349 }, { "epoch": 0.8425553319919518, "grad_norm": 0.3982885777950287, "learning_rate": 9.036466908600856e-06, "loss": 0.3856, "step": 3350 }, { "epoch": 0.8428068410462777, "grad_norm": 0.3464895486831665, "learning_rate": 9.035603199714645e-06, "loss": 0.3804, "step": 3351 }, { "epoch": 0.8430583501006036, "grad_norm": 0.34546592831611633, "learning_rate": 9.034739145204266e-06, "loss": 0.3949, "step": 3352 }, { "epoch": 0.8433098591549296, "grad_norm": 0.39847803115844727, "learning_rate": 9.033874745143722e-06, "loss": 0.3775, "step": 3353 }, { "epoch": 0.8435613682092555, "grad_norm": 0.34165751934051514, "learning_rate": 9.033009999607042e-06, "loss": 0.3892, "step": 3354 }, { "epoch": 0.8438128772635815, "grad_norm": 0.35674750804901123, "learning_rate": 9.032144908668284e-06, "loss": 0.3703, "step": 3355 }, { "epoch": 0.8440643863179075, "grad_norm": 0.3607436418533325, "learning_rate": 9.031279472401542e-06, "loss": 0.402, "step": 3356 }, { "epoch": 0.8443158953722334, "grad_norm": 0.35292503237724304, "learning_rate": 9.03041369088093e-06, "loss": 0.3808, "step": 3357 }, { "epoch": 0.8445674044265593, "grad_norm": 0.35224297642707825, "learning_rate": 9.029547564180602e-06, "loss": 0.3921, "step": 3358 }, { "epoch": 0.8448189134808853, "grad_norm": 0.3472045361995697, "learning_rate": 9.028681092374733e-06, "loss": 0.4116, "step": 3359 }, { "epoch": 0.8450704225352113, "grad_norm": 0.3715015947818756, "learning_rate": 9.027814275537533e-06, "loss": 0.3823, "step": 3360 }, { "epoch": 0.8453219315895373, "grad_norm": 0.3859766721725464, "learning_rate": 9.026947113743237e-06, "loss": 0.3872, "step": 3361 }, { "epoch": 0.8455734406438632, "grad_norm": 0.36284056305885315, "learning_rate": 9.026079607066112e-06, "loss": 0.3781, "step": 3362 }, { "epoch": 0.8458249496981891, "grad_norm": 0.35753417015075684, "learning_rate": 9.025211755580458e-06, "loss": 0.3545, "step": 3363 }, { "epoch": 0.8460764587525151, "grad_norm": 0.4312700927257538, "learning_rate": 9.024343559360597e-06, "loss": 0.3925, "step": 3364 }, { "epoch": 0.846327967806841, "grad_norm": 0.3738209903240204, "learning_rate": 9.023475018480888e-06, "loss": 0.3912, "step": 3365 }, { "epoch": 0.846579476861167, "grad_norm": 0.3941510021686554, "learning_rate": 9.022606133015713e-06, "loss": 0.3617, "step": 3366 }, { "epoch": 0.846830985915493, "grad_norm": 0.4269218146800995, "learning_rate": 9.021736903039488e-06, "loss": 0.3998, "step": 3367 }, { "epoch": 0.8470824949698189, "grad_norm": 0.35374465584754944, "learning_rate": 9.020867328626659e-06, "loss": 0.3785, "step": 3368 }, { "epoch": 0.8473340040241448, "grad_norm": 0.34942927956581116, "learning_rate": 9.019997409851696e-06, "loss": 0.374, "step": 3369 }, { "epoch": 0.8475855130784709, "grad_norm": 0.4133439064025879, "learning_rate": 9.019127146789106e-06, "loss": 0.4161, "step": 3370 }, { "epoch": 0.8478370221327968, "grad_norm": 0.38321179151535034, "learning_rate": 9.018256539513417e-06, "loss": 0.3675, "step": 3371 }, { "epoch": 0.8480885311871227, "grad_norm": 0.3283655345439911, "learning_rate": 9.017385588099195e-06, "loss": 0.3838, "step": 3372 }, { "epoch": 0.8483400402414487, "grad_norm": 0.3512178063392639, "learning_rate": 9.016514292621027e-06, "loss": 0.3827, "step": 3373 }, { "epoch": 0.8485915492957746, "grad_norm": 0.4433364272117615, "learning_rate": 9.015642653153542e-06, "loss": 0.3925, "step": 3374 }, { "epoch": 0.8488430583501007, "grad_norm": 0.3722871243953705, "learning_rate": 9.014770669771383e-06, "loss": 0.364, "step": 3375 }, { "epoch": 0.8490945674044266, "grad_norm": 0.3842884600162506, "learning_rate": 9.013898342549233e-06, "loss": 0.3998, "step": 3376 }, { "epoch": 0.8493460764587525, "grad_norm": 0.39675992727279663, "learning_rate": 9.013025671561798e-06, "loss": 0.4026, "step": 3377 }, { "epoch": 0.8495975855130785, "grad_norm": 0.34844744205474854, "learning_rate": 9.012152656883824e-06, "loss": 0.3794, "step": 3378 }, { "epoch": 0.8498490945674044, "grad_norm": 0.3376064598560333, "learning_rate": 9.011279298590072e-06, "loss": 0.3641, "step": 3379 }, { "epoch": 0.8501006036217303, "grad_norm": 0.3386366665363312, "learning_rate": 9.010405596755345e-06, "loss": 0.3722, "step": 3380 }, { "epoch": 0.8503521126760564, "grad_norm": 0.3699395954608917, "learning_rate": 9.009531551454465e-06, "loss": 0.39, "step": 3381 }, { "epoch": 0.8506036217303823, "grad_norm": 0.35013461112976074, "learning_rate": 9.008657162762293e-06, "loss": 0.3577, "step": 3382 }, { "epoch": 0.8508551307847082, "grad_norm": 0.3586879074573517, "learning_rate": 9.007782430753712e-06, "loss": 0.3754, "step": 3383 }, { "epoch": 0.8511066398390342, "grad_norm": 0.3715461790561676, "learning_rate": 9.006907355503639e-06, "loss": 0.3569, "step": 3384 }, { "epoch": 0.8513581488933601, "grad_norm": 0.4095889925956726, "learning_rate": 9.006031937087018e-06, "loss": 0.3823, "step": 3385 }, { "epoch": 0.8516096579476862, "grad_norm": 0.34555843472480774, "learning_rate": 9.005156175578823e-06, "loss": 0.4092, "step": 3386 }, { "epoch": 0.8518611670020121, "grad_norm": 0.3641471266746521, "learning_rate": 9.004280071054058e-06, "loss": 0.3735, "step": 3387 }, { "epoch": 0.852112676056338, "grad_norm": 0.3723176419734955, "learning_rate": 9.003403623587757e-06, "loss": 0.3639, "step": 3388 }, { "epoch": 0.852364185110664, "grad_norm": 0.3690603971481323, "learning_rate": 9.002526833254979e-06, "loss": 0.3734, "step": 3389 }, { "epoch": 0.85261569416499, "grad_norm": 0.33589431643486023, "learning_rate": 9.001649700130816e-06, "loss": 0.3743, "step": 3390 }, { "epoch": 0.8528672032193159, "grad_norm": 0.3753211498260498, "learning_rate": 9.000772224290393e-06, "loss": 0.4133, "step": 3391 }, { "epoch": 0.8531187122736419, "grad_norm": 0.34786948561668396, "learning_rate": 8.999894405808857e-06, "loss": 0.3791, "step": 3392 }, { "epoch": 0.8533702213279678, "grad_norm": 0.3724614977836609, "learning_rate": 8.99901624476139e-06, "loss": 0.3819, "step": 3393 }, { "epoch": 0.8536217303822937, "grad_norm": 0.34503018856048584, "learning_rate": 8.998137741223196e-06, "loss": 0.3858, "step": 3394 }, { "epoch": 0.8538732394366197, "grad_norm": 0.37502291798591614, "learning_rate": 8.99725889526952e-06, "loss": 0.4026, "step": 3395 }, { "epoch": 0.8541247484909457, "grad_norm": 0.3725481927394867, "learning_rate": 8.996379706975624e-06, "loss": 0.353, "step": 3396 }, { "epoch": 0.8543762575452716, "grad_norm": 0.3500644862651825, "learning_rate": 8.995500176416809e-06, "loss": 0.3909, "step": 3397 }, { "epoch": 0.8546277665995976, "grad_norm": 0.3630251884460449, "learning_rate": 8.9946203036684e-06, "loss": 0.3797, "step": 3398 }, { "epoch": 0.8548792756539235, "grad_norm": 0.3816511034965515, "learning_rate": 8.99374008880575e-06, "loss": 0.3622, "step": 3399 }, { "epoch": 0.8551307847082495, "grad_norm": 0.37829670310020447, "learning_rate": 8.992859531904247e-06, "loss": 0.3791, "step": 3400 }, { "epoch": 0.8553822937625755, "grad_norm": 0.3598082363605499, "learning_rate": 8.991978633039305e-06, "loss": 0.3779, "step": 3401 }, { "epoch": 0.8556338028169014, "grad_norm": 0.3571262061595917, "learning_rate": 8.991097392286368e-06, "loss": 0.3634, "step": 3402 }, { "epoch": 0.8558853118712274, "grad_norm": 0.38512668013572693, "learning_rate": 8.990215809720905e-06, "loss": 0.3692, "step": 3403 }, { "epoch": 0.8561368209255533, "grad_norm": 0.35630372166633606, "learning_rate": 8.989333885418423e-06, "loss": 0.3878, "step": 3404 }, { "epoch": 0.8563883299798792, "grad_norm": 0.37692365050315857, "learning_rate": 8.988451619454449e-06, "loss": 0.3886, "step": 3405 }, { "epoch": 0.8566398390342053, "grad_norm": 0.3339127004146576, "learning_rate": 8.987569011904547e-06, "loss": 0.3821, "step": 3406 }, { "epoch": 0.8568913480885312, "grad_norm": 0.3581376075744629, "learning_rate": 8.986686062844303e-06, "loss": 0.4086, "step": 3407 }, { "epoch": 0.8571428571428571, "grad_norm": 0.39820095896720886, "learning_rate": 8.985802772349341e-06, "loss": 0.3902, "step": 3408 }, { "epoch": 0.8573943661971831, "grad_norm": 0.3543420135974884, "learning_rate": 8.984919140495305e-06, "loss": 0.3623, "step": 3409 }, { "epoch": 0.857645875251509, "grad_norm": 0.3442290723323822, "learning_rate": 8.984035167357874e-06, "loss": 0.3704, "step": 3410 }, { "epoch": 0.8578973843058351, "grad_norm": 0.3457908034324646, "learning_rate": 8.983150853012756e-06, "loss": 0.3606, "step": 3411 }, { "epoch": 0.858148893360161, "grad_norm": 0.36458566784858704, "learning_rate": 8.982266197535685e-06, "loss": 0.3841, "step": 3412 }, { "epoch": 0.8584004024144869, "grad_norm": 0.3184933364391327, "learning_rate": 8.981381201002428e-06, "loss": 0.3728, "step": 3413 }, { "epoch": 0.8586519114688129, "grad_norm": 0.3275623917579651, "learning_rate": 8.98049586348878e-06, "loss": 0.373, "step": 3414 }, { "epoch": 0.8589034205231388, "grad_norm": 0.35574376583099365, "learning_rate": 8.979610185070562e-06, "loss": 0.3598, "step": 3415 }, { "epoch": 0.8591549295774648, "grad_norm": 0.3995131850242615, "learning_rate": 8.978724165823626e-06, "loss": 0.3797, "step": 3416 }, { "epoch": 0.8594064386317908, "grad_norm": 0.37157928943634033, "learning_rate": 8.977837805823856e-06, "loss": 0.4016, "step": 3417 }, { "epoch": 0.8596579476861167, "grad_norm": 0.33222272992134094, "learning_rate": 8.976951105147167e-06, "loss": 0.3852, "step": 3418 }, { "epoch": 0.8599094567404426, "grad_norm": 0.37010085582733154, "learning_rate": 8.976064063869493e-06, "loss": 0.3865, "step": 3419 }, { "epoch": 0.8601609657947686, "grad_norm": 0.3240501582622528, "learning_rate": 8.975176682066805e-06, "loss": 0.4128, "step": 3420 }, { "epoch": 0.8604124748490946, "grad_norm": 0.33426669239997864, "learning_rate": 8.974288959815105e-06, "loss": 0.3636, "step": 3421 }, { "epoch": 0.8606639839034205, "grad_norm": 0.34247368574142456, "learning_rate": 8.973400897190418e-06, "loss": 0.3842, "step": 3422 }, { "epoch": 0.8609154929577465, "grad_norm": 0.343246191740036, "learning_rate": 8.9725124942688e-06, "loss": 0.3869, "step": 3423 }, { "epoch": 0.8611670020120724, "grad_norm": 0.3464204668998718, "learning_rate": 8.971623751126341e-06, "loss": 0.387, "step": 3424 }, { "epoch": 0.8614185110663984, "grad_norm": 0.3398580551147461, "learning_rate": 8.970734667839155e-06, "loss": 0.3775, "step": 3425 }, { "epoch": 0.8616700201207244, "grad_norm": 0.3092399835586548, "learning_rate": 8.969845244483383e-06, "loss": 0.3597, "step": 3426 }, { "epoch": 0.8619215291750503, "grad_norm": 0.37892070412635803, "learning_rate": 8.968955481135202e-06, "loss": 0.4005, "step": 3427 }, { "epoch": 0.8621730382293763, "grad_norm": 0.33688947558403015, "learning_rate": 8.968065377870814e-06, "loss": 0.3724, "step": 3428 }, { "epoch": 0.8624245472837022, "grad_norm": 0.35349977016448975, "learning_rate": 8.967174934766452e-06, "loss": 0.3535, "step": 3429 }, { "epoch": 0.8626760563380281, "grad_norm": 0.34343835711479187, "learning_rate": 8.966284151898373e-06, "loss": 0.3627, "step": 3430 }, { "epoch": 0.8629275653923542, "grad_norm": 0.34207355976104736, "learning_rate": 8.965393029342871e-06, "loss": 0.3745, "step": 3431 }, { "epoch": 0.8631790744466801, "grad_norm": 0.3593146502971649, "learning_rate": 8.964501567176263e-06, "loss": 0.3759, "step": 3432 }, { "epoch": 0.863430583501006, "grad_norm": 0.37325525283813477, "learning_rate": 8.963609765474897e-06, "loss": 0.3783, "step": 3433 }, { "epoch": 0.863682092555332, "grad_norm": 0.375765860080719, "learning_rate": 8.96271762431515e-06, "loss": 0.3963, "step": 3434 }, { "epoch": 0.8639336016096579, "grad_norm": 0.3585033714771271, "learning_rate": 8.96182514377343e-06, "loss": 0.3527, "step": 3435 }, { "epoch": 0.864185110663984, "grad_norm": 0.42358458042144775, "learning_rate": 8.960932323926172e-06, "loss": 0.3856, "step": 3436 }, { "epoch": 0.8644366197183099, "grad_norm": 0.374779611825943, "learning_rate": 8.96003916484984e-06, "loss": 0.3643, "step": 3437 }, { "epoch": 0.8646881287726358, "grad_norm": 0.3398546874523163, "learning_rate": 8.959145666620928e-06, "loss": 0.368, "step": 3438 }, { "epoch": 0.8649396378269618, "grad_norm": 0.36778420209884644, "learning_rate": 8.958251829315957e-06, "loss": 0.3526, "step": 3439 }, { "epoch": 0.8651911468812877, "grad_norm": 0.34406542778015137, "learning_rate": 8.957357653011481e-06, "loss": 0.3614, "step": 3440 }, { "epoch": 0.8654426559356136, "grad_norm": 0.3503788411617279, "learning_rate": 8.956463137784077e-06, "loss": 0.3757, "step": 3441 }, { "epoch": 0.8656941649899397, "grad_norm": 0.3319445550441742, "learning_rate": 8.955568283710359e-06, "loss": 0.3972, "step": 3442 }, { "epoch": 0.8659456740442656, "grad_norm": 0.39669501781463623, "learning_rate": 8.95467309086696e-06, "loss": 0.3801, "step": 3443 }, { "epoch": 0.8661971830985915, "grad_norm": 0.3425920605659485, "learning_rate": 8.953777559330554e-06, "loss": 0.3563, "step": 3444 }, { "epoch": 0.8664486921529175, "grad_norm": 0.335559606552124, "learning_rate": 8.952881689177834e-06, "loss": 0.3907, "step": 3445 }, { "epoch": 0.8667002012072434, "grad_norm": 0.3417853116989136, "learning_rate": 8.951985480485528e-06, "loss": 0.3652, "step": 3446 }, { "epoch": 0.8669517102615694, "grad_norm": 0.3930087983608246, "learning_rate": 8.951088933330387e-06, "loss": 0.3849, "step": 3447 }, { "epoch": 0.8672032193158954, "grad_norm": 0.3572435677051544, "learning_rate": 8.950192047789198e-06, "loss": 0.368, "step": 3448 }, { "epoch": 0.8674547283702213, "grad_norm": 0.3223349153995514, "learning_rate": 8.949294823938773e-06, "loss": 0.4082, "step": 3449 }, { "epoch": 0.8677062374245473, "grad_norm": 0.40850120782852173, "learning_rate": 8.94839726185595e-06, "loss": 0.3569, "step": 3450 }, { "epoch": 0.8679577464788732, "grad_norm": 0.3549629747867584, "learning_rate": 8.947499361617606e-06, "loss": 0.3895, "step": 3451 }, { "epoch": 0.8682092555331992, "grad_norm": 0.333429753780365, "learning_rate": 8.946601123300636e-06, "loss": 0.3784, "step": 3452 }, { "epoch": 0.8684607645875252, "grad_norm": 0.34425967931747437, "learning_rate": 8.94570254698197e-06, "loss": 0.3644, "step": 3453 }, { "epoch": 0.8687122736418511, "grad_norm": 0.4264945089817047, "learning_rate": 8.944803632738563e-06, "loss": 0.3787, "step": 3454 }, { "epoch": 0.868963782696177, "grad_norm": 0.35965225100517273, "learning_rate": 8.943904380647406e-06, "loss": 0.3696, "step": 3455 }, { "epoch": 0.869215291750503, "grad_norm": 0.39319103956222534, "learning_rate": 8.94300479078551e-06, "loss": 0.3785, "step": 3456 }, { "epoch": 0.869466800804829, "grad_norm": 0.3534758985042572, "learning_rate": 8.942104863229923e-06, "loss": 0.3616, "step": 3457 }, { "epoch": 0.8697183098591549, "grad_norm": 0.3524431586265564, "learning_rate": 8.941204598057715e-06, "loss": 0.3972, "step": 3458 }, { "epoch": 0.8699698189134809, "grad_norm": 0.36104217171669006, "learning_rate": 8.940303995345988e-06, "loss": 0.37, "step": 3459 }, { "epoch": 0.8702213279678068, "grad_norm": 0.39354491233825684, "learning_rate": 8.939403055171877e-06, "loss": 0.3858, "step": 3460 }, { "epoch": 0.8704728370221329, "grad_norm": 0.3502269983291626, "learning_rate": 8.938501777612538e-06, "loss": 0.3702, "step": 3461 }, { "epoch": 0.8707243460764588, "grad_norm": 0.3592958450317383, "learning_rate": 8.937600162745159e-06, "loss": 0.3917, "step": 3462 }, { "epoch": 0.8709758551307847, "grad_norm": 0.3925688862800598, "learning_rate": 8.936698210646962e-06, "loss": 0.3878, "step": 3463 }, { "epoch": 0.8712273641851107, "grad_norm": 0.35481512546539307, "learning_rate": 8.93579592139519e-06, "loss": 0.3642, "step": 3464 }, { "epoch": 0.8714788732394366, "grad_norm": 0.35621413588523865, "learning_rate": 8.93489329506712e-06, "loss": 0.408, "step": 3465 }, { "epoch": 0.8717303822937625, "grad_norm": 0.3342260420322418, "learning_rate": 8.933990331740056e-06, "loss": 0.3867, "step": 3466 }, { "epoch": 0.8719818913480886, "grad_norm": 0.3696461617946625, "learning_rate": 8.933087031491332e-06, "loss": 0.3788, "step": 3467 }, { "epoch": 0.8722334004024145, "grad_norm": 0.38092076778411865, "learning_rate": 8.932183394398309e-06, "loss": 0.375, "step": 3468 }, { "epoch": 0.8724849094567404, "grad_norm": 0.34763234853744507, "learning_rate": 8.931279420538377e-06, "loss": 0.3722, "step": 3469 }, { "epoch": 0.8727364185110664, "grad_norm": 0.3381394147872925, "learning_rate": 8.930375109988956e-06, "loss": 0.4076, "step": 3470 }, { "epoch": 0.8729879275653923, "grad_norm": 0.34451600909233093, "learning_rate": 8.929470462827496e-06, "loss": 0.3787, "step": 3471 }, { "epoch": 0.8732394366197183, "grad_norm": 0.33672693371772766, "learning_rate": 8.928565479131473e-06, "loss": 0.3889, "step": 3472 }, { "epoch": 0.8734909456740443, "grad_norm": 0.3693379759788513, "learning_rate": 8.927660158978392e-06, "loss": 0.3873, "step": 3473 }, { "epoch": 0.8737424547283702, "grad_norm": 0.32799556851387024, "learning_rate": 8.926754502445794e-06, "loss": 0.3619, "step": 3474 }, { "epoch": 0.8739939637826962, "grad_norm": 0.36521029472351074, "learning_rate": 8.925848509611237e-06, "loss": 0.3807, "step": 3475 }, { "epoch": 0.8742454728370221, "grad_norm": 0.3440057933330536, "learning_rate": 8.924942180552315e-06, "loss": 0.3797, "step": 3476 }, { "epoch": 0.8744969818913481, "grad_norm": 0.35602283477783203, "learning_rate": 8.924035515346648e-06, "loss": 0.3678, "step": 3477 }, { "epoch": 0.8747484909456741, "grad_norm": 0.34389829635620117, "learning_rate": 8.923128514071888e-06, "loss": 0.3683, "step": 3478 }, { "epoch": 0.875, "grad_norm": 0.37036365270614624, "learning_rate": 8.922221176805715e-06, "loss": 0.3992, "step": 3479 }, { "epoch": 0.8752515090543259, "grad_norm": 0.3461644649505615, "learning_rate": 8.921313503625835e-06, "loss": 0.3997, "step": 3480 }, { "epoch": 0.8755030181086519, "grad_norm": 0.35094374418258667, "learning_rate": 8.920405494609986e-06, "loss": 0.3968, "step": 3481 }, { "epoch": 0.8757545271629779, "grad_norm": 0.35670819878578186, "learning_rate": 8.919497149835932e-06, "loss": 0.3609, "step": 3482 }, { "epoch": 0.8760060362173038, "grad_norm": 0.37578085064888, "learning_rate": 8.918588469381468e-06, "loss": 0.3756, "step": 3483 }, { "epoch": 0.8762575452716298, "grad_norm": 0.35081595182418823, "learning_rate": 8.917679453324415e-06, "loss": 0.379, "step": 3484 }, { "epoch": 0.8765090543259557, "grad_norm": 0.3629753291606903, "learning_rate": 8.916770101742627e-06, "loss": 0.4066, "step": 3485 }, { "epoch": 0.8767605633802817, "grad_norm": 0.3397653102874756, "learning_rate": 8.915860414713981e-06, "loss": 0.4097, "step": 3486 }, { "epoch": 0.8770120724346077, "grad_norm": 0.3421812951564789, "learning_rate": 8.91495039231639e-06, "loss": 0.4011, "step": 3487 }, { "epoch": 0.8772635814889336, "grad_norm": 0.39413323998451233, "learning_rate": 8.914040034627788e-06, "loss": 0.4002, "step": 3488 }, { "epoch": 0.8775150905432596, "grad_norm": 0.37600401043891907, "learning_rate": 8.913129341726144e-06, "loss": 0.3507, "step": 3489 }, { "epoch": 0.8777665995975855, "grad_norm": 0.3659670352935791, "learning_rate": 8.912218313689453e-06, "loss": 0.3735, "step": 3490 }, { "epoch": 0.8780181086519114, "grad_norm": 0.3923790156841278, "learning_rate": 8.911306950595737e-06, "loss": 0.358, "step": 3491 }, { "epoch": 0.8782696177062375, "grad_norm": 0.37082934379577637, "learning_rate": 8.910395252523053e-06, "loss": 0.392, "step": 3492 }, { "epoch": 0.8785211267605634, "grad_norm": 0.4046441912651062, "learning_rate": 8.909483219549475e-06, "loss": 0.3697, "step": 3493 }, { "epoch": 0.8787726358148893, "grad_norm": 0.3740319311618805, "learning_rate": 8.908570851753117e-06, "loss": 0.3507, "step": 3494 }, { "epoch": 0.8790241448692153, "grad_norm": 0.368447870016098, "learning_rate": 8.907658149212119e-06, "loss": 0.3536, "step": 3495 }, { "epoch": 0.8792756539235412, "grad_norm": 0.3541277348995209, "learning_rate": 8.906745112004646e-06, "loss": 0.3628, "step": 3496 }, { "epoch": 0.8795271629778671, "grad_norm": 0.36320897936820984, "learning_rate": 8.905831740208896e-06, "loss": 0.3541, "step": 3497 }, { "epoch": 0.8797786720321932, "grad_norm": 0.3422798216342926, "learning_rate": 8.904918033903091e-06, "loss": 0.364, "step": 3498 }, { "epoch": 0.8800301810865191, "grad_norm": 0.35075971484184265, "learning_rate": 8.904003993165487e-06, "loss": 0.3883, "step": 3499 }, { "epoch": 0.8802816901408451, "grad_norm": 0.3443789780139923, "learning_rate": 8.903089618074362e-06, "loss": 0.3693, "step": 3500 }, { "epoch": 0.880533199195171, "grad_norm": 0.3593481481075287, "learning_rate": 8.902174908708032e-06, "loss": 0.3749, "step": 3501 }, { "epoch": 0.880784708249497, "grad_norm": 0.33803337812423706, "learning_rate": 8.901259865144831e-06, "loss": 0.3927, "step": 3502 }, { "epoch": 0.881036217303823, "grad_norm": 0.4012623131275177, "learning_rate": 8.900344487463128e-06, "loss": 0.393, "step": 3503 }, { "epoch": 0.8812877263581489, "grad_norm": 0.35286056995391846, "learning_rate": 8.899428775741321e-06, "loss": 0.377, "step": 3504 }, { "epoch": 0.8815392354124748, "grad_norm": 0.3573596775531769, "learning_rate": 8.898512730057835e-06, "loss": 0.3808, "step": 3505 }, { "epoch": 0.8817907444668008, "grad_norm": 0.33580759167671204, "learning_rate": 8.897596350491122e-06, "loss": 0.3601, "step": 3506 }, { "epoch": 0.8820422535211268, "grad_norm": 0.32967039942741394, "learning_rate": 8.896679637119665e-06, "loss": 0.3586, "step": 3507 }, { "epoch": 0.8822937625754527, "grad_norm": 0.3663707971572876, "learning_rate": 8.895762590021973e-06, "loss": 0.3816, "step": 3508 }, { "epoch": 0.8825452716297787, "grad_norm": 0.33888426423072815, "learning_rate": 8.89484520927659e-06, "loss": 0.3904, "step": 3509 }, { "epoch": 0.8827967806841046, "grad_norm": 0.334952175617218, "learning_rate": 8.893927494962078e-06, "loss": 0.3941, "step": 3510 }, { "epoch": 0.8830482897384306, "grad_norm": 0.32887038588523865, "learning_rate": 8.893009447157039e-06, "loss": 0.3737, "step": 3511 }, { "epoch": 0.8832997987927566, "grad_norm": 0.32995015382766724, "learning_rate": 8.892091065940093e-06, "loss": 0.3787, "step": 3512 }, { "epoch": 0.8835513078470825, "grad_norm": 0.33268022537231445, "learning_rate": 8.891172351389898e-06, "loss": 0.3939, "step": 3513 }, { "epoch": 0.8838028169014085, "grad_norm": 0.34591397643089294, "learning_rate": 8.890253303585133e-06, "loss": 0.3809, "step": 3514 }, { "epoch": 0.8840543259557344, "grad_norm": 0.357835054397583, "learning_rate": 8.88933392260451e-06, "loss": 0.3565, "step": 3515 }, { "epoch": 0.8843058350100603, "grad_norm": 0.3453388214111328, "learning_rate": 8.888414208526768e-06, "loss": 0.3891, "step": 3516 }, { "epoch": 0.8845573440643864, "grad_norm": 0.35793086886405945, "learning_rate": 8.887494161430676e-06, "loss": 0.3938, "step": 3517 }, { "epoch": 0.8848088531187123, "grad_norm": 0.32767254114151, "learning_rate": 8.886573781395028e-06, "loss": 0.3872, "step": 3518 }, { "epoch": 0.8850603621730382, "grad_norm": 0.3634639084339142, "learning_rate": 8.88565306849865e-06, "loss": 0.4025, "step": 3519 }, { "epoch": 0.8853118712273642, "grad_norm": 0.34274405241012573, "learning_rate": 8.884732022820396e-06, "loss": 0.4144, "step": 3520 }, { "epoch": 0.8855633802816901, "grad_norm": 0.3598005771636963, "learning_rate": 8.883810644439146e-06, "loss": 0.3667, "step": 3521 }, { "epoch": 0.885814889336016, "grad_norm": 0.3647115230560303, "learning_rate": 8.882888933433813e-06, "loss": 0.3797, "step": 3522 }, { "epoch": 0.8860663983903421, "grad_norm": 0.3279248774051666, "learning_rate": 8.881966889883334e-06, "loss": 0.3718, "step": 3523 }, { "epoch": 0.886317907444668, "grad_norm": 0.39648541808128357, "learning_rate": 8.881044513866675e-06, "loss": 0.3783, "step": 3524 }, { "epoch": 0.886569416498994, "grad_norm": 0.3662944734096527, "learning_rate": 8.880121805462834e-06, "loss": 0.3796, "step": 3525 }, { "epoch": 0.8868209255533199, "grad_norm": 0.3248366415500641, "learning_rate": 8.879198764750834e-06, "loss": 0.3804, "step": 3526 }, { "epoch": 0.8870724346076458, "grad_norm": 0.3559374511241913, "learning_rate": 8.878275391809727e-06, "loss": 0.3701, "step": 3527 }, { "epoch": 0.8873239436619719, "grad_norm": 0.37350761890411377, "learning_rate": 8.877351686718596e-06, "loss": 0.3769, "step": 3528 }, { "epoch": 0.8875754527162978, "grad_norm": 0.37377414107322693, "learning_rate": 8.876427649556549e-06, "loss": 0.4021, "step": 3529 }, { "epoch": 0.8878269617706237, "grad_norm": 0.3348924219608307, "learning_rate": 8.875503280402727e-06, "loss": 0.3881, "step": 3530 }, { "epoch": 0.8880784708249497, "grad_norm": 0.33580613136291504, "learning_rate": 8.874578579336293e-06, "loss": 0.3735, "step": 3531 }, { "epoch": 0.8883299798792756, "grad_norm": 0.35645267367362976, "learning_rate": 8.873653546436442e-06, "loss": 0.3988, "step": 3532 }, { "epoch": 0.8885814889336016, "grad_norm": 0.33889126777648926, "learning_rate": 8.872728181782399e-06, "loss": 0.3856, "step": 3533 }, { "epoch": 0.8888329979879276, "grad_norm": 0.329007089138031, "learning_rate": 8.871802485453414e-06, "loss": 0.37, "step": 3534 }, { "epoch": 0.8890845070422535, "grad_norm": 0.31005194783210754, "learning_rate": 8.87087645752877e-06, "loss": 0.3784, "step": 3535 }, { "epoch": 0.8893360160965795, "grad_norm": 0.3521806299686432, "learning_rate": 8.86995009808777e-06, "loss": 0.3909, "step": 3536 }, { "epoch": 0.8895875251509054, "grad_norm": 0.3143180310726166, "learning_rate": 8.869023407209758e-06, "loss": 0.4024, "step": 3537 }, { "epoch": 0.8898390342052314, "grad_norm": 0.32495957612991333, "learning_rate": 8.868096384974094e-06, "loss": 0.3827, "step": 3538 }, { "epoch": 0.8900905432595574, "grad_norm": 0.30812323093414307, "learning_rate": 8.867169031460175e-06, "loss": 0.362, "step": 3539 }, { "epoch": 0.8903420523138833, "grad_norm": 0.3465098440647125, "learning_rate": 8.86624134674742e-06, "loss": 0.3744, "step": 3540 }, { "epoch": 0.8905935613682092, "grad_norm": 0.3428845703601837, "learning_rate": 8.86531333091528e-06, "loss": 0.3918, "step": 3541 }, { "epoch": 0.8908450704225352, "grad_norm": 0.35687991976737976, "learning_rate": 8.864384984043234e-06, "loss": 0.3882, "step": 3542 }, { "epoch": 0.8910965794768612, "grad_norm": 0.37335455417633057, "learning_rate": 8.863456306210793e-06, "loss": 0.3702, "step": 3543 }, { "epoch": 0.8913480885311871, "grad_norm": 0.3432762622833252, "learning_rate": 8.862527297497488e-06, "loss": 0.3811, "step": 3544 }, { "epoch": 0.8915995975855131, "grad_norm": 0.3809764087200165, "learning_rate": 8.861597957982881e-06, "loss": 0.3995, "step": 3545 }, { "epoch": 0.891851106639839, "grad_norm": 0.3548218309879303, "learning_rate": 8.86066828774657e-06, "loss": 0.3965, "step": 3546 }, { "epoch": 0.8921026156941649, "grad_norm": 0.3315393924713135, "learning_rate": 8.859738286868172e-06, "loss": 0.374, "step": 3547 }, { "epoch": 0.892354124748491, "grad_norm": 0.35290321707725525, "learning_rate": 8.858807955427335e-06, "loss": 0.3589, "step": 3548 }, { "epoch": 0.8926056338028169, "grad_norm": 0.368217796087265, "learning_rate": 8.857877293503739e-06, "loss": 0.3713, "step": 3549 }, { "epoch": 0.8928571428571429, "grad_norm": 0.3560136556625366, "learning_rate": 8.856946301177085e-06, "loss": 0.3697, "step": 3550 }, { "epoch": 0.8931086519114688, "grad_norm": 0.3632422089576721, "learning_rate": 8.85601497852711e-06, "loss": 0.3798, "step": 3551 }, { "epoch": 0.8933601609657947, "grad_norm": 0.3566484749317169, "learning_rate": 8.855083325633578e-06, "loss": 0.3761, "step": 3552 }, { "epoch": 0.8936116700201208, "grad_norm": 0.36059099435806274, "learning_rate": 8.854151342576274e-06, "loss": 0.3683, "step": 3553 }, { "epoch": 0.8938631790744467, "grad_norm": 0.36407434940338135, "learning_rate": 8.853219029435019e-06, "loss": 0.3793, "step": 3554 }, { "epoch": 0.8941146881287726, "grad_norm": 0.36072081327438354, "learning_rate": 8.852286386289662e-06, "loss": 0.3949, "step": 3555 }, { "epoch": 0.8943661971830986, "grad_norm": 0.3494257628917694, "learning_rate": 8.851353413220073e-06, "loss": 0.3772, "step": 3556 }, { "epoch": 0.8946177062374245, "grad_norm": 0.3464342951774597, "learning_rate": 8.850420110306159e-06, "loss": 0.4028, "step": 3557 }, { "epoch": 0.8948692152917505, "grad_norm": 0.3632872402667999, "learning_rate": 8.849486477627851e-06, "loss": 0.378, "step": 3558 }, { "epoch": 0.8951207243460765, "grad_norm": 0.3627418875694275, "learning_rate": 8.848552515265108e-06, "loss": 0.3887, "step": 3559 }, { "epoch": 0.8953722334004024, "grad_norm": 0.3480847477912903, "learning_rate": 8.84761822329792e-06, "loss": 0.3831, "step": 3560 }, { "epoch": 0.8956237424547284, "grad_norm": 0.3408527374267578, "learning_rate": 8.8466836018063e-06, "loss": 0.3716, "step": 3561 }, { "epoch": 0.8958752515090543, "grad_norm": 0.35451194643974304, "learning_rate": 8.845748650870297e-06, "loss": 0.3733, "step": 3562 }, { "epoch": 0.8961267605633803, "grad_norm": 0.3594439625740051, "learning_rate": 8.844813370569978e-06, "loss": 0.3822, "step": 3563 }, { "epoch": 0.8963782696177063, "grad_norm": 0.33832788467407227, "learning_rate": 8.843877760985447e-06, "loss": 0.3858, "step": 3564 }, { "epoch": 0.8966297786720322, "grad_norm": 0.33233413100242615, "learning_rate": 8.842941822196835e-06, "loss": 0.3995, "step": 3565 }, { "epoch": 0.8968812877263581, "grad_norm": 0.3875477612018585, "learning_rate": 8.842005554284296e-06, "loss": 0.3922, "step": 3566 }, { "epoch": 0.8971327967806841, "grad_norm": 0.32244715094566345, "learning_rate": 8.841068957328018e-06, "loss": 0.345, "step": 3567 }, { "epoch": 0.89738430583501, "grad_norm": 0.36826932430267334, "learning_rate": 8.84013203140821e-06, "loss": 0.3665, "step": 3568 }, { "epoch": 0.897635814889336, "grad_norm": 0.3594595789909363, "learning_rate": 8.839194776605121e-06, "loss": 0.3917, "step": 3569 }, { "epoch": 0.897887323943662, "grad_norm": 0.31721267104148865, "learning_rate": 8.838257192999016e-06, "loss": 0.3672, "step": 3570 }, { "epoch": 0.8981388329979879, "grad_norm": 0.37041252851486206, "learning_rate": 8.837319280670196e-06, "loss": 0.3852, "step": 3571 }, { "epoch": 0.8983903420523138, "grad_norm": 0.3686506152153015, "learning_rate": 8.836381039698983e-06, "loss": 0.3628, "step": 3572 }, { "epoch": 0.8986418511066399, "grad_norm": 0.3326662480831146, "learning_rate": 8.835442470165736e-06, "loss": 0.3954, "step": 3573 }, { "epoch": 0.8988933601609658, "grad_norm": 0.35208654403686523, "learning_rate": 8.834503572150835e-06, "loss": 0.3983, "step": 3574 }, { "epoch": 0.8991448692152918, "grad_norm": 0.34213465452194214, "learning_rate": 8.833564345734693e-06, "loss": 0.3492, "step": 3575 }, { "epoch": 0.8993963782696177, "grad_norm": 0.3441331684589386, "learning_rate": 8.832624790997747e-06, "loss": 0.3914, "step": 3576 }, { "epoch": 0.8996478873239436, "grad_norm": 0.33155056834220886, "learning_rate": 8.831684908020463e-06, "loss": 0.4002, "step": 3577 }, { "epoch": 0.8998993963782697, "grad_norm": 0.36803004145622253, "learning_rate": 8.83074469688334e-06, "loss": 0.3868, "step": 3578 }, { "epoch": 0.9001509054325956, "grad_norm": 0.35357141494750977, "learning_rate": 8.829804157666896e-06, "loss": 0.3788, "step": 3579 }, { "epoch": 0.9004024144869215, "grad_norm": 0.3572876751422882, "learning_rate": 8.828863290451689e-06, "loss": 0.414, "step": 3580 }, { "epoch": 0.9006539235412475, "grad_norm": 0.3837227523326874, "learning_rate": 8.82792209531829e-06, "loss": 0.3958, "step": 3581 }, { "epoch": 0.9009054325955734, "grad_norm": 0.3402830958366394, "learning_rate": 8.826980572347314e-06, "loss": 0.3599, "step": 3582 }, { "epoch": 0.9011569416498993, "grad_norm": 0.41772645711898804, "learning_rate": 8.826038721619393e-06, "loss": 0.3844, "step": 3583 }, { "epoch": 0.9014084507042254, "grad_norm": 0.36479735374450684, "learning_rate": 8.82509654321519e-06, "loss": 0.3924, "step": 3584 }, { "epoch": 0.9016599597585513, "grad_norm": 0.3720943033695221, "learning_rate": 8.824154037215399e-06, "loss": 0.3861, "step": 3585 }, { "epoch": 0.9019114688128773, "grad_norm": 0.3853396475315094, "learning_rate": 8.823211203700738e-06, "loss": 0.4013, "step": 3586 }, { "epoch": 0.9021629778672032, "grad_norm": 0.3603013753890991, "learning_rate": 8.822268042751956e-06, "loss": 0.379, "step": 3587 }, { "epoch": 0.9024144869215291, "grad_norm": 0.3663939833641052, "learning_rate": 8.821324554449826e-06, "loss": 0.3767, "step": 3588 }, { "epoch": 0.9026659959758552, "grad_norm": 0.3621923327445984, "learning_rate": 8.820380738875156e-06, "loss": 0.3731, "step": 3589 }, { "epoch": 0.9029175050301811, "grad_norm": 0.333791583776474, "learning_rate": 8.819436596108775e-06, "loss": 0.384, "step": 3590 }, { "epoch": 0.903169014084507, "grad_norm": 0.36841997504234314, "learning_rate": 8.818492126231545e-06, "loss": 0.3656, "step": 3591 }, { "epoch": 0.903420523138833, "grad_norm": 0.35295945405960083, "learning_rate": 8.817547329324352e-06, "loss": 0.3935, "step": 3592 }, { "epoch": 0.903672032193159, "grad_norm": 0.3341405987739563, "learning_rate": 8.816602205468113e-06, "loss": 0.3899, "step": 3593 }, { "epoch": 0.9039235412474849, "grad_norm": 0.32092586159706116, "learning_rate": 8.815656754743772e-06, "loss": 0.3715, "step": 3594 }, { "epoch": 0.9041750503018109, "grad_norm": 0.3715393841266632, "learning_rate": 8.814710977232299e-06, "loss": 0.3753, "step": 3595 }, { "epoch": 0.9044265593561368, "grad_norm": 0.35690295696258545, "learning_rate": 8.813764873014697e-06, "loss": 0.3926, "step": 3596 }, { "epoch": 0.9046780684104627, "grad_norm": 0.3435974419116974, "learning_rate": 8.812818442171994e-06, "loss": 0.3801, "step": 3597 }, { "epoch": 0.9049295774647887, "grad_norm": 0.3631943166255951, "learning_rate": 8.811871684785242e-06, "loss": 0.3926, "step": 3598 }, { "epoch": 0.9051810865191147, "grad_norm": 0.3382608890533447, "learning_rate": 8.810924600935527e-06, "loss": 0.3677, "step": 3599 }, { "epoch": 0.9054325955734407, "grad_norm": 0.3581875264644623, "learning_rate": 8.809977190703961e-06, "loss": 0.367, "step": 3600 }, { "epoch": 0.9056841046277666, "grad_norm": 0.3534909784793854, "learning_rate": 8.809029454171684e-06, "loss": 0.424, "step": 3601 }, { "epoch": 0.9059356136820925, "grad_norm": 0.3113289475440979, "learning_rate": 8.808081391419865e-06, "loss": 0.3781, "step": 3602 }, { "epoch": 0.9061871227364185, "grad_norm": 0.39388152956962585, "learning_rate": 8.807133002529697e-06, "loss": 0.4189, "step": 3603 }, { "epoch": 0.9064386317907445, "grad_norm": 0.3407596945762634, "learning_rate": 8.806184287582404e-06, "loss": 0.3558, "step": 3604 }, { "epoch": 0.9066901408450704, "grad_norm": 0.38331282138824463, "learning_rate": 8.80523524665924e-06, "loss": 0.3851, "step": 3605 }, { "epoch": 0.9069416498993964, "grad_norm": 0.3627070188522339, "learning_rate": 8.804285879841481e-06, "loss": 0.3898, "step": 3606 }, { "epoch": 0.9071931589537223, "grad_norm": 0.31576594710350037, "learning_rate": 8.803336187210437e-06, "loss": 0.3956, "step": 3607 }, { "epoch": 0.9074446680080482, "grad_norm": 0.38593655824661255, "learning_rate": 8.802386168847442e-06, "loss": 0.3779, "step": 3608 }, { "epoch": 0.9076961770623743, "grad_norm": 0.36420145630836487, "learning_rate": 8.80143582483386e-06, "loss": 0.3473, "step": 3609 }, { "epoch": 0.9079476861167002, "grad_norm": 0.3500615656375885, "learning_rate": 8.800485155251079e-06, "loss": 0.3624, "step": 3610 }, { "epoch": 0.9081991951710262, "grad_norm": 0.3793070614337921, "learning_rate": 8.799534160180521e-06, "loss": 0.3848, "step": 3611 }, { "epoch": 0.9084507042253521, "grad_norm": 0.34537017345428467, "learning_rate": 8.798582839703634e-06, "loss": 0.3851, "step": 3612 }, { "epoch": 0.908702213279678, "grad_norm": 0.3755810260772705, "learning_rate": 8.797631193901888e-06, "loss": 0.3578, "step": 3613 }, { "epoch": 0.9089537223340041, "grad_norm": 0.36311477422714233, "learning_rate": 8.79667922285679e-06, "loss": 0.4059, "step": 3614 }, { "epoch": 0.90920523138833, "grad_norm": 0.3528873920440674, "learning_rate": 8.795726926649867e-06, "loss": 0.3788, "step": 3615 }, { "epoch": 0.9094567404426559, "grad_norm": 0.3925495445728302, "learning_rate": 8.794774305362679e-06, "loss": 0.3891, "step": 3616 }, { "epoch": 0.9097082494969819, "grad_norm": 0.3574492931365967, "learning_rate": 8.793821359076814e-06, "loss": 0.4097, "step": 3617 }, { "epoch": 0.9099597585513078, "grad_norm": 0.3412817716598511, "learning_rate": 8.79286808787388e-06, "loss": 0.3801, "step": 3618 }, { "epoch": 0.9102112676056338, "grad_norm": 0.38012850284576416, "learning_rate": 8.791914491835525e-06, "loss": 0.3801, "step": 3619 }, { "epoch": 0.9104627766599598, "grad_norm": 0.3861580789089203, "learning_rate": 8.790960571043416e-06, "loss": 0.4157, "step": 3620 }, { "epoch": 0.9107142857142857, "grad_norm": 0.36277568340301514, "learning_rate": 8.79000632557925e-06, "loss": 0.3985, "step": 3621 }, { "epoch": 0.9109657947686117, "grad_norm": 0.39433610439300537, "learning_rate": 8.789051755524752e-06, "loss": 0.4042, "step": 3622 }, { "epoch": 0.9112173038229376, "grad_norm": 0.34599095582962036, "learning_rate": 8.788096860961674e-06, "loss": 0.3646, "step": 3623 }, { "epoch": 0.9114688128772636, "grad_norm": 0.35807907581329346, "learning_rate": 8.7871416419718e-06, "loss": 0.3742, "step": 3624 }, { "epoch": 0.9117203219315896, "grad_norm": 0.4068434536457062, "learning_rate": 8.786186098636935e-06, "loss": 0.3949, "step": 3625 }, { "epoch": 0.9119718309859155, "grad_norm": 0.34174662828445435, "learning_rate": 8.78523023103892e-06, "loss": 0.3641, "step": 3626 }, { "epoch": 0.9122233400402414, "grad_norm": 0.35758674144744873, "learning_rate": 8.784274039259613e-06, "loss": 0.3822, "step": 3627 }, { "epoch": 0.9124748490945674, "grad_norm": 0.3813161849975586, "learning_rate": 8.78331752338091e-06, "loss": 0.398, "step": 3628 }, { "epoch": 0.9127263581488934, "grad_norm": 0.37230536341667175, "learning_rate": 8.78236068348473e-06, "loss": 0.3727, "step": 3629 }, { "epoch": 0.9129778672032193, "grad_norm": 0.3861142098903656, "learning_rate": 8.781403519653018e-06, "loss": 0.3595, "step": 3630 }, { "epoch": 0.9132293762575453, "grad_norm": 0.35886576771736145, "learning_rate": 8.780446031967753e-06, "loss": 0.3457, "step": 3631 }, { "epoch": 0.9134808853118712, "grad_norm": 0.4137284755706787, "learning_rate": 8.779488220510935e-06, "loss": 0.393, "step": 3632 }, { "epoch": 0.9137323943661971, "grad_norm": 0.3399023115634918, "learning_rate": 8.778530085364595e-06, "loss": 0.3529, "step": 3633 }, { "epoch": 0.9139839034205232, "grad_norm": 0.35265326499938965, "learning_rate": 8.777571626610793e-06, "loss": 0.3613, "step": 3634 }, { "epoch": 0.9142354124748491, "grad_norm": 0.38770735263824463, "learning_rate": 8.776612844331611e-06, "loss": 0.3949, "step": 3635 }, { "epoch": 0.9144869215291751, "grad_norm": 0.33681389689445496, "learning_rate": 8.775653738609167e-06, "loss": 0.3873, "step": 3636 }, { "epoch": 0.914738430583501, "grad_norm": 0.36732831597328186, "learning_rate": 8.7746943095256e-06, "loss": 0.3771, "step": 3637 }, { "epoch": 0.9149899396378269, "grad_norm": 0.35177841782569885, "learning_rate": 8.77373455716308e-06, "loss": 0.3578, "step": 3638 }, { "epoch": 0.915241448692153, "grad_norm": 0.342661589384079, "learning_rate": 8.772774481603805e-06, "loss": 0.3895, "step": 3639 }, { "epoch": 0.9154929577464789, "grad_norm": 0.3410513699054718, "learning_rate": 8.771814082929997e-06, "loss": 0.369, "step": 3640 }, { "epoch": 0.9157444668008048, "grad_norm": 0.3464511036872864, "learning_rate": 8.77085336122391e-06, "loss": 0.3705, "step": 3641 }, { "epoch": 0.9159959758551308, "grad_norm": 0.35824257135391235, "learning_rate": 8.76989231656782e-06, "loss": 0.3974, "step": 3642 }, { "epoch": 0.9162474849094567, "grad_norm": 0.3505759835243225, "learning_rate": 8.768930949044041e-06, "loss": 0.3966, "step": 3643 }, { "epoch": 0.9164989939637826, "grad_norm": 0.347253680229187, "learning_rate": 8.767969258734903e-06, "loss": 0.4126, "step": 3644 }, { "epoch": 0.9167505030181087, "grad_norm": 0.36433228850364685, "learning_rate": 8.767007245722769e-06, "loss": 0.3819, "step": 3645 }, { "epoch": 0.9170020120724346, "grad_norm": 0.34045785665512085, "learning_rate": 8.766044910090033e-06, "loss": 0.3647, "step": 3646 }, { "epoch": 0.9172535211267606, "grad_norm": 0.38475969433784485, "learning_rate": 8.76508225191911e-06, "loss": 0.4221, "step": 3647 }, { "epoch": 0.9175050301810865, "grad_norm": 0.3489360809326172, "learning_rate": 8.764119271292446e-06, "loss": 0.3675, "step": 3648 }, { "epoch": 0.9177565392354124, "grad_norm": 0.33245396614074707, "learning_rate": 8.763155968292517e-06, "loss": 0.3854, "step": 3649 }, { "epoch": 0.9180080482897385, "grad_norm": 0.3598446547985077, "learning_rate": 8.762192343001818e-06, "loss": 0.3766, "step": 3650 }, { "epoch": 0.9182595573440644, "grad_norm": 0.3599037528038025, "learning_rate": 8.761228395502883e-06, "loss": 0.3813, "step": 3651 }, { "epoch": 0.9185110663983903, "grad_norm": 0.3316107392311096, "learning_rate": 8.760264125878266e-06, "loss": 0.383, "step": 3652 }, { "epoch": 0.9187625754527163, "grad_norm": 0.3711565136909485, "learning_rate": 8.75929953421055e-06, "loss": 0.3962, "step": 3653 }, { "epoch": 0.9190140845070423, "grad_norm": 0.34968239068984985, "learning_rate": 8.758334620582346e-06, "loss": 0.3993, "step": 3654 }, { "epoch": 0.9192655935613682, "grad_norm": 0.327274888753891, "learning_rate": 8.757369385076296e-06, "loss": 0.4015, "step": 3655 }, { "epoch": 0.9195171026156942, "grad_norm": 0.38763725757598877, "learning_rate": 8.756403827775063e-06, "loss": 0.3636, "step": 3656 }, { "epoch": 0.9197686116700201, "grad_norm": 0.3284519612789154, "learning_rate": 8.755437948761344e-06, "loss": 0.3446, "step": 3657 }, { "epoch": 0.920020120724346, "grad_norm": 0.34053710103034973, "learning_rate": 8.754471748117857e-06, "loss": 0.3627, "step": 3658 }, { "epoch": 0.920271629778672, "grad_norm": 0.3225838243961334, "learning_rate": 8.753505225927352e-06, "loss": 0.3911, "step": 3659 }, { "epoch": 0.920523138832998, "grad_norm": 0.36908066272735596, "learning_rate": 8.752538382272608e-06, "loss": 0.3626, "step": 3660 }, { "epoch": 0.920774647887324, "grad_norm": 0.3289790749549866, "learning_rate": 8.751571217236426e-06, "loss": 0.4104, "step": 3661 }, { "epoch": 0.9210261569416499, "grad_norm": 0.35275307297706604, "learning_rate": 8.75060373090164e-06, "loss": 0.3679, "step": 3662 }, { "epoch": 0.9212776659959758, "grad_norm": 0.381730318069458, "learning_rate": 8.749635923351108e-06, "loss": 0.3927, "step": 3663 }, { "epoch": 0.9215291750503019, "grad_norm": 0.36701148748397827, "learning_rate": 8.748667794667715e-06, "loss": 0.3881, "step": 3664 }, { "epoch": 0.9217806841046278, "grad_norm": 0.3680243492126465, "learning_rate": 8.74769934493438e-06, "loss": 0.3805, "step": 3665 }, { "epoch": 0.9220321931589537, "grad_norm": 0.344265878200531, "learning_rate": 8.74673057423404e-06, "loss": 0.3879, "step": 3666 }, { "epoch": 0.9222837022132797, "grad_norm": 0.3326249122619629, "learning_rate": 8.745761482649667e-06, "loss": 0.3726, "step": 3667 }, { "epoch": 0.9225352112676056, "grad_norm": 0.3622082769870758, "learning_rate": 8.744792070264254e-06, "loss": 0.3893, "step": 3668 }, { "epoch": 0.9227867203219315, "grad_norm": 0.4040569067001343, "learning_rate": 8.743822337160829e-06, "loss": 0.3559, "step": 3669 }, { "epoch": 0.9230382293762576, "grad_norm": 0.34916210174560547, "learning_rate": 8.742852283422443e-06, "loss": 0.3637, "step": 3670 }, { "epoch": 0.9232897384305835, "grad_norm": 0.34636667370796204, "learning_rate": 8.741881909132171e-06, "loss": 0.3768, "step": 3671 }, { "epoch": 0.9235412474849095, "grad_norm": 0.344542533159256, "learning_rate": 8.740911214373125e-06, "loss": 0.3727, "step": 3672 }, { "epoch": 0.9237927565392354, "grad_norm": 0.37723875045776367, "learning_rate": 8.739940199228436e-06, "loss": 0.3802, "step": 3673 }, { "epoch": 0.9240442655935613, "grad_norm": 0.3496474027633667, "learning_rate": 8.738968863781267e-06, "loss": 0.3946, "step": 3674 }, { "epoch": 0.9242957746478874, "grad_norm": 0.31747496128082275, "learning_rate": 8.737997208114806e-06, "loss": 0.3364, "step": 3675 }, { "epoch": 0.9245472837022133, "grad_norm": 0.36128780245780945, "learning_rate": 8.737025232312267e-06, "loss": 0.3646, "step": 3676 }, { "epoch": 0.9247987927565392, "grad_norm": 0.3360286056995392, "learning_rate": 8.736052936456897e-06, "loss": 0.3604, "step": 3677 }, { "epoch": 0.9250503018108652, "grad_norm": 0.37256118655204773, "learning_rate": 8.735080320631966e-06, "loss": 0.3742, "step": 3678 }, { "epoch": 0.9253018108651911, "grad_norm": 0.3662985861301422, "learning_rate": 8.734107384920771e-06, "loss": 0.4107, "step": 3679 }, { "epoch": 0.9255533199195171, "grad_norm": 0.37539082765579224, "learning_rate": 8.733134129406638e-06, "loss": 0.3769, "step": 3680 }, { "epoch": 0.9258048289738431, "grad_norm": 0.379936546087265, "learning_rate": 8.732160554172923e-06, "loss": 0.3548, "step": 3681 }, { "epoch": 0.926056338028169, "grad_norm": 0.3667657673358917, "learning_rate": 8.731186659303004e-06, "loss": 0.3804, "step": 3682 }, { "epoch": 0.9263078470824949, "grad_norm": 0.374489963054657, "learning_rate": 8.73021244488029e-06, "loss": 0.3722, "step": 3683 }, { "epoch": 0.9265593561368209, "grad_norm": 0.37897610664367676, "learning_rate": 8.729237910988218e-06, "loss": 0.4052, "step": 3684 }, { "epoch": 0.9268108651911469, "grad_norm": 0.332736611366272, "learning_rate": 8.728263057710247e-06, "loss": 0.3906, "step": 3685 }, { "epoch": 0.9270623742454729, "grad_norm": 0.37332481145858765, "learning_rate": 8.72728788512987e-06, "loss": 0.3862, "step": 3686 }, { "epoch": 0.9273138832997988, "grad_norm": 0.33331403136253357, "learning_rate": 8.726312393330602e-06, "loss": 0.3965, "step": 3687 }, { "epoch": 0.9275653923541247, "grad_norm": 0.3460955321788788, "learning_rate": 8.72533658239599e-06, "loss": 0.3946, "step": 3688 }, { "epoch": 0.9278169014084507, "grad_norm": 0.3659730553627014, "learning_rate": 8.724360452409606e-06, "loss": 0.3875, "step": 3689 }, { "epoch": 0.9280684104627767, "grad_norm": 0.33877822756767273, "learning_rate": 8.723384003455049e-06, "loss": 0.3761, "step": 3690 }, { "epoch": 0.9283199195171026, "grad_norm": 0.34681734442710876, "learning_rate": 8.722407235615944e-06, "loss": 0.3584, "step": 3691 }, { "epoch": 0.9285714285714286, "grad_norm": 0.3557383716106415, "learning_rate": 8.721430148975946e-06, "loss": 0.39, "step": 3692 }, { "epoch": 0.9288229376257545, "grad_norm": 0.3659672141075134, "learning_rate": 8.72045274361874e-06, "loss": 0.3935, "step": 3693 }, { "epoch": 0.9290744466800804, "grad_norm": 0.3567541241645813, "learning_rate": 8.71947501962803e-06, "loss": 0.3621, "step": 3694 }, { "epoch": 0.9293259557344065, "grad_norm": 0.3142370581626892, "learning_rate": 8.718496977087554e-06, "loss": 0.3623, "step": 3695 }, { "epoch": 0.9295774647887324, "grad_norm": 0.3656524121761322, "learning_rate": 8.717518616081073e-06, "loss": 0.3803, "step": 3696 }, { "epoch": 0.9298289738430584, "grad_norm": 0.3680150508880615, "learning_rate": 8.716539936692381e-06, "loss": 0.3862, "step": 3697 }, { "epoch": 0.9300804828973843, "grad_norm": 0.32054194808006287, "learning_rate": 8.715560939005293e-06, "loss": 0.401, "step": 3698 }, { "epoch": 0.9303319919517102, "grad_norm": 0.3696306049823761, "learning_rate": 8.714581623103654e-06, "loss": 0.3615, "step": 3699 }, { "epoch": 0.9305835010060363, "grad_norm": 0.3776056170463562, "learning_rate": 8.713601989071342e-06, "loss": 0.3761, "step": 3700 }, { "epoch": 0.9308350100603622, "grad_norm": 0.3336465656757355, "learning_rate": 8.712622036992248e-06, "loss": 0.3806, "step": 3701 }, { "epoch": 0.9310865191146881, "grad_norm": 0.3547345995903015, "learning_rate": 8.711641766950302e-06, "loss": 0.3758, "step": 3702 }, { "epoch": 0.9313380281690141, "grad_norm": 0.3893287181854248, "learning_rate": 8.710661179029461e-06, "loss": 0.3892, "step": 3703 }, { "epoch": 0.93158953722334, "grad_norm": 0.36249130964279175, "learning_rate": 8.709680273313703e-06, "loss": 0.3726, "step": 3704 }, { "epoch": 0.931841046277666, "grad_norm": 0.35625818371772766, "learning_rate": 8.708699049887038e-06, "loss": 0.3782, "step": 3705 }, { "epoch": 0.932092555331992, "grad_norm": 0.3533684313297272, "learning_rate": 8.707717508833499e-06, "loss": 0.4038, "step": 3706 }, { "epoch": 0.9323440643863179, "grad_norm": 0.3416036367416382, "learning_rate": 8.706735650237153e-06, "loss": 0.3865, "step": 3707 }, { "epoch": 0.9325955734406438, "grad_norm": 0.3354741036891937, "learning_rate": 8.705753474182085e-06, "loss": 0.3464, "step": 3708 }, { "epoch": 0.9328470824949698, "grad_norm": 0.37271448969841003, "learning_rate": 8.704770980752417e-06, "loss": 0.3944, "step": 3709 }, { "epoch": 0.9330985915492958, "grad_norm": 0.345333993434906, "learning_rate": 8.703788170032293e-06, "loss": 0.3661, "step": 3710 }, { "epoch": 0.9333501006036218, "grad_norm": 0.39637884497642517, "learning_rate": 8.70280504210588e-06, "loss": 0.3813, "step": 3711 }, { "epoch": 0.9336016096579477, "grad_norm": 0.35547831654548645, "learning_rate": 8.701821597057381e-06, "loss": 0.4037, "step": 3712 }, { "epoch": 0.9338531187122736, "grad_norm": 0.3298320472240448, "learning_rate": 8.700837834971021e-06, "loss": 0.3871, "step": 3713 }, { "epoch": 0.9341046277665996, "grad_norm": 0.3607611358165741, "learning_rate": 8.699853755931053e-06, "loss": 0.3879, "step": 3714 }, { "epoch": 0.9343561368209256, "grad_norm": 0.3281857967376709, "learning_rate": 8.698869360021755e-06, "loss": 0.3806, "step": 3715 }, { "epoch": 0.9346076458752515, "grad_norm": 0.3486366271972656, "learning_rate": 8.697884647327438e-06, "loss": 0.3984, "step": 3716 }, { "epoch": 0.9348591549295775, "grad_norm": 0.32354462146759033, "learning_rate": 8.696899617932436e-06, "loss": 0.3889, "step": 3717 }, { "epoch": 0.9351106639839034, "grad_norm": 0.3479534387588501, "learning_rate": 8.69591427192111e-06, "loss": 0.3921, "step": 3718 }, { "epoch": 0.9353621730382293, "grad_norm": 0.3232344090938568, "learning_rate": 8.694928609377844e-06, "loss": 0.3755, "step": 3719 }, { "epoch": 0.9356136820925554, "grad_norm": 0.35283714532852173, "learning_rate": 8.69394263038706e-06, "loss": 0.3604, "step": 3720 }, { "epoch": 0.9358651911468813, "grad_norm": 0.35240834951400757, "learning_rate": 8.6929563350332e-06, "loss": 0.3754, "step": 3721 }, { "epoch": 0.9361167002012073, "grad_norm": 0.34090203046798706, "learning_rate": 8.691969723400732e-06, "loss": 0.4053, "step": 3722 }, { "epoch": 0.9363682092555332, "grad_norm": 0.389652281999588, "learning_rate": 8.690982795574155e-06, "loss": 0.3918, "step": 3723 }, { "epoch": 0.9366197183098591, "grad_norm": 0.3855276107788086, "learning_rate": 8.689995551637992e-06, "loss": 0.3933, "step": 3724 }, { "epoch": 0.9368712273641852, "grad_norm": 0.3753201365470886, "learning_rate": 8.689007991676795e-06, "loss": 0.37, "step": 3725 }, { "epoch": 0.9371227364185111, "grad_norm": 0.34677380323410034, "learning_rate": 8.68802011577514e-06, "loss": 0.3988, "step": 3726 }, { "epoch": 0.937374245472837, "grad_norm": 0.3636135458946228, "learning_rate": 8.687031924017635e-06, "loss": 0.3825, "step": 3727 }, { "epoch": 0.937625754527163, "grad_norm": 0.3638603389263153, "learning_rate": 8.686043416488913e-06, "loss": 0.3959, "step": 3728 }, { "epoch": 0.9378772635814889, "grad_norm": 0.40067726373672485, "learning_rate": 8.685054593273631e-06, "loss": 0.3546, "step": 3729 }, { "epoch": 0.9381287726358148, "grad_norm": 0.34523940086364746, "learning_rate": 8.684065454456478e-06, "loss": 0.3723, "step": 3730 }, { "epoch": 0.9383802816901409, "grad_norm": 0.3261178135871887, "learning_rate": 8.683076000122165e-06, "loss": 0.3744, "step": 3731 }, { "epoch": 0.9386317907444668, "grad_norm": 0.4006332457065582, "learning_rate": 8.682086230355432e-06, "loss": 0.357, "step": 3732 }, { "epoch": 0.9388832997987927, "grad_norm": 0.33518972992897034, "learning_rate": 8.681096145241053e-06, "loss": 0.3654, "step": 3733 }, { "epoch": 0.9391348088531187, "grad_norm": 0.3528534770011902, "learning_rate": 8.680105744863817e-06, "loss": 0.3879, "step": 3734 }, { "epoch": 0.9393863179074446, "grad_norm": 0.3312220573425293, "learning_rate": 8.679115029308543e-06, "loss": 0.3973, "step": 3735 }, { "epoch": 0.9396378269617707, "grad_norm": 0.38453420996665955, "learning_rate": 8.678123998660087e-06, "loss": 0.3724, "step": 3736 }, { "epoch": 0.9398893360160966, "grad_norm": 0.3209126889705658, "learning_rate": 8.677132653003318e-06, "loss": 0.3678, "step": 3737 }, { "epoch": 0.9401408450704225, "grad_norm": 0.39340800046920776, "learning_rate": 8.676140992423143e-06, "loss": 0.3451, "step": 3738 }, { "epoch": 0.9403923541247485, "grad_norm": 0.38696199655532837, "learning_rate": 8.67514901700449e-06, "loss": 0.4004, "step": 3739 }, { "epoch": 0.9406438631790744, "grad_norm": 0.35889217257499695, "learning_rate": 8.674156726832315e-06, "loss": 0.3793, "step": 3740 }, { "epoch": 0.9408953722334004, "grad_norm": 0.403751015663147, "learning_rate": 8.673164121991601e-06, "loss": 0.4051, "step": 3741 }, { "epoch": 0.9411468812877264, "grad_norm": 0.43993040919303894, "learning_rate": 8.672171202567359e-06, "loss": 0.3685, "step": 3742 }, { "epoch": 0.9413983903420523, "grad_norm": 0.3885231614112854, "learning_rate": 8.671177968644628e-06, "loss": 0.3809, "step": 3743 }, { "epoch": 0.9416498993963782, "grad_norm": 0.3906707167625427, "learning_rate": 8.67018442030847e-06, "loss": 0.3877, "step": 3744 }, { "epoch": 0.9419014084507042, "grad_norm": 0.35688477754592896, "learning_rate": 8.669190557643977e-06, "loss": 0.3578, "step": 3745 }, { "epoch": 0.9421529175050302, "grad_norm": 0.37239667773246765, "learning_rate": 8.668196380736267e-06, "loss": 0.3765, "step": 3746 }, { "epoch": 0.9424044265593562, "grad_norm": 0.38432252407073975, "learning_rate": 8.667201889670485e-06, "loss": 0.375, "step": 3747 }, { "epoch": 0.9426559356136821, "grad_norm": 0.36130064725875854, "learning_rate": 8.666207084531804e-06, "loss": 0.3808, "step": 3748 }, { "epoch": 0.942907444668008, "grad_norm": 0.34920212626457214, "learning_rate": 8.665211965405422e-06, "loss": 0.3584, "step": 3749 }, { "epoch": 0.943158953722334, "grad_norm": 0.34862497448921204, "learning_rate": 8.664216532376563e-06, "loss": 0.3585, "step": 3750 }, { "epoch": 0.94341046277666, "grad_norm": 0.35228458046913147, "learning_rate": 8.663220785530485e-06, "loss": 0.3725, "step": 3751 }, { "epoch": 0.9436619718309859, "grad_norm": 0.3779107928276062, "learning_rate": 8.662224724952459e-06, "loss": 0.3687, "step": 3752 }, { "epoch": 0.9439134808853119, "grad_norm": 0.36739468574523926, "learning_rate": 8.661228350727798e-06, "loss": 0.3429, "step": 3753 }, { "epoch": 0.9441649899396378, "grad_norm": 0.381713330745697, "learning_rate": 8.660231662941834e-06, "loss": 0.3805, "step": 3754 }, { "epoch": 0.9444164989939637, "grad_norm": 0.35245978832244873, "learning_rate": 8.659234661679926e-06, "loss": 0.3795, "step": 3755 }, { "epoch": 0.9446680080482898, "grad_norm": 0.3956359028816223, "learning_rate": 8.658237347027461e-06, "loss": 0.3569, "step": 3756 }, { "epoch": 0.9449195171026157, "grad_norm": 0.3203680217266083, "learning_rate": 8.657239719069854e-06, "loss": 0.386, "step": 3757 }, { "epoch": 0.9451710261569416, "grad_norm": 0.3383410573005676, "learning_rate": 8.656241777892544e-06, "loss": 0.3816, "step": 3758 }, { "epoch": 0.9454225352112676, "grad_norm": 0.3426474928855896, "learning_rate": 8.655243523580998e-06, "loss": 0.3848, "step": 3759 }, { "epoch": 0.9456740442655935, "grad_norm": 0.3572126030921936, "learning_rate": 8.654244956220713e-06, "loss": 0.3666, "step": 3760 }, { "epoch": 0.9459255533199196, "grad_norm": 0.3556264340877533, "learning_rate": 8.653246075897208e-06, "loss": 0.4032, "step": 3761 }, { "epoch": 0.9461770623742455, "grad_norm": 0.3631819486618042, "learning_rate": 8.652246882696032e-06, "loss": 0.3807, "step": 3762 }, { "epoch": 0.9464285714285714, "grad_norm": 0.36357030272483826, "learning_rate": 8.651247376702756e-06, "loss": 0.3523, "step": 3763 }, { "epoch": 0.9466800804828974, "grad_norm": 0.34328320622444153, "learning_rate": 8.650247558002987e-06, "loss": 0.3881, "step": 3764 }, { "epoch": 0.9469315895372233, "grad_norm": 0.349572092294693, "learning_rate": 8.64924742668235e-06, "loss": 0.3589, "step": 3765 }, { "epoch": 0.9471830985915493, "grad_norm": 0.3555934429168701, "learning_rate": 8.6482469828265e-06, "loss": 0.3848, "step": 3766 }, { "epoch": 0.9474346076458753, "grad_norm": 0.32735902070999146, "learning_rate": 8.64724622652112e-06, "loss": 0.3698, "step": 3767 }, { "epoch": 0.9476861167002012, "grad_norm": 0.35206887125968933, "learning_rate": 8.646245157851918e-06, "loss": 0.3755, "step": 3768 }, { "epoch": 0.9479376257545271, "grad_norm": 0.34176838397979736, "learning_rate": 8.645243776904629e-06, "loss": 0.3976, "step": 3769 }, { "epoch": 0.9481891348088531, "grad_norm": 0.3586902320384979, "learning_rate": 8.644242083765014e-06, "loss": 0.3693, "step": 3770 }, { "epoch": 0.9484406438631791, "grad_norm": 0.33086657524108887, "learning_rate": 8.643240078518865e-06, "loss": 0.3722, "step": 3771 }, { "epoch": 0.9486921529175051, "grad_norm": 0.384689062833786, "learning_rate": 8.642237761251992e-06, "loss": 0.3484, "step": 3772 }, { "epoch": 0.948943661971831, "grad_norm": 0.3711892366409302, "learning_rate": 8.641235132050243e-06, "loss": 0.3786, "step": 3773 }, { "epoch": 0.9491951710261569, "grad_norm": 0.325150728225708, "learning_rate": 8.640232190999484e-06, "loss": 0.4007, "step": 3774 }, { "epoch": 0.9494466800804829, "grad_norm": 0.3467983603477478, "learning_rate": 8.63922893818561e-06, "loss": 0.367, "step": 3775 }, { "epoch": 0.9496981891348089, "grad_norm": 0.40407732129096985, "learning_rate": 8.638225373694546e-06, "loss": 0.374, "step": 3776 }, { "epoch": 0.9499496981891348, "grad_norm": 0.3339010775089264, "learning_rate": 8.637221497612238e-06, "loss": 0.3786, "step": 3777 }, { "epoch": 0.9502012072434608, "grad_norm": 0.33294668793678284, "learning_rate": 8.636217310024664e-06, "loss": 0.4051, "step": 3778 }, { "epoch": 0.9504527162977867, "grad_norm": 0.32620373368263245, "learning_rate": 8.635212811017826e-06, "loss": 0.3619, "step": 3779 }, { "epoch": 0.9507042253521126, "grad_norm": 0.3283163011074066, "learning_rate": 8.634208000677751e-06, "loss": 0.3663, "step": 3780 }, { "epoch": 0.9509557344064387, "grad_norm": 0.3436046838760376, "learning_rate": 8.633202879090496e-06, "loss": 0.3836, "step": 3781 }, { "epoch": 0.9512072434607646, "grad_norm": 0.3591076731681824, "learning_rate": 8.632197446342145e-06, "loss": 0.3852, "step": 3782 }, { "epoch": 0.9514587525150905, "grad_norm": 0.3302284777164459, "learning_rate": 8.631191702518806e-06, "loss": 0.4113, "step": 3783 }, { "epoch": 0.9517102615694165, "grad_norm": 0.3252635896205902, "learning_rate": 8.630185647706614e-06, "loss": 0.3768, "step": 3784 }, { "epoch": 0.9519617706237424, "grad_norm": 0.3460747003555298, "learning_rate": 8.629179281991732e-06, "loss": 0.4131, "step": 3785 }, { "epoch": 0.9522132796780685, "grad_norm": 0.3621247112751007, "learning_rate": 8.628172605460347e-06, "loss": 0.393, "step": 3786 }, { "epoch": 0.9524647887323944, "grad_norm": 0.3651982843875885, "learning_rate": 8.627165618198676e-06, "loss": 0.3871, "step": 3787 }, { "epoch": 0.9527162977867203, "grad_norm": 0.3419380784034729, "learning_rate": 8.626158320292963e-06, "loss": 0.3815, "step": 3788 }, { "epoch": 0.9529678068410463, "grad_norm": 0.3466692268848419, "learning_rate": 8.625150711829475e-06, "loss": 0.3587, "step": 3789 }, { "epoch": 0.9532193158953722, "grad_norm": 0.3754306435585022, "learning_rate": 8.624142792894505e-06, "loss": 0.4, "step": 3790 }, { "epoch": 0.9534708249496981, "grad_norm": 0.348619669675827, "learning_rate": 8.62313456357438e-06, "loss": 0.3605, "step": 3791 }, { "epoch": 0.9537223340040242, "grad_norm": 0.34513843059539795, "learning_rate": 8.622126023955446e-06, "loss": 0.3467, "step": 3792 }, { "epoch": 0.9539738430583501, "grad_norm": 0.3718356788158417, "learning_rate": 8.621117174124076e-06, "loss": 0.379, "step": 3793 }, { "epoch": 0.954225352112676, "grad_norm": 0.38062798976898193, "learning_rate": 8.620108014166674e-06, "loss": 0.3619, "step": 3794 }, { "epoch": 0.954476861167002, "grad_norm": 0.3671969473361969, "learning_rate": 8.619098544169671e-06, "loss": 0.3721, "step": 3795 }, { "epoch": 0.954728370221328, "grad_norm": 0.40202799439430237, "learning_rate": 8.618088764219514e-06, "loss": 0.3779, "step": 3796 }, { "epoch": 0.954979879275654, "grad_norm": 0.36368417739868164, "learning_rate": 8.617078674402692e-06, "loss": 0.3934, "step": 3797 }, { "epoch": 0.9552313883299799, "grad_norm": 0.42696356773376465, "learning_rate": 8.616068274805709e-06, "loss": 0.3764, "step": 3798 }, { "epoch": 0.9554828973843058, "grad_norm": 0.3894241750240326, "learning_rate": 8.615057565515102e-06, "loss": 0.3658, "step": 3799 }, { "epoch": 0.9557344064386318, "grad_norm": 0.38357433676719666, "learning_rate": 8.614046546617427e-06, "loss": 0.3971, "step": 3800 }, { "epoch": 0.9559859154929577, "grad_norm": 0.3617189824581146, "learning_rate": 8.613035218199276e-06, "loss": 0.3809, "step": 3801 }, { "epoch": 0.9562374245472837, "grad_norm": 0.3896433711051941, "learning_rate": 8.612023580347264e-06, "loss": 0.3586, "step": 3802 }, { "epoch": 0.9564889336016097, "grad_norm": 0.42416492104530334, "learning_rate": 8.611011633148027e-06, "loss": 0.4036, "step": 3803 }, { "epoch": 0.9567404426559356, "grad_norm": 0.35320156812667847, "learning_rate": 8.609999376688235e-06, "loss": 0.3765, "step": 3804 }, { "epoch": 0.9569919517102615, "grad_norm": 0.37311264872550964, "learning_rate": 8.60898681105458e-06, "loss": 0.3923, "step": 3805 }, { "epoch": 0.9572434607645876, "grad_norm": 0.3776063621044159, "learning_rate": 8.607973936333782e-06, "loss": 0.3784, "step": 3806 }, { "epoch": 0.9574949698189135, "grad_norm": 0.34867435693740845, "learning_rate": 8.606960752612587e-06, "loss": 0.3768, "step": 3807 }, { "epoch": 0.9577464788732394, "grad_norm": 0.3809923529624939, "learning_rate": 8.60594725997777e-06, "loss": 0.3824, "step": 3808 }, { "epoch": 0.9579979879275654, "grad_norm": 0.352938711643219, "learning_rate": 8.604933458516129e-06, "loss": 0.403, "step": 3809 }, { "epoch": 0.9582494969818913, "grad_norm": 0.3831866681575775, "learning_rate": 8.603919348314487e-06, "loss": 0.37, "step": 3810 }, { "epoch": 0.9585010060362174, "grad_norm": 0.3907982110977173, "learning_rate": 8.602904929459702e-06, "loss": 0.3907, "step": 3811 }, { "epoch": 0.9587525150905433, "grad_norm": 0.3331465423107147, "learning_rate": 8.601890202038648e-06, "loss": 0.3946, "step": 3812 }, { "epoch": 0.9590040241448692, "grad_norm": 0.3363270163536072, "learning_rate": 8.600875166138232e-06, "loss": 0.375, "step": 3813 }, { "epoch": 0.9592555331991952, "grad_norm": 0.34430885314941406, "learning_rate": 8.599859821845386e-06, "loss": 0.3892, "step": 3814 }, { "epoch": 0.9595070422535211, "grad_norm": 0.35445672273635864, "learning_rate": 8.598844169247064e-06, "loss": 0.3733, "step": 3815 }, { "epoch": 0.959758551307847, "grad_norm": 0.3446792960166931, "learning_rate": 8.597828208430257e-06, "loss": 0.3627, "step": 3816 }, { "epoch": 0.9600100603621731, "grad_norm": 0.39067867398262024, "learning_rate": 8.596811939481971e-06, "loss": 0.3893, "step": 3817 }, { "epoch": 0.960261569416499, "grad_norm": 0.373501181602478, "learning_rate": 8.59579536248924e-06, "loss": 0.3754, "step": 3818 }, { "epoch": 0.9605130784708249, "grad_norm": 0.33692559599876404, "learning_rate": 8.594778477539136e-06, "loss": 0.3615, "step": 3819 }, { "epoch": 0.9607645875251509, "grad_norm": 0.38633298873901367, "learning_rate": 8.593761284718742e-06, "loss": 0.3761, "step": 3820 }, { "epoch": 0.9610160965794768, "grad_norm": 0.37641310691833496, "learning_rate": 8.592743784115178e-06, "loss": 0.3895, "step": 3821 }, { "epoch": 0.9612676056338029, "grad_norm": 0.34640181064605713, "learning_rate": 8.591725975815584e-06, "loss": 0.3954, "step": 3822 }, { "epoch": 0.9615191146881288, "grad_norm": 0.3677440583705902, "learning_rate": 8.59070785990713e-06, "loss": 0.3547, "step": 3823 }, { "epoch": 0.9617706237424547, "grad_norm": 0.3272249102592468, "learning_rate": 8.589689436477011e-06, "loss": 0.3579, "step": 3824 }, { "epoch": 0.9620221327967807, "grad_norm": 0.3785829544067383, "learning_rate": 8.58867070561245e-06, "loss": 0.3946, "step": 3825 }, { "epoch": 0.9622736418511066, "grad_norm": 0.36952534317970276, "learning_rate": 8.587651667400692e-06, "loss": 0.3967, "step": 3826 }, { "epoch": 0.9625251509054326, "grad_norm": 0.3846037983894348, "learning_rate": 8.586632321929013e-06, "loss": 0.3898, "step": 3827 }, { "epoch": 0.9627766599597586, "grad_norm": 0.38403981924057007, "learning_rate": 8.585612669284715e-06, "loss": 0.3846, "step": 3828 }, { "epoch": 0.9630281690140845, "grad_norm": 0.3689204156398773, "learning_rate": 8.584592709555125e-06, "loss": 0.3439, "step": 3829 }, { "epoch": 0.9632796780684104, "grad_norm": 0.34798160195350647, "learning_rate": 8.58357244282759e-06, "loss": 0.372, "step": 3830 }, { "epoch": 0.9635311871227364, "grad_norm": 0.4079124629497528, "learning_rate": 8.582551869189497e-06, "loss": 0.3902, "step": 3831 }, { "epoch": 0.9637826961770624, "grad_norm": 0.4098033010959625, "learning_rate": 8.581530988728249e-06, "loss": 0.3796, "step": 3832 }, { "epoch": 0.9640342052313883, "grad_norm": 0.3369835615158081, "learning_rate": 8.580509801531276e-06, "loss": 0.3445, "step": 3833 }, { "epoch": 0.9642857142857143, "grad_norm": 0.3969493806362152, "learning_rate": 8.57948830768604e-06, "loss": 0.3782, "step": 3834 }, { "epoch": 0.9645372233400402, "grad_norm": 0.4460330903530121, "learning_rate": 8.57846650728002e-06, "loss": 0.3792, "step": 3835 }, { "epoch": 0.9647887323943662, "grad_norm": 0.3550911843776703, "learning_rate": 8.577444400400733e-06, "loss": 0.3809, "step": 3836 }, { "epoch": 0.9650402414486922, "grad_norm": 0.4082079827785492, "learning_rate": 8.576421987135716e-06, "loss": 0.4033, "step": 3837 }, { "epoch": 0.9652917505030181, "grad_norm": 0.40458518266677856, "learning_rate": 8.575399267572527e-06, "loss": 0.3572, "step": 3838 }, { "epoch": 0.9655432595573441, "grad_norm": 0.3640364110469818, "learning_rate": 8.574376241798758e-06, "loss": 0.3959, "step": 3839 }, { "epoch": 0.96579476861167, "grad_norm": 0.3785410523414612, "learning_rate": 8.573352909902027e-06, "loss": 0.3834, "step": 3840 }, { "epoch": 0.9660462776659959, "grad_norm": 0.35065385699272156, "learning_rate": 8.572329271969972e-06, "loss": 0.3953, "step": 3841 }, { "epoch": 0.966297786720322, "grad_norm": 0.3185063600540161, "learning_rate": 8.571305328090264e-06, "loss": 0.3784, "step": 3842 }, { "epoch": 0.9665492957746479, "grad_norm": 0.3544987142086029, "learning_rate": 8.570281078350598e-06, "loss": 0.3713, "step": 3843 }, { "epoch": 0.9668008048289738, "grad_norm": 0.3804239332675934, "learning_rate": 8.569256522838692e-06, "loss": 0.3862, "step": 3844 }, { "epoch": 0.9670523138832998, "grad_norm": 0.32411083579063416, "learning_rate": 8.568231661642294e-06, "loss": 0.364, "step": 3845 }, { "epoch": 0.9673038229376257, "grad_norm": 0.3537362515926361, "learning_rate": 8.567206494849178e-06, "loss": 0.3677, "step": 3846 }, { "epoch": 0.9675553319919518, "grad_norm": 0.3677324056625366, "learning_rate": 8.56618102254714e-06, "loss": 0.4157, "step": 3847 }, { "epoch": 0.9678068410462777, "grad_norm": 0.35382696986198425, "learning_rate": 8.56515524482401e-06, "loss": 0.3653, "step": 3848 }, { "epoch": 0.9680583501006036, "grad_norm": 0.35097870230674744, "learning_rate": 8.564129161767636e-06, "loss": 0.3956, "step": 3849 }, { "epoch": 0.9683098591549296, "grad_norm": 0.35212019085884094, "learning_rate": 8.563102773465894e-06, "loss": 0.3872, "step": 3850 }, { "epoch": 0.9685613682092555, "grad_norm": 0.36232250928878784, "learning_rate": 8.562076080006693e-06, "loss": 0.3922, "step": 3851 }, { "epoch": 0.9688128772635815, "grad_norm": 0.33010706305503845, "learning_rate": 8.561049081477958e-06, "loss": 0.3726, "step": 3852 }, { "epoch": 0.9690643863179075, "grad_norm": 0.3573167026042938, "learning_rate": 8.56002177796765e-06, "loss": 0.3629, "step": 3853 }, { "epoch": 0.9693158953722334, "grad_norm": 0.372215211391449, "learning_rate": 8.558994169563745e-06, "loss": 0.3706, "step": 3854 }, { "epoch": 0.9695674044265593, "grad_norm": 0.33711981773376465, "learning_rate": 8.557966256354256e-06, "loss": 0.3661, "step": 3855 }, { "epoch": 0.9698189134808853, "grad_norm": 0.37138739228248596, "learning_rate": 8.556938038427217e-06, "loss": 0.3524, "step": 3856 }, { "epoch": 0.9700704225352113, "grad_norm": 0.34503763914108276, "learning_rate": 8.555909515870683e-06, "loss": 0.3523, "step": 3857 }, { "epoch": 0.9703219315895373, "grad_norm": 0.3223390281200409, "learning_rate": 8.55488068877275e-06, "loss": 0.3686, "step": 3858 }, { "epoch": 0.9705734406438632, "grad_norm": 0.3484128713607788, "learning_rate": 8.553851557221521e-06, "loss": 0.3785, "step": 3859 }, { "epoch": 0.9708249496981891, "grad_norm": 0.36130252480506897, "learning_rate": 8.552822121305139e-06, "loss": 0.408, "step": 3860 }, { "epoch": 0.9710764587525151, "grad_norm": 0.3652491569519043, "learning_rate": 8.551792381111771e-06, "loss": 0.3706, "step": 3861 }, { "epoch": 0.971327967806841, "grad_norm": 0.3538327217102051, "learning_rate": 8.550762336729605e-06, "loss": 0.3522, "step": 3862 }, { "epoch": 0.971579476861167, "grad_norm": 0.35552993416786194, "learning_rate": 8.549731988246858e-06, "loss": 0.3924, "step": 3863 }, { "epoch": 0.971830985915493, "grad_norm": 0.3610166013240814, "learning_rate": 8.548701335751774e-06, "loss": 0.3665, "step": 3864 }, { "epoch": 0.9720824949698189, "grad_norm": 0.368213951587677, "learning_rate": 8.54767037933262e-06, "loss": 0.3747, "step": 3865 }, { "epoch": 0.9723340040241448, "grad_norm": 0.3184708058834076, "learning_rate": 8.546639119077693e-06, "loss": 0.3717, "step": 3866 }, { "epoch": 0.9725855130784709, "grad_norm": 0.35572683811187744, "learning_rate": 8.545607555075313e-06, "loss": 0.3767, "step": 3867 }, { "epoch": 0.9728370221327968, "grad_norm": 0.33128035068511963, "learning_rate": 8.544575687413826e-06, "loss": 0.3522, "step": 3868 }, { "epoch": 0.9730885311871227, "grad_norm": 0.3530205488204956, "learning_rate": 8.543543516181607e-06, "loss": 0.409, "step": 3869 }, { "epoch": 0.9733400402414487, "grad_norm": 0.31648722290992737, "learning_rate": 8.542511041467054e-06, "loss": 0.3962, "step": 3870 }, { "epoch": 0.9735915492957746, "grad_norm": 0.33272168040275574, "learning_rate": 8.541478263358594e-06, "loss": 0.3799, "step": 3871 }, { "epoch": 0.9738430583501007, "grad_norm": 0.3218432366847992, "learning_rate": 8.540445181944673e-06, "loss": 0.3903, "step": 3872 }, { "epoch": 0.9740945674044266, "grad_norm": 0.33532074093818665, "learning_rate": 8.539411797313772e-06, "loss": 0.3831, "step": 3873 }, { "epoch": 0.9743460764587525, "grad_norm": 0.336195170879364, "learning_rate": 8.538378109554395e-06, "loss": 0.3744, "step": 3874 }, { "epoch": 0.9745975855130785, "grad_norm": 0.364101380109787, "learning_rate": 8.537344118755067e-06, "loss": 0.4106, "step": 3875 }, { "epoch": 0.9748490945674044, "grad_norm": 0.3507227301597595, "learning_rate": 8.536309825004346e-06, "loss": 0.3932, "step": 3876 }, { "epoch": 0.9751006036217303, "grad_norm": 0.368144154548645, "learning_rate": 8.53527522839081e-06, "loss": 0.3728, "step": 3877 }, { "epoch": 0.9753521126760564, "grad_norm": 0.3607654273509979, "learning_rate": 8.53424032900307e-06, "loss": 0.379, "step": 3878 }, { "epoch": 0.9756036217303823, "grad_norm": 0.3326222598552704, "learning_rate": 8.533205126929754e-06, "loss": 0.3651, "step": 3879 }, { "epoch": 0.9758551307847082, "grad_norm": 0.3740299344062805, "learning_rate": 8.532169622259524e-06, "loss": 0.3597, "step": 3880 }, { "epoch": 0.9761066398390342, "grad_norm": 0.3337053060531616, "learning_rate": 8.531133815081061e-06, "loss": 0.4037, "step": 3881 }, { "epoch": 0.9763581488933601, "grad_norm": 0.35001134872436523, "learning_rate": 8.530097705483078e-06, "loss": 0.3737, "step": 3882 }, { "epoch": 0.9766096579476862, "grad_norm": 0.3661789000034332, "learning_rate": 8.52906129355431e-06, "loss": 0.3587, "step": 3883 }, { "epoch": 0.9768611670020121, "grad_norm": 0.33241093158721924, "learning_rate": 8.528024579383522e-06, "loss": 0.377, "step": 3884 }, { "epoch": 0.977112676056338, "grad_norm": 0.3616609573364258, "learning_rate": 8.5269875630595e-06, "loss": 0.3804, "step": 3885 }, { "epoch": 0.977364185110664, "grad_norm": 0.3413762152194977, "learning_rate": 8.525950244671056e-06, "loss": 0.3983, "step": 3886 }, { "epoch": 0.97761569416499, "grad_norm": 0.41827529668807983, "learning_rate": 8.524912624307033e-06, "loss": 0.3762, "step": 3887 }, { "epoch": 0.9778672032193159, "grad_norm": 0.36674749851226807, "learning_rate": 8.523874702056296e-06, "loss": 0.37, "step": 3888 }, { "epoch": 0.9781187122736419, "grad_norm": 0.3255644142627716, "learning_rate": 8.522836478007734e-06, "loss": 0.3585, "step": 3889 }, { "epoch": 0.9783702213279678, "grad_norm": 0.3610647916793823, "learning_rate": 8.521797952250269e-06, "loss": 0.3807, "step": 3890 }, { "epoch": 0.9786217303822937, "grad_norm": 0.37085309624671936, "learning_rate": 8.52075912487284e-06, "loss": 0.3336, "step": 3891 }, { "epoch": 0.9788732394366197, "grad_norm": 0.3353044390678406, "learning_rate": 8.519719995964419e-06, "loss": 0.3723, "step": 3892 }, { "epoch": 0.9791247484909457, "grad_norm": 0.3177686631679535, "learning_rate": 8.518680565614e-06, "loss": 0.3736, "step": 3893 }, { "epoch": 0.9793762575452716, "grad_norm": 0.36374950408935547, "learning_rate": 8.517640833910602e-06, "loss": 0.4041, "step": 3894 }, { "epoch": 0.9796277665995976, "grad_norm": 0.3766506314277649, "learning_rate": 8.516600800943273e-06, "loss": 0.3796, "step": 3895 }, { "epoch": 0.9798792756539235, "grad_norm": 0.3212386667728424, "learning_rate": 8.515560466801085e-06, "loss": 0.3817, "step": 3896 }, { "epoch": 0.9801307847082495, "grad_norm": 0.35744473338127136, "learning_rate": 8.514519831573137e-06, "loss": 0.395, "step": 3897 }, { "epoch": 0.9803822937625755, "grad_norm": 0.3569160997867584, "learning_rate": 8.513478895348552e-06, "loss": 0.3832, "step": 3898 }, { "epoch": 0.9806338028169014, "grad_norm": 0.3330937922000885, "learning_rate": 8.512437658216479e-06, "loss": 0.4121, "step": 3899 }, { "epoch": 0.9808853118712274, "grad_norm": 0.3199692368507385, "learning_rate": 8.511396120266095e-06, "loss": 0.3629, "step": 3900 }, { "epoch": 0.9811368209255533, "grad_norm": 0.35635682940483093, "learning_rate": 8.510354281586601e-06, "loss": 0.3798, "step": 3901 }, { "epoch": 0.9813883299798792, "grad_norm": 0.3268703520298004, "learning_rate": 8.509312142267223e-06, "loss": 0.378, "step": 3902 }, { "epoch": 0.9816398390342053, "grad_norm": 0.3214782774448395, "learning_rate": 8.508269702397214e-06, "loss": 0.3788, "step": 3903 }, { "epoch": 0.9818913480885312, "grad_norm": 0.3744148015975952, "learning_rate": 8.507226962065852e-06, "loss": 0.39, "step": 3904 }, { "epoch": 0.9821428571428571, "grad_norm": 0.33143913745880127, "learning_rate": 8.506183921362443e-06, "loss": 0.3522, "step": 3905 }, { "epoch": 0.9823943661971831, "grad_norm": 0.3448927104473114, "learning_rate": 8.505140580376317e-06, "loss": 0.3731, "step": 3906 }, { "epoch": 0.982645875251509, "grad_norm": 0.37213438749313354, "learning_rate": 8.504096939196826e-06, "loss": 0.3569, "step": 3907 }, { "epoch": 0.9828973843058351, "grad_norm": 0.3362247943878174, "learning_rate": 8.503052997913354e-06, "loss": 0.3844, "step": 3908 }, { "epoch": 0.983148893360161, "grad_norm": 0.35339227318763733, "learning_rate": 8.50200875661531e-06, "loss": 0.3926, "step": 3909 }, { "epoch": 0.9834004024144869, "grad_norm": 0.3829168975353241, "learning_rate": 8.500964215392122e-06, "loss": 0.3594, "step": 3910 }, { "epoch": 0.9836519114688129, "grad_norm": 0.338537335395813, "learning_rate": 8.499919374333251e-06, "loss": 0.3922, "step": 3911 }, { "epoch": 0.9839034205231388, "grad_norm": 0.3299337923526764, "learning_rate": 8.498874233528183e-06, "loss": 0.3835, "step": 3912 }, { "epoch": 0.9841549295774648, "grad_norm": 0.3599414527416229, "learning_rate": 8.497828793066425e-06, "loss": 0.3518, "step": 3913 }, { "epoch": 0.9844064386317908, "grad_norm": 0.3576641380786896, "learning_rate": 8.496783053037512e-06, "loss": 0.3741, "step": 3914 }, { "epoch": 0.9846579476861167, "grad_norm": 0.3295727074146271, "learning_rate": 8.495737013531008e-06, "loss": 0.3744, "step": 3915 }, { "epoch": 0.9849094567404426, "grad_norm": 0.3430532217025757, "learning_rate": 8.494690674636497e-06, "loss": 0.3722, "step": 3916 }, { "epoch": 0.9851609657947686, "grad_norm": 0.3892742395401001, "learning_rate": 8.493644036443592e-06, "loss": 0.3736, "step": 3917 }, { "epoch": 0.9854124748490946, "grad_norm": 0.3252136707305908, "learning_rate": 8.492597099041932e-06, "loss": 0.3948, "step": 3918 }, { "epoch": 0.9856639839034205, "grad_norm": 0.3606947660446167, "learning_rate": 8.49154986252118e-06, "loss": 0.3537, "step": 3919 }, { "epoch": 0.9859154929577465, "grad_norm": 0.34790658950805664, "learning_rate": 8.490502326971026e-06, "loss": 0.3874, "step": 3920 }, { "epoch": 0.9861670020120724, "grad_norm": 0.34207773208618164, "learning_rate": 8.489454492481184e-06, "loss": 0.4054, "step": 3921 }, { "epoch": 0.9864185110663984, "grad_norm": 0.32859066128730774, "learning_rate": 8.488406359141395e-06, "loss": 0.3512, "step": 3922 }, { "epoch": 0.9866700201207244, "grad_norm": 0.42876073718070984, "learning_rate": 8.487357927041425e-06, "loss": 0.3749, "step": 3923 }, { "epoch": 0.9869215291750503, "grad_norm": 0.3597780764102936, "learning_rate": 8.486309196271063e-06, "loss": 0.3671, "step": 3924 }, { "epoch": 0.9871730382293763, "grad_norm": 0.3601861596107483, "learning_rate": 8.485260166920131e-06, "loss": 0.3696, "step": 3925 }, { "epoch": 0.9874245472837022, "grad_norm": 0.450916588306427, "learning_rate": 8.484210839078467e-06, "loss": 0.3995, "step": 3926 }, { "epoch": 0.9876760563380281, "grad_norm": 0.38571393489837646, "learning_rate": 8.483161212835944e-06, "loss": 0.3746, "step": 3927 }, { "epoch": 0.9879275653923542, "grad_norm": 0.32381007075309753, "learning_rate": 8.482111288282452e-06, "loss": 0.3762, "step": 3928 }, { "epoch": 0.9881790744466801, "grad_norm": 0.4630524516105652, "learning_rate": 8.481061065507915e-06, "loss": 0.3895, "step": 3929 }, { "epoch": 0.988430583501006, "grad_norm": 0.3516670763492584, "learning_rate": 8.480010544602274e-06, "loss": 0.3775, "step": 3930 }, { "epoch": 0.988682092555332, "grad_norm": 0.3213813006877899, "learning_rate": 8.4789597256555e-06, "loss": 0.3751, "step": 3931 }, { "epoch": 0.9889336016096579, "grad_norm": 0.37605127692222595, "learning_rate": 8.47790860875759e-06, "loss": 0.3625, "step": 3932 }, { "epoch": 0.989185110663984, "grad_norm": 0.3587568700313568, "learning_rate": 8.476857193998564e-06, "loss": 0.372, "step": 3933 }, { "epoch": 0.9894366197183099, "grad_norm": 0.3404126763343811, "learning_rate": 8.475805481468472e-06, "loss": 0.3751, "step": 3934 }, { "epoch": 0.9896881287726358, "grad_norm": 0.3651595711708069, "learning_rate": 8.474753471257385e-06, "loss": 0.3587, "step": 3935 }, { "epoch": 0.9899396378269618, "grad_norm": 0.3151306211948395, "learning_rate": 8.473701163455401e-06, "loss": 0.3889, "step": 3936 }, { "epoch": 0.9901911468812877, "grad_norm": 0.327453076839447, "learning_rate": 8.472648558152646e-06, "loss": 0.3706, "step": 3937 }, { "epoch": 0.9904426559356136, "grad_norm": 0.3796255588531494, "learning_rate": 8.471595655439263e-06, "loss": 0.3789, "step": 3938 }, { "epoch": 0.9906941649899397, "grad_norm": 0.3350921869277954, "learning_rate": 8.470542455405432e-06, "loss": 0.3938, "step": 3939 }, { "epoch": 0.9909456740442656, "grad_norm": 0.3343561291694641, "learning_rate": 8.469488958141352e-06, "loss": 0.3646, "step": 3940 }, { "epoch": 0.9911971830985915, "grad_norm": 0.35207462310791016, "learning_rate": 8.468435163737248e-06, "loss": 0.3782, "step": 3941 }, { "epoch": 0.9914486921529175, "grad_norm": 0.32531505823135376, "learning_rate": 8.46738107228337e-06, "loss": 0.3897, "step": 3942 }, { "epoch": 0.9917002012072434, "grad_norm": 0.367502897977829, "learning_rate": 8.466326683869994e-06, "loss": 0.3878, "step": 3943 }, { "epoch": 0.9919517102615694, "grad_norm": 0.37151041626930237, "learning_rate": 8.465271998587424e-06, "loss": 0.3696, "step": 3944 }, { "epoch": 0.9922032193158954, "grad_norm": 0.3209855556488037, "learning_rate": 8.464217016525985e-06, "loss": 0.3534, "step": 3945 }, { "epoch": 0.9924547283702213, "grad_norm": 0.3292153477668762, "learning_rate": 8.463161737776031e-06, "loss": 0.3612, "step": 3946 }, { "epoch": 0.9927062374245473, "grad_norm": 0.35022595524787903, "learning_rate": 8.46210616242794e-06, "loss": 0.3583, "step": 3947 }, { "epoch": 0.9929577464788732, "grad_norm": 0.3790688216686249, "learning_rate": 8.461050290572114e-06, "loss": 0.3809, "step": 3948 }, { "epoch": 0.9932092555331992, "grad_norm": 0.3780459463596344, "learning_rate": 8.459994122298985e-06, "loss": 0.3793, "step": 3949 }, { "epoch": 0.9934607645875252, "grad_norm": 0.325181782245636, "learning_rate": 8.458937657699004e-06, "loss": 0.3724, "step": 3950 }, { "epoch": 0.9937122736418511, "grad_norm": 0.35834598541259766, "learning_rate": 8.457880896862651e-06, "loss": 0.3791, "step": 3951 }, { "epoch": 0.993963782696177, "grad_norm": 0.37170320749282837, "learning_rate": 8.456823839880433e-06, "loss": 0.3852, "step": 3952 }, { "epoch": 0.994215291750503, "grad_norm": 0.3402329385280609, "learning_rate": 8.455766486842878e-06, "loss": 0.3763, "step": 3953 }, { "epoch": 0.994466800804829, "grad_norm": 0.3130355179309845, "learning_rate": 8.454708837840543e-06, "loss": 0.3747, "step": 3954 }, { "epoch": 0.9947183098591549, "grad_norm": 0.32022619247436523, "learning_rate": 8.453650892964008e-06, "loss": 0.3611, "step": 3955 }, { "epoch": 0.9949698189134809, "grad_norm": 0.35048943758010864, "learning_rate": 8.45259265230388e-06, "loss": 0.3663, "step": 3956 }, { "epoch": 0.9952213279678068, "grad_norm": 0.3296698033809662, "learning_rate": 8.45153411595079e-06, "loss": 0.3886, "step": 3957 }, { "epoch": 0.9954728370221329, "grad_norm": 0.3470214605331421, "learning_rate": 8.450475283995398e-06, "loss": 0.411, "step": 3958 }, { "epoch": 0.9957243460764588, "grad_norm": 0.3476480543613434, "learning_rate": 8.449416156528383e-06, "loss": 0.3892, "step": 3959 }, { "epoch": 0.9959758551307847, "grad_norm": 0.3552456796169281, "learning_rate": 8.448356733640453e-06, "loss": 0.4037, "step": 3960 }, { "epoch": 0.9962273641851107, "grad_norm": 0.38896414637565613, "learning_rate": 8.447297015422342e-06, "loss": 0.3919, "step": 3961 }, { "epoch": 0.9964788732394366, "grad_norm": 0.3473275899887085, "learning_rate": 8.446237001964808e-06, "loss": 0.3809, "step": 3962 }, { "epoch": 0.9967303822937625, "grad_norm": 0.3539365530014038, "learning_rate": 8.445176693358634e-06, "loss": 0.3513, "step": 3963 }, { "epoch": 0.9969818913480886, "grad_norm": 0.39693740010261536, "learning_rate": 8.444116089694631e-06, "loss": 0.3734, "step": 3964 }, { "epoch": 0.9972334004024145, "grad_norm": 0.3378576934337616, "learning_rate": 8.443055191063629e-06, "loss": 0.3644, "step": 3965 }, { "epoch": 0.9974849094567404, "grad_norm": 0.3627392053604126, "learning_rate": 8.44199399755649e-06, "loss": 0.3903, "step": 3966 }, { "epoch": 0.9977364185110664, "grad_norm": 0.38984745740890503, "learning_rate": 8.440932509264099e-06, "loss": 0.3932, "step": 3967 }, { "epoch": 0.9979879275653923, "grad_norm": 0.34223487973213196, "learning_rate": 8.439870726277364e-06, "loss": 0.3599, "step": 3968 }, { "epoch": 0.9982394366197183, "grad_norm": 0.33947303891181946, "learning_rate": 8.438808648687223e-06, "loss": 0.381, "step": 3969 }, { "epoch": 0.9984909456740443, "grad_norm": 0.3354572057723999, "learning_rate": 8.437746276584631e-06, "loss": 0.3726, "step": 3970 }, { "epoch": 0.9987424547283702, "grad_norm": 0.36442792415618896, "learning_rate": 8.43668361006058e-06, "loss": 0.3951, "step": 3971 }, { "epoch": 0.9989939637826962, "grad_norm": 0.3597300946712494, "learning_rate": 8.435620649206076e-06, "loss": 0.3476, "step": 3972 }, { "epoch": 0.9992454728370221, "grad_norm": 0.3294813632965088, "learning_rate": 8.434557394112156e-06, "loss": 0.3753, "step": 3973 }, { "epoch": 0.9994969818913481, "grad_norm": 0.32441380620002747, "learning_rate": 8.433493844869883e-06, "loss": 0.3672, "step": 3974 }, { "epoch": 0.9997484909456741, "grad_norm": 0.3753267526626587, "learning_rate": 8.432430001570343e-06, "loss": 0.377, "step": 3975 }, { "epoch": 1.0, "grad_norm": 0.3612028658390045, "learning_rate": 8.431365864304645e-06, "loss": 0.3862, "step": 3976 }, { "epoch": 1.000251509054326, "grad_norm": 0.3513164818286896, "learning_rate": 8.430301433163927e-06, "loss": 0.3648, "step": 3977 }, { "epoch": 1.0005030181086518, "grad_norm": 0.3320719599723816, "learning_rate": 8.42923670823935e-06, "loss": 0.3366, "step": 3978 }, { "epoch": 1.0007545271629779, "grad_norm": 0.3477346897125244, "learning_rate": 8.428171689622105e-06, "loss": 0.3598, "step": 3979 }, { "epoch": 1.0010060362173039, "grad_norm": 0.3291724920272827, "learning_rate": 8.4271063774034e-06, "loss": 0.3622, "step": 3980 }, { "epoch": 1.0012575452716297, "grad_norm": 0.3617793917655945, "learning_rate": 8.426040771674475e-06, "loss": 0.3601, "step": 3981 }, { "epoch": 1.0015090543259557, "grad_norm": 0.33502376079559326, "learning_rate": 8.42497487252659e-06, "loss": 0.3577, "step": 3982 }, { "epoch": 1.0017605633802817, "grad_norm": 0.35225436091423035, "learning_rate": 8.423908680051035e-06, "loss": 0.3543, "step": 3983 }, { "epoch": 1.0020120724346075, "grad_norm": 0.3484959304332733, "learning_rate": 8.42284219433912e-06, "loss": 0.3501, "step": 3984 }, { "epoch": 1.0022635814889336, "grad_norm": 0.348874568939209, "learning_rate": 8.421775415482183e-06, "loss": 0.3557, "step": 3985 }, { "epoch": 1.0025150905432596, "grad_norm": 0.36964142322540283, "learning_rate": 8.42070834357159e-06, "loss": 0.3564, "step": 3986 }, { "epoch": 1.0027665995975854, "grad_norm": 0.3703876733779907, "learning_rate": 8.419640978698728e-06, "loss": 0.3379, "step": 3987 }, { "epoch": 1.0030181086519114, "grad_norm": 0.35077622532844543, "learning_rate": 8.418573320955008e-06, "loss": 0.3577, "step": 3988 }, { "epoch": 1.0032696177062375, "grad_norm": 0.35275188088417053, "learning_rate": 8.417505370431869e-06, "loss": 0.3819, "step": 3989 }, { "epoch": 1.0035211267605635, "grad_norm": 0.3845551311969757, "learning_rate": 8.416437127220777e-06, "loss": 0.3294, "step": 3990 }, { "epoch": 1.0037726358148893, "grad_norm": 0.3270184397697449, "learning_rate": 8.415368591413218e-06, "loss": 0.3455, "step": 3991 }, { "epoch": 1.0040241448692153, "grad_norm": 0.3642314374446869, "learning_rate": 8.414299763100704e-06, "loss": 0.3649, "step": 3992 }, { "epoch": 1.0042756539235413, "grad_norm": 0.39639967679977417, "learning_rate": 8.413230642374776e-06, "loss": 0.3558, "step": 3993 }, { "epoch": 1.0045271629778671, "grad_norm": 0.3734232485294342, "learning_rate": 8.412161229326997e-06, "loss": 0.3575, "step": 3994 }, { "epoch": 1.0047786720321932, "grad_norm": 0.3293455243110657, "learning_rate": 8.411091524048953e-06, "loss": 0.369, "step": 3995 }, { "epoch": 1.0050301810865192, "grad_norm": 0.3649202883243561, "learning_rate": 8.410021526632262e-06, "loss": 0.3557, "step": 3996 }, { "epoch": 1.005281690140845, "grad_norm": 0.3437403440475464, "learning_rate": 8.408951237168559e-06, "loss": 0.3742, "step": 3997 }, { "epoch": 1.005533199195171, "grad_norm": 0.34628748893737793, "learning_rate": 8.40788065574951e-06, "loss": 0.3851, "step": 3998 }, { "epoch": 1.005784708249497, "grad_norm": 0.40169525146484375, "learning_rate": 8.4068097824668e-06, "loss": 0.3603, "step": 3999 }, { "epoch": 1.0060362173038229, "grad_norm": 0.3196563422679901, "learning_rate": 8.405738617412148e-06, "loss": 0.3477, "step": 4000 }, { "epoch": 1.006287726358149, "grad_norm": 0.3684934675693512, "learning_rate": 8.404667160677289e-06, "loss": 0.3876, "step": 4001 }, { "epoch": 1.006539235412475, "grad_norm": 0.35352370142936707, "learning_rate": 8.403595412353987e-06, "loss": 0.3676, "step": 4002 }, { "epoch": 1.0067907444668007, "grad_norm": 0.3421543836593628, "learning_rate": 8.40252337253403e-06, "loss": 0.3632, "step": 4003 }, { "epoch": 1.0070422535211268, "grad_norm": 0.34123724699020386, "learning_rate": 8.401451041309233e-06, "loss": 0.3834, "step": 4004 }, { "epoch": 1.0072937625754528, "grad_norm": 0.3148738741874695, "learning_rate": 8.400378418771434e-06, "loss": 0.3571, "step": 4005 }, { "epoch": 1.0075452716297786, "grad_norm": 0.3324401378631592, "learning_rate": 8.399305505012496e-06, "loss": 0.3645, "step": 4006 }, { "epoch": 1.0077967806841046, "grad_norm": 0.3198019564151764, "learning_rate": 8.398232300124307e-06, "loss": 0.3457, "step": 4007 }, { "epoch": 1.0080482897384306, "grad_norm": 0.3236132264137268, "learning_rate": 8.39715880419878e-06, "loss": 0.3419, "step": 4008 }, { "epoch": 1.0082997987927564, "grad_norm": 0.3504725396633148, "learning_rate": 8.396085017327854e-06, "loss": 0.3376, "step": 4009 }, { "epoch": 1.0085513078470825, "grad_norm": 0.3179847002029419, "learning_rate": 8.395010939603493e-06, "loss": 0.3549, "step": 4010 }, { "epoch": 1.0088028169014085, "grad_norm": 0.31894704699516296, "learning_rate": 8.393936571117685e-06, "loss": 0.3486, "step": 4011 }, { "epoch": 1.0090543259557343, "grad_norm": 0.3283448815345764, "learning_rate": 8.392861911962441e-06, "loss": 0.3577, "step": 4012 }, { "epoch": 1.0093058350100603, "grad_norm": 0.33308663964271545, "learning_rate": 8.3917869622298e-06, "loss": 0.3494, "step": 4013 }, { "epoch": 1.0095573440643864, "grad_norm": 0.33545833826065063, "learning_rate": 8.390711722011825e-06, "loss": 0.3131, "step": 4014 }, { "epoch": 1.0098088531187124, "grad_norm": 0.36120879650115967, "learning_rate": 8.389636191400603e-06, "loss": 0.3673, "step": 4015 }, { "epoch": 1.0100603621730382, "grad_norm": 0.3518035411834717, "learning_rate": 8.388560370488247e-06, "loss": 0.3616, "step": 4016 }, { "epoch": 1.0103118712273642, "grad_norm": 0.3019152879714966, "learning_rate": 8.387484259366894e-06, "loss": 0.3508, "step": 4017 }, { "epoch": 1.0105633802816902, "grad_norm": 0.3448386788368225, "learning_rate": 8.386407858128707e-06, "loss": 0.3544, "step": 4018 }, { "epoch": 1.010814889336016, "grad_norm": 0.32667118310928345, "learning_rate": 8.38533116686587e-06, "loss": 0.3429, "step": 4019 }, { "epoch": 1.011066398390342, "grad_norm": 0.3479123115539551, "learning_rate": 8.384254185670599e-06, "loss": 0.3632, "step": 4020 }, { "epoch": 1.011317907444668, "grad_norm": 0.3146592378616333, "learning_rate": 8.383176914635127e-06, "loss": 0.3615, "step": 4021 }, { "epoch": 1.011569416498994, "grad_norm": 0.36215662956237793, "learning_rate": 8.38209935385172e-06, "loss": 0.3659, "step": 4022 }, { "epoch": 1.01182092555332, "grad_norm": 0.3197457790374756, "learning_rate": 8.381021503412659e-06, "loss": 0.3269, "step": 4023 }, { "epoch": 1.012072434607646, "grad_norm": 0.34255650639533997, "learning_rate": 8.379943363410259e-06, "loss": 0.3715, "step": 4024 }, { "epoch": 1.0123239436619718, "grad_norm": 0.34382399916648865, "learning_rate": 8.378864933936856e-06, "loss": 0.3318, "step": 4025 }, { "epoch": 1.0125754527162978, "grad_norm": 0.3288285434246063, "learning_rate": 8.37778621508481e-06, "loss": 0.3737, "step": 4026 }, { "epoch": 1.0128269617706238, "grad_norm": 0.33756786584854126, "learning_rate": 8.376707206946503e-06, "loss": 0.3459, "step": 4027 }, { "epoch": 1.0130784708249496, "grad_norm": 0.37219181656837463, "learning_rate": 8.37562790961435e-06, "loss": 0.3549, "step": 4028 }, { "epoch": 1.0133299798792756, "grad_norm": 0.3869669735431671, "learning_rate": 8.374548323180783e-06, "loss": 0.3594, "step": 4029 }, { "epoch": 1.0135814889336017, "grad_norm": 0.3502846360206604, "learning_rate": 8.373468447738265e-06, "loss": 0.3629, "step": 4030 }, { "epoch": 1.0138329979879275, "grad_norm": 0.34061774611473083, "learning_rate": 8.372388283379277e-06, "loss": 0.3644, "step": 4031 }, { "epoch": 1.0140845070422535, "grad_norm": 0.3984410762786865, "learning_rate": 8.37130783019633e-06, "loss": 0.3692, "step": 4032 }, { "epoch": 1.0143360160965795, "grad_norm": 0.3561273515224457, "learning_rate": 8.370227088281962e-06, "loss": 0.3375, "step": 4033 }, { "epoch": 1.0145875251509053, "grad_norm": 0.3067105710506439, "learning_rate": 8.369146057728726e-06, "loss": 0.3403, "step": 4034 }, { "epoch": 1.0148390342052314, "grad_norm": 0.39280879497528076, "learning_rate": 8.368064738629205e-06, "loss": 0.3513, "step": 4035 }, { "epoch": 1.0150905432595574, "grad_norm": 0.3620704114437103, "learning_rate": 8.366983131076012e-06, "loss": 0.3579, "step": 4036 }, { "epoch": 1.0153420523138832, "grad_norm": 0.3495025038719177, "learning_rate": 8.365901235161778e-06, "loss": 0.3641, "step": 4037 }, { "epoch": 1.0155935613682092, "grad_norm": 0.350424587726593, "learning_rate": 8.36481905097916e-06, "loss": 0.3567, "step": 4038 }, { "epoch": 1.0158450704225352, "grad_norm": 0.31671619415283203, "learning_rate": 8.363736578620838e-06, "loss": 0.3795, "step": 4039 }, { "epoch": 1.0160965794768613, "grad_norm": 0.3311213552951813, "learning_rate": 8.362653818179524e-06, "loss": 0.3523, "step": 4040 }, { "epoch": 1.016348088531187, "grad_norm": 0.3370438814163208, "learning_rate": 8.361570769747948e-06, "loss": 0.3614, "step": 4041 }, { "epoch": 1.016599597585513, "grad_norm": 0.33826106786727905, "learning_rate": 8.360487433418863e-06, "loss": 0.3289, "step": 4042 }, { "epoch": 1.0168511066398391, "grad_norm": 0.37292927503585815, "learning_rate": 8.359403809285054e-06, "loss": 0.3549, "step": 4043 }, { "epoch": 1.017102615694165, "grad_norm": 0.3336230516433716, "learning_rate": 8.358319897439324e-06, "loss": 0.3397, "step": 4044 }, { "epoch": 1.017354124748491, "grad_norm": 0.3715561032295227, "learning_rate": 8.357235697974506e-06, "loss": 0.335, "step": 4045 }, { "epoch": 1.017605633802817, "grad_norm": 0.3317098617553711, "learning_rate": 8.356151210983451e-06, "loss": 0.3557, "step": 4046 }, { "epoch": 1.0178571428571428, "grad_norm": 0.3567894697189331, "learning_rate": 8.355066436559042e-06, "loss": 0.3431, "step": 4047 }, { "epoch": 1.0181086519114688, "grad_norm": 0.31306058168411255, "learning_rate": 8.353981374794184e-06, "loss": 0.3529, "step": 4048 }, { "epoch": 1.0183601609657948, "grad_norm": 0.3463604748249054, "learning_rate": 8.3528960257818e-06, "loss": 0.3559, "step": 4049 }, { "epoch": 1.0186116700201207, "grad_norm": 0.36035671830177307, "learning_rate": 8.35181038961485e-06, "loss": 0.3697, "step": 4050 }, { "epoch": 1.0188631790744467, "grad_norm": 0.31114959716796875, "learning_rate": 8.350724466386309e-06, "loss": 0.3163, "step": 4051 }, { "epoch": 1.0191146881287727, "grad_norm": 0.33771616220474243, "learning_rate": 8.349638256189178e-06, "loss": 0.3651, "step": 4052 }, { "epoch": 1.0193661971830985, "grad_norm": 0.38065779209136963, "learning_rate": 8.348551759116485e-06, "loss": 0.337, "step": 4053 }, { "epoch": 1.0196177062374245, "grad_norm": 0.3252953290939331, "learning_rate": 8.347464975261283e-06, "loss": 0.3479, "step": 4054 }, { "epoch": 1.0198692152917506, "grad_norm": 0.3428071439266205, "learning_rate": 8.346377904716649e-06, "loss": 0.3571, "step": 4055 }, { "epoch": 1.0201207243460764, "grad_norm": 0.3688924312591553, "learning_rate": 8.34529054757568e-06, "loss": 0.3772, "step": 4056 }, { "epoch": 1.0203722334004024, "grad_norm": 0.3667786121368408, "learning_rate": 8.344202903931504e-06, "loss": 0.359, "step": 4057 }, { "epoch": 1.0206237424547284, "grad_norm": 0.3390733003616333, "learning_rate": 8.343114973877273e-06, "loss": 0.3513, "step": 4058 }, { "epoch": 1.0208752515090542, "grad_norm": 0.34325966238975525, "learning_rate": 8.342026757506156e-06, "loss": 0.3834, "step": 4059 }, { "epoch": 1.0211267605633803, "grad_norm": 0.37722912430763245, "learning_rate": 8.340938254911358e-06, "loss": 0.3481, "step": 4060 }, { "epoch": 1.0213782696177063, "grad_norm": 0.33139869570732117, "learning_rate": 8.339849466186096e-06, "loss": 0.3473, "step": 4061 }, { "epoch": 1.021629778672032, "grad_norm": 0.3419877886772156, "learning_rate": 8.338760391423623e-06, "loss": 0.3419, "step": 4062 }, { "epoch": 1.0218812877263581, "grad_norm": 0.37569326162338257, "learning_rate": 8.33767103071721e-06, "loss": 0.3518, "step": 4063 }, { "epoch": 1.0221327967806841, "grad_norm": 0.3753376603126526, "learning_rate": 8.336581384160152e-06, "loss": 0.3399, "step": 4064 }, { "epoch": 1.0223843058350102, "grad_norm": 0.3511211574077606, "learning_rate": 8.335491451845774e-06, "loss": 0.3732, "step": 4065 }, { "epoch": 1.022635814889336, "grad_norm": 0.3783870339393616, "learning_rate": 8.334401233867418e-06, "loss": 0.365, "step": 4066 }, { "epoch": 1.022887323943662, "grad_norm": 0.41309934854507446, "learning_rate": 8.333310730318457e-06, "loss": 0.3703, "step": 4067 }, { "epoch": 1.023138832997988, "grad_norm": 0.5328013300895691, "learning_rate": 8.332219941292286e-06, "loss": 0.3598, "step": 4068 }, { "epoch": 1.0233903420523138, "grad_norm": 0.3765561282634735, "learning_rate": 8.331128866882323e-06, "loss": 0.3709, "step": 4069 }, { "epoch": 1.0236418511066399, "grad_norm": 0.37244337797164917, "learning_rate": 8.330037507182012e-06, "loss": 0.3685, "step": 4070 }, { "epoch": 1.0238933601609659, "grad_norm": 0.3581307530403137, "learning_rate": 8.328945862284821e-06, "loss": 0.364, "step": 4071 }, { "epoch": 1.0241448692152917, "grad_norm": 0.32666563987731934, "learning_rate": 8.327853932284242e-06, "loss": 0.3368, "step": 4072 }, { "epoch": 1.0243963782696177, "grad_norm": 0.31926366686820984, "learning_rate": 8.326761717273793e-06, "loss": 0.3595, "step": 4073 }, { "epoch": 1.0246478873239437, "grad_norm": 0.3939271867275238, "learning_rate": 8.325669217347017e-06, "loss": 0.3444, "step": 4074 }, { "epoch": 1.0248993963782695, "grad_norm": 0.33755072951316833, "learning_rate": 8.324576432597476e-06, "loss": 0.3422, "step": 4075 }, { "epoch": 1.0251509054325956, "grad_norm": 0.3223980963230133, "learning_rate": 8.32348336311876e-06, "loss": 0.3244, "step": 4076 }, { "epoch": 1.0254024144869216, "grad_norm": 0.3657818138599396, "learning_rate": 8.322390009004488e-06, "loss": 0.3487, "step": 4077 }, { "epoch": 1.0256539235412474, "grad_norm": 0.3748267590999603, "learning_rate": 8.321296370348297e-06, "loss": 0.353, "step": 4078 }, { "epoch": 1.0259054325955734, "grad_norm": 0.3817254304885864, "learning_rate": 8.320202447243851e-06, "loss": 0.3505, "step": 4079 }, { "epoch": 1.0261569416498995, "grad_norm": 0.32929888367652893, "learning_rate": 8.319108239784834e-06, "loss": 0.3457, "step": 4080 }, { "epoch": 1.0264084507042253, "grad_norm": 0.377116322517395, "learning_rate": 8.318013748064962e-06, "loss": 0.349, "step": 4081 }, { "epoch": 1.0266599597585513, "grad_norm": 0.34213367104530334, "learning_rate": 8.316918972177968e-06, "loss": 0.3627, "step": 4082 }, { "epoch": 1.0269114688128773, "grad_norm": 0.3591276705265045, "learning_rate": 8.315823912217615e-06, "loss": 0.3497, "step": 4083 }, { "epoch": 1.0271629778672031, "grad_norm": 0.33743128180503845, "learning_rate": 8.314728568277691e-06, "loss": 0.3589, "step": 4084 }, { "epoch": 1.0274144869215291, "grad_norm": 0.34647324681282043, "learning_rate": 8.313632940452e-06, "loss": 0.3588, "step": 4085 }, { "epoch": 1.0276659959758552, "grad_norm": 0.3492962718009949, "learning_rate": 8.312537028834374e-06, "loss": 0.3581, "step": 4086 }, { "epoch": 1.027917505030181, "grad_norm": 0.3124542832374573, "learning_rate": 8.311440833518678e-06, "loss": 0.3418, "step": 4087 }, { "epoch": 1.028169014084507, "grad_norm": 0.3951411545276642, "learning_rate": 8.310344354598791e-06, "loss": 0.3615, "step": 4088 }, { "epoch": 1.028420523138833, "grad_norm": 0.3501111567020416, "learning_rate": 8.30924759216862e-06, "loss": 0.3331, "step": 4089 }, { "epoch": 1.028672032193159, "grad_norm": 0.36356234550476074, "learning_rate": 8.308150546322093e-06, "loss": 0.3728, "step": 4090 }, { "epoch": 1.0289235412474849, "grad_norm": 0.3362111747264862, "learning_rate": 8.30705321715317e-06, "loss": 0.3322, "step": 4091 }, { "epoch": 1.029175050301811, "grad_norm": 0.3690354824066162, "learning_rate": 8.305955604755827e-06, "loss": 0.3943, "step": 4092 }, { "epoch": 1.029426559356137, "grad_norm": 0.3664776682853699, "learning_rate": 8.304857709224068e-06, "loss": 0.3586, "step": 4093 }, { "epoch": 1.0296780684104627, "grad_norm": 0.33852067589759827, "learning_rate": 8.303759530651921e-06, "loss": 0.3354, "step": 4094 }, { "epoch": 1.0299295774647887, "grad_norm": 0.339894562959671, "learning_rate": 8.30266106913344e-06, "loss": 0.3487, "step": 4095 }, { "epoch": 1.0301810865191148, "grad_norm": 0.34180840849876404, "learning_rate": 8.301562324762698e-06, "loss": 0.3661, "step": 4096 }, { "epoch": 1.0304325955734406, "grad_norm": 0.35557109117507935, "learning_rate": 8.300463297633798e-06, "loss": 0.3469, "step": 4097 }, { "epoch": 1.0306841046277666, "grad_norm": 0.32619279623031616, "learning_rate": 8.299363987840864e-06, "loss": 0.3601, "step": 4098 }, { "epoch": 1.0309356136820926, "grad_norm": 0.32750555872917175, "learning_rate": 8.298264395478046e-06, "loss": 0.3436, "step": 4099 }, { "epoch": 1.0311871227364184, "grad_norm": 0.3431573808193207, "learning_rate": 8.297164520639515e-06, "loss": 0.3605, "step": 4100 }, { "epoch": 1.0314386317907445, "grad_norm": 0.3561439514160156, "learning_rate": 8.29606436341947e-06, "loss": 0.3392, "step": 4101 }, { "epoch": 1.0316901408450705, "grad_norm": 0.3567664921283722, "learning_rate": 8.294963923912134e-06, "loss": 0.3511, "step": 4102 }, { "epoch": 1.0319416498993963, "grad_norm": 0.35723668336868286, "learning_rate": 8.293863202211751e-06, "loss": 0.3727, "step": 4103 }, { "epoch": 1.0321931589537223, "grad_norm": 0.34615108370780945, "learning_rate": 8.292762198412591e-06, "loss": 0.3533, "step": 4104 }, { "epoch": 1.0324446680080483, "grad_norm": 0.345207542181015, "learning_rate": 8.291660912608948e-06, "loss": 0.3307, "step": 4105 }, { "epoch": 1.0326961770623742, "grad_norm": 0.3570762276649475, "learning_rate": 8.290559344895139e-06, "loss": 0.3474, "step": 4106 }, { "epoch": 1.0329476861167002, "grad_norm": 0.3682233393192291, "learning_rate": 8.28945749536551e-06, "loss": 0.3729, "step": 4107 }, { "epoch": 1.0331991951710262, "grad_norm": 0.4089745879173279, "learning_rate": 8.288355364114423e-06, "loss": 0.3803, "step": 4108 }, { "epoch": 1.033450704225352, "grad_norm": 0.3189091682434082, "learning_rate": 8.287252951236272e-06, "loss": 0.367, "step": 4109 }, { "epoch": 1.033702213279678, "grad_norm": 0.3654859662055969, "learning_rate": 8.28615025682547e-06, "loss": 0.3674, "step": 4110 }, { "epoch": 1.033953722334004, "grad_norm": 0.38779398798942566, "learning_rate": 8.285047280976458e-06, "loss": 0.3746, "step": 4111 }, { "epoch": 1.0342052313883299, "grad_norm": 0.3446475863456726, "learning_rate": 8.283944023783697e-06, "loss": 0.3528, "step": 4112 }, { "epoch": 1.034456740442656, "grad_norm": 0.33927372097969055, "learning_rate": 8.282840485341675e-06, "loss": 0.3567, "step": 4113 }, { "epoch": 1.034708249496982, "grad_norm": 0.35856184363365173, "learning_rate": 8.281736665744902e-06, "loss": 0.3442, "step": 4114 }, { "epoch": 1.034959758551308, "grad_norm": 0.35313889384269714, "learning_rate": 8.280632565087913e-06, "loss": 0.3552, "step": 4115 }, { "epoch": 1.0352112676056338, "grad_norm": 0.33305633068084717, "learning_rate": 8.27952818346527e-06, "loss": 0.3299, "step": 4116 }, { "epoch": 1.0354627766599598, "grad_norm": 0.3259493112564087, "learning_rate": 8.278423520971556e-06, "loss": 0.3376, "step": 4117 }, { "epoch": 1.0357142857142858, "grad_norm": 0.3438626527786255, "learning_rate": 8.277318577701375e-06, "loss": 0.3475, "step": 4118 }, { "epoch": 1.0359657947686116, "grad_norm": 0.31181490421295166, "learning_rate": 8.27621335374936e-06, "loss": 0.3567, "step": 4119 }, { "epoch": 1.0362173038229376, "grad_norm": 0.3249053359031677, "learning_rate": 8.275107849210168e-06, "loss": 0.358, "step": 4120 }, { "epoch": 1.0364688128772637, "grad_norm": 0.38430964946746826, "learning_rate": 8.27400206417848e-06, "loss": 0.3461, "step": 4121 }, { "epoch": 1.0367203219315895, "grad_norm": 0.3271859884262085, "learning_rate": 8.272895998748996e-06, "loss": 0.349, "step": 4122 }, { "epoch": 1.0369718309859155, "grad_norm": 0.32328277826309204, "learning_rate": 8.271789653016445e-06, "loss": 0.3598, "step": 4123 }, { "epoch": 1.0372233400402415, "grad_norm": 0.3417379856109619, "learning_rate": 8.270683027075576e-06, "loss": 0.3591, "step": 4124 }, { "epoch": 1.0374748490945673, "grad_norm": 0.31911101937294006, "learning_rate": 8.26957612102117e-06, "loss": 0.3416, "step": 4125 }, { "epoch": 1.0377263581488934, "grad_norm": 0.31615710258483887, "learning_rate": 8.268468934948023e-06, "loss": 0.3325, "step": 4126 }, { "epoch": 1.0379778672032194, "grad_norm": 0.34855955839157104, "learning_rate": 8.267361468950958e-06, "loss": 0.3408, "step": 4127 }, { "epoch": 1.0382293762575452, "grad_norm": 0.3437637984752655, "learning_rate": 8.266253723124825e-06, "loss": 0.3463, "step": 4128 }, { "epoch": 1.0384808853118712, "grad_norm": 0.33804404735565186, "learning_rate": 8.265145697564493e-06, "loss": 0.3452, "step": 4129 }, { "epoch": 1.0387323943661972, "grad_norm": 0.3620853126049042, "learning_rate": 8.26403739236486e-06, "loss": 0.364, "step": 4130 }, { "epoch": 1.038983903420523, "grad_norm": 0.3575562834739685, "learning_rate": 8.262928807620843e-06, "loss": 0.3741, "step": 4131 }, { "epoch": 1.039235412474849, "grad_norm": 0.34148597717285156, "learning_rate": 8.261819943427387e-06, "loss": 0.3194, "step": 4132 }, { "epoch": 1.039486921529175, "grad_norm": 0.3127982020378113, "learning_rate": 8.26071079987946e-06, "loss": 0.3364, "step": 4133 }, { "epoch": 1.039738430583501, "grad_norm": 0.3347800374031067, "learning_rate": 8.25960137707205e-06, "loss": 0.3603, "step": 4134 }, { "epoch": 1.039989939637827, "grad_norm": 0.3456893861293793, "learning_rate": 8.258491675100175e-06, "loss": 0.3734, "step": 4135 }, { "epoch": 1.040241448692153, "grad_norm": 0.3339012861251831, "learning_rate": 8.257381694058873e-06, "loss": 0.3563, "step": 4136 }, { "epoch": 1.040492957746479, "grad_norm": 0.31846827268600464, "learning_rate": 8.256271434043206e-06, "loss": 0.3469, "step": 4137 }, { "epoch": 1.0407444668008048, "grad_norm": 0.3521193861961365, "learning_rate": 8.255160895148263e-06, "loss": 0.3225, "step": 4138 }, { "epoch": 1.0409959758551308, "grad_norm": 0.34375718235969543, "learning_rate": 8.254050077469153e-06, "loss": 0.3366, "step": 4139 }, { "epoch": 1.0412474849094568, "grad_norm": 0.3343333899974823, "learning_rate": 8.252938981101011e-06, "loss": 0.3674, "step": 4140 }, { "epoch": 1.0414989939637826, "grad_norm": 0.32967597246170044, "learning_rate": 8.251827606138996e-06, "loss": 0.3581, "step": 4141 }, { "epoch": 1.0417505030181087, "grad_norm": 0.3318265676498413, "learning_rate": 8.25071595267829e-06, "loss": 0.3474, "step": 4142 }, { "epoch": 1.0420020120724347, "grad_norm": 0.29342010617256165, "learning_rate": 8.249604020814099e-06, "loss": 0.3322, "step": 4143 }, { "epoch": 1.0422535211267605, "grad_norm": 0.3520354628562927, "learning_rate": 8.248491810641655e-06, "loss": 0.3457, "step": 4144 }, { "epoch": 1.0425050301810865, "grad_norm": 0.3410925269126892, "learning_rate": 8.247379322256206e-06, "loss": 0.3429, "step": 4145 }, { "epoch": 1.0427565392354126, "grad_norm": 0.31338703632354736, "learning_rate": 8.246266555753036e-06, "loss": 0.3464, "step": 4146 }, { "epoch": 1.0430080482897384, "grad_norm": 0.31659606099128723, "learning_rate": 8.245153511227443e-06, "loss": 0.3355, "step": 4147 }, { "epoch": 1.0432595573440644, "grad_norm": 0.34237006306648254, "learning_rate": 8.244040188774755e-06, "loss": 0.3729, "step": 4148 }, { "epoch": 1.0435110663983904, "grad_norm": 0.3474802076816559, "learning_rate": 8.24292658849032e-06, "loss": 0.3643, "step": 4149 }, { "epoch": 1.0437625754527162, "grad_norm": 0.31629714369773865, "learning_rate": 8.241812710469507e-06, "loss": 0.3424, "step": 4150 }, { "epoch": 1.0440140845070423, "grad_norm": 0.3564571440219879, "learning_rate": 8.240698554807717e-06, "loss": 0.333, "step": 4151 }, { "epoch": 1.0442655935613683, "grad_norm": 0.36076509952545166, "learning_rate": 8.239584121600371e-06, "loss": 0.3744, "step": 4152 }, { "epoch": 1.044517102615694, "grad_norm": 0.33399519324302673, "learning_rate": 8.238469410942911e-06, "loss": 0.3488, "step": 4153 }, { "epoch": 1.04476861167002, "grad_norm": 0.35272693634033203, "learning_rate": 8.237354422930807e-06, "loss": 0.3651, "step": 4154 }, { "epoch": 1.0450201207243461, "grad_norm": 0.3702928125858307, "learning_rate": 8.236239157659548e-06, "loss": 0.3624, "step": 4155 }, { "epoch": 1.045271629778672, "grad_norm": 0.3613063395023346, "learning_rate": 8.235123615224651e-06, "loss": 0.3579, "step": 4156 }, { "epoch": 1.045523138832998, "grad_norm": 0.3584921956062317, "learning_rate": 8.234007795721657e-06, "loss": 0.3474, "step": 4157 }, { "epoch": 1.045774647887324, "grad_norm": 0.3665167987346649, "learning_rate": 8.232891699246126e-06, "loss": 0.3479, "step": 4158 }, { "epoch": 1.0460261569416498, "grad_norm": 0.3354139029979706, "learning_rate": 8.231775325893646e-06, "loss": 0.3608, "step": 4159 }, { "epoch": 1.0462776659959758, "grad_norm": 0.31492966413497925, "learning_rate": 8.230658675759827e-06, "loss": 0.3536, "step": 4160 }, { "epoch": 1.0465291750503019, "grad_norm": 0.3596387505531311, "learning_rate": 8.229541748940301e-06, "loss": 0.3646, "step": 4161 }, { "epoch": 1.0467806841046277, "grad_norm": 0.3706818222999573, "learning_rate": 8.22842454553073e-06, "loss": 0.355, "step": 4162 }, { "epoch": 1.0470321931589537, "grad_norm": 0.37192225456237793, "learning_rate": 8.227307065626796e-06, "loss": 0.3815, "step": 4163 }, { "epoch": 1.0472837022132797, "grad_norm": 0.3455519378185272, "learning_rate": 8.2261893093242e-06, "loss": 0.363, "step": 4164 }, { "epoch": 1.0475352112676057, "grad_norm": 0.33832308650016785, "learning_rate": 8.225071276718672e-06, "loss": 0.3476, "step": 4165 }, { "epoch": 1.0477867203219315, "grad_norm": 0.3629179000854492, "learning_rate": 8.223952967905967e-06, "loss": 0.3711, "step": 4166 }, { "epoch": 1.0480382293762576, "grad_norm": 0.3548809587955475, "learning_rate": 8.222834382981858e-06, "loss": 0.3507, "step": 4167 }, { "epoch": 1.0482897384305836, "grad_norm": 0.3626018464565277, "learning_rate": 8.221715522042146e-06, "loss": 0.3621, "step": 4168 }, { "epoch": 1.0485412474849094, "grad_norm": 0.310674786567688, "learning_rate": 8.220596385182654e-06, "loss": 0.3744, "step": 4169 }, { "epoch": 1.0487927565392354, "grad_norm": 0.35073432326316833, "learning_rate": 8.219476972499229e-06, "loss": 0.3453, "step": 4170 }, { "epoch": 1.0490442655935615, "grad_norm": 0.3294939696788788, "learning_rate": 8.218357284087745e-06, "loss": 0.3357, "step": 4171 }, { "epoch": 1.0492957746478873, "grad_norm": 0.33905303478240967, "learning_rate": 8.217237320044092e-06, "loss": 0.3369, "step": 4172 }, { "epoch": 1.0495472837022133, "grad_norm": 0.34386754035949707, "learning_rate": 8.216117080464189e-06, "loss": 0.3506, "step": 4173 }, { "epoch": 1.0497987927565393, "grad_norm": 0.3818106949329376, "learning_rate": 8.214996565443979e-06, "loss": 0.3677, "step": 4174 }, { "epoch": 1.0500503018108651, "grad_norm": 0.3344820439815521, "learning_rate": 8.213875775079426e-06, "loss": 0.3711, "step": 4175 }, { "epoch": 1.0503018108651911, "grad_norm": 0.3421842157840729, "learning_rate": 8.212754709466519e-06, "loss": 0.3573, "step": 4176 }, { "epoch": 1.0505533199195172, "grad_norm": 0.37867775559425354, "learning_rate": 8.211633368701268e-06, "loss": 0.3564, "step": 4177 }, { "epoch": 1.050804828973843, "grad_norm": 0.3431204855442047, "learning_rate": 8.210511752879713e-06, "loss": 0.3577, "step": 4178 }, { "epoch": 1.051056338028169, "grad_norm": 0.32079920172691345, "learning_rate": 8.209389862097912e-06, "loss": 0.3484, "step": 4179 }, { "epoch": 1.051307847082495, "grad_norm": 0.3572438955307007, "learning_rate": 8.208267696451947e-06, "loss": 0.382, "step": 4180 }, { "epoch": 1.0515593561368208, "grad_norm": 0.3327823579311371, "learning_rate": 8.207145256037922e-06, "loss": 0.332, "step": 4181 }, { "epoch": 1.0518108651911469, "grad_norm": 0.35914427042007446, "learning_rate": 8.206022540951972e-06, "loss": 0.3365, "step": 4182 }, { "epoch": 1.0520623742454729, "grad_norm": 0.35635095834732056, "learning_rate": 8.204899551290246e-06, "loss": 0.366, "step": 4183 }, { "epoch": 1.0523138832997987, "grad_norm": 0.3339400291442871, "learning_rate": 8.203776287148925e-06, "loss": 0.342, "step": 4184 }, { "epoch": 1.0525653923541247, "grad_norm": 0.3433733582496643, "learning_rate": 8.202652748624208e-06, "loss": 0.3418, "step": 4185 }, { "epoch": 1.0528169014084507, "grad_norm": 0.34672051668167114, "learning_rate": 8.201528935812318e-06, "loss": 0.3415, "step": 4186 }, { "epoch": 1.0530684104627768, "grad_norm": 0.3377094566822052, "learning_rate": 8.200404848809504e-06, "loss": 0.3579, "step": 4187 }, { "epoch": 1.0533199195171026, "grad_norm": 0.3439509868621826, "learning_rate": 8.199280487712035e-06, "loss": 0.3485, "step": 4188 }, { "epoch": 1.0535714285714286, "grad_norm": 0.3966297507286072, "learning_rate": 8.198155852616208e-06, "loss": 0.381, "step": 4189 }, { "epoch": 1.0538229376257546, "grad_norm": 0.3165939450263977, "learning_rate": 8.19703094361834e-06, "loss": 0.3462, "step": 4190 }, { "epoch": 1.0540744466800804, "grad_norm": 0.30367371439933777, "learning_rate": 8.195905760814772e-06, "loss": 0.3467, "step": 4191 }, { "epoch": 1.0543259557344065, "grad_norm": 0.34282615780830383, "learning_rate": 8.194780304301869e-06, "loss": 0.3466, "step": 4192 }, { "epoch": 1.0545774647887325, "grad_norm": 0.3848336338996887, "learning_rate": 8.19365457417602e-06, "loss": 0.3636, "step": 4193 }, { "epoch": 1.0548289738430583, "grad_norm": 0.32148125767707825, "learning_rate": 8.192528570533636e-06, "loss": 0.3611, "step": 4194 }, { "epoch": 1.0550804828973843, "grad_norm": 0.35009652376174927, "learning_rate": 8.191402293471151e-06, "loss": 0.3771, "step": 4195 }, { "epoch": 1.0553319919517103, "grad_norm": 0.37090542912483215, "learning_rate": 8.190275743085025e-06, "loss": 0.3681, "step": 4196 }, { "epoch": 1.0555835010060362, "grad_norm": 0.3471124470233917, "learning_rate": 8.18914891947174e-06, "loss": 0.3665, "step": 4197 }, { "epoch": 1.0558350100603622, "grad_norm": 0.34268495440483093, "learning_rate": 8.188021822727804e-06, "loss": 0.3778, "step": 4198 }, { "epoch": 1.0560865191146882, "grad_norm": 0.35175612568855286, "learning_rate": 8.18689445294974e-06, "loss": 0.3611, "step": 4199 }, { "epoch": 1.056338028169014, "grad_norm": 0.3248383104801178, "learning_rate": 8.185766810234106e-06, "loss": 0.3624, "step": 4200 }, { "epoch": 1.05658953722334, "grad_norm": 0.35728633403778076, "learning_rate": 8.184638894677472e-06, "loss": 0.345, "step": 4201 }, { "epoch": 1.056841046277666, "grad_norm": 0.3417149782180786, "learning_rate": 8.183510706376441e-06, "loss": 0.3637, "step": 4202 }, { "epoch": 1.0570925553319919, "grad_norm": 0.37575763463974, "learning_rate": 8.182382245427634e-06, "loss": 0.3408, "step": 4203 }, { "epoch": 1.057344064386318, "grad_norm": 0.37389978766441345, "learning_rate": 8.181253511927696e-06, "loss": 0.3252, "step": 4204 }, { "epoch": 1.057595573440644, "grad_norm": 0.35000425577163696, "learning_rate": 8.180124505973299e-06, "loss": 0.3451, "step": 4205 }, { "epoch": 1.0578470824949697, "grad_norm": 0.3386198878288269, "learning_rate": 8.17899522766113e-06, "loss": 0.3179, "step": 4206 }, { "epoch": 1.0580985915492958, "grad_norm": 0.3945431411266327, "learning_rate": 8.177865677087908e-06, "loss": 0.3231, "step": 4207 }, { "epoch": 1.0583501006036218, "grad_norm": 0.34005576372146606, "learning_rate": 8.176735854350373e-06, "loss": 0.347, "step": 4208 }, { "epoch": 1.0586016096579476, "grad_norm": 0.36175423860549927, "learning_rate": 8.175605759545285e-06, "loss": 0.3508, "step": 4209 }, { "epoch": 1.0588531187122736, "grad_norm": 0.35928767919540405, "learning_rate": 8.17447539276943e-06, "loss": 0.3871, "step": 4210 }, { "epoch": 1.0591046277665996, "grad_norm": 0.4204372763633728, "learning_rate": 8.173344754119615e-06, "loss": 0.3536, "step": 4211 }, { "epoch": 1.0593561368209254, "grad_norm": 0.3679214119911194, "learning_rate": 8.172213843692676e-06, "loss": 0.3841, "step": 4212 }, { "epoch": 1.0596076458752515, "grad_norm": 0.3367496132850647, "learning_rate": 8.171082661585468e-06, "loss": 0.3806, "step": 4213 }, { "epoch": 1.0598591549295775, "grad_norm": 0.3853965401649475, "learning_rate": 8.169951207894866e-06, "loss": 0.35, "step": 4214 }, { "epoch": 1.0601106639839035, "grad_norm": 0.3903757929801941, "learning_rate": 8.168819482717775e-06, "loss": 0.3529, "step": 4215 }, { "epoch": 1.0603621730382293, "grad_norm": 0.35560932755470276, "learning_rate": 8.167687486151119e-06, "loss": 0.3368, "step": 4216 }, { "epoch": 1.0606136820925554, "grad_norm": 0.3805678188800812, "learning_rate": 8.166555218291847e-06, "loss": 0.332, "step": 4217 }, { "epoch": 1.0608651911468814, "grad_norm": 0.32614269852638245, "learning_rate": 8.16542267923693e-06, "loss": 0.3387, "step": 4218 }, { "epoch": 1.0611167002012072, "grad_norm": 0.35080480575561523, "learning_rate": 8.164289869083365e-06, "loss": 0.3367, "step": 4219 }, { "epoch": 1.0613682092555332, "grad_norm": 0.3625401556491852, "learning_rate": 8.163156787928169e-06, "loss": 0.3508, "step": 4220 }, { "epoch": 1.0616197183098592, "grad_norm": 0.4160093367099762, "learning_rate": 8.162023435868381e-06, "loss": 0.3577, "step": 4221 }, { "epoch": 1.061871227364185, "grad_norm": 0.3521963059902191, "learning_rate": 8.160889813001066e-06, "loss": 0.3468, "step": 4222 }, { "epoch": 1.062122736418511, "grad_norm": 0.37421825528144836, "learning_rate": 8.159755919423315e-06, "loss": 0.3796, "step": 4223 }, { "epoch": 1.062374245472837, "grad_norm": 0.34013405442237854, "learning_rate": 8.158621755232237e-06, "loss": 0.3479, "step": 4224 }, { "epoch": 1.062625754527163, "grad_norm": 0.376768559217453, "learning_rate": 8.157487320524964e-06, "loss": 0.3701, "step": 4225 }, { "epoch": 1.062877263581489, "grad_norm": 0.36440804600715637, "learning_rate": 8.156352615398658e-06, "loss": 0.386, "step": 4226 }, { "epoch": 1.063128772635815, "grad_norm": 0.3305298089981079, "learning_rate": 8.155217639950494e-06, "loss": 0.3242, "step": 4227 }, { "epoch": 1.0633802816901408, "grad_norm": 0.33112478256225586, "learning_rate": 8.154082394277678e-06, "loss": 0.3429, "step": 4228 }, { "epoch": 1.0636317907444668, "grad_norm": 0.3222111165523529, "learning_rate": 8.15294687847744e-06, "loss": 0.3478, "step": 4229 }, { "epoch": 1.0638832997987928, "grad_norm": 0.3655967116355896, "learning_rate": 8.151811092647024e-06, "loss": 0.3619, "step": 4230 }, { "epoch": 1.0641348088531186, "grad_norm": 0.3364644944667816, "learning_rate": 8.150675036883705e-06, "loss": 0.3434, "step": 4231 }, { "epoch": 1.0643863179074446, "grad_norm": 0.34147587418556213, "learning_rate": 8.149538711284782e-06, "loss": 0.3561, "step": 4232 }, { "epoch": 1.0646378269617707, "grad_norm": 0.33940380811691284, "learning_rate": 8.14840211594757e-06, "loss": 0.3664, "step": 4233 }, { "epoch": 1.0648893360160965, "grad_norm": 0.3298323154449463, "learning_rate": 8.147265250969415e-06, "loss": 0.3362, "step": 4234 }, { "epoch": 1.0651408450704225, "grad_norm": 0.34013256430625916, "learning_rate": 8.146128116447679e-06, "loss": 0.3753, "step": 4235 }, { "epoch": 1.0653923541247485, "grad_norm": 0.3295930325984955, "learning_rate": 8.144990712479753e-06, "loss": 0.3482, "step": 4236 }, { "epoch": 1.0656438631790746, "grad_norm": 0.33826765418052673, "learning_rate": 8.143853039163046e-06, "loss": 0.3402, "step": 4237 }, { "epoch": 1.0658953722334004, "grad_norm": 0.35167795419692993, "learning_rate": 8.142715096594994e-06, "loss": 0.3993, "step": 4238 }, { "epoch": 1.0661468812877264, "grad_norm": 0.35461968183517456, "learning_rate": 8.141576884873054e-06, "loss": 0.3738, "step": 4239 }, { "epoch": 1.0663983903420524, "grad_norm": 0.3144620358943939, "learning_rate": 8.140438404094711e-06, "loss": 0.3594, "step": 4240 }, { "epoch": 1.0666498993963782, "grad_norm": 0.33328554034233093, "learning_rate": 8.139299654357462e-06, "loss": 0.3361, "step": 4241 }, { "epoch": 1.0669014084507042, "grad_norm": 0.4244604706764221, "learning_rate": 8.138160635758839e-06, "loss": 0.3409, "step": 4242 }, { "epoch": 1.0671529175050303, "grad_norm": 0.336699515581131, "learning_rate": 8.137021348396389e-06, "loss": 0.3496, "step": 4243 }, { "epoch": 1.067404426559356, "grad_norm": 0.3206990361213684, "learning_rate": 8.135881792367686e-06, "loss": 0.3369, "step": 4244 }, { "epoch": 1.067655935613682, "grad_norm": 0.34667348861694336, "learning_rate": 8.134741967770325e-06, "loss": 0.3572, "step": 4245 }, { "epoch": 1.0679074446680081, "grad_norm": 0.3545694351196289, "learning_rate": 8.133601874701926e-06, "loss": 0.3533, "step": 4246 }, { "epoch": 1.068158953722334, "grad_norm": 0.33755382895469666, "learning_rate": 8.13246151326013e-06, "loss": 0.3517, "step": 4247 }, { "epoch": 1.06841046277666, "grad_norm": 0.33736327290534973, "learning_rate": 8.131320883542601e-06, "loss": 0.3281, "step": 4248 }, { "epoch": 1.068661971830986, "grad_norm": 0.3781490623950958, "learning_rate": 8.13017998564703e-06, "loss": 0.3733, "step": 4249 }, { "epoch": 1.0689134808853118, "grad_norm": 0.35155022144317627, "learning_rate": 8.129038819671122e-06, "loss": 0.3716, "step": 4250 }, { "epoch": 1.0691649899396378, "grad_norm": 0.36461761593818665, "learning_rate": 8.127897385712616e-06, "loss": 0.3367, "step": 4251 }, { "epoch": 1.0694164989939638, "grad_norm": 0.3492373526096344, "learning_rate": 8.126755683869267e-06, "loss": 0.3376, "step": 4252 }, { "epoch": 1.0696680080482897, "grad_norm": 0.3286789357662201, "learning_rate": 8.125613714238855e-06, "loss": 0.3334, "step": 4253 }, { "epoch": 1.0699195171026157, "grad_norm": 0.33751797676086426, "learning_rate": 8.124471476919183e-06, "loss": 0.3599, "step": 4254 }, { "epoch": 1.0701710261569417, "grad_norm": 0.37270689010620117, "learning_rate": 8.123328972008075e-06, "loss": 0.3753, "step": 4255 }, { "epoch": 1.0704225352112675, "grad_norm": 0.3367967903614044, "learning_rate": 8.122186199603378e-06, "loss": 0.3578, "step": 4256 }, { "epoch": 1.0706740442655935, "grad_norm": 0.3527868986129761, "learning_rate": 8.121043159802969e-06, "loss": 0.3491, "step": 4257 }, { "epoch": 1.0709255533199196, "grad_norm": 0.3186793029308319, "learning_rate": 8.119899852704736e-06, "loss": 0.3345, "step": 4258 }, { "epoch": 1.0711770623742454, "grad_norm": 0.34226033091545105, "learning_rate": 8.1187562784066e-06, "loss": 0.371, "step": 4259 }, { "epoch": 1.0714285714285714, "grad_norm": 0.36788326501846313, "learning_rate": 8.1176124370065e-06, "loss": 0.3486, "step": 4260 }, { "epoch": 1.0716800804828974, "grad_norm": 0.33759674429893494, "learning_rate": 8.116468328602397e-06, "loss": 0.3845, "step": 4261 }, { "epoch": 1.0719315895372232, "grad_norm": 0.32118430733680725, "learning_rate": 8.115323953292278e-06, "loss": 0.3362, "step": 4262 }, { "epoch": 1.0721830985915493, "grad_norm": 0.3653360605239868, "learning_rate": 8.114179311174154e-06, "loss": 0.3502, "step": 4263 }, { "epoch": 1.0724346076458753, "grad_norm": 0.32888534665107727, "learning_rate": 8.113034402346052e-06, "loss": 0.3454, "step": 4264 }, { "epoch": 1.0726861167002013, "grad_norm": 0.3332807719707489, "learning_rate": 8.11188922690603e-06, "loss": 0.3384, "step": 4265 }, { "epoch": 1.0729376257545271, "grad_norm": 0.36372309923171997, "learning_rate": 8.110743784952162e-06, "loss": 0.3606, "step": 4266 }, { "epoch": 1.0731891348088531, "grad_norm": 0.3299432396888733, "learning_rate": 8.10959807658255e-06, "loss": 0.3564, "step": 4267 }, { "epoch": 1.0734406438631792, "grad_norm": 0.3273921012878418, "learning_rate": 8.108452101895317e-06, "loss": 0.33, "step": 4268 }, { "epoch": 1.073692152917505, "grad_norm": 0.3181513547897339, "learning_rate": 8.107305860988608e-06, "loss": 0.3416, "step": 4269 }, { "epoch": 1.073943661971831, "grad_norm": 0.3415665924549103, "learning_rate": 8.10615935396059e-06, "loss": 0.353, "step": 4270 }, { "epoch": 1.074195171026157, "grad_norm": 0.34259921312332153, "learning_rate": 8.105012580909457e-06, "loss": 0.373, "step": 4271 }, { "epoch": 1.0744466800804828, "grad_norm": 0.3158281147480011, "learning_rate": 8.103865541933421e-06, "loss": 0.3496, "step": 4272 }, { "epoch": 1.0746981891348089, "grad_norm": 0.32061338424682617, "learning_rate": 8.102718237130718e-06, "loss": 0.3632, "step": 4273 }, { "epoch": 1.0749496981891349, "grad_norm": 0.35798168182373047, "learning_rate": 8.101570666599608e-06, "loss": 0.3756, "step": 4274 }, { "epoch": 1.0752012072434607, "grad_norm": 0.30440935492515564, "learning_rate": 8.100422830438376e-06, "loss": 0.359, "step": 4275 }, { "epoch": 1.0754527162977867, "grad_norm": 0.31603163480758667, "learning_rate": 8.099274728745324e-06, "loss": 0.339, "step": 4276 }, { "epoch": 1.0757042253521127, "grad_norm": 0.3540377914905548, "learning_rate": 8.09812636161878e-06, "loss": 0.3457, "step": 4277 }, { "epoch": 1.0759557344064385, "grad_norm": 0.33938080072402954, "learning_rate": 8.096977729157096e-06, "loss": 0.3666, "step": 4278 }, { "epoch": 1.0762072434607646, "grad_norm": 0.36812707781791687, "learning_rate": 8.095828831458643e-06, "loss": 0.3479, "step": 4279 }, { "epoch": 1.0764587525150906, "grad_norm": 0.33766672015190125, "learning_rate": 8.094679668621818e-06, "loss": 0.3474, "step": 4280 }, { "epoch": 1.0767102615694164, "grad_norm": 0.3450581133365631, "learning_rate": 8.09353024074504e-06, "loss": 0.3536, "step": 4281 }, { "epoch": 1.0769617706237424, "grad_norm": 0.37925583124160767, "learning_rate": 8.09238054792675e-06, "loss": 0.347, "step": 4282 }, { "epoch": 1.0772132796780685, "grad_norm": 0.35549673438072205, "learning_rate": 8.091230590265411e-06, "loss": 0.3493, "step": 4283 }, { "epoch": 1.0774647887323943, "grad_norm": 0.317950040102005, "learning_rate": 8.090080367859512e-06, "loss": 0.3477, "step": 4284 }, { "epoch": 1.0777162977867203, "grad_norm": 0.3614874482154846, "learning_rate": 8.08892988080756e-06, "loss": 0.36, "step": 4285 }, { "epoch": 1.0779678068410463, "grad_norm": 0.35449376702308655, "learning_rate": 8.087779129208088e-06, "loss": 0.371, "step": 4286 }, { "epoch": 1.0782193158953723, "grad_norm": 0.32577499747276306, "learning_rate": 8.086628113159651e-06, "loss": 0.3402, "step": 4287 }, { "epoch": 1.0784708249496981, "grad_norm": 0.3384754955768585, "learning_rate": 8.085476832760828e-06, "loss": 0.3697, "step": 4288 }, { "epoch": 1.0787223340040242, "grad_norm": 0.3504764139652252, "learning_rate": 8.084325288110215e-06, "loss": 0.3467, "step": 4289 }, { "epoch": 1.0789738430583502, "grad_norm": 0.3410959243774414, "learning_rate": 8.083173479306436e-06, "loss": 0.3749, "step": 4290 }, { "epoch": 1.079225352112676, "grad_norm": 0.34386518597602844, "learning_rate": 8.082021406448137e-06, "loss": 0.3713, "step": 4291 }, { "epoch": 1.079476861167002, "grad_norm": 0.3209969699382782, "learning_rate": 8.080869069633987e-06, "loss": 0.339, "step": 4292 }, { "epoch": 1.079728370221328, "grad_norm": 0.3502729833126068, "learning_rate": 8.079716468962673e-06, "loss": 0.3597, "step": 4293 }, { "epoch": 1.0799798792756539, "grad_norm": 0.3301171064376831, "learning_rate": 8.078563604532911e-06, "loss": 0.3534, "step": 4294 }, { "epoch": 1.08023138832998, "grad_norm": 0.38163283467292786, "learning_rate": 8.077410476443436e-06, "loss": 0.3326, "step": 4295 }, { "epoch": 1.080482897384306, "grad_norm": 0.30207228660583496, "learning_rate": 8.076257084793007e-06, "loss": 0.3437, "step": 4296 }, { "epoch": 1.0807344064386317, "grad_norm": 0.34427884221076965, "learning_rate": 8.075103429680402e-06, "loss": 0.3593, "step": 4297 }, { "epoch": 1.0809859154929577, "grad_norm": 0.40237826108932495, "learning_rate": 8.073949511204426e-06, "loss": 0.3564, "step": 4298 }, { "epoch": 1.0812374245472838, "grad_norm": 0.34032902121543884, "learning_rate": 8.072795329463907e-06, "loss": 0.3585, "step": 4299 }, { "epoch": 1.0814889336016096, "grad_norm": 0.3582513630390167, "learning_rate": 8.07164088455769e-06, "loss": 0.3272, "step": 4300 }, { "epoch": 1.0817404426559356, "grad_norm": 0.3856683075428009, "learning_rate": 8.070486176584647e-06, "loss": 0.3603, "step": 4301 }, { "epoch": 1.0819919517102616, "grad_norm": 0.3110501766204834, "learning_rate": 8.069331205643671e-06, "loss": 0.3748, "step": 4302 }, { "epoch": 1.0822434607645874, "grad_norm": 0.3185417950153351, "learning_rate": 8.068175971833679e-06, "loss": 0.3331, "step": 4303 }, { "epoch": 1.0824949698189135, "grad_norm": 0.33823883533477783, "learning_rate": 8.06702047525361e-06, "loss": 0.3492, "step": 4304 }, { "epoch": 1.0827464788732395, "grad_norm": 0.3247283399105072, "learning_rate": 8.065864716002426e-06, "loss": 0.3648, "step": 4305 }, { "epoch": 1.0829979879275653, "grad_norm": 0.35124877095222473, "learning_rate": 8.064708694179107e-06, "loss": 0.345, "step": 4306 }, { "epoch": 1.0832494969818913, "grad_norm": 0.3573397696018219, "learning_rate": 8.063552409882662e-06, "loss": 0.3676, "step": 4307 }, { "epoch": 1.0835010060362174, "grad_norm": 0.33024418354034424, "learning_rate": 8.06239586321212e-06, "loss": 0.3826, "step": 4308 }, { "epoch": 1.0837525150905432, "grad_norm": 0.33846285939216614, "learning_rate": 8.06123905426653e-06, "loss": 0.3585, "step": 4309 }, { "epoch": 1.0840040241448692, "grad_norm": 0.34866371750831604, "learning_rate": 8.060081983144964e-06, "loss": 0.3492, "step": 4310 }, { "epoch": 1.0842555331991952, "grad_norm": 0.3329204320907593, "learning_rate": 8.058924649946523e-06, "loss": 0.3892, "step": 4311 }, { "epoch": 1.084507042253521, "grad_norm": 0.32447880506515503, "learning_rate": 8.05776705477032e-06, "loss": 0.3566, "step": 4312 }, { "epoch": 1.084758551307847, "grad_norm": 0.33247438073158264, "learning_rate": 8.0566091977155e-06, "loss": 0.3458, "step": 4313 }, { "epoch": 1.085010060362173, "grad_norm": 0.3221612572669983, "learning_rate": 8.055451078881221e-06, "loss": 0.3629, "step": 4314 }, { "epoch": 1.085261569416499, "grad_norm": 0.3234616816043854, "learning_rate": 8.054292698366674e-06, "loss": 0.3566, "step": 4315 }, { "epoch": 1.085513078470825, "grad_norm": 0.3308125436306, "learning_rate": 8.053134056271064e-06, "loss": 0.3519, "step": 4316 }, { "epoch": 1.085764587525151, "grad_norm": 0.3306364119052887, "learning_rate": 8.051975152693623e-06, "loss": 0.3253, "step": 4317 }, { "epoch": 1.086016096579477, "grad_norm": 0.343723326921463, "learning_rate": 8.050815987733604e-06, "loss": 0.3455, "step": 4318 }, { "epoch": 1.0862676056338028, "grad_norm": 0.36126473546028137, "learning_rate": 8.049656561490282e-06, "loss": 0.3846, "step": 4319 }, { "epoch": 1.0865191146881288, "grad_norm": 0.3083570897579193, "learning_rate": 8.048496874062953e-06, "loss": 0.3608, "step": 4320 }, { "epoch": 1.0867706237424548, "grad_norm": 0.33808434009552, "learning_rate": 8.04733692555094e-06, "loss": 0.3466, "step": 4321 }, { "epoch": 1.0870221327967806, "grad_norm": 0.3496091067790985, "learning_rate": 8.04617671605358e-06, "loss": 0.3795, "step": 4322 }, { "epoch": 1.0872736418511066, "grad_norm": 0.34945693612098694, "learning_rate": 8.045016245670243e-06, "loss": 0.354, "step": 4323 }, { "epoch": 1.0875251509054327, "grad_norm": 0.3567672669887543, "learning_rate": 8.043855514500314e-06, "loss": 0.3537, "step": 4324 }, { "epoch": 1.0877766599597585, "grad_norm": 0.35311269760131836, "learning_rate": 8.042694522643202e-06, "loss": 0.3536, "step": 4325 }, { "epoch": 1.0880281690140845, "grad_norm": 0.33748674392700195, "learning_rate": 8.041533270198341e-06, "loss": 0.371, "step": 4326 }, { "epoch": 1.0882796780684105, "grad_norm": 0.35377809405326843, "learning_rate": 8.04037175726518e-06, "loss": 0.3517, "step": 4327 }, { "epoch": 1.0885311871227363, "grad_norm": 0.3475686311721802, "learning_rate": 8.039209983943201e-06, "loss": 0.3564, "step": 4328 }, { "epoch": 1.0887826961770624, "grad_norm": 0.35228431224823, "learning_rate": 8.0380479503319e-06, "loss": 0.3494, "step": 4329 }, { "epoch": 1.0890342052313884, "grad_norm": 0.31928402185440063, "learning_rate": 8.036885656530797e-06, "loss": 0.3457, "step": 4330 }, { "epoch": 1.0892857142857142, "grad_norm": 0.3187348246574402, "learning_rate": 8.035723102639437e-06, "loss": 0.3289, "step": 4331 }, { "epoch": 1.0895372233400402, "grad_norm": 0.3454899191856384, "learning_rate": 8.034560288757386e-06, "loss": 0.3655, "step": 4332 }, { "epoch": 1.0897887323943662, "grad_norm": 0.3134077787399292, "learning_rate": 8.033397214984226e-06, "loss": 0.35, "step": 4333 }, { "epoch": 1.0900402414486923, "grad_norm": 0.326066792011261, "learning_rate": 8.032233881419576e-06, "loss": 0.3629, "step": 4334 }, { "epoch": 1.090291750503018, "grad_norm": 0.34841668605804443, "learning_rate": 8.031070288163061e-06, "loss": 0.3792, "step": 4335 }, { "epoch": 1.090543259557344, "grad_norm": 0.3101913034915924, "learning_rate": 8.029906435314339e-06, "loss": 0.3148, "step": 4336 }, { "epoch": 1.0907947686116701, "grad_norm": 0.36832088232040405, "learning_rate": 8.028742322973085e-06, "loss": 0.3647, "step": 4337 }, { "epoch": 1.091046277665996, "grad_norm": 0.3566989004611969, "learning_rate": 8.027577951238999e-06, "loss": 0.3651, "step": 4338 }, { "epoch": 1.091297786720322, "grad_norm": 0.3333961069583893, "learning_rate": 8.026413320211804e-06, "loss": 0.3356, "step": 4339 }, { "epoch": 1.091549295774648, "grad_norm": 0.38320064544677734, "learning_rate": 8.02524842999124e-06, "loss": 0.3819, "step": 4340 }, { "epoch": 1.0918008048289738, "grad_norm": 0.3310604989528656, "learning_rate": 8.024083280677073e-06, "loss": 0.3626, "step": 4341 }, { "epoch": 1.0920523138832998, "grad_norm": 0.3529944121837616, "learning_rate": 8.02291787236909e-06, "loss": 0.3707, "step": 4342 }, { "epoch": 1.0923038229376258, "grad_norm": 0.3804549276828766, "learning_rate": 8.021752205167108e-06, "loss": 0.3516, "step": 4343 }, { "epoch": 1.0925553319919517, "grad_norm": 0.3501472473144531, "learning_rate": 8.02058627917095e-06, "loss": 0.3521, "step": 4344 }, { "epoch": 1.0928068410462777, "grad_norm": 0.36724236607551575, "learning_rate": 8.019420094480475e-06, "loss": 0.3649, "step": 4345 }, { "epoch": 1.0930583501006037, "grad_norm": 0.3527858555316925, "learning_rate": 8.018253651195556e-06, "loss": 0.36, "step": 4346 }, { "epoch": 1.0933098591549295, "grad_norm": 0.37071692943573, "learning_rate": 8.017086949416095e-06, "loss": 0.3735, "step": 4347 }, { "epoch": 1.0935613682092555, "grad_norm": 0.3491380512714386, "learning_rate": 8.015919989242014e-06, "loss": 0.3577, "step": 4348 }, { "epoch": 1.0938128772635816, "grad_norm": 0.36504706740379333, "learning_rate": 8.014752770773252e-06, "loss": 0.3314, "step": 4349 }, { "epoch": 1.0940643863179074, "grad_norm": 0.36621904373168945, "learning_rate": 8.013585294109773e-06, "loss": 0.3417, "step": 4350 }, { "epoch": 1.0943158953722334, "grad_norm": 0.35412460565567017, "learning_rate": 8.012417559351569e-06, "loss": 0.3774, "step": 4351 }, { "epoch": 1.0945674044265594, "grad_norm": 0.37957510352134705, "learning_rate": 8.011249566598647e-06, "loss": 0.384, "step": 4352 }, { "epoch": 1.0948189134808852, "grad_norm": 0.34848281741142273, "learning_rate": 8.010081315951037e-06, "loss": 0.3393, "step": 4353 }, { "epoch": 1.0950704225352113, "grad_norm": 0.33474472165107727, "learning_rate": 8.008912807508794e-06, "loss": 0.351, "step": 4354 }, { "epoch": 1.0953219315895373, "grad_norm": 0.34298956394195557, "learning_rate": 8.007744041371993e-06, "loss": 0.337, "step": 4355 }, { "epoch": 1.095573440643863, "grad_norm": 0.3466607332229614, "learning_rate": 8.00657501764073e-06, "loss": 0.3387, "step": 4356 }, { "epoch": 1.095824949698189, "grad_norm": 0.3456454575061798, "learning_rate": 8.005405736415127e-06, "loss": 0.3355, "step": 4357 }, { "epoch": 1.0960764587525151, "grad_norm": 0.3142288327217102, "learning_rate": 8.004236197795323e-06, "loss": 0.3811, "step": 4358 }, { "epoch": 1.096327967806841, "grad_norm": 0.3510921597480774, "learning_rate": 8.003066401881484e-06, "loss": 0.3682, "step": 4359 }, { "epoch": 1.096579476861167, "grad_norm": 0.37072980403900146, "learning_rate": 8.001896348773795e-06, "loss": 0.3559, "step": 4360 }, { "epoch": 1.096830985915493, "grad_norm": 0.3439767062664032, "learning_rate": 8.000726038572463e-06, "loss": 0.3517, "step": 4361 }, { "epoch": 1.0970824949698188, "grad_norm": 0.3286254405975342, "learning_rate": 7.999555471377719e-06, "loss": 0.3587, "step": 4362 }, { "epoch": 1.0973340040241448, "grad_norm": 0.33705952763557434, "learning_rate": 7.998384647289813e-06, "loss": 0.3666, "step": 4363 }, { "epoch": 1.0975855130784709, "grad_norm": 0.3337515890598297, "learning_rate": 7.997213566409022e-06, "loss": 0.3502, "step": 4364 }, { "epoch": 1.0978370221327969, "grad_norm": 0.3719703257083893, "learning_rate": 7.996042228835637e-06, "loss": 0.3815, "step": 4365 }, { "epoch": 1.0980885311871227, "grad_norm": 0.35766953229904175, "learning_rate": 7.994870634669978e-06, "loss": 0.3295, "step": 4366 }, { "epoch": 1.0983400402414487, "grad_norm": 0.33565643429756165, "learning_rate": 7.993698784012387e-06, "loss": 0.3499, "step": 4367 }, { "epoch": 1.0985915492957747, "grad_norm": 0.34716835618019104, "learning_rate": 7.992526676963222e-06, "loss": 0.3523, "step": 4368 }, { "epoch": 1.0988430583501005, "grad_norm": 0.33831220865249634, "learning_rate": 7.991354313622868e-06, "loss": 0.3546, "step": 4369 }, { "epoch": 1.0990945674044266, "grad_norm": 0.36244910955429077, "learning_rate": 7.990181694091733e-06, "loss": 0.3547, "step": 4370 }, { "epoch": 1.0993460764587526, "grad_norm": 0.33830562233924866, "learning_rate": 7.98900881847024e-06, "loss": 0.366, "step": 4371 }, { "epoch": 1.0995975855130784, "grad_norm": 0.35777151584625244, "learning_rate": 7.987835686858845e-06, "loss": 0.336, "step": 4372 }, { "epoch": 1.0998490945674044, "grad_norm": 0.36557304859161377, "learning_rate": 7.986662299358012e-06, "loss": 0.3743, "step": 4373 }, { "epoch": 1.1001006036217305, "grad_norm": 0.33524492383003235, "learning_rate": 7.985488656068238e-06, "loss": 0.3942, "step": 4374 }, { "epoch": 1.1003521126760563, "grad_norm": 0.370383620262146, "learning_rate": 7.984314757090036e-06, "loss": 0.3503, "step": 4375 }, { "epoch": 1.1006036217303823, "grad_norm": 0.31732454895973206, "learning_rate": 7.983140602523949e-06, "loss": 0.3528, "step": 4376 }, { "epoch": 1.1008551307847083, "grad_norm": 0.3776906728744507, "learning_rate": 7.981966192470529e-06, "loss": 0.3587, "step": 4377 }, { "epoch": 1.1011066398390341, "grad_norm": 0.30028223991394043, "learning_rate": 7.980791527030361e-06, "loss": 0.3388, "step": 4378 }, { "epoch": 1.1013581488933601, "grad_norm": 0.32881900668144226, "learning_rate": 7.979616606304045e-06, "loss": 0.3689, "step": 4379 }, { "epoch": 1.1016096579476862, "grad_norm": 0.4046640694141388, "learning_rate": 7.978441430392208e-06, "loss": 0.374, "step": 4380 }, { "epoch": 1.101861167002012, "grad_norm": 0.3668253719806671, "learning_rate": 7.977265999395496e-06, "loss": 0.3649, "step": 4381 }, { "epoch": 1.102112676056338, "grad_norm": 0.34378865361213684, "learning_rate": 7.976090313414575e-06, "loss": 0.3674, "step": 4382 }, { "epoch": 1.102364185110664, "grad_norm": 0.3307862877845764, "learning_rate": 7.974914372550139e-06, "loss": 0.3359, "step": 4383 }, { "epoch": 1.10261569416499, "grad_norm": 0.3454669713973999, "learning_rate": 7.973738176902897e-06, "loss": 0.3663, "step": 4384 }, { "epoch": 1.1028672032193159, "grad_norm": 0.3528616428375244, "learning_rate": 7.972561726573584e-06, "loss": 0.3359, "step": 4385 }, { "epoch": 1.1031187122736419, "grad_norm": 0.35522276163101196, "learning_rate": 7.971385021662956e-06, "loss": 0.3597, "step": 4386 }, { "epoch": 1.103370221327968, "grad_norm": 0.3870282769203186, "learning_rate": 7.970208062271791e-06, "loss": 0.3485, "step": 4387 }, { "epoch": 1.1036217303822937, "grad_norm": 0.3756462037563324, "learning_rate": 7.969030848500886e-06, "loss": 0.3455, "step": 4388 }, { "epoch": 1.1038732394366197, "grad_norm": 0.34227368235588074, "learning_rate": 7.967853380451062e-06, "loss": 0.3508, "step": 4389 }, { "epoch": 1.1041247484909458, "grad_norm": 0.4221192002296448, "learning_rate": 7.966675658223162e-06, "loss": 0.3851, "step": 4390 }, { "epoch": 1.1043762575452716, "grad_norm": 0.31191956996917725, "learning_rate": 7.965497681918052e-06, "loss": 0.3239, "step": 4391 }, { "epoch": 1.1046277665995976, "grad_norm": 0.32340946793556213, "learning_rate": 7.96431945163662e-06, "loss": 0.3637, "step": 4392 }, { "epoch": 1.1048792756539236, "grad_norm": 0.3376328945159912, "learning_rate": 7.96314096747977e-06, "loss": 0.3395, "step": 4393 }, { "epoch": 1.1051307847082494, "grad_norm": 0.33500152826309204, "learning_rate": 7.961962229548433e-06, "loss": 0.3576, "step": 4394 }, { "epoch": 1.1053822937625755, "grad_norm": 0.3390989601612091, "learning_rate": 7.960783237943561e-06, "loss": 0.3521, "step": 4395 }, { "epoch": 1.1056338028169015, "grad_norm": 0.3361843228340149, "learning_rate": 7.959603992766127e-06, "loss": 0.3493, "step": 4396 }, { "epoch": 1.1058853118712273, "grad_norm": 0.3171842098236084, "learning_rate": 7.958424494117128e-06, "loss": 0.3438, "step": 4397 }, { "epoch": 1.1061368209255533, "grad_norm": 0.2926395535469055, "learning_rate": 7.957244742097579e-06, "loss": 0.343, "step": 4398 }, { "epoch": 1.1063883299798793, "grad_norm": 0.34122782945632935, "learning_rate": 7.956064736808516e-06, "loss": 0.345, "step": 4399 }, { "epoch": 1.1066398390342052, "grad_norm": 0.3118756115436554, "learning_rate": 7.954884478351003e-06, "loss": 0.338, "step": 4400 }, { "epoch": 1.1068913480885312, "grad_norm": 0.3336293399333954, "learning_rate": 7.953703966826118e-06, "loss": 0.3618, "step": 4401 }, { "epoch": 1.1071428571428572, "grad_norm": 0.3180750906467438, "learning_rate": 7.95252320233497e-06, "loss": 0.3661, "step": 4402 }, { "epoch": 1.107394366197183, "grad_norm": 0.3299802541732788, "learning_rate": 7.951342184978678e-06, "loss": 0.3472, "step": 4403 }, { "epoch": 1.107645875251509, "grad_norm": 0.34347137808799744, "learning_rate": 7.950160914858392e-06, "loss": 0.3272, "step": 4404 }, { "epoch": 1.107897384305835, "grad_norm": 0.36151158809661865, "learning_rate": 7.94897939207528e-06, "loss": 0.3307, "step": 4405 }, { "epoch": 1.1081488933601609, "grad_norm": 0.323889821767807, "learning_rate": 7.947797616730532e-06, "loss": 0.3571, "step": 4406 }, { "epoch": 1.108400402414487, "grad_norm": 0.33470696210861206, "learning_rate": 7.94661558892536e-06, "loss": 0.354, "step": 4407 }, { "epoch": 1.108651911468813, "grad_norm": 0.33871525526046753, "learning_rate": 7.945433308760998e-06, "loss": 0.3722, "step": 4408 }, { "epoch": 1.1089034205231387, "grad_norm": 0.3187379837036133, "learning_rate": 7.944250776338696e-06, "loss": 0.3618, "step": 4409 }, { "epoch": 1.1091549295774648, "grad_norm": 0.3083096444606781, "learning_rate": 7.943067991759736e-06, "loss": 0.3508, "step": 4410 }, { "epoch": 1.1094064386317908, "grad_norm": 0.33438965678215027, "learning_rate": 7.941884955125416e-06, "loss": 0.3632, "step": 4411 }, { "epoch": 1.1096579476861166, "grad_norm": 0.35571396350860596, "learning_rate": 7.940701666537051e-06, "loss": 0.3458, "step": 4412 }, { "epoch": 1.1099094567404426, "grad_norm": 0.3360385298728943, "learning_rate": 7.939518126095986e-06, "loss": 0.3572, "step": 4413 }, { "epoch": 1.1101609657947686, "grad_norm": 0.3844273090362549, "learning_rate": 7.938334333903584e-06, "loss": 0.3569, "step": 4414 }, { "epoch": 1.1104124748490947, "grad_norm": 0.3142733573913574, "learning_rate": 7.937150290061228e-06, "loss": 0.3575, "step": 4415 }, { "epoch": 1.1106639839034205, "grad_norm": 0.32374638319015503, "learning_rate": 7.935965994670325e-06, "loss": 0.3405, "step": 4416 }, { "epoch": 1.1109154929577465, "grad_norm": 0.33620190620422363, "learning_rate": 7.9347814478323e-06, "loss": 0.3691, "step": 4417 }, { "epoch": 1.1111670020120725, "grad_norm": 0.3400149345397949, "learning_rate": 7.933596649648606e-06, "loss": 0.3382, "step": 4418 }, { "epoch": 1.1114185110663983, "grad_norm": 0.34986597299575806, "learning_rate": 7.932411600220712e-06, "loss": 0.3667, "step": 4419 }, { "epoch": 1.1116700201207244, "grad_norm": 0.34316298365592957, "learning_rate": 7.931226299650108e-06, "loss": 0.3592, "step": 4420 }, { "epoch": 1.1119215291750504, "grad_norm": 0.362543523311615, "learning_rate": 7.930040748038309e-06, "loss": 0.3544, "step": 4421 }, { "epoch": 1.1121730382293762, "grad_norm": 0.35389113426208496, "learning_rate": 7.92885494548685e-06, "loss": 0.3427, "step": 4422 }, { "epoch": 1.1124245472837022, "grad_norm": 0.32142454385757446, "learning_rate": 7.927668892097288e-06, "loss": 0.354, "step": 4423 }, { "epoch": 1.1126760563380282, "grad_norm": 0.3471391499042511, "learning_rate": 7.926482587971202e-06, "loss": 0.3538, "step": 4424 }, { "epoch": 1.112927565392354, "grad_norm": 0.33723774552345276, "learning_rate": 7.925296033210191e-06, "loss": 0.3634, "step": 4425 }, { "epoch": 1.11317907444668, "grad_norm": 0.3441479206085205, "learning_rate": 7.924109227915872e-06, "loss": 0.3669, "step": 4426 }, { "epoch": 1.113430583501006, "grad_norm": 0.3241642713546753, "learning_rate": 7.922922172189892e-06, "loss": 0.3629, "step": 4427 }, { "epoch": 1.113682092555332, "grad_norm": 0.35414087772369385, "learning_rate": 7.921734866133917e-06, "loss": 0.3546, "step": 4428 }, { "epoch": 1.113933601609658, "grad_norm": 0.3270585834980011, "learning_rate": 7.920547309849626e-06, "loss": 0.3449, "step": 4429 }, { "epoch": 1.114185110663984, "grad_norm": 0.3540771007537842, "learning_rate": 7.91935950343873e-06, "loss": 0.3508, "step": 4430 }, { "epoch": 1.1144366197183098, "grad_norm": 0.3404322862625122, "learning_rate": 7.918171447002955e-06, "loss": 0.3358, "step": 4431 }, { "epoch": 1.1146881287726358, "grad_norm": 0.335871160030365, "learning_rate": 7.916983140644052e-06, "loss": 0.345, "step": 4432 }, { "epoch": 1.1149396378269618, "grad_norm": 0.351862370967865, "learning_rate": 7.915794584463792e-06, "loss": 0.3417, "step": 4433 }, { "epoch": 1.1151911468812878, "grad_norm": 0.3961928188800812, "learning_rate": 7.914605778563965e-06, "loss": 0.3346, "step": 4434 }, { "epoch": 1.1154426559356136, "grad_norm": 0.33614158630371094, "learning_rate": 7.913416723046387e-06, "loss": 0.3679, "step": 4435 }, { "epoch": 1.1156941649899397, "grad_norm": 0.3272840082645416, "learning_rate": 7.912227418012895e-06, "loss": 0.3413, "step": 4436 }, { "epoch": 1.1159456740442657, "grad_norm": 0.3329838812351227, "learning_rate": 7.911037863565344e-06, "loss": 0.3587, "step": 4437 }, { "epoch": 1.1161971830985915, "grad_norm": 0.41790270805358887, "learning_rate": 7.90984805980561e-06, "loss": 0.3805, "step": 4438 }, { "epoch": 1.1164486921529175, "grad_norm": 0.3353562355041504, "learning_rate": 7.908658006835593e-06, "loss": 0.3301, "step": 4439 }, { "epoch": 1.1167002012072436, "grad_norm": 0.3343071937561035, "learning_rate": 7.907467704757214e-06, "loss": 0.3614, "step": 4440 }, { "epoch": 1.1169517102615694, "grad_norm": 0.37369048595428467, "learning_rate": 7.906277153672417e-06, "loss": 0.3467, "step": 4441 }, { "epoch": 1.1172032193158954, "grad_norm": 0.3621070981025696, "learning_rate": 7.905086353683162e-06, "loss": 0.3337, "step": 4442 }, { "epoch": 1.1174547283702214, "grad_norm": 0.33832234144210815, "learning_rate": 7.903895304891436e-06, "loss": 0.3742, "step": 4443 }, { "epoch": 1.1177062374245472, "grad_norm": 0.3197494447231293, "learning_rate": 7.902704007399243e-06, "loss": 0.3868, "step": 4444 }, { "epoch": 1.1179577464788732, "grad_norm": 0.3825930058956146, "learning_rate": 7.901512461308612e-06, "loss": 0.3603, "step": 4445 }, { "epoch": 1.1182092555331993, "grad_norm": 0.3661872446537018, "learning_rate": 7.90032066672159e-06, "loss": 0.3633, "step": 4446 }, { "epoch": 1.118460764587525, "grad_norm": 0.31290680170059204, "learning_rate": 7.899128623740246e-06, "loss": 0.3503, "step": 4447 }, { "epoch": 1.118712273641851, "grad_norm": 0.35227593779563904, "learning_rate": 7.897936332466674e-06, "loss": 0.3781, "step": 4448 }, { "epoch": 1.1189637826961771, "grad_norm": 0.3571607768535614, "learning_rate": 7.896743793002983e-06, "loss": 0.355, "step": 4449 }, { "epoch": 1.119215291750503, "grad_norm": 0.35419657826423645, "learning_rate": 7.89555100545131e-06, "loss": 0.3337, "step": 4450 }, { "epoch": 1.119466800804829, "grad_norm": 0.31918078660964966, "learning_rate": 7.894357969913807e-06, "loss": 0.3707, "step": 4451 }, { "epoch": 1.119718309859155, "grad_norm": 0.33282384276390076, "learning_rate": 7.893164686492652e-06, "loss": 0.3861, "step": 4452 }, { "epoch": 1.1199698189134808, "grad_norm": 0.3873891234397888, "learning_rate": 7.891971155290039e-06, "loss": 0.3788, "step": 4453 }, { "epoch": 1.1202213279678068, "grad_norm": 0.3210555613040924, "learning_rate": 7.89077737640819e-06, "loss": 0.3426, "step": 4454 }, { "epoch": 1.1204728370221329, "grad_norm": 0.33169567584991455, "learning_rate": 7.889583349949341e-06, "loss": 0.3279, "step": 4455 }, { "epoch": 1.1207243460764587, "grad_norm": 0.36757755279541016, "learning_rate": 7.88838907601576e-06, "loss": 0.3631, "step": 4456 }, { "epoch": 1.1209758551307847, "grad_norm": 0.32751592993736267, "learning_rate": 7.88719455470972e-06, "loss": 0.3331, "step": 4457 }, { "epoch": 1.1212273641851107, "grad_norm": 0.3210444152355194, "learning_rate": 7.88599978613353e-06, "loss": 0.3527, "step": 4458 }, { "epoch": 1.1214788732394365, "grad_norm": 0.37147364020347595, "learning_rate": 7.884804770389514e-06, "loss": 0.3601, "step": 4459 }, { "epoch": 1.1217303822937625, "grad_norm": 0.33347150683403015, "learning_rate": 7.883609507580016e-06, "loss": 0.336, "step": 4460 }, { "epoch": 1.1219818913480886, "grad_norm": 0.38581255078315735, "learning_rate": 7.882413997807404e-06, "loss": 0.3488, "step": 4461 }, { "epoch": 1.1222334004024144, "grad_norm": 0.3418939411640167, "learning_rate": 7.881218241174064e-06, "loss": 0.3565, "step": 4462 }, { "epoch": 1.1224849094567404, "grad_norm": 0.3888770043849945, "learning_rate": 7.880022237782407e-06, "loss": 0.3497, "step": 4463 }, { "epoch": 1.1227364185110664, "grad_norm": 0.3226841688156128, "learning_rate": 7.878825987734864e-06, "loss": 0.3558, "step": 4464 }, { "epoch": 1.1229879275653925, "grad_norm": 0.3461357355117798, "learning_rate": 7.877629491133884e-06, "loss": 0.3294, "step": 4465 }, { "epoch": 1.1232394366197183, "grad_norm": 0.3348862826824188, "learning_rate": 7.876432748081939e-06, "loss": 0.3626, "step": 4466 }, { "epoch": 1.1234909456740443, "grad_norm": 0.3597047030925751, "learning_rate": 7.875235758681527e-06, "loss": 0.3706, "step": 4467 }, { "epoch": 1.1237424547283703, "grad_norm": 0.3321612477302551, "learning_rate": 7.874038523035157e-06, "loss": 0.3608, "step": 4468 }, { "epoch": 1.1239939637826961, "grad_norm": 0.3340033292770386, "learning_rate": 7.872841041245369e-06, "loss": 0.3429, "step": 4469 }, { "epoch": 1.1242454728370221, "grad_norm": 0.34898126125335693, "learning_rate": 7.871643313414718e-06, "loss": 0.3439, "step": 4470 }, { "epoch": 1.1244969818913482, "grad_norm": 0.33899804949760437, "learning_rate": 7.870445339645783e-06, "loss": 0.3356, "step": 4471 }, { "epoch": 1.124748490945674, "grad_norm": 0.30220794677734375, "learning_rate": 7.869247120041161e-06, "loss": 0.3427, "step": 4472 }, { "epoch": 1.125, "grad_norm": 0.3090187907218933, "learning_rate": 7.868048654703474e-06, "loss": 0.344, "step": 4473 }, { "epoch": 1.125251509054326, "grad_norm": 0.33787021040916443, "learning_rate": 7.866849943735361e-06, "loss": 0.3362, "step": 4474 }, { "epoch": 1.1255030181086518, "grad_norm": 0.3474128842353821, "learning_rate": 7.865650987239485e-06, "loss": 0.3474, "step": 4475 }, { "epoch": 1.1257545271629779, "grad_norm": 0.34391891956329346, "learning_rate": 7.864451785318532e-06, "loss": 0.3917, "step": 4476 }, { "epoch": 1.1260060362173039, "grad_norm": 0.34758976101875305, "learning_rate": 7.863252338075202e-06, "loss": 0.3533, "step": 4477 }, { "epoch": 1.1262575452716297, "grad_norm": 0.32106417417526245, "learning_rate": 7.862052645612222e-06, "loss": 0.3453, "step": 4478 }, { "epoch": 1.1265090543259557, "grad_norm": 0.337090402841568, "learning_rate": 7.860852708032337e-06, "loss": 0.3541, "step": 4479 }, { "epoch": 1.1267605633802817, "grad_norm": 0.3655794560909271, "learning_rate": 7.859652525438314e-06, "loss": 0.3466, "step": 4480 }, { "epoch": 1.1270120724346078, "grad_norm": 0.32245180010795593, "learning_rate": 7.858452097932945e-06, "loss": 0.3464, "step": 4481 }, { "epoch": 1.1272635814889336, "grad_norm": 0.32112833857536316, "learning_rate": 7.857251425619034e-06, "loss": 0.3705, "step": 4482 }, { "epoch": 1.1275150905432596, "grad_norm": 0.3438814580440521, "learning_rate": 7.856050508599413e-06, "loss": 0.337, "step": 4483 }, { "epoch": 1.1277665995975856, "grad_norm": 0.33978280425071716, "learning_rate": 7.854849346976935e-06, "loss": 0.3429, "step": 4484 }, { "epoch": 1.1280181086519114, "grad_norm": 0.3462032079696655, "learning_rate": 7.85364794085447e-06, "loss": 0.3581, "step": 4485 }, { "epoch": 1.1282696177062375, "grad_norm": 0.3374899923801422, "learning_rate": 7.85244629033491e-06, "loss": 0.3614, "step": 4486 }, { "epoch": 1.1285211267605635, "grad_norm": 0.3162481486797333, "learning_rate": 7.851244395521171e-06, "loss": 0.3457, "step": 4487 }, { "epoch": 1.1287726358148893, "grad_norm": 0.3583298921585083, "learning_rate": 7.850042256516187e-06, "loss": 0.3418, "step": 4488 }, { "epoch": 1.1290241448692153, "grad_norm": 0.33638957142829895, "learning_rate": 7.848839873422913e-06, "loss": 0.3523, "step": 4489 }, { "epoch": 1.1292756539235413, "grad_norm": 0.3432900905609131, "learning_rate": 7.847637246344326e-06, "loss": 0.3641, "step": 4490 }, { "epoch": 1.1295271629778671, "grad_norm": 0.3538980185985565, "learning_rate": 7.846434375383425e-06, "loss": 0.347, "step": 4491 }, { "epoch": 1.1297786720321932, "grad_norm": 0.3335748016834259, "learning_rate": 7.845231260643226e-06, "loss": 0.3426, "step": 4492 }, { "epoch": 1.1300301810865192, "grad_norm": 0.3384160101413727, "learning_rate": 7.84402790222677e-06, "loss": 0.3483, "step": 4493 }, { "epoch": 1.130281690140845, "grad_norm": 0.3617180585861206, "learning_rate": 7.842824300237114e-06, "loss": 0.3726, "step": 4494 }, { "epoch": 1.130533199195171, "grad_norm": 0.3085126280784607, "learning_rate": 7.841620454777344e-06, "loss": 0.3614, "step": 4495 }, { "epoch": 1.130784708249497, "grad_norm": 0.3673594892024994, "learning_rate": 7.840416365950558e-06, "loss": 0.3277, "step": 4496 }, { "epoch": 1.1310362173038229, "grad_norm": 0.3403297960758209, "learning_rate": 7.839212033859882e-06, "loss": 0.36, "step": 4497 }, { "epoch": 1.131287726358149, "grad_norm": 0.3315725028514862, "learning_rate": 7.838007458608455e-06, "loss": 0.3523, "step": 4498 }, { "epoch": 1.131539235412475, "grad_norm": 0.3540287911891937, "learning_rate": 7.836802640299442e-06, "loss": 0.3584, "step": 4499 }, { "epoch": 1.1317907444668007, "grad_norm": 0.32690343260765076, "learning_rate": 7.835597579036031e-06, "loss": 0.3393, "step": 4500 }, { "epoch": 1.1320422535211268, "grad_norm": 0.37675777077674866, "learning_rate": 7.83439227492143e-06, "loss": 0.3682, "step": 4501 }, { "epoch": 1.1322937625754528, "grad_norm": 0.35298097133636475, "learning_rate": 7.833186728058859e-06, "loss": 0.3618, "step": 4502 }, { "epoch": 1.1325452716297786, "grad_norm": 0.33117878437042236, "learning_rate": 7.831980938551572e-06, "loss": 0.3414, "step": 4503 }, { "epoch": 1.1327967806841046, "grad_norm": 0.32076895236968994, "learning_rate": 7.83077490650283e-06, "loss": 0.3558, "step": 4504 }, { "epoch": 1.1330482897384306, "grad_norm": 0.3626091182231903, "learning_rate": 7.829568632015932e-06, "loss": 0.339, "step": 4505 }, { "epoch": 1.1332997987927564, "grad_norm": 0.35690760612487793, "learning_rate": 7.828362115194179e-06, "loss": 0.349, "step": 4506 }, { "epoch": 1.1335513078470825, "grad_norm": 0.32652193307876587, "learning_rate": 7.827155356140905e-06, "loss": 0.3566, "step": 4507 }, { "epoch": 1.1338028169014085, "grad_norm": 0.3355286419391632, "learning_rate": 7.825948354959464e-06, "loss": 0.3516, "step": 4508 }, { "epoch": 1.1340543259557343, "grad_norm": 0.35634520649909973, "learning_rate": 7.824741111753223e-06, "loss": 0.3398, "step": 4509 }, { "epoch": 1.1343058350100603, "grad_norm": 0.35237765312194824, "learning_rate": 7.823533626625577e-06, "loss": 0.3559, "step": 4510 }, { "epoch": 1.1345573440643864, "grad_norm": 0.3162923753261566, "learning_rate": 7.822325899679941e-06, "loss": 0.3601, "step": 4511 }, { "epoch": 1.1348088531187122, "grad_norm": 0.33308616280555725, "learning_rate": 7.821117931019749e-06, "loss": 0.346, "step": 4512 }, { "epoch": 1.1350603621730382, "grad_norm": 0.3171202540397644, "learning_rate": 7.819909720748454e-06, "loss": 0.3534, "step": 4513 }, { "epoch": 1.1353118712273642, "grad_norm": 0.35061752796173096, "learning_rate": 7.818701268969532e-06, "loss": 0.3766, "step": 4514 }, { "epoch": 1.1355633802816902, "grad_norm": 0.3866909444332123, "learning_rate": 7.817492575786481e-06, "loss": 0.3422, "step": 4515 }, { "epoch": 1.135814889336016, "grad_norm": 0.3715068995952606, "learning_rate": 7.816283641302815e-06, "loss": 0.3729, "step": 4516 }, { "epoch": 1.136066398390342, "grad_norm": 0.3531903922557831, "learning_rate": 7.815074465622076e-06, "loss": 0.3698, "step": 4517 }, { "epoch": 1.136317907444668, "grad_norm": 0.4138471186161041, "learning_rate": 7.81386504884782e-06, "loss": 0.341, "step": 4518 }, { "epoch": 1.136569416498994, "grad_norm": 0.350268691778183, "learning_rate": 7.812655391083625e-06, "loss": 0.3162, "step": 4519 }, { "epoch": 1.13682092555332, "grad_norm": 0.36769789457321167, "learning_rate": 7.811445492433091e-06, "loss": 0.3529, "step": 4520 }, { "epoch": 1.137072434607646, "grad_norm": 0.3521837890148163, "learning_rate": 7.810235352999842e-06, "loss": 0.3505, "step": 4521 }, { "epoch": 1.1373239436619718, "grad_norm": 0.3225024342536926, "learning_rate": 7.809024972887513e-06, "loss": 0.3648, "step": 4522 }, { "epoch": 1.1375754527162978, "grad_norm": 0.34223929047584534, "learning_rate": 7.807814352199769e-06, "loss": 0.3557, "step": 4523 }, { "epoch": 1.1378269617706238, "grad_norm": 0.33616122603416443, "learning_rate": 7.806603491040293e-06, "loss": 0.348, "step": 4524 }, { "epoch": 1.1380784708249496, "grad_norm": 0.36442843079566956, "learning_rate": 7.805392389512785e-06, "loss": 0.3607, "step": 4525 }, { "epoch": 1.1383299798792756, "grad_norm": 0.3255125880241394, "learning_rate": 7.80418104772097e-06, "loss": 0.3719, "step": 4526 }, { "epoch": 1.1385814889336017, "grad_norm": 0.31949490308761597, "learning_rate": 7.802969465768588e-06, "loss": 0.3558, "step": 4527 }, { "epoch": 1.1388329979879275, "grad_norm": 0.32983100414276123, "learning_rate": 7.801757643759408e-06, "loss": 0.363, "step": 4528 }, { "epoch": 1.1390845070422535, "grad_norm": 0.3546229898929596, "learning_rate": 7.800545581797217e-06, "loss": 0.3636, "step": 4529 }, { "epoch": 1.1393360160965795, "grad_norm": 0.40820834040641785, "learning_rate": 7.799333279985813e-06, "loss": 0.3654, "step": 4530 }, { "epoch": 1.1395875251509056, "grad_norm": 0.35177260637283325, "learning_rate": 7.798120738429028e-06, "loss": 0.357, "step": 4531 }, { "epoch": 1.1398390342052314, "grad_norm": 0.3217819929122925, "learning_rate": 7.796907957230706e-06, "loss": 0.3425, "step": 4532 }, { "epoch": 1.1400905432595574, "grad_norm": 0.4682590961456299, "learning_rate": 7.795694936494715e-06, "loss": 0.3726, "step": 4533 }, { "epoch": 1.1403420523138834, "grad_norm": 0.4092445373535156, "learning_rate": 7.794481676324944e-06, "loss": 0.3628, "step": 4534 }, { "epoch": 1.1405935613682092, "grad_norm": 0.31070423126220703, "learning_rate": 7.793268176825297e-06, "loss": 0.3421, "step": 4535 }, { "epoch": 1.1408450704225352, "grad_norm": 0.34520095586776733, "learning_rate": 7.792054438099706e-06, "loss": 0.36, "step": 4536 }, { "epoch": 1.1410965794768613, "grad_norm": 0.3309139609336853, "learning_rate": 7.790840460252121e-06, "loss": 0.3451, "step": 4537 }, { "epoch": 1.141348088531187, "grad_norm": 0.36794281005859375, "learning_rate": 7.789626243386508e-06, "loss": 0.3752, "step": 4538 }, { "epoch": 1.141599597585513, "grad_norm": 0.3274729251861572, "learning_rate": 7.78841178760686e-06, "loss": 0.3556, "step": 4539 }, { "epoch": 1.1418511066398391, "grad_norm": 0.32525065541267395, "learning_rate": 7.787197093017186e-06, "loss": 0.3666, "step": 4540 }, { "epoch": 1.142102615694165, "grad_norm": 0.3399212062358856, "learning_rate": 7.78598215972152e-06, "loss": 0.3457, "step": 4541 }, { "epoch": 1.142354124748491, "grad_norm": 0.3338754177093506, "learning_rate": 7.784766987823908e-06, "loss": 0.3819, "step": 4542 }, { "epoch": 1.142605633802817, "grad_norm": 0.3345630168914795, "learning_rate": 7.783551577428427e-06, "loss": 0.3374, "step": 4543 }, { "epoch": 1.1428571428571428, "grad_norm": 0.3454863727092743, "learning_rate": 7.782335928639167e-06, "loss": 0.3487, "step": 4544 }, { "epoch": 1.1431086519114688, "grad_norm": 0.34839943051338196, "learning_rate": 7.781120041560241e-06, "loss": 0.3635, "step": 4545 }, { "epoch": 1.1433601609657948, "grad_norm": 0.3082118034362793, "learning_rate": 7.779903916295781e-06, "loss": 0.3468, "step": 4546 }, { "epoch": 1.1436116700201207, "grad_norm": 0.32561156153678894, "learning_rate": 7.778687552949944e-06, "loss": 0.3237, "step": 4547 }, { "epoch": 1.1438631790744467, "grad_norm": 0.3549325466156006, "learning_rate": 7.777470951626899e-06, "loss": 0.3679, "step": 4548 }, { "epoch": 1.1441146881287727, "grad_norm": 0.33792486786842346, "learning_rate": 7.776254112430843e-06, "loss": 0.3508, "step": 4549 }, { "epoch": 1.1443661971830985, "grad_norm": 0.3282465934753418, "learning_rate": 7.775037035465992e-06, "loss": 0.3501, "step": 4550 }, { "epoch": 1.1446177062374245, "grad_norm": 0.3474048972129822, "learning_rate": 7.773819720836577e-06, "loss": 0.3573, "step": 4551 }, { "epoch": 1.1448692152917506, "grad_norm": 0.36000293493270874, "learning_rate": 7.77260216864686e-06, "loss": 0.3291, "step": 4552 }, { "epoch": 1.1451207243460764, "grad_norm": 0.3380999267101288, "learning_rate": 7.771384379001107e-06, "loss": 0.3461, "step": 4553 }, { "epoch": 1.1453722334004024, "grad_norm": 0.3201408386230469, "learning_rate": 7.770166352003622e-06, "loss": 0.3633, "step": 4554 }, { "epoch": 1.1456237424547284, "grad_norm": 0.3245110809803009, "learning_rate": 7.768948087758721e-06, "loss": 0.3486, "step": 4555 }, { "epoch": 1.1458752515090542, "grad_norm": 0.33637115359306335, "learning_rate": 7.767729586370738e-06, "loss": 0.3535, "step": 4556 }, { "epoch": 1.1461267605633803, "grad_norm": 0.33941903710365295, "learning_rate": 7.76651084794403e-06, "loss": 0.3553, "step": 4557 }, { "epoch": 1.1463782696177063, "grad_norm": 0.3234095275402069, "learning_rate": 7.765291872582975e-06, "loss": 0.3674, "step": 4558 }, { "epoch": 1.146629778672032, "grad_norm": 0.34868893027305603, "learning_rate": 7.76407266039197e-06, "loss": 0.349, "step": 4559 }, { "epoch": 1.1468812877263581, "grad_norm": 0.3286401927471161, "learning_rate": 7.762853211475436e-06, "loss": 0.3362, "step": 4560 }, { "epoch": 1.1471327967806841, "grad_norm": 0.3449994623661041, "learning_rate": 7.761633525937808e-06, "loss": 0.3408, "step": 4561 }, { "epoch": 1.14738430583501, "grad_norm": 0.42614054679870605, "learning_rate": 7.760413603883546e-06, "loss": 0.3397, "step": 4562 }, { "epoch": 1.147635814889336, "grad_norm": 0.34790942072868347, "learning_rate": 7.759193445417126e-06, "loss": 0.3705, "step": 4563 }, { "epoch": 1.147887323943662, "grad_norm": 0.33942970633506775, "learning_rate": 7.757973050643048e-06, "loss": 0.3671, "step": 4564 }, { "epoch": 1.148138832997988, "grad_norm": 0.34391435980796814, "learning_rate": 7.756752419665833e-06, "loss": 0.3341, "step": 4565 }, { "epoch": 1.1483903420523138, "grad_norm": 0.3598215579986572, "learning_rate": 7.755531552590021e-06, "loss": 0.347, "step": 4566 }, { "epoch": 1.1486418511066399, "grad_norm": 0.31856873631477356, "learning_rate": 7.754310449520169e-06, "loss": 0.3475, "step": 4567 }, { "epoch": 1.1488933601609659, "grad_norm": 0.3364936411380768, "learning_rate": 7.753089110560858e-06, "loss": 0.3521, "step": 4568 }, { "epoch": 1.1491448692152917, "grad_norm": 0.3098117709159851, "learning_rate": 7.751867535816689e-06, "loss": 0.3435, "step": 4569 }, { "epoch": 1.1493963782696177, "grad_norm": 0.33782443404197693, "learning_rate": 7.750645725392278e-06, "loss": 0.3309, "step": 4570 }, { "epoch": 1.1496478873239437, "grad_norm": 0.3181942403316498, "learning_rate": 7.749423679392271e-06, "loss": 0.3532, "step": 4571 }, { "epoch": 1.1498993963782695, "grad_norm": 0.32375866174697876, "learning_rate": 7.748201397921326e-06, "loss": 0.3546, "step": 4572 }, { "epoch": 1.1501509054325956, "grad_norm": 0.32787060737609863, "learning_rate": 7.746978881084124e-06, "loss": 0.3732, "step": 4573 }, { "epoch": 1.1504024144869216, "grad_norm": 0.34005919098854065, "learning_rate": 7.745756128985367e-06, "loss": 0.3587, "step": 4574 }, { "epoch": 1.1506539235412474, "grad_norm": 0.3388522267341614, "learning_rate": 7.744533141729773e-06, "loss": 0.3393, "step": 4575 }, { "epoch": 1.1509054325955734, "grad_norm": 0.35405799746513367, "learning_rate": 7.743309919422086e-06, "loss": 0.3485, "step": 4576 }, { "epoch": 1.1511569416498995, "grad_norm": 0.3198212683200836, "learning_rate": 7.742086462167066e-06, "loss": 0.3477, "step": 4577 }, { "epoch": 1.1514084507042253, "grad_norm": 0.3119359612464905, "learning_rate": 7.740862770069494e-06, "loss": 0.3464, "step": 4578 }, { "epoch": 1.1516599597585513, "grad_norm": 0.32057052850723267, "learning_rate": 7.739638843234176e-06, "loss": 0.3406, "step": 4579 }, { "epoch": 1.1519114688128773, "grad_norm": 0.37748971581459045, "learning_rate": 7.738414681765928e-06, "loss": 0.3845, "step": 4580 }, { "epoch": 1.1521629778672033, "grad_norm": 0.35111579298973083, "learning_rate": 7.737190285769594e-06, "loss": 0.3628, "step": 4581 }, { "epoch": 1.1524144869215291, "grad_norm": 0.31299889087677, "learning_rate": 7.735965655350034e-06, "loss": 0.3171, "step": 4582 }, { "epoch": 1.1526659959758552, "grad_norm": 0.3520580232143402, "learning_rate": 7.734740790612137e-06, "loss": 0.3537, "step": 4583 }, { "epoch": 1.1529175050301812, "grad_norm": 0.3616025447845459, "learning_rate": 7.733515691660795e-06, "loss": 0.3483, "step": 4584 }, { "epoch": 1.153169014084507, "grad_norm": 0.3729570508003235, "learning_rate": 7.732290358600936e-06, "loss": 0.3418, "step": 4585 }, { "epoch": 1.153420523138833, "grad_norm": 0.3173588812351227, "learning_rate": 7.731064791537501e-06, "loss": 0.3572, "step": 4586 }, { "epoch": 1.153672032193159, "grad_norm": 0.3116159439086914, "learning_rate": 7.72983899057545e-06, "loss": 0.3276, "step": 4587 }, { "epoch": 1.1539235412474849, "grad_norm": 0.33578798174858093, "learning_rate": 7.728612955819773e-06, "loss": 0.3469, "step": 4588 }, { "epoch": 1.154175050301811, "grad_norm": 0.35310569405555725, "learning_rate": 7.727386687375461e-06, "loss": 0.3701, "step": 4589 }, { "epoch": 1.154426559356137, "grad_norm": 0.37188154458999634, "learning_rate": 7.726160185347544e-06, "loss": 0.4008, "step": 4590 }, { "epoch": 1.1546780684104627, "grad_norm": 0.3462255597114563, "learning_rate": 7.724933449841061e-06, "loss": 0.34, "step": 4591 }, { "epoch": 1.1549295774647887, "grad_norm": 0.31674739718437195, "learning_rate": 7.723706480961078e-06, "loss": 0.3469, "step": 4592 }, { "epoch": 1.1551810865191148, "grad_norm": 0.3254792392253876, "learning_rate": 7.722479278812672e-06, "loss": 0.349, "step": 4593 }, { "epoch": 1.1554325955734406, "grad_norm": 0.36106935143470764, "learning_rate": 7.721251843500948e-06, "loss": 0.347, "step": 4594 }, { "epoch": 1.1556841046277666, "grad_norm": 0.3273780345916748, "learning_rate": 7.720024175131027e-06, "loss": 0.3775, "step": 4595 }, { "epoch": 1.1559356136820926, "grad_norm": 0.31497684121131897, "learning_rate": 7.71879627380805e-06, "loss": 0.3642, "step": 4596 }, { "epoch": 1.1561871227364184, "grad_norm": 0.360487163066864, "learning_rate": 7.717568139637184e-06, "loss": 0.3624, "step": 4597 }, { "epoch": 1.1564386317907445, "grad_norm": 0.3374006748199463, "learning_rate": 7.716339772723608e-06, "loss": 0.3559, "step": 4598 }, { "epoch": 1.1566901408450705, "grad_norm": 0.3451824486255646, "learning_rate": 7.715111173172522e-06, "loss": 0.3666, "step": 4599 }, { "epoch": 1.1569416498993963, "grad_norm": 0.3282189667224884, "learning_rate": 7.713882341089151e-06, "loss": 0.3532, "step": 4600 }, { "epoch": 1.1571931589537223, "grad_norm": 0.34830883145332336, "learning_rate": 7.712653276578734e-06, "loss": 0.3937, "step": 4601 }, { "epoch": 1.1574446680080483, "grad_norm": 0.32040953636169434, "learning_rate": 7.711423979746537e-06, "loss": 0.3401, "step": 4602 }, { "epoch": 1.1576961770623742, "grad_norm": 0.3415544927120209, "learning_rate": 7.710194450697837e-06, "loss": 0.3673, "step": 4603 }, { "epoch": 1.1579476861167002, "grad_norm": 0.35662606358528137, "learning_rate": 7.708964689537937e-06, "loss": 0.3489, "step": 4604 }, { "epoch": 1.1581991951710262, "grad_norm": 0.34960421919822693, "learning_rate": 7.707734696372158e-06, "loss": 0.3528, "step": 4605 }, { "epoch": 1.158450704225352, "grad_norm": 0.30523577332496643, "learning_rate": 7.706504471305843e-06, "loss": 0.344, "step": 4606 }, { "epoch": 1.158702213279678, "grad_norm": 0.33770856261253357, "learning_rate": 7.70527401444435e-06, "loss": 0.3737, "step": 4607 }, { "epoch": 1.158953722334004, "grad_norm": 0.34141412377357483, "learning_rate": 7.704043325893064e-06, "loss": 0.3384, "step": 4608 }, { "epoch": 1.1592052313883299, "grad_norm": 0.35698843002319336, "learning_rate": 7.702812405757382e-06, "loss": 0.3231, "step": 4609 }, { "epoch": 1.159456740442656, "grad_norm": 0.37319764494895935, "learning_rate": 7.701581254142728e-06, "loss": 0.3508, "step": 4610 }, { "epoch": 1.159708249496982, "grad_norm": 0.32721927762031555, "learning_rate": 7.70034987115454e-06, "loss": 0.3482, "step": 4611 }, { "epoch": 1.1599597585513077, "grad_norm": 0.3475745618343353, "learning_rate": 7.699118256898277e-06, "loss": 0.3454, "step": 4612 }, { "epoch": 1.1602112676056338, "grad_norm": 0.3759898245334625, "learning_rate": 7.697886411479422e-06, "loss": 0.3683, "step": 4613 }, { "epoch": 1.1604627766599598, "grad_norm": 0.35182997584342957, "learning_rate": 7.696654335003475e-06, "loss": 0.3519, "step": 4614 }, { "epoch": 1.1607142857142858, "grad_norm": 0.38022348284721375, "learning_rate": 7.695422027575953e-06, "loss": 0.3434, "step": 4615 }, { "epoch": 1.1609657947686116, "grad_norm": 0.36011841893196106, "learning_rate": 7.694189489302399e-06, "loss": 0.3508, "step": 4616 }, { "epoch": 1.1612173038229376, "grad_norm": 0.3434779942035675, "learning_rate": 7.692956720288369e-06, "loss": 0.3481, "step": 4617 }, { "epoch": 1.1614688128772637, "grad_norm": 0.3736356198787689, "learning_rate": 7.69172372063944e-06, "loss": 0.3376, "step": 4618 }, { "epoch": 1.1617203219315895, "grad_norm": 0.3600045144557953, "learning_rate": 7.690490490461217e-06, "loss": 0.3435, "step": 4619 }, { "epoch": 1.1619718309859155, "grad_norm": 0.3338755965232849, "learning_rate": 7.689257029859316e-06, "loss": 0.3457, "step": 4620 }, { "epoch": 1.1622233400402415, "grad_norm": 0.33322474360466003, "learning_rate": 7.688023338939373e-06, "loss": 0.3442, "step": 4621 }, { "epoch": 1.1624748490945673, "grad_norm": 0.3295151889324188, "learning_rate": 7.686789417807045e-06, "loss": 0.329, "step": 4622 }, { "epoch": 1.1627263581488934, "grad_norm": 0.36218923330307007, "learning_rate": 7.685555266568014e-06, "loss": 0.3676, "step": 4623 }, { "epoch": 1.1629778672032194, "grad_norm": 0.3499116003513336, "learning_rate": 7.684320885327976e-06, "loss": 0.374, "step": 4624 }, { "epoch": 1.1632293762575452, "grad_norm": 0.3172034025192261, "learning_rate": 7.683086274192647e-06, "loss": 0.3304, "step": 4625 }, { "epoch": 1.1634808853118712, "grad_norm": 0.3143727481365204, "learning_rate": 7.681851433267762e-06, "loss": 0.35, "step": 4626 }, { "epoch": 1.1637323943661972, "grad_norm": 0.3694257140159607, "learning_rate": 7.680616362659082e-06, "loss": 0.3583, "step": 4627 }, { "epoch": 1.163983903420523, "grad_norm": 0.3408942222595215, "learning_rate": 7.679381062472377e-06, "loss": 0.3374, "step": 4628 }, { "epoch": 1.164235412474849, "grad_norm": 0.29644355177879333, "learning_rate": 7.678145532813448e-06, "loss": 0.3226, "step": 4629 }, { "epoch": 1.164486921529175, "grad_norm": 0.3752206265926361, "learning_rate": 7.67690977378811e-06, "loss": 0.3861, "step": 4630 }, { "epoch": 1.1647384305835011, "grad_norm": 0.3474452495574951, "learning_rate": 7.675673785502195e-06, "loss": 0.3807, "step": 4631 }, { "epoch": 1.164989939637827, "grad_norm": 0.3323638141155243, "learning_rate": 7.674437568061559e-06, "loss": 0.347, "step": 4632 }, { "epoch": 1.165241448692153, "grad_norm": 0.322618305683136, "learning_rate": 7.673201121572077e-06, "loss": 0.3689, "step": 4633 }, { "epoch": 1.165492957746479, "grad_norm": 0.3595317304134369, "learning_rate": 7.671964446139643e-06, "loss": 0.3701, "step": 4634 }, { "epoch": 1.1657444668008048, "grad_norm": 0.33650046586990356, "learning_rate": 7.67072754187017e-06, "loss": 0.3491, "step": 4635 }, { "epoch": 1.1659959758551308, "grad_norm": 0.3536091148853302, "learning_rate": 7.66949040886959e-06, "loss": 0.3488, "step": 4636 }, { "epoch": 1.1662474849094568, "grad_norm": 0.3690861761569977, "learning_rate": 7.668253047243856e-06, "loss": 0.3511, "step": 4637 }, { "epoch": 1.1664989939637826, "grad_norm": 0.33993828296661377, "learning_rate": 7.667015457098944e-06, "loss": 0.358, "step": 4638 }, { "epoch": 1.1667505030181087, "grad_norm": 0.3575940728187561, "learning_rate": 7.66577763854084e-06, "loss": 0.3414, "step": 4639 }, { "epoch": 1.1670020120724347, "grad_norm": 0.3681694567203522, "learning_rate": 7.664539591675559e-06, "loss": 0.3922, "step": 4640 }, { "epoch": 1.1672535211267605, "grad_norm": 0.3579506576061249, "learning_rate": 7.663301316609131e-06, "loss": 0.341, "step": 4641 }, { "epoch": 1.1675050301810865, "grad_norm": 0.3645816147327423, "learning_rate": 7.662062813447608e-06, "loss": 0.3502, "step": 4642 }, { "epoch": 1.1677565392354126, "grad_norm": 0.34026220440864563, "learning_rate": 7.660824082297057e-06, "loss": 0.3656, "step": 4643 }, { "epoch": 1.1680080482897384, "grad_norm": 0.3202768862247467, "learning_rate": 7.659585123263571e-06, "loss": 0.354, "step": 4644 }, { "epoch": 1.1682595573440644, "grad_norm": 0.3285091519355774, "learning_rate": 7.658345936453257e-06, "loss": 0.3649, "step": 4645 }, { "epoch": 1.1685110663983904, "grad_norm": 0.3168383538722992, "learning_rate": 7.657106521972246e-06, "loss": 0.3475, "step": 4646 }, { "epoch": 1.1687625754527162, "grad_norm": 0.3457097113132477, "learning_rate": 7.655866879926682e-06, "loss": 0.3591, "step": 4647 }, { "epoch": 1.1690140845070423, "grad_norm": 0.31571948528289795, "learning_rate": 7.654627010422735e-06, "loss": 0.3494, "step": 4648 }, { "epoch": 1.1692655935613683, "grad_norm": 0.3494684398174286, "learning_rate": 7.653386913566593e-06, "loss": 0.3648, "step": 4649 }, { "epoch": 1.169517102615694, "grad_norm": 0.3523896038532257, "learning_rate": 7.65214658946446e-06, "loss": 0.3614, "step": 4650 }, { "epoch": 1.16976861167002, "grad_norm": 0.32975268363952637, "learning_rate": 7.650906038222563e-06, "loss": 0.3505, "step": 4651 }, { "epoch": 1.1700201207243461, "grad_norm": 0.35798007249832153, "learning_rate": 7.64966525994715e-06, "loss": 0.355, "step": 4652 }, { "epoch": 1.170271629778672, "grad_norm": 0.3435303568840027, "learning_rate": 7.648424254744481e-06, "loss": 0.3548, "step": 4653 }, { "epoch": 1.170523138832998, "grad_norm": 0.3186478316783905, "learning_rate": 7.647183022720846e-06, "loss": 0.3469, "step": 4654 }, { "epoch": 1.170774647887324, "grad_norm": 0.3599293529987335, "learning_rate": 7.645941563982544e-06, "loss": 0.3726, "step": 4655 }, { "epoch": 1.1710261569416498, "grad_norm": 0.3607332706451416, "learning_rate": 7.644699878635901e-06, "loss": 0.339, "step": 4656 }, { "epoch": 1.1712776659959758, "grad_norm": 0.3603340983390808, "learning_rate": 7.643457966787258e-06, "loss": 0.369, "step": 4657 }, { "epoch": 1.1715291750503019, "grad_norm": 0.3286212980747223, "learning_rate": 7.642215828542977e-06, "loss": 0.3645, "step": 4658 }, { "epoch": 1.1717806841046277, "grad_norm": 0.35398638248443604, "learning_rate": 7.64097346400944e-06, "loss": 0.3453, "step": 4659 }, { "epoch": 1.1720321931589537, "grad_norm": 0.3555023968219757, "learning_rate": 7.63973087329305e-06, "loss": 0.3515, "step": 4660 }, { "epoch": 1.1722837022132797, "grad_norm": 0.3314943015575409, "learning_rate": 7.638488056500222e-06, "loss": 0.3579, "step": 4661 }, { "epoch": 1.1725352112676055, "grad_norm": 0.3424238860607147, "learning_rate": 7.637245013737399e-06, "loss": 0.3462, "step": 4662 }, { "epoch": 1.1727867203219315, "grad_norm": 0.3617474138736725, "learning_rate": 7.636001745111039e-06, "loss": 0.3541, "step": 4663 }, { "epoch": 1.1730382293762576, "grad_norm": 0.3585506081581116, "learning_rate": 7.634758250727621e-06, "loss": 0.3603, "step": 4664 }, { "epoch": 1.1732897384305836, "grad_norm": 0.35511475801467896, "learning_rate": 7.633514530693642e-06, "loss": 0.3553, "step": 4665 }, { "epoch": 1.1735412474849094, "grad_norm": 0.3293471038341522, "learning_rate": 7.632270585115618e-06, "loss": 0.3478, "step": 4666 }, { "epoch": 1.1737927565392354, "grad_norm": 0.3938407599925995, "learning_rate": 7.631026414100086e-06, "loss": 0.3484, "step": 4667 }, { "epoch": 1.1740442655935615, "grad_norm": 0.3182221055030823, "learning_rate": 7.629782017753602e-06, "loss": 0.3623, "step": 4668 }, { "epoch": 1.1742957746478873, "grad_norm": 0.34111714363098145, "learning_rate": 7.628537396182739e-06, "loss": 0.3747, "step": 4669 }, { "epoch": 1.1745472837022133, "grad_norm": 0.3676598072052002, "learning_rate": 7.627292549494092e-06, "loss": 0.3687, "step": 4670 }, { "epoch": 1.1747987927565393, "grad_norm": 0.3540399670600891, "learning_rate": 7.626047477794276e-06, "loss": 0.3611, "step": 4671 }, { "epoch": 1.1750503018108651, "grad_norm": 0.32811835408210754, "learning_rate": 7.62480218118992e-06, "loss": 0.3235, "step": 4672 }, { "epoch": 1.1753018108651911, "grad_norm": 0.3579806089401245, "learning_rate": 7.6235566597876786e-06, "loss": 0.3429, "step": 4673 }, { "epoch": 1.1755533199195172, "grad_norm": 0.3340808153152466, "learning_rate": 7.622310913694222e-06, "loss": 0.3554, "step": 4674 }, { "epoch": 1.175804828973843, "grad_norm": 0.3324950933456421, "learning_rate": 7.621064943016241e-06, "loss": 0.3406, "step": 4675 }, { "epoch": 1.176056338028169, "grad_norm": 0.3520958423614502, "learning_rate": 7.6198187478604455e-06, "loss": 0.3199, "step": 4676 }, { "epoch": 1.176307847082495, "grad_norm": 0.3330570459365845, "learning_rate": 7.618572328333565e-06, "loss": 0.3447, "step": 4677 }, { "epoch": 1.1765593561368208, "grad_norm": 0.3154620826244354, "learning_rate": 7.617325684542344e-06, "loss": 0.3549, "step": 4678 }, { "epoch": 1.1768108651911469, "grad_norm": 0.29848769307136536, "learning_rate": 7.6160788165935525e-06, "loss": 0.3355, "step": 4679 }, { "epoch": 1.1770623742454729, "grad_norm": 0.32562530040740967, "learning_rate": 7.6148317245939766e-06, "loss": 0.3314, "step": 4680 }, { "epoch": 1.177313883299799, "grad_norm": 0.33761054277420044, "learning_rate": 7.613584408650423e-06, "loss": 0.3452, "step": 4681 }, { "epoch": 1.1775653923541247, "grad_norm": 0.3247910737991333, "learning_rate": 7.612336868869714e-06, "loss": 0.3577, "step": 4682 }, { "epoch": 1.1778169014084507, "grad_norm": 0.3061235845088959, "learning_rate": 7.611089105358695e-06, "loss": 0.3222, "step": 4683 }, { "epoch": 1.1780684104627768, "grad_norm": 0.32039159536361694, "learning_rate": 7.609841118224229e-06, "loss": 0.3555, "step": 4684 }, { "epoch": 1.1783199195171026, "grad_norm": 0.3344159722328186, "learning_rate": 7.608592907573199e-06, "loss": 0.3256, "step": 4685 }, { "epoch": 1.1785714285714286, "grad_norm": 0.32991334795951843, "learning_rate": 7.607344473512506e-06, "loss": 0.3265, "step": 4686 }, { "epoch": 1.1788229376257546, "grad_norm": 0.35629698634147644, "learning_rate": 7.606095816149069e-06, "loss": 0.3511, "step": 4687 }, { "epoch": 1.1790744466800804, "grad_norm": 0.3231953978538513, "learning_rate": 7.60484693558983e-06, "loss": 0.3497, "step": 4688 }, { "epoch": 1.1793259557344065, "grad_norm": 0.35513460636138916, "learning_rate": 7.603597831941747e-06, "loss": 0.3495, "step": 4689 }, { "epoch": 1.1795774647887325, "grad_norm": 0.34873831272125244, "learning_rate": 7.602348505311797e-06, "loss": 0.3525, "step": 4690 }, { "epoch": 1.1798289738430583, "grad_norm": 0.300342857837677, "learning_rate": 7.601098955806978e-06, "loss": 0.3704, "step": 4691 }, { "epoch": 1.1800804828973843, "grad_norm": 0.3526897430419922, "learning_rate": 7.5998491835343065e-06, "loss": 0.3513, "step": 4692 }, { "epoch": 1.1803319919517103, "grad_norm": 0.3581010103225708, "learning_rate": 7.598599188600817e-06, "loss": 0.3653, "step": 4693 }, { "epoch": 1.1805835010060362, "grad_norm": 0.31560835242271423, "learning_rate": 7.5973489711135625e-06, "loss": 0.3527, "step": 4694 }, { "epoch": 1.1808350100603622, "grad_norm": 0.3127973973751068, "learning_rate": 7.596098531179619e-06, "loss": 0.378, "step": 4695 }, { "epoch": 1.1810865191146882, "grad_norm": 0.35242587327957153, "learning_rate": 7.594847868906076e-06, "loss": 0.3592, "step": 4696 }, { "epoch": 1.181338028169014, "grad_norm": 0.33138447999954224, "learning_rate": 7.593596984400048e-06, "loss": 0.391, "step": 4697 }, { "epoch": 1.18158953722334, "grad_norm": 0.3515087068080902, "learning_rate": 7.592345877768663e-06, "loss": 0.3641, "step": 4698 }, { "epoch": 1.181841046277666, "grad_norm": 0.32551079988479614, "learning_rate": 7.591094549119071e-06, "loss": 0.3725, "step": 4699 }, { "epoch": 1.1820925553319919, "grad_norm": 0.32543718814849854, "learning_rate": 7.589842998558441e-06, "loss": 0.3395, "step": 4700 }, { "epoch": 1.182344064386318, "grad_norm": 0.33554258942604065, "learning_rate": 7.58859122619396e-06, "loss": 0.3503, "step": 4701 }, { "epoch": 1.182595573440644, "grad_norm": 0.33632540702819824, "learning_rate": 7.587339232132835e-06, "loss": 0.3622, "step": 4702 }, { "epoch": 1.1828470824949697, "grad_norm": 0.32022663950920105, "learning_rate": 7.586087016482291e-06, "loss": 0.3443, "step": 4703 }, { "epoch": 1.1830985915492958, "grad_norm": 0.3406105637550354, "learning_rate": 7.584834579349572e-06, "loss": 0.3413, "step": 4704 }, { "epoch": 1.1833501006036218, "grad_norm": 0.3901978135108948, "learning_rate": 7.5835819208419425e-06, "loss": 0.3665, "step": 4705 }, { "epoch": 1.1836016096579476, "grad_norm": 0.3854331076145172, "learning_rate": 7.5823290410666835e-06, "loss": 0.3607, "step": 4706 }, { "epoch": 1.1838531187122736, "grad_norm": 0.3867035210132599, "learning_rate": 7.5810759401310975e-06, "loss": 0.3537, "step": 4707 }, { "epoch": 1.1841046277665996, "grad_norm": 0.34121641516685486, "learning_rate": 7.579822618142505e-06, "loss": 0.3496, "step": 4708 }, { "epoch": 1.1843561368209254, "grad_norm": 0.3433249592781067, "learning_rate": 7.578569075208244e-06, "loss": 0.3553, "step": 4709 }, { "epoch": 1.1846076458752515, "grad_norm": 0.34000903367996216, "learning_rate": 7.577315311435674e-06, "loss": 0.3435, "step": 4710 }, { "epoch": 1.1848591549295775, "grad_norm": 0.3518748879432678, "learning_rate": 7.5760613269321715e-06, "loss": 0.382, "step": 4711 }, { "epoch": 1.1851106639839033, "grad_norm": 0.30435121059417725, "learning_rate": 7.574807121805131e-06, "loss": 0.3234, "step": 4712 }, { "epoch": 1.1853621730382293, "grad_norm": 0.33615031838417053, "learning_rate": 7.573552696161969e-06, "loss": 0.3528, "step": 4713 }, { "epoch": 1.1856136820925554, "grad_norm": 0.3324757516384125, "learning_rate": 7.572298050110118e-06, "loss": 0.3725, "step": 4714 }, { "epoch": 1.1858651911468814, "grad_norm": 0.37572944164276123, "learning_rate": 7.571043183757032e-06, "loss": 0.3328, "step": 4715 }, { "epoch": 1.1861167002012072, "grad_norm": 0.31655603647232056, "learning_rate": 7.56978809721018e-06, "loss": 0.3458, "step": 4716 }, { "epoch": 1.1863682092555332, "grad_norm": 0.33465415239334106, "learning_rate": 7.568532790577057e-06, "loss": 0.369, "step": 4717 }, { "epoch": 1.1866197183098592, "grad_norm": 0.33323749899864197, "learning_rate": 7.567277263965167e-06, "loss": 0.3469, "step": 4718 }, { "epoch": 1.186871227364185, "grad_norm": 0.31222718954086304, "learning_rate": 7.566021517482041e-06, "loss": 0.37, "step": 4719 }, { "epoch": 1.187122736418511, "grad_norm": 0.3619459271430969, "learning_rate": 7.5647655512352245e-06, "loss": 0.3535, "step": 4720 }, { "epoch": 1.187374245472837, "grad_norm": 0.3543090224266052, "learning_rate": 7.563509365332285e-06, "loss": 0.3578, "step": 4721 }, { "epoch": 1.187625754527163, "grad_norm": 0.3527710437774658, "learning_rate": 7.562252959880804e-06, "loss": 0.356, "step": 4722 }, { "epoch": 1.187877263581489, "grad_norm": 0.3505546748638153, "learning_rate": 7.560996334988386e-06, "loss": 0.3641, "step": 4723 }, { "epoch": 1.188128772635815, "grad_norm": 0.3206879794597626, "learning_rate": 7.5597394907626555e-06, "loss": 0.361, "step": 4724 }, { "epoch": 1.1883802816901408, "grad_norm": 0.3364570140838623, "learning_rate": 7.55848242731125e-06, "loss": 0.339, "step": 4725 }, { "epoch": 1.1886317907444668, "grad_norm": 0.3259868919849396, "learning_rate": 7.557225144741831e-06, "loss": 0.3472, "step": 4726 }, { "epoch": 1.1888832997987928, "grad_norm": 0.34462079405784607, "learning_rate": 7.5559676431620745e-06, "loss": 0.3461, "step": 4727 }, { "epoch": 1.1891348088531186, "grad_norm": 0.34534603357315063, "learning_rate": 7.554709922679681e-06, "loss": 0.38, "step": 4728 }, { "epoch": 1.1893863179074446, "grad_norm": 0.32316854596138, "learning_rate": 7.553451983402364e-06, "loss": 0.3291, "step": 4729 }, { "epoch": 1.1896378269617707, "grad_norm": 0.34536492824554443, "learning_rate": 7.552193825437861e-06, "loss": 0.3607, "step": 4730 }, { "epoch": 1.1898893360160967, "grad_norm": 0.34370651841163635, "learning_rate": 7.550935448893921e-06, "loss": 0.3472, "step": 4731 }, { "epoch": 1.1901408450704225, "grad_norm": 0.3375087380409241, "learning_rate": 7.54967685387832e-06, "loss": 0.364, "step": 4732 }, { "epoch": 1.1903923541247485, "grad_norm": 0.3438502848148346, "learning_rate": 7.548418040498847e-06, "loss": 0.3446, "step": 4733 }, { "epoch": 1.1906438631790746, "grad_norm": 0.34919726848602295, "learning_rate": 7.547159008863312e-06, "loss": 0.3661, "step": 4734 }, { "epoch": 1.1908953722334004, "grad_norm": 0.3322198987007141, "learning_rate": 7.545899759079542e-06, "loss": 0.3845, "step": 4735 }, { "epoch": 1.1911468812877264, "grad_norm": 0.3598858416080475, "learning_rate": 7.544640291255385e-06, "loss": 0.3669, "step": 4736 }, { "epoch": 1.1913983903420524, "grad_norm": 0.3647436797618866, "learning_rate": 7.543380605498707e-06, "loss": 0.3699, "step": 4737 }, { "epoch": 1.1916498993963782, "grad_norm": 0.3301692605018616, "learning_rate": 7.542120701917391e-06, "loss": 0.3728, "step": 4738 }, { "epoch": 1.1919014084507042, "grad_norm": 0.33648648858070374, "learning_rate": 7.540860580619339e-06, "loss": 0.3443, "step": 4739 }, { "epoch": 1.1921529175050303, "grad_norm": 0.3432157337665558, "learning_rate": 7.539600241712475e-06, "loss": 0.3518, "step": 4740 }, { "epoch": 1.192404426559356, "grad_norm": 0.32822272181510925, "learning_rate": 7.538339685304737e-06, "loss": 0.3609, "step": 4741 }, { "epoch": 1.192655935613682, "grad_norm": 0.334031343460083, "learning_rate": 7.537078911504087e-06, "loss": 0.3529, "step": 4742 }, { "epoch": 1.1929074446680081, "grad_norm": 0.3261743187904358, "learning_rate": 7.5358179204184975e-06, "loss": 0.3695, "step": 4743 }, { "epoch": 1.193158953722334, "grad_norm": 0.3211628496646881, "learning_rate": 7.53455671215597e-06, "loss": 0.3529, "step": 4744 }, { "epoch": 1.19341046277666, "grad_norm": 0.30509626865386963, "learning_rate": 7.533295286824513e-06, "loss": 0.3663, "step": 4745 }, { "epoch": 1.193661971830986, "grad_norm": 0.3319106101989746, "learning_rate": 7.532033644532166e-06, "loss": 0.3598, "step": 4746 }, { "epoch": 1.1939134808853118, "grad_norm": 0.34237736463546753, "learning_rate": 7.530771785386976e-06, "loss": 0.3661, "step": 4747 }, { "epoch": 1.1941649899396378, "grad_norm": 0.3307172656059265, "learning_rate": 7.5295097094970136e-06, "loss": 0.3489, "step": 4748 }, { "epoch": 1.1944164989939638, "grad_norm": 0.32638081908226013, "learning_rate": 7.528247416970371e-06, "loss": 0.3613, "step": 4749 }, { "epoch": 1.1946680080482897, "grad_norm": 0.3481774628162384, "learning_rate": 7.5269849079151535e-06, "loss": 0.3557, "step": 4750 }, { "epoch": 1.1949195171026157, "grad_norm": 0.3151308298110962, "learning_rate": 7.525722182439488e-06, "loss": 0.3375, "step": 4751 }, { "epoch": 1.1951710261569417, "grad_norm": 0.32890570163726807, "learning_rate": 7.524459240651518e-06, "loss": 0.3387, "step": 4752 }, { "epoch": 1.1954225352112675, "grad_norm": 0.3260030150413513, "learning_rate": 7.523196082659408e-06, "loss": 0.3428, "step": 4753 }, { "epoch": 1.1956740442655935, "grad_norm": 0.3384077847003937, "learning_rate": 7.521932708571338e-06, "loss": 0.3585, "step": 4754 }, { "epoch": 1.1959255533199196, "grad_norm": 0.3400633633136749, "learning_rate": 7.520669118495507e-06, "loss": 0.3382, "step": 4755 }, { "epoch": 1.1961770623742454, "grad_norm": 0.3479855954647064, "learning_rate": 7.519405312540137e-06, "loss": 0.3566, "step": 4756 }, { "epoch": 1.1964285714285714, "grad_norm": 0.3062722980976105, "learning_rate": 7.518141290813463e-06, "loss": 0.3361, "step": 4757 }, { "epoch": 1.1966800804828974, "grad_norm": 0.33496060967445374, "learning_rate": 7.5168770534237414e-06, "loss": 0.3296, "step": 4758 }, { "epoch": 1.1969315895372232, "grad_norm": 0.3257729709148407, "learning_rate": 7.515612600479243e-06, "loss": 0.3502, "step": 4759 }, { "epoch": 1.1971830985915493, "grad_norm": 0.3281537890434265, "learning_rate": 7.514347932088266e-06, "loss": 0.3289, "step": 4760 }, { "epoch": 1.1974346076458753, "grad_norm": 0.324022501707077, "learning_rate": 7.513083048359117e-06, "loss": 0.327, "step": 4761 }, { "epoch": 1.197686116700201, "grad_norm": 0.3308146595954895, "learning_rate": 7.511817949400126e-06, "loss": 0.3437, "step": 4762 }, { "epoch": 1.1979376257545271, "grad_norm": 0.36193880438804626, "learning_rate": 7.510552635319643e-06, "loss": 0.3438, "step": 4763 }, { "epoch": 1.1981891348088531, "grad_norm": 0.3138233721256256, "learning_rate": 7.509287106226033e-06, "loss": 0.3297, "step": 4764 }, { "epoch": 1.1984406438631792, "grad_norm": 0.3267454504966736, "learning_rate": 7.50802136222768e-06, "loss": 0.3373, "step": 4765 }, { "epoch": 1.198692152917505, "grad_norm": 0.34192612767219543, "learning_rate": 7.506755403432987e-06, "loss": 0.366, "step": 4766 }, { "epoch": 1.198943661971831, "grad_norm": 0.3394359350204468, "learning_rate": 7.505489229950375e-06, "loss": 0.3285, "step": 4767 }, { "epoch": 1.199195171026157, "grad_norm": 0.3370779752731323, "learning_rate": 7.504222841888287e-06, "loss": 0.3262, "step": 4768 }, { "epoch": 1.1994466800804828, "grad_norm": 0.33641576766967773, "learning_rate": 7.502956239355178e-06, "loss": 0.3301, "step": 4769 }, { "epoch": 1.1996981891348089, "grad_norm": 0.33692795038223267, "learning_rate": 7.5016894224595235e-06, "loss": 0.3633, "step": 4770 }, { "epoch": 1.1999496981891349, "grad_norm": 0.3135643005371094, "learning_rate": 7.5004223913098224e-06, "loss": 0.3528, "step": 4771 }, { "epoch": 1.2002012072434607, "grad_norm": 0.37040475010871887, "learning_rate": 7.4991551460145874e-06, "loss": 0.3748, "step": 4772 }, { "epoch": 1.2004527162977867, "grad_norm": 0.35265108942985535, "learning_rate": 7.497887686682347e-06, "loss": 0.3373, "step": 4773 }, { "epoch": 1.2007042253521127, "grad_norm": 0.30251818895339966, "learning_rate": 7.496620013421654e-06, "loss": 0.348, "step": 4774 }, { "epoch": 1.2009557344064385, "grad_norm": 0.37606722116470337, "learning_rate": 7.495352126341074e-06, "loss": 0.3535, "step": 4775 }, { "epoch": 1.2012072434607646, "grad_norm": 0.3614901602268219, "learning_rate": 7.4940840255491975e-06, "loss": 0.3766, "step": 4776 }, { "epoch": 1.2014587525150906, "grad_norm": 0.33923235535621643, "learning_rate": 7.492815711154626e-06, "loss": 0.3423, "step": 4777 }, { "epoch": 1.2017102615694164, "grad_norm": 0.388129860162735, "learning_rate": 7.4915471832659835e-06, "loss": 0.372, "step": 4778 }, { "epoch": 1.2019617706237424, "grad_norm": 0.3465493619441986, "learning_rate": 7.490278441991911e-06, "loss": 0.3534, "step": 4779 }, { "epoch": 1.2022132796780685, "grad_norm": 0.3515889644622803, "learning_rate": 7.489009487441071e-06, "loss": 0.3313, "step": 4780 }, { "epoch": 1.2024647887323945, "grad_norm": 0.375966340303421, "learning_rate": 7.4877403197221385e-06, "loss": 0.4004, "step": 4781 }, { "epoch": 1.2027162977867203, "grad_norm": 0.3232581317424774, "learning_rate": 7.48647093894381e-06, "loss": 0.3247, "step": 4782 }, { "epoch": 1.2029678068410463, "grad_norm": 0.3182356059551239, "learning_rate": 7.485201345214803e-06, "loss": 0.3493, "step": 4783 }, { "epoch": 1.2032193158953723, "grad_norm": 0.340154230594635, "learning_rate": 7.483931538643847e-06, "loss": 0.3646, "step": 4784 }, { "epoch": 1.2034708249496981, "grad_norm": 0.35035526752471924, "learning_rate": 7.482661519339696e-06, "loss": 0.3693, "step": 4785 }, { "epoch": 1.2037223340040242, "grad_norm": 0.34506088495254517, "learning_rate": 7.481391287411115e-06, "loss": 0.343, "step": 4786 }, { "epoch": 1.2039738430583502, "grad_norm": 0.34625160694122314, "learning_rate": 7.480120842966895e-06, "loss": 0.3267, "step": 4787 }, { "epoch": 1.204225352112676, "grad_norm": 0.35969218611717224, "learning_rate": 7.478850186115839e-06, "loss": 0.3706, "step": 4788 }, { "epoch": 1.204476861167002, "grad_norm": 0.3208048641681671, "learning_rate": 7.477579316966773e-06, "loss": 0.3509, "step": 4789 }, { "epoch": 1.204728370221328, "grad_norm": 0.34545454382896423, "learning_rate": 7.4763082356285385e-06, "loss": 0.3524, "step": 4790 }, { "epoch": 1.2049798792756539, "grad_norm": 0.37139612436294556, "learning_rate": 7.4750369422099955e-06, "loss": 0.3832, "step": 4791 }, { "epoch": 1.20523138832998, "grad_norm": 0.3318977952003479, "learning_rate": 7.473765436820021e-06, "loss": 0.3348, "step": 4792 }, { "epoch": 1.205482897384306, "grad_norm": 0.34390851855278015, "learning_rate": 7.472493719567513e-06, "loss": 0.3608, "step": 4793 }, { "epoch": 1.2057344064386317, "grad_norm": 0.3598691523075104, "learning_rate": 7.471221790561387e-06, "loss": 0.3632, "step": 4794 }, { "epoch": 1.2059859154929577, "grad_norm": 0.3379955291748047, "learning_rate": 7.469949649910574e-06, "loss": 0.3586, "step": 4795 }, { "epoch": 1.2062374245472838, "grad_norm": 0.37648218870162964, "learning_rate": 7.468677297724025e-06, "loss": 0.3536, "step": 4796 }, { "epoch": 1.2064889336016096, "grad_norm": 0.3806565999984741, "learning_rate": 7.46740473411071e-06, "loss": 0.3358, "step": 4797 }, { "epoch": 1.2067404426559356, "grad_norm": 0.34232839941978455, "learning_rate": 7.466131959179614e-06, "loss": 0.3444, "step": 4798 }, { "epoch": 1.2069919517102616, "grad_norm": 0.3516295254230499, "learning_rate": 7.464858973039746e-06, "loss": 0.3442, "step": 4799 }, { "epoch": 1.2072434607645874, "grad_norm": 0.3554040491580963, "learning_rate": 7.4635857758001266e-06, "loss": 0.3613, "step": 4800 }, { "epoch": 1.2074949698189135, "grad_norm": 0.3366535007953644, "learning_rate": 7.4623123675697976e-06, "loss": 0.3493, "step": 4801 }, { "epoch": 1.2077464788732395, "grad_norm": 0.34996628761291504, "learning_rate": 7.461038748457818e-06, "loss": 0.3765, "step": 4802 }, { "epoch": 1.2079979879275653, "grad_norm": 0.36713096499443054, "learning_rate": 7.459764918573264e-06, "loss": 0.3513, "step": 4803 }, { "epoch": 1.2082494969818913, "grad_norm": 0.3152211308479309, "learning_rate": 7.4584908780252365e-06, "loss": 0.3413, "step": 4804 }, { "epoch": 1.2085010060362174, "grad_norm": 0.33181753754615784, "learning_rate": 7.457216626922843e-06, "loss": 0.3507, "step": 4805 }, { "epoch": 1.2087525150905432, "grad_norm": 0.32960614562034607, "learning_rate": 7.45594216537522e-06, "loss": 0.3585, "step": 4806 }, { "epoch": 1.2090040241448692, "grad_norm": 0.3321514427661896, "learning_rate": 7.454667493491513e-06, "loss": 0.3329, "step": 4807 }, { "epoch": 1.2092555331991952, "grad_norm": 0.33364543318748474, "learning_rate": 7.453392611380892e-06, "loss": 0.3285, "step": 4808 }, { "epoch": 1.209507042253521, "grad_norm": 0.31527218222618103, "learning_rate": 7.452117519152542e-06, "loss": 0.3499, "step": 4809 }, { "epoch": 1.209758551307847, "grad_norm": 0.31813716888427734, "learning_rate": 7.450842216915667e-06, "loss": 0.3498, "step": 4810 }, { "epoch": 1.210010060362173, "grad_norm": 0.30781570076942444, "learning_rate": 7.449566704779488e-06, "loss": 0.3651, "step": 4811 }, { "epoch": 1.2102615694164989, "grad_norm": 0.31299492716789246, "learning_rate": 7.448290982853247e-06, "loss": 0.3392, "step": 4812 }, { "epoch": 1.210513078470825, "grad_norm": 0.3456888794898987, "learning_rate": 7.4470150512461966e-06, "loss": 0.3628, "step": 4813 }, { "epoch": 1.210764587525151, "grad_norm": 0.3564990758895874, "learning_rate": 7.445738910067618e-06, "loss": 0.3645, "step": 4814 }, { "epoch": 1.211016096579477, "grad_norm": 0.32649949193000793, "learning_rate": 7.444462559426802e-06, "loss": 0.3589, "step": 4815 }, { "epoch": 1.2112676056338028, "grad_norm": 0.36671027541160583, "learning_rate": 7.44318599943306e-06, "loss": 0.3756, "step": 4816 }, { "epoch": 1.2115191146881288, "grad_norm": 0.36808088421821594, "learning_rate": 7.441909230195722e-06, "loss": 0.3589, "step": 4817 }, { "epoch": 1.2117706237424548, "grad_norm": 0.35420069098472595, "learning_rate": 7.440632251824135e-06, "loss": 0.332, "step": 4818 }, { "epoch": 1.2120221327967806, "grad_norm": 0.3255870044231415, "learning_rate": 7.4393550644276635e-06, "loss": 0.3622, "step": 4819 }, { "epoch": 1.2122736418511066, "grad_norm": 0.335366427898407, "learning_rate": 7.438077668115692e-06, "loss": 0.3587, "step": 4820 }, { "epoch": 1.2125251509054327, "grad_norm": 0.3825639486312866, "learning_rate": 7.43680006299762e-06, "loss": 0.3776, "step": 4821 }, { "epoch": 1.2127766599597585, "grad_norm": 0.3819475769996643, "learning_rate": 7.435522249182868e-06, "loss": 0.3309, "step": 4822 }, { "epoch": 1.2130281690140845, "grad_norm": 0.3363218605518341, "learning_rate": 7.434244226780871e-06, "loss": 0.3539, "step": 4823 }, { "epoch": 1.2132796780684105, "grad_norm": 0.4024132490158081, "learning_rate": 7.432965995901085e-06, "loss": 0.3538, "step": 4824 }, { "epoch": 1.2135311871227363, "grad_norm": 0.35047945380210876, "learning_rate": 7.4316875566529825e-06, "loss": 0.3228, "step": 4825 }, { "epoch": 1.2137826961770624, "grad_norm": 0.3095787763595581, "learning_rate": 7.430408909146053e-06, "loss": 0.3695, "step": 4826 }, { "epoch": 1.2140342052313884, "grad_norm": 0.33355486392974854, "learning_rate": 7.429130053489805e-06, "loss": 0.3494, "step": 4827 }, { "epoch": 1.2142857142857142, "grad_norm": 0.37087491154670715, "learning_rate": 7.427850989793764e-06, "loss": 0.3527, "step": 4828 }, { "epoch": 1.2145372233400402, "grad_norm": 0.3255065977573395, "learning_rate": 7.426571718167476e-06, "loss": 0.3543, "step": 4829 }, { "epoch": 1.2147887323943662, "grad_norm": 0.33966347575187683, "learning_rate": 7.4252922387205e-06, "loss": 0.3526, "step": 4830 }, { "epoch": 1.2150402414486923, "grad_norm": 0.32921141386032104, "learning_rate": 7.424012551562416e-06, "loss": 0.3662, "step": 4831 }, { "epoch": 1.215291750503018, "grad_norm": 0.3616410791873932, "learning_rate": 7.422732656802821e-06, "loss": 0.3537, "step": 4832 }, { "epoch": 1.215543259557344, "grad_norm": 0.3477363586425781, "learning_rate": 7.421452554551332e-06, "loss": 0.3633, "step": 4833 }, { "epoch": 1.2157947686116701, "grad_norm": 0.34547752141952515, "learning_rate": 7.420172244917579e-06, "loss": 0.3455, "step": 4834 }, { "epoch": 1.216046277665996, "grad_norm": 0.36257532238960266, "learning_rate": 7.418891728011214e-06, "loss": 0.3381, "step": 4835 }, { "epoch": 1.216297786720322, "grad_norm": 0.34526896476745605, "learning_rate": 7.417611003941905e-06, "loss": 0.3347, "step": 4836 }, { "epoch": 1.216549295774648, "grad_norm": 0.35130295157432556, "learning_rate": 7.416330072819338e-06, "loss": 0.3579, "step": 4837 }, { "epoch": 1.2168008048289738, "grad_norm": 0.3254670202732086, "learning_rate": 7.415048934753217e-06, "loss": 0.3455, "step": 4838 }, { "epoch": 1.2170523138832998, "grad_norm": 0.3543333113193512, "learning_rate": 7.413767589853264e-06, "loss": 0.3465, "step": 4839 }, { "epoch": 1.2173038229376258, "grad_norm": 0.3948500156402588, "learning_rate": 7.412486038229217e-06, "loss": 0.354, "step": 4840 }, { "epoch": 1.2175553319919517, "grad_norm": 0.3444294035434723, "learning_rate": 7.411204279990832e-06, "loss": 0.3694, "step": 4841 }, { "epoch": 1.2178068410462777, "grad_norm": 0.35417085886001587, "learning_rate": 7.4099223152478865e-06, "loss": 0.3593, "step": 4842 }, { "epoch": 1.2180583501006037, "grad_norm": 0.35232633352279663, "learning_rate": 7.408640144110171e-06, "loss": 0.3407, "step": 4843 }, { "epoch": 1.2183098591549295, "grad_norm": 0.35325491428375244, "learning_rate": 7.407357766687495e-06, "loss": 0.3534, "step": 4844 }, { "epoch": 1.2185613682092555, "grad_norm": 0.31295645236968994, "learning_rate": 7.406075183089686e-06, "loss": 0.3598, "step": 4845 }, { "epoch": 1.2188128772635816, "grad_norm": 0.3566473424434662, "learning_rate": 7.40479239342659e-06, "loss": 0.3381, "step": 4846 }, { "epoch": 1.2190643863179074, "grad_norm": 0.36591169238090515, "learning_rate": 7.403509397808071e-06, "loss": 0.3401, "step": 4847 }, { "epoch": 1.2193158953722334, "grad_norm": 0.33974406123161316, "learning_rate": 7.402226196344008e-06, "loss": 0.3635, "step": 4848 }, { "epoch": 1.2195674044265594, "grad_norm": 0.33850404620170593, "learning_rate": 7.400942789144299e-06, "loss": 0.37, "step": 4849 }, { "epoch": 1.2198189134808852, "grad_norm": 0.34571510553359985, "learning_rate": 7.399659176318861e-06, "loss": 0.3681, "step": 4850 }, { "epoch": 1.2200704225352113, "grad_norm": 0.29631301760673523, "learning_rate": 7.398375357977626e-06, "loss": 0.3839, "step": 4851 }, { "epoch": 1.2203219315895373, "grad_norm": 0.33825013041496277, "learning_rate": 7.397091334230547e-06, "loss": 0.3593, "step": 4852 }, { "epoch": 1.220573440643863, "grad_norm": 0.3286561667919159, "learning_rate": 7.395807105187591e-06, "loss": 0.3398, "step": 4853 }, { "epoch": 1.220824949698189, "grad_norm": 0.34450599551200867, "learning_rate": 7.3945226709587434e-06, "loss": 0.3356, "step": 4854 }, { "epoch": 1.2210764587525151, "grad_norm": 0.3374083638191223, "learning_rate": 7.393238031654011e-06, "loss": 0.342, "step": 4855 }, { "epoch": 1.221327967806841, "grad_norm": 0.3444235920906067, "learning_rate": 7.391953187383411e-06, "loss": 0.3222, "step": 4856 }, { "epoch": 1.221579476861167, "grad_norm": 0.33307015895843506, "learning_rate": 7.390668138256987e-06, "loss": 0.3378, "step": 4857 }, { "epoch": 1.221830985915493, "grad_norm": 0.362884521484375, "learning_rate": 7.38938288438479e-06, "loss": 0.3765, "step": 4858 }, { "epoch": 1.2220824949698188, "grad_norm": 0.30631768703460693, "learning_rate": 7.388097425876899e-06, "loss": 0.3387, "step": 4859 }, { "epoch": 1.2223340040241448, "grad_norm": 0.3113538920879364, "learning_rate": 7.386811762843404e-06, "loss": 0.3577, "step": 4860 }, { "epoch": 1.2225855130784709, "grad_norm": 0.335956871509552, "learning_rate": 7.385525895394412e-06, "loss": 0.3272, "step": 4861 }, { "epoch": 1.2228370221327967, "grad_norm": 0.3435831367969513, "learning_rate": 7.384239823640052e-06, "loss": 0.3509, "step": 4862 }, { "epoch": 1.2230885311871227, "grad_norm": 0.31683871150016785, "learning_rate": 7.382953547690465e-06, "loss": 0.3362, "step": 4863 }, { "epoch": 1.2233400402414487, "grad_norm": 0.33224907517433167, "learning_rate": 7.381667067655815e-06, "loss": 0.3766, "step": 4864 }, { "epoch": 1.2235915492957747, "grad_norm": 0.34509238600730896, "learning_rate": 7.380380383646282e-06, "loss": 0.3812, "step": 4865 }, { "epoch": 1.2238430583501005, "grad_norm": 0.36535876989364624, "learning_rate": 7.379093495772059e-06, "loss": 0.3212, "step": 4866 }, { "epoch": 1.2240945674044266, "grad_norm": 0.3264424204826355, "learning_rate": 7.377806404143362e-06, "loss": 0.3563, "step": 4867 }, { "epoch": 1.2243460764587526, "grad_norm": 0.332707017660141, "learning_rate": 7.376519108870423e-06, "loss": 0.3651, "step": 4868 }, { "epoch": 1.2245975855130784, "grad_norm": 0.3452300429344177, "learning_rate": 7.375231610063488e-06, "loss": 0.3535, "step": 4869 }, { "epoch": 1.2248490945674044, "grad_norm": 0.2937973439693451, "learning_rate": 7.373943907832826e-06, "loss": 0.354, "step": 4870 }, { "epoch": 1.2251006036217305, "grad_norm": 0.33667927980422974, "learning_rate": 7.37265600228872e-06, "loss": 0.3762, "step": 4871 }, { "epoch": 1.2253521126760563, "grad_norm": 0.32756221294403076, "learning_rate": 7.37136789354147e-06, "loss": 0.35, "step": 4872 }, { "epoch": 1.2256036217303823, "grad_norm": 0.3230566680431366, "learning_rate": 7.370079581701396e-06, "loss": 0.3338, "step": 4873 }, { "epoch": 1.2258551307847083, "grad_norm": 0.3240143358707428, "learning_rate": 7.368791066878832e-06, "loss": 0.3427, "step": 4874 }, { "epoch": 1.2261066398390341, "grad_norm": 0.3279706835746765, "learning_rate": 7.367502349184132e-06, "loss": 0.3649, "step": 4875 }, { "epoch": 1.2263581488933601, "grad_norm": 0.35657429695129395, "learning_rate": 7.366213428727669e-06, "loss": 0.3473, "step": 4876 }, { "epoch": 1.2266096579476862, "grad_norm": 0.3800789713859558, "learning_rate": 7.364924305619827e-06, "loss": 0.3424, "step": 4877 }, { "epoch": 1.226861167002012, "grad_norm": 0.33746159076690674, "learning_rate": 7.363634979971012e-06, "loss": 0.3587, "step": 4878 }, { "epoch": 1.227112676056338, "grad_norm": 0.319251149892807, "learning_rate": 7.362345451891649e-06, "loss": 0.36, "step": 4879 }, { "epoch": 1.227364185110664, "grad_norm": 0.3327750265598297, "learning_rate": 7.361055721492177e-06, "loss": 0.3533, "step": 4880 }, { "epoch": 1.22761569416499, "grad_norm": 0.3240911364555359, "learning_rate": 7.359765788883053e-06, "loss": 0.3473, "step": 4881 }, { "epoch": 1.2278672032193159, "grad_norm": 0.3320063054561615, "learning_rate": 7.358475654174753e-06, "loss": 0.3587, "step": 4882 }, { "epoch": 1.2281187122736419, "grad_norm": 0.31429430842399597, "learning_rate": 7.357185317477766e-06, "loss": 0.3512, "step": 4883 }, { "epoch": 1.228370221327968, "grad_norm": 0.33201098442077637, "learning_rate": 7.355894778902605e-06, "loss": 0.3469, "step": 4884 }, { "epoch": 1.2286217303822937, "grad_norm": 0.31669896841049194, "learning_rate": 7.354604038559794e-06, "loss": 0.3443, "step": 4885 }, { "epoch": 1.2288732394366197, "grad_norm": 0.34272029995918274, "learning_rate": 7.3533130965598765e-06, "loss": 0.3383, "step": 4886 }, { "epoch": 1.2291247484909458, "grad_norm": 0.3486393988132477, "learning_rate": 7.352021953013415e-06, "loss": 0.3458, "step": 4887 }, { "epoch": 1.2293762575452716, "grad_norm": 0.383306622505188, "learning_rate": 7.350730608030987e-06, "loss": 0.3301, "step": 4888 }, { "epoch": 1.2296277665995976, "grad_norm": 0.3175544738769531, "learning_rate": 7.3494390617231896e-06, "loss": 0.3521, "step": 4889 }, { "epoch": 1.2298792756539236, "grad_norm": 0.32851359248161316, "learning_rate": 7.348147314200634e-06, "loss": 0.3298, "step": 4890 }, { "epoch": 1.2301307847082494, "grad_norm": 0.35966333746910095, "learning_rate": 7.346855365573951e-06, "loss": 0.3386, "step": 4891 }, { "epoch": 1.2303822937625755, "grad_norm": 0.34041905403137207, "learning_rate": 7.345563215953787e-06, "loss": 0.3561, "step": 4892 }, { "epoch": 1.2306338028169015, "grad_norm": 0.3270442485809326, "learning_rate": 7.3442708654508085e-06, "loss": 0.3536, "step": 4893 }, { "epoch": 1.2308853118712273, "grad_norm": 0.3366083800792694, "learning_rate": 7.342978314175695e-06, "loss": 0.3454, "step": 4894 }, { "epoch": 1.2311368209255533, "grad_norm": 0.3429853320121765, "learning_rate": 7.341685562239148e-06, "loss": 0.3413, "step": 4895 }, { "epoch": 1.2313883299798793, "grad_norm": 0.3033541738986969, "learning_rate": 7.3403926097518805e-06, "loss": 0.3329, "step": 4896 }, { "epoch": 1.2316398390342052, "grad_norm": 0.33623993396759033, "learning_rate": 7.339099456824628e-06, "loss": 0.3463, "step": 4897 }, { "epoch": 1.2318913480885312, "grad_norm": 0.32286033034324646, "learning_rate": 7.3378061035681415e-06, "loss": 0.344, "step": 4898 }, { "epoch": 1.2321428571428572, "grad_norm": 0.3481849431991577, "learning_rate": 7.336512550093186e-06, "loss": 0.3445, "step": 4899 }, { "epoch": 1.232394366197183, "grad_norm": 0.3121998906135559, "learning_rate": 7.335218796510548e-06, "loss": 0.3416, "step": 4900 }, { "epoch": 1.232645875251509, "grad_norm": 0.3597795367240906, "learning_rate": 7.333924842931031e-06, "loss": 0.3588, "step": 4901 }, { "epoch": 1.232897384305835, "grad_norm": 0.31854021549224854, "learning_rate": 7.332630689465449e-06, "loss": 0.3392, "step": 4902 }, { "epoch": 1.2331488933601609, "grad_norm": 0.35786551237106323, "learning_rate": 7.331336336224643e-06, "loss": 0.3464, "step": 4903 }, { "epoch": 1.233400402414487, "grad_norm": 0.36554989218711853, "learning_rate": 7.330041783319466e-06, "loss": 0.3524, "step": 4904 }, { "epoch": 1.233651911468813, "grad_norm": 0.3338761627674103, "learning_rate": 7.328747030860786e-06, "loss": 0.3245, "step": 4905 }, { "epoch": 1.2339034205231387, "grad_norm": 0.33866626024246216, "learning_rate": 7.327452078959491e-06, "loss": 0.3601, "step": 4906 }, { "epoch": 1.2341549295774648, "grad_norm": 0.3417625427246094, "learning_rate": 7.326156927726485e-06, "loss": 0.3723, "step": 4907 }, { "epoch": 1.2344064386317908, "grad_norm": 0.402842253446579, "learning_rate": 7.324861577272693e-06, "loss": 0.3539, "step": 4908 }, { "epoch": 1.2346579476861166, "grad_norm": 0.3404982089996338, "learning_rate": 7.323566027709049e-06, "loss": 0.3507, "step": 4909 }, { "epoch": 1.2349094567404426, "grad_norm": 0.32714393734931946, "learning_rate": 7.322270279146512e-06, "loss": 0.3506, "step": 4910 }, { "epoch": 1.2351609657947686, "grad_norm": 0.35655197501182556, "learning_rate": 7.320974331696053e-06, "loss": 0.3603, "step": 4911 }, { "epoch": 1.2354124748490944, "grad_norm": 0.3561979830265045, "learning_rate": 7.319678185468662e-06, "loss": 0.3486, "step": 4912 }, { "epoch": 1.2356639839034205, "grad_norm": 0.36184290051460266, "learning_rate": 7.318381840575347e-06, "loss": 0.3567, "step": 4913 }, { "epoch": 1.2359154929577465, "grad_norm": 0.30532655119895935, "learning_rate": 7.317085297127131e-06, "loss": 0.3518, "step": 4914 }, { "epoch": 1.2361670020120725, "grad_norm": 0.3469163477420807, "learning_rate": 7.315788555235055e-06, "loss": 0.3704, "step": 4915 }, { "epoch": 1.2364185110663983, "grad_norm": 0.34762290120124817, "learning_rate": 7.314491615010178e-06, "loss": 0.3404, "step": 4916 }, { "epoch": 1.2366700201207244, "grad_norm": 0.33168843388557434, "learning_rate": 7.313194476563572e-06, "loss": 0.3181, "step": 4917 }, { "epoch": 1.2369215291750504, "grad_norm": 0.34730586409568787, "learning_rate": 7.311897140006331e-06, "loss": 0.37, "step": 4918 }, { "epoch": 1.2371730382293762, "grad_norm": 0.3283190131187439, "learning_rate": 7.310599605449563e-06, "loss": 0.3246, "step": 4919 }, { "epoch": 1.2374245472837022, "grad_norm": 0.34569719433784485, "learning_rate": 7.309301873004394e-06, "loss": 0.3726, "step": 4920 }, { "epoch": 1.2376760563380282, "grad_norm": 0.33751311898231506, "learning_rate": 7.308003942781966e-06, "loss": 0.3524, "step": 4921 }, { "epoch": 1.237927565392354, "grad_norm": 0.31516730785369873, "learning_rate": 7.30670581489344e-06, "loss": 0.3436, "step": 4922 }, { "epoch": 1.23817907444668, "grad_norm": 0.40124109387397766, "learning_rate": 7.305407489449991e-06, "loss": 0.3557, "step": 4923 }, { "epoch": 1.238430583501006, "grad_norm": 0.31682252883911133, "learning_rate": 7.3041089665628125e-06, "loss": 0.3739, "step": 4924 }, { "epoch": 1.238682092555332, "grad_norm": 0.32461637258529663, "learning_rate": 7.302810246343117e-06, "loss": 0.3655, "step": 4925 }, { "epoch": 1.238933601609658, "grad_norm": 0.32863402366638184, "learning_rate": 7.30151132890213e-06, "loss": 0.3388, "step": 4926 }, { "epoch": 1.239185110663984, "grad_norm": 0.35274699330329895, "learning_rate": 7.300212214351095e-06, "loss": 0.3431, "step": 4927 }, { "epoch": 1.2394366197183098, "grad_norm": 0.343986839056015, "learning_rate": 7.298912902801274e-06, "loss": 0.3763, "step": 4928 }, { "epoch": 1.2396881287726358, "grad_norm": 0.3199062943458557, "learning_rate": 7.297613394363946e-06, "loss": 0.3223, "step": 4929 }, { "epoch": 1.2399396378269618, "grad_norm": 0.3335794508457184, "learning_rate": 7.296313689150404e-06, "loss": 0.37, "step": 4930 }, { "epoch": 1.2401911468812878, "grad_norm": 0.365524560213089, "learning_rate": 7.295013787271959e-06, "loss": 0.3746, "step": 4931 }, { "epoch": 1.2404426559356136, "grad_norm": 0.3475268483161926, "learning_rate": 7.293713688839941e-06, "loss": 0.3658, "step": 4932 }, { "epoch": 1.2406941649899397, "grad_norm": 0.3196357786655426, "learning_rate": 7.292413393965696e-06, "loss": 0.334, "step": 4933 }, { "epoch": 1.2409456740442657, "grad_norm": 0.3176928460597992, "learning_rate": 7.291112902760584e-06, "loss": 0.3439, "step": 4934 }, { "epoch": 1.2411971830985915, "grad_norm": 0.3524039685726166, "learning_rate": 7.289812215335987e-06, "loss": 0.3795, "step": 4935 }, { "epoch": 1.2414486921529175, "grad_norm": 0.3586216866970062, "learning_rate": 7.288511331803296e-06, "loss": 0.3812, "step": 4936 }, { "epoch": 1.2417002012072436, "grad_norm": 0.31926751136779785, "learning_rate": 7.2872102522739286e-06, "loss": 0.3386, "step": 4937 }, { "epoch": 1.2419517102615694, "grad_norm": 0.338290274143219, "learning_rate": 7.285908976859312e-06, "loss": 0.3553, "step": 4938 }, { "epoch": 1.2422032193158954, "grad_norm": 0.3404878079891205, "learning_rate": 7.284607505670891e-06, "loss": 0.3486, "step": 4939 }, { "epoch": 1.2424547283702214, "grad_norm": 0.3355543613433838, "learning_rate": 7.283305838820129e-06, "loss": 0.3815, "step": 4940 }, { "epoch": 1.2427062374245472, "grad_norm": 0.3367030620574951, "learning_rate": 7.282003976418508e-06, "loss": 0.3753, "step": 4941 }, { "epoch": 1.2429577464788732, "grad_norm": 0.3236370086669922, "learning_rate": 7.280701918577521e-06, "loss": 0.3493, "step": 4942 }, { "epoch": 1.2432092555331993, "grad_norm": 0.3462064862251282, "learning_rate": 7.279399665408684e-06, "loss": 0.3639, "step": 4943 }, { "epoch": 1.243460764587525, "grad_norm": 0.35062482953071594, "learning_rate": 7.278097217023523e-06, "loss": 0.3644, "step": 4944 }, { "epoch": 1.243712273641851, "grad_norm": 0.38055381178855896, "learning_rate": 7.27679457353359e-06, "loss": 0.3517, "step": 4945 }, { "epoch": 1.2439637826961771, "grad_norm": 0.36633816361427307, "learning_rate": 7.275491735050444e-06, "loss": 0.36, "step": 4946 }, { "epoch": 1.244215291750503, "grad_norm": 0.3013075590133667, "learning_rate": 7.274188701685666e-06, "loss": 0.3564, "step": 4947 }, { "epoch": 1.244466800804829, "grad_norm": 0.35343462228775024, "learning_rate": 7.272885473550855e-06, "loss": 0.3335, "step": 4948 }, { "epoch": 1.244718309859155, "grad_norm": 0.337429404258728, "learning_rate": 7.271582050757619e-06, "loss": 0.3329, "step": 4949 }, { "epoch": 1.2449698189134808, "grad_norm": 0.3367510735988617, "learning_rate": 7.2702784334175945e-06, "loss": 0.3195, "step": 4950 }, { "epoch": 1.2452213279678068, "grad_norm": 0.32875868678092957, "learning_rate": 7.268974621642424e-06, "loss": 0.3472, "step": 4951 }, { "epoch": 1.2454728370221329, "grad_norm": 0.3070097863674164, "learning_rate": 7.267670615543771e-06, "loss": 0.3493, "step": 4952 }, { "epoch": 1.2457243460764587, "grad_norm": 0.3756659924983978, "learning_rate": 7.266366415233317e-06, "loss": 0.3728, "step": 4953 }, { "epoch": 1.2459758551307847, "grad_norm": 0.3073340356349945, "learning_rate": 7.265062020822757e-06, "loss": 0.3499, "step": 4954 }, { "epoch": 1.2462273641851107, "grad_norm": 0.3308236300945282, "learning_rate": 7.263757432423806e-06, "loss": 0.385, "step": 4955 }, { "epoch": 1.2464788732394365, "grad_norm": 0.3272673487663269, "learning_rate": 7.262452650148193e-06, "loss": 0.3626, "step": 4956 }, { "epoch": 1.2467303822937625, "grad_norm": 0.33081069588661194, "learning_rate": 7.2611476741076636e-06, "loss": 0.3471, "step": 4957 }, { "epoch": 1.2469818913480886, "grad_norm": 0.33813828229904175, "learning_rate": 7.2598425044139835e-06, "loss": 0.3423, "step": 4958 }, { "epoch": 1.2472334004024144, "grad_norm": 0.319319486618042, "learning_rate": 7.258537141178928e-06, "loss": 0.3595, "step": 4959 }, { "epoch": 1.2474849094567404, "grad_norm": 0.30126598477363586, "learning_rate": 7.257231584514297e-06, "loss": 0.3368, "step": 4960 }, { "epoch": 1.2477364185110664, "grad_norm": 0.3376123309135437, "learning_rate": 7.255925834531902e-06, "loss": 0.34, "step": 4961 }, { "epoch": 1.2479879275653922, "grad_norm": 0.3421916663646698, "learning_rate": 7.254619891343572e-06, "loss": 0.3507, "step": 4962 }, { "epoch": 1.2482394366197183, "grad_norm": 0.29679960012435913, "learning_rate": 7.253313755061153e-06, "loss": 0.3465, "step": 4963 }, { "epoch": 1.2484909456740443, "grad_norm": 0.3243803381919861, "learning_rate": 7.252007425796508e-06, "loss": 0.3479, "step": 4964 }, { "epoch": 1.2487424547283703, "grad_norm": 0.3532167971134186, "learning_rate": 7.2507009036615136e-06, "loss": 0.346, "step": 4965 }, { "epoch": 1.2489939637826961, "grad_norm": 0.30621767044067383, "learning_rate": 7.24939418876807e-06, "loss": 0.3443, "step": 4966 }, { "epoch": 1.2492454728370221, "grad_norm": 0.3399735689163208, "learning_rate": 7.248087281228085e-06, "loss": 0.3302, "step": 4967 }, { "epoch": 1.2494969818913482, "grad_norm": 0.34660354256629944, "learning_rate": 7.246780181153489e-06, "loss": 0.3406, "step": 4968 }, { "epoch": 1.249748490945674, "grad_norm": 0.3369465470314026, "learning_rate": 7.2454728886562255e-06, "loss": 0.3625, "step": 4969 }, { "epoch": 1.25, "grad_norm": 0.3431127369403839, "learning_rate": 7.244165403848257e-06, "loss": 0.3365, "step": 4970 }, { "epoch": 1.250251509054326, "grad_norm": 0.3352718651294708, "learning_rate": 7.242857726841561e-06, "loss": 0.3753, "step": 4971 }, { "epoch": 1.2505030181086518, "grad_norm": 0.3221033215522766, "learning_rate": 7.2415498577481325e-06, "loss": 0.342, "step": 4972 }, { "epoch": 1.2507545271629779, "grad_norm": 0.33727914094924927, "learning_rate": 7.240241796679981e-06, "loss": 0.3419, "step": 4973 }, { "epoch": 1.2510060362173039, "grad_norm": 0.3359692096710205, "learning_rate": 7.238933543749135e-06, "loss": 0.3671, "step": 4974 }, { "epoch": 1.25125754527163, "grad_norm": 0.32565391063690186, "learning_rate": 7.237625099067638e-06, "loss": 0.3667, "step": 4975 }, { "epoch": 1.2515090543259557, "grad_norm": 0.310983806848526, "learning_rate": 7.236316462747548e-06, "loss": 0.3488, "step": 4976 }, { "epoch": 1.2517605633802817, "grad_norm": 0.3416151702404022, "learning_rate": 7.235007634900945e-06, "loss": 0.3496, "step": 4977 }, { "epoch": 1.2520120724346078, "grad_norm": 0.3046923577785492, "learning_rate": 7.23369861563992e-06, "loss": 0.3299, "step": 4978 }, { "epoch": 1.2522635814889336, "grad_norm": 0.3572548031806946, "learning_rate": 7.232389405076583e-06, "loss": 0.3538, "step": 4979 }, { "epoch": 1.2525150905432596, "grad_norm": 0.32661741971969604, "learning_rate": 7.231080003323059e-06, "loss": 0.3715, "step": 4980 }, { "epoch": 1.2527665995975856, "grad_norm": 0.3313870131969452, "learning_rate": 7.22977041049149e-06, "loss": 0.3579, "step": 4981 }, { "epoch": 1.2530181086519114, "grad_norm": 0.3333343267440796, "learning_rate": 7.228460626694034e-06, "loss": 0.318, "step": 4982 }, { "epoch": 1.2532696177062375, "grad_norm": 0.3657362163066864, "learning_rate": 7.227150652042868e-06, "loss": 0.3484, "step": 4983 }, { "epoch": 1.2535211267605635, "grad_norm": 0.3520519733428955, "learning_rate": 7.225840486650181e-06, "loss": 0.3439, "step": 4984 }, { "epoch": 1.2537726358148893, "grad_norm": 0.3073820173740387, "learning_rate": 7.2245301306281815e-06, "loss": 0.3389, "step": 4985 }, { "epoch": 1.2540241448692153, "grad_norm": 0.3221682608127594, "learning_rate": 7.223219584089091e-06, "loss": 0.3708, "step": 4986 }, { "epoch": 1.2542756539235413, "grad_norm": 0.32095620036125183, "learning_rate": 7.221908847145153e-06, "loss": 0.3656, "step": 4987 }, { "epoch": 1.2545271629778671, "grad_norm": 0.3564129173755646, "learning_rate": 7.220597919908624e-06, "loss": 0.3757, "step": 4988 }, { "epoch": 1.2547786720321932, "grad_norm": 0.33230066299438477, "learning_rate": 7.219286802491774e-06, "loss": 0.3458, "step": 4989 }, { "epoch": 1.2550301810865192, "grad_norm": 0.33365389704704285, "learning_rate": 7.217975495006892e-06, "loss": 0.3577, "step": 4990 }, { "epoch": 1.255281690140845, "grad_norm": 0.3553404211997986, "learning_rate": 7.216663997566285e-06, "loss": 0.3845, "step": 4991 }, { "epoch": 1.255533199195171, "grad_norm": 0.3103611171245575, "learning_rate": 7.215352310282275e-06, "loss": 0.3408, "step": 4992 }, { "epoch": 1.255784708249497, "grad_norm": 0.34521424770355225, "learning_rate": 7.2140404332671986e-06, "loss": 0.342, "step": 4993 }, { "epoch": 1.2560362173038229, "grad_norm": 0.31152811646461487, "learning_rate": 7.212728366633411e-06, "loss": 0.3621, "step": 4994 }, { "epoch": 1.256287726358149, "grad_norm": 0.335000604391098, "learning_rate": 7.211416110493279e-06, "loss": 0.3201, "step": 4995 }, { "epoch": 1.256539235412475, "grad_norm": 0.3560849130153656, "learning_rate": 7.210103664959194e-06, "loss": 0.373, "step": 4996 }, { "epoch": 1.2567907444668007, "grad_norm": 0.357250452041626, "learning_rate": 7.2087910301435545e-06, "loss": 0.343, "step": 4997 }, { "epoch": 1.2570422535211268, "grad_norm": 0.32552990317344666, "learning_rate": 7.207478206158782e-06, "loss": 0.3473, "step": 4998 }, { "epoch": 1.2572937625754528, "grad_norm": 0.3105219006538391, "learning_rate": 7.2061651931173115e-06, "loss": 0.3472, "step": 4999 }, { "epoch": 1.2575452716297786, "grad_norm": 0.3435421586036682, "learning_rate": 7.204851991131594e-06, "loss": 0.3495, "step": 5000 }, { "epoch": 1.2577967806841046, "grad_norm": 0.34035179018974304, "learning_rate": 7.203538600314096e-06, "loss": 0.3422, "step": 5001 }, { "epoch": 1.2580482897384306, "grad_norm": 0.3376530408859253, "learning_rate": 7.2022250207773035e-06, "loss": 0.3356, "step": 5002 }, { "epoch": 1.2582997987927564, "grad_norm": 0.3429863750934601, "learning_rate": 7.200911252633714e-06, "loss": 0.3598, "step": 5003 }, { "epoch": 1.2585513078470825, "grad_norm": 0.3484230935573578, "learning_rate": 7.199597295995846e-06, "loss": 0.3519, "step": 5004 }, { "epoch": 1.2588028169014085, "grad_norm": 0.35319894552230835, "learning_rate": 7.1982831509762294e-06, "loss": 0.3632, "step": 5005 }, { "epoch": 1.2590543259557343, "grad_norm": 0.3312159478664398, "learning_rate": 7.196968817687413e-06, "loss": 0.3324, "step": 5006 }, { "epoch": 1.2593058350100603, "grad_norm": 0.34530702233314514, "learning_rate": 7.195654296241963e-06, "loss": 0.3721, "step": 5007 }, { "epoch": 1.2595573440643864, "grad_norm": 0.3831932842731476, "learning_rate": 7.194339586752457e-06, "loss": 0.3432, "step": 5008 }, { "epoch": 1.2598088531187122, "grad_norm": 0.3508305847644806, "learning_rate": 7.193024689331493e-06, "loss": 0.3323, "step": 5009 }, { "epoch": 1.2600603621730382, "grad_norm": 0.33183974027633667, "learning_rate": 7.1917096040916835e-06, "loss": 0.3467, "step": 5010 }, { "epoch": 1.2603118712273642, "grad_norm": 0.33060377836227417, "learning_rate": 7.190394331145659e-06, "loss": 0.3598, "step": 5011 }, { "epoch": 1.26056338028169, "grad_norm": 0.3584049940109253, "learning_rate": 7.189078870606063e-06, "loss": 0.333, "step": 5012 }, { "epoch": 1.260814889336016, "grad_norm": 0.35449668765068054, "learning_rate": 7.187763222585556e-06, "loss": 0.37, "step": 5013 }, { "epoch": 1.261066398390342, "grad_norm": 0.34245765209198, "learning_rate": 7.186447387196815e-06, "loss": 0.3424, "step": 5014 }, { "epoch": 1.2613179074446679, "grad_norm": 0.3323255777359009, "learning_rate": 7.1851313645525356e-06, "loss": 0.3614, "step": 5015 }, { "epoch": 1.261569416498994, "grad_norm": 0.35160186886787415, "learning_rate": 7.183815154765423e-06, "loss": 0.3396, "step": 5016 }, { "epoch": 1.26182092555332, "grad_norm": 0.31636881828308105, "learning_rate": 7.182498757948207e-06, "loss": 0.3336, "step": 5017 }, { "epoch": 1.262072434607646, "grad_norm": 0.36350032687187195, "learning_rate": 7.181182174213623e-06, "loss": 0.3448, "step": 5018 }, { "epoch": 1.2623239436619718, "grad_norm": 0.3364773690700531, "learning_rate": 7.179865403674433e-06, "loss": 0.3263, "step": 5019 }, { "epoch": 1.2625754527162978, "grad_norm": 0.32822203636169434, "learning_rate": 7.178548446443407e-06, "loss": 0.3504, "step": 5020 }, { "epoch": 1.2628269617706238, "grad_norm": 0.33104854822158813, "learning_rate": 7.177231302633337e-06, "loss": 0.3321, "step": 5021 }, { "epoch": 1.2630784708249496, "grad_norm": 0.34756192564964294, "learning_rate": 7.175913972357025e-06, "loss": 0.3538, "step": 5022 }, { "epoch": 1.2633299798792756, "grad_norm": 0.35056936740875244, "learning_rate": 7.174596455727295e-06, "loss": 0.352, "step": 5023 }, { "epoch": 1.2635814889336017, "grad_norm": 0.3366295099258423, "learning_rate": 7.173278752856983e-06, "loss": 0.3545, "step": 5024 }, { "epoch": 1.2638329979879277, "grad_norm": 0.3299401104450226, "learning_rate": 7.171960863858941e-06, "loss": 0.3385, "step": 5025 }, { "epoch": 1.2640845070422535, "grad_norm": 0.3155493140220642, "learning_rate": 7.17064278884604e-06, "loss": 0.3304, "step": 5026 }, { "epoch": 1.2643360160965795, "grad_norm": 0.3616369068622589, "learning_rate": 7.169324527931162e-06, "loss": 0.3593, "step": 5027 }, { "epoch": 1.2645875251509056, "grad_norm": 0.3170899748802185, "learning_rate": 7.16800608122721e-06, "loss": 0.3077, "step": 5028 }, { "epoch": 1.2648390342052314, "grad_norm": 0.3360782563686371, "learning_rate": 7.1666874488471e-06, "loss": 0.3558, "step": 5029 }, { "epoch": 1.2650905432595574, "grad_norm": 0.32805049419403076, "learning_rate": 7.165368630903766e-06, "loss": 0.332, "step": 5030 }, { "epoch": 1.2653420523138834, "grad_norm": 0.34620416164398193, "learning_rate": 7.164049627510154e-06, "loss": 0.3559, "step": 5031 }, { "epoch": 1.2655935613682092, "grad_norm": 0.33738458156585693, "learning_rate": 7.1627304387792285e-06, "loss": 0.355, "step": 5032 }, { "epoch": 1.2658450704225352, "grad_norm": 0.3080560266971588, "learning_rate": 7.161411064823973e-06, "loss": 0.3521, "step": 5033 }, { "epoch": 1.2660965794768613, "grad_norm": 0.3708006739616394, "learning_rate": 7.160091505757381e-06, "loss": 0.384, "step": 5034 }, { "epoch": 1.266348088531187, "grad_norm": 0.34174585342407227, "learning_rate": 7.158771761692464e-06, "loss": 0.3298, "step": 5035 }, { "epoch": 1.266599597585513, "grad_norm": 0.3278071880340576, "learning_rate": 7.157451832742253e-06, "loss": 0.3747, "step": 5036 }, { "epoch": 1.2668511066398391, "grad_norm": 0.3372129797935486, "learning_rate": 7.156131719019789e-06, "loss": 0.371, "step": 5037 }, { "epoch": 1.267102615694165, "grad_norm": 0.3328630030155182, "learning_rate": 7.15481142063813e-06, "loss": 0.3402, "step": 5038 }, { "epoch": 1.267354124748491, "grad_norm": 0.3106217086315155, "learning_rate": 7.1534909377103555e-06, "loss": 0.3318, "step": 5039 }, { "epoch": 1.267605633802817, "grad_norm": 0.3225652575492859, "learning_rate": 7.152170270349553e-06, "loss": 0.3657, "step": 5040 }, { "epoch": 1.2678571428571428, "grad_norm": 0.3484492003917694, "learning_rate": 7.1508494186688305e-06, "loss": 0.3852, "step": 5041 }, { "epoch": 1.2681086519114688, "grad_norm": 0.3207867741584778, "learning_rate": 7.149528382781312e-06, "loss": 0.3607, "step": 5042 }, { "epoch": 1.2683601609657948, "grad_norm": 0.32192859053611755, "learning_rate": 7.148207162800135e-06, "loss": 0.3463, "step": 5043 }, { "epoch": 1.2686116700201207, "grad_norm": 0.31769081950187683, "learning_rate": 7.146885758838453e-06, "loss": 0.3437, "step": 5044 }, { "epoch": 1.2688631790744467, "grad_norm": 0.3407571315765381, "learning_rate": 7.145564171009437e-06, "loss": 0.3505, "step": 5045 }, { "epoch": 1.2691146881287727, "grad_norm": 0.32075372338294983, "learning_rate": 7.144242399426272e-06, "loss": 0.3455, "step": 5046 }, { "epoch": 1.2693661971830985, "grad_norm": 0.30012497305870056, "learning_rate": 7.14292044420216e-06, "loss": 0.33, "step": 5047 }, { "epoch": 1.2696177062374245, "grad_norm": 0.3274124264717102, "learning_rate": 7.141598305450319e-06, "loss": 0.3383, "step": 5048 }, { "epoch": 1.2698692152917506, "grad_norm": 0.3339790403842926, "learning_rate": 7.1402759832839795e-06, "loss": 0.3598, "step": 5049 }, { "epoch": 1.2701207243460764, "grad_norm": 0.33048734068870544, "learning_rate": 7.138953477816393e-06, "loss": 0.3401, "step": 5050 }, { "epoch": 1.2703722334004024, "grad_norm": 0.3411872386932373, "learning_rate": 7.137630789160821e-06, "loss": 0.3694, "step": 5051 }, { "epoch": 1.2706237424547284, "grad_norm": 0.3702620267868042, "learning_rate": 7.136307917430545e-06, "loss": 0.3455, "step": 5052 }, { "epoch": 1.2708752515090542, "grad_norm": 0.3102932274341583, "learning_rate": 7.1349848627388616e-06, "loss": 0.3652, "step": 5053 }, { "epoch": 1.2711267605633803, "grad_norm": 0.3049952983856201, "learning_rate": 7.13366162519908e-06, "loss": 0.3456, "step": 5054 }, { "epoch": 1.2713782696177063, "grad_norm": 0.34085172414779663, "learning_rate": 7.132338204924529e-06, "loss": 0.3461, "step": 5055 }, { "epoch": 1.271629778672032, "grad_norm": 0.33940356969833374, "learning_rate": 7.131014602028551e-06, "loss": 0.3462, "step": 5056 }, { "epoch": 1.2718812877263581, "grad_norm": 0.3196280896663666, "learning_rate": 7.129690816624504e-06, "loss": 0.357, "step": 5057 }, { "epoch": 1.2721327967806841, "grad_norm": 0.3302951157093048, "learning_rate": 7.128366848825761e-06, "loss": 0.3587, "step": 5058 }, { "epoch": 1.27238430583501, "grad_norm": 0.3771762549877167, "learning_rate": 7.127042698745712e-06, "loss": 0.3846, "step": 5059 }, { "epoch": 1.272635814889336, "grad_norm": 0.3365668058395386, "learning_rate": 7.125718366497763e-06, "loss": 0.3886, "step": 5060 }, { "epoch": 1.272887323943662, "grad_norm": 0.299969345331192, "learning_rate": 7.124393852195335e-06, "loss": 0.3545, "step": 5061 }, { "epoch": 1.2731388329979878, "grad_norm": 0.3523898422718048, "learning_rate": 7.123069155951864e-06, "loss": 0.3674, "step": 5062 }, { "epoch": 1.2733903420523138, "grad_norm": 0.3061320185661316, "learning_rate": 7.1217442778808e-06, "loss": 0.3479, "step": 5063 }, { "epoch": 1.2736418511066399, "grad_norm": 0.37146908044815063, "learning_rate": 7.120419218095614e-06, "loss": 0.3602, "step": 5064 }, { "epoch": 1.2738933601609657, "grad_norm": 0.32229381799697876, "learning_rate": 7.119093976709785e-06, "loss": 0.3616, "step": 5065 }, { "epoch": 1.2741448692152917, "grad_norm": 0.3241557776927948, "learning_rate": 7.117768553836816e-06, "loss": 0.3269, "step": 5066 }, { "epoch": 1.2743963782696177, "grad_norm": 0.33732870221138, "learning_rate": 7.1164429495902185e-06, "loss": 0.3396, "step": 5067 }, { "epoch": 1.2746478873239437, "grad_norm": 0.34197482466697693, "learning_rate": 7.115117164083522e-06, "loss": 0.3481, "step": 5068 }, { "epoch": 1.2748993963782695, "grad_norm": 0.32349589467048645, "learning_rate": 7.113791197430275e-06, "loss": 0.3505, "step": 5069 }, { "epoch": 1.2751509054325956, "grad_norm": 0.31883949041366577, "learning_rate": 7.112465049744033e-06, "loss": 0.305, "step": 5070 }, { "epoch": 1.2754024144869216, "grad_norm": 0.3327849507331848, "learning_rate": 7.111138721138376e-06, "loss": 0.3398, "step": 5071 }, { "epoch": 1.2756539235412474, "grad_norm": 0.3398069143295288, "learning_rate": 7.109812211726895e-06, "loss": 0.3686, "step": 5072 }, { "epoch": 1.2759054325955734, "grad_norm": 0.3131691515445709, "learning_rate": 7.108485521623196e-06, "loss": 0.3489, "step": 5073 }, { "epoch": 1.2761569416498995, "grad_norm": 0.3182958662509918, "learning_rate": 7.107158650940904e-06, "loss": 0.349, "step": 5074 }, { "epoch": 1.2764084507042255, "grad_norm": 0.2990667521953583, "learning_rate": 7.105831599793655e-06, "loss": 0.3483, "step": 5075 }, { "epoch": 1.2766599597585513, "grad_norm": 0.3262629508972168, "learning_rate": 7.104504368295105e-06, "loss": 0.364, "step": 5076 }, { "epoch": 1.2769114688128773, "grad_norm": 0.3283725082874298, "learning_rate": 7.10317695655892e-06, "loss": 0.356, "step": 5077 }, { "epoch": 1.2771629778672033, "grad_norm": 0.31310954689979553, "learning_rate": 7.101849364698786e-06, "loss": 0.3504, "step": 5078 }, { "epoch": 1.2774144869215291, "grad_norm": 0.3293515145778656, "learning_rate": 7.100521592828405e-06, "loss": 0.3362, "step": 5079 }, { "epoch": 1.2776659959758552, "grad_norm": 0.31112322211265564, "learning_rate": 7.0991936410614885e-06, "loss": 0.3425, "step": 5080 }, { "epoch": 1.2779175050301812, "grad_norm": 0.3442460298538208, "learning_rate": 7.09786550951177e-06, "loss": 0.3326, "step": 5081 }, { "epoch": 1.278169014084507, "grad_norm": 0.3595294952392578, "learning_rate": 7.096537198292994e-06, "loss": 0.3586, "step": 5082 }, { "epoch": 1.278420523138833, "grad_norm": 0.359210729598999, "learning_rate": 7.0952087075189235e-06, "loss": 0.382, "step": 5083 }, { "epoch": 1.278672032193159, "grad_norm": 0.3556901216506958, "learning_rate": 7.0938800373033355e-06, "loss": 0.3755, "step": 5084 }, { "epoch": 1.2789235412474849, "grad_norm": 0.3710108697414398, "learning_rate": 7.0925511877600195e-06, "loss": 0.3571, "step": 5085 }, { "epoch": 1.279175050301811, "grad_norm": 0.3306417465209961, "learning_rate": 7.091222159002786e-06, "loss": 0.3409, "step": 5086 }, { "epoch": 1.279426559356137, "grad_norm": 0.35231444239616394, "learning_rate": 7.0898929511454585e-06, "loss": 0.3652, "step": 5087 }, { "epoch": 1.2796780684104627, "grad_norm": 0.35659119486808777, "learning_rate": 7.088563564301874e-06, "loss": 0.3515, "step": 5088 }, { "epoch": 1.2799295774647887, "grad_norm": 0.35471397638320923, "learning_rate": 7.0872339985858855e-06, "loss": 0.3801, "step": 5089 }, { "epoch": 1.2801810865191148, "grad_norm": 0.3614024221897125, "learning_rate": 7.085904254111362e-06, "loss": 0.3543, "step": 5090 }, { "epoch": 1.2804325955734406, "grad_norm": 0.3554777503013611, "learning_rate": 7.0845743309921896e-06, "loss": 0.3349, "step": 5091 }, { "epoch": 1.2806841046277666, "grad_norm": 0.33316969871520996, "learning_rate": 7.083244229342266e-06, "loss": 0.3471, "step": 5092 }, { "epoch": 1.2809356136820926, "grad_norm": 0.3104395866394043, "learning_rate": 7.081913949275508e-06, "loss": 0.3418, "step": 5093 }, { "epoch": 1.2811871227364184, "grad_norm": 0.3648764491081238, "learning_rate": 7.080583490905845e-06, "loss": 0.3543, "step": 5094 }, { "epoch": 1.2814386317907445, "grad_norm": 0.36732056736946106, "learning_rate": 7.07925285434722e-06, "loss": 0.3516, "step": 5095 }, { "epoch": 1.2816901408450705, "grad_norm": 0.31676825881004333, "learning_rate": 7.077922039713596e-06, "loss": 0.3175, "step": 5096 }, { "epoch": 1.2819416498993963, "grad_norm": 0.35175764560699463, "learning_rate": 7.07659104711895e-06, "loss": 0.3602, "step": 5097 }, { "epoch": 1.2821931589537223, "grad_norm": 0.3779788911342621, "learning_rate": 7.075259876677272e-06, "loss": 0.3549, "step": 5098 }, { "epoch": 1.2824446680080483, "grad_norm": 0.35006478428840637, "learning_rate": 7.073928528502569e-06, "loss": 0.3774, "step": 5099 }, { "epoch": 1.2826961770623742, "grad_norm": 0.3286049962043762, "learning_rate": 7.07259700270886e-06, "loss": 0.3319, "step": 5100 }, { "epoch": 1.2829476861167002, "grad_norm": 0.3339432179927826, "learning_rate": 7.071265299410185e-06, "loss": 0.3614, "step": 5101 }, { "epoch": 1.2831991951710262, "grad_norm": 0.34671711921691895, "learning_rate": 7.069933418720594e-06, "loss": 0.3644, "step": 5102 }, { "epoch": 1.283450704225352, "grad_norm": 0.33326461911201477, "learning_rate": 7.068601360754157e-06, "loss": 0.3369, "step": 5103 }, { "epoch": 1.283702213279678, "grad_norm": 0.3220560550689697, "learning_rate": 7.0672691256249526e-06, "loss": 0.3421, "step": 5104 }, { "epoch": 1.283953722334004, "grad_norm": 0.3266070485115051, "learning_rate": 7.065936713447081e-06, "loss": 0.3523, "step": 5105 }, { "epoch": 1.2842052313883299, "grad_norm": 0.33557188510894775, "learning_rate": 7.064604124334655e-06, "loss": 0.3568, "step": 5106 }, { "epoch": 1.284456740442656, "grad_norm": 0.33787620067596436, "learning_rate": 7.063271358401802e-06, "loss": 0.3501, "step": 5107 }, { "epoch": 1.284708249496982, "grad_norm": 0.31968948245048523, "learning_rate": 7.061938415762664e-06, "loss": 0.3282, "step": 5108 }, { "epoch": 1.2849597585513077, "grad_norm": 0.309199720621109, "learning_rate": 7.060605296531401e-06, "loss": 0.3448, "step": 5109 }, { "epoch": 1.2852112676056338, "grad_norm": 0.31305763125419617, "learning_rate": 7.059272000822185e-06, "loss": 0.3649, "step": 5110 }, { "epoch": 1.2854627766599598, "grad_norm": 0.3350978195667267, "learning_rate": 7.057938528749204e-06, "loss": 0.3349, "step": 5111 }, { "epoch": 1.2857142857142856, "grad_norm": 0.3685004413127899, "learning_rate": 7.056604880426664e-06, "loss": 0.3658, "step": 5112 }, { "epoch": 1.2859657947686116, "grad_norm": 0.3085992634296417, "learning_rate": 7.055271055968782e-06, "loss": 0.3426, "step": 5113 }, { "epoch": 1.2862173038229376, "grad_norm": 0.4011576175689697, "learning_rate": 7.05393705548979e-06, "loss": 0.3575, "step": 5114 }, { "epoch": 1.2864688128772634, "grad_norm": 0.3109534680843353, "learning_rate": 7.052602879103939e-06, "loss": 0.3262, "step": 5115 }, { "epoch": 1.2867203219315895, "grad_norm": 0.3395739197731018, "learning_rate": 7.051268526925493e-06, "loss": 0.3527, "step": 5116 }, { "epoch": 1.2869718309859155, "grad_norm": 0.31294944882392883, "learning_rate": 7.049933999068731e-06, "loss": 0.3524, "step": 5117 }, { "epoch": 1.2872233400402415, "grad_norm": 0.36359307169914246, "learning_rate": 7.0485992956479466e-06, "loss": 0.338, "step": 5118 }, { "epoch": 1.2874748490945673, "grad_norm": 0.32687315344810486, "learning_rate": 7.0472644167774474e-06, "loss": 0.3405, "step": 5119 }, { "epoch": 1.2877263581488934, "grad_norm": 0.28143706917762756, "learning_rate": 7.045929362571559e-06, "loss": 0.3903, "step": 5120 }, { "epoch": 1.2879778672032194, "grad_norm": 0.32885175943374634, "learning_rate": 7.04459413314462e-06, "loss": 0.3788, "step": 5121 }, { "epoch": 1.2882293762575452, "grad_norm": 0.31145739555358887, "learning_rate": 7.0432587286109845e-06, "loss": 0.3523, "step": 5122 }, { "epoch": 1.2884808853118712, "grad_norm": 0.3306049108505249, "learning_rate": 7.041923149085021e-06, "loss": 0.3387, "step": 5123 }, { "epoch": 1.2887323943661972, "grad_norm": 0.31860941648483276, "learning_rate": 7.040587394681115e-06, "loss": 0.3411, "step": 5124 }, { "epoch": 1.2889839034205233, "grad_norm": 0.37950772047042847, "learning_rate": 7.039251465513664e-06, "loss": 0.3654, "step": 5125 }, { "epoch": 1.289235412474849, "grad_norm": 0.3230748474597931, "learning_rate": 7.037915361697082e-06, "loss": 0.372, "step": 5126 }, { "epoch": 1.289486921529175, "grad_norm": 0.3333132266998291, "learning_rate": 7.036579083345799e-06, "loss": 0.3265, "step": 5127 }, { "epoch": 1.2897384305835011, "grad_norm": 0.3597933053970337, "learning_rate": 7.035242630574257e-06, "loss": 0.363, "step": 5128 }, { "epoch": 1.289989939637827, "grad_norm": 0.3329184055328369, "learning_rate": 7.033906003496917e-06, "loss": 0.3719, "step": 5129 }, { "epoch": 1.290241448692153, "grad_norm": 0.32776138186454773, "learning_rate": 7.0325692022282535e-06, "loss": 0.3565, "step": 5130 }, { "epoch": 1.290492957746479, "grad_norm": 0.3282029628753662, "learning_rate": 7.031232226882752e-06, "loss": 0.345, "step": 5131 }, { "epoch": 1.2907444668008048, "grad_norm": 0.36610522866249084, "learning_rate": 7.029895077574918e-06, "loss": 0.331, "step": 5132 }, { "epoch": 1.2909959758551308, "grad_norm": 0.3236190974712372, "learning_rate": 7.028557754419269e-06, "loss": 0.3581, "step": 5133 }, { "epoch": 1.2912474849094568, "grad_norm": 0.34408023953437805, "learning_rate": 7.0272202575303395e-06, "loss": 0.3365, "step": 5134 }, { "epoch": 1.2914989939637826, "grad_norm": 0.32077619433403015, "learning_rate": 7.025882587022676e-06, "loss": 0.3443, "step": 5135 }, { "epoch": 1.2917505030181087, "grad_norm": 0.3198366165161133, "learning_rate": 7.024544743010845e-06, "loss": 0.348, "step": 5136 }, { "epoch": 1.2920020120724347, "grad_norm": 0.33954963088035583, "learning_rate": 7.023206725609421e-06, "loss": 0.3489, "step": 5137 }, { "epoch": 1.2922535211267605, "grad_norm": 0.3308905065059662, "learning_rate": 7.021868534932998e-06, "loss": 0.367, "step": 5138 }, { "epoch": 1.2925050301810865, "grad_norm": 0.3465730547904968, "learning_rate": 7.0205301710961825e-06, "loss": 0.3596, "step": 5139 }, { "epoch": 1.2927565392354126, "grad_norm": 0.3291175067424774, "learning_rate": 7.0191916342136e-06, "loss": 0.3774, "step": 5140 }, { "epoch": 1.2930080482897384, "grad_norm": 0.3781532645225525, "learning_rate": 7.017852924399886e-06, "loss": 0.3358, "step": 5141 }, { "epoch": 1.2932595573440644, "grad_norm": 0.3416769504547119, "learning_rate": 7.016514041769693e-06, "loss": 0.34, "step": 5142 }, { "epoch": 1.2935110663983904, "grad_norm": 0.31418657302856445, "learning_rate": 7.015174986437687e-06, "loss": 0.3507, "step": 5143 }, { "epoch": 1.2937625754527162, "grad_norm": 0.394488662481308, "learning_rate": 7.01383575851855e-06, "loss": 0.3669, "step": 5144 }, { "epoch": 1.2940140845070423, "grad_norm": 0.3305054008960724, "learning_rate": 7.012496358126979e-06, "loss": 0.37, "step": 5145 }, { "epoch": 1.2942655935613683, "grad_norm": 0.33333835005760193, "learning_rate": 7.011156785377686e-06, "loss": 0.3599, "step": 5146 }, { "epoch": 1.294517102615694, "grad_norm": 0.32395216822624207, "learning_rate": 7.009817040385395e-06, "loss": 0.3402, "step": 5147 }, { "epoch": 1.29476861167002, "grad_norm": 0.35054081678390503, "learning_rate": 7.008477123264849e-06, "loss": 0.3328, "step": 5148 }, { "epoch": 1.2950201207243461, "grad_norm": 0.32759636640548706, "learning_rate": 7.007137034130801e-06, "loss": 0.3594, "step": 5149 }, { "epoch": 1.295271629778672, "grad_norm": 0.3191884160041809, "learning_rate": 7.005796773098023e-06, "loss": 0.3493, "step": 5150 }, { "epoch": 1.295523138832998, "grad_norm": 0.33479100465774536, "learning_rate": 7.0044563402813e-06, "loss": 0.3783, "step": 5151 }, { "epoch": 1.295774647887324, "grad_norm": 0.3412805199623108, "learning_rate": 7.003115735795431e-06, "loss": 0.3371, "step": 5152 }, { "epoch": 1.2960261569416498, "grad_norm": 0.3714444041252136, "learning_rate": 7.00177495975523e-06, "loss": 0.3369, "step": 5153 }, { "epoch": 1.2962776659959758, "grad_norm": 0.33331310749053955, "learning_rate": 7.000434012275528e-06, "loss": 0.3524, "step": 5154 }, { "epoch": 1.2965291750503019, "grad_norm": 0.36643174290657043, "learning_rate": 6.999092893471166e-06, "loss": 0.3589, "step": 5155 }, { "epoch": 1.2967806841046277, "grad_norm": 0.35542479157447815, "learning_rate": 6.997751603457006e-06, "loss": 0.3364, "step": 5156 }, { "epoch": 1.2970321931589537, "grad_norm": 0.3719502389431, "learning_rate": 6.996410142347918e-06, "loss": 0.3643, "step": 5157 }, { "epoch": 1.2972837022132797, "grad_norm": 0.3411712348461151, "learning_rate": 6.995068510258791e-06, "loss": 0.3813, "step": 5158 }, { "epoch": 1.2975352112676055, "grad_norm": 0.3225451707839966, "learning_rate": 6.993726707304527e-06, "loss": 0.3343, "step": 5159 }, { "epoch": 1.2977867203219315, "grad_norm": 0.3910596966743469, "learning_rate": 6.992384733600044e-06, "loss": 0.3578, "step": 5160 }, { "epoch": 1.2980382293762576, "grad_norm": 0.3352847993373871, "learning_rate": 6.991042589260271e-06, "loss": 0.3369, "step": 5161 }, { "epoch": 1.2982897384305834, "grad_norm": 0.3266632854938507, "learning_rate": 6.989700274400157e-06, "loss": 0.3751, "step": 5162 }, { "epoch": 1.2985412474849094, "grad_norm": 0.36302119493484497, "learning_rate": 6.988357789134662e-06, "loss": 0.3549, "step": 5163 }, { "epoch": 1.2987927565392354, "grad_norm": 0.3544050455093384, "learning_rate": 6.987015133578763e-06, "loss": 0.3655, "step": 5164 }, { "epoch": 1.2990442655935612, "grad_norm": 0.3703245520591736, "learning_rate": 6.985672307847447e-06, "loss": 0.3511, "step": 5165 }, { "epoch": 1.2992957746478873, "grad_norm": 0.3711421489715576, "learning_rate": 6.98432931205572e-06, "loss": 0.3394, "step": 5166 }, { "epoch": 1.2995472837022133, "grad_norm": 0.37200912833213806, "learning_rate": 6.982986146318602e-06, "loss": 0.3642, "step": 5167 }, { "epoch": 1.2997987927565393, "grad_norm": 0.35503825545310974, "learning_rate": 6.981642810751126e-06, "loss": 0.3262, "step": 5168 }, { "epoch": 1.3000503018108651, "grad_norm": 0.35312941670417786, "learning_rate": 6.980299305468341e-06, "loss": 0.3773, "step": 5169 }, { "epoch": 1.3003018108651911, "grad_norm": 0.35091546177864075, "learning_rate": 6.978955630585309e-06, "loss": 0.3409, "step": 5170 }, { "epoch": 1.3005533199195172, "grad_norm": 0.30944791436195374, "learning_rate": 6.9776117862171065e-06, "loss": 0.3384, "step": 5171 }, { "epoch": 1.3008048289738432, "grad_norm": 0.36516040563583374, "learning_rate": 6.976267772478828e-06, "loss": 0.3533, "step": 5172 }, { "epoch": 1.301056338028169, "grad_norm": 0.34464800357818604, "learning_rate": 6.974923589485577e-06, "loss": 0.3366, "step": 5173 }, { "epoch": 1.301307847082495, "grad_norm": 0.3472990095615387, "learning_rate": 6.973579237352475e-06, "loss": 0.3472, "step": 5174 }, { "epoch": 1.301559356136821, "grad_norm": 0.33396056294441223, "learning_rate": 6.97223471619466e-06, "loss": 0.3405, "step": 5175 }, { "epoch": 1.3018108651911469, "grad_norm": 0.31734970211982727, "learning_rate": 6.970890026127278e-06, "loss": 0.34, "step": 5176 }, { "epoch": 1.3020623742454729, "grad_norm": 0.34466373920440674, "learning_rate": 6.9695451672654965e-06, "loss": 0.3438, "step": 5177 }, { "epoch": 1.302313883299799, "grad_norm": 0.3734425902366638, "learning_rate": 6.968200139724492e-06, "loss": 0.3757, "step": 5178 }, { "epoch": 1.3025653923541247, "grad_norm": 0.3214707374572754, "learning_rate": 6.966854943619459e-06, "loss": 0.3634, "step": 5179 }, { "epoch": 1.3028169014084507, "grad_norm": 0.32335829734802246, "learning_rate": 6.965509579065605e-06, "loss": 0.3365, "step": 5180 }, { "epoch": 1.3030684104627768, "grad_norm": 0.3490079641342163, "learning_rate": 6.964164046178151e-06, "loss": 0.3431, "step": 5181 }, { "epoch": 1.3033199195171026, "grad_norm": 0.3411654531955719, "learning_rate": 6.962818345072333e-06, "loss": 0.365, "step": 5182 }, { "epoch": 1.3035714285714286, "grad_norm": 0.3305014669895172, "learning_rate": 6.961472475863406e-06, "loss": 0.342, "step": 5183 }, { "epoch": 1.3038229376257546, "grad_norm": 0.33585986495018005, "learning_rate": 6.96012643866663e-06, "loss": 0.336, "step": 5184 }, { "epoch": 1.3040744466800804, "grad_norm": 0.3303409516811371, "learning_rate": 6.958780233597289e-06, "loss": 0.3481, "step": 5185 }, { "epoch": 1.3043259557344065, "grad_norm": 0.362411230802536, "learning_rate": 6.957433860770674e-06, "loss": 0.3593, "step": 5186 }, { "epoch": 1.3045774647887325, "grad_norm": 0.3479267656803131, "learning_rate": 6.956087320302094e-06, "loss": 0.3674, "step": 5187 }, { "epoch": 1.3048289738430583, "grad_norm": 0.3524285554885864, "learning_rate": 6.9547406123068724e-06, "loss": 0.3559, "step": 5188 }, { "epoch": 1.3050804828973843, "grad_norm": 0.3306998014450073, "learning_rate": 6.953393736900346e-06, "loss": 0.3547, "step": 5189 }, { "epoch": 1.3053319919517103, "grad_norm": 0.35222721099853516, "learning_rate": 6.9520466941978685e-06, "loss": 0.3576, "step": 5190 }, { "epoch": 1.3055835010060362, "grad_norm": 0.30763161182403564, "learning_rate": 6.950699484314802e-06, "loss": 0.359, "step": 5191 }, { "epoch": 1.3058350100603622, "grad_norm": 0.3283224105834961, "learning_rate": 6.949352107366528e-06, "loss": 0.3612, "step": 5192 }, { "epoch": 1.3060865191146882, "grad_norm": 0.32542482018470764, "learning_rate": 6.9480045634684405e-06, "loss": 0.3491, "step": 5193 }, { "epoch": 1.306338028169014, "grad_norm": 0.3252429962158203, "learning_rate": 6.94665685273595e-06, "loss": 0.3556, "step": 5194 }, { "epoch": 1.30658953722334, "grad_norm": 0.30148446559906006, "learning_rate": 6.945308975284478e-06, "loss": 0.347, "step": 5195 }, { "epoch": 1.306841046277666, "grad_norm": 0.3339576721191406, "learning_rate": 6.943960931229462e-06, "loss": 0.3376, "step": 5196 }, { "epoch": 1.3070925553319919, "grad_norm": 0.3315853476524353, "learning_rate": 6.942612720686355e-06, "loss": 0.3464, "step": 5197 }, { "epoch": 1.307344064386318, "grad_norm": 0.3184394836425781, "learning_rate": 6.9412643437706194e-06, "loss": 0.3549, "step": 5198 }, { "epoch": 1.307595573440644, "grad_norm": 0.34649431705474854, "learning_rate": 6.939915800597738e-06, "loss": 0.3625, "step": 5199 }, { "epoch": 1.3078470824949697, "grad_norm": 0.34910690784454346, "learning_rate": 6.938567091283205e-06, "loss": 0.3417, "step": 5200 }, { "epoch": 1.3080985915492958, "grad_norm": 0.34020349383354187, "learning_rate": 6.937218215942527e-06, "loss": 0.3706, "step": 5201 }, { "epoch": 1.3083501006036218, "grad_norm": 0.30801448225975037, "learning_rate": 6.935869174691229e-06, "loss": 0.351, "step": 5202 }, { "epoch": 1.3086016096579476, "grad_norm": 0.3194606900215149, "learning_rate": 6.934519967644847e-06, "loss": 0.3343, "step": 5203 }, { "epoch": 1.3088531187122736, "grad_norm": 0.3294532001018524, "learning_rate": 6.93317059491893e-06, "loss": 0.3352, "step": 5204 }, { "epoch": 1.3091046277665996, "grad_norm": 0.3065512180328369, "learning_rate": 6.931821056629048e-06, "loss": 0.362, "step": 5205 }, { "epoch": 1.3093561368209254, "grad_norm": 0.31400826573371887, "learning_rate": 6.930471352890777e-06, "loss": 0.3369, "step": 5206 }, { "epoch": 1.3096076458752515, "grad_norm": 0.32463496923446655, "learning_rate": 6.9291214838197114e-06, "loss": 0.3772, "step": 5207 }, { "epoch": 1.3098591549295775, "grad_norm": 0.3142559230327606, "learning_rate": 6.92777144953146e-06, "loss": 0.3552, "step": 5208 }, { "epoch": 1.3101106639839033, "grad_norm": 0.33447784185409546, "learning_rate": 6.926421250141644e-06, "loss": 0.372, "step": 5209 }, { "epoch": 1.3103621730382293, "grad_norm": 0.34302520751953125, "learning_rate": 6.925070885765899e-06, "loss": 0.335, "step": 5210 }, { "epoch": 1.3106136820925554, "grad_norm": 0.32417216897010803, "learning_rate": 6.923720356519877e-06, "loss": 0.3592, "step": 5211 }, { "epoch": 1.3108651911468812, "grad_norm": 0.30292844772338867, "learning_rate": 6.922369662519239e-06, "loss": 0.338, "step": 5212 }, { "epoch": 1.3111167002012072, "grad_norm": 0.32632318139076233, "learning_rate": 6.921018803879667e-06, "loss": 0.3547, "step": 5213 }, { "epoch": 1.3113682092555332, "grad_norm": 0.3217123746871948, "learning_rate": 6.919667780716852e-06, "loss": 0.3661, "step": 5214 }, { "epoch": 1.311619718309859, "grad_norm": 0.32875531911849976, "learning_rate": 6.918316593146501e-06, "loss": 0.3559, "step": 5215 }, { "epoch": 1.311871227364185, "grad_norm": 0.33865031599998474, "learning_rate": 6.916965241284335e-06, "loss": 0.3564, "step": 5216 }, { "epoch": 1.312122736418511, "grad_norm": 0.2909534275531769, "learning_rate": 6.9156137252460885e-06, "loss": 0.3394, "step": 5217 }, { "epoch": 1.312374245472837, "grad_norm": 0.3667255640029907, "learning_rate": 6.91426204514751e-06, "loss": 0.355, "step": 5218 }, { "epoch": 1.312625754527163, "grad_norm": 0.34232789278030396, "learning_rate": 6.912910201104363e-06, "loss": 0.343, "step": 5219 }, { "epoch": 1.312877263581489, "grad_norm": 0.33543142676353455, "learning_rate": 6.911558193232426e-06, "loss": 0.3417, "step": 5220 }, { "epoch": 1.313128772635815, "grad_norm": 0.3160308003425598, "learning_rate": 6.910206021647487e-06, "loss": 0.3491, "step": 5221 }, { "epoch": 1.313380281690141, "grad_norm": 0.3830724060535431, "learning_rate": 6.908853686465353e-06, "loss": 0.3414, "step": 5222 }, { "epoch": 1.3136317907444668, "grad_norm": 0.33467820286750793, "learning_rate": 6.907501187801843e-06, "loss": 0.3589, "step": 5223 }, { "epoch": 1.3138832997987928, "grad_norm": 0.3496091961860657, "learning_rate": 6.906148525772789e-06, "loss": 0.3608, "step": 5224 }, { "epoch": 1.3141348088531188, "grad_norm": 0.3401433229446411, "learning_rate": 6.904795700494038e-06, "loss": 0.3581, "step": 5225 }, { "epoch": 1.3143863179074446, "grad_norm": 0.3567175567150116, "learning_rate": 6.9034427120814505e-06, "loss": 0.337, "step": 5226 }, { "epoch": 1.3146378269617707, "grad_norm": 0.33114707469940186, "learning_rate": 6.902089560650904e-06, "loss": 0.3523, "step": 5227 }, { "epoch": 1.3148893360160967, "grad_norm": 0.3312157988548279, "learning_rate": 6.900736246318287e-06, "loss": 0.353, "step": 5228 }, { "epoch": 1.3151408450704225, "grad_norm": 0.3470541536808014, "learning_rate": 6.899382769199501e-06, "loss": 0.358, "step": 5229 }, { "epoch": 1.3153923541247485, "grad_norm": 0.3177447021007538, "learning_rate": 6.898029129410463e-06, "loss": 0.3718, "step": 5230 }, { "epoch": 1.3156438631790746, "grad_norm": 0.297389954328537, "learning_rate": 6.896675327067104e-06, "loss": 0.342, "step": 5231 }, { "epoch": 1.3158953722334004, "grad_norm": 0.33685746788978577, "learning_rate": 6.895321362285369e-06, "loss": 0.3386, "step": 5232 }, { "epoch": 1.3161468812877264, "grad_norm": 0.3213297724723816, "learning_rate": 6.893967235181216e-06, "loss": 0.3766, "step": 5233 }, { "epoch": 1.3163983903420524, "grad_norm": 0.2946451008319855, "learning_rate": 6.892612945870618e-06, "loss": 0.3446, "step": 5234 }, { "epoch": 1.3166498993963782, "grad_norm": 0.3347758650779724, "learning_rate": 6.891258494469561e-06, "loss": 0.3605, "step": 5235 }, { "epoch": 1.3169014084507042, "grad_norm": 0.30589884519577026, "learning_rate": 6.889903881094047e-06, "loss": 0.3453, "step": 5236 }, { "epoch": 1.3171529175050303, "grad_norm": 0.3220566511154175, "learning_rate": 6.888549105860088e-06, "loss": 0.3572, "step": 5237 }, { "epoch": 1.317404426559356, "grad_norm": 0.3071398138999939, "learning_rate": 6.887194168883713e-06, "loss": 0.3602, "step": 5238 }, { "epoch": 1.317655935613682, "grad_norm": 0.34958013892173767, "learning_rate": 6.885839070280964e-06, "loss": 0.3445, "step": 5239 }, { "epoch": 1.3179074446680081, "grad_norm": 0.33036911487579346, "learning_rate": 6.884483810167896e-06, "loss": 0.3653, "step": 5240 }, { "epoch": 1.318158953722334, "grad_norm": 0.3227456212043762, "learning_rate": 6.883128388660578e-06, "loss": 0.3573, "step": 5241 }, { "epoch": 1.31841046277666, "grad_norm": 0.304959774017334, "learning_rate": 6.881772805875095e-06, "loss": 0.3421, "step": 5242 }, { "epoch": 1.318661971830986, "grad_norm": 0.3372381627559662, "learning_rate": 6.8804170619275445e-06, "loss": 0.3234, "step": 5243 }, { "epoch": 1.3189134808853118, "grad_norm": 0.3018898665904999, "learning_rate": 6.879061156934035e-06, "loss": 0.3658, "step": 5244 }, { "epoch": 1.3191649899396378, "grad_norm": 0.3537410497665405, "learning_rate": 6.877705091010693e-06, "loss": 0.3786, "step": 5245 }, { "epoch": 1.3194164989939638, "grad_norm": 0.3048062026500702, "learning_rate": 6.8763488642736585e-06, "loss": 0.3467, "step": 5246 }, { "epoch": 1.3196680080482897, "grad_norm": 0.3114982545375824, "learning_rate": 6.874992476839081e-06, "loss": 0.3364, "step": 5247 }, { "epoch": 1.3199195171026157, "grad_norm": 0.35760530829429626, "learning_rate": 6.873635928823127e-06, "loss": 0.3648, "step": 5248 }, { "epoch": 1.3201710261569417, "grad_norm": 0.3398659825325012, "learning_rate": 6.8722792203419775e-06, "loss": 0.3682, "step": 5249 }, { "epoch": 1.3204225352112675, "grad_norm": 0.29969504475593567, "learning_rate": 6.8709223515118265e-06, "loss": 0.3255, "step": 5250 }, { "epoch": 1.3206740442655935, "grad_norm": 0.31497398018836975, "learning_rate": 6.8695653224488805e-06, "loss": 0.382, "step": 5251 }, { "epoch": 1.3209255533199196, "grad_norm": 0.3121738135814667, "learning_rate": 6.868208133269359e-06, "loss": 0.3807, "step": 5252 }, { "epoch": 1.3211770623742454, "grad_norm": 0.31807759404182434, "learning_rate": 6.8668507840895005e-06, "loss": 0.3508, "step": 5253 }, { "epoch": 1.3214285714285714, "grad_norm": 0.3177856504917145, "learning_rate": 6.86549327502555e-06, "loss": 0.3825, "step": 5254 }, { "epoch": 1.3216800804828974, "grad_norm": 0.3255070447921753, "learning_rate": 6.864135606193771e-06, "loss": 0.3538, "step": 5255 }, { "epoch": 1.3219315895372232, "grad_norm": 0.33017322421073914, "learning_rate": 6.862777777710441e-06, "loss": 0.3318, "step": 5256 }, { "epoch": 1.3221830985915493, "grad_norm": 0.3061206340789795, "learning_rate": 6.861419789691845e-06, "loss": 0.351, "step": 5257 }, { "epoch": 1.3224346076458753, "grad_norm": 0.3208281993865967, "learning_rate": 6.860061642254291e-06, "loss": 0.3615, "step": 5258 }, { "epoch": 1.322686116700201, "grad_norm": 0.31412699818611145, "learning_rate": 6.858703335514093e-06, "loss": 0.3814, "step": 5259 }, { "epoch": 1.3229376257545271, "grad_norm": 0.3277878165245056, "learning_rate": 6.857344869587583e-06, "loss": 0.3539, "step": 5260 }, { "epoch": 1.3231891348088531, "grad_norm": 0.34601694345474243, "learning_rate": 6.855986244591104e-06, "loss": 0.3516, "step": 5261 }, { "epoch": 1.323440643863179, "grad_norm": 0.33867859840393066, "learning_rate": 6.854627460641015e-06, "loss": 0.3606, "step": 5262 }, { "epoch": 1.323692152917505, "grad_norm": 0.3611641824245453, "learning_rate": 6.8532685178536865e-06, "loss": 0.3437, "step": 5263 }, { "epoch": 1.323943661971831, "grad_norm": 0.3286331593990326, "learning_rate": 6.851909416345502e-06, "loss": 0.3623, "step": 5264 }, { "epoch": 1.3241951710261568, "grad_norm": 0.3348291218280792, "learning_rate": 6.850550156232862e-06, "loss": 0.3634, "step": 5265 }, { "epoch": 1.3244466800804828, "grad_norm": 0.3188924491405487, "learning_rate": 6.849190737632179e-06, "loss": 0.3462, "step": 5266 }, { "epoch": 1.3246981891348089, "grad_norm": 0.35183659195899963, "learning_rate": 6.847831160659877e-06, "loss": 0.355, "step": 5267 }, { "epoch": 1.3249496981891349, "grad_norm": 0.2944953739643097, "learning_rate": 6.846471425432397e-06, "loss": 0.3173, "step": 5268 }, { "epoch": 1.3252012072434607, "grad_norm": 0.34920939803123474, "learning_rate": 6.845111532066189e-06, "loss": 0.3544, "step": 5269 }, { "epoch": 1.3254527162977867, "grad_norm": 0.3055819571018219, "learning_rate": 6.843751480677723e-06, "loss": 0.3572, "step": 5270 }, { "epoch": 1.3257042253521127, "grad_norm": 0.33443525433540344, "learning_rate": 6.8423912713834765e-06, "loss": 0.3686, "step": 5271 }, { "epoch": 1.3259557344064388, "grad_norm": 0.3278621435165405, "learning_rate": 6.841030904299943e-06, "loss": 0.368, "step": 5272 }, { "epoch": 1.3262072434607646, "grad_norm": 0.3702344000339508, "learning_rate": 6.839670379543632e-06, "loss": 0.3329, "step": 5273 }, { "epoch": 1.3264587525150906, "grad_norm": 0.31350359320640564, "learning_rate": 6.838309697231061e-06, "loss": 0.3686, "step": 5274 }, { "epoch": 1.3267102615694166, "grad_norm": 0.3589387834072113, "learning_rate": 6.836948857478764e-06, "loss": 0.3401, "step": 5275 }, { "epoch": 1.3269617706237424, "grad_norm": 0.37206393480300903, "learning_rate": 6.83558786040329e-06, "loss": 0.3487, "step": 5276 }, { "epoch": 1.3272132796780685, "grad_norm": 0.3218708336353302, "learning_rate": 6.834226706121198e-06, "loss": 0.3381, "step": 5277 }, { "epoch": 1.3274647887323945, "grad_norm": 0.32571589946746826, "learning_rate": 6.832865394749065e-06, "loss": 0.349, "step": 5278 }, { "epoch": 1.3277162977867203, "grad_norm": 0.3747791051864624, "learning_rate": 6.831503926403476e-06, "loss": 0.3726, "step": 5279 }, { "epoch": 1.3279678068410463, "grad_norm": 0.33507394790649414, "learning_rate": 6.830142301201035e-06, "loss": 0.3544, "step": 5280 }, { "epoch": 1.3282193158953723, "grad_norm": 0.35225021839141846, "learning_rate": 6.8287805192583534e-06, "loss": 0.3614, "step": 5281 }, { "epoch": 1.3284708249496981, "grad_norm": 0.3295999765396118, "learning_rate": 6.8274185806920625e-06, "loss": 0.3604, "step": 5282 }, { "epoch": 1.3287223340040242, "grad_norm": 0.3460395038127899, "learning_rate": 6.826056485618803e-06, "loss": 0.352, "step": 5283 }, { "epoch": 1.3289738430583502, "grad_norm": 0.3388642370700836, "learning_rate": 6.824694234155228e-06, "loss": 0.3313, "step": 5284 }, { "epoch": 1.329225352112676, "grad_norm": 0.3405665457248688, "learning_rate": 6.823331826418008e-06, "loss": 0.3362, "step": 5285 }, { "epoch": 1.329476861167002, "grad_norm": 0.3302387297153473, "learning_rate": 6.8219692625238236e-06, "loss": 0.3406, "step": 5286 }, { "epoch": 1.329728370221328, "grad_norm": 0.3334062695503235, "learning_rate": 6.8206065425893695e-06, "loss": 0.3437, "step": 5287 }, { "epoch": 1.3299798792756539, "grad_norm": 0.3606247007846832, "learning_rate": 6.819243666731356e-06, "loss": 0.3348, "step": 5288 }, { "epoch": 1.33023138832998, "grad_norm": 0.3403606712818146, "learning_rate": 6.817880635066503e-06, "loss": 0.3566, "step": 5289 }, { "epoch": 1.330482897384306, "grad_norm": 0.29230329394340515, "learning_rate": 6.816517447711546e-06, "loss": 0.3524, "step": 5290 }, { "epoch": 1.3307344064386317, "grad_norm": 0.35570812225341797, "learning_rate": 6.815154104783233e-06, "loss": 0.3324, "step": 5291 }, { "epoch": 1.3309859154929577, "grad_norm": 0.3560788333415985, "learning_rate": 6.813790606398327e-06, "loss": 0.3826, "step": 5292 }, { "epoch": 1.3312374245472838, "grad_norm": 0.34960126876831055, "learning_rate": 6.8124269526736035e-06, "loss": 0.3403, "step": 5293 }, { "epoch": 1.3314889336016096, "grad_norm": 0.37958434224128723, "learning_rate": 6.811063143725849e-06, "loss": 0.3727, "step": 5294 }, { "epoch": 1.3317404426559356, "grad_norm": 0.3619947135448456, "learning_rate": 6.809699179671867e-06, "loss": 0.3378, "step": 5295 }, { "epoch": 1.3319919517102616, "grad_norm": 0.35475045442581177, "learning_rate": 6.808335060628471e-06, "loss": 0.3679, "step": 5296 }, { "epoch": 1.3322434607645874, "grad_norm": 0.3520188331604004, "learning_rate": 6.806970786712489e-06, "loss": 0.3589, "step": 5297 }, { "epoch": 1.3324949698189135, "grad_norm": 0.3598145842552185, "learning_rate": 6.8056063580407636e-06, "loss": 0.3664, "step": 5298 }, { "epoch": 1.3327464788732395, "grad_norm": 0.3405790627002716, "learning_rate": 6.804241774730152e-06, "loss": 0.3671, "step": 5299 }, { "epoch": 1.3329979879275653, "grad_norm": 0.33218327164649963, "learning_rate": 6.802877036897518e-06, "loss": 0.3589, "step": 5300 }, { "epoch": 1.3332494969818913, "grad_norm": 0.33771365880966187, "learning_rate": 6.801512144659745e-06, "loss": 0.364, "step": 5301 }, { "epoch": 1.3335010060362174, "grad_norm": 0.31353524327278137, "learning_rate": 6.800147098133727e-06, "loss": 0.3522, "step": 5302 }, { "epoch": 1.3337525150905432, "grad_norm": 0.2938469350337982, "learning_rate": 6.798781897436371e-06, "loss": 0.3508, "step": 5303 }, { "epoch": 1.3340040241448692, "grad_norm": 0.342989057302475, "learning_rate": 6.7974165426845996e-06, "loss": 0.3249, "step": 5304 }, { "epoch": 1.3342555331991952, "grad_norm": 0.3309261202812195, "learning_rate": 6.796051033995346e-06, "loss": 0.3399, "step": 5305 }, { "epoch": 1.334507042253521, "grad_norm": 0.3215269446372986, "learning_rate": 6.7946853714855565e-06, "loss": 0.3515, "step": 5306 }, { "epoch": 1.334758551307847, "grad_norm": 0.33649203181266785, "learning_rate": 6.793319555272192e-06, "loss": 0.3635, "step": 5307 }, { "epoch": 1.335010060362173, "grad_norm": 0.34016871452331543, "learning_rate": 6.791953585472228e-06, "loss": 0.3454, "step": 5308 }, { "epoch": 1.3352615694164989, "grad_norm": 0.3312501609325409, "learning_rate": 6.790587462202649e-06, "loss": 0.3533, "step": 5309 }, { "epoch": 1.335513078470825, "grad_norm": 0.33119526505470276, "learning_rate": 6.789221185580456e-06, "loss": 0.3511, "step": 5310 }, { "epoch": 1.335764587525151, "grad_norm": 0.325152188539505, "learning_rate": 6.7878547557226616e-06, "loss": 0.3515, "step": 5311 }, { "epoch": 1.3360160965794767, "grad_norm": 0.31850430369377136, "learning_rate": 6.786488172746293e-06, "loss": 0.3725, "step": 5312 }, { "epoch": 1.3362676056338028, "grad_norm": 0.3241901397705078, "learning_rate": 6.785121436768387e-06, "loss": 0.3467, "step": 5313 }, { "epoch": 1.3365191146881288, "grad_norm": 0.3087508976459503, "learning_rate": 6.783754547905999e-06, "loss": 0.3348, "step": 5314 }, { "epoch": 1.3367706237424548, "grad_norm": 0.32791846990585327, "learning_rate": 6.782387506276191e-06, "loss": 0.3526, "step": 5315 }, { "epoch": 1.3370221327967806, "grad_norm": 0.32451775670051575, "learning_rate": 6.781020311996046e-06, "loss": 0.3459, "step": 5316 }, { "epoch": 1.3372736418511066, "grad_norm": 0.2936570644378662, "learning_rate": 6.7796529651826525e-06, "loss": 0.3415, "step": 5317 }, { "epoch": 1.3375251509054327, "grad_norm": 0.3310757577419281, "learning_rate": 6.778285465953116e-06, "loss": 0.3567, "step": 5318 }, { "epoch": 1.3377766599597585, "grad_norm": 0.3124331533908844, "learning_rate": 6.776917814424555e-06, "loss": 0.3571, "step": 5319 }, { "epoch": 1.3380281690140845, "grad_norm": 0.3068905174732208, "learning_rate": 6.775550010714099e-06, "loss": 0.3553, "step": 5320 }, { "epoch": 1.3382796780684105, "grad_norm": 0.31664854288101196, "learning_rate": 6.774182054938893e-06, "loss": 0.3275, "step": 5321 }, { "epoch": 1.3385311871227366, "grad_norm": 0.3378714919090271, "learning_rate": 6.772813947216092e-06, "loss": 0.3547, "step": 5322 }, { "epoch": 1.3387826961770624, "grad_norm": 0.3408490717411041, "learning_rate": 6.771445687662868e-06, "loss": 0.3551, "step": 5323 }, { "epoch": 1.3390342052313884, "grad_norm": 0.3183440864086151, "learning_rate": 6.770077276396402e-06, "loss": 0.382, "step": 5324 }, { "epoch": 1.3392857142857144, "grad_norm": 0.32740750908851624, "learning_rate": 6.7687087135338915e-06, "loss": 0.3654, "step": 5325 }, { "epoch": 1.3395372233400402, "grad_norm": 0.3713664412498474, "learning_rate": 6.7673399991925445e-06, "loss": 0.3561, "step": 5326 }, { "epoch": 1.3397887323943662, "grad_norm": 0.3032859265804291, "learning_rate": 6.765971133489584e-06, "loss": 0.3618, "step": 5327 }, { "epoch": 1.3400402414486923, "grad_norm": 0.36480486392974854, "learning_rate": 6.764602116542243e-06, "loss": 0.3482, "step": 5328 }, { "epoch": 1.340291750503018, "grad_norm": 0.3249639570713043, "learning_rate": 6.763232948467769e-06, "loss": 0.3664, "step": 5329 }, { "epoch": 1.340543259557344, "grad_norm": 0.338945209980011, "learning_rate": 6.761863629383425e-06, "loss": 0.3656, "step": 5330 }, { "epoch": 1.3407947686116701, "grad_norm": 0.31661343574523926, "learning_rate": 6.760494159406483e-06, "loss": 0.3439, "step": 5331 }, { "epoch": 1.341046277665996, "grad_norm": 0.3573559522628784, "learning_rate": 6.759124538654231e-06, "loss": 0.3649, "step": 5332 }, { "epoch": 1.341297786720322, "grad_norm": 0.3466895818710327, "learning_rate": 6.757754767243966e-06, "loss": 0.3554, "step": 5333 }, { "epoch": 1.341549295774648, "grad_norm": 0.32429978251457214, "learning_rate": 6.756384845293002e-06, "loss": 0.3441, "step": 5334 }, { "epoch": 1.3418008048289738, "grad_norm": 0.3625079393386841, "learning_rate": 6.7550147729186635e-06, "loss": 0.354, "step": 5335 }, { "epoch": 1.3420523138832998, "grad_norm": 0.33437997102737427, "learning_rate": 6.75364455023829e-06, "loss": 0.3665, "step": 5336 }, { "epoch": 1.3423038229376258, "grad_norm": 0.32188209891319275, "learning_rate": 6.7522741773692305e-06, "loss": 0.3652, "step": 5337 }, { "epoch": 1.3425553319919517, "grad_norm": 0.3210011422634125, "learning_rate": 6.75090365442885e-06, "loss": 0.3763, "step": 5338 }, { "epoch": 1.3428068410462777, "grad_norm": 0.32578200101852417, "learning_rate": 6.749532981534526e-06, "loss": 0.354, "step": 5339 }, { "epoch": 1.3430583501006037, "grad_norm": 0.3230711817741394, "learning_rate": 6.748162158803646e-06, "loss": 0.3456, "step": 5340 }, { "epoch": 1.3433098591549295, "grad_norm": 0.3201490342617035, "learning_rate": 6.746791186353614e-06, "loss": 0.3599, "step": 5341 }, { "epoch": 1.3435613682092555, "grad_norm": 0.3256816267967224, "learning_rate": 6.745420064301845e-06, "loss": 0.3373, "step": 5342 }, { "epoch": 1.3438128772635816, "grad_norm": 0.3285231590270996, "learning_rate": 6.744048792765767e-06, "loss": 0.3481, "step": 5343 }, { "epoch": 1.3440643863179074, "grad_norm": 0.3109275698661804, "learning_rate": 6.74267737186282e-06, "loss": 0.3511, "step": 5344 }, { "epoch": 1.3443158953722334, "grad_norm": 0.32050541043281555, "learning_rate": 6.7413058017104585e-06, "loss": 0.3641, "step": 5345 }, { "epoch": 1.3445674044265594, "grad_norm": 0.3238290846347809, "learning_rate": 6.739934082426149e-06, "loss": 0.365, "step": 5346 }, { "epoch": 1.3448189134808852, "grad_norm": 0.3411909341812134, "learning_rate": 6.73856221412737e-06, "loss": 0.3451, "step": 5347 }, { "epoch": 1.3450704225352113, "grad_norm": 0.33590638637542725, "learning_rate": 6.737190196931614e-06, "loss": 0.3539, "step": 5348 }, { "epoch": 1.3453219315895373, "grad_norm": 0.31567126512527466, "learning_rate": 6.735818030956386e-06, "loss": 0.3732, "step": 5349 }, { "epoch": 1.345573440643863, "grad_norm": 0.3316844701766968, "learning_rate": 6.734445716319202e-06, "loss": 0.35, "step": 5350 }, { "epoch": 1.345824949698189, "grad_norm": 0.2987160384654999, "learning_rate": 6.733073253137593e-06, "loss": 0.3509, "step": 5351 }, { "epoch": 1.3460764587525151, "grad_norm": 0.3660013675689697, "learning_rate": 6.731700641529103e-06, "loss": 0.3718, "step": 5352 }, { "epoch": 1.346327967806841, "grad_norm": 0.3536967635154724, "learning_rate": 6.730327881611286e-06, "loss": 0.37, "step": 5353 }, { "epoch": 1.346579476861167, "grad_norm": 0.34046685695648193, "learning_rate": 6.72895497350171e-06, "loss": 0.3546, "step": 5354 }, { "epoch": 1.346830985915493, "grad_norm": 0.3331921100616455, "learning_rate": 6.727581917317958e-06, "loss": 0.3619, "step": 5355 }, { "epoch": 1.3470824949698188, "grad_norm": 0.3279572129249573, "learning_rate": 6.726208713177622e-06, "loss": 0.3664, "step": 5356 }, { "epoch": 1.3473340040241448, "grad_norm": 0.3194010257720947, "learning_rate": 6.7248353611983084e-06, "loss": 0.3375, "step": 5357 }, { "epoch": 1.3475855130784709, "grad_norm": 0.28898268938064575, "learning_rate": 6.7234618614976375e-06, "loss": 0.3384, "step": 5358 }, { "epoch": 1.3478370221327967, "grad_norm": 0.33326342701911926, "learning_rate": 6.72208821419324e-06, "loss": 0.3417, "step": 5359 }, { "epoch": 1.3480885311871227, "grad_norm": 0.36949121952056885, "learning_rate": 6.7207144194027605e-06, "loss": 0.379, "step": 5360 }, { "epoch": 1.3483400402414487, "grad_norm": 0.3311922550201416, "learning_rate": 6.719340477243854e-06, "loss": 0.3505, "step": 5361 }, { "epoch": 1.3485915492957745, "grad_norm": 0.30410414934158325, "learning_rate": 6.717966387834194e-06, "loss": 0.3436, "step": 5362 }, { "epoch": 1.3488430583501005, "grad_norm": 0.3302040696144104, "learning_rate": 6.716592151291459e-06, "loss": 0.3396, "step": 5363 }, { "epoch": 1.3490945674044266, "grad_norm": 0.3249216377735138, "learning_rate": 6.715217767733346e-06, "loss": 0.3363, "step": 5364 }, { "epoch": 1.3493460764587526, "grad_norm": 0.30946671962738037, "learning_rate": 6.713843237277562e-06, "loss": 0.3726, "step": 5365 }, { "epoch": 1.3495975855130784, "grad_norm": 0.3507625162601471, "learning_rate": 6.712468560041825e-06, "loss": 0.3618, "step": 5366 }, { "epoch": 1.3498490945674044, "grad_norm": 0.3141408860683441, "learning_rate": 6.711093736143869e-06, "loss": 0.3566, "step": 5367 }, { "epoch": 1.3501006036217305, "grad_norm": 0.31941407918930054, "learning_rate": 6.7097187657014395e-06, "loss": 0.3568, "step": 5368 }, { "epoch": 1.3503521126760563, "grad_norm": 0.29959455132484436, "learning_rate": 6.708343648832294e-06, "loss": 0.3355, "step": 5369 }, { "epoch": 1.3506036217303823, "grad_norm": 0.32126718759536743, "learning_rate": 6.706968385654202e-06, "loss": 0.3352, "step": 5370 }, { "epoch": 1.3508551307847083, "grad_norm": 0.31553056836128235, "learning_rate": 6.705592976284948e-06, "loss": 0.3303, "step": 5371 }, { "epoch": 1.3511066398390343, "grad_norm": 0.3731096088886261, "learning_rate": 6.704217420842325e-06, "loss": 0.3627, "step": 5372 }, { "epoch": 1.3513581488933601, "grad_norm": 0.3548559248447418, "learning_rate": 6.702841719444141e-06, "loss": 0.3606, "step": 5373 }, { "epoch": 1.3516096579476862, "grad_norm": 0.3334583342075348, "learning_rate": 6.701465872208216e-06, "loss": 0.3507, "step": 5374 }, { "epoch": 1.3518611670020122, "grad_norm": 0.36840519309043884, "learning_rate": 6.700089879252385e-06, "loss": 0.3599, "step": 5375 }, { "epoch": 1.352112676056338, "grad_norm": 0.33070552349090576, "learning_rate": 6.69871374069449e-06, "loss": 0.3544, "step": 5376 }, { "epoch": 1.352364185110664, "grad_norm": 0.3327641487121582, "learning_rate": 6.6973374566523904e-06, "loss": 0.3508, "step": 5377 }, { "epoch": 1.35261569416499, "grad_norm": 0.32900556921958923, "learning_rate": 6.695961027243957e-06, "loss": 0.3593, "step": 5378 }, { "epoch": 1.3528672032193159, "grad_norm": 0.33715447783470154, "learning_rate": 6.694584452587071e-06, "loss": 0.3524, "step": 5379 }, { "epoch": 1.3531187122736419, "grad_norm": 0.31155914068222046, "learning_rate": 6.693207732799628e-06, "loss": 0.3298, "step": 5380 }, { "epoch": 1.353370221327968, "grad_norm": 0.3378448188304901, "learning_rate": 6.691830867999536e-06, "loss": 0.3213, "step": 5381 }, { "epoch": 1.3536217303822937, "grad_norm": 0.31682777404785156, "learning_rate": 6.690453858304713e-06, "loss": 0.3306, "step": 5382 }, { "epoch": 1.3538732394366197, "grad_norm": 0.3367248773574829, "learning_rate": 6.689076703833093e-06, "loss": 0.3662, "step": 5383 }, { "epoch": 1.3541247484909458, "grad_norm": 0.3396073579788208, "learning_rate": 6.687699404702621e-06, "loss": 0.3576, "step": 5384 }, { "epoch": 1.3543762575452716, "grad_norm": 0.33652400970458984, "learning_rate": 6.686321961031252e-06, "loss": 0.3369, "step": 5385 }, { "epoch": 1.3546277665995976, "grad_norm": 0.33180204033851624, "learning_rate": 6.684944372936958e-06, "loss": 0.3447, "step": 5386 }, { "epoch": 1.3548792756539236, "grad_norm": 0.32716917991638184, "learning_rate": 6.6835666405377185e-06, "loss": 0.352, "step": 5387 }, { "epoch": 1.3551307847082494, "grad_norm": 0.33084622025489807, "learning_rate": 6.682188763951528e-06, "loss": 0.3252, "step": 5388 }, { "epoch": 1.3553822937625755, "grad_norm": 0.3281020224094391, "learning_rate": 6.680810743296394e-06, "loss": 0.3865, "step": 5389 }, { "epoch": 1.3556338028169015, "grad_norm": 0.3415309488773346, "learning_rate": 6.6794325786903346e-06, "loss": 0.3433, "step": 5390 }, { "epoch": 1.3558853118712273, "grad_norm": 0.31689122319221497, "learning_rate": 6.678054270251383e-06, "loss": 0.3526, "step": 5391 }, { "epoch": 1.3561368209255533, "grad_norm": 0.33708077669143677, "learning_rate": 6.67667581809758e-06, "loss": 0.326, "step": 5392 }, { "epoch": 1.3563883299798793, "grad_norm": 0.3383912146091461, "learning_rate": 6.6752972223469825e-06, "loss": 0.3415, "step": 5393 }, { "epoch": 1.3566398390342052, "grad_norm": 0.32202810049057007, "learning_rate": 6.673918483117659e-06, "loss": 0.3472, "step": 5394 }, { "epoch": 1.3568913480885312, "grad_norm": 0.31360799074172974, "learning_rate": 6.672539600527688e-06, "loss": 0.3451, "step": 5395 }, { "epoch": 1.3571428571428572, "grad_norm": 0.33597296476364136, "learning_rate": 6.671160574695164e-06, "loss": 0.3443, "step": 5396 }, { "epoch": 1.357394366197183, "grad_norm": 0.35125431418418884, "learning_rate": 6.669781405738193e-06, "loss": 0.3528, "step": 5397 }, { "epoch": 1.357645875251509, "grad_norm": 0.2997105121612549, "learning_rate": 6.66840209377489e-06, "loss": 0.3226, "step": 5398 }, { "epoch": 1.357897384305835, "grad_norm": 0.3280561566352844, "learning_rate": 6.667022638923384e-06, "loss": 0.3512, "step": 5399 }, { "epoch": 1.3581488933601609, "grad_norm": 0.3559575080871582, "learning_rate": 6.665643041301818e-06, "loss": 0.3466, "step": 5400 }, { "epoch": 1.358400402414487, "grad_norm": 0.3210724890232086, "learning_rate": 6.6642633010283464e-06, "loss": 0.3509, "step": 5401 }, { "epoch": 1.358651911468813, "grad_norm": 0.3236124813556671, "learning_rate": 6.662883418221136e-06, "loss": 0.3407, "step": 5402 }, { "epoch": 1.3589034205231387, "grad_norm": 0.3242613971233368, "learning_rate": 6.661503392998362e-06, "loss": 0.3438, "step": 5403 }, { "epoch": 1.3591549295774648, "grad_norm": 0.33517026901245117, "learning_rate": 6.660123225478217e-06, "loss": 0.3578, "step": 5404 }, { "epoch": 1.3594064386317908, "grad_norm": 0.3171681761741638, "learning_rate": 6.658742915778904e-06, "loss": 0.339, "step": 5405 }, { "epoch": 1.3596579476861166, "grad_norm": 0.3439024090766907, "learning_rate": 6.6573624640186375e-06, "loss": 0.3572, "step": 5406 }, { "epoch": 1.3599094567404426, "grad_norm": 0.3413439691066742, "learning_rate": 6.655981870315643e-06, "loss": 0.3571, "step": 5407 }, { "epoch": 1.3601609657947686, "grad_norm": 0.33437255024909973, "learning_rate": 6.654601134788162e-06, "loss": 0.3457, "step": 5408 }, { "epoch": 1.3604124748490944, "grad_norm": 0.36181584000587463, "learning_rate": 6.653220257554446e-06, "loss": 0.3669, "step": 5409 }, { "epoch": 1.3606639839034205, "grad_norm": 0.34758883714675903, "learning_rate": 6.6518392387327545e-06, "loss": 0.3664, "step": 5410 }, { "epoch": 1.3609154929577465, "grad_norm": 0.3313416540622711, "learning_rate": 6.650458078441368e-06, "loss": 0.3483, "step": 5411 }, { "epoch": 1.3611670020120723, "grad_norm": 0.3105969727039337, "learning_rate": 6.649076776798573e-06, "loss": 0.3585, "step": 5412 }, { "epoch": 1.3614185110663983, "grad_norm": 0.3290698230266571, "learning_rate": 6.647695333922668e-06, "loss": 0.3359, "step": 5413 }, { "epoch": 1.3616700201207244, "grad_norm": 0.3040936589241028, "learning_rate": 6.646313749931965e-06, "loss": 0.3348, "step": 5414 }, { "epoch": 1.3619215291750504, "grad_norm": 0.3156396746635437, "learning_rate": 6.6449320249447905e-06, "loss": 0.3541, "step": 5415 }, { "epoch": 1.3621730382293762, "grad_norm": 0.31559011340141296, "learning_rate": 6.6435501590794775e-06, "loss": 0.3304, "step": 5416 }, { "epoch": 1.3624245472837022, "grad_norm": 0.30537793040275574, "learning_rate": 6.642168152454375e-06, "loss": 0.3714, "step": 5417 }, { "epoch": 1.3626760563380282, "grad_norm": 0.3393206298351288, "learning_rate": 6.640786005187844e-06, "loss": 0.3662, "step": 5418 }, { "epoch": 1.362927565392354, "grad_norm": 0.3266585171222687, "learning_rate": 6.639403717398256e-06, "loss": 0.369, "step": 5419 }, { "epoch": 1.36317907444668, "grad_norm": 0.31395620107650757, "learning_rate": 6.6380212892039954e-06, "loss": 0.3556, "step": 5420 }, { "epoch": 1.363430583501006, "grad_norm": 0.32085859775543213, "learning_rate": 6.636638720723459e-06, "loss": 0.3398, "step": 5421 }, { "epoch": 1.3636820925553321, "grad_norm": 0.325742244720459, "learning_rate": 6.635256012075056e-06, "loss": 0.3437, "step": 5422 }, { "epoch": 1.363933601609658, "grad_norm": 0.32642316818237305, "learning_rate": 6.633873163377206e-06, "loss": 0.3582, "step": 5423 }, { "epoch": 1.364185110663984, "grad_norm": 0.3359222710132599, "learning_rate": 6.63249017474834e-06, "loss": 0.3304, "step": 5424 }, { "epoch": 1.36443661971831, "grad_norm": 0.3578890562057495, "learning_rate": 6.631107046306902e-06, "loss": 0.3756, "step": 5425 }, { "epoch": 1.3646881287726358, "grad_norm": 0.3200285732746124, "learning_rate": 6.629723778171352e-06, "loss": 0.3426, "step": 5426 }, { "epoch": 1.3649396378269618, "grad_norm": 0.2880136966705322, "learning_rate": 6.628340370460156e-06, "loss": 0.3657, "step": 5427 }, { "epoch": 1.3651911468812878, "grad_norm": 0.3454881012439728, "learning_rate": 6.626956823291793e-06, "loss": 0.3552, "step": 5428 }, { "epoch": 1.3654426559356136, "grad_norm": 0.3579876124858856, "learning_rate": 6.625573136784755e-06, "loss": 0.3684, "step": 5429 }, { "epoch": 1.3656941649899397, "grad_norm": 0.32106253504753113, "learning_rate": 6.62418931105755e-06, "loss": 0.379, "step": 5430 }, { "epoch": 1.3659456740442657, "grad_norm": 0.34069597721099854, "learning_rate": 6.6228053462286905e-06, "loss": 0.3692, "step": 5431 }, { "epoch": 1.3661971830985915, "grad_norm": 0.31019026041030884, "learning_rate": 6.621421242416703e-06, "loss": 0.3666, "step": 5432 }, { "epoch": 1.3664486921529175, "grad_norm": 0.41305261850357056, "learning_rate": 6.6200369997401325e-06, "loss": 0.3689, "step": 5433 }, { "epoch": 1.3667002012072436, "grad_norm": 0.3238224685192108, "learning_rate": 6.618652618317527e-06, "loss": 0.3469, "step": 5434 }, { "epoch": 1.3669517102615694, "grad_norm": 0.3078828454017639, "learning_rate": 6.617268098267451e-06, "loss": 0.3354, "step": 5435 }, { "epoch": 1.3672032193158954, "grad_norm": 0.29392170906066895, "learning_rate": 6.615883439708481e-06, "loss": 0.3254, "step": 5436 }, { "epoch": 1.3674547283702214, "grad_norm": 0.34196245670318604, "learning_rate": 6.6144986427592014e-06, "loss": 0.3339, "step": 5437 }, { "epoch": 1.3677062374245472, "grad_norm": 0.31988444924354553, "learning_rate": 6.613113707538214e-06, "loss": 0.3454, "step": 5438 }, { "epoch": 1.3679577464788732, "grad_norm": 0.3208816349506378, "learning_rate": 6.6117286341641305e-06, "loss": 0.3436, "step": 5439 }, { "epoch": 1.3682092555331993, "grad_norm": 0.30724069476127625, "learning_rate": 6.610343422755572e-06, "loss": 0.3624, "step": 5440 }, { "epoch": 1.368460764587525, "grad_norm": 0.34737128019332886, "learning_rate": 6.608958073431173e-06, "loss": 0.3271, "step": 5441 }, { "epoch": 1.368712273641851, "grad_norm": 0.3552662134170532, "learning_rate": 6.607572586309581e-06, "loss": 0.3809, "step": 5442 }, { "epoch": 1.3689637826961771, "grad_norm": 0.2979397475719452, "learning_rate": 6.606186961509452e-06, "loss": 0.3306, "step": 5443 }, { "epoch": 1.369215291750503, "grad_norm": 0.3502536118030548, "learning_rate": 6.6048011991494595e-06, "loss": 0.3469, "step": 5444 }, { "epoch": 1.369466800804829, "grad_norm": 0.33274149894714355, "learning_rate": 6.603415299348284e-06, "loss": 0.3341, "step": 5445 }, { "epoch": 1.369718309859155, "grad_norm": 0.3190041482448578, "learning_rate": 6.60202926222462e-06, "loss": 0.3562, "step": 5446 }, { "epoch": 1.3699698189134808, "grad_norm": 0.31601500511169434, "learning_rate": 6.600643087897171e-06, "loss": 0.356, "step": 5447 }, { "epoch": 1.3702213279678068, "grad_norm": 0.3234473764896393, "learning_rate": 6.599256776484655e-06, "loss": 0.3756, "step": 5448 }, { "epoch": 1.3704728370221329, "grad_norm": 0.33312302827835083, "learning_rate": 6.597870328105801e-06, "loss": 0.3666, "step": 5449 }, { "epoch": 1.3707243460764587, "grad_norm": 0.32716864347457886, "learning_rate": 6.59648374287935e-06, "loss": 0.3525, "step": 5450 }, { "epoch": 1.3709758551307847, "grad_norm": 0.315767765045166, "learning_rate": 6.595097020924054e-06, "loss": 0.3242, "step": 5451 }, { "epoch": 1.3712273641851107, "grad_norm": 0.3259473741054535, "learning_rate": 6.593710162358676e-06, "loss": 0.3392, "step": 5452 }, { "epoch": 1.3714788732394365, "grad_norm": 0.3474818170070648, "learning_rate": 6.592323167301994e-06, "loss": 0.3662, "step": 5453 }, { "epoch": 1.3717303822937625, "grad_norm": 0.3318646252155304, "learning_rate": 6.590936035872792e-06, "loss": 0.3754, "step": 5454 }, { "epoch": 1.3719818913480886, "grad_norm": 0.3492967486381531, "learning_rate": 6.589548768189875e-06, "loss": 0.3491, "step": 5455 }, { "epoch": 1.3722334004024144, "grad_norm": 0.33331018686294556, "learning_rate": 6.588161364372047e-06, "loss": 0.3543, "step": 5456 }, { "epoch": 1.3724849094567404, "grad_norm": 0.3557076156139374, "learning_rate": 6.586773824538136e-06, "loss": 0.3616, "step": 5457 }, { "epoch": 1.3727364185110664, "grad_norm": 0.34410160779953003, "learning_rate": 6.585386148806974e-06, "loss": 0.3588, "step": 5458 }, { "epoch": 1.3729879275653922, "grad_norm": 0.32868391275405884, "learning_rate": 6.5839983372974045e-06, "loss": 0.3557, "step": 5459 }, { "epoch": 1.3732394366197183, "grad_norm": 0.29760217666625977, "learning_rate": 6.5826103901282875e-06, "loss": 0.3299, "step": 5460 }, { "epoch": 1.3734909456740443, "grad_norm": 0.33605876564979553, "learning_rate": 6.581222307418492e-06, "loss": 0.3432, "step": 5461 }, { "epoch": 1.37374245472837, "grad_norm": 0.30532124638557434, "learning_rate": 6.579834089286898e-06, "loss": 0.3592, "step": 5462 }, { "epoch": 1.3739939637826961, "grad_norm": 0.30395251512527466, "learning_rate": 6.578445735852397e-06, "loss": 0.341, "step": 5463 }, { "epoch": 1.3742454728370221, "grad_norm": 0.32410097122192383, "learning_rate": 6.5770572472338935e-06, "loss": 0.3545, "step": 5464 }, { "epoch": 1.3744969818913482, "grad_norm": 0.32119905948638916, "learning_rate": 6.575668623550302e-06, "loss": 0.346, "step": 5465 }, { "epoch": 1.374748490945674, "grad_norm": 0.30863913893699646, "learning_rate": 6.574279864920552e-06, "loss": 0.363, "step": 5466 }, { "epoch": 1.375, "grad_norm": 0.3214772939682007, "learning_rate": 6.572890971463579e-06, "loss": 0.3546, "step": 5467 }, { "epoch": 1.375251509054326, "grad_norm": 0.30236709117889404, "learning_rate": 6.571501943298335e-06, "loss": 0.3459, "step": 5468 }, { "epoch": 1.3755030181086518, "grad_norm": 0.35171201825141907, "learning_rate": 6.57011278054378e-06, "loss": 0.3439, "step": 5469 }, { "epoch": 1.3757545271629779, "grad_norm": 0.3353353440761566, "learning_rate": 6.568723483318889e-06, "loss": 0.3565, "step": 5470 }, { "epoch": 1.3760060362173039, "grad_norm": 0.3299080431461334, "learning_rate": 6.567334051742645e-06, "loss": 0.3654, "step": 5471 }, { "epoch": 1.37625754527163, "grad_norm": 0.3008752167224884, "learning_rate": 6.565944485934046e-06, "loss": 0.3346, "step": 5472 }, { "epoch": 1.3765090543259557, "grad_norm": 0.33441752195358276, "learning_rate": 6.564554786012096e-06, "loss": 0.3622, "step": 5473 }, { "epoch": 1.3767605633802817, "grad_norm": 0.326872855424881, "learning_rate": 6.563164952095818e-06, "loss": 0.374, "step": 5474 }, { "epoch": 1.3770120724346078, "grad_norm": 0.30647537112236023, "learning_rate": 6.561774984304241e-06, "loss": 0.3631, "step": 5475 }, { "epoch": 1.3772635814889336, "grad_norm": 0.2820424735546112, "learning_rate": 6.560384882756406e-06, "loss": 0.3572, "step": 5476 }, { "epoch": 1.3775150905432596, "grad_norm": 0.3259304463863373, "learning_rate": 6.558994647571369e-06, "loss": 0.3525, "step": 5477 }, { "epoch": 1.3777665995975856, "grad_norm": 0.3217025399208069, "learning_rate": 6.557604278868193e-06, "loss": 0.3713, "step": 5478 }, { "epoch": 1.3780181086519114, "grad_norm": 0.336544394493103, "learning_rate": 6.5562137767659516e-06, "loss": 0.3471, "step": 5479 }, { "epoch": 1.3782696177062375, "grad_norm": 0.3470653295516968, "learning_rate": 6.554823141383739e-06, "loss": 0.3517, "step": 5480 }, { "epoch": 1.3785211267605635, "grad_norm": 0.37579667568206787, "learning_rate": 6.553432372840651e-06, "loss": 0.3326, "step": 5481 }, { "epoch": 1.3787726358148893, "grad_norm": 0.31945011019706726, "learning_rate": 6.552041471255799e-06, "loss": 0.343, "step": 5482 }, { "epoch": 1.3790241448692153, "grad_norm": 0.3295787274837494, "learning_rate": 6.550650436748304e-06, "loss": 0.3246, "step": 5483 }, { "epoch": 1.3792756539235413, "grad_norm": 0.392974317073822, "learning_rate": 6.5492592694373e-06, "loss": 0.3482, "step": 5484 }, { "epoch": 1.3795271629778671, "grad_norm": 0.33396854996681213, "learning_rate": 6.547867969441931e-06, "loss": 0.3562, "step": 5485 }, { "epoch": 1.3797786720321932, "grad_norm": 0.3102302551269531, "learning_rate": 6.546476536881355e-06, "loss": 0.3471, "step": 5486 }, { "epoch": 1.3800301810865192, "grad_norm": 0.36257678270339966, "learning_rate": 6.545084971874738e-06, "loss": 0.3628, "step": 5487 }, { "epoch": 1.380281690140845, "grad_norm": 0.3609892725944519, "learning_rate": 6.543693274541259e-06, "loss": 0.353, "step": 5488 }, { "epoch": 1.380533199195171, "grad_norm": 0.3334091603755951, "learning_rate": 6.542301445000108e-06, "loss": 0.3528, "step": 5489 }, { "epoch": 1.380784708249497, "grad_norm": 0.3559829890727997, "learning_rate": 6.540909483370488e-06, "loss": 0.342, "step": 5490 }, { "epoch": 1.3810362173038229, "grad_norm": 0.35456085205078125, "learning_rate": 6.539517389771609e-06, "loss": 0.3803, "step": 5491 }, { "epoch": 1.381287726358149, "grad_norm": 0.3310755491256714, "learning_rate": 6.538125164322699e-06, "loss": 0.3475, "step": 5492 }, { "epoch": 1.381539235412475, "grad_norm": 0.35925358533859253, "learning_rate": 6.536732807142989e-06, "loss": 0.37, "step": 5493 }, { "epoch": 1.3817907444668007, "grad_norm": 0.3170955777168274, "learning_rate": 6.535340318351729e-06, "loss": 0.3237, "step": 5494 }, { "epoch": 1.3820422535211268, "grad_norm": 0.31947341561317444, "learning_rate": 6.5339476980681746e-06, "loss": 0.3396, "step": 5495 }, { "epoch": 1.3822937625754528, "grad_norm": 0.3393300473690033, "learning_rate": 6.532554946411598e-06, "loss": 0.3373, "step": 5496 }, { "epoch": 1.3825452716297786, "grad_norm": 0.3222068250179291, "learning_rate": 6.531162063501275e-06, "loss": 0.3653, "step": 5497 }, { "epoch": 1.3827967806841046, "grad_norm": 0.3433172404766083, "learning_rate": 6.529769049456502e-06, "loss": 0.3526, "step": 5498 }, { "epoch": 1.3830482897384306, "grad_norm": 0.31900420784950256, "learning_rate": 6.52837590439658e-06, "loss": 0.3523, "step": 5499 }, { "epoch": 1.3832997987927564, "grad_norm": 0.3333498537540436, "learning_rate": 6.5269826284408235e-06, "loss": 0.3579, "step": 5500 }, { "epoch": 1.3835513078470825, "grad_norm": 0.3352772891521454, "learning_rate": 6.525589221708558e-06, "loss": 0.3495, "step": 5501 }, { "epoch": 1.3838028169014085, "grad_norm": 0.35834312438964844, "learning_rate": 6.524195684319119e-06, "loss": 0.3236, "step": 5502 }, { "epoch": 1.3840543259557343, "grad_norm": 0.3257136940956116, "learning_rate": 6.522802016391856e-06, "loss": 0.3437, "step": 5503 }, { "epoch": 1.3843058350100603, "grad_norm": 0.337438702583313, "learning_rate": 6.521408218046126e-06, "loss": 0.3477, "step": 5504 }, { "epoch": 1.3845573440643864, "grad_norm": 0.33436325192451477, "learning_rate": 6.5200142894012995e-06, "loss": 0.3485, "step": 5505 }, { "epoch": 1.3848088531187122, "grad_norm": 0.3181125521659851, "learning_rate": 6.51862023057676e-06, "loss": 0.3766, "step": 5506 }, { "epoch": 1.3850603621730382, "grad_norm": 0.3215303421020508, "learning_rate": 6.517226041691897e-06, "loss": 0.3572, "step": 5507 }, { "epoch": 1.3853118712273642, "grad_norm": 0.3357185125350952, "learning_rate": 6.515831722866115e-06, "loss": 0.343, "step": 5508 }, { "epoch": 1.38556338028169, "grad_norm": 0.31294816732406616, "learning_rate": 6.514437274218829e-06, "loss": 0.3519, "step": 5509 }, { "epoch": 1.385814889336016, "grad_norm": 0.30923759937286377, "learning_rate": 6.513042695869465e-06, "loss": 0.3427, "step": 5510 }, { "epoch": 1.386066398390342, "grad_norm": 0.3177286982536316, "learning_rate": 6.51164798793746e-06, "loss": 0.3483, "step": 5511 }, { "epoch": 1.3863179074446679, "grad_norm": 0.30468153953552246, "learning_rate": 6.510253150542262e-06, "loss": 0.3639, "step": 5512 }, { "epoch": 1.386569416498994, "grad_norm": 0.33087295293807983, "learning_rate": 6.508858183803328e-06, "loss": 0.352, "step": 5513 }, { "epoch": 1.38682092555332, "grad_norm": 0.32209211587905884, "learning_rate": 6.507463087840133e-06, "loss": 0.3568, "step": 5514 }, { "epoch": 1.387072434607646, "grad_norm": 0.3076551854610443, "learning_rate": 6.506067862772153e-06, "loss": 0.3604, "step": 5515 }, { "epoch": 1.3873239436619718, "grad_norm": 0.3314887285232544, "learning_rate": 6.504672508718882e-06, "loss": 0.3678, "step": 5516 }, { "epoch": 1.3875754527162978, "grad_norm": 0.31186097860336304, "learning_rate": 6.503277025799825e-06, "loss": 0.3576, "step": 5517 }, { "epoch": 1.3878269617706238, "grad_norm": 0.29338905215263367, "learning_rate": 6.501881414134495e-06, "loss": 0.3615, "step": 5518 }, { "epoch": 1.3880784708249496, "grad_norm": 0.3048270046710968, "learning_rate": 6.500485673842417e-06, "loss": 0.3395, "step": 5519 }, { "epoch": 1.3883299798792756, "grad_norm": 0.30701351165771484, "learning_rate": 6.499089805043129e-06, "loss": 0.3739, "step": 5520 }, { "epoch": 1.3885814889336017, "grad_norm": 0.3141977787017822, "learning_rate": 6.497693807856177e-06, "loss": 0.3709, "step": 5521 }, { "epoch": 1.3888329979879277, "grad_norm": 0.3316255509853363, "learning_rate": 6.496297682401121e-06, "loss": 0.3554, "step": 5522 }, { "epoch": 1.3890845070422535, "grad_norm": 0.3405584990978241, "learning_rate": 6.494901428797526e-06, "loss": 0.3521, "step": 5523 }, { "epoch": 1.3893360160965795, "grad_norm": 0.34003138542175293, "learning_rate": 6.493505047164978e-06, "loss": 0.3322, "step": 5524 }, { "epoch": 1.3895875251509056, "grad_norm": 0.32138487696647644, "learning_rate": 6.492108537623067e-06, "loss": 0.3329, "step": 5525 }, { "epoch": 1.3898390342052314, "grad_norm": 0.328475683927536, "learning_rate": 6.490711900291393e-06, "loss": 0.349, "step": 5526 }, { "epoch": 1.3900905432595574, "grad_norm": 0.3196926414966583, "learning_rate": 6.489315135289571e-06, "loss": 0.3406, "step": 5527 }, { "epoch": 1.3903420523138834, "grad_norm": 0.32950088381767273, "learning_rate": 6.487918242737225e-06, "loss": 0.3643, "step": 5528 }, { "epoch": 1.3905935613682092, "grad_norm": 0.32452213764190674, "learning_rate": 6.4865212227539895e-06, "loss": 0.3614, "step": 5529 }, { "epoch": 1.3908450704225352, "grad_norm": 0.3157281279563904, "learning_rate": 6.4851240754595104e-06, "loss": 0.3499, "step": 5530 }, { "epoch": 1.3910965794768613, "grad_norm": 0.3089020252227783, "learning_rate": 6.483726800973447e-06, "loss": 0.351, "step": 5531 }, { "epoch": 1.391348088531187, "grad_norm": 0.32950559258461, "learning_rate": 6.482329399415463e-06, "loss": 0.3476, "step": 5532 }, { "epoch": 1.391599597585513, "grad_norm": 0.31183376908302307, "learning_rate": 6.48093187090524e-06, "loss": 0.3315, "step": 5533 }, { "epoch": 1.3918511066398391, "grad_norm": 0.34056970477104187, "learning_rate": 6.4795342155624685e-06, "loss": 0.3579, "step": 5534 }, { "epoch": 1.392102615694165, "grad_norm": 0.29875314235687256, "learning_rate": 6.478136433506846e-06, "loss": 0.3608, "step": 5535 }, { "epoch": 1.392354124748491, "grad_norm": 0.32909777760505676, "learning_rate": 6.4767385248580865e-06, "loss": 0.3757, "step": 5536 }, { "epoch": 1.392605633802817, "grad_norm": 0.33318114280700684, "learning_rate": 6.4753404897359085e-06, "loss": 0.3523, "step": 5537 }, { "epoch": 1.3928571428571428, "grad_norm": 0.32883554697036743, "learning_rate": 6.473942328260049e-06, "loss": 0.3536, "step": 5538 }, { "epoch": 1.3931086519114688, "grad_norm": 0.3212123215198517, "learning_rate": 6.4725440405502495e-06, "loss": 0.3598, "step": 5539 }, { "epoch": 1.3933601609657948, "grad_norm": 0.3189206123352051, "learning_rate": 6.471145626726265e-06, "loss": 0.3533, "step": 5540 }, { "epoch": 1.3936116700201207, "grad_norm": 0.3086141347885132, "learning_rate": 6.469747086907862e-06, "loss": 0.3797, "step": 5541 }, { "epoch": 1.3938631790744467, "grad_norm": 0.32901155948638916, "learning_rate": 6.468348421214814e-06, "loss": 0.3595, "step": 5542 }, { "epoch": 1.3941146881287727, "grad_norm": 0.33005937933921814, "learning_rate": 6.466949629766911e-06, "loss": 0.3243, "step": 5543 }, { "epoch": 1.3943661971830985, "grad_norm": 0.3070705831050873, "learning_rate": 6.465550712683949e-06, "loss": 0.3423, "step": 5544 }, { "epoch": 1.3946177062374245, "grad_norm": 0.32530298829078674, "learning_rate": 6.464151670085738e-06, "loss": 0.3656, "step": 5545 }, { "epoch": 1.3948692152917506, "grad_norm": 0.31196725368499756, "learning_rate": 6.4627525020920946e-06, "loss": 0.375, "step": 5546 }, { "epoch": 1.3951207243460764, "grad_norm": 0.32940876483917236, "learning_rate": 6.461353208822851e-06, "loss": 0.3649, "step": 5547 }, { "epoch": 1.3953722334004024, "grad_norm": 0.31415605545043945, "learning_rate": 6.459953790397847e-06, "loss": 0.3396, "step": 5548 }, { "epoch": 1.3956237424547284, "grad_norm": 0.34104838967323303, "learning_rate": 6.458554246936934e-06, "loss": 0.3251, "step": 5549 }, { "epoch": 1.3958752515090542, "grad_norm": 0.33823341131210327, "learning_rate": 6.457154578559975e-06, "loss": 0.3594, "step": 5550 }, { "epoch": 1.3961267605633803, "grad_norm": 0.34172114729881287, "learning_rate": 6.455754785386843e-06, "loss": 0.3679, "step": 5551 }, { "epoch": 1.3963782696177063, "grad_norm": 0.3444267213344574, "learning_rate": 6.454354867537418e-06, "loss": 0.366, "step": 5552 }, { "epoch": 1.396629778672032, "grad_norm": 0.32145577669143677, "learning_rate": 6.452954825131599e-06, "loss": 0.3459, "step": 5553 }, { "epoch": 1.3968812877263581, "grad_norm": 0.3104569911956787, "learning_rate": 6.4515546582892895e-06, "loss": 0.338, "step": 5554 }, { "epoch": 1.3971327967806841, "grad_norm": 0.3085978031158447, "learning_rate": 6.450154367130403e-06, "loss": 0.3378, "step": 5555 }, { "epoch": 1.39738430583501, "grad_norm": 0.33246156573295593, "learning_rate": 6.448753951774869e-06, "loss": 0.34, "step": 5556 }, { "epoch": 1.397635814889336, "grad_norm": 0.3080597519874573, "learning_rate": 6.447353412342621e-06, "loss": 0.3697, "step": 5557 }, { "epoch": 1.397887323943662, "grad_norm": 0.3250534236431122, "learning_rate": 6.445952748953607e-06, "loss": 0.3371, "step": 5558 }, { "epoch": 1.3981388329979878, "grad_norm": 0.3486895263195038, "learning_rate": 6.4445519617277874e-06, "loss": 0.3384, "step": 5559 }, { "epoch": 1.3983903420523138, "grad_norm": 0.3366147577762604, "learning_rate": 6.443151050785129e-06, "loss": 0.3304, "step": 5560 }, { "epoch": 1.3986418511066399, "grad_norm": 0.3010328412055969, "learning_rate": 6.4417500162456114e-06, "loss": 0.34, "step": 5561 }, { "epoch": 1.3988933601609657, "grad_norm": 0.32444706559181213, "learning_rate": 6.440348858229224e-06, "loss": 0.3565, "step": 5562 }, { "epoch": 1.3991448692152917, "grad_norm": 0.3689126968383789, "learning_rate": 6.4389475768559675e-06, "loss": 0.3513, "step": 5563 }, { "epoch": 1.3993963782696177, "grad_norm": 0.3049902319908142, "learning_rate": 6.437546172245855e-06, "loss": 0.3215, "step": 5564 }, { "epoch": 1.3996478873239437, "grad_norm": 0.3056788146495819, "learning_rate": 6.436144644518905e-06, "loss": 0.3364, "step": 5565 }, { "epoch": 1.3998993963782695, "grad_norm": 0.33238446712493896, "learning_rate": 6.434742993795149e-06, "loss": 0.3486, "step": 5566 }, { "epoch": 1.4001509054325956, "grad_norm": 0.3054902255535126, "learning_rate": 6.433341220194633e-06, "loss": 0.3234, "step": 5567 }, { "epoch": 1.4004024144869216, "grad_norm": 0.3176266551017761, "learning_rate": 6.431939323837409e-06, "loss": 0.3447, "step": 5568 }, { "epoch": 1.4006539235412474, "grad_norm": 0.3124770522117615, "learning_rate": 6.430537304843539e-06, "loss": 0.3541, "step": 5569 }, { "epoch": 1.4009054325955734, "grad_norm": 0.2936578094959259, "learning_rate": 6.429135163333099e-06, "loss": 0.3353, "step": 5570 }, { "epoch": 1.4011569416498995, "grad_norm": 0.32754024863243103, "learning_rate": 6.427732899426172e-06, "loss": 0.3473, "step": 5571 }, { "epoch": 1.4014084507042255, "grad_norm": 0.31696975231170654, "learning_rate": 6.426330513242855e-06, "loss": 0.3386, "step": 5572 }, { "epoch": 1.4016599597585513, "grad_norm": 0.33291855454444885, "learning_rate": 6.424928004903252e-06, "loss": 0.3386, "step": 5573 }, { "epoch": 1.4019114688128773, "grad_norm": 0.3217375874519348, "learning_rate": 6.423525374527479e-06, "loss": 0.3495, "step": 5574 }, { "epoch": 1.4021629778672033, "grad_norm": 0.2930941879749298, "learning_rate": 6.422122622235665e-06, "loss": 0.357, "step": 5575 }, { "epoch": 1.4024144869215291, "grad_norm": 0.3463757336139679, "learning_rate": 6.420719748147943e-06, "loss": 0.3652, "step": 5576 }, { "epoch": 1.4026659959758552, "grad_norm": 0.4066574275493622, "learning_rate": 6.419316752384464e-06, "loss": 0.3555, "step": 5577 }, { "epoch": 1.4029175050301812, "grad_norm": 0.33143872022628784, "learning_rate": 6.417913635065385e-06, "loss": 0.3418, "step": 5578 }, { "epoch": 1.403169014084507, "grad_norm": 0.30597761273384094, "learning_rate": 6.4165103963108724e-06, "loss": 0.3428, "step": 5579 }, { "epoch": 1.403420523138833, "grad_norm": 0.33264756202697754, "learning_rate": 6.415107036241106e-06, "loss": 0.3635, "step": 5580 }, { "epoch": 1.403672032193159, "grad_norm": 0.32489314675331116, "learning_rate": 6.413703554976276e-06, "loss": 0.3375, "step": 5581 }, { "epoch": 1.4039235412474849, "grad_norm": 0.3370649218559265, "learning_rate": 6.41229995263658e-06, "loss": 0.3336, "step": 5582 }, { "epoch": 1.404175050301811, "grad_norm": 0.34233397245407104, "learning_rate": 6.410896229342228e-06, "loss": 0.3634, "step": 5583 }, { "epoch": 1.404426559356137, "grad_norm": 0.34717515110969543, "learning_rate": 6.40949238521344e-06, "loss": 0.3228, "step": 5584 }, { "epoch": 1.4046780684104627, "grad_norm": 0.30195194482803345, "learning_rate": 6.408088420370448e-06, "loss": 0.3488, "step": 5585 }, { "epoch": 1.4049295774647887, "grad_norm": 0.33940741419792175, "learning_rate": 6.406684334933491e-06, "loss": 0.3741, "step": 5586 }, { "epoch": 1.4051810865191148, "grad_norm": 0.3448837101459503, "learning_rate": 6.405280129022821e-06, "loss": 0.3687, "step": 5587 }, { "epoch": 1.4054325955734406, "grad_norm": 0.33431434631347656, "learning_rate": 6.4038758027587e-06, "loss": 0.3487, "step": 5588 }, { "epoch": 1.4056841046277666, "grad_norm": 0.32833728194236755, "learning_rate": 6.402471356261399e-06, "loss": 0.3436, "step": 5589 }, { "epoch": 1.4059356136820926, "grad_norm": 0.3193908929824829, "learning_rate": 6.4010667896512e-06, "loss": 0.3325, "step": 5590 }, { "epoch": 1.4061871227364184, "grad_norm": 0.36166292428970337, "learning_rate": 6.399662103048396e-06, "loss": 0.3701, "step": 5591 }, { "epoch": 1.4064386317907445, "grad_norm": 0.3254150152206421, "learning_rate": 6.398257296573288e-06, "loss": 0.3524, "step": 5592 }, { "epoch": 1.4066901408450705, "grad_norm": 0.30729982256889343, "learning_rate": 6.396852370346191e-06, "loss": 0.3713, "step": 5593 }, { "epoch": 1.4069416498993963, "grad_norm": 0.3343053162097931, "learning_rate": 6.395447324487427e-06, "loss": 0.3501, "step": 5594 }, { "epoch": 1.4071931589537223, "grad_norm": 0.3478783369064331, "learning_rate": 6.394042159117329e-06, "loss": 0.3196, "step": 5595 }, { "epoch": 1.4074446680080483, "grad_norm": 0.30152082443237305, "learning_rate": 6.392636874356242e-06, "loss": 0.3359, "step": 5596 }, { "epoch": 1.4076961770623742, "grad_norm": 0.3307269513607025, "learning_rate": 6.391231470324517e-06, "loss": 0.3744, "step": 5597 }, { "epoch": 1.4079476861167002, "grad_norm": 0.33799752593040466, "learning_rate": 6.389825947142523e-06, "loss": 0.3632, "step": 5598 }, { "epoch": 1.4081991951710262, "grad_norm": 0.31849145889282227, "learning_rate": 6.38842030493063e-06, "loss": 0.3573, "step": 5599 }, { "epoch": 1.408450704225352, "grad_norm": 0.3179837763309479, "learning_rate": 6.387014543809224e-06, "loss": 0.3578, "step": 5600 }, { "epoch": 1.408702213279678, "grad_norm": 0.3246977627277374, "learning_rate": 6.3856086638986995e-06, "loss": 0.3751, "step": 5601 }, { "epoch": 1.408953722334004, "grad_norm": 0.35665377974510193, "learning_rate": 6.38420266531946e-06, "loss": 0.3697, "step": 5602 }, { "epoch": 1.4092052313883299, "grad_norm": 0.3288874924182892, "learning_rate": 6.382796548191923e-06, "loss": 0.3492, "step": 5603 }, { "epoch": 1.409456740442656, "grad_norm": 0.3103671073913574, "learning_rate": 6.381390312636513e-06, "loss": 0.3453, "step": 5604 }, { "epoch": 1.409708249496982, "grad_norm": 0.3277508616447449, "learning_rate": 6.379983958773663e-06, "loss": 0.348, "step": 5605 }, { "epoch": 1.4099597585513077, "grad_norm": 0.2977486848831177, "learning_rate": 6.378577486723821e-06, "loss": 0.3576, "step": 5606 }, { "epoch": 1.4102112676056338, "grad_norm": 0.30424800515174866, "learning_rate": 6.377170896607442e-06, "loss": 0.3413, "step": 5607 }, { "epoch": 1.4104627766599598, "grad_norm": 0.35188329219818115, "learning_rate": 6.3757641885449904e-06, "loss": 0.3816, "step": 5608 }, { "epoch": 1.4107142857142856, "grad_norm": 0.299070805311203, "learning_rate": 6.374357362656944e-06, "loss": 0.3635, "step": 5609 }, { "epoch": 1.4109657947686116, "grad_norm": 0.32047006487846375, "learning_rate": 6.372950419063787e-06, "loss": 0.3641, "step": 5610 }, { "epoch": 1.4112173038229376, "grad_norm": 0.32744547724723816, "learning_rate": 6.3715433578860155e-06, "loss": 0.358, "step": 5611 }, { "epoch": 1.4114688128772634, "grad_norm": 0.34040847420692444, "learning_rate": 6.3701361792441355e-06, "loss": 0.3533, "step": 5612 }, { "epoch": 1.4117203219315895, "grad_norm": 0.34646251797676086, "learning_rate": 6.368728883258664e-06, "loss": 0.3391, "step": 5613 }, { "epoch": 1.4119718309859155, "grad_norm": 0.3157348930835724, "learning_rate": 6.367321470050125e-06, "loss": 0.349, "step": 5614 }, { "epoch": 1.4122233400402415, "grad_norm": 0.3335605263710022, "learning_rate": 6.365913939739057e-06, "loss": 0.3467, "step": 5615 }, { "epoch": 1.4124748490945673, "grad_norm": 0.35618653893470764, "learning_rate": 6.364506292446005e-06, "loss": 0.383, "step": 5616 }, { "epoch": 1.4127263581488934, "grad_norm": 0.3262132406234741, "learning_rate": 6.363098528291525e-06, "loss": 0.3624, "step": 5617 }, { "epoch": 1.4129778672032194, "grad_norm": 0.3189094662666321, "learning_rate": 6.361690647396184e-06, "loss": 0.3665, "step": 5618 }, { "epoch": 1.4132293762575452, "grad_norm": 0.29632070660591125, "learning_rate": 6.3602826498805585e-06, "loss": 0.3518, "step": 5619 }, { "epoch": 1.4134808853118712, "grad_norm": 0.35020795464515686, "learning_rate": 6.358874535865233e-06, "loss": 0.342, "step": 5620 }, { "epoch": 1.4137323943661972, "grad_norm": 0.322587788105011, "learning_rate": 6.357466305470805e-06, "loss": 0.3361, "step": 5621 }, { "epoch": 1.4139839034205233, "grad_norm": 0.31895244121551514, "learning_rate": 6.356057958817879e-06, "loss": 0.3561, "step": 5622 }, { "epoch": 1.414235412474849, "grad_norm": 0.36562541127204895, "learning_rate": 6.354649496027075e-06, "loss": 0.3231, "step": 5623 }, { "epoch": 1.414486921529175, "grad_norm": 0.3357252776622772, "learning_rate": 6.353240917219014e-06, "loss": 0.3615, "step": 5624 }, { "epoch": 1.4147384305835011, "grad_norm": 0.3266746699810028, "learning_rate": 6.351832222514335e-06, "loss": 0.3515, "step": 5625 }, { "epoch": 1.414989939637827, "grad_norm": 0.317125529050827, "learning_rate": 6.350423412033683e-06, "loss": 0.3172, "step": 5626 }, { "epoch": 1.415241448692153, "grad_norm": 0.3371025621891022, "learning_rate": 6.349014485897714e-06, "loss": 0.351, "step": 5627 }, { "epoch": 1.415492957746479, "grad_norm": 0.33615246415138245, "learning_rate": 6.347605444227093e-06, "loss": 0.3431, "step": 5628 }, { "epoch": 1.4157444668008048, "grad_norm": 0.36137500405311584, "learning_rate": 6.346196287142497e-06, "loss": 0.3449, "step": 5629 }, { "epoch": 1.4159959758551308, "grad_norm": 0.37134072184562683, "learning_rate": 6.344787014764611e-06, "loss": 0.3563, "step": 5630 }, { "epoch": 1.4162474849094568, "grad_norm": 0.3492390811443329, "learning_rate": 6.34337762721413e-06, "loss": 0.3607, "step": 5631 }, { "epoch": 1.4164989939637826, "grad_norm": 0.3468017578125, "learning_rate": 6.341968124611759e-06, "loss": 0.3554, "step": 5632 }, { "epoch": 1.4167505030181087, "grad_norm": 0.2991659939289093, "learning_rate": 6.340558507078215e-06, "loss": 0.3202, "step": 5633 }, { "epoch": 1.4170020120724347, "grad_norm": 0.3461068272590637, "learning_rate": 6.339148774734221e-06, "loss": 0.35, "step": 5634 }, { "epoch": 1.4172535211267605, "grad_norm": 0.3426593542098999, "learning_rate": 6.33773892770051e-06, "loss": 0.3411, "step": 5635 }, { "epoch": 1.4175050301810865, "grad_norm": 0.3247561752796173, "learning_rate": 6.3363289660978315e-06, "loss": 0.3484, "step": 5636 }, { "epoch": 1.4177565392354126, "grad_norm": 0.3241268992424011, "learning_rate": 6.334918890046935e-06, "loss": 0.3471, "step": 5637 }, { "epoch": 1.4180080482897384, "grad_norm": 0.36099836230278015, "learning_rate": 6.333508699668587e-06, "loss": 0.3545, "step": 5638 }, { "epoch": 1.4182595573440644, "grad_norm": 0.31838127970695496, "learning_rate": 6.332098395083562e-06, "loss": 0.3412, "step": 5639 }, { "epoch": 1.4185110663983904, "grad_norm": 0.3607694208621979, "learning_rate": 6.330687976412642e-06, "loss": 0.3937, "step": 5640 }, { "epoch": 1.4187625754527162, "grad_norm": 0.3097631335258484, "learning_rate": 6.329277443776623e-06, "loss": 0.3405, "step": 5641 }, { "epoch": 1.4190140845070423, "grad_norm": 0.3108190894126892, "learning_rate": 6.327866797296306e-06, "loss": 0.3474, "step": 5642 }, { "epoch": 1.4192655935613683, "grad_norm": 0.32645246386528015, "learning_rate": 6.326456037092505e-06, "loss": 0.3367, "step": 5643 }, { "epoch": 1.419517102615694, "grad_norm": 0.3206358850002289, "learning_rate": 6.325045163286043e-06, "loss": 0.3654, "step": 5644 }, { "epoch": 1.41976861167002, "grad_norm": 0.3293161988258362, "learning_rate": 6.323634175997753e-06, "loss": 0.3539, "step": 5645 }, { "epoch": 1.4200201207243461, "grad_norm": 0.3224540948867798, "learning_rate": 6.322223075348475e-06, "loss": 0.3403, "step": 5646 }, { "epoch": 1.420271629778672, "grad_norm": 0.3604409694671631, "learning_rate": 6.320811861459063e-06, "loss": 0.3479, "step": 5647 }, { "epoch": 1.420523138832998, "grad_norm": 0.337251216173172, "learning_rate": 6.319400534450378e-06, "loss": 0.3347, "step": 5648 }, { "epoch": 1.420774647887324, "grad_norm": 0.2904793322086334, "learning_rate": 6.317989094443291e-06, "loss": 0.3252, "step": 5649 }, { "epoch": 1.4210261569416498, "grad_norm": 0.31111976504325867, "learning_rate": 6.316577541558683e-06, "loss": 0.3448, "step": 5650 }, { "epoch": 1.4212776659959758, "grad_norm": 0.3352508544921875, "learning_rate": 6.315165875917446e-06, "loss": 0.3272, "step": 5651 }, { "epoch": 1.4215291750503019, "grad_norm": 0.33469197154045105, "learning_rate": 6.313754097640479e-06, "loss": 0.3502, "step": 5652 }, { "epoch": 1.4217806841046277, "grad_norm": 0.3249984383583069, "learning_rate": 6.312342206848693e-06, "loss": 0.3638, "step": 5653 }, { "epoch": 1.4220321931589537, "grad_norm": 0.3012884259223938, "learning_rate": 6.310930203663006e-06, "loss": 0.3517, "step": 5654 }, { "epoch": 1.4222837022132797, "grad_norm": 0.32274380326271057, "learning_rate": 6.309518088204349e-06, "loss": 0.3494, "step": 5655 }, { "epoch": 1.4225352112676055, "grad_norm": 0.3095703721046448, "learning_rate": 6.3081058605936594e-06, "loss": 0.3247, "step": 5656 }, { "epoch": 1.4227867203219315, "grad_norm": 0.3313826024532318, "learning_rate": 6.3066935209518875e-06, "loss": 0.3503, "step": 5657 }, { "epoch": 1.4230382293762576, "grad_norm": 0.3489868938922882, "learning_rate": 6.305281069399989e-06, "loss": 0.349, "step": 5658 }, { "epoch": 1.4232897384305834, "grad_norm": 0.2928728759288788, "learning_rate": 6.303868506058933e-06, "loss": 0.3532, "step": 5659 }, { "epoch": 1.4235412474849094, "grad_norm": 0.3247617781162262, "learning_rate": 6.302455831049696e-06, "loss": 0.3393, "step": 5660 }, { "epoch": 1.4237927565392354, "grad_norm": 0.32944563031196594, "learning_rate": 6.301043044493266e-06, "loss": 0.3453, "step": 5661 }, { "epoch": 1.4240442655935612, "grad_norm": 0.31871670484542847, "learning_rate": 6.299630146510638e-06, "loss": 0.3624, "step": 5662 }, { "epoch": 1.4242957746478873, "grad_norm": 0.34367579221725464, "learning_rate": 6.2982171372228196e-06, "loss": 0.3523, "step": 5663 }, { "epoch": 1.4245472837022133, "grad_norm": 0.3165755569934845, "learning_rate": 6.296804016750824e-06, "loss": 0.3379, "step": 5664 }, { "epoch": 1.4247987927565393, "grad_norm": 0.31686344742774963, "learning_rate": 6.295390785215677e-06, "loss": 0.3231, "step": 5665 }, { "epoch": 1.4250503018108651, "grad_norm": 0.3904785215854645, "learning_rate": 6.293977442738414e-06, "loss": 0.3522, "step": 5666 }, { "epoch": 1.4253018108651911, "grad_norm": 0.336385041475296, "learning_rate": 6.292563989440077e-06, "loss": 0.358, "step": 5667 }, { "epoch": 1.4255533199195172, "grad_norm": 0.3112278878688812, "learning_rate": 6.291150425441721e-06, "loss": 0.3522, "step": 5668 }, { "epoch": 1.4258048289738432, "grad_norm": 0.3297823667526245, "learning_rate": 6.289736750864409e-06, "loss": 0.3437, "step": 5669 }, { "epoch": 1.426056338028169, "grad_norm": 0.3268705904483795, "learning_rate": 6.288322965829212e-06, "loss": 0.3288, "step": 5670 }, { "epoch": 1.426307847082495, "grad_norm": 0.3283686339855194, "learning_rate": 6.286909070457213e-06, "loss": 0.3501, "step": 5671 }, { "epoch": 1.426559356136821, "grad_norm": 0.3172062635421753, "learning_rate": 6.285495064869503e-06, "loss": 0.3426, "step": 5672 }, { "epoch": 1.4268108651911469, "grad_norm": 0.35733988881111145, "learning_rate": 6.284080949187183e-06, "loss": 0.3113, "step": 5673 }, { "epoch": 1.4270623742454729, "grad_norm": 0.32833603024482727, "learning_rate": 6.282666723531363e-06, "loss": 0.345, "step": 5674 }, { "epoch": 1.427313883299799, "grad_norm": 0.3474726378917694, "learning_rate": 6.281252388023162e-06, "loss": 0.351, "step": 5675 }, { "epoch": 1.4275653923541247, "grad_norm": 0.3398433029651642, "learning_rate": 6.279837942783711e-06, "loss": 0.3523, "step": 5676 }, { "epoch": 1.4278169014084507, "grad_norm": 0.35440170764923096, "learning_rate": 6.278423387934145e-06, "loss": 0.3421, "step": 5677 }, { "epoch": 1.4280684104627768, "grad_norm": 0.3336888551712036, "learning_rate": 6.277008723595615e-06, "loss": 0.3525, "step": 5678 }, { "epoch": 1.4283199195171026, "grad_norm": 0.2921485900878906, "learning_rate": 6.275593949889276e-06, "loss": 0.3576, "step": 5679 }, { "epoch": 1.4285714285714286, "grad_norm": 0.3030543029308319, "learning_rate": 6.274179066936294e-06, "loss": 0.364, "step": 5680 }, { "epoch": 1.4288229376257546, "grad_norm": 0.328914612531662, "learning_rate": 6.272764074857848e-06, "loss": 0.3624, "step": 5681 }, { "epoch": 1.4290744466800804, "grad_norm": 0.3531835973262787, "learning_rate": 6.27134897377512e-06, "loss": 0.3486, "step": 5682 }, { "epoch": 1.4293259557344065, "grad_norm": 0.3383164703845978, "learning_rate": 6.269933763809306e-06, "loss": 0.3471, "step": 5683 }, { "epoch": 1.4295774647887325, "grad_norm": 0.31009918451309204, "learning_rate": 6.268518445081611e-06, "loss": 0.3755, "step": 5684 }, { "epoch": 1.4298289738430583, "grad_norm": 0.29445788264274597, "learning_rate": 6.2671030177132466e-06, "loss": 0.3177, "step": 5685 }, { "epoch": 1.4300804828973843, "grad_norm": 0.33102452754974365, "learning_rate": 6.265687481825435e-06, "loss": 0.3525, "step": 5686 }, { "epoch": 1.4303319919517103, "grad_norm": 0.31349411606788635, "learning_rate": 6.26427183753941e-06, "loss": 0.3394, "step": 5687 }, { "epoch": 1.4305835010060362, "grad_norm": 0.30602481961250305, "learning_rate": 6.262856084976411e-06, "loss": 0.3581, "step": 5688 }, { "epoch": 1.4308350100603622, "grad_norm": 0.31970512866973877, "learning_rate": 6.261440224257688e-06, "loss": 0.3677, "step": 5689 }, { "epoch": 1.4310865191146882, "grad_norm": 0.29605525732040405, "learning_rate": 6.260024255504502e-06, "loss": 0.3446, "step": 5690 }, { "epoch": 1.431338028169014, "grad_norm": 0.3755524754524231, "learning_rate": 6.258608178838122e-06, "loss": 0.3658, "step": 5691 }, { "epoch": 1.43158953722334, "grad_norm": 0.31317082047462463, "learning_rate": 6.2571919943798235e-06, "loss": 0.3382, "step": 5692 }, { "epoch": 1.431841046277666, "grad_norm": 0.32691314816474915, "learning_rate": 6.255775702250895e-06, "loss": 0.349, "step": 5693 }, { "epoch": 1.4320925553319919, "grad_norm": 0.31760266423225403, "learning_rate": 6.254359302572635e-06, "loss": 0.3833, "step": 5694 }, { "epoch": 1.432344064386318, "grad_norm": 0.3521325886249542, "learning_rate": 6.252942795466348e-06, "loss": 0.3629, "step": 5695 }, { "epoch": 1.432595573440644, "grad_norm": 0.30917102098464966, "learning_rate": 6.251526181053349e-06, "loss": 0.3508, "step": 5696 }, { "epoch": 1.4328470824949697, "grad_norm": 0.3195309340953827, "learning_rate": 6.250109459454963e-06, "loss": 0.3367, "step": 5697 }, { "epoch": 1.4330985915492958, "grad_norm": 0.32293376326560974, "learning_rate": 6.248692630792521e-06, "loss": 0.3347, "step": 5698 }, { "epoch": 1.4333501006036218, "grad_norm": 0.34250807762145996, "learning_rate": 6.247275695187368e-06, "loss": 0.3414, "step": 5699 }, { "epoch": 1.4336016096579476, "grad_norm": 0.3318396806716919, "learning_rate": 6.245858652760854e-06, "loss": 0.336, "step": 5700 }, { "epoch": 1.4338531187122736, "grad_norm": 0.34146398305892944, "learning_rate": 6.244441503634341e-06, "loss": 0.357, "step": 5701 }, { "epoch": 1.4341046277665996, "grad_norm": 0.2918093204498291, "learning_rate": 6.243024247929198e-06, "loss": 0.3461, "step": 5702 }, { "epoch": 1.4343561368209254, "grad_norm": 0.32614874839782715, "learning_rate": 6.2416068857668045e-06, "loss": 0.3522, "step": 5703 }, { "epoch": 1.4346076458752515, "grad_norm": 0.3218587040901184, "learning_rate": 6.240189417268548e-06, "loss": 0.3755, "step": 5704 }, { "epoch": 1.4348591549295775, "grad_norm": 0.3028290271759033, "learning_rate": 6.238771842555828e-06, "loss": 0.3393, "step": 5705 }, { "epoch": 1.4351106639839033, "grad_norm": 0.2967846393585205, "learning_rate": 6.2373541617500475e-06, "loss": 0.3598, "step": 5706 }, { "epoch": 1.4353621730382293, "grad_norm": 0.3133407235145569, "learning_rate": 6.235936374972626e-06, "loss": 0.3657, "step": 5707 }, { "epoch": 1.4356136820925554, "grad_norm": 0.30290549993515015, "learning_rate": 6.234518482344985e-06, "loss": 0.34, "step": 5708 }, { "epoch": 1.4358651911468812, "grad_norm": 0.2992716133594513, "learning_rate": 6.233100483988559e-06, "loss": 0.3348, "step": 5709 }, { "epoch": 1.4361167002012072, "grad_norm": 0.34080713987350464, "learning_rate": 6.231682380024792e-06, "loss": 0.3632, "step": 5710 }, { "epoch": 1.4363682092555332, "grad_norm": 0.3630107641220093, "learning_rate": 6.230264170575133e-06, "loss": 0.3345, "step": 5711 }, { "epoch": 1.436619718309859, "grad_norm": 0.31429681181907654, "learning_rate": 6.228845855761044e-06, "loss": 0.3585, "step": 5712 }, { "epoch": 1.436871227364185, "grad_norm": 0.298191100358963, "learning_rate": 6.227427435703997e-06, "loss": 0.3379, "step": 5713 }, { "epoch": 1.437122736418511, "grad_norm": 0.33849817514419556, "learning_rate": 6.226008910525466e-06, "loss": 0.3389, "step": 5714 }, { "epoch": 1.437374245472837, "grad_norm": 0.3714556396007538, "learning_rate": 6.224590280346944e-06, "loss": 0.3591, "step": 5715 }, { "epoch": 1.437625754527163, "grad_norm": 0.3296235501766205, "learning_rate": 6.223171545289925e-06, "loss": 0.3273, "step": 5716 }, { "epoch": 1.437877263581489, "grad_norm": 0.34104108810424805, "learning_rate": 6.221752705475915e-06, "loss": 0.3745, "step": 5717 }, { "epoch": 1.438128772635815, "grad_norm": 0.3797450065612793, "learning_rate": 6.22033376102643e-06, "loss": 0.3646, "step": 5718 }, { "epoch": 1.438380281690141, "grad_norm": 0.3067338764667511, "learning_rate": 6.218914712062992e-06, "loss": 0.3526, "step": 5719 }, { "epoch": 1.4386317907444668, "grad_norm": 0.30397993326187134, "learning_rate": 6.217495558707135e-06, "loss": 0.3417, "step": 5720 }, { "epoch": 1.4388832997987928, "grad_norm": 0.34097644686698914, "learning_rate": 6.2160763010803995e-06, "loss": 0.3868, "step": 5721 }, { "epoch": 1.4391348088531188, "grad_norm": 0.3221248686313629, "learning_rate": 6.214656939304337e-06, "loss": 0.3695, "step": 5722 }, { "epoch": 1.4393863179074446, "grad_norm": 0.30336886644363403, "learning_rate": 6.213237473500505e-06, "loss": 0.3425, "step": 5723 }, { "epoch": 1.4396378269617707, "grad_norm": 0.30754998326301575, "learning_rate": 6.2118179037904755e-06, "loss": 0.3354, "step": 5724 }, { "epoch": 1.4398893360160967, "grad_norm": 0.35792577266693115, "learning_rate": 6.2103982302958225e-06, "loss": 0.3458, "step": 5725 }, { "epoch": 1.4401408450704225, "grad_norm": 0.3202516436576843, "learning_rate": 6.208978453138134e-06, "loss": 0.3495, "step": 5726 }, { "epoch": 1.4403923541247485, "grad_norm": 0.30896228551864624, "learning_rate": 6.207558572439003e-06, "loss": 0.3359, "step": 5727 }, { "epoch": 1.4406438631790746, "grad_norm": 0.3637436330318451, "learning_rate": 6.2061385883200365e-06, "loss": 0.3406, "step": 5728 }, { "epoch": 1.4408953722334004, "grad_norm": 0.32914838194847107, "learning_rate": 6.204718500902845e-06, "loss": 0.3735, "step": 5729 }, { "epoch": 1.4411468812877264, "grad_norm": 0.3156082332134247, "learning_rate": 6.20329831030905e-06, "loss": 0.3455, "step": 5730 }, { "epoch": 1.4413983903420524, "grad_norm": 0.35359394550323486, "learning_rate": 6.201878016660282e-06, "loss": 0.3381, "step": 5731 }, { "epoch": 1.4416498993963782, "grad_norm": 0.3604298532009125, "learning_rate": 6.200457620078182e-06, "loss": 0.3768, "step": 5732 }, { "epoch": 1.4419014084507042, "grad_norm": 0.31656792759895325, "learning_rate": 6.199037120684396e-06, "loss": 0.3786, "step": 5733 }, { "epoch": 1.4421529175050303, "grad_norm": 0.32002219557762146, "learning_rate": 6.1976165186005825e-06, "loss": 0.3784, "step": 5734 }, { "epoch": 1.442404426559356, "grad_norm": 0.33488380908966064, "learning_rate": 6.196195813948406e-06, "loss": 0.3552, "step": 5735 }, { "epoch": 1.442655935613682, "grad_norm": 0.33732855319976807, "learning_rate": 6.194775006849541e-06, "loss": 0.3446, "step": 5736 }, { "epoch": 1.4429074446680081, "grad_norm": 0.27075111865997314, "learning_rate": 6.19335409742567e-06, "loss": 0.3357, "step": 5737 }, { "epoch": 1.443158953722334, "grad_norm": 0.3128473162651062, "learning_rate": 6.191933085798488e-06, "loss": 0.3443, "step": 5738 }, { "epoch": 1.44341046277666, "grad_norm": 0.3088710606098175, "learning_rate": 6.190511972089694e-06, "loss": 0.3581, "step": 5739 }, { "epoch": 1.443661971830986, "grad_norm": 0.29833412170410156, "learning_rate": 6.189090756420997e-06, "loss": 0.3203, "step": 5740 }, { "epoch": 1.4439134808853118, "grad_norm": 0.318301260471344, "learning_rate": 6.187669438914116e-06, "loss": 0.3681, "step": 5741 }, { "epoch": 1.4441649899396378, "grad_norm": 0.3490622341632843, "learning_rate": 6.186248019690777e-06, "loss": 0.3474, "step": 5742 }, { "epoch": 1.4444164989939638, "grad_norm": 0.33258891105651855, "learning_rate": 6.1848264988727165e-06, "loss": 0.3604, "step": 5743 }, { "epoch": 1.4446680080482897, "grad_norm": 0.354710191488266, "learning_rate": 6.183404876581679e-06, "loss": 0.3331, "step": 5744 }, { "epoch": 1.4449195171026157, "grad_norm": 0.3129853904247284, "learning_rate": 6.181983152939417e-06, "loss": 0.3661, "step": 5745 }, { "epoch": 1.4451710261569417, "grad_norm": 0.3277970850467682, "learning_rate": 6.180561328067692e-06, "loss": 0.3596, "step": 5746 }, { "epoch": 1.4454225352112675, "grad_norm": 0.3437343239784241, "learning_rate": 6.179139402088275e-06, "loss": 0.3577, "step": 5747 }, { "epoch": 1.4456740442655935, "grad_norm": 0.3663479685783386, "learning_rate": 6.1777173751229445e-06, "loss": 0.3585, "step": 5748 }, { "epoch": 1.4459255533199196, "grad_norm": 0.3225582242012024, "learning_rate": 6.176295247293489e-06, "loss": 0.347, "step": 5749 }, { "epoch": 1.4461770623742454, "grad_norm": 0.33006182312965393, "learning_rate": 6.174873018721705e-06, "loss": 0.3493, "step": 5750 }, { "epoch": 1.4464285714285714, "grad_norm": 0.33908021450042725, "learning_rate": 6.173450689529397e-06, "loss": 0.3456, "step": 5751 }, { "epoch": 1.4466800804828974, "grad_norm": 0.35512205958366394, "learning_rate": 6.172028259838378e-06, "loss": 0.3777, "step": 5752 }, { "epoch": 1.4469315895372232, "grad_norm": 0.3256780207157135, "learning_rate": 6.17060572977047e-06, "loss": 0.3644, "step": 5753 }, { "epoch": 1.4471830985915493, "grad_norm": 0.39678511023521423, "learning_rate": 6.169183099447505e-06, "loss": 0.3304, "step": 5754 }, { "epoch": 1.4474346076458753, "grad_norm": 0.3706040680408478, "learning_rate": 6.167760368991322e-06, "loss": 0.3491, "step": 5755 }, { "epoch": 1.447686116700201, "grad_norm": 0.3017033040523529, "learning_rate": 6.16633753852377e-06, "loss": 0.3393, "step": 5756 }, { "epoch": 1.4479376257545271, "grad_norm": 0.32443055510520935, "learning_rate": 6.164914608166703e-06, "loss": 0.3369, "step": 5757 }, { "epoch": 1.4481891348088531, "grad_norm": 0.33810749650001526, "learning_rate": 6.163491578041988e-06, "loss": 0.3254, "step": 5758 }, { "epoch": 1.448440643863179, "grad_norm": 0.3356841504573822, "learning_rate": 6.1620684482714975e-06, "loss": 0.3624, "step": 5759 }, { "epoch": 1.448692152917505, "grad_norm": 0.32188060879707336, "learning_rate": 6.160645218977115e-06, "loss": 0.327, "step": 5760 }, { "epoch": 1.448943661971831, "grad_norm": 0.30660948157310486, "learning_rate": 6.159221890280731e-06, "loss": 0.3529, "step": 5761 }, { "epoch": 1.4491951710261568, "grad_norm": 0.3086193799972534, "learning_rate": 6.157798462304243e-06, "loss": 0.3633, "step": 5762 }, { "epoch": 1.4494466800804828, "grad_norm": 0.33312803506851196, "learning_rate": 6.15637493516956e-06, "loss": 0.3652, "step": 5763 }, { "epoch": 1.4496981891348089, "grad_norm": 0.3129318058490753, "learning_rate": 6.154951308998599e-06, "loss": 0.378, "step": 5764 }, { "epoch": 1.4499496981891349, "grad_norm": 0.31855911016464233, "learning_rate": 6.153527583913284e-06, "loss": 0.338, "step": 5765 }, { "epoch": 1.4502012072434607, "grad_norm": 0.3277183473110199, "learning_rate": 6.152103760035546e-06, "loss": 0.3527, "step": 5766 }, { "epoch": 1.4504527162977867, "grad_norm": 0.34277212619781494, "learning_rate": 6.15067983748733e-06, "loss": 0.3351, "step": 5767 }, { "epoch": 1.4507042253521127, "grad_norm": 0.3247811198234558, "learning_rate": 6.149255816390585e-06, "loss": 0.3464, "step": 5768 }, { "epoch": 1.4509557344064388, "grad_norm": 0.3350554406642914, "learning_rate": 6.147831696867266e-06, "loss": 0.3549, "step": 5769 }, { "epoch": 1.4512072434607646, "grad_norm": 0.33898022770881653, "learning_rate": 6.146407479039345e-06, "loss": 0.3493, "step": 5770 }, { "epoch": 1.4514587525150906, "grad_norm": 0.3372337818145752, "learning_rate": 6.144983163028796e-06, "loss": 0.3463, "step": 5771 }, { "epoch": 1.4517102615694166, "grad_norm": 0.36416393518447876, "learning_rate": 6.143558748957601e-06, "loss": 0.3475, "step": 5772 }, { "epoch": 1.4519617706237424, "grad_norm": 0.33351224660873413, "learning_rate": 6.142134236947755e-06, "loss": 0.358, "step": 5773 }, { "epoch": 1.4522132796780685, "grad_norm": 0.29332223534584045, "learning_rate": 6.140709627121255e-06, "loss": 0.3688, "step": 5774 }, { "epoch": 1.4524647887323945, "grad_norm": 0.34419727325439453, "learning_rate": 6.1392849196001125e-06, "loss": 0.3338, "step": 5775 }, { "epoch": 1.4527162977867203, "grad_norm": 0.32147595286369324, "learning_rate": 6.137860114506343e-06, "loss": 0.3517, "step": 5776 }, { "epoch": 1.4529678068410463, "grad_norm": 0.3297555148601532, "learning_rate": 6.136435211961974e-06, "loss": 0.3182, "step": 5777 }, { "epoch": 1.4532193158953723, "grad_norm": 0.3482721745967865, "learning_rate": 6.135010212089038e-06, "loss": 0.3632, "step": 5778 }, { "epoch": 1.4534708249496981, "grad_norm": 0.32314664125442505, "learning_rate": 6.133585115009579e-06, "loss": 0.3837, "step": 5779 }, { "epoch": 1.4537223340040242, "grad_norm": 0.33287930488586426, "learning_rate": 6.132159920845645e-06, "loss": 0.3742, "step": 5780 }, { "epoch": 1.4539738430583502, "grad_norm": 0.32902443408966064, "learning_rate": 6.1307346297192984e-06, "loss": 0.3698, "step": 5781 }, { "epoch": 1.454225352112676, "grad_norm": 0.3330136835575104, "learning_rate": 6.129309241752603e-06, "loss": 0.358, "step": 5782 }, { "epoch": 1.454476861167002, "grad_norm": 0.3234924376010895, "learning_rate": 6.127883757067636e-06, "loss": 0.3546, "step": 5783 }, { "epoch": 1.454728370221328, "grad_norm": 0.3510439693927765, "learning_rate": 6.126458175786483e-06, "loss": 0.3506, "step": 5784 }, { "epoch": 1.4549798792756539, "grad_norm": 0.32338663935661316, "learning_rate": 6.125032498031234e-06, "loss": 0.3674, "step": 5785 }, { "epoch": 1.45523138832998, "grad_norm": 0.3300342857837677, "learning_rate": 6.1236067239239885e-06, "loss": 0.3535, "step": 5786 }, { "epoch": 1.455482897384306, "grad_norm": 0.31873607635498047, "learning_rate": 6.122180853586857e-06, "loss": 0.3526, "step": 5787 }, { "epoch": 1.4557344064386317, "grad_norm": 0.3264380991458893, "learning_rate": 6.120754887141955e-06, "loss": 0.3741, "step": 5788 }, { "epoch": 1.4559859154929577, "grad_norm": 0.36225321888923645, "learning_rate": 6.119328824711409e-06, "loss": 0.3473, "step": 5789 }, { "epoch": 1.4562374245472838, "grad_norm": 0.3539520800113678, "learning_rate": 6.117902666417352e-06, "loss": 0.3592, "step": 5790 }, { "epoch": 1.4564889336016096, "grad_norm": 0.3481525182723999, "learning_rate": 6.116476412381926e-06, "loss": 0.364, "step": 5791 }, { "epoch": 1.4567404426559356, "grad_norm": 0.3597382605075836, "learning_rate": 6.115050062727278e-06, "loss": 0.3502, "step": 5792 }, { "epoch": 1.4569919517102616, "grad_norm": 0.3276865482330322, "learning_rate": 6.113623617575568e-06, "loss": 0.3589, "step": 5793 }, { "epoch": 1.4572434607645874, "grad_norm": 0.35708415508270264, "learning_rate": 6.112197077048963e-06, "loss": 0.374, "step": 5794 }, { "epoch": 1.4574949698189135, "grad_norm": 0.33358344435691833, "learning_rate": 6.110770441269636e-06, "loss": 0.3551, "step": 5795 }, { "epoch": 1.4577464788732395, "grad_norm": 0.3249559998512268, "learning_rate": 6.10934371035977e-06, "loss": 0.3376, "step": 5796 }, { "epoch": 1.4579979879275653, "grad_norm": 0.35899099707603455, "learning_rate": 6.1079168844415535e-06, "loss": 0.3644, "step": 5797 }, { "epoch": 1.4582494969818913, "grad_norm": 0.33779412508010864, "learning_rate": 6.106489963637189e-06, "loss": 0.3545, "step": 5798 }, { "epoch": 1.4585010060362174, "grad_norm": 0.3385300040245056, "learning_rate": 6.105062948068881e-06, "loss": 0.3381, "step": 5799 }, { "epoch": 1.4587525150905432, "grad_norm": 0.3292633295059204, "learning_rate": 6.103635837858844e-06, "loss": 0.3323, "step": 5800 }, { "epoch": 1.4590040241448692, "grad_norm": 0.3348362445831299, "learning_rate": 6.1022086331293005e-06, "loss": 0.3462, "step": 5801 }, { "epoch": 1.4592555331991952, "grad_norm": 0.3152986764907837, "learning_rate": 6.100781334002485e-06, "loss": 0.3209, "step": 5802 }, { "epoch": 1.459507042253521, "grad_norm": 0.3165169656276703, "learning_rate": 6.099353940600634e-06, "loss": 0.336, "step": 5803 }, { "epoch": 1.459758551307847, "grad_norm": 0.36417675018310547, "learning_rate": 6.097926453045996e-06, "loss": 0.3508, "step": 5804 }, { "epoch": 1.460010060362173, "grad_norm": 0.3556537926197052, "learning_rate": 6.0964988714608255e-06, "loss": 0.3644, "step": 5805 }, { "epoch": 1.4602615694164989, "grad_norm": 0.3152537941932678, "learning_rate": 6.0950711959673854e-06, "loss": 0.3538, "step": 5806 }, { "epoch": 1.460513078470825, "grad_norm": 0.3294949531555176, "learning_rate": 6.093643426687949e-06, "loss": 0.3665, "step": 5807 }, { "epoch": 1.460764587525151, "grad_norm": 0.33261191844940186, "learning_rate": 6.092215563744797e-06, "loss": 0.3564, "step": 5808 }, { "epoch": 1.4610160965794767, "grad_norm": 0.36204931139945984, "learning_rate": 6.0907876072602126e-06, "loss": 0.3704, "step": 5809 }, { "epoch": 1.4612676056338028, "grad_norm": 0.34487199783325195, "learning_rate": 6.0893595573564935e-06, "loss": 0.3626, "step": 5810 }, { "epoch": 1.4615191146881288, "grad_norm": 0.3406592607498169, "learning_rate": 6.0879314141559434e-06, "loss": 0.3619, "step": 5811 }, { "epoch": 1.4617706237424548, "grad_norm": 0.3268572986125946, "learning_rate": 6.086503177780874e-06, "loss": 0.3383, "step": 5812 }, { "epoch": 1.4620221327967806, "grad_norm": 0.33901724219322205, "learning_rate": 6.085074848353604e-06, "loss": 0.3209, "step": 5813 }, { "epoch": 1.4622736418511066, "grad_norm": 0.2882567346096039, "learning_rate": 6.083646425996462e-06, "loss": 0.3496, "step": 5814 }, { "epoch": 1.4625251509054327, "grad_norm": 0.30257371068000793, "learning_rate": 6.082217910831784e-06, "loss": 0.3685, "step": 5815 }, { "epoch": 1.4627766599597585, "grad_norm": 0.34829580783843994, "learning_rate": 6.080789302981911e-06, "loss": 0.356, "step": 5816 }, { "epoch": 1.4630281690140845, "grad_norm": 0.32623910903930664, "learning_rate": 6.079360602569196e-06, "loss": 0.3614, "step": 5817 }, { "epoch": 1.4632796780684105, "grad_norm": 0.3189256191253662, "learning_rate": 6.0779318097159965e-06, "loss": 0.3708, "step": 5818 }, { "epoch": 1.4635311871227366, "grad_norm": 0.32534220814704895, "learning_rate": 6.076502924544683e-06, "loss": 0.3314, "step": 5819 }, { "epoch": 1.4637826961770624, "grad_norm": 0.3330930471420288, "learning_rate": 6.075073947177628e-06, "loss": 0.3503, "step": 5820 }, { "epoch": 1.4640342052313884, "grad_norm": 0.32736390829086304, "learning_rate": 6.073644877737215e-06, "loss": 0.3712, "step": 5821 }, { "epoch": 1.4642857142857144, "grad_norm": 0.3611101806163788, "learning_rate": 6.072215716345835e-06, "loss": 0.3876, "step": 5822 }, { "epoch": 1.4645372233400402, "grad_norm": 0.33069029450416565, "learning_rate": 6.070786463125885e-06, "loss": 0.3498, "step": 5823 }, { "epoch": 1.4647887323943662, "grad_norm": 0.32250040769577026, "learning_rate": 6.069357118199775e-06, "loss": 0.3651, "step": 5824 }, { "epoch": 1.4650402414486923, "grad_norm": 0.3161625564098358, "learning_rate": 6.067927681689917e-06, "loss": 0.367, "step": 5825 }, { "epoch": 1.465291750503018, "grad_norm": 0.3699602484703064, "learning_rate": 6.066498153718735e-06, "loss": 0.342, "step": 5826 }, { "epoch": 1.465543259557344, "grad_norm": 0.33579832315444946, "learning_rate": 6.065068534408657e-06, "loss": 0.3465, "step": 5827 }, { "epoch": 1.4657947686116701, "grad_norm": 0.3177899718284607, "learning_rate": 6.063638823882123e-06, "loss": 0.3512, "step": 5828 }, { "epoch": 1.466046277665996, "grad_norm": 0.31834790110588074, "learning_rate": 6.062209022261577e-06, "loss": 0.3652, "step": 5829 }, { "epoch": 1.466297786720322, "grad_norm": 0.30503425002098083, "learning_rate": 6.060779129669474e-06, "loss": 0.3319, "step": 5830 }, { "epoch": 1.466549295774648, "grad_norm": 0.32431966066360474, "learning_rate": 6.059349146228275e-06, "loss": 0.35, "step": 5831 }, { "epoch": 1.4668008048289738, "grad_norm": 0.3199666440486908, "learning_rate": 6.057919072060448e-06, "loss": 0.3537, "step": 5832 }, { "epoch": 1.4670523138832998, "grad_norm": 0.3279325067996979, "learning_rate": 6.0564889072884715e-06, "loss": 0.3408, "step": 5833 }, { "epoch": 1.4673038229376258, "grad_norm": 0.3161599338054657, "learning_rate": 6.0550586520348285e-06, "loss": 0.3576, "step": 5834 }, { "epoch": 1.4675553319919517, "grad_norm": 0.32007861137390137, "learning_rate": 6.053628306422014e-06, "loss": 0.3436, "step": 5835 }, { "epoch": 1.4678068410462777, "grad_norm": 0.3110705316066742, "learning_rate": 6.052197870572525e-06, "loss": 0.3723, "step": 5836 }, { "epoch": 1.4680583501006037, "grad_norm": 0.33387455344200134, "learning_rate": 6.0507673446088726e-06, "loss": 0.3686, "step": 5837 }, { "epoch": 1.4683098591549295, "grad_norm": 0.3183239996433258, "learning_rate": 6.049336728653569e-06, "loss": 0.367, "step": 5838 }, { "epoch": 1.4685613682092555, "grad_norm": 0.3348864018917084, "learning_rate": 6.0479060228291396e-06, "loss": 0.3625, "step": 5839 }, { "epoch": 1.4688128772635816, "grad_norm": 0.3361799120903015, "learning_rate": 6.046475227258115e-06, "loss": 0.3475, "step": 5840 }, { "epoch": 1.4690643863179074, "grad_norm": 0.3200930655002594, "learning_rate": 6.045044342063034e-06, "loss": 0.3711, "step": 5841 }, { "epoch": 1.4693158953722334, "grad_norm": 0.2997252345085144, "learning_rate": 6.043613367366444e-06, "loss": 0.3506, "step": 5842 }, { "epoch": 1.4695674044265594, "grad_norm": 0.3714944124221802, "learning_rate": 6.042182303290898e-06, "loss": 0.3711, "step": 5843 }, { "epoch": 1.4698189134808852, "grad_norm": 0.32264792919158936, "learning_rate": 6.040751149958955e-06, "loss": 0.3467, "step": 5844 }, { "epoch": 1.4700704225352113, "grad_norm": 0.3230764865875244, "learning_rate": 6.0393199074931886e-06, "loss": 0.3556, "step": 5845 }, { "epoch": 1.4703219315895373, "grad_norm": 0.31362685561180115, "learning_rate": 6.037888576016174e-06, "loss": 0.3421, "step": 5846 }, { "epoch": 1.470573440643863, "grad_norm": 0.3665870130062103, "learning_rate": 6.036457155650496e-06, "loss": 0.3703, "step": 5847 }, { "epoch": 1.470824949698189, "grad_norm": 0.327773779630661, "learning_rate": 6.035025646518747e-06, "loss": 0.357, "step": 5848 }, { "epoch": 1.4710764587525151, "grad_norm": 0.32785624265670776, "learning_rate": 6.033594048743525e-06, "loss": 0.3694, "step": 5849 }, { "epoch": 1.471327967806841, "grad_norm": 0.3332252502441406, "learning_rate": 6.03216236244744e-06, "loss": 0.3334, "step": 5850 }, { "epoch": 1.471579476861167, "grad_norm": 0.3057732582092285, "learning_rate": 6.030730587753106e-06, "loss": 0.37, "step": 5851 }, { "epoch": 1.471830985915493, "grad_norm": 0.28898757696151733, "learning_rate": 6.0292987247831455e-06, "loss": 0.3595, "step": 5852 }, { "epoch": 1.4720824949698188, "grad_norm": 0.3072353005409241, "learning_rate": 6.0278667736601885e-06, "loss": 0.3623, "step": 5853 }, { "epoch": 1.4723340040241448, "grad_norm": 0.32753127813339233, "learning_rate": 6.026434734506872e-06, "loss": 0.3604, "step": 5854 }, { "epoch": 1.4725855130784709, "grad_norm": 0.33749762177467346, "learning_rate": 6.025002607445842e-06, "loss": 0.3586, "step": 5855 }, { "epoch": 1.4728370221327967, "grad_norm": 0.3170812427997589, "learning_rate": 6.023570392599751e-06, "loss": 0.3351, "step": 5856 }, { "epoch": 1.4730885311871227, "grad_norm": 0.3415549099445343, "learning_rate": 6.02213809009126e-06, "loss": 0.334, "step": 5857 }, { "epoch": 1.4733400402414487, "grad_norm": 0.32296910881996155, "learning_rate": 6.020705700043036e-06, "loss": 0.3375, "step": 5858 }, { "epoch": 1.4735915492957745, "grad_norm": 0.3227507770061493, "learning_rate": 6.019273222577754e-06, "loss": 0.3523, "step": 5859 }, { "epoch": 1.4738430583501005, "grad_norm": 0.3127809166908264, "learning_rate": 6.017840657818097e-06, "loss": 0.3279, "step": 5860 }, { "epoch": 1.4740945674044266, "grad_norm": 0.3176402449607849, "learning_rate": 6.016408005886756e-06, "loss": 0.3604, "step": 5861 }, { "epoch": 1.4743460764587526, "grad_norm": 0.3369061350822449, "learning_rate": 6.014975266906427e-06, "loss": 0.3406, "step": 5862 }, { "epoch": 1.4745975855130784, "grad_norm": 0.3053755462169647, "learning_rate": 6.0135424409998156e-06, "loss": 0.3587, "step": 5863 }, { "epoch": 1.4748490945674044, "grad_norm": 0.30855026841163635, "learning_rate": 6.0121095282896356e-06, "loss": 0.3404, "step": 5864 }, { "epoch": 1.4751006036217305, "grad_norm": 0.31946873664855957, "learning_rate": 6.010676528898606e-06, "loss": 0.334, "step": 5865 }, { "epoch": 1.4753521126760563, "grad_norm": 0.34600701928138733, "learning_rate": 6.009243442949454e-06, "loss": 0.3573, "step": 5866 }, { "epoch": 1.4756036217303823, "grad_norm": 0.32230931520462036, "learning_rate": 6.007810270564916e-06, "loss": 0.3549, "step": 5867 }, { "epoch": 1.4758551307847083, "grad_norm": 0.3308599293231964, "learning_rate": 6.006377011867732e-06, "loss": 0.3662, "step": 5868 }, { "epoch": 1.4761066398390343, "grad_norm": 0.3071610629558563, "learning_rate": 6.004943666980654e-06, "loss": 0.334, "step": 5869 }, { "epoch": 1.4763581488933601, "grad_norm": 0.30614978075027466, "learning_rate": 6.003510236026436e-06, "loss": 0.3623, "step": 5870 }, { "epoch": 1.4766096579476862, "grad_norm": 0.325398325920105, "learning_rate": 6.0020767191278465e-06, "loss": 0.3395, "step": 5871 }, { "epoch": 1.4768611670020122, "grad_norm": 0.3325035572052002, "learning_rate": 6.000643116407654e-06, "loss": 0.3651, "step": 5872 }, { "epoch": 1.477112676056338, "grad_norm": 0.30189478397369385, "learning_rate": 5.999209427988638e-06, "loss": 0.3646, "step": 5873 }, { "epoch": 1.477364185110664, "grad_norm": 0.3175562918186188, "learning_rate": 5.997775653993586e-06, "loss": 0.3582, "step": 5874 }, { "epoch": 1.47761569416499, "grad_norm": 0.3137342929840088, "learning_rate": 5.996341794545292e-06, "loss": 0.3407, "step": 5875 }, { "epoch": 1.4778672032193159, "grad_norm": 0.3235321342945099, "learning_rate": 5.9949078497665555e-06, "loss": 0.3353, "step": 5876 }, { "epoch": 1.4781187122736419, "grad_norm": 0.3290002942085266, "learning_rate": 5.993473819780185e-06, "loss": 0.3676, "step": 5877 }, { "epoch": 1.478370221327968, "grad_norm": 0.3220723271369934, "learning_rate": 5.992039704708998e-06, "loss": 0.356, "step": 5878 }, { "epoch": 1.4786217303822937, "grad_norm": 0.33870404958724976, "learning_rate": 5.9906055046758174e-06, "loss": 0.3572, "step": 5879 }, { "epoch": 1.4788732394366197, "grad_norm": 0.3206806778907776, "learning_rate": 5.989171219803471e-06, "loss": 0.3593, "step": 5880 }, { "epoch": 1.4791247484909458, "grad_norm": 0.35171839594841003, "learning_rate": 5.987736850214798e-06, "loss": 0.357, "step": 5881 }, { "epoch": 1.4793762575452716, "grad_norm": 0.3402028977870941, "learning_rate": 5.986302396032644e-06, "loss": 0.3724, "step": 5882 }, { "epoch": 1.4796277665995976, "grad_norm": 0.35204577445983887, "learning_rate": 5.98486785737986e-06, "loss": 0.35, "step": 5883 }, { "epoch": 1.4798792756539236, "grad_norm": 0.3249172568321228, "learning_rate": 5.983433234379306e-06, "loss": 0.3657, "step": 5884 }, { "epoch": 1.4801307847082494, "grad_norm": 0.3084418475627899, "learning_rate": 5.9819985271538465e-06, "loss": 0.3527, "step": 5885 }, { "epoch": 1.4803822937625755, "grad_norm": 0.3349623382091522, "learning_rate": 5.980563735826355e-06, "loss": 0.3399, "step": 5886 }, { "epoch": 1.4806338028169015, "grad_norm": 0.31369122862815857, "learning_rate": 5.9791288605197175e-06, "loss": 0.3405, "step": 5887 }, { "epoch": 1.4808853118712273, "grad_norm": 0.3203195035457611, "learning_rate": 5.9776939013568145e-06, "loss": 0.3463, "step": 5888 }, { "epoch": 1.4811368209255533, "grad_norm": 0.31735894083976746, "learning_rate": 5.976258858460548e-06, "loss": 0.3336, "step": 5889 }, { "epoch": 1.4813883299798793, "grad_norm": 0.3369232416152954, "learning_rate": 5.974823731953817e-06, "loss": 0.3262, "step": 5890 }, { "epoch": 1.4816398390342052, "grad_norm": 0.31136730313301086, "learning_rate": 5.973388521959532e-06, "loss": 0.3543, "step": 5891 }, { "epoch": 1.4818913480885312, "grad_norm": 0.3076231777667999, "learning_rate": 5.97195322860061e-06, "loss": 0.3416, "step": 5892 }, { "epoch": 1.4821428571428572, "grad_norm": 0.33634480834007263, "learning_rate": 5.970517851999974e-06, "loss": 0.345, "step": 5893 }, { "epoch": 1.482394366197183, "grad_norm": 0.32039153575897217, "learning_rate": 5.969082392280557e-06, "loss": 0.3473, "step": 5894 }, { "epoch": 1.482645875251509, "grad_norm": 0.3011680543422699, "learning_rate": 5.967646849565294e-06, "loss": 0.343, "step": 5895 }, { "epoch": 1.482897384305835, "grad_norm": 0.33874598145484924, "learning_rate": 5.966211223977132e-06, "loss": 0.36, "step": 5896 }, { "epoch": 1.4831488933601609, "grad_norm": 0.3423505127429962, "learning_rate": 5.964775515639023e-06, "loss": 0.3646, "step": 5897 }, { "epoch": 1.483400402414487, "grad_norm": 0.33873575925827026, "learning_rate": 5.963339724673928e-06, "loss": 0.3458, "step": 5898 }, { "epoch": 1.483651911468813, "grad_norm": 0.3190835416316986, "learning_rate": 5.96190385120481e-06, "loss": 0.3767, "step": 5899 }, { "epoch": 1.4839034205231387, "grad_norm": 0.33720406889915466, "learning_rate": 5.960467895354646e-06, "loss": 0.3535, "step": 5900 }, { "epoch": 1.4841549295774648, "grad_norm": 0.3207988440990448, "learning_rate": 5.959031857246415e-06, "loss": 0.3518, "step": 5901 }, { "epoch": 1.4844064386317908, "grad_norm": 0.3778199255466461, "learning_rate": 5.9575957370031065e-06, "loss": 0.3516, "step": 5902 }, { "epoch": 1.4846579476861166, "grad_norm": 0.3221379220485687, "learning_rate": 5.956159534747713e-06, "loss": 0.3216, "step": 5903 }, { "epoch": 1.4849094567404426, "grad_norm": 0.296059787273407, "learning_rate": 5.954723250603237e-06, "loss": 0.3198, "step": 5904 }, { "epoch": 1.4851609657947686, "grad_norm": 0.394718736410141, "learning_rate": 5.953286884692688e-06, "loss": 0.3623, "step": 5905 }, { "epoch": 1.4854124748490944, "grad_norm": 0.3284577429294586, "learning_rate": 5.9518504371390805e-06, "loss": 0.3453, "step": 5906 }, { "epoch": 1.4856639839034205, "grad_norm": 0.3131723999977112, "learning_rate": 5.950413908065437e-06, "loss": 0.3722, "step": 5907 }, { "epoch": 1.4859154929577465, "grad_norm": 0.3282751142978668, "learning_rate": 5.94897729759479e-06, "loss": 0.3348, "step": 5908 }, { "epoch": 1.4861670020120723, "grad_norm": 0.3348959684371948, "learning_rate": 5.947540605850173e-06, "loss": 0.3454, "step": 5909 }, { "epoch": 1.4864185110663983, "grad_norm": 0.34094908833503723, "learning_rate": 5.946103832954631e-06, "loss": 0.3677, "step": 5910 }, { "epoch": 1.4866700201207244, "grad_norm": 0.35626813769340515, "learning_rate": 5.944666979031215e-06, "loss": 0.3592, "step": 5911 }, { "epoch": 1.4869215291750504, "grad_norm": 0.3323785960674286, "learning_rate": 5.943230044202981e-06, "loss": 0.3671, "step": 5912 }, { "epoch": 1.4871730382293762, "grad_norm": 0.32840991020202637, "learning_rate": 5.941793028592996e-06, "loss": 0.3488, "step": 5913 }, { "epoch": 1.4874245472837022, "grad_norm": 0.3254261314868927, "learning_rate": 5.940355932324329e-06, "loss": 0.343, "step": 5914 }, { "epoch": 1.4876760563380282, "grad_norm": 0.3155461847782135, "learning_rate": 5.938918755520059e-06, "loss": 0.3398, "step": 5915 }, { "epoch": 1.487927565392354, "grad_norm": 0.3168039619922638, "learning_rate": 5.9374814983032724e-06, "loss": 0.3664, "step": 5916 }, { "epoch": 1.48817907444668, "grad_norm": 0.344101220369339, "learning_rate": 5.9360441607970585e-06, "loss": 0.3417, "step": 5917 }, { "epoch": 1.488430583501006, "grad_norm": 0.3618612587451935, "learning_rate": 5.93460674312452e-06, "loss": 0.3684, "step": 5918 }, { "epoch": 1.4886820925553321, "grad_norm": 0.32880085706710815, "learning_rate": 5.933169245408761e-06, "loss": 0.3381, "step": 5919 }, { "epoch": 1.488933601609658, "grad_norm": 0.3657226860523224, "learning_rate": 5.931731667772893e-06, "loss": 0.3754, "step": 5920 }, { "epoch": 1.489185110663984, "grad_norm": 0.3417291045188904, "learning_rate": 5.930294010340035e-06, "loss": 0.3586, "step": 5921 }, { "epoch": 1.48943661971831, "grad_norm": 0.3205607235431671, "learning_rate": 5.928856273233316e-06, "loss": 0.3415, "step": 5922 }, { "epoch": 1.4896881287726358, "grad_norm": 0.35127079486846924, "learning_rate": 5.92741845657587e-06, "loss": 0.3403, "step": 5923 }, { "epoch": 1.4899396378269618, "grad_norm": 0.3201601207256317, "learning_rate": 5.925980560490834e-06, "loss": 0.3326, "step": 5924 }, { "epoch": 1.4901911468812878, "grad_norm": 0.3274860978126526, "learning_rate": 5.924542585101356e-06, "loss": 0.3409, "step": 5925 }, { "epoch": 1.4904426559356136, "grad_norm": 0.3243807852268219, "learning_rate": 5.923104530530589e-06, "loss": 0.3353, "step": 5926 }, { "epoch": 1.4906941649899397, "grad_norm": 0.3296162486076355, "learning_rate": 5.921666396901694e-06, "loss": 0.3653, "step": 5927 }, { "epoch": 1.4909456740442657, "grad_norm": 0.31820058822631836, "learning_rate": 5.9202281843378385e-06, "loss": 0.343, "step": 5928 }, { "epoch": 1.4911971830985915, "grad_norm": 0.3398142457008362, "learning_rate": 5.918789892962196e-06, "loss": 0.3509, "step": 5929 }, { "epoch": 1.4914486921529175, "grad_norm": 0.32767853140830994, "learning_rate": 5.917351522897946e-06, "loss": 0.3578, "step": 5930 }, { "epoch": 1.4917002012072436, "grad_norm": 0.30963650345802307, "learning_rate": 5.915913074268277e-06, "loss": 0.3416, "step": 5931 }, { "epoch": 1.4919517102615694, "grad_norm": 0.3078908920288086, "learning_rate": 5.914474547196384e-06, "loss": 0.3493, "step": 5932 }, { "epoch": 1.4922032193158954, "grad_norm": 0.32465776801109314, "learning_rate": 5.913035941805467e-06, "loss": 0.3434, "step": 5933 }, { "epoch": 1.4924547283702214, "grad_norm": 0.32990938425064087, "learning_rate": 5.911597258218733e-06, "loss": 0.3676, "step": 5934 }, { "epoch": 1.4927062374245472, "grad_norm": 0.33850571513175964, "learning_rate": 5.9101584965593975e-06, "loss": 0.342, "step": 5935 }, { "epoch": 1.4929577464788732, "grad_norm": 0.3495611846446991, "learning_rate": 5.90871965695068e-06, "loss": 0.3366, "step": 5936 }, { "epoch": 1.4932092555331993, "grad_norm": 0.32349708676338196, "learning_rate": 5.907280739515809e-06, "loss": 0.3408, "step": 5937 }, { "epoch": 1.493460764587525, "grad_norm": 0.3027772307395935, "learning_rate": 5.905841744378019e-06, "loss": 0.3582, "step": 5938 }, { "epoch": 1.493712273641851, "grad_norm": 0.3265613913536072, "learning_rate": 5.904402671660551e-06, "loss": 0.3438, "step": 5939 }, { "epoch": 1.4939637826961771, "grad_norm": 0.3158648908138275, "learning_rate": 5.902963521486651e-06, "loss": 0.3569, "step": 5940 }, { "epoch": 1.494215291750503, "grad_norm": 0.3225020468235016, "learning_rate": 5.901524293979575e-06, "loss": 0.3403, "step": 5941 }, { "epoch": 1.494466800804829, "grad_norm": 0.33680132031440735, "learning_rate": 5.900084989262581e-06, "loss": 0.3514, "step": 5942 }, { "epoch": 1.494718309859155, "grad_norm": 0.30138635635375977, "learning_rate": 5.898645607458941e-06, "loss": 0.3465, "step": 5943 }, { "epoch": 1.4949698189134808, "grad_norm": 0.3087744116783142, "learning_rate": 5.897206148691925e-06, "loss": 0.3362, "step": 5944 }, { "epoch": 1.4952213279678068, "grad_norm": 0.3069364130496979, "learning_rate": 5.895766613084817e-06, "loss": 0.3122, "step": 5945 }, { "epoch": 1.4954728370221329, "grad_norm": 0.289499968290329, "learning_rate": 5.8943270007609026e-06, "loss": 0.3384, "step": 5946 }, { "epoch": 1.4957243460764587, "grad_norm": 0.3094853162765503, "learning_rate": 5.892887311843474e-06, "loss": 0.333, "step": 5947 }, { "epoch": 1.4959758551307847, "grad_norm": 0.28878265619277954, "learning_rate": 5.891447546455833e-06, "loss": 0.32, "step": 5948 }, { "epoch": 1.4962273641851107, "grad_norm": 0.32799869775772095, "learning_rate": 5.890007704721288e-06, "loss": 0.3462, "step": 5949 }, { "epoch": 1.4964788732394365, "grad_norm": 0.30716651678085327, "learning_rate": 5.88856778676315e-06, "loss": 0.3626, "step": 5950 }, { "epoch": 1.4967303822937625, "grad_norm": 0.31167250871658325, "learning_rate": 5.88712779270474e-06, "loss": 0.3524, "step": 5951 }, { "epoch": 1.4969818913480886, "grad_norm": 0.31440672278404236, "learning_rate": 5.885687722669384e-06, "loss": 0.3277, "step": 5952 }, { "epoch": 1.4972334004024144, "grad_norm": 0.28327929973602295, "learning_rate": 5.884247576780416e-06, "loss": 0.347, "step": 5953 }, { "epoch": 1.4974849094567404, "grad_norm": 0.33180156350135803, "learning_rate": 5.882807355161174e-06, "loss": 0.3556, "step": 5954 }, { "epoch": 1.4977364185110664, "grad_norm": 0.30268731713294983, "learning_rate": 5.881367057935005e-06, "loss": 0.337, "step": 5955 }, { "epoch": 1.4979879275653922, "grad_norm": 0.32002800703048706, "learning_rate": 5.879926685225264e-06, "loss": 0.3529, "step": 5956 }, { "epoch": 1.4982394366197183, "grad_norm": 0.31586262583732605, "learning_rate": 5.878486237155304e-06, "loss": 0.3336, "step": 5957 }, { "epoch": 1.4984909456740443, "grad_norm": 0.3224563002586365, "learning_rate": 5.877045713848495e-06, "loss": 0.3587, "step": 5958 }, { "epoch": 1.49874245472837, "grad_norm": 0.32255980372428894, "learning_rate": 5.875605115428207e-06, "loss": 0.3308, "step": 5959 }, { "epoch": 1.4989939637826961, "grad_norm": 0.3053814172744751, "learning_rate": 5.874164442017819e-06, "loss": 0.3301, "step": 5960 }, { "epoch": 1.4992454728370221, "grad_norm": 0.32673344016075134, "learning_rate": 5.872723693740715e-06, "loss": 0.3345, "step": 5961 }, { "epoch": 1.4994969818913482, "grad_norm": 0.3168867230415344, "learning_rate": 5.871282870720286e-06, "loss": 0.3376, "step": 5962 }, { "epoch": 1.499748490945674, "grad_norm": 0.3028486371040344, "learning_rate": 5.869841973079931e-06, "loss": 0.3365, "step": 5963 }, { "epoch": 1.5, "grad_norm": 0.34289392828941345, "learning_rate": 5.868401000943051e-06, "loss": 0.348, "step": 5964 }, { "epoch": 1.5002515090543258, "grad_norm": 0.3447049856185913, "learning_rate": 5.866959954433058e-06, "loss": 0.3576, "step": 5965 }, { "epoch": 1.500503018108652, "grad_norm": 0.3279689848423004, "learning_rate": 5.86551883367337e-06, "loss": 0.3803, "step": 5966 }, { "epoch": 1.5007545271629779, "grad_norm": 0.3094026744365692, "learning_rate": 5.8640776387874085e-06, "loss": 0.351, "step": 5967 }, { "epoch": 1.5010060362173037, "grad_norm": 0.3381515443325043, "learning_rate": 5.8626363698986025e-06, "loss": 0.3711, "step": 5968 }, { "epoch": 1.50125754527163, "grad_norm": 0.3412519693374634, "learning_rate": 5.861195027130388e-06, "loss": 0.3376, "step": 5969 }, { "epoch": 1.5015090543259557, "grad_norm": 0.33772408962249756, "learning_rate": 5.859753610606207e-06, "loss": 0.3629, "step": 5970 }, { "epoch": 1.5017605633802817, "grad_norm": 0.3265193700790405, "learning_rate": 5.858312120449507e-06, "loss": 0.347, "step": 5971 }, { "epoch": 1.5020120724346078, "grad_norm": 0.3511604368686676, "learning_rate": 5.856870556783746e-06, "loss": 0.3296, "step": 5972 }, { "epoch": 1.5022635814889336, "grad_norm": 0.3105302155017853, "learning_rate": 5.85542891973238e-06, "loss": 0.3374, "step": 5973 }, { "epoch": 1.5025150905432596, "grad_norm": 0.34747567772865295, "learning_rate": 5.85398720941888e-06, "loss": 0.3545, "step": 5974 }, { "epoch": 1.5027665995975856, "grad_norm": 0.3080263137817383, "learning_rate": 5.852545425966717e-06, "loss": 0.3281, "step": 5975 }, { "epoch": 1.5030181086519114, "grad_norm": 0.34550175070762634, "learning_rate": 5.851103569499372e-06, "loss": 0.3526, "step": 5976 }, { "epoch": 1.5032696177062375, "grad_norm": 0.3092076778411865, "learning_rate": 5.849661640140332e-06, "loss": 0.3381, "step": 5977 }, { "epoch": 1.5035211267605635, "grad_norm": 0.31573188304901123, "learning_rate": 5.848219638013086e-06, "loss": 0.3599, "step": 5978 }, { "epoch": 1.5037726358148893, "grad_norm": 0.2985522449016571, "learning_rate": 5.846777563241136e-06, "loss": 0.3504, "step": 5979 }, { "epoch": 1.5040241448692153, "grad_norm": 0.2984470725059509, "learning_rate": 5.845335415947985e-06, "loss": 0.334, "step": 5980 }, { "epoch": 1.5042756539235413, "grad_norm": 0.3081437349319458, "learning_rate": 5.8438931962571435e-06, "loss": 0.3274, "step": 5981 }, { "epoch": 1.5045271629778671, "grad_norm": 0.31499478220939636, "learning_rate": 5.842450904292128e-06, "loss": 0.3468, "step": 5982 }, { "epoch": 1.5047786720321932, "grad_norm": 0.30446550250053406, "learning_rate": 5.841008540176465e-06, "loss": 0.3299, "step": 5983 }, { "epoch": 1.5050301810865192, "grad_norm": 0.2896031141281128, "learning_rate": 5.83956610403368e-06, "loss": 0.3371, "step": 5984 }, { "epoch": 1.505281690140845, "grad_norm": 0.3181334435939789, "learning_rate": 5.838123595987312e-06, "loss": 0.3294, "step": 5985 }, { "epoch": 1.505533199195171, "grad_norm": 0.32969263195991516, "learning_rate": 5.8366810161609e-06, "loss": 0.3314, "step": 5986 }, { "epoch": 1.505784708249497, "grad_norm": 0.3354569375514984, "learning_rate": 5.835238364677994e-06, "loss": 0.3647, "step": 5987 }, { "epoch": 1.5060362173038229, "grad_norm": 0.30361291766166687, "learning_rate": 5.8337956416621465e-06, "loss": 0.3302, "step": 5988 }, { "epoch": 1.506287726358149, "grad_norm": 0.3423859775066376, "learning_rate": 5.832352847236919e-06, "loss": 0.3275, "step": 5989 }, { "epoch": 1.506539235412475, "grad_norm": 0.3418254554271698, "learning_rate": 5.830909981525879e-06, "loss": 0.3161, "step": 5990 }, { "epoch": 1.5067907444668007, "grad_norm": 0.33632370829582214, "learning_rate": 5.829467044652595e-06, "loss": 0.3405, "step": 5991 }, { "epoch": 1.5070422535211268, "grad_norm": 0.31483152508735657, "learning_rate": 5.828024036740649e-06, "loss": 0.3588, "step": 5992 }, { "epoch": 1.5072937625754528, "grad_norm": 0.3332071900367737, "learning_rate": 5.826580957913624e-06, "loss": 0.3365, "step": 5993 }, { "epoch": 1.5075452716297786, "grad_norm": 0.3333156704902649, "learning_rate": 5.825137808295111e-06, "loss": 0.3691, "step": 5994 }, { "epoch": 1.5077967806841046, "grad_norm": 0.32617276906967163, "learning_rate": 5.823694588008707e-06, "loss": 0.3303, "step": 5995 }, { "epoch": 1.5080482897384306, "grad_norm": 0.3059340715408325, "learning_rate": 5.822251297178014e-06, "loss": 0.3357, "step": 5996 }, { "epoch": 1.5082997987927564, "grad_norm": 0.3015100359916687, "learning_rate": 5.82080793592664e-06, "loss": 0.3366, "step": 5997 }, { "epoch": 1.5085513078470825, "grad_norm": 0.3392370939254761, "learning_rate": 5.819364504378203e-06, "loss": 0.3774, "step": 5998 }, { "epoch": 1.5088028169014085, "grad_norm": 0.34105855226516724, "learning_rate": 5.817921002656323e-06, "loss": 0.3507, "step": 5999 }, { "epoch": 1.5090543259557343, "grad_norm": 0.3226035237312317, "learning_rate": 5.816477430884625e-06, "loss": 0.3672, "step": 6000 }, { "epoch": 1.5093058350100603, "grad_norm": 0.32897132635116577, "learning_rate": 5.815033789186743e-06, "loss": 0.354, "step": 6001 }, { "epoch": 1.5095573440643864, "grad_norm": 0.30205658078193665, "learning_rate": 5.813590077686315e-06, "loss": 0.3416, "step": 6002 }, { "epoch": 1.5098088531187122, "grad_norm": 0.33135828375816345, "learning_rate": 5.812146296506987e-06, "loss": 0.3654, "step": 6003 }, { "epoch": 1.5100603621730382, "grad_norm": 0.3252605199813843, "learning_rate": 5.810702445772411e-06, "loss": 0.3617, "step": 6004 }, { "epoch": 1.5103118712273642, "grad_norm": 0.3258083760738373, "learning_rate": 5.809258525606241e-06, "loss": 0.3513, "step": 6005 }, { "epoch": 1.51056338028169, "grad_norm": 0.3420291244983673, "learning_rate": 5.807814536132141e-06, "loss": 0.3803, "step": 6006 }, { "epoch": 1.510814889336016, "grad_norm": 0.317022442817688, "learning_rate": 5.806370477473778e-06, "loss": 0.35, "step": 6007 }, { "epoch": 1.511066398390342, "grad_norm": 0.30522531270980835, "learning_rate": 5.80492634975483e-06, "loss": 0.3376, "step": 6008 }, { "epoch": 1.5113179074446679, "grad_norm": 0.3109252452850342, "learning_rate": 5.803482153098976e-06, "loss": 0.3218, "step": 6009 }, { "epoch": 1.5115694164989941, "grad_norm": 0.3221626281738281, "learning_rate": 5.802037887629902e-06, "loss": 0.3244, "step": 6010 }, { "epoch": 1.51182092555332, "grad_norm": 0.31988272070884705, "learning_rate": 5.800593553471301e-06, "loss": 0.3714, "step": 6011 }, { "epoch": 1.5120724346076457, "grad_norm": 0.298286497592926, "learning_rate": 5.79914915074687e-06, "loss": 0.3457, "step": 6012 }, { "epoch": 1.512323943661972, "grad_norm": 0.3138798773288727, "learning_rate": 5.797704679580313e-06, "loss": 0.3422, "step": 6013 }, { "epoch": 1.5125754527162978, "grad_norm": 0.307769238948822, "learning_rate": 5.796260140095343e-06, "loss": 0.3435, "step": 6014 }, { "epoch": 1.5128269617706236, "grad_norm": 0.30271801352500916, "learning_rate": 5.79481553241567e-06, "loss": 0.3565, "step": 6015 }, { "epoch": 1.5130784708249498, "grad_norm": 0.31750625371932983, "learning_rate": 5.793370856665021e-06, "loss": 0.3732, "step": 6016 }, { "epoch": 1.5133299798792756, "grad_norm": 0.3040974736213684, "learning_rate": 5.791926112967122e-06, "loss": 0.3527, "step": 6017 }, { "epoch": 1.5135814889336014, "grad_norm": 0.2947345972061157, "learning_rate": 5.790481301445704e-06, "loss": 0.3503, "step": 6018 }, { "epoch": 1.5138329979879277, "grad_norm": 0.3347560167312622, "learning_rate": 5.789036422224508e-06, "loss": 0.3437, "step": 6019 }, { "epoch": 1.5140845070422535, "grad_norm": 0.2965729236602783, "learning_rate": 5.78759147542728e-06, "loss": 0.3459, "step": 6020 }, { "epoch": 1.5143360160965795, "grad_norm": 0.2949177622795105, "learning_rate": 5.786146461177769e-06, "loss": 0.3359, "step": 6021 }, { "epoch": 1.5145875251509056, "grad_norm": 0.3233572542667389, "learning_rate": 5.7847013795997306e-06, "loss": 0.376, "step": 6022 }, { "epoch": 1.5148390342052314, "grad_norm": 0.3046778440475464, "learning_rate": 5.7832562308169286e-06, "loss": 0.339, "step": 6023 }, { "epoch": 1.5150905432595574, "grad_norm": 0.2932363450527191, "learning_rate": 5.78181101495313e-06, "loss": 0.3297, "step": 6024 }, { "epoch": 1.5153420523138834, "grad_norm": 0.29733753204345703, "learning_rate": 5.78036573213211e-06, "loss": 0.3531, "step": 6025 }, { "epoch": 1.5155935613682092, "grad_norm": 0.2947341501712799, "learning_rate": 5.778920382477647e-06, "loss": 0.3486, "step": 6026 }, { "epoch": 1.5158450704225352, "grad_norm": 0.3160892724990845, "learning_rate": 5.7774749661135255e-06, "loss": 0.3706, "step": 6027 }, { "epoch": 1.5160965794768613, "grad_norm": 0.3119431734085083, "learning_rate": 5.776029483163538e-06, "loss": 0.3675, "step": 6028 }, { "epoch": 1.516348088531187, "grad_norm": 0.30674445629119873, "learning_rate": 5.774583933751477e-06, "loss": 0.3458, "step": 6029 }, { "epoch": 1.516599597585513, "grad_norm": 0.3203643560409546, "learning_rate": 5.773138318001151e-06, "loss": 0.3581, "step": 6030 }, { "epoch": 1.5168511066398391, "grad_norm": 0.3252681493759155, "learning_rate": 5.771692636036365e-06, "loss": 0.3449, "step": 6031 }, { "epoch": 1.517102615694165, "grad_norm": 0.30788522958755493, "learning_rate": 5.77024688798093e-06, "loss": 0.3391, "step": 6032 }, { "epoch": 1.517354124748491, "grad_norm": 0.3264254331588745, "learning_rate": 5.76880107395867e-06, "loss": 0.3492, "step": 6033 }, { "epoch": 1.517605633802817, "grad_norm": 0.31012457609176636, "learning_rate": 5.767355194093407e-06, "loss": 0.3388, "step": 6034 }, { "epoch": 1.5178571428571428, "grad_norm": 0.2838296592235565, "learning_rate": 5.765909248508972e-06, "loss": 0.344, "step": 6035 }, { "epoch": 1.5181086519114688, "grad_norm": 0.29018688201904297, "learning_rate": 5.764463237329201e-06, "loss": 0.3587, "step": 6036 }, { "epoch": 1.5183601609657948, "grad_norm": 0.3031754195690155, "learning_rate": 5.7630171606779365e-06, "loss": 0.3667, "step": 6037 }, { "epoch": 1.5186116700201207, "grad_norm": 0.3265339732170105, "learning_rate": 5.761571018679025e-06, "loss": 0.3765, "step": 6038 }, { "epoch": 1.5188631790744467, "grad_norm": 0.3091321289539337, "learning_rate": 5.760124811456322e-06, "loss": 0.3388, "step": 6039 }, { "epoch": 1.5191146881287727, "grad_norm": 0.32367005944252014, "learning_rate": 5.758678539133682e-06, "loss": 0.342, "step": 6040 }, { "epoch": 1.5193661971830985, "grad_norm": 0.3425564765930176, "learning_rate": 5.757232201834973e-06, "loss": 0.3465, "step": 6041 }, { "epoch": 1.5196177062374245, "grad_norm": 0.28837791085243225, "learning_rate": 5.755785799684063e-06, "loss": 0.3377, "step": 6042 }, { "epoch": 1.5198692152917506, "grad_norm": 0.31125667691230774, "learning_rate": 5.754339332804826e-06, "loss": 0.3478, "step": 6043 }, { "epoch": 1.5201207243460764, "grad_norm": 0.342917263507843, "learning_rate": 5.752892801321146e-06, "loss": 0.3409, "step": 6044 }, { "epoch": 1.5203722334004024, "grad_norm": 0.3406009376049042, "learning_rate": 5.751446205356906e-06, "loss": 0.3429, "step": 6045 }, { "epoch": 1.5206237424547284, "grad_norm": 0.32107996940612793, "learning_rate": 5.749999545036001e-06, "loss": 0.332, "step": 6046 }, { "epoch": 1.5208752515090542, "grad_norm": 0.31515464186668396, "learning_rate": 5.7485528204823275e-06, "loss": 0.3561, "step": 6047 }, { "epoch": 1.5211267605633803, "grad_norm": 0.2875670790672302, "learning_rate": 5.7471060318197856e-06, "loss": 0.3564, "step": 6048 }, { "epoch": 1.5213782696177063, "grad_norm": 0.3263548016548157, "learning_rate": 5.7456591791722875e-06, "loss": 0.3738, "step": 6049 }, { "epoch": 1.521629778672032, "grad_norm": 0.30848369002342224, "learning_rate": 5.744212262663745e-06, "loss": 0.351, "step": 6050 }, { "epoch": 1.5218812877263581, "grad_norm": 0.29957401752471924, "learning_rate": 5.742765282418077e-06, "loss": 0.3695, "step": 6051 }, { "epoch": 1.5221327967806841, "grad_norm": 0.30603325366973877, "learning_rate": 5.74131823855921e-06, "loss": 0.3635, "step": 6052 }, { "epoch": 1.52238430583501, "grad_norm": 0.31580379605293274, "learning_rate": 5.739871131211074e-06, "loss": 0.3334, "step": 6053 }, { "epoch": 1.522635814889336, "grad_norm": 0.31090569496154785, "learning_rate": 5.738423960497604e-06, "loss": 0.3362, "step": 6054 }, { "epoch": 1.522887323943662, "grad_norm": 0.33442890644073486, "learning_rate": 5.736976726542742e-06, "loss": 0.363, "step": 6055 }, { "epoch": 1.5231388329979878, "grad_norm": 0.3184659481048584, "learning_rate": 5.735529429470433e-06, "loss": 0.3529, "step": 6056 }, { "epoch": 1.5233903420523138, "grad_norm": 0.3236134946346283, "learning_rate": 5.734082069404631e-06, "loss": 0.3506, "step": 6057 }, { "epoch": 1.5236418511066399, "grad_norm": 0.29839229583740234, "learning_rate": 5.732634646469291e-06, "loss": 0.3242, "step": 6058 }, { "epoch": 1.5238933601609657, "grad_norm": 0.30354323983192444, "learning_rate": 5.731187160788377e-06, "loss": 0.3657, "step": 6059 }, { "epoch": 1.524144869215292, "grad_norm": 0.3145615756511688, "learning_rate": 5.729739612485857e-06, "loss": 0.3592, "step": 6060 }, { "epoch": 1.5243963782696177, "grad_norm": 0.322421133518219, "learning_rate": 5.728292001685706e-06, "loss": 0.3392, "step": 6061 }, { "epoch": 1.5246478873239435, "grad_norm": 0.3052886128425598, "learning_rate": 5.726844328511898e-06, "loss": 0.3416, "step": 6062 }, { "epoch": 1.5248993963782698, "grad_norm": 0.2889304459095001, "learning_rate": 5.725396593088423e-06, "loss": 0.3424, "step": 6063 }, { "epoch": 1.5251509054325956, "grad_norm": 0.3219023048877716, "learning_rate": 5.723948795539267e-06, "loss": 0.3305, "step": 6064 }, { "epoch": 1.5254024144869214, "grad_norm": 0.3205178678035736, "learning_rate": 5.722500935988425e-06, "loss": 0.3336, "step": 6065 }, { "epoch": 1.5256539235412476, "grad_norm": 0.3172166347503662, "learning_rate": 5.721053014559898e-06, "loss": 0.3496, "step": 6066 }, { "epoch": 1.5259054325955734, "grad_norm": 0.30717623233795166, "learning_rate": 5.719605031377693e-06, "loss": 0.3197, "step": 6067 }, { "epoch": 1.5261569416498992, "grad_norm": 0.33775895833969116, "learning_rate": 5.718156986565817e-06, "loss": 0.3463, "step": 6068 }, { "epoch": 1.5264084507042255, "grad_norm": 0.31737133860588074, "learning_rate": 5.71670888024829e-06, "loss": 0.3468, "step": 6069 }, { "epoch": 1.5266599597585513, "grad_norm": 0.33389195799827576, "learning_rate": 5.715260712549129e-06, "loss": 0.364, "step": 6070 }, { "epoch": 1.5269114688128773, "grad_norm": 0.32125452160835266, "learning_rate": 5.713812483592364e-06, "loss": 0.331, "step": 6071 }, { "epoch": 1.5271629778672033, "grad_norm": 0.319833904504776, "learning_rate": 5.712364193502024e-06, "loss": 0.3332, "step": 6072 }, { "epoch": 1.5274144869215291, "grad_norm": 0.3113959729671478, "learning_rate": 5.710915842402147e-06, "loss": 0.3545, "step": 6073 }, { "epoch": 1.5276659959758552, "grad_norm": 0.31851494312286377, "learning_rate": 5.7094674304167766e-06, "loss": 0.3371, "step": 6074 }, { "epoch": 1.5279175050301812, "grad_norm": 0.3372229337692261, "learning_rate": 5.708018957669959e-06, "loss": 0.3374, "step": 6075 }, { "epoch": 1.528169014084507, "grad_norm": 0.3625347316265106, "learning_rate": 5.706570424285747e-06, "loss": 0.3519, "step": 6076 }, { "epoch": 1.528420523138833, "grad_norm": 0.31328415870666504, "learning_rate": 5.705121830388199e-06, "loss": 0.3275, "step": 6077 }, { "epoch": 1.528672032193159, "grad_norm": 0.36509522795677185, "learning_rate": 5.703673176101377e-06, "loss": 0.3426, "step": 6078 }, { "epoch": 1.5289235412474849, "grad_norm": 0.3331649601459503, "learning_rate": 5.702224461549351e-06, "loss": 0.348, "step": 6079 }, { "epoch": 1.529175050301811, "grad_norm": 0.33841848373413086, "learning_rate": 5.700775686856192e-06, "loss": 0.3439, "step": 6080 }, { "epoch": 1.529426559356137, "grad_norm": 0.35388943552970886, "learning_rate": 5.699326852145979e-06, "loss": 0.3504, "step": 6081 }, { "epoch": 1.5296780684104627, "grad_norm": 0.31031563878059387, "learning_rate": 5.697877957542795e-06, "loss": 0.3449, "step": 6082 }, { "epoch": 1.5299295774647887, "grad_norm": 0.34058165550231934, "learning_rate": 5.696429003170732e-06, "loss": 0.347, "step": 6083 }, { "epoch": 1.5301810865191148, "grad_norm": 0.30972498655319214, "learning_rate": 5.69497998915388e-06, "loss": 0.3401, "step": 6084 }, { "epoch": 1.5304325955734406, "grad_norm": 0.32734933495521545, "learning_rate": 5.693530915616341e-06, "loss": 0.3593, "step": 6085 }, { "epoch": 1.5306841046277666, "grad_norm": 0.34089869260787964, "learning_rate": 5.6920817826822176e-06, "loss": 0.3545, "step": 6086 }, { "epoch": 1.5309356136820926, "grad_norm": 0.3333452641963959, "learning_rate": 5.690632590475618e-06, "loss": 0.3751, "step": 6087 }, { "epoch": 1.5311871227364184, "grad_norm": 0.3344314992427826, "learning_rate": 5.6891833391206585e-06, "loss": 0.3573, "step": 6088 }, { "epoch": 1.5314386317907445, "grad_norm": 0.2930935323238373, "learning_rate": 5.687734028741459e-06, "loss": 0.3553, "step": 6089 }, { "epoch": 1.5316901408450705, "grad_norm": 0.32319191098213196, "learning_rate": 5.68628465946214e-06, "loss": 0.3395, "step": 6090 }, { "epoch": 1.5319416498993963, "grad_norm": 0.30757129192352295, "learning_rate": 5.684835231406835e-06, "loss": 0.3526, "step": 6091 }, { "epoch": 1.5321931589537223, "grad_norm": 0.31132009625434875, "learning_rate": 5.683385744699675e-06, "loss": 0.3385, "step": 6092 }, { "epoch": 1.5324446680080483, "grad_norm": 0.31980809569358826, "learning_rate": 5.681936199464803e-06, "loss": 0.3532, "step": 6093 }, { "epoch": 1.5326961770623742, "grad_norm": 0.29808422923088074, "learning_rate": 5.680486595826361e-06, "loss": 0.3306, "step": 6094 }, { "epoch": 1.5329476861167002, "grad_norm": 0.3331838548183441, "learning_rate": 5.6790369339085e-06, "loss": 0.3695, "step": 6095 }, { "epoch": 1.5331991951710262, "grad_norm": 0.3027997612953186, "learning_rate": 5.677587213835372e-06, "loss": 0.3521, "step": 6096 }, { "epoch": 1.533450704225352, "grad_norm": 0.3264061212539673, "learning_rate": 5.676137435731139e-06, "loss": 0.3425, "step": 6097 }, { "epoch": 1.533702213279678, "grad_norm": 0.32032856345176697, "learning_rate": 5.674687599719963e-06, "loss": 0.343, "step": 6098 }, { "epoch": 1.533953722334004, "grad_norm": 0.33043432235717773, "learning_rate": 5.673237705926018e-06, "loss": 0.3522, "step": 6099 }, { "epoch": 1.5342052313883299, "grad_norm": 0.30190953612327576, "learning_rate": 5.6717877544734735e-06, "loss": 0.3511, "step": 6100 }, { "epoch": 1.534456740442656, "grad_norm": 0.2926798462867737, "learning_rate": 5.670337745486511e-06, "loss": 0.3662, "step": 6101 }, { "epoch": 1.534708249496982, "grad_norm": 0.289917528629303, "learning_rate": 5.668887679089314e-06, "loss": 0.3499, "step": 6102 }, { "epoch": 1.5349597585513077, "grad_norm": 0.3212122619152069, "learning_rate": 5.66743755540607e-06, "loss": 0.3655, "step": 6103 }, { "epoch": 1.5352112676056338, "grad_norm": 0.31146296858787537, "learning_rate": 5.665987374560977e-06, "loss": 0.3524, "step": 6104 }, { "epoch": 1.5354627766599598, "grad_norm": 0.3085387647151947, "learning_rate": 5.66453713667823e-06, "loss": 0.3444, "step": 6105 }, { "epoch": 1.5357142857142856, "grad_norm": 0.3076164126396179, "learning_rate": 5.663086841882036e-06, "loss": 0.3589, "step": 6106 }, { "epoch": 1.5359657947686118, "grad_norm": 0.36159980297088623, "learning_rate": 5.661636490296602e-06, "loss": 0.3798, "step": 6107 }, { "epoch": 1.5362173038229376, "grad_norm": 0.33638373017311096, "learning_rate": 5.660186082046142e-06, "loss": 0.3668, "step": 6108 }, { "epoch": 1.5364688128772634, "grad_norm": 0.2957940697669983, "learning_rate": 5.658735617254874e-06, "loss": 0.3312, "step": 6109 }, { "epoch": 1.5367203219315897, "grad_norm": 0.3306633532047272, "learning_rate": 5.6572850960470215e-06, "loss": 0.3716, "step": 6110 }, { "epoch": 1.5369718309859155, "grad_norm": 0.2976609468460083, "learning_rate": 5.655834518546813e-06, "loss": 0.3471, "step": 6111 }, { "epoch": 1.5372233400402413, "grad_norm": 0.34043240547180176, "learning_rate": 5.654383884878481e-06, "loss": 0.332, "step": 6112 }, { "epoch": 1.5374748490945676, "grad_norm": 0.3192569315433502, "learning_rate": 5.6529331951662615e-06, "loss": 0.3513, "step": 6113 }, { "epoch": 1.5377263581488934, "grad_norm": 0.2940548062324524, "learning_rate": 5.6514824495344e-06, "loss": 0.3441, "step": 6114 }, { "epoch": 1.5379778672032192, "grad_norm": 0.3519013524055481, "learning_rate": 5.650031648107142e-06, "loss": 0.3306, "step": 6115 }, { "epoch": 1.5382293762575454, "grad_norm": 0.3195810616016388, "learning_rate": 5.648580791008739e-06, "loss": 0.35, "step": 6116 }, { "epoch": 1.5384808853118712, "grad_norm": 0.324769526720047, "learning_rate": 5.647129878363449e-06, "loss": 0.3416, "step": 6117 }, { "epoch": 1.538732394366197, "grad_norm": 0.310882568359375, "learning_rate": 5.645678910295533e-06, "loss": 0.3592, "step": 6118 }, { "epoch": 1.5389839034205233, "grad_norm": 0.3428153097629547, "learning_rate": 5.64422788692926e-06, "loss": 0.3694, "step": 6119 }, { "epoch": 1.539235412474849, "grad_norm": 0.39914482831954956, "learning_rate": 5.642776808388897e-06, "loss": 0.3532, "step": 6120 }, { "epoch": 1.539486921529175, "grad_norm": 0.3099226653575897, "learning_rate": 5.641325674798722e-06, "loss": 0.3303, "step": 6121 }, { "epoch": 1.5397384305835011, "grad_norm": 0.32525354623794556, "learning_rate": 5.639874486283015e-06, "loss": 0.3348, "step": 6122 }, { "epoch": 1.539989939637827, "grad_norm": 0.3201609253883362, "learning_rate": 5.638423242966061e-06, "loss": 0.3441, "step": 6123 }, { "epoch": 1.540241448692153, "grad_norm": 0.31889769434928894, "learning_rate": 5.63697194497215e-06, "loss": 0.3829, "step": 6124 }, { "epoch": 1.540492957746479, "grad_norm": 0.31827834248542786, "learning_rate": 5.635520592425579e-06, "loss": 0.3438, "step": 6125 }, { "epoch": 1.5407444668008048, "grad_norm": 0.30125829577445984, "learning_rate": 5.634069185450642e-06, "loss": 0.3563, "step": 6126 }, { "epoch": 1.5409959758551308, "grad_norm": 0.2951527237892151, "learning_rate": 5.6326177241716466e-06, "loss": 0.3313, "step": 6127 }, { "epoch": 1.5412474849094568, "grad_norm": 0.32351893186569214, "learning_rate": 5.631166208712902e-06, "loss": 0.354, "step": 6128 }, { "epoch": 1.5414989939637826, "grad_norm": 0.3129163384437561, "learning_rate": 5.629714639198719e-06, "loss": 0.3546, "step": 6129 }, { "epoch": 1.5417505030181087, "grad_norm": 0.29270297288894653, "learning_rate": 5.628263015753418e-06, "loss": 0.3406, "step": 6130 }, { "epoch": 1.5420020120724347, "grad_norm": 0.29806989431381226, "learning_rate": 5.626811338501319e-06, "loss": 0.3735, "step": 6131 }, { "epoch": 1.5422535211267605, "grad_norm": 0.31030187010765076, "learning_rate": 5.625359607566751e-06, "loss": 0.3505, "step": 6132 }, { "epoch": 1.5425050301810865, "grad_norm": 0.30579960346221924, "learning_rate": 5.623907823074044e-06, "loss": 0.3578, "step": 6133 }, { "epoch": 1.5427565392354126, "grad_norm": 0.3121272325515747, "learning_rate": 5.622455985147536e-06, "loss": 0.3878, "step": 6134 }, { "epoch": 1.5430080482897384, "grad_norm": 0.3233649730682373, "learning_rate": 5.621004093911566e-06, "loss": 0.3413, "step": 6135 }, { "epoch": 1.5432595573440644, "grad_norm": 0.32058221101760864, "learning_rate": 5.6195521494904815e-06, "loss": 0.3535, "step": 6136 }, { "epoch": 1.5435110663983904, "grad_norm": 0.3023648262023926, "learning_rate": 5.61810015200863e-06, "loss": 0.3657, "step": 6137 }, { "epoch": 1.5437625754527162, "grad_norm": 0.3155832290649414, "learning_rate": 5.616648101590367e-06, "loss": 0.3481, "step": 6138 }, { "epoch": 1.5440140845070423, "grad_norm": 0.33503812551498413, "learning_rate": 5.615195998360053e-06, "loss": 0.3623, "step": 6139 }, { "epoch": 1.5442655935613683, "grad_norm": 0.31416836380958557, "learning_rate": 5.61374384244205e-06, "loss": 0.3691, "step": 6140 }, { "epoch": 1.544517102615694, "grad_norm": 0.3015648424625397, "learning_rate": 5.612291633960727e-06, "loss": 0.3305, "step": 6141 }, { "epoch": 1.54476861167002, "grad_norm": 0.33664670586586, "learning_rate": 5.610839373040455e-06, "loss": 0.3321, "step": 6142 }, { "epoch": 1.5450201207243461, "grad_norm": 0.31471818685531616, "learning_rate": 5.609387059805614e-06, "loss": 0.3553, "step": 6143 }, { "epoch": 1.545271629778672, "grad_norm": 0.3041958212852478, "learning_rate": 5.607934694380581e-06, "loss": 0.3485, "step": 6144 }, { "epoch": 1.545523138832998, "grad_norm": 0.3049089312553406, "learning_rate": 5.606482276889746e-06, "loss": 0.3366, "step": 6145 }, { "epoch": 1.545774647887324, "grad_norm": 0.2998283803462982, "learning_rate": 5.605029807457499e-06, "loss": 0.3243, "step": 6146 }, { "epoch": 1.5460261569416498, "grad_norm": 0.32313594222068787, "learning_rate": 5.603577286208234e-06, "loss": 0.3821, "step": 6147 }, { "epoch": 1.5462776659959758, "grad_norm": 0.3039160668849945, "learning_rate": 5.602124713266349e-06, "loss": 0.3721, "step": 6148 }, { "epoch": 1.5465291750503019, "grad_norm": 0.31081444025039673, "learning_rate": 5.60067208875625e-06, "loss": 0.3554, "step": 6149 }, { "epoch": 1.5467806841046277, "grad_norm": 0.3036777675151825, "learning_rate": 5.599219412802344e-06, "loss": 0.3459, "step": 6150 }, { "epoch": 1.5470321931589537, "grad_norm": 0.2954856753349304, "learning_rate": 5.597766685529043e-06, "loss": 0.3439, "step": 6151 }, { "epoch": 1.5472837022132797, "grad_norm": 0.3063904941082001, "learning_rate": 5.596313907060766e-06, "loss": 0.3452, "step": 6152 }, { "epoch": 1.5475352112676055, "grad_norm": 0.33368048071861267, "learning_rate": 5.594861077521935e-06, "loss": 0.3523, "step": 6153 }, { "epoch": 1.5477867203219315, "grad_norm": 0.31460052728652954, "learning_rate": 5.593408197036973e-06, "loss": 0.3389, "step": 6154 }, { "epoch": 1.5480382293762576, "grad_norm": 0.32115140557289124, "learning_rate": 5.59195526573031e-06, "loss": 0.3714, "step": 6155 }, { "epoch": 1.5482897384305834, "grad_norm": 0.3152860105037689, "learning_rate": 5.590502283726383e-06, "loss": 0.3539, "step": 6156 }, { "epoch": 1.5485412474849096, "grad_norm": 0.33016350865364075, "learning_rate": 5.5890492511496294e-06, "loss": 0.366, "step": 6157 }, { "epoch": 1.5487927565392354, "grad_norm": 0.3130359351634979, "learning_rate": 5.587596168124493e-06, "loss": 0.3789, "step": 6158 }, { "epoch": 1.5490442655935612, "grad_norm": 0.3576628565788269, "learning_rate": 5.5861430347754195e-06, "loss": 0.3626, "step": 6159 }, { "epoch": 1.5492957746478875, "grad_norm": 0.28835195302963257, "learning_rate": 5.584689851226863e-06, "loss": 0.3559, "step": 6160 }, { "epoch": 1.5495472837022133, "grad_norm": 0.32597804069519043, "learning_rate": 5.583236617603278e-06, "loss": 0.3582, "step": 6161 }, { "epoch": 1.549798792756539, "grad_norm": 0.29985523223876953, "learning_rate": 5.5817833340291265e-06, "loss": 0.3467, "step": 6162 }, { "epoch": 1.5500503018108653, "grad_norm": 0.3118489384651184, "learning_rate": 5.5803300006288704e-06, "loss": 0.3401, "step": 6163 }, { "epoch": 1.5503018108651911, "grad_norm": 0.302613228559494, "learning_rate": 5.578876617526982e-06, "loss": 0.3266, "step": 6164 }, { "epoch": 1.550553319919517, "grad_norm": 0.3214384615421295, "learning_rate": 5.577423184847932e-06, "loss": 0.3369, "step": 6165 }, { "epoch": 1.5508048289738432, "grad_norm": 0.2934117019176483, "learning_rate": 5.575969702716199e-06, "loss": 0.3559, "step": 6166 }, { "epoch": 1.551056338028169, "grad_norm": 0.3137935698032379, "learning_rate": 5.574516171256263e-06, "loss": 0.3543, "step": 6167 }, { "epoch": 1.5513078470824948, "grad_norm": 0.29603636264801025, "learning_rate": 5.5730625905926114e-06, "loss": 0.3514, "step": 6168 }, { "epoch": 1.551559356136821, "grad_norm": 0.3102024793624878, "learning_rate": 5.571608960849735e-06, "loss": 0.3519, "step": 6169 }, { "epoch": 1.5518108651911469, "grad_norm": 0.2983706593513489, "learning_rate": 5.570155282152125e-06, "loss": 0.3428, "step": 6170 }, { "epoch": 1.5520623742454729, "grad_norm": 0.31096917390823364, "learning_rate": 5.568701554624284e-06, "loss": 0.3497, "step": 6171 }, { "epoch": 1.552313883299799, "grad_norm": 0.28655806183815, "learning_rate": 5.567247778390712e-06, "loss": 0.3459, "step": 6172 }, { "epoch": 1.5525653923541247, "grad_norm": 0.30332696437835693, "learning_rate": 5.565793953575916e-06, "loss": 0.3527, "step": 6173 }, { "epoch": 1.5528169014084507, "grad_norm": 0.3258723020553589, "learning_rate": 5.5643400803044075e-06, "loss": 0.3454, "step": 6174 }, { "epoch": 1.5530684104627768, "grad_norm": 0.34094491600990295, "learning_rate": 5.5628861587007035e-06, "loss": 0.3808, "step": 6175 }, { "epoch": 1.5533199195171026, "grad_norm": 0.2914903461933136, "learning_rate": 5.5614321888893195e-06, "loss": 0.346, "step": 6176 }, { "epoch": 1.5535714285714286, "grad_norm": 0.338826060295105, "learning_rate": 5.559978170994781e-06, "loss": 0.3567, "step": 6177 }, { "epoch": 1.5538229376257546, "grad_norm": 0.3329686224460602, "learning_rate": 5.558524105141616e-06, "loss": 0.3439, "step": 6178 }, { "epoch": 1.5540744466800804, "grad_norm": 0.3257056474685669, "learning_rate": 5.557069991454356e-06, "loss": 0.3378, "step": 6179 }, { "epoch": 1.5543259557344065, "grad_norm": 0.3230164349079132, "learning_rate": 5.5556158300575345e-06, "loss": 0.3419, "step": 6180 }, { "epoch": 1.5545774647887325, "grad_norm": 0.3321927487850189, "learning_rate": 5.554161621075693e-06, "loss": 0.3163, "step": 6181 }, { "epoch": 1.5548289738430583, "grad_norm": 0.36797481775283813, "learning_rate": 5.552707364633376e-06, "loss": 0.3797, "step": 6182 }, { "epoch": 1.5550804828973843, "grad_norm": 0.3352421820163727, "learning_rate": 5.5512530608551315e-06, "loss": 0.3408, "step": 6183 }, { "epoch": 1.5553319919517103, "grad_norm": 0.2971833050251007, "learning_rate": 5.549798709865512e-06, "loss": 0.3406, "step": 6184 }, { "epoch": 1.5555835010060362, "grad_norm": 0.30301129817962646, "learning_rate": 5.5483443117890715e-06, "loss": 0.3458, "step": 6185 }, { "epoch": 1.5558350100603622, "grad_norm": 0.34085342288017273, "learning_rate": 5.546889866750371e-06, "loss": 0.3523, "step": 6186 }, { "epoch": 1.5560865191146882, "grad_norm": 0.3280344307422638, "learning_rate": 5.5454353748739755e-06, "loss": 0.3563, "step": 6187 }, { "epoch": 1.556338028169014, "grad_norm": 0.32044655084609985, "learning_rate": 5.543980836284451e-06, "loss": 0.3555, "step": 6188 }, { "epoch": 1.55658953722334, "grad_norm": 0.30782240629196167, "learning_rate": 5.542526251106372e-06, "loss": 0.3402, "step": 6189 }, { "epoch": 1.556841046277666, "grad_norm": 0.3036595284938812, "learning_rate": 5.541071619464314e-06, "loss": 0.3148, "step": 6190 }, { "epoch": 1.5570925553319919, "grad_norm": 0.30741608142852783, "learning_rate": 5.539616941482855e-06, "loss": 0.3522, "step": 6191 }, { "epoch": 1.557344064386318, "grad_norm": 0.32885679602622986, "learning_rate": 5.538162217286581e-06, "loss": 0.3384, "step": 6192 }, { "epoch": 1.557595573440644, "grad_norm": 0.29733267426490784, "learning_rate": 5.53670744700008e-06, "loss": 0.3465, "step": 6193 }, { "epoch": 1.5578470824949697, "grad_norm": 0.33164188265800476, "learning_rate": 5.535252630747945e-06, "loss": 0.3429, "step": 6194 }, { "epoch": 1.5580985915492958, "grad_norm": 0.3217810392379761, "learning_rate": 5.533797768654771e-06, "loss": 0.3594, "step": 6195 }, { "epoch": 1.5583501006036218, "grad_norm": 0.29132455587387085, "learning_rate": 5.532342860845157e-06, "loss": 0.3428, "step": 6196 }, { "epoch": 1.5586016096579476, "grad_norm": 0.30485719442367554, "learning_rate": 5.5308879074437065e-06, "loss": 0.3193, "step": 6197 }, { "epoch": 1.5588531187122736, "grad_norm": 0.310776025056839, "learning_rate": 5.529432908575029e-06, "loss": 0.3504, "step": 6198 }, { "epoch": 1.5591046277665996, "grad_norm": 0.32021844387054443, "learning_rate": 5.527977864363734e-06, "loss": 0.3495, "step": 6199 }, { "epoch": 1.5593561368209254, "grad_norm": 0.2971351146697998, "learning_rate": 5.5265227749344385e-06, "loss": 0.3765, "step": 6200 }, { "epoch": 1.5596076458752515, "grad_norm": 0.30885204672813416, "learning_rate": 5.525067640411761e-06, "loss": 0.3426, "step": 6201 }, { "epoch": 1.5598591549295775, "grad_norm": 0.31158149242401123, "learning_rate": 5.523612460920326e-06, "loss": 0.3468, "step": 6202 }, { "epoch": 1.5601106639839033, "grad_norm": 0.3428870439529419, "learning_rate": 5.5221572365847565e-06, "loss": 0.3758, "step": 6203 }, { "epoch": 1.5603621730382293, "grad_norm": 0.35247480869293213, "learning_rate": 5.520701967529689e-06, "loss": 0.3744, "step": 6204 }, { "epoch": 1.5606136820925554, "grad_norm": 0.3332814872264862, "learning_rate": 5.519246653879754e-06, "loss": 0.3641, "step": 6205 }, { "epoch": 1.5608651911468812, "grad_norm": 0.3046468198299408, "learning_rate": 5.517791295759592e-06, "loss": 0.3338, "step": 6206 }, { "epoch": 1.5611167002012074, "grad_norm": 0.32283511757850647, "learning_rate": 5.516335893293846e-06, "loss": 0.3549, "step": 6207 }, { "epoch": 1.5613682092555332, "grad_norm": 0.2913826107978821, "learning_rate": 5.514880446607161e-06, "loss": 0.319, "step": 6208 }, { "epoch": 1.561619718309859, "grad_norm": 0.33693376183509827, "learning_rate": 5.513424955824185e-06, "loss": 0.3555, "step": 6209 }, { "epoch": 1.5618712273641853, "grad_norm": 0.348818838596344, "learning_rate": 5.511969421069574e-06, "loss": 0.3493, "step": 6210 }, { "epoch": 1.562122736418511, "grad_norm": 0.3071136474609375, "learning_rate": 5.510513842467986e-06, "loss": 0.3714, "step": 6211 }, { "epoch": 1.5623742454728369, "grad_norm": 0.34389618039131165, "learning_rate": 5.50905822014408e-06, "loss": 0.3539, "step": 6212 }, { "epoch": 1.5626257545271631, "grad_norm": 0.3115088641643524, "learning_rate": 5.507602554222523e-06, "loss": 0.3671, "step": 6213 }, { "epoch": 1.562877263581489, "grad_norm": 0.3112759292125702, "learning_rate": 5.506146844827981e-06, "loss": 0.3616, "step": 6214 }, { "epoch": 1.5631287726358147, "grad_norm": 0.32954081892967224, "learning_rate": 5.50469109208513e-06, "loss": 0.333, "step": 6215 }, { "epoch": 1.563380281690141, "grad_norm": 0.3064148426055908, "learning_rate": 5.503235296118643e-06, "loss": 0.3339, "step": 6216 }, { "epoch": 1.5636317907444668, "grad_norm": 0.29228824377059937, "learning_rate": 5.501779457053202e-06, "loss": 0.3435, "step": 6217 }, { "epoch": 1.5638832997987926, "grad_norm": 0.32584089040756226, "learning_rate": 5.50032357501349e-06, "loss": 0.3494, "step": 6218 }, { "epoch": 1.5641348088531188, "grad_norm": 0.29713818430900574, "learning_rate": 5.498867650124193e-06, "loss": 0.3463, "step": 6219 }, { "epoch": 1.5643863179074446, "grad_norm": 0.31487134099006653, "learning_rate": 5.4974116825100035e-06, "loss": 0.3632, "step": 6220 }, { "epoch": 1.5646378269617707, "grad_norm": 0.30101948976516724, "learning_rate": 5.495955672295615e-06, "loss": 0.3582, "step": 6221 }, { "epoch": 1.5648893360160967, "grad_norm": 0.30046340823173523, "learning_rate": 5.494499619605725e-06, "loss": 0.3532, "step": 6222 }, { "epoch": 1.5651408450704225, "grad_norm": 0.3313204050064087, "learning_rate": 5.493043524565037e-06, "loss": 0.3435, "step": 6223 }, { "epoch": 1.5653923541247485, "grad_norm": 0.30352333188056946, "learning_rate": 5.491587387298256e-06, "loss": 0.3368, "step": 6224 }, { "epoch": 1.5656438631790746, "grad_norm": 0.30736225843429565, "learning_rate": 5.490131207930089e-06, "loss": 0.3715, "step": 6225 }, { "epoch": 1.5658953722334004, "grad_norm": 0.3192325234413147, "learning_rate": 5.488674986585252e-06, "loss": 0.3587, "step": 6226 }, { "epoch": 1.5661468812877264, "grad_norm": 0.32336869835853577, "learning_rate": 5.487218723388459e-06, "loss": 0.3587, "step": 6227 }, { "epoch": 1.5663983903420524, "grad_norm": 0.33127787709236145, "learning_rate": 5.48576241846443e-06, "loss": 0.3737, "step": 6228 }, { "epoch": 1.5666498993963782, "grad_norm": 0.30386435985565186, "learning_rate": 5.484306071937889e-06, "loss": 0.3378, "step": 6229 }, { "epoch": 1.5669014084507042, "grad_norm": 0.30401545763015747, "learning_rate": 5.4828496839335635e-06, "loss": 0.3511, "step": 6230 }, { "epoch": 1.5671529175050303, "grad_norm": 0.327298641204834, "learning_rate": 5.4813932545761815e-06, "loss": 0.3497, "step": 6231 }, { "epoch": 1.567404426559356, "grad_norm": 0.3015185296535492, "learning_rate": 5.4799367839904805e-06, "loss": 0.3462, "step": 6232 }, { "epoch": 1.567655935613682, "grad_norm": 0.3208427429199219, "learning_rate": 5.478480272301195e-06, "loss": 0.3293, "step": 6233 }, { "epoch": 1.5679074446680081, "grad_norm": 0.34195858240127563, "learning_rate": 5.477023719633069e-06, "loss": 0.3793, "step": 6234 }, { "epoch": 1.568158953722334, "grad_norm": 0.31113073229789734, "learning_rate": 5.4755671261108445e-06, "loss": 0.3565, "step": 6235 }, { "epoch": 1.56841046277666, "grad_norm": 0.3247259557247162, "learning_rate": 5.474110491859272e-06, "loss": 0.3352, "step": 6236 }, { "epoch": 1.568661971830986, "grad_norm": 0.3409235179424286, "learning_rate": 5.4726538170031e-06, "loss": 0.3498, "step": 6237 }, { "epoch": 1.5689134808853118, "grad_norm": 0.32643982768058777, "learning_rate": 5.471197101667087e-06, "loss": 0.3437, "step": 6238 }, { "epoch": 1.5691649899396378, "grad_norm": 0.3240383267402649, "learning_rate": 5.469740345975989e-06, "loss": 0.3527, "step": 6239 }, { "epoch": 1.5694164989939638, "grad_norm": 0.34534913301467896, "learning_rate": 5.468283550054571e-06, "loss": 0.3658, "step": 6240 }, { "epoch": 1.5696680080482897, "grad_norm": 0.2972480356693268, "learning_rate": 5.466826714027595e-06, "loss": 0.3357, "step": 6241 }, { "epoch": 1.5699195171026157, "grad_norm": 0.3179325461387634, "learning_rate": 5.465369838019832e-06, "loss": 0.3268, "step": 6242 }, { "epoch": 1.5701710261569417, "grad_norm": 0.3161191940307617, "learning_rate": 5.463912922156053e-06, "loss": 0.3631, "step": 6243 }, { "epoch": 1.5704225352112675, "grad_norm": 0.32680991291999817, "learning_rate": 5.462455966561034e-06, "loss": 0.3689, "step": 6244 }, { "epoch": 1.5706740442655935, "grad_norm": 0.31402167677879333, "learning_rate": 5.460998971359556e-06, "loss": 0.3468, "step": 6245 }, { "epoch": 1.5709255533199196, "grad_norm": 0.3343561589717865, "learning_rate": 5.459541936676398e-06, "loss": 0.3655, "step": 6246 }, { "epoch": 1.5711770623742454, "grad_norm": 0.30885884165763855, "learning_rate": 5.45808486263635e-06, "loss": 0.3284, "step": 6247 }, { "epoch": 1.5714285714285714, "grad_norm": 0.3167196214199066, "learning_rate": 5.456627749364198e-06, "loss": 0.3454, "step": 6248 }, { "epoch": 1.5716800804828974, "grad_norm": 0.33008265495300293, "learning_rate": 5.455170596984738e-06, "loss": 0.3528, "step": 6249 }, { "epoch": 1.5719315895372232, "grad_norm": 0.30799832940101624, "learning_rate": 5.4537134056227626e-06, "loss": 0.3536, "step": 6250 }, { "epoch": 1.5721830985915493, "grad_norm": 0.29477211833000183, "learning_rate": 5.452256175403072e-06, "loss": 0.3448, "step": 6251 }, { "epoch": 1.5724346076458753, "grad_norm": 0.3047988712787628, "learning_rate": 5.4507989064504695e-06, "loss": 0.3506, "step": 6252 }, { "epoch": 1.572686116700201, "grad_norm": 0.32256054878234863, "learning_rate": 5.4493415988897615e-06, "loss": 0.3653, "step": 6253 }, { "epoch": 1.5729376257545271, "grad_norm": 0.3178042471408844, "learning_rate": 5.4478842528457565e-06, "loss": 0.3709, "step": 6254 }, { "epoch": 1.5731891348088531, "grad_norm": 0.329878568649292, "learning_rate": 5.4464268684432664e-06, "loss": 0.3329, "step": 6255 }, { "epoch": 1.573440643863179, "grad_norm": 0.30290189385414124, "learning_rate": 5.444969445807109e-06, "loss": 0.3458, "step": 6256 }, { "epoch": 1.5736921529175052, "grad_norm": 0.3331491947174072, "learning_rate": 5.4435119850621e-06, "loss": 0.3516, "step": 6257 }, { "epoch": 1.573943661971831, "grad_norm": 0.2870064675807953, "learning_rate": 5.442054486333066e-06, "loss": 0.3636, "step": 6258 }, { "epoch": 1.5741951710261568, "grad_norm": 0.3208286762237549, "learning_rate": 5.440596949744831e-06, "loss": 0.3486, "step": 6259 }, { "epoch": 1.574446680080483, "grad_norm": 0.30544915795326233, "learning_rate": 5.439139375422223e-06, "loss": 0.3647, "step": 6260 }, { "epoch": 1.5746981891348089, "grad_norm": 0.3159911036491394, "learning_rate": 5.437681763490075e-06, "loss": 0.3524, "step": 6261 }, { "epoch": 1.5749496981891347, "grad_norm": 0.315897136926651, "learning_rate": 5.4362241140732215e-06, "loss": 0.3794, "step": 6262 }, { "epoch": 1.575201207243461, "grad_norm": 0.3100701570510864, "learning_rate": 5.434766427296502e-06, "loss": 0.3537, "step": 6263 }, { "epoch": 1.5754527162977867, "grad_norm": 0.327070951461792, "learning_rate": 5.433308703284759e-06, "loss": 0.3974, "step": 6264 }, { "epoch": 1.5757042253521125, "grad_norm": 0.31196388602256775, "learning_rate": 5.431850942162834e-06, "loss": 0.3373, "step": 6265 }, { "epoch": 1.5759557344064388, "grad_norm": 0.28920623660087585, "learning_rate": 5.430393144055579e-06, "loss": 0.3308, "step": 6266 }, { "epoch": 1.5762072434607646, "grad_norm": 0.30474618077278137, "learning_rate": 5.428935309087844e-06, "loss": 0.3323, "step": 6267 }, { "epoch": 1.5764587525150904, "grad_norm": 0.3304254412651062, "learning_rate": 5.427477437384482e-06, "loss": 0.3647, "step": 6268 }, { "epoch": 1.5767102615694166, "grad_norm": 0.28863289952278137, "learning_rate": 5.426019529070352e-06, "loss": 0.362, "step": 6269 }, { "epoch": 1.5769617706237424, "grad_norm": 0.31353408098220825, "learning_rate": 5.4245615842703146e-06, "loss": 0.3637, "step": 6270 }, { "epoch": 1.5772132796780685, "grad_norm": 0.3224716782569885, "learning_rate": 5.4231036031092345e-06, "loss": 0.3273, "step": 6271 }, { "epoch": 1.5774647887323945, "grad_norm": 0.31004685163497925, "learning_rate": 5.4216455857119765e-06, "loss": 0.354, "step": 6272 }, { "epoch": 1.5777162977867203, "grad_norm": 0.32508277893066406, "learning_rate": 5.420187532203413e-06, "loss": 0.3552, "step": 6273 }, { "epoch": 1.5779678068410463, "grad_norm": 0.33112356066703796, "learning_rate": 5.418729442708416e-06, "loss": 0.3846, "step": 6274 }, { "epoch": 1.5782193158953723, "grad_norm": 0.32484641671180725, "learning_rate": 5.417271317351861e-06, "loss": 0.3589, "step": 6275 }, { "epoch": 1.5784708249496981, "grad_norm": 0.3176564574241638, "learning_rate": 5.415813156258628e-06, "loss": 0.3525, "step": 6276 }, { "epoch": 1.5787223340040242, "grad_norm": 0.31098705530166626, "learning_rate": 5.4143549595536e-06, "loss": 0.3355, "step": 6277 }, { "epoch": 1.5789738430583502, "grad_norm": 0.34657394886016846, "learning_rate": 5.412896727361663e-06, "loss": 0.3566, "step": 6278 }, { "epoch": 1.579225352112676, "grad_norm": 0.31053581833839417, "learning_rate": 5.411438459807703e-06, "loss": 0.3741, "step": 6279 }, { "epoch": 1.579476861167002, "grad_norm": 0.28460362553596497, "learning_rate": 5.4099801570166135e-06, "loss": 0.3299, "step": 6280 }, { "epoch": 1.579728370221328, "grad_norm": 0.3039803206920624, "learning_rate": 5.408521819113287e-06, "loss": 0.3297, "step": 6281 }, { "epoch": 1.5799798792756539, "grad_norm": 0.31220072507858276, "learning_rate": 5.407063446222623e-06, "loss": 0.3368, "step": 6282 }, { "epoch": 1.58023138832998, "grad_norm": 0.31574392318725586, "learning_rate": 5.4056050384695225e-06, "loss": 0.3582, "step": 6283 }, { "epoch": 1.580482897384306, "grad_norm": 0.34987935423851013, "learning_rate": 5.404146595978887e-06, "loss": 0.3484, "step": 6284 }, { "epoch": 1.5807344064386317, "grad_norm": 0.3038938641548157, "learning_rate": 5.402688118875624e-06, "loss": 0.3401, "step": 6285 }, { "epoch": 1.5809859154929577, "grad_norm": 0.29709485173225403, "learning_rate": 5.401229607284644e-06, "loss": 0.3588, "step": 6286 }, { "epoch": 1.5812374245472838, "grad_norm": 0.3312198221683502, "learning_rate": 5.3997710613308565e-06, "loss": 0.3632, "step": 6287 }, { "epoch": 1.5814889336016096, "grad_norm": 0.30179715156555176, "learning_rate": 5.398312481139179e-06, "loss": 0.3404, "step": 6288 }, { "epoch": 1.5817404426559356, "grad_norm": 0.3145090639591217, "learning_rate": 5.396853866834529e-06, "loss": 0.3781, "step": 6289 }, { "epoch": 1.5819919517102616, "grad_norm": 0.33599305152893066, "learning_rate": 5.395395218541829e-06, "loss": 0.3496, "step": 6290 }, { "epoch": 1.5822434607645874, "grad_norm": 0.30032995343208313, "learning_rate": 5.393936536386001e-06, "loss": 0.3498, "step": 6291 }, { "epoch": 1.5824949698189135, "grad_norm": 0.30552947521209717, "learning_rate": 5.392477820491974e-06, "loss": 0.3472, "step": 6292 }, { "epoch": 1.5827464788732395, "grad_norm": 0.29774901270866394, "learning_rate": 5.391019070984676e-06, "loss": 0.3306, "step": 6293 }, { "epoch": 1.5829979879275653, "grad_norm": 0.3048769235610962, "learning_rate": 5.389560287989043e-06, "loss": 0.3284, "step": 6294 }, { "epoch": 1.5832494969818913, "grad_norm": 0.33023905754089355, "learning_rate": 5.388101471630006e-06, "loss": 0.3384, "step": 6295 }, { "epoch": 1.5835010060362174, "grad_norm": 0.30348482728004456, "learning_rate": 5.3866426220325075e-06, "loss": 0.3713, "step": 6296 }, { "epoch": 1.5837525150905432, "grad_norm": 0.3093292713165283, "learning_rate": 5.385183739321486e-06, "loss": 0.3527, "step": 6297 }, { "epoch": 1.5840040241448692, "grad_norm": 0.30583062767982483, "learning_rate": 5.383724823621889e-06, "loss": 0.3345, "step": 6298 }, { "epoch": 1.5842555331991952, "grad_norm": 0.33037832379341125, "learning_rate": 5.38226587505866e-06, "loss": 0.3787, "step": 6299 }, { "epoch": 1.584507042253521, "grad_norm": 0.312497615814209, "learning_rate": 5.380806893756748e-06, "loss": 0.3446, "step": 6300 }, { "epoch": 1.584758551307847, "grad_norm": 0.3048454225063324, "learning_rate": 5.3793478798411105e-06, "loss": 0.3678, "step": 6301 }, { "epoch": 1.585010060362173, "grad_norm": 0.290546178817749, "learning_rate": 5.3778888334367e-06, "loss": 0.3401, "step": 6302 }, { "epoch": 1.5852615694164989, "grad_norm": 0.3122500479221344, "learning_rate": 5.376429754668475e-06, "loss": 0.3374, "step": 6303 }, { "epoch": 1.585513078470825, "grad_norm": 0.3183901011943817, "learning_rate": 5.374970643661397e-06, "loss": 0.3668, "step": 6304 }, { "epoch": 1.585764587525151, "grad_norm": 0.3228647708892822, "learning_rate": 5.373511500540428e-06, "loss": 0.3438, "step": 6305 }, { "epoch": 1.5860160965794767, "grad_norm": 0.3075144290924072, "learning_rate": 5.372052325430537e-06, "loss": 0.3626, "step": 6306 }, { "epoch": 1.586267605633803, "grad_norm": 0.3228583335876465, "learning_rate": 5.37059311845669e-06, "loss": 0.3396, "step": 6307 }, { "epoch": 1.5865191146881288, "grad_norm": 0.33655041456222534, "learning_rate": 5.3691338797438615e-06, "loss": 0.3342, "step": 6308 }, { "epoch": 1.5867706237424546, "grad_norm": 0.3635300397872925, "learning_rate": 5.3676746094170265e-06, "loss": 0.355, "step": 6309 }, { "epoch": 1.5870221327967808, "grad_norm": 0.32811570167541504, "learning_rate": 5.3662153076011614e-06, "loss": 0.3313, "step": 6310 }, { "epoch": 1.5872736418511066, "grad_norm": 0.3333826959133148, "learning_rate": 5.364755974421244e-06, "loss": 0.342, "step": 6311 }, { "epoch": 1.5875251509054324, "grad_norm": 0.34751078486442566, "learning_rate": 5.363296610002261e-06, "loss": 0.348, "step": 6312 }, { "epoch": 1.5877766599597587, "grad_norm": 0.3453575372695923, "learning_rate": 5.361837214469197e-06, "loss": 0.349, "step": 6313 }, { "epoch": 1.5880281690140845, "grad_norm": 0.3269634544849396, "learning_rate": 5.360377787947037e-06, "loss": 0.3284, "step": 6314 }, { "epoch": 1.5882796780684103, "grad_norm": 0.3597438335418701, "learning_rate": 5.358918330560776e-06, "loss": 0.3663, "step": 6315 }, { "epoch": 1.5885311871227366, "grad_norm": 0.3321945369243622, "learning_rate": 5.357458842435405e-06, "loss": 0.3514, "step": 6316 }, { "epoch": 1.5887826961770624, "grad_norm": 0.33890068531036377, "learning_rate": 5.35599932369592e-06, "loss": 0.3409, "step": 6317 }, { "epoch": 1.5890342052313882, "grad_norm": 0.33739712834358215, "learning_rate": 5.35453977446732e-06, "loss": 0.3655, "step": 6318 }, { "epoch": 1.5892857142857144, "grad_norm": 0.3216194808483124, "learning_rate": 5.353080194874606e-06, "loss": 0.325, "step": 6319 }, { "epoch": 1.5895372233400402, "grad_norm": 0.2983427941799164, "learning_rate": 5.351620585042783e-06, "loss": 0.354, "step": 6320 }, { "epoch": 1.5897887323943662, "grad_norm": 0.3386406898498535, "learning_rate": 5.350160945096856e-06, "loss": 0.3736, "step": 6321 }, { "epoch": 1.5900402414486923, "grad_norm": 0.3367750346660614, "learning_rate": 5.348701275161834e-06, "loss": 0.3222, "step": 6322 }, { "epoch": 1.590291750503018, "grad_norm": 0.3235786259174347, "learning_rate": 5.347241575362729e-06, "loss": 0.3701, "step": 6323 }, { "epoch": 1.590543259557344, "grad_norm": 0.29877564311027527, "learning_rate": 5.345781845824557e-06, "loss": 0.3367, "step": 6324 }, { "epoch": 1.5907947686116701, "grad_norm": 0.30587801337242126, "learning_rate": 5.344322086672332e-06, "loss": 0.3363, "step": 6325 }, { "epoch": 1.591046277665996, "grad_norm": 0.31401360034942627, "learning_rate": 5.3428622980310755e-06, "loss": 0.3309, "step": 6326 }, { "epoch": 1.591297786720322, "grad_norm": 0.2830161452293396, "learning_rate": 5.341402480025808e-06, "loss": 0.3416, "step": 6327 }, { "epoch": 1.591549295774648, "grad_norm": 0.3165720999240875, "learning_rate": 5.339942632781553e-06, "loss": 0.3532, "step": 6328 }, { "epoch": 1.5918008048289738, "grad_norm": 0.3214383125305176, "learning_rate": 5.338482756423339e-06, "loss": 0.3412, "step": 6329 }, { "epoch": 1.5920523138832998, "grad_norm": 0.2742040455341339, "learning_rate": 5.337022851076193e-06, "loss": 0.3516, "step": 6330 }, { "epoch": 1.5923038229376258, "grad_norm": 0.2959214150905609, "learning_rate": 5.33556291686515e-06, "loss": 0.3405, "step": 6331 }, { "epoch": 1.5925553319919517, "grad_norm": 0.28972509503364563, "learning_rate": 5.334102953915242e-06, "loss": 0.3324, "step": 6332 }, { "epoch": 1.5928068410462777, "grad_norm": 0.34085187315940857, "learning_rate": 5.332642962351505e-06, "loss": 0.3419, "step": 6333 }, { "epoch": 1.5930583501006037, "grad_norm": 0.32344910502433777, "learning_rate": 5.331182942298981e-06, "loss": 0.3387, "step": 6334 }, { "epoch": 1.5933098591549295, "grad_norm": 0.3067835867404938, "learning_rate": 5.329722893882708e-06, "loss": 0.3325, "step": 6335 }, { "epoch": 1.5935613682092555, "grad_norm": 0.31385210156440735, "learning_rate": 5.328262817227733e-06, "loss": 0.3588, "step": 6336 }, { "epoch": 1.5938128772635816, "grad_norm": 0.30561771988868713, "learning_rate": 5.326802712459101e-06, "loss": 0.3438, "step": 6337 }, { "epoch": 1.5940643863179074, "grad_norm": 0.30326759815216064, "learning_rate": 5.325342579701862e-06, "loss": 0.3669, "step": 6338 }, { "epoch": 1.5943158953722334, "grad_norm": 0.31566211581230164, "learning_rate": 5.323882419081066e-06, "loss": 0.3275, "step": 6339 }, { "epoch": 1.5945674044265594, "grad_norm": 0.32427820563316345, "learning_rate": 5.3224222307217665e-06, "loss": 0.3351, "step": 6340 }, { "epoch": 1.5948189134808852, "grad_norm": 0.33437636494636536, "learning_rate": 5.32096201474902e-06, "loss": 0.3573, "step": 6341 }, { "epoch": 1.5950704225352113, "grad_norm": 0.3199196457862854, "learning_rate": 5.319501771287885e-06, "loss": 0.3591, "step": 6342 }, { "epoch": 1.5953219315895373, "grad_norm": 0.332581102848053, "learning_rate": 5.318041500463423e-06, "loss": 0.3374, "step": 6343 }, { "epoch": 1.595573440643863, "grad_norm": 0.3242298662662506, "learning_rate": 5.316581202400694e-06, "loss": 0.3266, "step": 6344 }, { "epoch": 1.595824949698189, "grad_norm": 0.3219207525253296, "learning_rate": 5.315120877224767e-06, "loss": 0.3477, "step": 6345 }, { "epoch": 1.5960764587525151, "grad_norm": 0.3285122215747833, "learning_rate": 5.313660525060709e-06, "loss": 0.3691, "step": 6346 }, { "epoch": 1.596327967806841, "grad_norm": 0.32978108525276184, "learning_rate": 5.312200146033588e-06, "loss": 0.3434, "step": 6347 }, { "epoch": 1.596579476861167, "grad_norm": 0.3488015830516815, "learning_rate": 5.310739740268478e-06, "loss": 0.3484, "step": 6348 }, { "epoch": 1.596830985915493, "grad_norm": 0.31981801986694336, "learning_rate": 5.309279307890453e-06, "loss": 0.3259, "step": 6349 }, { "epoch": 1.5970824949698188, "grad_norm": 0.31969207525253296, "learning_rate": 5.3078188490245905e-06, "loss": 0.3416, "step": 6350 }, { "epoch": 1.5973340040241448, "grad_norm": 0.3058636784553528, "learning_rate": 5.30635836379597e-06, "loss": 0.3111, "step": 6351 }, { "epoch": 1.5975855130784709, "grad_norm": 0.3224903345108032, "learning_rate": 5.304897852329671e-06, "loss": 0.3454, "step": 6352 }, { "epoch": 1.5978370221327967, "grad_norm": 0.33611172437667847, "learning_rate": 5.303437314750779e-06, "loss": 0.3447, "step": 6353 }, { "epoch": 1.5980885311871227, "grad_norm": 0.3095466196537018, "learning_rate": 5.301976751184379e-06, "loss": 0.3523, "step": 6354 }, { "epoch": 1.5983400402414487, "grad_norm": 0.3098112642765045, "learning_rate": 5.300516161755559e-06, "loss": 0.3419, "step": 6355 }, { "epoch": 1.5985915492957745, "grad_norm": 0.3331829309463501, "learning_rate": 5.29905554658941e-06, "loss": 0.3652, "step": 6356 }, { "epoch": 1.5988430583501008, "grad_norm": 0.30637791752815247, "learning_rate": 5.297594905811024e-06, "loss": 0.3591, "step": 6357 }, { "epoch": 1.5990945674044266, "grad_norm": 0.3300563395023346, "learning_rate": 5.296134239545497e-06, "loss": 0.3505, "step": 6358 }, { "epoch": 1.5993460764587524, "grad_norm": 0.34214162826538086, "learning_rate": 5.294673547917925e-06, "loss": 0.3663, "step": 6359 }, { "epoch": 1.5995975855130786, "grad_norm": 0.3289168179035187, "learning_rate": 5.293212831053407e-06, "loss": 0.3356, "step": 6360 }, { "epoch": 1.5998490945674044, "grad_norm": 0.3313751816749573, "learning_rate": 5.291752089077044e-06, "loss": 0.3586, "step": 6361 }, { "epoch": 1.6001006036217302, "grad_norm": 0.32884520292282104, "learning_rate": 5.29029132211394e-06, "loss": 0.3481, "step": 6362 }, { "epoch": 1.6003521126760565, "grad_norm": 0.3014932870864868, "learning_rate": 5.2888305302891996e-06, "loss": 0.3677, "step": 6363 }, { "epoch": 1.6006036217303823, "grad_norm": 0.3143293857574463, "learning_rate": 5.287369713727933e-06, "loss": 0.3352, "step": 6364 }, { "epoch": 1.600855130784708, "grad_norm": 0.30233773589134216, "learning_rate": 5.285908872555247e-06, "loss": 0.3466, "step": 6365 }, { "epoch": 1.6011066398390343, "grad_norm": 0.3170585036277771, "learning_rate": 5.284448006896252e-06, "loss": 0.3253, "step": 6366 }, { "epoch": 1.6013581488933601, "grad_norm": 0.3214609920978546, "learning_rate": 5.282987116876068e-06, "loss": 0.3341, "step": 6367 }, { "epoch": 1.6016096579476862, "grad_norm": 0.3339797854423523, "learning_rate": 5.281526202619808e-06, "loss": 0.3594, "step": 6368 }, { "epoch": 1.6018611670020122, "grad_norm": 0.30358412861824036, "learning_rate": 5.2800652642525885e-06, "loss": 0.3265, "step": 6369 }, { "epoch": 1.602112676056338, "grad_norm": 0.3258301615715027, "learning_rate": 5.278604301899531e-06, "loss": 0.335, "step": 6370 }, { "epoch": 1.602364185110664, "grad_norm": 0.32706478238105774, "learning_rate": 5.27714331568576e-06, "loss": 0.3561, "step": 6371 }, { "epoch": 1.60261569416499, "grad_norm": 0.3139190077781677, "learning_rate": 5.275682305736396e-06, "loss": 0.3447, "step": 6372 }, { "epoch": 1.6028672032193159, "grad_norm": 0.30157265067100525, "learning_rate": 5.274221272176569e-06, "loss": 0.3914, "step": 6373 }, { "epoch": 1.6031187122736419, "grad_norm": 0.311866819858551, "learning_rate": 5.2727602151314035e-06, "loss": 0.3594, "step": 6374 }, { "epoch": 1.603370221327968, "grad_norm": 0.29979294538497925, "learning_rate": 5.271299134726034e-06, "loss": 0.3382, "step": 6375 }, { "epoch": 1.6036217303822937, "grad_norm": 0.3057425320148468, "learning_rate": 5.269838031085588e-06, "loss": 0.3378, "step": 6376 }, { "epoch": 1.6038732394366197, "grad_norm": 0.33399319648742676, "learning_rate": 5.268376904335204e-06, "loss": 0.3625, "step": 6377 }, { "epoch": 1.6041247484909458, "grad_norm": 0.31966760754585266, "learning_rate": 5.266915754600018e-06, "loss": 0.3683, "step": 6378 }, { "epoch": 1.6043762575452716, "grad_norm": 0.3125464916229248, "learning_rate": 5.265454582005167e-06, "loss": 0.3498, "step": 6379 }, { "epoch": 1.6046277665995976, "grad_norm": 0.30603522062301636, "learning_rate": 5.263993386675792e-06, "loss": 0.3682, "step": 6380 }, { "epoch": 1.6048792756539236, "grad_norm": 0.3024091422557831, "learning_rate": 5.2625321687370345e-06, "loss": 0.3384, "step": 6381 }, { "epoch": 1.6051307847082494, "grad_norm": 0.2936726212501526, "learning_rate": 5.261070928314039e-06, "loss": 0.373, "step": 6382 }, { "epoch": 1.6053822937625755, "grad_norm": 0.31139662861824036, "learning_rate": 5.259609665531951e-06, "loss": 0.3445, "step": 6383 }, { "epoch": 1.6056338028169015, "grad_norm": 0.30623286962509155, "learning_rate": 5.258148380515922e-06, "loss": 0.3562, "step": 6384 }, { "epoch": 1.6058853118712273, "grad_norm": 0.3077852129936218, "learning_rate": 5.256687073391097e-06, "loss": 0.3313, "step": 6385 }, { "epoch": 1.6061368209255533, "grad_norm": 0.28741592168807983, "learning_rate": 5.255225744282631e-06, "loss": 0.354, "step": 6386 }, { "epoch": 1.6063883299798793, "grad_norm": 0.32665884494781494, "learning_rate": 5.253764393315674e-06, "loss": 0.3458, "step": 6387 }, { "epoch": 1.6066398390342052, "grad_norm": 0.31474220752716064, "learning_rate": 5.252303020615387e-06, "loss": 0.3425, "step": 6388 }, { "epoch": 1.6068913480885312, "grad_norm": 0.3298763930797577, "learning_rate": 5.250841626306924e-06, "loss": 0.3634, "step": 6389 }, { "epoch": 1.6071428571428572, "grad_norm": 0.2954833209514618, "learning_rate": 5.249380210515446e-06, "loss": 0.3702, "step": 6390 }, { "epoch": 1.607394366197183, "grad_norm": 0.3127298057079315, "learning_rate": 5.247918773366112e-06, "loss": 0.348, "step": 6391 }, { "epoch": 1.607645875251509, "grad_norm": 0.2956436574459076, "learning_rate": 5.246457314984086e-06, "loss": 0.3504, "step": 6392 }, { "epoch": 1.607897384305835, "grad_norm": 0.35721251368522644, "learning_rate": 5.2449958354945326e-06, "loss": 0.3357, "step": 6393 }, { "epoch": 1.6081488933601609, "grad_norm": 0.31743982434272766, "learning_rate": 5.24353433502262e-06, "loss": 0.3412, "step": 6394 }, { "epoch": 1.608400402414487, "grad_norm": 0.31196045875549316, "learning_rate": 5.242072813693514e-06, "loss": 0.3253, "step": 6395 }, { "epoch": 1.608651911468813, "grad_norm": 0.29665273427963257, "learning_rate": 5.240611271632386e-06, "loss": 0.3622, "step": 6396 }, { "epoch": 1.6089034205231387, "grad_norm": 0.36455845832824707, "learning_rate": 5.239149708964409e-06, "loss": 0.3435, "step": 6397 }, { "epoch": 1.6091549295774648, "grad_norm": 0.3024010956287384, "learning_rate": 5.237688125814752e-06, "loss": 0.3549, "step": 6398 }, { "epoch": 1.6094064386317908, "grad_norm": 0.3012137711048126, "learning_rate": 5.236226522308596e-06, "loss": 0.3332, "step": 6399 }, { "epoch": 1.6096579476861166, "grad_norm": 0.31425225734710693, "learning_rate": 5.234764898571118e-06, "loss": 0.3436, "step": 6400 }, { "epoch": 1.6099094567404426, "grad_norm": 0.3358578383922577, "learning_rate": 5.233303254727493e-06, "loss": 0.3441, "step": 6401 }, { "epoch": 1.6101609657947686, "grad_norm": 0.3274340033531189, "learning_rate": 5.231841590902905e-06, "loss": 0.3244, "step": 6402 }, { "epoch": 1.6104124748490944, "grad_norm": 0.3106974959373474, "learning_rate": 5.230379907222535e-06, "loss": 0.3764, "step": 6403 }, { "epoch": 1.6106639839034205, "grad_norm": 0.2865849435329437, "learning_rate": 5.228918203811566e-06, "loss": 0.3393, "step": 6404 }, { "epoch": 1.6109154929577465, "grad_norm": 0.31629478931427, "learning_rate": 5.227456480795187e-06, "loss": 0.3581, "step": 6405 }, { "epoch": 1.6111670020120723, "grad_norm": 0.3469734787940979, "learning_rate": 5.225994738298582e-06, "loss": 0.3523, "step": 6406 }, { "epoch": 1.6114185110663986, "grad_norm": 0.31348875164985657, "learning_rate": 5.224532976446941e-06, "loss": 0.3381, "step": 6407 }, { "epoch": 1.6116700201207244, "grad_norm": 0.29473093152046204, "learning_rate": 5.223071195365456e-06, "loss": 0.3483, "step": 6408 }, { "epoch": 1.6119215291750502, "grad_norm": 0.3042793869972229, "learning_rate": 5.221609395179319e-06, "loss": 0.3646, "step": 6409 }, { "epoch": 1.6121730382293764, "grad_norm": 0.3180801272392273, "learning_rate": 5.220147576013724e-06, "loss": 0.3661, "step": 6410 }, { "epoch": 1.6124245472837022, "grad_norm": 0.318566232919693, "learning_rate": 5.218685737993865e-06, "loss": 0.3538, "step": 6411 }, { "epoch": 1.612676056338028, "grad_norm": 0.3094564974308014, "learning_rate": 5.217223881244942e-06, "loss": 0.3289, "step": 6412 }, { "epoch": 1.6129275653923543, "grad_norm": 0.3494105041027069, "learning_rate": 5.215762005892151e-06, "loss": 0.3474, "step": 6413 }, { "epoch": 1.61317907444668, "grad_norm": 0.312987357378006, "learning_rate": 5.214300112060695e-06, "loss": 0.3601, "step": 6414 }, { "epoch": 1.6134305835010059, "grad_norm": 0.30890733003616333, "learning_rate": 5.212838199875775e-06, "loss": 0.3478, "step": 6415 }, { "epoch": 1.6136820925553321, "grad_norm": 0.3125660717487335, "learning_rate": 5.211376269462594e-06, "loss": 0.3657, "step": 6416 }, { "epoch": 1.613933601609658, "grad_norm": 0.32715532183647156, "learning_rate": 5.209914320946359e-06, "loss": 0.3662, "step": 6417 }, { "epoch": 1.614185110663984, "grad_norm": 0.3133423626422882, "learning_rate": 5.208452354452275e-06, "loss": 0.3472, "step": 6418 }, { "epoch": 1.61443661971831, "grad_norm": 0.3222804069519043, "learning_rate": 5.20699037010555e-06, "loss": 0.3355, "step": 6419 }, { "epoch": 1.6146881287726358, "grad_norm": 0.3086826205253601, "learning_rate": 5.205528368031395e-06, "loss": 0.3552, "step": 6420 }, { "epoch": 1.6149396378269618, "grad_norm": 0.3171122968196869, "learning_rate": 5.204066348355022e-06, "loss": 0.3557, "step": 6421 }, { "epoch": 1.6151911468812878, "grad_norm": 0.31034934520721436, "learning_rate": 5.202604311201642e-06, "loss": 0.3478, "step": 6422 }, { "epoch": 1.6154426559356136, "grad_norm": 0.29711443185806274, "learning_rate": 5.201142256696472e-06, "loss": 0.349, "step": 6423 }, { "epoch": 1.6156941649899397, "grad_norm": 0.3413085639476776, "learning_rate": 5.199680184964725e-06, "loss": 0.3472, "step": 6424 }, { "epoch": 1.6159456740442657, "grad_norm": 0.2911038100719452, "learning_rate": 5.198218096131619e-06, "loss": 0.342, "step": 6425 }, { "epoch": 1.6161971830985915, "grad_norm": 0.291364461183548, "learning_rate": 5.196755990322373e-06, "loss": 0.3439, "step": 6426 }, { "epoch": 1.6164486921529175, "grad_norm": 0.3112189769744873, "learning_rate": 5.195293867662208e-06, "loss": 0.3266, "step": 6427 }, { "epoch": 1.6167002012072436, "grad_norm": 0.33983948826789856, "learning_rate": 5.193831728276345e-06, "loss": 0.3386, "step": 6428 }, { "epoch": 1.6169517102615694, "grad_norm": 0.3202919363975525, "learning_rate": 5.192369572290007e-06, "loss": 0.3472, "step": 6429 }, { "epoch": 1.6172032193158954, "grad_norm": 0.31747010350227356, "learning_rate": 5.190907399828418e-06, "loss": 0.3429, "step": 6430 }, { "epoch": 1.6174547283702214, "grad_norm": 0.3446648120880127, "learning_rate": 5.189445211016804e-06, "loss": 0.3562, "step": 6431 }, { "epoch": 1.6177062374245472, "grad_norm": 0.3192874491214752, "learning_rate": 5.187983005980393e-06, "loss": 0.3591, "step": 6432 }, { "epoch": 1.6179577464788732, "grad_norm": 0.3468160331249237, "learning_rate": 5.186520784844416e-06, "loss": 0.3513, "step": 6433 }, { "epoch": 1.6182092555331993, "grad_norm": 0.32596200704574585, "learning_rate": 5.185058547734098e-06, "loss": 0.3382, "step": 6434 }, { "epoch": 1.618460764587525, "grad_norm": 0.2944319546222687, "learning_rate": 5.1835962947746744e-06, "loss": 0.3301, "step": 6435 }, { "epoch": 1.618712273641851, "grad_norm": 0.32907602190971375, "learning_rate": 5.1821340260913765e-06, "loss": 0.3355, "step": 6436 }, { "epoch": 1.6189637826961771, "grad_norm": 0.3250274956226349, "learning_rate": 5.180671741809439e-06, "loss": 0.3566, "step": 6437 }, { "epoch": 1.619215291750503, "grad_norm": 0.3051334321498871, "learning_rate": 5.179209442054096e-06, "loss": 0.3507, "step": 6438 }, { "epoch": 1.619466800804829, "grad_norm": 0.3153745234012604, "learning_rate": 5.177747126950587e-06, "loss": 0.3558, "step": 6439 }, { "epoch": 1.619718309859155, "grad_norm": 0.31299081444740295, "learning_rate": 5.176284796624147e-06, "loss": 0.3314, "step": 6440 }, { "epoch": 1.6199698189134808, "grad_norm": 0.29618367552757263, "learning_rate": 5.174822451200018e-06, "loss": 0.3563, "step": 6441 }, { "epoch": 1.6202213279678068, "grad_norm": 0.3016599118709564, "learning_rate": 5.173360090803437e-06, "loss": 0.3545, "step": 6442 }, { "epoch": 1.6204728370221329, "grad_norm": 0.30213463306427, "learning_rate": 5.1718977155596515e-06, "loss": 0.348, "step": 6443 }, { "epoch": 1.6207243460764587, "grad_norm": 0.3281251788139343, "learning_rate": 5.170435325593902e-06, "loss": 0.3423, "step": 6444 }, { "epoch": 1.6209758551307847, "grad_norm": 0.33480846881866455, "learning_rate": 5.168972921031433e-06, "loss": 0.3396, "step": 6445 }, { "epoch": 1.6212273641851107, "grad_norm": 0.32940176129341125, "learning_rate": 5.1675105019974905e-06, "loss": 0.3567, "step": 6446 }, { "epoch": 1.6214788732394365, "grad_norm": 0.3426181674003601, "learning_rate": 5.166048068617321e-06, "loss": 0.3681, "step": 6447 }, { "epoch": 1.6217303822937625, "grad_norm": 0.3707010746002197, "learning_rate": 5.164585621016174e-06, "loss": 0.3475, "step": 6448 }, { "epoch": 1.6219818913480886, "grad_norm": 0.32672780752182007, "learning_rate": 5.163123159319298e-06, "loss": 0.3372, "step": 6449 }, { "epoch": 1.6222334004024144, "grad_norm": 0.32823729515075684, "learning_rate": 5.161660683651943e-06, "loss": 0.3486, "step": 6450 }, { "epoch": 1.6224849094567404, "grad_norm": 0.3079800009727478, "learning_rate": 5.160198194139362e-06, "loss": 0.3535, "step": 6451 }, { "epoch": 1.6227364185110664, "grad_norm": 0.34681087732315063, "learning_rate": 5.158735690906808e-06, "loss": 0.3498, "step": 6452 }, { "epoch": 1.6229879275653922, "grad_norm": 0.33672547340393066, "learning_rate": 5.157273174079535e-06, "loss": 0.3343, "step": 6453 }, { "epoch": 1.6232394366197183, "grad_norm": 0.34650522470474243, "learning_rate": 5.155810643782798e-06, "loss": 0.3391, "step": 6454 }, { "epoch": 1.6234909456740443, "grad_norm": 0.3248770236968994, "learning_rate": 5.154348100141855e-06, "loss": 0.3862, "step": 6455 }, { "epoch": 1.62374245472837, "grad_norm": 0.3029992878437042, "learning_rate": 5.152885543281964e-06, "loss": 0.3286, "step": 6456 }, { "epoch": 1.6239939637826963, "grad_norm": 0.32535600662231445, "learning_rate": 5.151422973328381e-06, "loss": 0.3105, "step": 6457 }, { "epoch": 1.6242454728370221, "grad_norm": 0.3738095164299011, "learning_rate": 5.149960390406368e-06, "loss": 0.3534, "step": 6458 }, { "epoch": 1.624496981891348, "grad_norm": 0.31076717376708984, "learning_rate": 5.1484977946411855e-06, "loss": 0.3666, "step": 6459 }, { "epoch": 1.6247484909456742, "grad_norm": 0.32721608877182007, "learning_rate": 5.147035186158096e-06, "loss": 0.3678, "step": 6460 }, { "epoch": 1.625, "grad_norm": 0.3344939947128296, "learning_rate": 5.145572565082363e-06, "loss": 0.3726, "step": 6461 }, { "epoch": 1.6252515090543258, "grad_norm": 0.29688549041748047, "learning_rate": 5.144109931539251e-06, "loss": 0.3751, "step": 6462 }, { "epoch": 1.625503018108652, "grad_norm": 0.29007625579833984, "learning_rate": 5.142647285654023e-06, "loss": 0.3259, "step": 6463 }, { "epoch": 1.6257545271629779, "grad_norm": 0.3192233443260193, "learning_rate": 5.14118462755195e-06, "loss": 0.378, "step": 6464 }, { "epoch": 1.6260060362173037, "grad_norm": 0.3485788106918335, "learning_rate": 5.139721957358295e-06, "loss": 0.3603, "step": 6465 }, { "epoch": 1.62625754527163, "grad_norm": 0.30432939529418945, "learning_rate": 5.138259275198329e-06, "loss": 0.3284, "step": 6466 }, { "epoch": 1.6265090543259557, "grad_norm": 0.3238847851753235, "learning_rate": 5.1367965811973204e-06, "loss": 0.3459, "step": 6467 }, { "epoch": 1.6267605633802817, "grad_norm": 0.2998338043689728, "learning_rate": 5.135333875480541e-06, "loss": 0.3437, "step": 6468 }, { "epoch": 1.6270120724346078, "grad_norm": 0.3004499673843384, "learning_rate": 5.133871158173262e-06, "loss": 0.3473, "step": 6469 }, { "epoch": 1.6272635814889336, "grad_norm": 0.3121063709259033, "learning_rate": 5.132408429400755e-06, "loss": 0.3421, "step": 6470 }, { "epoch": 1.6275150905432596, "grad_norm": 0.3265574276447296, "learning_rate": 5.130945689288295e-06, "loss": 0.3465, "step": 6471 }, { "epoch": 1.6277665995975856, "grad_norm": 0.32837405800819397, "learning_rate": 5.129482937961155e-06, "loss": 0.3612, "step": 6472 }, { "epoch": 1.6280181086519114, "grad_norm": 0.3113304078578949, "learning_rate": 5.128020175544612e-06, "loss": 0.3388, "step": 6473 }, { "epoch": 1.6282696177062375, "grad_norm": 0.32058820128440857, "learning_rate": 5.126557402163943e-06, "loss": 0.3485, "step": 6474 }, { "epoch": 1.6285211267605635, "grad_norm": 0.34431353211402893, "learning_rate": 5.125094617944424e-06, "loss": 0.3268, "step": 6475 }, { "epoch": 1.6287726358148893, "grad_norm": 0.3116149604320526, "learning_rate": 5.123631823011333e-06, "loss": 0.3588, "step": 6476 }, { "epoch": 1.6290241448692153, "grad_norm": 0.2909471392631531, "learning_rate": 5.122169017489949e-06, "loss": 0.3541, "step": 6477 }, { "epoch": 1.6292756539235413, "grad_norm": 0.35828930139541626, "learning_rate": 5.120706201505554e-06, "loss": 0.3597, "step": 6478 }, { "epoch": 1.6295271629778671, "grad_norm": 0.28314337134361267, "learning_rate": 5.119243375183427e-06, "loss": 0.3599, "step": 6479 }, { "epoch": 1.6297786720321932, "grad_norm": 0.2970763146877289, "learning_rate": 5.1177805386488525e-06, "loss": 0.3418, "step": 6480 }, { "epoch": 1.6300301810865192, "grad_norm": 0.3072241246700287, "learning_rate": 5.116317692027111e-06, "loss": 0.3745, "step": 6481 }, { "epoch": 1.630281690140845, "grad_norm": 0.32199907302856445, "learning_rate": 5.114854835443486e-06, "loss": 0.355, "step": 6482 }, { "epoch": 1.630533199195171, "grad_norm": 0.328183650970459, "learning_rate": 5.113391969023264e-06, "loss": 0.358, "step": 6483 }, { "epoch": 1.630784708249497, "grad_norm": 0.3165997266769409, "learning_rate": 5.1119290928917285e-06, "loss": 0.3576, "step": 6484 }, { "epoch": 1.6310362173038229, "grad_norm": 0.3254222273826599, "learning_rate": 5.110466207174165e-06, "loss": 0.3737, "step": 6485 }, { "epoch": 1.631287726358149, "grad_norm": 0.3073231279850006, "learning_rate": 5.109003311995864e-06, "loss": 0.3435, "step": 6486 }, { "epoch": 1.631539235412475, "grad_norm": 0.3206403851509094, "learning_rate": 5.107540407482111e-06, "loss": 0.3568, "step": 6487 }, { "epoch": 1.6317907444668007, "grad_norm": 0.29873090982437134, "learning_rate": 5.106077493758195e-06, "loss": 0.3167, "step": 6488 }, { "epoch": 1.6320422535211268, "grad_norm": 0.2889578342437744, "learning_rate": 5.104614570949404e-06, "loss": 0.3574, "step": 6489 }, { "epoch": 1.6322937625754528, "grad_norm": 0.3086026608943939, "learning_rate": 5.1031516391810306e-06, "loss": 0.3079, "step": 6490 }, { "epoch": 1.6325452716297786, "grad_norm": 0.2884649634361267, "learning_rate": 5.101688698578364e-06, "loss": 0.3466, "step": 6491 }, { "epoch": 1.6327967806841046, "grad_norm": 0.31144869327545166, "learning_rate": 5.100225749266698e-06, "loss": 0.3572, "step": 6492 }, { "epoch": 1.6330482897384306, "grad_norm": 0.3050963282585144, "learning_rate": 5.098762791371322e-06, "loss": 0.3376, "step": 6493 }, { "epoch": 1.6332997987927564, "grad_norm": 0.2924579083919525, "learning_rate": 5.097299825017532e-06, "loss": 0.3409, "step": 6494 }, { "epoch": 1.6335513078470825, "grad_norm": 0.31079205870628357, "learning_rate": 5.09583685033062e-06, "loss": 0.3827, "step": 6495 }, { "epoch": 1.6338028169014085, "grad_norm": 0.30637675523757935, "learning_rate": 5.09437386743588e-06, "loss": 0.3354, "step": 6496 }, { "epoch": 1.6340543259557343, "grad_norm": 0.29085665941238403, "learning_rate": 5.09291087645861e-06, "loss": 0.349, "step": 6497 }, { "epoch": 1.6343058350100603, "grad_norm": 0.2851710915565491, "learning_rate": 5.091447877524105e-06, "loss": 0.3623, "step": 6498 }, { "epoch": 1.6345573440643864, "grad_norm": 0.29473739862442017, "learning_rate": 5.089984870757661e-06, "loss": 0.3461, "step": 6499 }, { "epoch": 1.6348088531187122, "grad_norm": 0.3163786828517914, "learning_rate": 5.088521856284576e-06, "loss": 0.3542, "step": 6500 }, { "epoch": 1.6350603621730382, "grad_norm": 0.3166099786758423, "learning_rate": 5.087058834230148e-06, "loss": 0.3335, "step": 6501 }, { "epoch": 1.6353118712273642, "grad_norm": 0.3093219995498657, "learning_rate": 5.0855958047196744e-06, "loss": 0.3258, "step": 6502 }, { "epoch": 1.63556338028169, "grad_norm": 0.3152346611022949, "learning_rate": 5.084132767878457e-06, "loss": 0.3328, "step": 6503 }, { "epoch": 1.635814889336016, "grad_norm": 0.2982290983200073, "learning_rate": 5.082669723831793e-06, "loss": 0.35, "step": 6504 }, { "epoch": 1.636066398390342, "grad_norm": 0.3094666004180908, "learning_rate": 5.081206672704986e-06, "loss": 0.3647, "step": 6505 }, { "epoch": 1.6363179074446679, "grad_norm": 0.31432268023490906, "learning_rate": 5.079743614623334e-06, "loss": 0.3449, "step": 6506 }, { "epoch": 1.6365694164989941, "grad_norm": 0.32027164101600647, "learning_rate": 5.07828054971214e-06, "loss": 0.3704, "step": 6507 }, { "epoch": 1.63682092555332, "grad_norm": 0.30292704701423645, "learning_rate": 5.076817478096707e-06, "loss": 0.3522, "step": 6508 }, { "epoch": 1.6370724346076457, "grad_norm": 0.30981820821762085, "learning_rate": 5.075354399902338e-06, "loss": 0.3399, "step": 6509 }, { "epoch": 1.637323943661972, "grad_norm": 0.3107950687408447, "learning_rate": 5.073891315254337e-06, "loss": 0.3439, "step": 6510 }, { "epoch": 1.6375754527162978, "grad_norm": 0.29735639691352844, "learning_rate": 5.072428224278005e-06, "loss": 0.3326, "step": 6511 }, { "epoch": 1.6378269617706236, "grad_norm": 0.34328925609588623, "learning_rate": 5.07096512709865e-06, "loss": 0.3414, "step": 6512 }, { "epoch": 1.6380784708249498, "grad_norm": 0.306029736995697, "learning_rate": 5.069502023841576e-06, "loss": 0.3733, "step": 6513 }, { "epoch": 1.6383299798792756, "grad_norm": 0.33722612261772156, "learning_rate": 5.068038914632088e-06, "loss": 0.3623, "step": 6514 }, { "epoch": 1.6385814889336014, "grad_norm": 0.3304372727870941, "learning_rate": 5.066575799595494e-06, "loss": 0.3489, "step": 6515 }, { "epoch": 1.6388329979879277, "grad_norm": 0.32422342896461487, "learning_rate": 5.065112678857097e-06, "loss": 0.3753, "step": 6516 }, { "epoch": 1.6390845070422535, "grad_norm": 0.341206431388855, "learning_rate": 5.063649552542208e-06, "loss": 0.3352, "step": 6517 }, { "epoch": 1.6393360160965795, "grad_norm": 0.30149486660957336, "learning_rate": 5.062186420776132e-06, "loss": 0.3636, "step": 6518 }, { "epoch": 1.6395875251509056, "grad_norm": 0.31636372208595276, "learning_rate": 5.060723283684178e-06, "loss": 0.3196, "step": 6519 }, { "epoch": 1.6398390342052314, "grad_norm": 0.3157700300216675, "learning_rate": 5.0592601413916555e-06, "loss": 0.3368, "step": 6520 }, { "epoch": 1.6400905432595574, "grad_norm": 0.3472077548503876, "learning_rate": 5.057796994023873e-06, "loss": 0.3561, "step": 6521 }, { "epoch": 1.6403420523138834, "grad_norm": 0.3037931025028229, "learning_rate": 5.056333841706138e-06, "loss": 0.3218, "step": 6522 }, { "epoch": 1.6405935613682092, "grad_norm": 0.3278881907463074, "learning_rate": 5.054870684563763e-06, "loss": 0.3403, "step": 6523 }, { "epoch": 1.6408450704225352, "grad_norm": 0.326613187789917, "learning_rate": 5.053407522722057e-06, "loss": 0.3386, "step": 6524 }, { "epoch": 1.6410965794768613, "grad_norm": 0.31864145398139954, "learning_rate": 5.0519443563063306e-06, "loss": 0.3662, "step": 6525 }, { "epoch": 1.641348088531187, "grad_norm": 0.29436439275741577, "learning_rate": 5.0504811854418946e-06, "loss": 0.3386, "step": 6526 }, { "epoch": 1.641599597585513, "grad_norm": 0.3037160634994507, "learning_rate": 5.049018010254062e-06, "loss": 0.3429, "step": 6527 }, { "epoch": 1.6418511066398391, "grad_norm": 0.29753682017326355, "learning_rate": 5.047554830868142e-06, "loss": 0.3608, "step": 6528 }, { "epoch": 1.642102615694165, "grad_norm": 0.3260503113269806, "learning_rate": 5.04609164740945e-06, "loss": 0.3306, "step": 6529 }, { "epoch": 1.642354124748491, "grad_norm": 0.34160879254341125, "learning_rate": 5.044628460003296e-06, "loss": 0.3614, "step": 6530 }, { "epoch": 1.642605633802817, "grad_norm": 0.31074759364128113, "learning_rate": 5.043165268774993e-06, "loss": 0.3435, "step": 6531 }, { "epoch": 1.6428571428571428, "grad_norm": 0.29596835374832153, "learning_rate": 5.041702073849856e-06, "loss": 0.3331, "step": 6532 }, { "epoch": 1.6431086519114688, "grad_norm": 0.3086645305156708, "learning_rate": 5.040238875353196e-06, "loss": 0.3534, "step": 6533 }, { "epoch": 1.6433601609657948, "grad_norm": 0.3223525285720825, "learning_rate": 5.038775673410329e-06, "loss": 0.3513, "step": 6534 }, { "epoch": 1.6436116700201207, "grad_norm": 0.3153035044670105, "learning_rate": 5.037312468146567e-06, "loss": 0.3537, "step": 6535 }, { "epoch": 1.6438631790744467, "grad_norm": 0.3239792287349701, "learning_rate": 5.035849259687227e-06, "loss": 0.3684, "step": 6536 }, { "epoch": 1.6441146881287727, "grad_norm": 0.31517869234085083, "learning_rate": 5.034386048157622e-06, "loss": 0.3477, "step": 6537 }, { "epoch": 1.6443661971830985, "grad_norm": 0.334823876619339, "learning_rate": 5.032922833683066e-06, "loss": 0.3537, "step": 6538 }, { "epoch": 1.6446177062374245, "grad_norm": 0.2976732552051544, "learning_rate": 5.031459616388874e-06, "loss": 0.3599, "step": 6539 }, { "epoch": 1.6448692152917506, "grad_norm": 0.3268083333969116, "learning_rate": 5.029996396400365e-06, "loss": 0.3449, "step": 6540 }, { "epoch": 1.6451207243460764, "grad_norm": 0.3218528628349304, "learning_rate": 5.028533173842851e-06, "loss": 0.3338, "step": 6541 }, { "epoch": 1.6453722334004024, "grad_norm": 0.28955259919166565, "learning_rate": 5.02706994884165e-06, "loss": 0.3325, "step": 6542 }, { "epoch": 1.6456237424547284, "grad_norm": 0.2912788987159729, "learning_rate": 5.025606721522077e-06, "loss": 0.3465, "step": 6543 }, { "epoch": 1.6458752515090542, "grad_norm": 0.33705586194992065, "learning_rate": 5.024143492009449e-06, "loss": 0.3647, "step": 6544 }, { "epoch": 1.6461267605633803, "grad_norm": 0.32726725935935974, "learning_rate": 5.022680260429082e-06, "loss": 0.3435, "step": 6545 }, { "epoch": 1.6463782696177063, "grad_norm": 0.32821711897850037, "learning_rate": 5.021217026906292e-06, "loss": 0.3583, "step": 6546 }, { "epoch": 1.646629778672032, "grad_norm": 0.3105396330356598, "learning_rate": 5.019753791566396e-06, "loss": 0.34, "step": 6547 }, { "epoch": 1.6468812877263581, "grad_norm": 0.3292068839073181, "learning_rate": 5.0182905545347125e-06, "loss": 0.3265, "step": 6548 }, { "epoch": 1.6471327967806841, "grad_norm": 0.3053254783153534, "learning_rate": 5.016827315936557e-06, "loss": 0.3391, "step": 6549 }, { "epoch": 1.64738430583501, "grad_norm": 0.3218199908733368, "learning_rate": 5.015364075897246e-06, "loss": 0.3596, "step": 6550 }, { "epoch": 1.647635814889336, "grad_norm": 0.28854915499687195, "learning_rate": 5.013900834542099e-06, "loss": 0.3419, "step": 6551 }, { "epoch": 1.647887323943662, "grad_norm": 0.33648034930229187, "learning_rate": 5.012437591996432e-06, "loss": 0.3496, "step": 6552 }, { "epoch": 1.6481388329979878, "grad_norm": 0.2896369993686676, "learning_rate": 5.010974348385565e-06, "loss": 0.3423, "step": 6553 }, { "epoch": 1.6483903420523138, "grad_norm": 0.31751370429992676, "learning_rate": 5.009511103834811e-06, "loss": 0.3562, "step": 6554 }, { "epoch": 1.6486418511066399, "grad_norm": 0.2887558341026306, "learning_rate": 5.008047858469492e-06, "loss": 0.3316, "step": 6555 }, { "epoch": 1.6488933601609657, "grad_norm": 0.3214130103588104, "learning_rate": 5.006584612414924e-06, "loss": 0.3598, "step": 6556 }, { "epoch": 1.649144869215292, "grad_norm": 0.32030197978019714, "learning_rate": 5.0051213657964245e-06, "loss": 0.3675, "step": 6557 }, { "epoch": 1.6493963782696177, "grad_norm": 0.3157503008842468, "learning_rate": 5.003658118739313e-06, "loss": 0.3495, "step": 6558 }, { "epoch": 1.6496478873239435, "grad_norm": 0.31887757778167725, "learning_rate": 5.0021948713689064e-06, "loss": 0.3527, "step": 6559 }, { "epoch": 1.6498993963782698, "grad_norm": 0.3290422558784485, "learning_rate": 5.000731623810523e-06, "loss": 0.3633, "step": 6560 }, { "epoch": 1.6501509054325956, "grad_norm": 0.31094884872436523, "learning_rate": 4.99926837618948e-06, "loss": 0.3623, "step": 6561 }, { "epoch": 1.6504024144869214, "grad_norm": 0.2901002764701843, "learning_rate": 4.997805128631095e-06, "loss": 0.355, "step": 6562 }, { "epoch": 1.6506539235412476, "grad_norm": 0.28659021854400635, "learning_rate": 4.996341881260689e-06, "loss": 0.3392, "step": 6563 }, { "epoch": 1.6509054325955734, "grad_norm": 0.324534147977829, "learning_rate": 4.994878634203576e-06, "loss": 0.3179, "step": 6564 }, { "epoch": 1.6511569416498992, "grad_norm": 0.3127976357936859, "learning_rate": 4.993415387585079e-06, "loss": 0.3493, "step": 6565 }, { "epoch": 1.6514084507042255, "grad_norm": 0.3226813077926636, "learning_rate": 4.991952141530509e-06, "loss": 0.3516, "step": 6566 }, { "epoch": 1.6516599597585513, "grad_norm": 0.30921557545661926, "learning_rate": 4.9904888961651895e-06, "loss": 0.3334, "step": 6567 }, { "epoch": 1.6519114688128773, "grad_norm": 0.31748196482658386, "learning_rate": 4.989025651614438e-06, "loss": 0.3315, "step": 6568 }, { "epoch": 1.6521629778672033, "grad_norm": 0.3072637915611267, "learning_rate": 4.987562408003568e-06, "loss": 0.3572, "step": 6569 }, { "epoch": 1.6524144869215291, "grad_norm": 0.3248019218444824, "learning_rate": 4.9860991654579025e-06, "loss": 0.3596, "step": 6570 }, { "epoch": 1.6526659959758552, "grad_norm": 0.3171079754829407, "learning_rate": 4.984635924102754e-06, "loss": 0.3551, "step": 6571 }, { "epoch": 1.6529175050301812, "grad_norm": 0.3320086598396301, "learning_rate": 4.983172684063446e-06, "loss": 0.3329, "step": 6572 }, { "epoch": 1.653169014084507, "grad_norm": 0.3475695848464966, "learning_rate": 4.981709445465288e-06, "loss": 0.3538, "step": 6573 }, { "epoch": 1.653420523138833, "grad_norm": 0.29529550671577454, "learning_rate": 4.980246208433606e-06, "loss": 0.315, "step": 6574 }, { "epoch": 1.653672032193159, "grad_norm": 0.31380680203437805, "learning_rate": 4.978782973093709e-06, "loss": 0.3334, "step": 6575 }, { "epoch": 1.6539235412474849, "grad_norm": 0.3308258354663849, "learning_rate": 4.977319739570921e-06, "loss": 0.3196, "step": 6576 }, { "epoch": 1.654175050301811, "grad_norm": 0.3524826169013977, "learning_rate": 4.975856507990552e-06, "loss": 0.3344, "step": 6577 }, { "epoch": 1.654426559356137, "grad_norm": 0.3204286992549896, "learning_rate": 4.974393278477923e-06, "loss": 0.3661, "step": 6578 }, { "epoch": 1.6546780684104627, "grad_norm": 0.3220480978488922, "learning_rate": 4.972930051158351e-06, "loss": 0.3719, "step": 6579 }, { "epoch": 1.6549295774647887, "grad_norm": 0.3321162760257721, "learning_rate": 4.971466826157149e-06, "loss": 0.3661, "step": 6580 }, { "epoch": 1.6551810865191148, "grad_norm": 0.3444232642650604, "learning_rate": 4.970003603599637e-06, "loss": 0.3676, "step": 6581 }, { "epoch": 1.6554325955734406, "grad_norm": 0.33695197105407715, "learning_rate": 4.968540383611126e-06, "loss": 0.3604, "step": 6582 }, { "epoch": 1.6556841046277666, "grad_norm": 0.35238873958587646, "learning_rate": 4.967077166316937e-06, "loss": 0.3389, "step": 6583 }, { "epoch": 1.6559356136820926, "grad_norm": 0.30956074595451355, "learning_rate": 4.96561395184238e-06, "loss": 0.3846, "step": 6584 }, { "epoch": 1.6561871227364184, "grad_norm": 0.29510757327079773, "learning_rate": 4.964150740312776e-06, "loss": 0.3391, "step": 6585 }, { "epoch": 1.6564386317907445, "grad_norm": 0.309314489364624, "learning_rate": 4.962687531853434e-06, "loss": 0.3519, "step": 6586 }, { "epoch": 1.6566901408450705, "grad_norm": 0.3318527638912201, "learning_rate": 4.961224326589674e-06, "loss": 0.3607, "step": 6587 }, { "epoch": 1.6569416498993963, "grad_norm": 0.3277136981487274, "learning_rate": 4.959761124646805e-06, "loss": 0.351, "step": 6588 }, { "epoch": 1.6571931589537223, "grad_norm": 0.29636046290397644, "learning_rate": 4.958297926150146e-06, "loss": 0.3345, "step": 6589 }, { "epoch": 1.6574446680080483, "grad_norm": 0.2861737310886383, "learning_rate": 4.956834731225008e-06, "loss": 0.3306, "step": 6590 }, { "epoch": 1.6576961770623742, "grad_norm": 0.31142330169677734, "learning_rate": 4.955371539996706e-06, "loss": 0.3601, "step": 6591 }, { "epoch": 1.6579476861167002, "grad_norm": 0.319341778755188, "learning_rate": 4.953908352590552e-06, "loss": 0.3525, "step": 6592 }, { "epoch": 1.6581991951710262, "grad_norm": 0.295574426651001, "learning_rate": 4.9524451691318585e-06, "loss": 0.338, "step": 6593 }, { "epoch": 1.658450704225352, "grad_norm": 0.3026335537433624, "learning_rate": 4.95098198974594e-06, "loss": 0.3506, "step": 6594 }, { "epoch": 1.658702213279678, "grad_norm": 0.3181147873401642, "learning_rate": 4.949518814558106e-06, "loss": 0.3806, "step": 6595 }, { "epoch": 1.658953722334004, "grad_norm": 0.30840447545051575, "learning_rate": 4.948055643693671e-06, "loss": 0.3635, "step": 6596 }, { "epoch": 1.6592052313883299, "grad_norm": 0.3271403908729553, "learning_rate": 4.946592477277945e-06, "loss": 0.3833, "step": 6597 }, { "epoch": 1.659456740442656, "grad_norm": 0.3057454228401184, "learning_rate": 4.945129315436239e-06, "loss": 0.3459, "step": 6598 }, { "epoch": 1.659708249496982, "grad_norm": 0.30508214235305786, "learning_rate": 4.943666158293864e-06, "loss": 0.3659, "step": 6599 }, { "epoch": 1.6599597585513077, "grad_norm": 0.3431748151779175, "learning_rate": 4.942203005976128e-06, "loss": 0.3873, "step": 6600 }, { "epoch": 1.6602112676056338, "grad_norm": 0.3456651270389557, "learning_rate": 4.940739858608346e-06, "loss": 0.3472, "step": 6601 }, { "epoch": 1.6604627766599598, "grad_norm": 0.3188634514808655, "learning_rate": 4.939276716315822e-06, "loss": 0.3422, "step": 6602 }, { "epoch": 1.6607142857142856, "grad_norm": 0.31093767285346985, "learning_rate": 4.937813579223871e-06, "loss": 0.359, "step": 6603 }, { "epoch": 1.6609657947686118, "grad_norm": 0.30429354310035706, "learning_rate": 4.9363504474577936e-06, "loss": 0.3307, "step": 6604 }, { "epoch": 1.6612173038229376, "grad_norm": 0.31555190682411194, "learning_rate": 4.934887321142905e-06, "loss": 0.3468, "step": 6605 }, { "epoch": 1.6614688128772634, "grad_norm": 0.30331486463546753, "learning_rate": 4.933424200404508e-06, "loss": 0.3412, "step": 6606 }, { "epoch": 1.6617203219315897, "grad_norm": 0.3019322454929352, "learning_rate": 4.9319610853679136e-06, "loss": 0.3432, "step": 6607 }, { "epoch": 1.6619718309859155, "grad_norm": 0.3192391097545624, "learning_rate": 4.9304979761584256e-06, "loss": 0.3571, "step": 6608 }, { "epoch": 1.6622233400402413, "grad_norm": 0.31620514392852783, "learning_rate": 4.929034872901352e-06, "loss": 0.3415, "step": 6609 }, { "epoch": 1.6624748490945676, "grad_norm": 0.32536083459854126, "learning_rate": 4.927571775721996e-06, "loss": 0.3433, "step": 6610 }, { "epoch": 1.6627263581488934, "grad_norm": 0.2909981608390808, "learning_rate": 4.926108684745664e-06, "loss": 0.3614, "step": 6611 }, { "epoch": 1.6629778672032192, "grad_norm": 0.30204445123672485, "learning_rate": 4.924645600097663e-06, "loss": 0.3639, "step": 6612 }, { "epoch": 1.6632293762575454, "grad_norm": 0.3101191818714142, "learning_rate": 4.923182521903293e-06, "loss": 0.3451, "step": 6613 }, { "epoch": 1.6634808853118712, "grad_norm": 0.3223671615123749, "learning_rate": 4.9217194502878615e-06, "loss": 0.353, "step": 6614 }, { "epoch": 1.663732394366197, "grad_norm": 0.3006145656108856, "learning_rate": 4.920256385376668e-06, "loss": 0.3356, "step": 6615 }, { "epoch": 1.6639839034205233, "grad_norm": 0.2919999957084656, "learning_rate": 4.918793327295018e-06, "loss": 0.3371, "step": 6616 }, { "epoch": 1.664235412474849, "grad_norm": 0.3268287777900696, "learning_rate": 4.917330276168208e-06, "loss": 0.3501, "step": 6617 }, { "epoch": 1.664486921529175, "grad_norm": 0.29942867159843445, "learning_rate": 4.915867232121546e-06, "loss": 0.3629, "step": 6618 }, { "epoch": 1.6647384305835011, "grad_norm": 0.30160030722618103, "learning_rate": 4.914404195280326e-06, "loss": 0.3347, "step": 6619 }, { "epoch": 1.664989939637827, "grad_norm": 0.30396658182144165, "learning_rate": 4.912941165769855e-06, "loss": 0.367, "step": 6620 }, { "epoch": 1.665241448692153, "grad_norm": 0.34666627645492554, "learning_rate": 4.9114781437154255e-06, "loss": 0.3528, "step": 6621 }, { "epoch": 1.665492957746479, "grad_norm": 0.324301153421402, "learning_rate": 4.910015129242339e-06, "loss": 0.3496, "step": 6622 }, { "epoch": 1.6657444668008048, "grad_norm": 0.3196171224117279, "learning_rate": 4.908552122475897e-06, "loss": 0.338, "step": 6623 }, { "epoch": 1.6659959758551308, "grad_norm": 0.2906225323677063, "learning_rate": 4.90708912354139e-06, "loss": 0.3447, "step": 6624 }, { "epoch": 1.6662474849094568, "grad_norm": 0.3057789206504822, "learning_rate": 4.905626132564121e-06, "loss": 0.3315, "step": 6625 }, { "epoch": 1.6664989939637826, "grad_norm": 0.34539157152175903, "learning_rate": 4.904163149669382e-06, "loss": 0.3487, "step": 6626 }, { "epoch": 1.6667505030181087, "grad_norm": 0.3429698944091797, "learning_rate": 4.902700174982471e-06, "loss": 0.3373, "step": 6627 }, { "epoch": 1.6670020120724347, "grad_norm": 0.29753679037094116, "learning_rate": 4.901237208628679e-06, "loss": 0.3371, "step": 6628 }, { "epoch": 1.6672535211267605, "grad_norm": 0.3603936433792114, "learning_rate": 4.899774250733305e-06, "loss": 0.3302, "step": 6629 }, { "epoch": 1.6675050301810865, "grad_norm": 0.315326452255249, "learning_rate": 4.8983113014216365e-06, "loss": 0.3192, "step": 6630 }, { "epoch": 1.6677565392354126, "grad_norm": 0.28168460726737976, "learning_rate": 4.896848360818971e-06, "loss": 0.3242, "step": 6631 }, { "epoch": 1.6680080482897384, "grad_norm": 0.3165770173072815, "learning_rate": 4.895385429050597e-06, "loss": 0.3665, "step": 6632 }, { "epoch": 1.6682595573440644, "grad_norm": 0.3269800543785095, "learning_rate": 4.893922506241806e-06, "loss": 0.3463, "step": 6633 }, { "epoch": 1.6685110663983904, "grad_norm": 0.3024964928627014, "learning_rate": 4.8924595925178905e-06, "loss": 0.3233, "step": 6634 }, { "epoch": 1.6687625754527162, "grad_norm": 0.3181130290031433, "learning_rate": 4.890996688004136e-06, "loss": 0.3512, "step": 6635 }, { "epoch": 1.6690140845070423, "grad_norm": 0.3129771053791046, "learning_rate": 4.889533792825836e-06, "loss": 0.3609, "step": 6636 }, { "epoch": 1.6692655935613683, "grad_norm": 0.297387957572937, "learning_rate": 4.888070907108273e-06, "loss": 0.3696, "step": 6637 }, { "epoch": 1.669517102615694, "grad_norm": 0.3127029836177826, "learning_rate": 4.886608030976739e-06, "loss": 0.3338, "step": 6638 }, { "epoch": 1.66976861167002, "grad_norm": 0.326416939496994, "learning_rate": 4.885145164556516e-06, "loss": 0.3449, "step": 6639 }, { "epoch": 1.6700201207243461, "grad_norm": 0.3048277795314789, "learning_rate": 4.8836823079728925e-06, "loss": 0.3643, "step": 6640 }, { "epoch": 1.670271629778672, "grad_norm": 0.3344593644142151, "learning_rate": 4.882219461351149e-06, "loss": 0.3471, "step": 6641 }, { "epoch": 1.670523138832998, "grad_norm": 0.3117375075817108, "learning_rate": 4.880756624816574e-06, "loss": 0.3416, "step": 6642 }, { "epoch": 1.670774647887324, "grad_norm": 0.31037411093711853, "learning_rate": 4.879293798494448e-06, "loss": 0.3518, "step": 6643 }, { "epoch": 1.6710261569416498, "grad_norm": 0.3362254500389099, "learning_rate": 4.877830982510052e-06, "loss": 0.3156, "step": 6644 }, { "epoch": 1.6712776659959758, "grad_norm": 0.3232327699661255, "learning_rate": 4.876368176988669e-06, "loss": 0.3462, "step": 6645 }, { "epoch": 1.6715291750503019, "grad_norm": 0.29828834533691406, "learning_rate": 4.874905382055578e-06, "loss": 0.3654, "step": 6646 }, { "epoch": 1.6717806841046277, "grad_norm": 0.3095129430294037, "learning_rate": 4.873442597836058e-06, "loss": 0.3525, "step": 6647 }, { "epoch": 1.6720321931589537, "grad_norm": 0.3279082775115967, "learning_rate": 4.871979824455388e-06, "loss": 0.3591, "step": 6648 }, { "epoch": 1.6722837022132797, "grad_norm": 0.33682674169540405, "learning_rate": 4.870517062038846e-06, "loss": 0.3456, "step": 6649 }, { "epoch": 1.6725352112676055, "grad_norm": 0.3893029987812042, "learning_rate": 4.869054310711707e-06, "loss": 0.3861, "step": 6650 }, { "epoch": 1.6727867203219315, "grad_norm": 0.346723735332489, "learning_rate": 4.867591570599247e-06, "loss": 0.3562, "step": 6651 }, { "epoch": 1.6730382293762576, "grad_norm": 0.3105736970901489, "learning_rate": 4.8661288418267395e-06, "loss": 0.3493, "step": 6652 }, { "epoch": 1.6732897384305834, "grad_norm": 0.33905136585235596, "learning_rate": 4.8646661245194605e-06, "loss": 0.336, "step": 6653 }, { "epoch": 1.6735412474849096, "grad_norm": 0.3350204527378082, "learning_rate": 4.863203418802681e-06, "loss": 0.3214, "step": 6654 }, { "epoch": 1.6737927565392354, "grad_norm": 0.3009431064128876, "learning_rate": 4.861740724801673e-06, "loss": 0.336, "step": 6655 }, { "epoch": 1.6740442655935612, "grad_norm": 0.3083348572254181, "learning_rate": 4.860278042641707e-06, "loss": 0.3652, "step": 6656 }, { "epoch": 1.6742957746478875, "grad_norm": 0.30141690373420715, "learning_rate": 4.858815372448053e-06, "loss": 0.3533, "step": 6657 }, { "epoch": 1.6745472837022133, "grad_norm": 0.2887999415397644, "learning_rate": 4.857352714345978e-06, "loss": 0.349, "step": 6658 }, { "epoch": 1.674798792756539, "grad_norm": 0.28986456990242004, "learning_rate": 4.8558900684607515e-06, "loss": 0.3252, "step": 6659 }, { "epoch": 1.6750503018108653, "grad_norm": 0.3002329468727112, "learning_rate": 4.854427434917638e-06, "loss": 0.3624, "step": 6660 }, { "epoch": 1.6753018108651911, "grad_norm": 0.3201340138912201, "learning_rate": 4.852964813841906e-06, "loss": 0.3457, "step": 6661 }, { "epoch": 1.675553319919517, "grad_norm": 0.30298128724098206, "learning_rate": 4.851502205358816e-06, "loss": 0.365, "step": 6662 }, { "epoch": 1.6758048289738432, "grad_norm": 0.33912432193756104, "learning_rate": 4.850039609593634e-06, "loss": 0.3453, "step": 6663 }, { "epoch": 1.676056338028169, "grad_norm": 0.3226577341556549, "learning_rate": 4.84857702667162e-06, "loss": 0.3462, "step": 6664 }, { "epoch": 1.6763078470824948, "grad_norm": 0.2974978983402252, "learning_rate": 4.847114456718039e-06, "loss": 0.3518, "step": 6665 }, { "epoch": 1.676559356136821, "grad_norm": 0.3077523708343506, "learning_rate": 4.8456518998581445e-06, "loss": 0.3547, "step": 6666 }, { "epoch": 1.6768108651911469, "grad_norm": 0.32342010736465454, "learning_rate": 4.844189356217203e-06, "loss": 0.3691, "step": 6667 }, { "epoch": 1.6770623742454729, "grad_norm": 0.33299824595451355, "learning_rate": 4.842726825920466e-06, "loss": 0.368, "step": 6668 }, { "epoch": 1.677313883299799, "grad_norm": 0.2916051149368286, "learning_rate": 4.8412643090931945e-06, "loss": 0.3292, "step": 6669 }, { "epoch": 1.6775653923541247, "grad_norm": 0.3211728632450104, "learning_rate": 4.839801805860639e-06, "loss": 0.3667, "step": 6670 }, { "epoch": 1.6778169014084507, "grad_norm": 0.3185436725616455, "learning_rate": 4.83833931634806e-06, "loss": 0.3416, "step": 6671 }, { "epoch": 1.6780684104627768, "grad_norm": 0.3072052001953125, "learning_rate": 4.8368768406807045e-06, "loss": 0.3526, "step": 6672 }, { "epoch": 1.6783199195171026, "grad_norm": 0.29810407757759094, "learning_rate": 4.8354143789838285e-06, "loss": 0.339, "step": 6673 }, { "epoch": 1.6785714285714286, "grad_norm": 0.3369450271129608, "learning_rate": 4.83395193138268e-06, "loss": 0.3568, "step": 6674 }, { "epoch": 1.6788229376257546, "grad_norm": 0.33037352561950684, "learning_rate": 4.83248949800251e-06, "loss": 0.3522, "step": 6675 }, { "epoch": 1.6790744466800804, "grad_norm": 0.36880165338516235, "learning_rate": 4.831027078968568e-06, "loss": 0.3664, "step": 6676 }, { "epoch": 1.6793259557344065, "grad_norm": 0.3295780420303345, "learning_rate": 4.829564674406098e-06, "loss": 0.3236, "step": 6677 }, { "epoch": 1.6795774647887325, "grad_norm": 0.327239066362381, "learning_rate": 4.828102284440349e-06, "loss": 0.3442, "step": 6678 }, { "epoch": 1.6798289738430583, "grad_norm": 0.3397291898727417, "learning_rate": 4.826639909196562e-06, "loss": 0.3433, "step": 6679 }, { "epoch": 1.6800804828973843, "grad_norm": 0.3069293797016144, "learning_rate": 4.825177548799985e-06, "loss": 0.3455, "step": 6680 }, { "epoch": 1.6803319919517103, "grad_norm": 0.35793232917785645, "learning_rate": 4.823715203375854e-06, "loss": 0.3675, "step": 6681 }, { "epoch": 1.6805835010060362, "grad_norm": 0.291510671377182, "learning_rate": 4.822252873049416e-06, "loss": 0.3461, "step": 6682 }, { "epoch": 1.6808350100603622, "grad_norm": 0.32121041417121887, "learning_rate": 4.8207905579459054e-06, "loss": 0.3352, "step": 6683 }, { "epoch": 1.6810865191146882, "grad_norm": 0.337119460105896, "learning_rate": 4.819328258190564e-06, "loss": 0.3171, "step": 6684 }, { "epoch": 1.681338028169014, "grad_norm": 0.32898882031440735, "learning_rate": 4.817865973908625e-06, "loss": 0.35, "step": 6685 }, { "epoch": 1.68158953722334, "grad_norm": 0.3288995623588562, "learning_rate": 4.816403705225326e-06, "loss": 0.3474, "step": 6686 }, { "epoch": 1.681841046277666, "grad_norm": 0.3149915039539337, "learning_rate": 4.814941452265903e-06, "loss": 0.3283, "step": 6687 }, { "epoch": 1.6820925553319919, "grad_norm": 0.3553139865398407, "learning_rate": 4.813479215155585e-06, "loss": 0.378, "step": 6688 }, { "epoch": 1.682344064386318, "grad_norm": 0.3398416042327881, "learning_rate": 4.812016994019607e-06, "loss": 0.3614, "step": 6689 }, { "epoch": 1.682595573440644, "grad_norm": 0.33960628509521484, "learning_rate": 4.810554788983196e-06, "loss": 0.348, "step": 6690 }, { "epoch": 1.6828470824949697, "grad_norm": 0.3453803062438965, "learning_rate": 4.809092600171584e-06, "loss": 0.3615, "step": 6691 }, { "epoch": 1.6830985915492958, "grad_norm": 0.3408578634262085, "learning_rate": 4.807630427709995e-06, "loss": 0.3595, "step": 6692 }, { "epoch": 1.6833501006036218, "grad_norm": 0.3130299746990204, "learning_rate": 4.806168271723657e-06, "loss": 0.3441, "step": 6693 }, { "epoch": 1.6836016096579476, "grad_norm": 0.3115828037261963, "learning_rate": 4.804706132337793e-06, "loss": 0.3505, "step": 6694 }, { "epoch": 1.6838531187122736, "grad_norm": 0.34020087122917175, "learning_rate": 4.803244009677629e-06, "loss": 0.3569, "step": 6695 }, { "epoch": 1.6841046277665996, "grad_norm": 0.3176514804363251, "learning_rate": 4.801781903868383e-06, "loss": 0.3452, "step": 6696 }, { "epoch": 1.6843561368209254, "grad_norm": 0.33271244168281555, "learning_rate": 4.8003198150352755e-06, "loss": 0.3536, "step": 6697 }, { "epoch": 1.6846076458752515, "grad_norm": 0.3202335834503174, "learning_rate": 4.79885774330353e-06, "loss": 0.3692, "step": 6698 }, { "epoch": 1.6848591549295775, "grad_norm": 0.314801424741745, "learning_rate": 4.797395688798358e-06, "loss": 0.3468, "step": 6699 }, { "epoch": 1.6851106639839033, "grad_norm": 0.35798442363739014, "learning_rate": 4.7959336516449795e-06, "loss": 0.3449, "step": 6700 }, { "epoch": 1.6853621730382293, "grad_norm": 0.35215941071510315, "learning_rate": 4.794471631968606e-06, "loss": 0.3699, "step": 6701 }, { "epoch": 1.6856136820925554, "grad_norm": 0.30209723114967346, "learning_rate": 4.793009629894451e-06, "loss": 0.3386, "step": 6702 }, { "epoch": 1.6858651911468812, "grad_norm": 0.3090112507343292, "learning_rate": 4.791547645547727e-06, "loss": 0.3756, "step": 6703 }, { "epoch": 1.6861167002012074, "grad_norm": 0.32377490401268005, "learning_rate": 4.790085679053644e-06, "loss": 0.3814, "step": 6704 }, { "epoch": 1.6863682092555332, "grad_norm": 0.30719804763793945, "learning_rate": 4.788623730537407e-06, "loss": 0.3386, "step": 6705 }, { "epoch": 1.686619718309859, "grad_norm": 0.3107820451259613, "learning_rate": 4.787161800124228e-06, "loss": 0.3603, "step": 6706 }, { "epoch": 1.6868712273641853, "grad_norm": 0.30711448192596436, "learning_rate": 4.785699887939307e-06, "loss": 0.344, "step": 6707 }, { "epoch": 1.687122736418511, "grad_norm": 0.30240827798843384, "learning_rate": 4.78423799410785e-06, "loss": 0.3494, "step": 6708 }, { "epoch": 1.6873742454728369, "grad_norm": 0.30051419138908386, "learning_rate": 4.782776118755061e-06, "loss": 0.3477, "step": 6709 }, { "epoch": 1.6876257545271631, "grad_norm": 0.3172437846660614, "learning_rate": 4.7813142620061365e-06, "loss": 0.3885, "step": 6710 }, { "epoch": 1.687877263581489, "grad_norm": 0.30793458223342896, "learning_rate": 4.779852423986278e-06, "loss": 0.3312, "step": 6711 }, { "epoch": 1.6881287726358147, "grad_norm": 0.31784626841545105, "learning_rate": 4.778390604820683e-06, "loss": 0.3646, "step": 6712 }, { "epoch": 1.688380281690141, "grad_norm": 0.3140884041786194, "learning_rate": 4.776928804634545e-06, "loss": 0.3276, "step": 6713 }, { "epoch": 1.6886317907444668, "grad_norm": 0.3013300895690918, "learning_rate": 4.775467023553061e-06, "loss": 0.3607, "step": 6714 }, { "epoch": 1.6888832997987926, "grad_norm": 0.30073946714401245, "learning_rate": 4.77400526170142e-06, "loss": 0.348, "step": 6715 }, { "epoch": 1.6891348088531188, "grad_norm": 0.3390004336833954, "learning_rate": 4.7725435192048156e-06, "loss": 0.324, "step": 6716 }, { "epoch": 1.6893863179074446, "grad_norm": 0.31539881229400635, "learning_rate": 4.771081796188435e-06, "loss": 0.3331, "step": 6717 }, { "epoch": 1.6896378269617707, "grad_norm": 0.34204819798469543, "learning_rate": 4.7696200927774675e-06, "loss": 0.3243, "step": 6718 }, { "epoch": 1.6898893360160967, "grad_norm": 0.312477171421051, "learning_rate": 4.768158409097096e-06, "loss": 0.3378, "step": 6719 }, { "epoch": 1.6901408450704225, "grad_norm": 0.32249870896339417, "learning_rate": 4.766696745272508e-06, "loss": 0.3498, "step": 6720 }, { "epoch": 1.6903923541247485, "grad_norm": 0.31593847274780273, "learning_rate": 4.765235101428883e-06, "loss": 0.3244, "step": 6721 }, { "epoch": 1.6906438631790746, "grad_norm": 0.31690171360969543, "learning_rate": 4.7637734776914045e-06, "loss": 0.3346, "step": 6722 }, { "epoch": 1.6908953722334004, "grad_norm": 0.3169795274734497, "learning_rate": 4.7623118741852484e-06, "loss": 0.3551, "step": 6723 }, { "epoch": 1.6911468812877264, "grad_norm": 0.2886812686920166, "learning_rate": 4.760850291035595e-06, "loss": 0.3425, "step": 6724 }, { "epoch": 1.6913983903420524, "grad_norm": 0.3049076199531555, "learning_rate": 4.759388728367615e-06, "loss": 0.3521, "step": 6725 }, { "epoch": 1.6916498993963782, "grad_norm": 0.2920475900173187, "learning_rate": 4.757927186306489e-06, "loss": 0.3332, "step": 6726 }, { "epoch": 1.6919014084507042, "grad_norm": 0.3055409789085388, "learning_rate": 4.756465664977381e-06, "loss": 0.3586, "step": 6727 }, { "epoch": 1.6921529175050303, "grad_norm": 0.3127298057079315, "learning_rate": 4.75500416450547e-06, "loss": 0.3309, "step": 6728 }, { "epoch": 1.692404426559356, "grad_norm": 0.3161258101463318, "learning_rate": 4.753542685015916e-06, "loss": 0.3436, "step": 6729 }, { "epoch": 1.692655935613682, "grad_norm": 0.3124731481075287, "learning_rate": 4.752081226633888e-06, "loss": 0.3495, "step": 6730 }, { "epoch": 1.6929074446680081, "grad_norm": 0.30646365880966187, "learning_rate": 4.750619789484556e-06, "loss": 0.328, "step": 6731 }, { "epoch": 1.693158953722334, "grad_norm": 0.30611762404441833, "learning_rate": 4.749158373693076e-06, "loss": 0.363, "step": 6732 }, { "epoch": 1.69341046277666, "grad_norm": 0.3001714050769806, "learning_rate": 4.7476969793846136e-06, "loss": 0.3489, "step": 6733 }, { "epoch": 1.693661971830986, "grad_norm": 0.30453163385391235, "learning_rate": 4.746235606684326e-06, "loss": 0.3424, "step": 6734 }, { "epoch": 1.6939134808853118, "grad_norm": 0.31667834520339966, "learning_rate": 4.744774255717372e-06, "loss": 0.3638, "step": 6735 }, { "epoch": 1.6941649899396378, "grad_norm": 0.30841580033302307, "learning_rate": 4.7433129266089045e-06, "loss": 0.3691, "step": 6736 }, { "epoch": 1.6944164989939638, "grad_norm": 0.3112299144268036, "learning_rate": 4.74185161948408e-06, "loss": 0.3445, "step": 6737 }, { "epoch": 1.6946680080482897, "grad_norm": 0.32629552483558655, "learning_rate": 4.7403903344680495e-06, "loss": 0.344, "step": 6738 }, { "epoch": 1.6949195171026157, "grad_norm": 0.3291604220867157, "learning_rate": 4.7389290716859634e-06, "loss": 0.3614, "step": 6739 }, { "epoch": 1.6951710261569417, "grad_norm": 0.3310836851596832, "learning_rate": 4.737467831262967e-06, "loss": 0.3459, "step": 6740 }, { "epoch": 1.6954225352112675, "grad_norm": 0.3385966718196869, "learning_rate": 4.736006613324209e-06, "loss": 0.3583, "step": 6741 }, { "epoch": 1.6956740442655935, "grad_norm": 0.29955390095710754, "learning_rate": 4.734545417994834e-06, "loss": 0.3815, "step": 6742 }, { "epoch": 1.6959255533199196, "grad_norm": 0.30086326599121094, "learning_rate": 4.7330842453999825e-06, "loss": 0.3576, "step": 6743 }, { "epoch": 1.6961770623742454, "grad_norm": 0.29754674434661865, "learning_rate": 4.731623095664797e-06, "loss": 0.3435, "step": 6744 }, { "epoch": 1.6964285714285714, "grad_norm": 0.34076476097106934, "learning_rate": 4.730161968914412e-06, "loss": 0.3372, "step": 6745 }, { "epoch": 1.6966800804828974, "grad_norm": 0.3299402594566345, "learning_rate": 4.72870086527397e-06, "loss": 0.34, "step": 6746 }, { "epoch": 1.6969315895372232, "grad_norm": 0.3097115755081177, "learning_rate": 4.727239784868597e-06, "loss": 0.3463, "step": 6747 }, { "epoch": 1.6971830985915493, "grad_norm": 0.30106401443481445, "learning_rate": 4.725778727823434e-06, "loss": 0.3376, "step": 6748 }, { "epoch": 1.6974346076458753, "grad_norm": 0.3247431814670563, "learning_rate": 4.724317694263605e-06, "loss": 0.3387, "step": 6749 }, { "epoch": 1.697686116700201, "grad_norm": 0.30322572588920593, "learning_rate": 4.7228566843142426e-06, "loss": 0.3589, "step": 6750 }, { "epoch": 1.6979376257545271, "grad_norm": 0.3284371793270111, "learning_rate": 4.72139569810047e-06, "loss": 0.3592, "step": 6751 }, { "epoch": 1.6981891348088531, "grad_norm": 0.3332383632659912, "learning_rate": 4.7199347357474115e-06, "loss": 0.357, "step": 6752 }, { "epoch": 1.698440643863179, "grad_norm": 0.32514792680740356, "learning_rate": 4.7184737973801945e-06, "loss": 0.3224, "step": 6753 }, { "epoch": 1.6986921529175052, "grad_norm": 0.2966492176055908, "learning_rate": 4.717012883123932e-06, "loss": 0.3646, "step": 6754 }, { "epoch": 1.698943661971831, "grad_norm": 0.32919812202453613, "learning_rate": 4.715551993103749e-06, "loss": 0.3455, "step": 6755 }, { "epoch": 1.6991951710261568, "grad_norm": 0.29699820280075073, "learning_rate": 4.714091127444755e-06, "loss": 0.3478, "step": 6756 }, { "epoch": 1.699446680080483, "grad_norm": 0.33212509751319885, "learning_rate": 4.712630286272071e-06, "loss": 0.351, "step": 6757 }, { "epoch": 1.6996981891348089, "grad_norm": 0.323261022567749, "learning_rate": 4.711169469710802e-06, "loss": 0.353, "step": 6758 }, { "epoch": 1.6999496981891347, "grad_norm": 0.3002278804779053, "learning_rate": 4.7097086778860625e-06, "loss": 0.3433, "step": 6759 }, { "epoch": 1.700201207243461, "grad_norm": 0.31812816858291626, "learning_rate": 4.708247910922958e-06, "loss": 0.3633, "step": 6760 }, { "epoch": 1.7004527162977867, "grad_norm": 0.3050650656223297, "learning_rate": 4.706787168946596e-06, "loss": 0.3376, "step": 6761 }, { "epoch": 1.7007042253521125, "grad_norm": 0.3200231194496155, "learning_rate": 4.705326452082076e-06, "loss": 0.3421, "step": 6762 }, { "epoch": 1.7009557344064388, "grad_norm": 0.32024839520454407, "learning_rate": 4.703865760454503e-06, "loss": 0.3384, "step": 6763 }, { "epoch": 1.7012072434607646, "grad_norm": 0.3096054494380951, "learning_rate": 4.702405094188977e-06, "loss": 0.3466, "step": 6764 }, { "epoch": 1.7014587525150904, "grad_norm": 0.30760541558265686, "learning_rate": 4.70094445341059e-06, "loss": 0.3337, "step": 6765 }, { "epoch": 1.7017102615694166, "grad_norm": 0.2908000349998474, "learning_rate": 4.699483838244443e-06, "loss": 0.3563, "step": 6766 }, { "epoch": 1.7019617706237424, "grad_norm": 0.3102479875087738, "learning_rate": 4.698023248815623e-06, "loss": 0.34, "step": 6767 }, { "epoch": 1.7022132796780685, "grad_norm": 0.317518413066864, "learning_rate": 4.6965626852492235e-06, "loss": 0.3706, "step": 6768 }, { "epoch": 1.7024647887323945, "grad_norm": 0.33676648139953613, "learning_rate": 4.6951021476703304e-06, "loss": 0.348, "step": 6769 }, { "epoch": 1.7027162977867203, "grad_norm": 0.30047839879989624, "learning_rate": 4.6936416362040325e-06, "loss": 0.3411, "step": 6770 }, { "epoch": 1.7029678068410463, "grad_norm": 0.325817734003067, "learning_rate": 4.69218115097541e-06, "loss": 0.3657, "step": 6771 }, { "epoch": 1.7032193158953723, "grad_norm": 0.28770050406455994, "learning_rate": 4.690720692109549e-06, "loss": 0.3375, "step": 6772 }, { "epoch": 1.7034708249496981, "grad_norm": 0.30655109882354736, "learning_rate": 4.689260259731523e-06, "loss": 0.3397, "step": 6773 }, { "epoch": 1.7037223340040242, "grad_norm": 0.3254563510417938, "learning_rate": 4.687799853966413e-06, "loss": 0.3299, "step": 6774 }, { "epoch": 1.7039738430583502, "grad_norm": 0.3062305450439453, "learning_rate": 4.686339474939293e-06, "loss": 0.3617, "step": 6775 }, { "epoch": 1.704225352112676, "grad_norm": 0.3447815179824829, "learning_rate": 4.6848791227752335e-06, "loss": 0.3491, "step": 6776 }, { "epoch": 1.704476861167002, "grad_norm": 0.3192574083805084, "learning_rate": 4.6834187975993065e-06, "loss": 0.3619, "step": 6777 }, { "epoch": 1.704728370221328, "grad_norm": 0.3120723068714142, "learning_rate": 4.681958499536579e-06, "loss": 0.3453, "step": 6778 }, { "epoch": 1.7049798792756539, "grad_norm": 0.31706029176712036, "learning_rate": 4.680498228712116e-06, "loss": 0.3423, "step": 6779 }, { "epoch": 1.70523138832998, "grad_norm": 0.3090144693851471, "learning_rate": 4.679037985250981e-06, "loss": 0.3368, "step": 6780 }, { "epoch": 1.705482897384306, "grad_norm": 0.3182971179485321, "learning_rate": 4.677577769278235e-06, "loss": 0.3429, "step": 6781 }, { "epoch": 1.7057344064386317, "grad_norm": 0.2918050289154053, "learning_rate": 4.6761175809189366e-06, "loss": 0.3477, "step": 6782 }, { "epoch": 1.7059859154929577, "grad_norm": 0.3405851721763611, "learning_rate": 4.67465742029814e-06, "loss": 0.3507, "step": 6783 }, { "epoch": 1.7062374245472838, "grad_norm": 0.2979619801044464, "learning_rate": 4.6731972875409e-06, "loss": 0.3183, "step": 6784 }, { "epoch": 1.7064889336016096, "grad_norm": 0.3200087249279022, "learning_rate": 4.671737182772267e-06, "loss": 0.3588, "step": 6785 }, { "epoch": 1.7067404426559356, "grad_norm": 0.3113258183002472, "learning_rate": 4.6702771061172935e-06, "loss": 0.3237, "step": 6786 }, { "epoch": 1.7069919517102616, "grad_norm": 0.3095763325691223, "learning_rate": 4.66881705770102e-06, "loss": 0.3481, "step": 6787 }, { "epoch": 1.7072434607645874, "grad_norm": 0.32275089621543884, "learning_rate": 4.667357037648496e-06, "loss": 0.3434, "step": 6788 }, { "epoch": 1.7074949698189135, "grad_norm": 0.3030339777469635, "learning_rate": 4.665897046084759e-06, "loss": 0.3369, "step": 6789 }, { "epoch": 1.7077464788732395, "grad_norm": 0.32319900393486023, "learning_rate": 4.6644370831348524e-06, "loss": 0.3355, "step": 6790 }, { "epoch": 1.7079979879275653, "grad_norm": 0.2972315847873688, "learning_rate": 4.662977148923808e-06, "loss": 0.337, "step": 6791 }, { "epoch": 1.7082494969818913, "grad_norm": 0.3062729239463806, "learning_rate": 4.6615172435766636e-06, "loss": 0.3504, "step": 6792 }, { "epoch": 1.7085010060362174, "grad_norm": 0.3187355697154999, "learning_rate": 4.660057367218448e-06, "loss": 0.3579, "step": 6793 }, { "epoch": 1.7087525150905432, "grad_norm": 0.33169421553611755, "learning_rate": 4.658597519974193e-06, "loss": 0.3504, "step": 6794 }, { "epoch": 1.7090040241448692, "grad_norm": 0.3317626118659973, "learning_rate": 4.657137701968925e-06, "loss": 0.3571, "step": 6795 }, { "epoch": 1.7092555331991952, "grad_norm": 0.31924623250961304, "learning_rate": 4.655677913327668e-06, "loss": 0.3457, "step": 6796 }, { "epoch": 1.709507042253521, "grad_norm": 0.3141118288040161, "learning_rate": 4.654218154175444e-06, "loss": 0.3178, "step": 6797 }, { "epoch": 1.709758551307847, "grad_norm": 0.3203374445438385, "learning_rate": 4.652758424637271e-06, "loss": 0.3472, "step": 6798 }, { "epoch": 1.710010060362173, "grad_norm": 0.33453568816185, "learning_rate": 4.651298724838168e-06, "loss": 0.3485, "step": 6799 }, { "epoch": 1.7102615694164989, "grad_norm": 0.3020557761192322, "learning_rate": 4.649839054903146e-06, "loss": 0.3516, "step": 6800 }, { "epoch": 1.710513078470825, "grad_norm": 0.32627707719802856, "learning_rate": 4.6483794149572196e-06, "loss": 0.3663, "step": 6801 }, { "epoch": 1.710764587525151, "grad_norm": 0.29445070028305054, "learning_rate": 4.646919805125396e-06, "loss": 0.3552, "step": 6802 }, { "epoch": 1.7110160965794767, "grad_norm": 0.30278632044792175, "learning_rate": 4.645460225532683e-06, "loss": 0.3602, "step": 6803 }, { "epoch": 1.711267605633803, "grad_norm": 0.3449458181858063, "learning_rate": 4.644000676304082e-06, "loss": 0.3329, "step": 6804 }, { "epoch": 1.7115191146881288, "grad_norm": 0.32875609397888184, "learning_rate": 4.642541157564596e-06, "loss": 0.3313, "step": 6805 }, { "epoch": 1.7117706237424546, "grad_norm": 0.3220992386341095, "learning_rate": 4.641081669439226e-06, "loss": 0.3739, "step": 6806 }, { "epoch": 1.7120221327967808, "grad_norm": 0.30020537972450256, "learning_rate": 4.6396222120529625e-06, "loss": 0.33, "step": 6807 }, { "epoch": 1.7122736418511066, "grad_norm": 0.3377690315246582, "learning_rate": 4.638162785530805e-06, "loss": 0.3478, "step": 6808 }, { "epoch": 1.7125251509054324, "grad_norm": 0.30975767970085144, "learning_rate": 4.636703389997739e-06, "loss": 0.3479, "step": 6809 }, { "epoch": 1.7127766599597587, "grad_norm": 0.31768518686294556, "learning_rate": 4.635244025578757e-06, "loss": 0.3301, "step": 6810 }, { "epoch": 1.7130281690140845, "grad_norm": 0.28654173016548157, "learning_rate": 4.63378469239884e-06, "loss": 0.3402, "step": 6811 }, { "epoch": 1.7132796780684103, "grad_norm": 0.33102425932884216, "learning_rate": 4.632325390582976e-06, "loss": 0.3487, "step": 6812 }, { "epoch": 1.7135311871227366, "grad_norm": 0.30774351954460144, "learning_rate": 4.630866120256139e-06, "loss": 0.3409, "step": 6813 }, { "epoch": 1.7137826961770624, "grad_norm": 0.3273797035217285, "learning_rate": 4.629406881543312e-06, "loss": 0.3329, "step": 6814 }, { "epoch": 1.7140342052313882, "grad_norm": 0.31635209918022156, "learning_rate": 4.6279476745694655e-06, "loss": 0.3387, "step": 6815 }, { "epoch": 1.7142857142857144, "grad_norm": 0.3162465989589691, "learning_rate": 4.6264884994595725e-06, "loss": 0.3405, "step": 6816 }, { "epoch": 1.7145372233400402, "grad_norm": 0.35132935643196106, "learning_rate": 4.625029356338605e-06, "loss": 0.3522, "step": 6817 }, { "epoch": 1.7147887323943662, "grad_norm": 0.31276100873947144, "learning_rate": 4.623570245331525e-06, "loss": 0.3402, "step": 6818 }, { "epoch": 1.7150402414486923, "grad_norm": 0.2836771309375763, "learning_rate": 4.622111166563301e-06, "loss": 0.3389, "step": 6819 }, { "epoch": 1.715291750503018, "grad_norm": 0.33196336030960083, "learning_rate": 4.6206521201588894e-06, "loss": 0.3398, "step": 6820 }, { "epoch": 1.715543259557344, "grad_norm": 0.28593510389328003, "learning_rate": 4.6191931062432526e-06, "loss": 0.366, "step": 6821 }, { "epoch": 1.7157947686116701, "grad_norm": 0.3059460520744324, "learning_rate": 4.617734124941342e-06, "loss": 0.3467, "step": 6822 }, { "epoch": 1.716046277665996, "grad_norm": 0.3157053589820862, "learning_rate": 4.6162751763781146e-06, "loss": 0.309, "step": 6823 }, { "epoch": 1.716297786720322, "grad_norm": 0.29486018419265747, "learning_rate": 4.6148162606785144e-06, "loss": 0.3461, "step": 6824 }, { "epoch": 1.716549295774648, "grad_norm": 0.32476910948753357, "learning_rate": 4.613357377967495e-06, "loss": 0.3405, "step": 6825 }, { "epoch": 1.7168008048289738, "grad_norm": 0.3237256705760956, "learning_rate": 4.611898528369995e-06, "loss": 0.3262, "step": 6826 }, { "epoch": 1.7170523138832998, "grad_norm": 0.3211307227611542, "learning_rate": 4.610439712010959e-06, "loss": 0.3476, "step": 6827 }, { "epoch": 1.7173038229376258, "grad_norm": 0.29768112301826477, "learning_rate": 4.6089809290153245e-06, "loss": 0.3629, "step": 6828 }, { "epoch": 1.7175553319919517, "grad_norm": 0.2961174547672272, "learning_rate": 4.607522179508027e-06, "loss": 0.343, "step": 6829 }, { "epoch": 1.7178068410462777, "grad_norm": 0.2991441488265991, "learning_rate": 4.606063463614e-06, "loss": 0.3571, "step": 6830 }, { "epoch": 1.7180583501006037, "grad_norm": 0.2887977063655853, "learning_rate": 4.604604781458173e-06, "loss": 0.3516, "step": 6831 }, { "epoch": 1.7183098591549295, "grad_norm": 0.31125563383102417, "learning_rate": 4.6031461331654725e-06, "loss": 0.3294, "step": 6832 }, { "epoch": 1.7185613682092555, "grad_norm": 0.3178209960460663, "learning_rate": 4.601687518860823e-06, "loss": 0.3519, "step": 6833 }, { "epoch": 1.7188128772635816, "grad_norm": 0.29017218947410583, "learning_rate": 4.600228938669146e-06, "loss": 0.3451, "step": 6834 }, { "epoch": 1.7190643863179074, "grad_norm": 0.3055189549922943, "learning_rate": 4.5987703927153575e-06, "loss": 0.3346, "step": 6835 }, { "epoch": 1.7193158953722334, "grad_norm": 0.3083680272102356, "learning_rate": 4.597311881124378e-06, "loss": 0.3386, "step": 6836 }, { "epoch": 1.7195674044265594, "grad_norm": 0.32472705841064453, "learning_rate": 4.595853404021114e-06, "loss": 0.3384, "step": 6837 }, { "epoch": 1.7198189134808852, "grad_norm": 0.3365086317062378, "learning_rate": 4.594394961530479e-06, "loss": 0.3496, "step": 6838 }, { "epoch": 1.7200704225352113, "grad_norm": 0.33195602893829346, "learning_rate": 4.592936553777378e-06, "loss": 0.3715, "step": 6839 }, { "epoch": 1.7203219315895373, "grad_norm": 0.3010450005531311, "learning_rate": 4.591478180886714e-06, "loss": 0.3476, "step": 6840 }, { "epoch": 1.720573440643863, "grad_norm": 0.3121878206729889, "learning_rate": 4.590019842983389e-06, "loss": 0.3477, "step": 6841 }, { "epoch": 1.720824949698189, "grad_norm": 0.3093717396259308, "learning_rate": 4.588561540192299e-06, "loss": 0.3335, "step": 6842 }, { "epoch": 1.7210764587525151, "grad_norm": 0.3174624741077423, "learning_rate": 4.587103272638339e-06, "loss": 0.355, "step": 6843 }, { "epoch": 1.721327967806841, "grad_norm": 0.3113724887371063, "learning_rate": 4.585645040446401e-06, "loss": 0.3357, "step": 6844 }, { "epoch": 1.721579476861167, "grad_norm": 0.33507058024406433, "learning_rate": 4.584186843741373e-06, "loss": 0.3498, "step": 6845 }, { "epoch": 1.721830985915493, "grad_norm": 0.34897923469543457, "learning_rate": 4.58272868264814e-06, "loss": 0.3539, "step": 6846 }, { "epoch": 1.7220824949698188, "grad_norm": 0.3107760548591614, "learning_rate": 4.581270557291586e-06, "loss": 0.3618, "step": 6847 }, { "epoch": 1.7223340040241448, "grad_norm": 0.3432888984680176, "learning_rate": 4.579812467796588e-06, "loss": 0.3521, "step": 6848 }, { "epoch": 1.7225855130784709, "grad_norm": 0.35445329546928406, "learning_rate": 4.5783544142880235e-06, "loss": 0.3892, "step": 6849 }, { "epoch": 1.7228370221327967, "grad_norm": 0.35715755820274353, "learning_rate": 4.576896396890767e-06, "loss": 0.3621, "step": 6850 }, { "epoch": 1.7230885311871227, "grad_norm": 0.30892953276634216, "learning_rate": 4.575438415729685e-06, "loss": 0.3596, "step": 6851 }, { "epoch": 1.7233400402414487, "grad_norm": 0.339541494846344, "learning_rate": 4.573980470929649e-06, "loss": 0.3262, "step": 6852 }, { "epoch": 1.7235915492957745, "grad_norm": 0.33129116892814636, "learning_rate": 4.572522562615519e-06, "loss": 0.3431, "step": 6853 }, { "epoch": 1.7238430583501008, "grad_norm": 0.3051982522010803, "learning_rate": 4.5710646909121585e-06, "loss": 0.3451, "step": 6854 }, { "epoch": 1.7240945674044266, "grad_norm": 0.3270474076271057, "learning_rate": 4.5696068559444225e-06, "loss": 0.3626, "step": 6855 }, { "epoch": 1.7243460764587524, "grad_norm": 0.3316960334777832, "learning_rate": 4.568149057837168e-06, "loss": 0.3443, "step": 6856 }, { "epoch": 1.7245975855130786, "grad_norm": 0.32199814915657043, "learning_rate": 4.5666912967152435e-06, "loss": 0.3544, "step": 6857 }, { "epoch": 1.7248490945674044, "grad_norm": 0.32749658823013306, "learning_rate": 4.5652335727035e-06, "loss": 0.3256, "step": 6858 }, { "epoch": 1.7251006036217302, "grad_norm": 0.3357948362827301, "learning_rate": 4.56377588592678e-06, "loss": 0.3441, "step": 6859 }, { "epoch": 1.7253521126760565, "grad_norm": 0.3046301603317261, "learning_rate": 4.562318236509926e-06, "loss": 0.3464, "step": 6860 }, { "epoch": 1.7256036217303823, "grad_norm": 0.33477479219436646, "learning_rate": 4.560860624577779e-06, "loss": 0.3365, "step": 6861 }, { "epoch": 1.725855130784708, "grad_norm": 0.3378863036632538, "learning_rate": 4.559403050255169e-06, "loss": 0.3526, "step": 6862 }, { "epoch": 1.7261066398390343, "grad_norm": 0.3373473584651947, "learning_rate": 4.557945513666935e-06, "loss": 0.3376, "step": 6863 }, { "epoch": 1.7263581488933601, "grad_norm": 0.33986005187034607, "learning_rate": 4.5564880149378995e-06, "loss": 0.3471, "step": 6864 }, { "epoch": 1.7266096579476862, "grad_norm": 0.2948022782802582, "learning_rate": 4.555030554192894e-06, "loss": 0.3384, "step": 6865 }, { "epoch": 1.7268611670020122, "grad_norm": 0.3061770796775818, "learning_rate": 4.553573131556734e-06, "loss": 0.3311, "step": 6866 }, { "epoch": 1.727112676056338, "grad_norm": 0.32700714468955994, "learning_rate": 4.552115747154247e-06, "loss": 0.3456, "step": 6867 }, { "epoch": 1.727364185110664, "grad_norm": 0.3140343725681305, "learning_rate": 4.55065840111024e-06, "loss": 0.3903, "step": 6868 }, { "epoch": 1.72761569416499, "grad_norm": 0.32476603984832764, "learning_rate": 4.549201093549533e-06, "loss": 0.3226, "step": 6869 }, { "epoch": 1.7278672032193159, "grad_norm": 0.32490959763526917, "learning_rate": 4.547743824596929e-06, "loss": 0.3408, "step": 6870 }, { "epoch": 1.7281187122736419, "grad_norm": 0.3557870388031006, "learning_rate": 4.546286594377238e-06, "loss": 0.3438, "step": 6871 }, { "epoch": 1.728370221327968, "grad_norm": 0.3313372731208801, "learning_rate": 4.544829403015264e-06, "loss": 0.3448, "step": 6872 }, { "epoch": 1.7286217303822937, "grad_norm": 0.3483891189098358, "learning_rate": 4.543372250635801e-06, "loss": 0.3346, "step": 6873 }, { "epoch": 1.7288732394366197, "grad_norm": 0.3317503035068512, "learning_rate": 4.541915137363651e-06, "loss": 0.3537, "step": 6874 }, { "epoch": 1.7291247484909458, "grad_norm": 0.33802077174186707, "learning_rate": 4.540458063323601e-06, "loss": 0.3488, "step": 6875 }, { "epoch": 1.7293762575452716, "grad_norm": 0.30749836564064026, "learning_rate": 4.539001028640447e-06, "loss": 0.35, "step": 6876 }, { "epoch": 1.7296277665995976, "grad_norm": 0.3114013671875, "learning_rate": 4.537544033438967e-06, "loss": 0.3305, "step": 6877 }, { "epoch": 1.7298792756539236, "grad_norm": 0.3545249104499817, "learning_rate": 4.53608707784395e-06, "loss": 0.347, "step": 6878 }, { "epoch": 1.7301307847082494, "grad_norm": 0.3405088484287262, "learning_rate": 4.534630161980171e-06, "loss": 0.3498, "step": 6879 }, { "epoch": 1.7303822937625755, "grad_norm": 0.3269940912723541, "learning_rate": 4.533173285972408e-06, "loss": 0.3648, "step": 6880 }, { "epoch": 1.7306338028169015, "grad_norm": 0.30962666869163513, "learning_rate": 4.531716449945431e-06, "loss": 0.3474, "step": 6881 }, { "epoch": 1.7308853118712273, "grad_norm": 0.3638971149921417, "learning_rate": 4.530259654024011e-06, "loss": 0.3424, "step": 6882 }, { "epoch": 1.7311368209255533, "grad_norm": 0.38066476583480835, "learning_rate": 4.528802898332914e-06, "loss": 0.3332, "step": 6883 }, { "epoch": 1.7313883299798793, "grad_norm": 0.30477452278137207, "learning_rate": 4.5273461829969e-06, "loss": 0.3625, "step": 6884 }, { "epoch": 1.7316398390342052, "grad_norm": 0.3417944014072418, "learning_rate": 4.525889508140731e-06, "loss": 0.3661, "step": 6885 }, { "epoch": 1.7318913480885312, "grad_norm": 0.340406209230423, "learning_rate": 4.524432873889156e-06, "loss": 0.3576, "step": 6886 }, { "epoch": 1.7321428571428572, "grad_norm": 0.40321967005729675, "learning_rate": 4.522976280366934e-06, "loss": 0.346, "step": 6887 }, { "epoch": 1.732394366197183, "grad_norm": 0.3876849412918091, "learning_rate": 4.5215197276988055e-06, "loss": 0.3598, "step": 6888 }, { "epoch": 1.732645875251509, "grad_norm": 0.3189951479434967, "learning_rate": 4.520063216009522e-06, "loss": 0.3289, "step": 6889 }, { "epoch": 1.732897384305835, "grad_norm": 0.3129585087299347, "learning_rate": 4.518606745423819e-06, "loss": 0.325, "step": 6890 }, { "epoch": 1.7331488933601609, "grad_norm": 0.2990803122520447, "learning_rate": 4.517150316066439e-06, "loss": 0.3333, "step": 6891 }, { "epoch": 1.733400402414487, "grad_norm": 0.31391507387161255, "learning_rate": 4.515693928062112e-06, "loss": 0.3527, "step": 6892 }, { "epoch": 1.733651911468813, "grad_norm": 0.3259546756744385, "learning_rate": 4.514237581535571e-06, "loss": 0.3729, "step": 6893 }, { "epoch": 1.7339034205231387, "grad_norm": 0.3225707411766052, "learning_rate": 4.512781276611542e-06, "loss": 0.3587, "step": 6894 }, { "epoch": 1.7341549295774648, "grad_norm": 0.311168909072876, "learning_rate": 4.511325013414749e-06, "loss": 0.3533, "step": 6895 }, { "epoch": 1.7344064386317908, "grad_norm": 0.30714693665504456, "learning_rate": 4.509868792069912e-06, "loss": 0.3373, "step": 6896 }, { "epoch": 1.7346579476861166, "grad_norm": 0.31528759002685547, "learning_rate": 4.508412612701746e-06, "loss": 0.3632, "step": 6897 }, { "epoch": 1.7349094567404426, "grad_norm": 0.29422181844711304, "learning_rate": 4.506956475434964e-06, "loss": 0.3536, "step": 6898 }, { "epoch": 1.7351609657947686, "grad_norm": 0.2980017364025116, "learning_rate": 4.505500380394276e-06, "loss": 0.3205, "step": 6899 }, { "epoch": 1.7354124748490944, "grad_norm": 0.32730206847190857, "learning_rate": 4.504044327704387e-06, "loss": 0.3645, "step": 6900 }, { "epoch": 1.7356639839034205, "grad_norm": 0.3445815145969391, "learning_rate": 4.502588317489997e-06, "loss": 0.382, "step": 6901 }, { "epoch": 1.7359154929577465, "grad_norm": 0.3304295241832733, "learning_rate": 4.501132349875808e-06, "loss": 0.3683, "step": 6902 }, { "epoch": 1.7361670020120723, "grad_norm": 0.30265411734580994, "learning_rate": 4.499676424986512e-06, "loss": 0.3542, "step": 6903 }, { "epoch": 1.7364185110663986, "grad_norm": 0.3029594421386719, "learning_rate": 4.498220542946798e-06, "loss": 0.3597, "step": 6904 }, { "epoch": 1.7366700201207244, "grad_norm": 0.327571302652359, "learning_rate": 4.4967647038813575e-06, "loss": 0.353, "step": 6905 }, { "epoch": 1.7369215291750502, "grad_norm": 0.2830412685871124, "learning_rate": 4.495308907914871e-06, "loss": 0.3255, "step": 6906 }, { "epoch": 1.7371730382293764, "grad_norm": 0.32870203256607056, "learning_rate": 4.49385315517202e-06, "loss": 0.3445, "step": 6907 }, { "epoch": 1.7374245472837022, "grad_norm": 0.34851449728012085, "learning_rate": 4.492397445777479e-06, "loss": 0.3283, "step": 6908 }, { "epoch": 1.737676056338028, "grad_norm": 0.3187304735183716, "learning_rate": 4.490941779855922e-06, "loss": 0.3495, "step": 6909 }, { "epoch": 1.7379275653923543, "grad_norm": 0.31370389461517334, "learning_rate": 4.489486157532016e-06, "loss": 0.3392, "step": 6910 }, { "epoch": 1.73817907444668, "grad_norm": 0.3236584961414337, "learning_rate": 4.488030578930428e-06, "loss": 0.3764, "step": 6911 }, { "epoch": 1.7384305835010059, "grad_norm": 0.3015926778316498, "learning_rate": 4.486575044175817e-06, "loss": 0.348, "step": 6912 }, { "epoch": 1.7386820925553321, "grad_norm": 0.33164599537849426, "learning_rate": 4.485119553392843e-06, "loss": 0.3407, "step": 6913 }, { "epoch": 1.738933601609658, "grad_norm": 0.3237990736961365, "learning_rate": 4.483664106706155e-06, "loss": 0.3614, "step": 6914 }, { "epoch": 1.739185110663984, "grad_norm": 0.30540236830711365, "learning_rate": 4.482208704240408e-06, "loss": 0.3377, "step": 6915 }, { "epoch": 1.73943661971831, "grad_norm": 0.32697343826293945, "learning_rate": 4.480753346120247e-06, "loss": 0.336, "step": 6916 }, { "epoch": 1.7396881287726358, "grad_norm": 0.32882174849510193, "learning_rate": 4.479298032470312e-06, "loss": 0.3385, "step": 6917 }, { "epoch": 1.7399396378269618, "grad_norm": 0.315578430891037, "learning_rate": 4.477842763415244e-06, "loss": 0.3481, "step": 6918 }, { "epoch": 1.7401911468812878, "grad_norm": 0.2991701066493988, "learning_rate": 4.476387539079676e-06, "loss": 0.321, "step": 6919 }, { "epoch": 1.7404426559356136, "grad_norm": 0.3147648274898529, "learning_rate": 4.474932359588241e-06, "loss": 0.3375, "step": 6920 }, { "epoch": 1.7406941649899397, "grad_norm": 0.34548136591911316, "learning_rate": 4.473477225065563e-06, "loss": 0.3568, "step": 6921 }, { "epoch": 1.7409456740442657, "grad_norm": 0.3195883631706238, "learning_rate": 4.472022135636268e-06, "loss": 0.3509, "step": 6922 }, { "epoch": 1.7411971830985915, "grad_norm": 0.2943090498447418, "learning_rate": 4.470567091424973e-06, "loss": 0.3495, "step": 6923 }, { "epoch": 1.7414486921529175, "grad_norm": 0.32778680324554443, "learning_rate": 4.469112092556296e-06, "loss": 0.3478, "step": 6924 }, { "epoch": 1.7417002012072436, "grad_norm": 0.2987135052680969, "learning_rate": 4.467657139154845e-06, "loss": 0.3446, "step": 6925 }, { "epoch": 1.7419517102615694, "grad_norm": 0.3135947287082672, "learning_rate": 4.466202231345229e-06, "loss": 0.3638, "step": 6926 }, { "epoch": 1.7422032193158954, "grad_norm": 0.29023781418800354, "learning_rate": 4.464747369252056e-06, "loss": 0.3696, "step": 6927 }, { "epoch": 1.7424547283702214, "grad_norm": 0.31927579641342163, "learning_rate": 4.463292552999919e-06, "loss": 0.3555, "step": 6928 }, { "epoch": 1.7427062374245472, "grad_norm": 0.36215946078300476, "learning_rate": 4.4618377827134205e-06, "loss": 0.3435, "step": 6929 }, { "epoch": 1.7429577464788732, "grad_norm": 0.30876463651657104, "learning_rate": 4.460383058517146e-06, "loss": 0.3416, "step": 6930 }, { "epoch": 1.7432092555331993, "grad_norm": 0.2847077250480652, "learning_rate": 4.458928380535689e-06, "loss": 0.3375, "step": 6931 }, { "epoch": 1.743460764587525, "grad_norm": 0.30569058656692505, "learning_rate": 4.45747374889363e-06, "loss": 0.3246, "step": 6932 }, { "epoch": 1.743712273641851, "grad_norm": 0.2917396128177643, "learning_rate": 4.456019163715552e-06, "loss": 0.3466, "step": 6933 }, { "epoch": 1.7439637826961771, "grad_norm": 0.29794490337371826, "learning_rate": 4.454564625126026e-06, "loss": 0.3593, "step": 6934 }, { "epoch": 1.744215291750503, "grad_norm": 0.31993839144706726, "learning_rate": 4.45311013324963e-06, "loss": 0.3431, "step": 6935 }, { "epoch": 1.744466800804829, "grad_norm": 0.2840554714202881, "learning_rate": 4.45165568821093e-06, "loss": 0.3485, "step": 6936 }, { "epoch": 1.744718309859155, "grad_norm": 0.32249221205711365, "learning_rate": 4.450201290134489e-06, "loss": 0.3301, "step": 6937 }, { "epoch": 1.7449698189134808, "grad_norm": 0.290263831615448, "learning_rate": 4.448746939144869e-06, "loss": 0.3413, "step": 6938 }, { "epoch": 1.7452213279678068, "grad_norm": 0.30761563777923584, "learning_rate": 4.447292635366623e-06, "loss": 0.3572, "step": 6939 }, { "epoch": 1.7454728370221329, "grad_norm": 0.3331648111343384, "learning_rate": 4.4458383789243086e-06, "loss": 0.3372, "step": 6940 }, { "epoch": 1.7457243460764587, "grad_norm": 0.3228667080402374, "learning_rate": 4.444384169942466e-06, "loss": 0.3245, "step": 6941 }, { "epoch": 1.7459758551307847, "grad_norm": 0.2900792360305786, "learning_rate": 4.4429300085456475e-06, "loss": 0.3437, "step": 6942 }, { "epoch": 1.7462273641851107, "grad_norm": 0.31676149368286133, "learning_rate": 4.4414758948583855e-06, "loss": 0.3397, "step": 6943 }, { "epoch": 1.7464788732394365, "grad_norm": 0.32281917333602905, "learning_rate": 4.440021829005221e-06, "loss": 0.3333, "step": 6944 }, { "epoch": 1.7467303822937625, "grad_norm": 0.3074769377708435, "learning_rate": 4.438567811110682e-06, "loss": 0.3588, "step": 6945 }, { "epoch": 1.7469818913480886, "grad_norm": 0.31130295991897583, "learning_rate": 4.437113841299297e-06, "loss": 0.3617, "step": 6946 }, { "epoch": 1.7472334004024144, "grad_norm": 0.3043542802333832, "learning_rate": 4.435659919695593e-06, "loss": 0.3479, "step": 6947 }, { "epoch": 1.7474849094567404, "grad_norm": 0.37658563256263733, "learning_rate": 4.434206046424085e-06, "loss": 0.3292, "step": 6948 }, { "epoch": 1.7477364185110664, "grad_norm": 0.3038911521434784, "learning_rate": 4.43275222160929e-06, "loss": 0.3645, "step": 6949 }, { "epoch": 1.7479879275653922, "grad_norm": 0.3258115351200104, "learning_rate": 4.431298445375717e-06, "loss": 0.3271, "step": 6950 }, { "epoch": 1.7482394366197183, "grad_norm": 0.3135131001472473, "learning_rate": 4.429844717847876e-06, "loss": 0.349, "step": 6951 }, { "epoch": 1.7484909456740443, "grad_norm": 0.32563337683677673, "learning_rate": 4.428391039150266e-06, "loss": 0.3503, "step": 6952 }, { "epoch": 1.74874245472837, "grad_norm": 0.3328086733818054, "learning_rate": 4.426937409407391e-06, "loss": 0.3368, "step": 6953 }, { "epoch": 1.7489939637826963, "grad_norm": 0.3279817998409271, "learning_rate": 4.4254838287437386e-06, "loss": 0.3378, "step": 6954 }, { "epoch": 1.7492454728370221, "grad_norm": 0.30334845185279846, "learning_rate": 4.424030297283805e-06, "loss": 0.3261, "step": 6955 }, { "epoch": 1.749496981891348, "grad_norm": 0.32851114869117737, "learning_rate": 4.42257681515207e-06, "loss": 0.3525, "step": 6956 }, { "epoch": 1.7497484909456742, "grad_norm": 0.32159623503685, "learning_rate": 4.42112338247302e-06, "loss": 0.3465, "step": 6957 }, { "epoch": 1.75, "grad_norm": 0.3375226855278015, "learning_rate": 4.41966999937113e-06, "loss": 0.3525, "step": 6958 }, { "epoch": 1.7502515090543258, "grad_norm": 0.34559088945388794, "learning_rate": 4.418216665970875e-06, "loss": 0.3692, "step": 6959 }, { "epoch": 1.750503018108652, "grad_norm": 0.2969364523887634, "learning_rate": 4.416763382396723e-06, "loss": 0.3436, "step": 6960 }, { "epoch": 1.7507545271629779, "grad_norm": 0.3050820529460907, "learning_rate": 4.4153101487731385e-06, "loss": 0.3235, "step": 6961 }, { "epoch": 1.7510060362173037, "grad_norm": 0.32389354705810547, "learning_rate": 4.413856965224581e-06, "loss": 0.3538, "step": 6962 }, { "epoch": 1.75125754527163, "grad_norm": 0.36416080594062805, "learning_rate": 4.412403831875509e-06, "loss": 0.3653, "step": 6963 }, { "epoch": 1.7515090543259557, "grad_norm": 0.31853196024894714, "learning_rate": 4.410950748850372e-06, "loss": 0.3283, "step": 6964 }, { "epoch": 1.7517605633802817, "grad_norm": 0.29971179366111755, "learning_rate": 4.409497716273618e-06, "loss": 0.3397, "step": 6965 }, { "epoch": 1.7520120724346078, "grad_norm": 0.3168172836303711, "learning_rate": 4.408044734269692e-06, "loss": 0.3361, "step": 6966 }, { "epoch": 1.7522635814889336, "grad_norm": 0.3141171336174011, "learning_rate": 4.40659180296303e-06, "loss": 0.3341, "step": 6967 }, { "epoch": 1.7525150905432596, "grad_norm": 0.3332131803035736, "learning_rate": 4.405138922478066e-06, "loss": 0.327, "step": 6968 }, { "epoch": 1.7527665995975856, "grad_norm": 0.3187667727470398, "learning_rate": 4.403686092939235e-06, "loss": 0.3512, "step": 6969 }, { "epoch": 1.7530181086519114, "grad_norm": 0.3000306487083435, "learning_rate": 4.4022333144709566e-06, "loss": 0.363, "step": 6970 }, { "epoch": 1.7532696177062375, "grad_norm": 0.32261931896209717, "learning_rate": 4.400780587197658e-06, "loss": 0.3832, "step": 6971 }, { "epoch": 1.7535211267605635, "grad_norm": 0.31589117646217346, "learning_rate": 4.399327911243751e-06, "loss": 0.3646, "step": 6972 }, { "epoch": 1.7537726358148893, "grad_norm": 0.3187357187271118, "learning_rate": 4.3978752867336536e-06, "loss": 0.3653, "step": 6973 }, { "epoch": 1.7540241448692153, "grad_norm": 0.3000258505344391, "learning_rate": 4.396422713791768e-06, "loss": 0.3428, "step": 6974 }, { "epoch": 1.7542756539235413, "grad_norm": 0.30427271127700806, "learning_rate": 4.394970192542504e-06, "loss": 0.3423, "step": 6975 }, { "epoch": 1.7545271629778671, "grad_norm": 0.3039720952510834, "learning_rate": 4.3935177231102544e-06, "loss": 0.3351, "step": 6976 }, { "epoch": 1.7547786720321932, "grad_norm": 0.29543939232826233, "learning_rate": 4.3920653056194205e-06, "loss": 0.3216, "step": 6977 }, { "epoch": 1.7550301810865192, "grad_norm": 0.3125627934932709, "learning_rate": 4.390612940194388e-06, "loss": 0.3434, "step": 6978 }, { "epoch": 1.755281690140845, "grad_norm": 0.2896956205368042, "learning_rate": 4.389160626959545e-06, "loss": 0.3435, "step": 6979 }, { "epoch": 1.755533199195171, "grad_norm": 0.3043994903564453, "learning_rate": 4.387708366039275e-06, "loss": 0.3524, "step": 6980 }, { "epoch": 1.755784708249497, "grad_norm": 0.3003641664981842, "learning_rate": 4.38625615755795e-06, "loss": 0.321, "step": 6981 }, { "epoch": 1.7560362173038229, "grad_norm": 0.33770516514778137, "learning_rate": 4.384804001639948e-06, "loss": 0.3518, "step": 6982 }, { "epoch": 1.756287726358149, "grad_norm": 0.3379949927330017, "learning_rate": 4.383351898409634e-06, "loss": 0.34, "step": 6983 }, { "epoch": 1.756539235412475, "grad_norm": 0.33452707529067993, "learning_rate": 4.381899847991372e-06, "loss": 0.385, "step": 6984 }, { "epoch": 1.7567907444668007, "grad_norm": 0.30171477794647217, "learning_rate": 4.38044785050952e-06, "loss": 0.3481, "step": 6985 }, { "epoch": 1.7570422535211268, "grad_norm": 0.2875131666660309, "learning_rate": 4.378995906088436e-06, "loss": 0.3327, "step": 6986 }, { "epoch": 1.7572937625754528, "grad_norm": 0.2994171977043152, "learning_rate": 4.377544014852466e-06, "loss": 0.3355, "step": 6987 }, { "epoch": 1.7575452716297786, "grad_norm": 0.3051261007785797, "learning_rate": 4.3760921769259585e-06, "loss": 0.3314, "step": 6988 }, { "epoch": 1.7577967806841046, "grad_norm": 0.29593223333358765, "learning_rate": 4.374640392433251e-06, "loss": 0.3445, "step": 6989 }, { "epoch": 1.7580482897384306, "grad_norm": 0.3013860285282135, "learning_rate": 4.3731886614986815e-06, "loss": 0.3661, "step": 6990 }, { "epoch": 1.7582997987927564, "grad_norm": 0.2953547537326813, "learning_rate": 4.371736984246584e-06, "loss": 0.3256, "step": 6991 }, { "epoch": 1.7585513078470825, "grad_norm": 0.3246706426143646, "learning_rate": 4.370285360801281e-06, "loss": 0.3298, "step": 6992 }, { "epoch": 1.7588028169014085, "grad_norm": 0.3016349673271179, "learning_rate": 4.3688337912871e-06, "loss": 0.3447, "step": 6993 }, { "epoch": 1.7590543259557343, "grad_norm": 0.295219361782074, "learning_rate": 4.367382275828353e-06, "loss": 0.3534, "step": 6994 }, { "epoch": 1.7593058350100603, "grad_norm": 0.3270989954471588, "learning_rate": 4.36593081454936e-06, "loss": 0.3554, "step": 6995 }, { "epoch": 1.7595573440643864, "grad_norm": 0.31494227051734924, "learning_rate": 4.364479407574424e-06, "loss": 0.3628, "step": 6996 }, { "epoch": 1.7598088531187122, "grad_norm": 0.3002457022666931, "learning_rate": 4.363028055027852e-06, "loss": 0.3426, "step": 6997 }, { "epoch": 1.7600603621730382, "grad_norm": 0.3095671236515045, "learning_rate": 4.36157675703394e-06, "loss": 0.3341, "step": 6998 }, { "epoch": 1.7603118712273642, "grad_norm": 0.2987361252307892, "learning_rate": 4.360125513716988e-06, "loss": 0.3406, "step": 6999 }, { "epoch": 1.76056338028169, "grad_norm": 0.30728045105934143, "learning_rate": 4.35867432520128e-06, "loss": 0.3648, "step": 7000 }, { "epoch": 1.760814889336016, "grad_norm": 0.279835969209671, "learning_rate": 4.357223191611103e-06, "loss": 0.3338, "step": 7001 }, { "epoch": 1.761066398390342, "grad_norm": 0.32082340121269226, "learning_rate": 4.355772113070742e-06, "loss": 0.364, "step": 7002 }, { "epoch": 1.7613179074446679, "grad_norm": 0.2994687259197235, "learning_rate": 4.354321089704466e-06, "loss": 0.3723, "step": 7003 }, { "epoch": 1.7615694164989941, "grad_norm": 0.3012014329433441, "learning_rate": 4.352870121636553e-06, "loss": 0.3351, "step": 7004 }, { "epoch": 1.76182092555332, "grad_norm": 0.288789838552475, "learning_rate": 4.351419208991262e-06, "loss": 0.3324, "step": 7005 }, { "epoch": 1.7620724346076457, "grad_norm": 0.2942191958427429, "learning_rate": 4.349968351892861e-06, "loss": 0.3425, "step": 7006 }, { "epoch": 1.762323943661972, "grad_norm": 0.29075539112091064, "learning_rate": 4.348517550465602e-06, "loss": 0.3457, "step": 7007 }, { "epoch": 1.7625754527162978, "grad_norm": 0.31270796060562134, "learning_rate": 4.34706680483374e-06, "loss": 0.3489, "step": 7008 }, { "epoch": 1.7628269617706236, "grad_norm": 0.2807566821575165, "learning_rate": 4.345616115121521e-06, "loss": 0.3408, "step": 7009 }, { "epoch": 1.7630784708249498, "grad_norm": 0.3200899064540863, "learning_rate": 4.34416548145319e-06, "loss": 0.3233, "step": 7010 }, { "epoch": 1.7633299798792756, "grad_norm": 0.2903282642364502, "learning_rate": 4.342714903952979e-06, "loss": 0.3452, "step": 7011 }, { "epoch": 1.7635814889336014, "grad_norm": 0.3277839720249176, "learning_rate": 4.341264382745127e-06, "loss": 0.3368, "step": 7012 }, { "epoch": 1.7638329979879277, "grad_norm": 0.28098973631858826, "learning_rate": 4.339813917953859e-06, "loss": 0.3404, "step": 7013 }, { "epoch": 1.7640845070422535, "grad_norm": 0.3015226423740387, "learning_rate": 4.338363509703399e-06, "loss": 0.3395, "step": 7014 }, { "epoch": 1.7643360160965795, "grad_norm": 0.33274605870246887, "learning_rate": 4.336913158117965e-06, "loss": 0.3596, "step": 7015 }, { "epoch": 1.7645875251509056, "grad_norm": 0.31674689054489136, "learning_rate": 4.33546286332177e-06, "loss": 0.3396, "step": 7016 }, { "epoch": 1.7648390342052314, "grad_norm": 0.31130534410476685, "learning_rate": 4.3340126254390255e-06, "loss": 0.3906, "step": 7017 }, { "epoch": 1.7650905432595574, "grad_norm": 0.30620113015174866, "learning_rate": 4.3325624445939306e-06, "loss": 0.3371, "step": 7018 }, { "epoch": 1.7653420523138834, "grad_norm": 0.33001548051834106, "learning_rate": 4.33111232091069e-06, "loss": 0.3542, "step": 7019 }, { "epoch": 1.7655935613682092, "grad_norm": 0.3208904266357422, "learning_rate": 4.329662254513492e-06, "loss": 0.3387, "step": 7020 }, { "epoch": 1.7658450704225352, "grad_norm": 0.32378435134887695, "learning_rate": 4.32821224552653e-06, "loss": 0.3505, "step": 7021 }, { "epoch": 1.7660965794768613, "grad_norm": 0.3116562068462372, "learning_rate": 4.326762294073984e-06, "loss": 0.3457, "step": 7022 }, { "epoch": 1.766348088531187, "grad_norm": 0.3385581374168396, "learning_rate": 4.3253124002800376e-06, "loss": 0.3637, "step": 7023 }, { "epoch": 1.766599597585513, "grad_norm": 0.29773086309432983, "learning_rate": 4.323862564268862e-06, "loss": 0.334, "step": 7024 }, { "epoch": 1.7668511066398391, "grad_norm": 0.3357481062412262, "learning_rate": 4.32241278616463e-06, "loss": 0.3617, "step": 7025 }, { "epoch": 1.767102615694165, "grad_norm": 0.32160642743110657, "learning_rate": 4.320963066091503e-06, "loss": 0.3617, "step": 7026 }, { "epoch": 1.767354124748491, "grad_norm": 0.29699718952178955, "learning_rate": 4.319513404173641e-06, "loss": 0.3469, "step": 7027 }, { "epoch": 1.767605633802817, "grad_norm": 0.31124669313430786, "learning_rate": 4.318063800535199e-06, "loss": 0.3523, "step": 7028 }, { "epoch": 1.7678571428571428, "grad_norm": 0.3128409683704376, "learning_rate": 4.316614255300326e-06, "loss": 0.3102, "step": 7029 }, { "epoch": 1.7681086519114688, "grad_norm": 0.2870144844055176, "learning_rate": 4.315164768593167e-06, "loss": 0.3445, "step": 7030 }, { "epoch": 1.7683601609657948, "grad_norm": 0.3071323037147522, "learning_rate": 4.313715340537861e-06, "loss": 0.3552, "step": 7031 }, { "epoch": 1.7686116700201207, "grad_norm": 0.33453187346458435, "learning_rate": 4.312265971258544e-06, "loss": 0.3461, "step": 7032 }, { "epoch": 1.7688631790744467, "grad_norm": 0.3303498923778534, "learning_rate": 4.310816660879342e-06, "loss": 0.3508, "step": 7033 }, { "epoch": 1.7691146881287727, "grad_norm": 0.30803173780441284, "learning_rate": 4.3093674095243825e-06, "loss": 0.3221, "step": 7034 }, { "epoch": 1.7693661971830985, "grad_norm": 0.3237571716308594, "learning_rate": 4.307918217317785e-06, "loss": 0.3409, "step": 7035 }, { "epoch": 1.7696177062374245, "grad_norm": 0.3842838704586029, "learning_rate": 4.30646908438366e-06, "loss": 0.3455, "step": 7036 }, { "epoch": 1.7698692152917506, "grad_norm": 0.3077596426010132, "learning_rate": 4.305020010846121e-06, "loss": 0.3569, "step": 7037 }, { "epoch": 1.7701207243460764, "grad_norm": 0.32035428285598755, "learning_rate": 4.303570996829269e-06, "loss": 0.3468, "step": 7038 }, { "epoch": 1.7703722334004024, "grad_norm": 0.3082960546016693, "learning_rate": 4.302122042457206e-06, "loss": 0.3595, "step": 7039 }, { "epoch": 1.7706237424547284, "grad_norm": 0.32630711793899536, "learning_rate": 4.300673147854023e-06, "loss": 0.323, "step": 7040 }, { "epoch": 1.7708752515090542, "grad_norm": 0.3460298180580139, "learning_rate": 4.299224313143811e-06, "loss": 0.3253, "step": 7041 }, { "epoch": 1.7711267605633803, "grad_norm": 0.30822697281837463, "learning_rate": 4.297775538450651e-06, "loss": 0.3232, "step": 7042 }, { "epoch": 1.7713782696177063, "grad_norm": 0.3184284567832947, "learning_rate": 4.296326823898625e-06, "loss": 0.3431, "step": 7043 }, { "epoch": 1.771629778672032, "grad_norm": 0.3198661804199219, "learning_rate": 4.294878169611802e-06, "loss": 0.3349, "step": 7044 }, { "epoch": 1.7718812877263581, "grad_norm": 0.3085322976112366, "learning_rate": 4.2934295757142526e-06, "loss": 0.3351, "step": 7045 }, { "epoch": 1.7721327967806841, "grad_norm": 0.31835395097732544, "learning_rate": 4.291981042330042e-06, "loss": 0.3702, "step": 7046 }, { "epoch": 1.77238430583501, "grad_norm": 0.3139986991882324, "learning_rate": 4.290532569583223e-06, "loss": 0.33, "step": 7047 }, { "epoch": 1.772635814889336, "grad_norm": 0.30537134408950806, "learning_rate": 4.289084157597854e-06, "loss": 0.3513, "step": 7048 }, { "epoch": 1.772887323943662, "grad_norm": 0.2940613627433777, "learning_rate": 4.287635806497977e-06, "loss": 0.3523, "step": 7049 }, { "epoch": 1.7731388329979878, "grad_norm": 0.3057369887828827, "learning_rate": 4.2861875164076394e-06, "loss": 0.3706, "step": 7050 }, { "epoch": 1.7733903420523138, "grad_norm": 0.328139066696167, "learning_rate": 4.284739287450873e-06, "loss": 0.3603, "step": 7051 }, { "epoch": 1.7736418511066399, "grad_norm": 0.32239678502082825, "learning_rate": 4.283291119751714e-06, "loss": 0.3415, "step": 7052 }, { "epoch": 1.7738933601609657, "grad_norm": 0.3281613290309906, "learning_rate": 4.2818430134341835e-06, "loss": 0.3683, "step": 7053 }, { "epoch": 1.774144869215292, "grad_norm": 0.29835787415504456, "learning_rate": 4.280394968622309e-06, "loss": 0.3449, "step": 7054 }, { "epoch": 1.7743963782696177, "grad_norm": 0.3036617040634155, "learning_rate": 4.2789469854401025e-06, "loss": 0.3389, "step": 7055 }, { "epoch": 1.7746478873239435, "grad_norm": 0.2952130138874054, "learning_rate": 4.277499064011575e-06, "loss": 0.3251, "step": 7056 }, { "epoch": 1.7748993963782698, "grad_norm": 0.3045006990432739, "learning_rate": 4.276051204460735e-06, "loss": 0.3491, "step": 7057 }, { "epoch": 1.7751509054325956, "grad_norm": 0.304092675447464, "learning_rate": 4.274603406911578e-06, "loss": 0.3373, "step": 7058 }, { "epoch": 1.7754024144869214, "grad_norm": 0.3368243873119354, "learning_rate": 4.273155671488103e-06, "loss": 0.3633, "step": 7059 }, { "epoch": 1.7756539235412476, "grad_norm": 0.3125411868095398, "learning_rate": 4.271707998314296e-06, "loss": 0.3279, "step": 7060 }, { "epoch": 1.7759054325955734, "grad_norm": 0.31142720580101013, "learning_rate": 4.270260387514145e-06, "loss": 0.3408, "step": 7061 }, { "epoch": 1.7761569416498992, "grad_norm": 0.3212254047393799, "learning_rate": 4.268812839211624e-06, "loss": 0.3604, "step": 7062 }, { "epoch": 1.7764084507042255, "grad_norm": 0.3029959797859192, "learning_rate": 4.267365353530711e-06, "loss": 0.3625, "step": 7063 }, { "epoch": 1.7766599597585513, "grad_norm": 0.30155423283576965, "learning_rate": 4.265917930595371e-06, "loss": 0.3484, "step": 7064 }, { "epoch": 1.7769114688128773, "grad_norm": 0.3170860707759857, "learning_rate": 4.264470570529569e-06, "loss": 0.3534, "step": 7065 }, { "epoch": 1.7771629778672033, "grad_norm": 0.3197275698184967, "learning_rate": 4.2630232734572594e-06, "loss": 0.3507, "step": 7066 }, { "epoch": 1.7774144869215291, "grad_norm": 0.3387235999107361, "learning_rate": 4.2615760395023956e-06, "loss": 0.3531, "step": 7067 }, { "epoch": 1.7776659959758552, "grad_norm": 0.30116546154022217, "learning_rate": 4.260128868788927e-06, "loss": 0.3455, "step": 7068 }, { "epoch": 1.7779175050301812, "grad_norm": 0.32368162274360657, "learning_rate": 4.25868176144079e-06, "loss": 0.3489, "step": 7069 }, { "epoch": 1.778169014084507, "grad_norm": 0.29791736602783203, "learning_rate": 4.2572347175819245e-06, "loss": 0.3504, "step": 7070 }, { "epoch": 1.778420523138833, "grad_norm": 0.33331525325775146, "learning_rate": 4.255787737336257e-06, "loss": 0.3276, "step": 7071 }, { "epoch": 1.778672032193159, "grad_norm": 0.28021112084388733, "learning_rate": 4.254340820827715e-06, "loss": 0.3205, "step": 7072 }, { "epoch": 1.7789235412474849, "grad_norm": 0.29119399189949036, "learning_rate": 4.252893968180215e-06, "loss": 0.3536, "step": 7073 }, { "epoch": 1.779175050301811, "grad_norm": 0.29139408469200134, "learning_rate": 4.251447179517676e-06, "loss": 0.3535, "step": 7074 }, { "epoch": 1.779426559356137, "grad_norm": 0.3197605311870575, "learning_rate": 4.250000454964001e-06, "loss": 0.336, "step": 7075 }, { "epoch": 1.7796780684104627, "grad_norm": 0.3005029261112213, "learning_rate": 4.248553794643096e-06, "loss": 0.3385, "step": 7076 }, { "epoch": 1.7799295774647887, "grad_norm": 0.31606951355934143, "learning_rate": 4.247107198678856e-06, "loss": 0.3525, "step": 7077 }, { "epoch": 1.7801810865191148, "grad_norm": 0.3125983476638794, "learning_rate": 4.245660667195175e-06, "loss": 0.3413, "step": 7078 }, { "epoch": 1.7804325955734406, "grad_norm": 0.3108173906803131, "learning_rate": 4.244214200315939e-06, "loss": 0.3516, "step": 7079 }, { "epoch": 1.7806841046277666, "grad_norm": 0.31591638922691345, "learning_rate": 4.242767798165028e-06, "loss": 0.3521, "step": 7080 }, { "epoch": 1.7809356136820926, "grad_norm": 0.30157795548439026, "learning_rate": 4.241321460866319e-06, "loss": 0.3294, "step": 7081 }, { "epoch": 1.7811871227364184, "grad_norm": 0.3349739611148834, "learning_rate": 4.23987518854368e-06, "loss": 0.3625, "step": 7082 }, { "epoch": 1.7814386317907445, "grad_norm": 0.3226284086704254, "learning_rate": 4.2384289813209754e-06, "loss": 0.3247, "step": 7083 }, { "epoch": 1.7816901408450705, "grad_norm": 0.3141878545284271, "learning_rate": 4.236982839322064e-06, "loss": 0.3171, "step": 7084 }, { "epoch": 1.7819416498993963, "grad_norm": 0.3055501878261566, "learning_rate": 4.235536762670801e-06, "loss": 0.3376, "step": 7085 }, { "epoch": 1.7821931589537223, "grad_norm": 0.31992486119270325, "learning_rate": 4.23409075149103e-06, "loss": 0.3444, "step": 7086 }, { "epoch": 1.7824446680080483, "grad_norm": 0.3101184070110321, "learning_rate": 4.2326448059065935e-06, "loss": 0.3499, "step": 7087 }, { "epoch": 1.7826961770623742, "grad_norm": 0.33392560482025146, "learning_rate": 4.231198926041332e-06, "loss": 0.3843, "step": 7088 }, { "epoch": 1.7829476861167002, "grad_norm": 0.2991205155849457, "learning_rate": 4.229753112019069e-06, "loss": 0.3313, "step": 7089 }, { "epoch": 1.7831991951710262, "grad_norm": 0.314127653837204, "learning_rate": 4.2283073639636376e-06, "loss": 0.3379, "step": 7090 }, { "epoch": 1.783450704225352, "grad_norm": 0.31345435976982117, "learning_rate": 4.226861681998849e-06, "loss": 0.3603, "step": 7091 }, { "epoch": 1.783702213279678, "grad_norm": 0.3256407082080841, "learning_rate": 4.2254160662485236e-06, "loss": 0.3277, "step": 7092 }, { "epoch": 1.783953722334004, "grad_norm": 0.3122624158859253, "learning_rate": 4.2239705168364636e-06, "loss": 0.3499, "step": 7093 }, { "epoch": 1.7842052313883299, "grad_norm": 0.2897671163082123, "learning_rate": 4.222525033886476e-06, "loss": 0.3465, "step": 7094 }, { "epoch": 1.784456740442656, "grad_norm": 0.31568416953086853, "learning_rate": 4.221079617522354e-06, "loss": 0.3393, "step": 7095 }, { "epoch": 1.784708249496982, "grad_norm": 0.31326398253440857, "learning_rate": 4.219634267867892e-06, "loss": 0.3521, "step": 7096 }, { "epoch": 1.7849597585513077, "grad_norm": 0.346648246049881, "learning_rate": 4.2181889850468704e-06, "loss": 0.3414, "step": 7097 }, { "epoch": 1.7852112676056338, "grad_norm": 0.29954788088798523, "learning_rate": 4.216743769183071e-06, "loss": 0.3569, "step": 7098 }, { "epoch": 1.7854627766599598, "grad_norm": 0.3176994323730469, "learning_rate": 4.215298620400271e-06, "loss": 0.3513, "step": 7099 }, { "epoch": 1.7857142857142856, "grad_norm": 0.3110980987548828, "learning_rate": 4.213853538822232e-06, "loss": 0.3463, "step": 7100 }, { "epoch": 1.7859657947686118, "grad_norm": 0.3204716145992279, "learning_rate": 4.212408524572722e-06, "loss": 0.3722, "step": 7101 }, { "epoch": 1.7862173038229376, "grad_norm": 0.3436601161956787, "learning_rate": 4.210963577775492e-06, "loss": 0.3489, "step": 7102 }, { "epoch": 1.7864688128772634, "grad_norm": 0.31376877427101135, "learning_rate": 4.209518698554298e-06, "loss": 0.3449, "step": 7103 }, { "epoch": 1.7867203219315897, "grad_norm": 0.31126469373703003, "learning_rate": 4.2080738870328795e-06, "loss": 0.3685, "step": 7104 }, { "epoch": 1.7869718309859155, "grad_norm": 0.29963016510009766, "learning_rate": 4.206629143334981e-06, "loss": 0.3533, "step": 7105 }, { "epoch": 1.7872233400402413, "grad_norm": 0.3086322546005249, "learning_rate": 4.20518446758433e-06, "loss": 0.3215, "step": 7106 }, { "epoch": 1.7874748490945676, "grad_norm": 0.35310494899749756, "learning_rate": 4.20373985990466e-06, "loss": 0.3563, "step": 7107 }, { "epoch": 1.7877263581488934, "grad_norm": 0.2881607413291931, "learning_rate": 4.202295320419687e-06, "loss": 0.3393, "step": 7108 }, { "epoch": 1.7879778672032192, "grad_norm": 0.3135796785354614, "learning_rate": 4.2008508492531305e-06, "loss": 0.3605, "step": 7109 }, { "epoch": 1.7882293762575454, "grad_norm": 0.3010595440864563, "learning_rate": 4.1994064465287e-06, "loss": 0.356, "step": 7110 }, { "epoch": 1.7884808853118712, "grad_norm": 0.3232562839984894, "learning_rate": 4.1979621123700976e-06, "loss": 0.335, "step": 7111 }, { "epoch": 1.788732394366197, "grad_norm": 0.3245859742164612, "learning_rate": 4.196517846901025e-06, "loss": 0.3427, "step": 7112 }, { "epoch": 1.7889839034205233, "grad_norm": 0.323621928691864, "learning_rate": 4.195073650245169e-06, "loss": 0.3566, "step": 7113 }, { "epoch": 1.789235412474849, "grad_norm": 0.3078625500202179, "learning_rate": 4.193629522526223e-06, "loss": 0.3304, "step": 7114 }, { "epoch": 1.789486921529175, "grad_norm": 0.3693898320198059, "learning_rate": 4.19218546386786e-06, "loss": 0.3466, "step": 7115 }, { "epoch": 1.7897384305835011, "grad_norm": 0.2945307493209839, "learning_rate": 4.190741474393762e-06, "loss": 0.3452, "step": 7116 }, { "epoch": 1.789989939637827, "grad_norm": 0.3134191930294037, "learning_rate": 4.18929755422759e-06, "loss": 0.3609, "step": 7117 }, { "epoch": 1.790241448692153, "grad_norm": 0.3266321122646332, "learning_rate": 4.187853703493014e-06, "loss": 0.3513, "step": 7118 }, { "epoch": 1.790492957746479, "grad_norm": 0.3312418758869171, "learning_rate": 4.186409922313686e-06, "loss": 0.3385, "step": 7119 }, { "epoch": 1.7907444668008048, "grad_norm": 0.30360767245292664, "learning_rate": 4.184966210813258e-06, "loss": 0.3557, "step": 7120 }, { "epoch": 1.7909959758551308, "grad_norm": 0.3027835786342621, "learning_rate": 4.183522569115377e-06, "loss": 0.3553, "step": 7121 }, { "epoch": 1.7912474849094568, "grad_norm": 0.317096084356308, "learning_rate": 4.182078997343678e-06, "loss": 0.3794, "step": 7122 }, { "epoch": 1.7914989939637826, "grad_norm": 0.31175005435943604, "learning_rate": 4.180635495621798e-06, "loss": 0.3567, "step": 7123 }, { "epoch": 1.7917505030181087, "grad_norm": 0.30733904242515564, "learning_rate": 4.1791920640733596e-06, "loss": 0.3446, "step": 7124 }, { "epoch": 1.7920020120724347, "grad_norm": 0.3084380030632019, "learning_rate": 4.177748702821988e-06, "loss": 0.3611, "step": 7125 }, { "epoch": 1.7922535211267605, "grad_norm": 0.3090236783027649, "learning_rate": 4.176305411991295e-06, "loss": 0.3313, "step": 7126 }, { "epoch": 1.7925050301810865, "grad_norm": 0.2927131950855255, "learning_rate": 4.174862191704892e-06, "loss": 0.3476, "step": 7127 }, { "epoch": 1.7927565392354126, "grad_norm": 0.3258267343044281, "learning_rate": 4.173419042086377e-06, "loss": 0.3354, "step": 7128 }, { "epoch": 1.7930080482897384, "grad_norm": 0.2800391614437103, "learning_rate": 4.171975963259354e-06, "loss": 0.352, "step": 7129 }, { "epoch": 1.7932595573440644, "grad_norm": 0.32517415285110474, "learning_rate": 4.170532955347406e-06, "loss": 0.3151, "step": 7130 }, { "epoch": 1.7935110663983904, "grad_norm": 0.30167156457901, "learning_rate": 4.169090018474122e-06, "loss": 0.3481, "step": 7131 }, { "epoch": 1.7937625754527162, "grad_norm": 0.31728750467300415, "learning_rate": 4.1676471527630815e-06, "loss": 0.3554, "step": 7132 }, { "epoch": 1.7940140845070423, "grad_norm": 0.306044340133667, "learning_rate": 4.1662043583378534e-06, "loss": 0.3496, "step": 7133 }, { "epoch": 1.7942655935613683, "grad_norm": 0.3083249628543854, "learning_rate": 4.164761635322007e-06, "loss": 0.3371, "step": 7134 }, { "epoch": 1.794517102615694, "grad_norm": 0.2971269190311432, "learning_rate": 4.163318983839101e-06, "loss": 0.3446, "step": 7135 }, { "epoch": 1.79476861167002, "grad_norm": 0.29189154505729675, "learning_rate": 4.1618764040126905e-06, "loss": 0.3526, "step": 7136 }, { "epoch": 1.7950201207243461, "grad_norm": 0.31266725063323975, "learning_rate": 4.1604338959663204e-06, "loss": 0.3506, "step": 7137 }, { "epoch": 1.795271629778672, "grad_norm": 0.294013112783432, "learning_rate": 4.158991459823538e-06, "loss": 0.3593, "step": 7138 }, { "epoch": 1.795523138832998, "grad_norm": 0.31676429510116577, "learning_rate": 4.1575490957078725e-06, "loss": 0.3357, "step": 7139 }, { "epoch": 1.795774647887324, "grad_norm": 0.3218402862548828, "learning_rate": 4.15610680374286e-06, "loss": 0.3462, "step": 7140 }, { "epoch": 1.7960261569416498, "grad_norm": 0.2809396982192993, "learning_rate": 4.154664584052018e-06, "loss": 0.3368, "step": 7141 }, { "epoch": 1.7962776659959758, "grad_norm": 0.30327579379081726, "learning_rate": 4.153222436758866e-06, "loss": 0.3773, "step": 7142 }, { "epoch": 1.7965291750503019, "grad_norm": 0.2996901869773865, "learning_rate": 4.151780361986915e-06, "loss": 0.3433, "step": 7143 }, { "epoch": 1.7967806841046277, "grad_norm": 0.30090850591659546, "learning_rate": 4.1503383598596705e-06, "loss": 0.3518, "step": 7144 }, { "epoch": 1.7970321931589537, "grad_norm": 0.3128022849559784, "learning_rate": 4.148896430500629e-06, "loss": 0.3521, "step": 7145 }, { "epoch": 1.7972837022132797, "grad_norm": 0.26742905378341675, "learning_rate": 4.147454574033284e-06, "loss": 0.3435, "step": 7146 }, { "epoch": 1.7975352112676055, "grad_norm": 0.31213289499282837, "learning_rate": 4.146012790581121e-06, "loss": 0.3451, "step": 7147 }, { "epoch": 1.7977867203219315, "grad_norm": 0.2901371419429779, "learning_rate": 4.144571080267621e-06, "loss": 0.3496, "step": 7148 }, { "epoch": 1.7980382293762576, "grad_norm": 0.3160885274410248, "learning_rate": 4.143129443216256e-06, "loss": 0.3258, "step": 7149 }, { "epoch": 1.7982897384305834, "grad_norm": 0.3092404007911682, "learning_rate": 4.141687879550494e-06, "loss": 0.3679, "step": 7150 }, { "epoch": 1.7985412474849096, "grad_norm": 0.3174445629119873, "learning_rate": 4.140246389393794e-06, "loss": 0.3338, "step": 7151 }, { "epoch": 1.7987927565392354, "grad_norm": 0.28808581829071045, "learning_rate": 4.138804972869613e-06, "loss": 0.3477, "step": 7152 }, { "epoch": 1.7990442655935612, "grad_norm": 0.3383561670780182, "learning_rate": 4.137363630101398e-06, "loss": 0.3548, "step": 7153 }, { "epoch": 1.7992957746478875, "grad_norm": 0.30686813592910767, "learning_rate": 4.135922361212593e-06, "loss": 0.3549, "step": 7154 }, { "epoch": 1.7995472837022133, "grad_norm": 0.2914334833621979, "learning_rate": 4.134481166326631e-06, "loss": 0.3306, "step": 7155 }, { "epoch": 1.799798792756539, "grad_norm": 0.3026004135608673, "learning_rate": 4.133040045566942e-06, "loss": 0.3397, "step": 7156 }, { "epoch": 1.8000503018108653, "grad_norm": 0.3466000258922577, "learning_rate": 4.13159899905695e-06, "loss": 0.3348, "step": 7157 }, { "epoch": 1.8003018108651911, "grad_norm": 0.30805760622024536, "learning_rate": 4.130158026920072e-06, "loss": 0.3512, "step": 7158 }, { "epoch": 1.800553319919517, "grad_norm": 0.32429513335227966, "learning_rate": 4.128717129279715e-06, "loss": 0.3828, "step": 7159 }, { "epoch": 1.8008048289738432, "grad_norm": 0.3071700930595398, "learning_rate": 4.127276306259288e-06, "loss": 0.3582, "step": 7160 }, { "epoch": 1.801056338028169, "grad_norm": 0.31725555658340454, "learning_rate": 4.125835557982183e-06, "loss": 0.361, "step": 7161 }, { "epoch": 1.8013078470824948, "grad_norm": 0.31528329849243164, "learning_rate": 4.124394884571796e-06, "loss": 0.3451, "step": 7162 }, { "epoch": 1.801559356136821, "grad_norm": 0.3092787563800812, "learning_rate": 4.122954286151507e-06, "loss": 0.3431, "step": 7163 }, { "epoch": 1.8018108651911469, "grad_norm": 0.3064475953578949, "learning_rate": 4.121513762844696e-06, "loss": 0.3313, "step": 7164 }, { "epoch": 1.8020623742454729, "grad_norm": 0.30549487471580505, "learning_rate": 4.120073314774739e-06, "loss": 0.3576, "step": 7165 }, { "epoch": 1.802313883299799, "grad_norm": 0.30158209800720215, "learning_rate": 4.118632942064995e-06, "loss": 0.3607, "step": 7166 }, { "epoch": 1.8025653923541247, "grad_norm": 0.2950282394886017, "learning_rate": 4.117192644838827e-06, "loss": 0.3414, "step": 7167 }, { "epoch": 1.8028169014084507, "grad_norm": 0.3308418393135071, "learning_rate": 4.115752423219585e-06, "loss": 0.3561, "step": 7168 }, { "epoch": 1.8030684104627768, "grad_norm": 0.30721715092658997, "learning_rate": 4.114312277330617e-06, "loss": 0.3459, "step": 7169 }, { "epoch": 1.8033199195171026, "grad_norm": 0.300238698720932, "learning_rate": 4.112872207295262e-06, "loss": 0.3266, "step": 7170 }, { "epoch": 1.8035714285714286, "grad_norm": 0.3531527519226074, "learning_rate": 4.1114322132368524e-06, "loss": 0.3471, "step": 7171 }, { "epoch": 1.8038229376257546, "grad_norm": 0.3371135890483856, "learning_rate": 4.109992295278714e-06, "loss": 0.3284, "step": 7172 }, { "epoch": 1.8040744466800804, "grad_norm": 0.30015435814857483, "learning_rate": 4.108552453544169e-06, "loss": 0.3471, "step": 7173 }, { "epoch": 1.8043259557344065, "grad_norm": 0.29010263085365295, "learning_rate": 4.107112688156528e-06, "loss": 0.3366, "step": 7174 }, { "epoch": 1.8045774647887325, "grad_norm": 0.3293246626853943, "learning_rate": 4.105672999239098e-06, "loss": 0.3481, "step": 7175 }, { "epoch": 1.8048289738430583, "grad_norm": 0.34103676676750183, "learning_rate": 4.104233386915185e-06, "loss": 0.341, "step": 7176 }, { "epoch": 1.8050804828973843, "grad_norm": 0.2750874161720276, "learning_rate": 4.102793851308074e-06, "loss": 0.3327, "step": 7177 }, { "epoch": 1.8053319919517103, "grad_norm": 0.30653923749923706, "learning_rate": 4.101354392541061e-06, "loss": 0.335, "step": 7178 }, { "epoch": 1.8055835010060362, "grad_norm": 0.32137489318847656, "learning_rate": 4.099915010737419e-06, "loss": 0.3409, "step": 7179 }, { "epoch": 1.8058350100603622, "grad_norm": 0.3056570291519165, "learning_rate": 4.098475706020428e-06, "loss": 0.3229, "step": 7180 }, { "epoch": 1.8060865191146882, "grad_norm": 0.30447155237197876, "learning_rate": 4.0970364785133506e-06, "loss": 0.3495, "step": 7181 }, { "epoch": 1.806338028169014, "grad_norm": 0.2888796925544739, "learning_rate": 4.0955973283394525e-06, "loss": 0.3445, "step": 7182 }, { "epoch": 1.80658953722334, "grad_norm": 0.29918986558914185, "learning_rate": 4.094158255621983e-06, "loss": 0.3205, "step": 7183 }, { "epoch": 1.806841046277666, "grad_norm": 0.31096193194389343, "learning_rate": 4.0927192604841935e-06, "loss": 0.3415, "step": 7184 }, { "epoch": 1.8070925553319919, "grad_norm": 0.3002293109893799, "learning_rate": 4.0912803430493215e-06, "loss": 0.33, "step": 7185 }, { "epoch": 1.807344064386318, "grad_norm": 0.2879883348941803, "learning_rate": 4.089841503440603e-06, "loss": 0.3347, "step": 7186 }, { "epoch": 1.807595573440644, "grad_norm": 0.2978774607181549, "learning_rate": 4.088402741781269e-06, "loss": 0.3424, "step": 7187 }, { "epoch": 1.8078470824949697, "grad_norm": 0.3050556480884552, "learning_rate": 4.086964058194534e-06, "loss": 0.3397, "step": 7188 }, { "epoch": 1.8080985915492958, "grad_norm": 0.30784785747528076, "learning_rate": 4.085525452803618e-06, "loss": 0.3586, "step": 7189 }, { "epoch": 1.8083501006036218, "grad_norm": 0.3016720712184906, "learning_rate": 4.084086925731723e-06, "loss": 0.3537, "step": 7190 }, { "epoch": 1.8086016096579476, "grad_norm": 0.3045290410518646, "learning_rate": 4.0826484771020565e-06, "loss": 0.3393, "step": 7191 }, { "epoch": 1.8088531187122736, "grad_norm": 0.2894311249256134, "learning_rate": 4.081210107037806e-06, "loss": 0.351, "step": 7192 }, { "epoch": 1.8091046277665996, "grad_norm": 0.30208760499954224, "learning_rate": 4.079771815662164e-06, "loss": 0.3349, "step": 7193 }, { "epoch": 1.8093561368209254, "grad_norm": 0.3042428195476532, "learning_rate": 4.078333603098307e-06, "loss": 0.3831, "step": 7194 }, { "epoch": 1.8096076458752515, "grad_norm": 0.3321954905986786, "learning_rate": 4.076895469469413e-06, "loss": 0.3566, "step": 7195 }, { "epoch": 1.8098591549295775, "grad_norm": 0.3012005686759949, "learning_rate": 4.075457414898646e-06, "loss": 0.3242, "step": 7196 }, { "epoch": 1.8101106639839033, "grad_norm": 0.29138419032096863, "learning_rate": 4.074019439509168e-06, "loss": 0.3078, "step": 7197 }, { "epoch": 1.8103621730382293, "grad_norm": 0.29968154430389404, "learning_rate": 4.072581543424132e-06, "loss": 0.3189, "step": 7198 }, { "epoch": 1.8106136820925554, "grad_norm": 0.3333386778831482, "learning_rate": 4.071143726766683e-06, "loss": 0.3542, "step": 7199 }, { "epoch": 1.8108651911468812, "grad_norm": 0.31147098541259766, "learning_rate": 4.069705989659966e-06, "loss": 0.3586, "step": 7200 }, { "epoch": 1.8111167002012074, "grad_norm": 0.31270739436149597, "learning_rate": 4.0682683322271086e-06, "loss": 0.3408, "step": 7201 }, { "epoch": 1.8113682092555332, "grad_norm": 0.30901169776916504, "learning_rate": 4.066830754591242e-06, "loss": 0.346, "step": 7202 }, { "epoch": 1.811619718309859, "grad_norm": 0.30854368209838867, "learning_rate": 4.065393256875481e-06, "loss": 0.3383, "step": 7203 }, { "epoch": 1.8118712273641853, "grad_norm": 0.304512083530426, "learning_rate": 4.063955839202943e-06, "loss": 0.312, "step": 7204 }, { "epoch": 1.812122736418511, "grad_norm": 0.29708781838417053, "learning_rate": 4.062518501696729e-06, "loss": 0.3454, "step": 7205 }, { "epoch": 1.8123742454728369, "grad_norm": 0.304839551448822, "learning_rate": 4.061081244479943e-06, "loss": 0.3573, "step": 7206 }, { "epoch": 1.8126257545271631, "grad_norm": 0.3408084213733673, "learning_rate": 4.059644067675673e-06, "loss": 0.3337, "step": 7207 }, { "epoch": 1.812877263581489, "grad_norm": 0.2825142443180084, "learning_rate": 4.058206971407006e-06, "loss": 0.378, "step": 7208 }, { "epoch": 1.8131287726358147, "grad_norm": 0.285431444644928, "learning_rate": 4.05676995579702e-06, "loss": 0.3331, "step": 7209 }, { "epoch": 1.813380281690141, "grad_norm": 0.32098081707954407, "learning_rate": 4.055333020968787e-06, "loss": 0.342, "step": 7210 }, { "epoch": 1.8136317907444668, "grad_norm": 0.2935009002685547, "learning_rate": 4.05389616704537e-06, "loss": 0.3568, "step": 7211 }, { "epoch": 1.8138832997987926, "grad_norm": 0.29505932331085205, "learning_rate": 4.052459394149829e-06, "loss": 0.351, "step": 7212 }, { "epoch": 1.8141348088531188, "grad_norm": 0.30334311723709106, "learning_rate": 4.0510227024052115e-06, "loss": 0.3446, "step": 7213 }, { "epoch": 1.8143863179074446, "grad_norm": 0.3207496404647827, "learning_rate": 4.049586091934563e-06, "loss": 0.3443, "step": 7214 }, { "epoch": 1.8146378269617707, "grad_norm": 0.31141701340675354, "learning_rate": 4.048149562860921e-06, "loss": 0.3418, "step": 7215 }, { "epoch": 1.8148893360160967, "grad_norm": 0.3144972622394562, "learning_rate": 4.046713115307314e-06, "loss": 0.3316, "step": 7216 }, { "epoch": 1.8151408450704225, "grad_norm": 0.30265048146247864, "learning_rate": 4.045276749396764e-06, "loss": 0.3368, "step": 7217 }, { "epoch": 1.8153923541247485, "grad_norm": 0.2881247401237488, "learning_rate": 4.043840465252289e-06, "loss": 0.3493, "step": 7218 }, { "epoch": 1.8156438631790746, "grad_norm": 0.30661842226982117, "learning_rate": 4.042404262996894e-06, "loss": 0.3615, "step": 7219 }, { "epoch": 1.8158953722334004, "grad_norm": 0.2946421205997467, "learning_rate": 4.0409681427535855e-06, "loss": 0.331, "step": 7220 }, { "epoch": 1.8161468812877264, "grad_norm": 0.34275466203689575, "learning_rate": 4.039532104645354e-06, "loss": 0.3441, "step": 7221 }, { "epoch": 1.8163983903420524, "grad_norm": 0.3059110641479492, "learning_rate": 4.0380961487951915e-06, "loss": 0.3422, "step": 7222 }, { "epoch": 1.8166498993963782, "grad_norm": 0.29724571108818054, "learning_rate": 4.0366602753260745e-06, "loss": 0.3245, "step": 7223 }, { "epoch": 1.8169014084507042, "grad_norm": 0.31005269289016724, "learning_rate": 4.035224484360979e-06, "loss": 0.335, "step": 7224 }, { "epoch": 1.8171529175050303, "grad_norm": 0.31909066438674927, "learning_rate": 4.03378877602287e-06, "loss": 0.359, "step": 7225 }, { "epoch": 1.817404426559356, "grad_norm": 0.2967131435871124, "learning_rate": 4.032353150434709e-06, "loss": 0.3227, "step": 7226 }, { "epoch": 1.817655935613682, "grad_norm": 0.3029683828353882, "learning_rate": 4.030917607719446e-06, "loss": 0.3581, "step": 7227 }, { "epoch": 1.8179074446680081, "grad_norm": 0.3194609582424164, "learning_rate": 4.029482148000028e-06, "loss": 0.3354, "step": 7228 }, { "epoch": 1.818158953722334, "grad_norm": 0.3186091482639313, "learning_rate": 4.028046771399391e-06, "loss": 0.3477, "step": 7229 }, { "epoch": 1.81841046277666, "grad_norm": 0.28328919410705566, "learning_rate": 4.026611478040468e-06, "loss": 0.3498, "step": 7230 }, { "epoch": 1.818661971830986, "grad_norm": 0.28632745146751404, "learning_rate": 4.025176268046184e-06, "loss": 0.3525, "step": 7231 }, { "epoch": 1.8189134808853118, "grad_norm": 0.310872346162796, "learning_rate": 4.023741141539453e-06, "loss": 0.3418, "step": 7232 }, { "epoch": 1.8191649899396378, "grad_norm": 0.3138779401779175, "learning_rate": 4.022306098643186e-06, "loss": 0.3636, "step": 7233 }, { "epoch": 1.8194164989939638, "grad_norm": 0.31211575865745544, "learning_rate": 4.020871139480285e-06, "loss": 0.3466, "step": 7234 }, { "epoch": 1.8196680080482897, "grad_norm": 0.2843218445777893, "learning_rate": 4.019436264173646e-06, "loss": 0.3238, "step": 7235 }, { "epoch": 1.8199195171026157, "grad_norm": 0.3115207552909851, "learning_rate": 4.018001472846156e-06, "loss": 0.342, "step": 7236 }, { "epoch": 1.8201710261569417, "grad_norm": 0.3359984755516052, "learning_rate": 4.0165667656206975e-06, "loss": 0.3548, "step": 7237 }, { "epoch": 1.8204225352112675, "grad_norm": 0.31638795137405396, "learning_rate": 4.0151321426201414e-06, "loss": 0.363, "step": 7238 }, { "epoch": 1.8206740442655935, "grad_norm": 0.3151678442955017, "learning_rate": 4.013697603967356e-06, "loss": 0.3564, "step": 7239 }, { "epoch": 1.8209255533199196, "grad_norm": 0.2856763005256653, "learning_rate": 4.012263149785203e-06, "loss": 0.3497, "step": 7240 }, { "epoch": 1.8211770623742454, "grad_norm": 0.32092490792274475, "learning_rate": 4.010828780196529e-06, "loss": 0.3394, "step": 7241 }, { "epoch": 1.8214285714285714, "grad_norm": 0.3086574971675873, "learning_rate": 4.009394495324185e-06, "loss": 0.3707, "step": 7242 }, { "epoch": 1.8216800804828974, "grad_norm": 0.3423568606376648, "learning_rate": 4.007960295291002e-06, "loss": 0.3474, "step": 7243 }, { "epoch": 1.8219315895372232, "grad_norm": 0.28429266810417175, "learning_rate": 4.006526180219816e-06, "loss": 0.3341, "step": 7244 }, { "epoch": 1.8221830985915493, "grad_norm": 0.2965366244316101, "learning_rate": 4.005092150233445e-06, "loss": 0.3395, "step": 7245 }, { "epoch": 1.8224346076458753, "grad_norm": 0.34402769804000854, "learning_rate": 4.00365820545471e-06, "loss": 0.3643, "step": 7246 }, { "epoch": 1.822686116700201, "grad_norm": 0.30077889561653137, "learning_rate": 4.002224346006415e-06, "loss": 0.3334, "step": 7247 }, { "epoch": 1.8229376257545271, "grad_norm": 0.31439444422721863, "learning_rate": 4.000790572011365e-06, "loss": 0.3382, "step": 7248 }, { "epoch": 1.8231891348088531, "grad_norm": 0.34265372157096863, "learning_rate": 3.999356883592348e-06, "loss": 0.3576, "step": 7249 }, { "epoch": 1.823440643863179, "grad_norm": 0.3146701753139496, "learning_rate": 3.997923280872154e-06, "loss": 0.3728, "step": 7250 }, { "epoch": 1.8236921529175052, "grad_norm": 0.3109131455421448, "learning_rate": 3.9964897639735644e-06, "loss": 0.3297, "step": 7251 }, { "epoch": 1.823943661971831, "grad_norm": 0.28482386469841003, "learning_rate": 3.995056333019347e-06, "loss": 0.33, "step": 7252 }, { "epoch": 1.8241951710261568, "grad_norm": 0.3102104067802429, "learning_rate": 3.993622988132269e-06, "loss": 0.3738, "step": 7253 }, { "epoch": 1.824446680080483, "grad_norm": 0.34564486145973206, "learning_rate": 3.992189729435085e-06, "loss": 0.3482, "step": 7254 }, { "epoch": 1.8246981891348089, "grad_norm": 0.3167095482349396, "learning_rate": 3.990756557050548e-06, "loss": 0.3487, "step": 7255 }, { "epoch": 1.8249496981891347, "grad_norm": 0.28097379207611084, "learning_rate": 3.989323471101395e-06, "loss": 0.3564, "step": 7256 }, { "epoch": 1.825201207243461, "grad_norm": 0.29964274168014526, "learning_rate": 3.987890471710367e-06, "loss": 0.3434, "step": 7257 }, { "epoch": 1.8254527162977867, "grad_norm": 0.32765746116638184, "learning_rate": 3.986457559000185e-06, "loss": 0.3327, "step": 7258 }, { "epoch": 1.8257042253521125, "grad_norm": 0.3484762907028198, "learning_rate": 3.985024733093576e-06, "loss": 0.3504, "step": 7259 }, { "epoch": 1.8259557344064388, "grad_norm": 0.3308793306350708, "learning_rate": 3.9835919941132464e-06, "loss": 0.3428, "step": 7260 }, { "epoch": 1.8262072434607646, "grad_norm": 0.3149087429046631, "learning_rate": 3.982159342181904e-06, "loss": 0.3478, "step": 7261 }, { "epoch": 1.8264587525150904, "grad_norm": 0.3215365707874298, "learning_rate": 3.9807267774222475e-06, "loss": 0.3417, "step": 7262 }, { "epoch": 1.8267102615694166, "grad_norm": 0.3201581835746765, "learning_rate": 3.979294299956965e-06, "loss": 0.3392, "step": 7263 }, { "epoch": 1.8269617706237424, "grad_norm": 0.3202977478504181, "learning_rate": 3.977861909908741e-06, "loss": 0.3425, "step": 7264 }, { "epoch": 1.8272132796780685, "grad_norm": 0.31886807084083557, "learning_rate": 3.976429607400249e-06, "loss": 0.371, "step": 7265 }, { "epoch": 1.8274647887323945, "grad_norm": 0.3250950872898102, "learning_rate": 3.9749973925541585e-06, "loss": 0.3473, "step": 7266 }, { "epoch": 1.8277162977867203, "grad_norm": 0.30726975202560425, "learning_rate": 3.973565265493129e-06, "loss": 0.3644, "step": 7267 }, { "epoch": 1.8279678068410463, "grad_norm": 0.3059992492198944, "learning_rate": 3.972133226339812e-06, "loss": 0.3607, "step": 7268 }, { "epoch": 1.8282193158953723, "grad_norm": 0.32286500930786133, "learning_rate": 3.970701275216855e-06, "loss": 0.339, "step": 7269 }, { "epoch": 1.8284708249496981, "grad_norm": 0.319062203168869, "learning_rate": 3.969269412246895e-06, "loss": 0.3551, "step": 7270 }, { "epoch": 1.8287223340040242, "grad_norm": 0.3119877576828003, "learning_rate": 3.967837637552561e-06, "loss": 0.3506, "step": 7271 }, { "epoch": 1.8289738430583502, "grad_norm": 0.32227200269699097, "learning_rate": 3.966405951256475e-06, "loss": 0.3581, "step": 7272 }, { "epoch": 1.829225352112676, "grad_norm": 0.3127002716064453, "learning_rate": 3.964974353481254e-06, "loss": 0.3448, "step": 7273 }, { "epoch": 1.829476861167002, "grad_norm": 0.3057059049606323, "learning_rate": 3.963542844349505e-06, "loss": 0.374, "step": 7274 }, { "epoch": 1.829728370221328, "grad_norm": 0.29945117235183716, "learning_rate": 3.962111423983827e-06, "loss": 0.3443, "step": 7275 }, { "epoch": 1.8299798792756539, "grad_norm": 0.30039167404174805, "learning_rate": 3.960680092506812e-06, "loss": 0.3194, "step": 7276 }, { "epoch": 1.83023138832998, "grad_norm": 0.32396718859672546, "learning_rate": 3.9592488500410465e-06, "loss": 0.3649, "step": 7277 }, { "epoch": 1.830482897384306, "grad_norm": 0.33545982837677, "learning_rate": 3.957817696709104e-06, "loss": 0.3568, "step": 7278 }, { "epoch": 1.8307344064386317, "grad_norm": 0.2859671413898468, "learning_rate": 3.9563866326335575e-06, "loss": 0.3704, "step": 7279 }, { "epoch": 1.8309859154929577, "grad_norm": 0.2837298512458801, "learning_rate": 3.9549556579369665e-06, "loss": 0.3228, "step": 7280 }, { "epoch": 1.8312374245472838, "grad_norm": 0.2976028025150299, "learning_rate": 3.953524772741886e-06, "loss": 0.3342, "step": 7281 }, { "epoch": 1.8314889336016096, "grad_norm": 0.30670884251594543, "learning_rate": 3.952093977170861e-06, "loss": 0.3578, "step": 7282 }, { "epoch": 1.8317404426559356, "grad_norm": 0.2988281846046448, "learning_rate": 3.950663271346432e-06, "loss": 0.3489, "step": 7283 }, { "epoch": 1.8319919517102616, "grad_norm": 0.276311993598938, "learning_rate": 3.94923265539113e-06, "loss": 0.3466, "step": 7284 }, { "epoch": 1.8322434607645874, "grad_norm": 0.3069443106651306, "learning_rate": 3.947802129427476e-06, "loss": 0.3689, "step": 7285 }, { "epoch": 1.8324949698189135, "grad_norm": 0.3089459240436554, "learning_rate": 3.946371693577988e-06, "loss": 0.3199, "step": 7286 }, { "epoch": 1.8327464788732395, "grad_norm": 0.3473307192325592, "learning_rate": 3.9449413479651715e-06, "loss": 0.3832, "step": 7287 }, { "epoch": 1.8329979879275653, "grad_norm": 0.2900719940662384, "learning_rate": 3.94351109271153e-06, "loss": 0.3357, "step": 7288 }, { "epoch": 1.8332494969818913, "grad_norm": 0.29746437072753906, "learning_rate": 3.9420809279395525e-06, "loss": 0.3304, "step": 7289 }, { "epoch": 1.8335010060362174, "grad_norm": 0.3016701340675354, "learning_rate": 3.940650853771727e-06, "loss": 0.371, "step": 7290 }, { "epoch": 1.8337525150905432, "grad_norm": 0.2984379231929779, "learning_rate": 3.939220870330527e-06, "loss": 0.3555, "step": 7291 }, { "epoch": 1.8340040241448692, "grad_norm": 0.3294828534126282, "learning_rate": 3.937790977738425e-06, "loss": 0.3452, "step": 7292 }, { "epoch": 1.8342555331991952, "grad_norm": 0.3317073881626129, "learning_rate": 3.936361176117879e-06, "loss": 0.3459, "step": 7293 }, { "epoch": 1.834507042253521, "grad_norm": 0.2974916994571686, "learning_rate": 3.934931465591343e-06, "loss": 0.3601, "step": 7294 }, { "epoch": 1.834758551307847, "grad_norm": 0.30047857761383057, "learning_rate": 3.9335018462812664e-06, "loss": 0.3436, "step": 7295 }, { "epoch": 1.835010060362173, "grad_norm": 0.3131597936153412, "learning_rate": 3.9320723183100824e-06, "loss": 0.3716, "step": 7296 }, { "epoch": 1.8352615694164989, "grad_norm": 0.3238302171230316, "learning_rate": 3.930642881800227e-06, "loss": 0.3654, "step": 7297 }, { "epoch": 1.835513078470825, "grad_norm": 0.31614577770233154, "learning_rate": 3.9292135368741155e-06, "loss": 0.3463, "step": 7298 }, { "epoch": 1.835764587525151, "grad_norm": 0.31243595480918884, "learning_rate": 3.927784283654168e-06, "loss": 0.3459, "step": 7299 }, { "epoch": 1.8360160965794767, "grad_norm": 0.2986874580383301, "learning_rate": 3.926355122262787e-06, "loss": 0.3263, "step": 7300 }, { "epoch": 1.836267605633803, "grad_norm": 0.33278465270996094, "learning_rate": 3.9249260528223745e-06, "loss": 0.3431, "step": 7301 }, { "epoch": 1.8365191146881288, "grad_norm": 0.3187868893146515, "learning_rate": 3.923497075455319e-06, "loss": 0.3582, "step": 7302 }, { "epoch": 1.8367706237424546, "grad_norm": 0.33840519189834595, "learning_rate": 3.922068190284005e-06, "loss": 0.363, "step": 7303 }, { "epoch": 1.8370221327967808, "grad_norm": 0.32831573486328125, "learning_rate": 3.920639397430806e-06, "loss": 0.3694, "step": 7304 }, { "epoch": 1.8372736418511066, "grad_norm": 0.3055534362792969, "learning_rate": 3.91921069701809e-06, "loss": 0.3389, "step": 7305 }, { "epoch": 1.8375251509054324, "grad_norm": 0.31282302737236023, "learning_rate": 3.917782089168218e-06, "loss": 0.3161, "step": 7306 }, { "epoch": 1.8377766599597587, "grad_norm": 0.350875586271286, "learning_rate": 3.916353574003538e-06, "loss": 0.3565, "step": 7307 }, { "epoch": 1.8380281690140845, "grad_norm": 0.3590453267097473, "learning_rate": 3.914925151646397e-06, "loss": 0.3605, "step": 7308 }, { "epoch": 1.8382796780684103, "grad_norm": 0.3325250446796417, "learning_rate": 3.913496822219127e-06, "loss": 0.3876, "step": 7309 }, { "epoch": 1.8385311871227366, "grad_norm": 0.2975671589374542, "learning_rate": 3.912068585844059e-06, "loss": 0.3617, "step": 7310 }, { "epoch": 1.8387826961770624, "grad_norm": 0.30971285700798035, "learning_rate": 3.910640442643508e-06, "loss": 0.3263, "step": 7311 }, { "epoch": 1.8390342052313882, "grad_norm": 0.34714826941490173, "learning_rate": 3.909212392739791e-06, "loss": 0.333, "step": 7312 }, { "epoch": 1.8392857142857144, "grad_norm": 0.34102630615234375, "learning_rate": 3.907784436255205e-06, "loss": 0.3454, "step": 7313 }, { "epoch": 1.8395372233400402, "grad_norm": 0.31585681438446045, "learning_rate": 3.906356573312052e-06, "loss": 0.3302, "step": 7314 }, { "epoch": 1.8397887323943662, "grad_norm": 0.32975485920906067, "learning_rate": 3.904928804032615e-06, "loss": 0.3249, "step": 7315 }, { "epoch": 1.8400402414486923, "grad_norm": 0.3263443112373352, "learning_rate": 3.903501128539175e-06, "loss": 0.331, "step": 7316 }, { "epoch": 1.840291750503018, "grad_norm": 0.33371224999427795, "learning_rate": 3.902073546954006e-06, "loss": 0.3425, "step": 7317 }, { "epoch": 1.840543259557344, "grad_norm": 0.35445427894592285, "learning_rate": 3.900646059399367e-06, "loss": 0.3474, "step": 7318 }, { "epoch": 1.8407947686116701, "grad_norm": 0.3176020085811615, "learning_rate": 3.899218665997517e-06, "loss": 0.3538, "step": 7319 }, { "epoch": 1.841046277665996, "grad_norm": 0.34081193804740906, "learning_rate": 3.8977913668707e-06, "loss": 0.3394, "step": 7320 }, { "epoch": 1.841297786720322, "grad_norm": 0.35305309295654297, "learning_rate": 3.896364162141159e-06, "loss": 0.3447, "step": 7321 }, { "epoch": 1.841549295774648, "grad_norm": 0.2991912364959717, "learning_rate": 3.894937051931122e-06, "loss": 0.3299, "step": 7322 }, { "epoch": 1.8418008048289738, "grad_norm": 0.32608288526535034, "learning_rate": 3.8935100363628135e-06, "loss": 0.3316, "step": 7323 }, { "epoch": 1.8420523138832998, "grad_norm": 0.3336857855319977, "learning_rate": 3.892083115558447e-06, "loss": 0.3469, "step": 7324 }, { "epoch": 1.8423038229376258, "grad_norm": 0.3458016514778137, "learning_rate": 3.890656289640233e-06, "loss": 0.3666, "step": 7325 }, { "epoch": 1.8425553319919517, "grad_norm": 0.3222261667251587, "learning_rate": 3.889229558730365e-06, "loss": 0.3593, "step": 7326 }, { "epoch": 1.8428068410462777, "grad_norm": 0.31532028317451477, "learning_rate": 3.887802922951038e-06, "loss": 0.3456, "step": 7327 }, { "epoch": 1.8430583501006037, "grad_norm": 0.306510865688324, "learning_rate": 3.886376382424433e-06, "loss": 0.3506, "step": 7328 }, { "epoch": 1.8433098591549295, "grad_norm": 0.3103088438510895, "learning_rate": 3.884949937272724e-06, "loss": 0.3657, "step": 7329 }, { "epoch": 1.8435613682092555, "grad_norm": 0.3240601718425751, "learning_rate": 3.883523587618077e-06, "loss": 0.34, "step": 7330 }, { "epoch": 1.8438128772635816, "grad_norm": 0.30873456597328186, "learning_rate": 3.8820973335826494e-06, "loss": 0.3478, "step": 7331 }, { "epoch": 1.8440643863179074, "grad_norm": 0.34828251600265503, "learning_rate": 3.880671175288592e-06, "loss": 0.3613, "step": 7332 }, { "epoch": 1.8443158953722334, "grad_norm": 0.30683434009552, "learning_rate": 3.879245112858046e-06, "loss": 0.3606, "step": 7333 }, { "epoch": 1.8445674044265594, "grad_norm": 0.2935948073863983, "learning_rate": 3.877819146413144e-06, "loss": 0.3395, "step": 7334 }, { "epoch": 1.8448189134808852, "grad_norm": 0.2853759527206421, "learning_rate": 3.876393276076013e-06, "loss": 0.3525, "step": 7335 }, { "epoch": 1.8450704225352113, "grad_norm": 0.31282341480255127, "learning_rate": 3.8749675019687684e-06, "loss": 0.3469, "step": 7336 }, { "epoch": 1.8453219315895373, "grad_norm": 0.2779003083705902, "learning_rate": 3.873541824213518e-06, "loss": 0.3664, "step": 7337 }, { "epoch": 1.845573440643863, "grad_norm": 0.3223724961280823, "learning_rate": 3.872116242932363e-06, "loss": 0.3689, "step": 7338 }, { "epoch": 1.845824949698189, "grad_norm": 0.3017749786376953, "learning_rate": 3.870690758247399e-06, "loss": 0.3387, "step": 7339 }, { "epoch": 1.8460764587525151, "grad_norm": 0.3117123544216156, "learning_rate": 3.869265370280702e-06, "loss": 0.318, "step": 7340 }, { "epoch": 1.846327967806841, "grad_norm": 0.29091158509254456, "learning_rate": 3.867840079154356e-06, "loss": 0.3381, "step": 7341 }, { "epoch": 1.846579476861167, "grad_norm": 0.3203681707382202, "learning_rate": 3.866414884990422e-06, "loss": 0.3435, "step": 7342 }, { "epoch": 1.846830985915493, "grad_norm": 0.3184633255004883, "learning_rate": 3.864989787910964e-06, "loss": 0.3623, "step": 7343 }, { "epoch": 1.8470824949698188, "grad_norm": 0.2918001413345337, "learning_rate": 3.863564788038027e-06, "loss": 0.37, "step": 7344 }, { "epoch": 1.8473340040241448, "grad_norm": 0.3107215166091919, "learning_rate": 3.862139885493659e-06, "loss": 0.33, "step": 7345 }, { "epoch": 1.8475855130784709, "grad_norm": 0.3049178719520569, "learning_rate": 3.860715080399889e-06, "loss": 0.3381, "step": 7346 }, { "epoch": 1.8478370221327967, "grad_norm": 0.30147770047187805, "learning_rate": 3.859290372878748e-06, "loss": 0.3285, "step": 7347 }, { "epoch": 1.8480885311871227, "grad_norm": 0.2963356375694275, "learning_rate": 3.857865763052247e-06, "loss": 0.3157, "step": 7348 }, { "epoch": 1.8483400402414487, "grad_norm": 0.29773834347724915, "learning_rate": 3.856441251042399e-06, "loss": 0.3511, "step": 7349 }, { "epoch": 1.8485915492957745, "grad_norm": 0.3260302245616913, "learning_rate": 3.8550168369712055e-06, "loss": 0.3778, "step": 7350 }, { "epoch": 1.8488430583501008, "grad_norm": 0.30447903275489807, "learning_rate": 3.8535925209606554e-06, "loss": 0.3527, "step": 7351 }, { "epoch": 1.8490945674044266, "grad_norm": 0.3211756944656372, "learning_rate": 3.852168303132735e-06, "loss": 0.3288, "step": 7352 }, { "epoch": 1.8493460764587524, "grad_norm": 0.3038700819015503, "learning_rate": 3.8507441836094175e-06, "loss": 0.3574, "step": 7353 }, { "epoch": 1.8495975855130786, "grad_norm": 0.3078562915325165, "learning_rate": 3.849320162512672e-06, "loss": 0.3248, "step": 7354 }, { "epoch": 1.8498490945674044, "grad_norm": 0.31693726778030396, "learning_rate": 3.847896239964455e-06, "loss": 0.3387, "step": 7355 }, { "epoch": 1.8501006036217302, "grad_norm": 0.32334980368614197, "learning_rate": 3.8464724160867195e-06, "loss": 0.3423, "step": 7356 }, { "epoch": 1.8503521126760565, "grad_norm": 0.3063230812549591, "learning_rate": 3.845048691001402e-06, "loss": 0.3663, "step": 7357 }, { "epoch": 1.8506036217303823, "grad_norm": 0.30338233709335327, "learning_rate": 3.8436250648304415e-06, "loss": 0.3548, "step": 7358 }, { "epoch": 1.850855130784708, "grad_norm": 0.31649091839790344, "learning_rate": 3.842201537695758e-06, "loss": 0.3481, "step": 7359 }, { "epoch": 1.8511066398390343, "grad_norm": 0.31132972240448, "learning_rate": 3.84077810971927e-06, "loss": 0.3467, "step": 7360 }, { "epoch": 1.8513581488933601, "grad_norm": 0.3053782284259796, "learning_rate": 3.839354781022886e-06, "loss": 0.3211, "step": 7361 }, { "epoch": 1.8516096579476862, "grad_norm": 0.314434677362442, "learning_rate": 3.8379315517285025e-06, "loss": 0.3406, "step": 7362 }, { "epoch": 1.8518611670020122, "grad_norm": 0.29468590021133423, "learning_rate": 3.836508421958014e-06, "loss": 0.3442, "step": 7363 }, { "epoch": 1.852112676056338, "grad_norm": 0.3091891407966614, "learning_rate": 3.8350853918332974e-06, "loss": 0.324, "step": 7364 }, { "epoch": 1.852364185110664, "grad_norm": 0.30145037174224854, "learning_rate": 3.833662461476233e-06, "loss": 0.3225, "step": 7365 }, { "epoch": 1.85261569416499, "grad_norm": 0.31025686860084534, "learning_rate": 3.8322396310086785e-06, "loss": 0.3593, "step": 7366 }, { "epoch": 1.8528672032193159, "grad_norm": 0.32085564732551575, "learning_rate": 3.8308169005524964e-06, "loss": 0.3509, "step": 7367 }, { "epoch": 1.8531187122736419, "grad_norm": 0.3120383322238922, "learning_rate": 3.829394270229531e-06, "loss": 0.3462, "step": 7368 }, { "epoch": 1.853370221327968, "grad_norm": 0.3061147928237915, "learning_rate": 3.827971740161625e-06, "loss": 0.365, "step": 7369 }, { "epoch": 1.8536217303822937, "grad_norm": 0.2813125550746918, "learning_rate": 3.826549310470605e-06, "loss": 0.3114, "step": 7370 }, { "epoch": 1.8538732394366197, "grad_norm": 0.30677247047424316, "learning_rate": 3.825126981278296e-06, "loss": 0.368, "step": 7371 }, { "epoch": 1.8541247484909458, "grad_norm": 0.3084120452404022, "learning_rate": 3.823704752706512e-06, "loss": 0.3603, "step": 7372 }, { "epoch": 1.8543762575452716, "grad_norm": 0.29872360825538635, "learning_rate": 3.8222826248770555e-06, "loss": 0.3452, "step": 7373 }, { "epoch": 1.8546277665995976, "grad_norm": 0.32304060459136963, "learning_rate": 3.820860597911726e-06, "loss": 0.3371, "step": 7374 }, { "epoch": 1.8548792756539236, "grad_norm": 0.3058139681816101, "learning_rate": 3.819438671932308e-06, "loss": 0.3539, "step": 7375 }, { "epoch": 1.8551307847082494, "grad_norm": 0.3089655041694641, "learning_rate": 3.818016847060585e-06, "loss": 0.3519, "step": 7376 }, { "epoch": 1.8553822937625755, "grad_norm": 0.2986527681350708, "learning_rate": 3.816595123418322e-06, "loss": 0.3401, "step": 7377 }, { "epoch": 1.8556338028169015, "grad_norm": 0.33130231499671936, "learning_rate": 3.815173501127285e-06, "loss": 0.3656, "step": 7378 }, { "epoch": 1.8558853118712273, "grad_norm": 0.328773558139801, "learning_rate": 3.813751980309224e-06, "loss": 0.3466, "step": 7379 }, { "epoch": 1.8561368209255533, "grad_norm": 0.32407474517822266, "learning_rate": 3.8123305610858863e-06, "loss": 0.35, "step": 7380 }, { "epoch": 1.8563883299798793, "grad_norm": 0.3152269721031189, "learning_rate": 3.810909243579004e-06, "loss": 0.3328, "step": 7381 }, { "epoch": 1.8566398390342052, "grad_norm": 0.32206395268440247, "learning_rate": 3.8094880279103063e-06, "loss": 0.353, "step": 7382 }, { "epoch": 1.8568913480885312, "grad_norm": 0.3166285455226898, "learning_rate": 3.808066914201513e-06, "loss": 0.3548, "step": 7383 }, { "epoch": 1.8571428571428572, "grad_norm": 0.3051060438156128, "learning_rate": 3.80664590257433e-06, "loss": 0.3394, "step": 7384 }, { "epoch": 1.857394366197183, "grad_norm": 0.33410540223121643, "learning_rate": 3.8052249931504614e-06, "loss": 0.3551, "step": 7385 }, { "epoch": 1.857645875251509, "grad_norm": 0.2926395535469055, "learning_rate": 3.8038041860515962e-06, "loss": 0.3339, "step": 7386 }, { "epoch": 1.857897384305835, "grad_norm": 0.3076823949813843, "learning_rate": 3.802383481399421e-06, "loss": 0.3417, "step": 7387 }, { "epoch": 1.8581488933601609, "grad_norm": 0.3127098083496094, "learning_rate": 3.8009628793156056e-06, "loss": 0.3316, "step": 7388 }, { "epoch": 1.858400402414487, "grad_norm": 0.3178151845932007, "learning_rate": 3.799542379921821e-06, "loss": 0.3539, "step": 7389 }, { "epoch": 1.858651911468813, "grad_norm": 0.3181746006011963, "learning_rate": 3.79812198333972e-06, "loss": 0.3584, "step": 7390 }, { "epoch": 1.8589034205231387, "grad_norm": 0.33825597167015076, "learning_rate": 3.7967016896909524e-06, "loss": 0.3672, "step": 7391 }, { "epoch": 1.8591549295774648, "grad_norm": 0.2978416085243225, "learning_rate": 3.7952814990971575e-06, "loss": 0.3398, "step": 7392 }, { "epoch": 1.8594064386317908, "grad_norm": 0.3217383623123169, "learning_rate": 3.7938614116799655e-06, "loss": 0.3486, "step": 7393 }, { "epoch": 1.8596579476861166, "grad_norm": 0.34177547693252563, "learning_rate": 3.792441427560998e-06, "loss": 0.3472, "step": 7394 }, { "epoch": 1.8599094567404426, "grad_norm": 0.30881890654563904, "learning_rate": 3.791021546861868e-06, "loss": 0.3487, "step": 7395 }, { "epoch": 1.8601609657947686, "grad_norm": 0.28196999430656433, "learning_rate": 3.7896017697041788e-06, "loss": 0.3386, "step": 7396 }, { "epoch": 1.8604124748490944, "grad_norm": 0.2953253984451294, "learning_rate": 3.788182096209526e-06, "loss": 0.353, "step": 7397 }, { "epoch": 1.8606639839034205, "grad_norm": 0.31603649258613586, "learning_rate": 3.7867625264994954e-06, "loss": 0.357, "step": 7398 }, { "epoch": 1.8609154929577465, "grad_norm": 0.2854761481285095, "learning_rate": 3.7853430606956647e-06, "loss": 0.3246, "step": 7399 }, { "epoch": 1.8611670020120723, "grad_norm": 0.3070826530456543, "learning_rate": 3.783923698919602e-06, "loss": 0.3417, "step": 7400 }, { "epoch": 1.8614185110663986, "grad_norm": 0.3086259365081787, "learning_rate": 3.782504441292867e-06, "loss": 0.3459, "step": 7401 }, { "epoch": 1.8616700201207244, "grad_norm": 0.30796676874160767, "learning_rate": 3.7810852879370084e-06, "loss": 0.3557, "step": 7402 }, { "epoch": 1.8619215291750502, "grad_norm": 0.3138282001018524, "learning_rate": 3.7796662389735718e-06, "loss": 0.3609, "step": 7403 }, { "epoch": 1.8621730382293764, "grad_norm": 0.29625728726387024, "learning_rate": 3.778247294524085e-06, "loss": 0.339, "step": 7404 }, { "epoch": 1.8624245472837022, "grad_norm": 0.29120931029319763, "learning_rate": 3.7768284547100763e-06, "loss": 0.3245, "step": 7405 }, { "epoch": 1.862676056338028, "grad_norm": 0.30197152495384216, "learning_rate": 3.7754097196530566e-06, "loss": 0.3421, "step": 7406 }, { "epoch": 1.8629275653923543, "grad_norm": 0.2998182773590088, "learning_rate": 3.7739910894745345e-06, "loss": 0.3357, "step": 7407 }, { "epoch": 1.86317907444668, "grad_norm": 0.33410176634788513, "learning_rate": 3.7725725642960047e-06, "loss": 0.3829, "step": 7408 }, { "epoch": 1.8634305835010059, "grad_norm": 0.31223413348197937, "learning_rate": 3.771154144238958e-06, "loss": 0.3493, "step": 7409 }, { "epoch": 1.8636820925553321, "grad_norm": 0.32594525814056396, "learning_rate": 3.7697358294248687e-06, "loss": 0.3278, "step": 7410 }, { "epoch": 1.863933601609658, "grad_norm": 0.3015677034854889, "learning_rate": 3.7683176199752115e-06, "loss": 0.3588, "step": 7411 }, { "epoch": 1.864185110663984, "grad_norm": 0.33221563696861267, "learning_rate": 3.7668995160114424e-06, "loss": 0.368, "step": 7412 }, { "epoch": 1.86443661971831, "grad_norm": 0.3092586100101471, "learning_rate": 3.765481517655015e-06, "loss": 0.3353, "step": 7413 }, { "epoch": 1.8646881287726358, "grad_norm": 0.3521566092967987, "learning_rate": 3.7640636250273754e-06, "loss": 0.3502, "step": 7414 }, { "epoch": 1.8649396378269618, "grad_norm": 0.33217549324035645, "learning_rate": 3.7626458382499525e-06, "loss": 0.3509, "step": 7415 }, { "epoch": 1.8651911468812878, "grad_norm": 0.30001208186149597, "learning_rate": 3.7612281574441744e-06, "loss": 0.3576, "step": 7416 }, { "epoch": 1.8654426559356136, "grad_norm": 0.32171934843063354, "learning_rate": 3.7598105827314524e-06, "loss": 0.3404, "step": 7417 }, { "epoch": 1.8656941649899397, "grad_norm": 0.31113913655281067, "learning_rate": 3.758393114233198e-06, "loss": 0.3632, "step": 7418 }, { "epoch": 1.8659456740442657, "grad_norm": 0.31468674540519714, "learning_rate": 3.7569757520708034e-06, "loss": 0.3457, "step": 7419 }, { "epoch": 1.8661971830985915, "grad_norm": 0.29202166199684143, "learning_rate": 3.7555584963656615e-06, "loss": 0.3184, "step": 7420 }, { "epoch": 1.8664486921529175, "grad_norm": 0.3300865888595581, "learning_rate": 3.7541413472391474e-06, "loss": 0.3406, "step": 7421 }, { "epoch": 1.8667002012072436, "grad_norm": 0.32692527770996094, "learning_rate": 3.752724304812635e-06, "loss": 0.3436, "step": 7422 }, { "epoch": 1.8669517102615694, "grad_norm": 0.3171963393688202, "learning_rate": 3.7513073692074802e-06, "loss": 0.3179, "step": 7423 }, { "epoch": 1.8672032193158954, "grad_norm": 0.31962621212005615, "learning_rate": 3.749890540545038e-06, "loss": 0.3683, "step": 7424 }, { "epoch": 1.8674547283702214, "grad_norm": 0.3360038995742798, "learning_rate": 3.748473818946652e-06, "loss": 0.3318, "step": 7425 }, { "epoch": 1.8677062374245472, "grad_norm": 0.3103548586368561, "learning_rate": 3.7470572045336518e-06, "loss": 0.3393, "step": 7426 }, { "epoch": 1.8679577464788732, "grad_norm": 0.3288777470588684, "learning_rate": 3.745640697427366e-06, "loss": 0.3435, "step": 7427 }, { "epoch": 1.8682092555331993, "grad_norm": 0.3266128897666931, "learning_rate": 3.744224297749105e-06, "loss": 0.328, "step": 7428 }, { "epoch": 1.868460764587525, "grad_norm": 0.2997075617313385, "learning_rate": 3.7428080056201794e-06, "loss": 0.3406, "step": 7429 }, { "epoch": 1.868712273641851, "grad_norm": 0.3005223870277405, "learning_rate": 3.7413918211618804e-06, "loss": 0.3256, "step": 7430 }, { "epoch": 1.8689637826961771, "grad_norm": 0.3092925548553467, "learning_rate": 3.739975744495501e-06, "loss": 0.3591, "step": 7431 }, { "epoch": 1.869215291750503, "grad_norm": 0.31816989183425903, "learning_rate": 3.738559775742313e-06, "loss": 0.3276, "step": 7432 }, { "epoch": 1.869466800804829, "grad_norm": 0.29660165309906006, "learning_rate": 3.7371439150235923e-06, "loss": 0.3396, "step": 7433 }, { "epoch": 1.869718309859155, "grad_norm": 0.3243357241153717, "learning_rate": 3.735728162460591e-06, "loss": 0.3516, "step": 7434 }, { "epoch": 1.8699698189134808, "grad_norm": 0.29959172010421753, "learning_rate": 3.734312518174565e-06, "loss": 0.3762, "step": 7435 }, { "epoch": 1.8702213279678068, "grad_norm": 0.29366421699523926, "learning_rate": 3.732896982286755e-06, "loss": 0.3441, "step": 7436 }, { "epoch": 1.8704728370221329, "grad_norm": 0.29330679774284363, "learning_rate": 3.731481554918389e-06, "loss": 0.3299, "step": 7437 }, { "epoch": 1.8707243460764587, "grad_norm": 0.3113592565059662, "learning_rate": 3.7300662361906946e-06, "loss": 0.3302, "step": 7438 }, { "epoch": 1.8709758551307847, "grad_norm": 0.31629472970962524, "learning_rate": 3.728651026224881e-06, "loss": 0.3492, "step": 7439 }, { "epoch": 1.8712273641851107, "grad_norm": 0.3160915672779083, "learning_rate": 3.727235925142154e-06, "loss": 0.3628, "step": 7440 }, { "epoch": 1.8714788732394365, "grad_norm": 0.2999648153781891, "learning_rate": 3.725820933063707e-06, "loss": 0.342, "step": 7441 }, { "epoch": 1.8717303822937625, "grad_norm": 0.2990424931049347, "learning_rate": 3.724406050110727e-06, "loss": 0.3564, "step": 7442 }, { "epoch": 1.8719818913480886, "grad_norm": 0.29632776975631714, "learning_rate": 3.7229912764043874e-06, "loss": 0.3448, "step": 7443 }, { "epoch": 1.8722334004024144, "grad_norm": 0.33509159088134766, "learning_rate": 3.7215766120658568e-06, "loss": 0.3618, "step": 7444 }, { "epoch": 1.8724849094567404, "grad_norm": 0.328966349363327, "learning_rate": 3.720162057216291e-06, "loss": 0.3296, "step": 7445 }, { "epoch": 1.8727364185110664, "grad_norm": 0.293011337518692, "learning_rate": 3.7187476119768383e-06, "loss": 0.3452, "step": 7446 }, { "epoch": 1.8729879275653922, "grad_norm": 0.3068729043006897, "learning_rate": 3.7173332764686375e-06, "loss": 0.3468, "step": 7447 }, { "epoch": 1.8732394366197183, "grad_norm": 0.3060571849346161, "learning_rate": 3.715919050812817e-06, "loss": 0.3509, "step": 7448 }, { "epoch": 1.8734909456740443, "grad_norm": 0.31499195098876953, "learning_rate": 3.7145049351304973e-06, "loss": 0.3422, "step": 7449 }, { "epoch": 1.87374245472837, "grad_norm": 0.2951659858226776, "learning_rate": 3.7130909295427873e-06, "loss": 0.331, "step": 7450 }, { "epoch": 1.8739939637826963, "grad_norm": 0.30359193682670593, "learning_rate": 3.7116770341707893e-06, "loss": 0.3479, "step": 7451 }, { "epoch": 1.8742454728370221, "grad_norm": 0.28634950518608093, "learning_rate": 3.710263249135593e-06, "loss": 0.3286, "step": 7452 }, { "epoch": 1.874496981891348, "grad_norm": 0.30430909991264343, "learning_rate": 3.7088495745582803e-06, "loss": 0.3244, "step": 7453 }, { "epoch": 1.8747484909456742, "grad_norm": 0.3412688970565796, "learning_rate": 3.7074360105599246e-06, "loss": 0.3657, "step": 7454 }, { "epoch": 1.875, "grad_norm": 0.3176121413707733, "learning_rate": 3.706022557261588e-06, "loss": 0.3633, "step": 7455 }, { "epoch": 1.8752515090543258, "grad_norm": 0.2989017069339752, "learning_rate": 3.704609214784325e-06, "loss": 0.3417, "step": 7456 }, { "epoch": 1.875503018108652, "grad_norm": 0.31169548630714417, "learning_rate": 3.703195983249177e-06, "loss": 0.3268, "step": 7457 }, { "epoch": 1.8757545271629779, "grad_norm": 0.306718647480011, "learning_rate": 3.7017828627771825e-06, "loss": 0.3479, "step": 7458 }, { "epoch": 1.8760060362173037, "grad_norm": 0.29229748249053955, "learning_rate": 3.7003698534893623e-06, "loss": 0.3337, "step": 7459 }, { "epoch": 1.87625754527163, "grad_norm": 0.3112187683582306, "learning_rate": 3.6989569555067357e-06, "loss": 0.3334, "step": 7460 }, { "epoch": 1.8765090543259557, "grad_norm": 0.2908962368965149, "learning_rate": 3.6975441689503044e-06, "loss": 0.3319, "step": 7461 }, { "epoch": 1.8767605633802817, "grad_norm": 0.2986694872379303, "learning_rate": 3.6961314939410674e-06, "loss": 0.3649, "step": 7462 }, { "epoch": 1.8770120724346078, "grad_norm": 0.2823163568973541, "learning_rate": 3.694718930600012e-06, "loss": 0.3357, "step": 7463 }, { "epoch": 1.8772635814889336, "grad_norm": 0.28905975818634033, "learning_rate": 3.693306479048114e-06, "loss": 0.3284, "step": 7464 }, { "epoch": 1.8775150905432596, "grad_norm": 0.3094346821308136, "learning_rate": 3.6918941394063414e-06, "loss": 0.3834, "step": 7465 }, { "epoch": 1.8777665995975856, "grad_norm": 0.30385082960128784, "learning_rate": 3.6904819117956526e-06, "loss": 0.3556, "step": 7466 }, { "epoch": 1.8780181086519114, "grad_norm": 0.31416597962379456, "learning_rate": 3.6890697963369947e-06, "loss": 0.3567, "step": 7467 }, { "epoch": 1.8782696177062375, "grad_norm": 0.2962150275707245, "learning_rate": 3.6876577931513076e-06, "loss": 0.3439, "step": 7468 }, { "epoch": 1.8785211267605635, "grad_norm": 0.30000972747802734, "learning_rate": 3.686245902359522e-06, "loss": 0.3308, "step": 7469 }, { "epoch": 1.8787726358148893, "grad_norm": 0.31348156929016113, "learning_rate": 3.6848341240825548e-06, "loss": 0.3402, "step": 7470 }, { "epoch": 1.8790241448692153, "grad_norm": 0.34603050351142883, "learning_rate": 3.6834224584413183e-06, "loss": 0.3684, "step": 7471 }, { "epoch": 1.8792756539235413, "grad_norm": 0.3139626681804657, "learning_rate": 3.68201090555671e-06, "loss": 0.3524, "step": 7472 }, { "epoch": 1.8795271629778671, "grad_norm": 0.3043857216835022, "learning_rate": 3.6805994655496245e-06, "loss": 0.3485, "step": 7473 }, { "epoch": 1.8797786720321932, "grad_norm": 0.33150768280029297, "learning_rate": 3.6791881385409383e-06, "loss": 0.3492, "step": 7474 }, { "epoch": 1.8800301810865192, "grad_norm": 0.30486994981765747, "learning_rate": 3.6777769246515275e-06, "loss": 0.3585, "step": 7475 }, { "epoch": 1.880281690140845, "grad_norm": 0.2925189733505249, "learning_rate": 3.6763658240022495e-06, "loss": 0.31, "step": 7476 }, { "epoch": 1.880533199195171, "grad_norm": 0.2866535484790802, "learning_rate": 3.67495483671396e-06, "loss": 0.3426, "step": 7477 }, { "epoch": 1.880784708249497, "grad_norm": 0.32085758447647095, "learning_rate": 3.6735439629074964e-06, "loss": 0.3441, "step": 7478 }, { "epoch": 1.8810362173038229, "grad_norm": 0.3063550293445587, "learning_rate": 3.672133202703694e-06, "loss": 0.3211, "step": 7479 }, { "epoch": 1.881287726358149, "grad_norm": 0.32170259952545166, "learning_rate": 3.670722556223379e-06, "loss": 0.3347, "step": 7480 }, { "epoch": 1.881539235412475, "grad_norm": 0.30713140964508057, "learning_rate": 3.669312023587358e-06, "loss": 0.3515, "step": 7481 }, { "epoch": 1.8817907444668007, "grad_norm": 0.3304310142993927, "learning_rate": 3.66790160491644e-06, "loss": 0.3393, "step": 7482 }, { "epoch": 1.8820422535211268, "grad_norm": 0.3182964026927948, "learning_rate": 3.666491300331414e-06, "loss": 0.3511, "step": 7483 }, { "epoch": 1.8822937625754528, "grad_norm": 0.2859238088130951, "learning_rate": 3.6650811099530673e-06, "loss": 0.3363, "step": 7484 }, { "epoch": 1.8825452716297786, "grad_norm": 0.3283844292163849, "learning_rate": 3.663671033902171e-06, "loss": 0.3292, "step": 7485 }, { "epoch": 1.8827967806841046, "grad_norm": 0.31571048498153687, "learning_rate": 3.662261072299492e-06, "loss": 0.359, "step": 7486 }, { "epoch": 1.8830482897384306, "grad_norm": 0.32221272587776184, "learning_rate": 3.660851225265781e-06, "loss": 0.3513, "step": 7487 }, { "epoch": 1.8832997987927564, "grad_norm": 0.29501035809516907, "learning_rate": 3.659441492921788e-06, "loss": 0.3411, "step": 7488 }, { "epoch": 1.8835513078470825, "grad_norm": 0.31255054473876953, "learning_rate": 3.6580318753882414e-06, "loss": 0.3369, "step": 7489 }, { "epoch": 1.8838028169014085, "grad_norm": 0.3012523055076599, "learning_rate": 3.6566223727858697e-06, "loss": 0.333, "step": 7490 }, { "epoch": 1.8840543259557343, "grad_norm": 0.36300086975097656, "learning_rate": 3.65521298523539e-06, "loss": 0.3814, "step": 7491 }, { "epoch": 1.8843058350100603, "grad_norm": 0.3129633069038391, "learning_rate": 3.653803712857503e-06, "loss": 0.3338, "step": 7492 }, { "epoch": 1.8845573440643864, "grad_norm": 0.29744789004325867, "learning_rate": 3.652394555772908e-06, "loss": 0.3582, "step": 7493 }, { "epoch": 1.8848088531187122, "grad_norm": 0.30003800988197327, "learning_rate": 3.650985514102287e-06, "loss": 0.3399, "step": 7494 }, { "epoch": 1.8850603621730382, "grad_norm": 0.2842726707458496, "learning_rate": 3.6495765879663194e-06, "loss": 0.3373, "step": 7495 }, { "epoch": 1.8853118712273642, "grad_norm": 0.2723131775856018, "learning_rate": 3.6481677774856666e-06, "loss": 0.3369, "step": 7496 }, { "epoch": 1.88556338028169, "grad_norm": 0.3011725842952728, "learning_rate": 3.6467590827809885e-06, "loss": 0.3421, "step": 7497 }, { "epoch": 1.885814889336016, "grad_norm": 0.31559064984321594, "learning_rate": 3.6453505039729274e-06, "loss": 0.3782, "step": 7498 }, { "epoch": 1.886066398390342, "grad_norm": 0.29440516233444214, "learning_rate": 3.6439420411821226e-06, "loss": 0.3317, "step": 7499 }, { "epoch": 1.8863179074446679, "grad_norm": 0.2825719714164734, "learning_rate": 3.642533694529197e-06, "loss": 0.3552, "step": 7500 }, { "epoch": 1.8865694164989941, "grad_norm": 0.3134790360927582, "learning_rate": 3.641125464134768e-06, "loss": 0.3633, "step": 7501 }, { "epoch": 1.88682092555332, "grad_norm": 0.33232244849205017, "learning_rate": 3.6397173501194436e-06, "loss": 0.3448, "step": 7502 }, { "epoch": 1.8870724346076457, "grad_norm": 0.31418442726135254, "learning_rate": 3.638309352603816e-06, "loss": 0.3756, "step": 7503 }, { "epoch": 1.887323943661972, "grad_norm": 0.31419721245765686, "learning_rate": 3.6369014717084768e-06, "loss": 0.3438, "step": 7504 }, { "epoch": 1.8875754527162978, "grad_norm": 0.2999892234802246, "learning_rate": 3.635493707553996e-06, "loss": 0.3494, "step": 7505 }, { "epoch": 1.8878269617706236, "grad_norm": 0.2898130714893341, "learning_rate": 3.634086060260945e-06, "loss": 0.3625, "step": 7506 }, { "epoch": 1.8880784708249498, "grad_norm": 0.30754363536834717, "learning_rate": 3.6326785299498758e-06, "loss": 0.3224, "step": 7507 }, { "epoch": 1.8883299798792756, "grad_norm": 0.3098406493663788, "learning_rate": 3.6312711167413394e-06, "loss": 0.3585, "step": 7508 }, { "epoch": 1.8885814889336014, "grad_norm": 0.29480767250061035, "learning_rate": 3.629863820755866e-06, "loss": 0.3193, "step": 7509 }, { "epoch": 1.8888329979879277, "grad_norm": 0.3022323548793793, "learning_rate": 3.628456642113988e-06, "loss": 0.3516, "step": 7510 }, { "epoch": 1.8890845070422535, "grad_norm": 0.2908898591995239, "learning_rate": 3.627049580936215e-06, "loss": 0.3337, "step": 7511 }, { "epoch": 1.8893360160965795, "grad_norm": 0.3488399088382721, "learning_rate": 3.6256426373430577e-06, "loss": 0.3307, "step": 7512 }, { "epoch": 1.8895875251509056, "grad_norm": 0.33839771151542664, "learning_rate": 3.6242358114550104e-06, "loss": 0.3452, "step": 7513 }, { "epoch": 1.8898390342052314, "grad_norm": 0.31028321385383606, "learning_rate": 3.6228291033925596e-06, "loss": 0.3599, "step": 7514 }, { "epoch": 1.8900905432595574, "grad_norm": 0.3041622042655945, "learning_rate": 3.6214225132761806e-06, "loss": 0.3322, "step": 7515 }, { "epoch": 1.8903420523138834, "grad_norm": 0.2997733950614929, "learning_rate": 3.620016041226338e-06, "loss": 0.3191, "step": 7516 }, { "epoch": 1.8905935613682092, "grad_norm": 0.3369154632091522, "learning_rate": 3.618609687363489e-06, "loss": 0.35, "step": 7517 }, { "epoch": 1.8908450704225352, "grad_norm": 0.3129628896713257, "learning_rate": 3.6172034518080785e-06, "loss": 0.3106, "step": 7518 }, { "epoch": 1.8910965794768613, "grad_norm": 0.2884010672569275, "learning_rate": 3.615797334680541e-06, "loss": 0.3325, "step": 7519 }, { "epoch": 1.891348088531187, "grad_norm": 0.2941190004348755, "learning_rate": 3.6143913361013026e-06, "loss": 0.3346, "step": 7520 }, { "epoch": 1.891599597585513, "grad_norm": 0.314097136259079, "learning_rate": 3.6129854561907786e-06, "loss": 0.3595, "step": 7521 }, { "epoch": 1.8918511066398391, "grad_norm": 0.30389559268951416, "learning_rate": 3.611579695069372e-06, "loss": 0.3565, "step": 7522 }, { "epoch": 1.892102615694165, "grad_norm": 0.3276069760322571, "learning_rate": 3.610174052857478e-06, "loss": 0.3509, "step": 7523 }, { "epoch": 1.892354124748491, "grad_norm": 0.3453889787197113, "learning_rate": 3.608768529675484e-06, "loss": 0.3276, "step": 7524 }, { "epoch": 1.892605633802817, "grad_norm": 0.28621137142181396, "learning_rate": 3.607363125643759e-06, "loss": 0.3435, "step": 7525 }, { "epoch": 1.8928571428571428, "grad_norm": 0.33448565006256104, "learning_rate": 3.6059578408826734e-06, "loss": 0.346, "step": 7526 }, { "epoch": 1.8931086519114688, "grad_norm": 0.3352438509464264, "learning_rate": 3.604552675512574e-06, "loss": 0.3487, "step": 7527 }, { "epoch": 1.8933601609657948, "grad_norm": 0.3266422152519226, "learning_rate": 3.6031476296538113e-06, "loss": 0.341, "step": 7528 }, { "epoch": 1.8936116700201207, "grad_norm": 0.2940256595611572, "learning_rate": 3.601742703426713e-06, "loss": 0.3504, "step": 7529 }, { "epoch": 1.8938631790744467, "grad_norm": 0.34617501497268677, "learning_rate": 3.6003378969516067e-06, "loss": 0.3384, "step": 7530 }, { "epoch": 1.8941146881287727, "grad_norm": 0.33195579051971436, "learning_rate": 3.5989332103488013e-06, "loss": 0.3491, "step": 7531 }, { "epoch": 1.8943661971830985, "grad_norm": 0.3452162742614746, "learning_rate": 3.5975286437386014e-06, "loss": 0.3862, "step": 7532 }, { "epoch": 1.8946177062374245, "grad_norm": 0.2940990626811981, "learning_rate": 3.5961241972413012e-06, "loss": 0.3175, "step": 7533 }, { "epoch": 1.8948692152917506, "grad_norm": 0.30977126955986023, "learning_rate": 3.594719870977179e-06, "loss": 0.3298, "step": 7534 }, { "epoch": 1.8951207243460764, "grad_norm": 0.2974280118942261, "learning_rate": 3.5933156650665102e-06, "loss": 0.3208, "step": 7535 }, { "epoch": 1.8953722334004024, "grad_norm": 0.323830246925354, "learning_rate": 3.591911579629553e-06, "loss": 0.363, "step": 7536 }, { "epoch": 1.8956237424547284, "grad_norm": 0.308124840259552, "learning_rate": 3.590507614786561e-06, "loss": 0.3466, "step": 7537 }, { "epoch": 1.8958752515090542, "grad_norm": 0.2933880090713501, "learning_rate": 3.5891037706577736e-06, "loss": 0.3256, "step": 7538 }, { "epoch": 1.8961267605633803, "grad_norm": 0.30009788274765015, "learning_rate": 3.5877000473634227e-06, "loss": 0.3625, "step": 7539 }, { "epoch": 1.8963782696177063, "grad_norm": 0.3329128324985504, "learning_rate": 3.586296445023726e-06, "loss": 0.3725, "step": 7540 }, { "epoch": 1.896629778672032, "grad_norm": 0.2923327088356018, "learning_rate": 3.584892963758896e-06, "loss": 0.3407, "step": 7541 }, { "epoch": 1.8968812877263581, "grad_norm": 0.336452454328537, "learning_rate": 3.583489603689129e-06, "loss": 0.3387, "step": 7542 }, { "epoch": 1.8971327967806841, "grad_norm": 0.3008471131324768, "learning_rate": 3.5820863649346162e-06, "loss": 0.3415, "step": 7543 }, { "epoch": 1.89738430583501, "grad_norm": 0.3129260241985321, "learning_rate": 3.5806832476155373e-06, "loss": 0.365, "step": 7544 }, { "epoch": 1.897635814889336, "grad_norm": 0.3290889263153076, "learning_rate": 3.579280251852057e-06, "loss": 0.3611, "step": 7545 }, { "epoch": 1.897887323943662, "grad_norm": 0.2925894856452942, "learning_rate": 3.577877377764337e-06, "loss": 0.3765, "step": 7546 }, { "epoch": 1.8981388329979878, "grad_norm": 0.3327069878578186, "learning_rate": 3.5764746254725213e-06, "loss": 0.3524, "step": 7547 }, { "epoch": 1.8983903420523138, "grad_norm": 0.3372514545917511, "learning_rate": 3.5750719950967507e-06, "loss": 0.365, "step": 7548 }, { "epoch": 1.8986418511066399, "grad_norm": 0.2978571057319641, "learning_rate": 3.5736694867571465e-06, "loss": 0.3321, "step": 7549 }, { "epoch": 1.8988933601609657, "grad_norm": 0.33006635308265686, "learning_rate": 3.5722671005738303e-06, "loss": 0.3283, "step": 7550 }, { "epoch": 1.899144869215292, "grad_norm": 0.32429298758506775, "learning_rate": 3.570864836666903e-06, "loss": 0.3622, "step": 7551 }, { "epoch": 1.8993963782696177, "grad_norm": 0.3145343065261841, "learning_rate": 3.5694626951564637e-06, "loss": 0.3564, "step": 7552 }, { "epoch": 1.8996478873239435, "grad_norm": 0.30636870861053467, "learning_rate": 3.5680606761625925e-06, "loss": 0.3545, "step": 7553 }, { "epoch": 1.8998993963782698, "grad_norm": 0.27128949761390686, "learning_rate": 3.566658779805367e-06, "loss": 0.351, "step": 7554 }, { "epoch": 1.9001509054325956, "grad_norm": 0.318464070558548, "learning_rate": 3.565257006204852e-06, "loss": 0.3615, "step": 7555 }, { "epoch": 1.9004024144869214, "grad_norm": 0.3404373824596405, "learning_rate": 3.5638553554810963e-06, "loss": 0.366, "step": 7556 }, { "epoch": 1.9006539235412476, "grad_norm": 0.29662925004959106, "learning_rate": 3.5624538277541474e-06, "loss": 0.3436, "step": 7557 }, { "epoch": 1.9009054325955734, "grad_norm": 0.2770460546016693, "learning_rate": 3.5610524231440324e-06, "loss": 0.3663, "step": 7558 }, { "epoch": 1.9011569416498992, "grad_norm": 0.3167244493961334, "learning_rate": 3.559651141770778e-06, "loss": 0.3376, "step": 7559 }, { "epoch": 1.9014084507042255, "grad_norm": 0.30453893542289734, "learning_rate": 3.5582499837543894e-06, "loss": 0.3545, "step": 7560 }, { "epoch": 1.9016599597585513, "grad_norm": 0.3127380907535553, "learning_rate": 3.5568489492148728e-06, "loss": 0.3502, "step": 7561 }, { "epoch": 1.9019114688128773, "grad_norm": 0.3051506280899048, "learning_rate": 3.5554480382722134e-06, "loss": 0.3508, "step": 7562 }, { "epoch": 1.9021629778672033, "grad_norm": 0.28955167531967163, "learning_rate": 3.5540472510463947e-06, "loss": 0.3445, "step": 7563 }, { "epoch": 1.9024144869215291, "grad_norm": 0.30066999793052673, "learning_rate": 3.552646587657381e-06, "loss": 0.3658, "step": 7564 }, { "epoch": 1.9026659959758552, "grad_norm": 0.2882605791091919, "learning_rate": 3.551246048225132e-06, "loss": 0.362, "step": 7565 }, { "epoch": 1.9029175050301812, "grad_norm": 0.29483652114868164, "learning_rate": 3.549845632869598e-06, "loss": 0.3343, "step": 7566 }, { "epoch": 1.903169014084507, "grad_norm": 0.28981560468673706, "learning_rate": 3.5484453417107113e-06, "loss": 0.3553, "step": 7567 }, { "epoch": 1.903420523138833, "grad_norm": 0.321000337600708, "learning_rate": 3.547045174868402e-06, "loss": 0.3451, "step": 7568 }, { "epoch": 1.903672032193159, "grad_norm": 0.29945504665374756, "learning_rate": 3.545645132462582e-06, "loss": 0.3361, "step": 7569 }, { "epoch": 1.9039235412474849, "grad_norm": 0.31492796540260315, "learning_rate": 3.54424521461316e-06, "loss": 0.3388, "step": 7570 }, { "epoch": 1.904175050301811, "grad_norm": 0.28294259309768677, "learning_rate": 3.5428454214400265e-06, "loss": 0.3443, "step": 7571 }, { "epoch": 1.904426559356137, "grad_norm": 0.3141169846057892, "learning_rate": 3.541445753063068e-06, "loss": 0.3318, "step": 7572 }, { "epoch": 1.9046780684104627, "grad_norm": 0.2912863492965698, "learning_rate": 3.5400462096021547e-06, "loss": 0.3491, "step": 7573 }, { "epoch": 1.9049295774647887, "grad_norm": 0.32144850492477417, "learning_rate": 3.5386467911771518e-06, "loss": 0.3553, "step": 7574 }, { "epoch": 1.9051810865191148, "grad_norm": 0.33859172463417053, "learning_rate": 3.5372474979079067e-06, "loss": 0.3514, "step": 7575 }, { "epoch": 1.9054325955734406, "grad_norm": 0.3066120743751526, "learning_rate": 3.5358483299142645e-06, "loss": 0.3434, "step": 7576 }, { "epoch": 1.9056841046277666, "grad_norm": 0.35578417778015137, "learning_rate": 3.534449287316052e-06, "loss": 0.3699, "step": 7577 }, { "epoch": 1.9059356136820926, "grad_norm": 0.30244138836860657, "learning_rate": 3.5330503702330898e-06, "loss": 0.3566, "step": 7578 }, { "epoch": 1.9061871227364184, "grad_norm": 0.2991444170475006, "learning_rate": 3.5316515787851867e-06, "loss": 0.3412, "step": 7579 }, { "epoch": 1.9064386317907445, "grad_norm": 0.29319190979003906, "learning_rate": 3.53025291309214e-06, "loss": 0.343, "step": 7580 }, { "epoch": 1.9066901408450705, "grad_norm": 0.3237687945365906, "learning_rate": 3.528854373273736e-06, "loss": 0.339, "step": 7581 }, { "epoch": 1.9069416498993963, "grad_norm": 0.31021758913993835, "learning_rate": 3.5274559594497513e-06, "loss": 0.3483, "step": 7582 }, { "epoch": 1.9071931589537223, "grad_norm": 0.3291212320327759, "learning_rate": 3.5260576717399518e-06, "loss": 0.3483, "step": 7583 }, { "epoch": 1.9074446680080483, "grad_norm": 0.2789864242076874, "learning_rate": 3.5246595102640924e-06, "loss": 0.3251, "step": 7584 }, { "epoch": 1.9076961770623742, "grad_norm": 0.29820239543914795, "learning_rate": 3.523261475141916e-06, "loss": 0.3354, "step": 7585 }, { "epoch": 1.9079476861167002, "grad_norm": 0.33340615034103394, "learning_rate": 3.5218635664931556e-06, "loss": 0.352, "step": 7586 }, { "epoch": 1.9081991951710262, "grad_norm": 0.3217755854129791, "learning_rate": 3.5204657844375323e-06, "loss": 0.3632, "step": 7587 }, { "epoch": 1.908450704225352, "grad_norm": 0.28968545794487, "learning_rate": 3.5190681290947603e-06, "loss": 0.3159, "step": 7588 }, { "epoch": 1.908702213279678, "grad_norm": 0.2866925895214081, "learning_rate": 3.517670600584537e-06, "loss": 0.3443, "step": 7589 }, { "epoch": 1.908953722334004, "grad_norm": 0.30135875940322876, "learning_rate": 3.5162731990265553e-06, "loss": 0.3629, "step": 7590 }, { "epoch": 1.9092052313883299, "grad_norm": 0.28496021032333374, "learning_rate": 3.5148759245404895e-06, "loss": 0.3375, "step": 7591 }, { "epoch": 1.909456740442656, "grad_norm": 0.29872947931289673, "learning_rate": 3.513478777246012e-06, "loss": 0.32, "step": 7592 }, { "epoch": 1.909708249496982, "grad_norm": 0.3028433620929718, "learning_rate": 3.5120817572627763e-06, "loss": 0.3223, "step": 7593 }, { "epoch": 1.9099597585513077, "grad_norm": 0.32273510098457336, "learning_rate": 3.510684864710431e-06, "loss": 0.3305, "step": 7594 }, { "epoch": 1.9102112676056338, "grad_norm": 0.30309391021728516, "learning_rate": 3.5092880997086076e-06, "loss": 0.3564, "step": 7595 }, { "epoch": 1.9104627766599598, "grad_norm": 0.3080253601074219, "learning_rate": 3.5078914623769357e-06, "loss": 0.3443, "step": 7596 }, { "epoch": 1.9107142857142856, "grad_norm": 0.299723356962204, "learning_rate": 3.506494952835022e-06, "loss": 0.3494, "step": 7597 }, { "epoch": 1.9109657947686118, "grad_norm": 0.3054339587688446, "learning_rate": 3.505098571202473e-06, "loss": 0.3533, "step": 7598 }, { "epoch": 1.9112173038229376, "grad_norm": 0.2950621545314789, "learning_rate": 3.5037023175988818e-06, "loss": 0.3482, "step": 7599 }, { "epoch": 1.9114688128772634, "grad_norm": 0.279410183429718, "learning_rate": 3.502306192143824e-06, "loss": 0.346, "step": 7600 }, { "epoch": 1.9117203219315897, "grad_norm": 0.3337315320968628, "learning_rate": 3.500910194956873e-06, "loss": 0.3456, "step": 7601 }, { "epoch": 1.9119718309859155, "grad_norm": 0.3044759929180145, "learning_rate": 3.4995143261575835e-06, "loss": 0.3345, "step": 7602 }, { "epoch": 1.9122233400402413, "grad_norm": 0.33510109782218933, "learning_rate": 3.4981185858655076e-06, "loss": 0.3427, "step": 7603 }, { "epoch": 1.9124748490945676, "grad_norm": 0.30306974053382874, "learning_rate": 3.4967229742001764e-06, "loss": 0.3364, "step": 7604 }, { "epoch": 1.9127263581488934, "grad_norm": 0.31048643589019775, "learning_rate": 3.4953274912811198e-06, "loss": 0.3343, "step": 7605 }, { "epoch": 1.9129778672032192, "grad_norm": 0.30860239267349243, "learning_rate": 3.493932137227849e-06, "loss": 0.3387, "step": 7606 }, { "epoch": 1.9132293762575454, "grad_norm": 0.33586663007736206, "learning_rate": 3.4925369121598708e-06, "loss": 0.3428, "step": 7607 }, { "epoch": 1.9134808853118712, "grad_norm": 0.2864849269390106, "learning_rate": 3.4911418161966726e-06, "loss": 0.3449, "step": 7608 }, { "epoch": 1.913732394366197, "grad_norm": 0.3048544228076935, "learning_rate": 3.489746849457739e-06, "loss": 0.349, "step": 7609 }, { "epoch": 1.9139839034205233, "grad_norm": 0.3124396502971649, "learning_rate": 3.4883520120625414e-06, "loss": 0.3296, "step": 7610 }, { "epoch": 1.914235412474849, "grad_norm": 0.32257482409477234, "learning_rate": 3.486957304130535e-06, "loss": 0.3414, "step": 7611 }, { "epoch": 1.914486921529175, "grad_norm": 0.30354657769203186, "learning_rate": 3.4855627257811727e-06, "loss": 0.3571, "step": 7612 }, { "epoch": 1.9147384305835011, "grad_norm": 0.2927219867706299, "learning_rate": 3.484168277133886e-06, "loss": 0.3492, "step": 7613 }, { "epoch": 1.914989939637827, "grad_norm": 0.3123704195022583, "learning_rate": 3.4827739583081054e-06, "loss": 0.3536, "step": 7614 }, { "epoch": 1.915241448692153, "grad_norm": 0.3322194814682007, "learning_rate": 3.481379769423242e-06, "loss": 0.366, "step": 7615 }, { "epoch": 1.915492957746479, "grad_norm": 0.3077843487262726, "learning_rate": 3.479985710598702e-06, "loss": 0.3643, "step": 7616 }, { "epoch": 1.9157444668008048, "grad_norm": 0.33019596338272095, "learning_rate": 3.4785917819538757e-06, "loss": 0.3169, "step": 7617 }, { "epoch": 1.9159959758551308, "grad_norm": 0.29199883341789246, "learning_rate": 3.477197983608147e-06, "loss": 0.3315, "step": 7618 }, { "epoch": 1.9162474849094568, "grad_norm": 0.29495760798454285, "learning_rate": 3.475804315680882e-06, "loss": 0.3453, "step": 7619 }, { "epoch": 1.9164989939637826, "grad_norm": 0.2928338646888733, "learning_rate": 3.4744107782914425e-06, "loss": 0.3661, "step": 7620 }, { "epoch": 1.9167505030181087, "grad_norm": 0.3239118754863739, "learning_rate": 3.4730173715591773e-06, "loss": 0.3537, "step": 7621 }, { "epoch": 1.9170020120724347, "grad_norm": 0.29765039682388306, "learning_rate": 3.4716240956034197e-06, "loss": 0.3481, "step": 7622 }, { "epoch": 1.9172535211267605, "grad_norm": 0.2840639054775238, "learning_rate": 3.4702309505434996e-06, "loss": 0.3504, "step": 7623 }, { "epoch": 1.9175050301810865, "grad_norm": 0.3031284511089325, "learning_rate": 3.468837936498725e-06, "loss": 0.3553, "step": 7624 }, { "epoch": 1.9177565392354126, "grad_norm": 0.31842097640037537, "learning_rate": 3.4674450535884053e-06, "loss": 0.3413, "step": 7625 }, { "epoch": 1.9180080482897384, "grad_norm": 0.3358910083770752, "learning_rate": 3.4660523019318267e-06, "loss": 0.3566, "step": 7626 }, { "epoch": 1.9182595573440644, "grad_norm": 0.33151108026504517, "learning_rate": 3.4646596816482743e-06, "loss": 0.3433, "step": 7627 }, { "epoch": 1.9185110663983904, "grad_norm": 0.31569045782089233, "learning_rate": 3.4632671928570126e-06, "loss": 0.3264, "step": 7628 }, { "epoch": 1.9187625754527162, "grad_norm": 0.30424806475639343, "learning_rate": 3.4618748356773046e-06, "loss": 0.3341, "step": 7629 }, { "epoch": 1.9190140845070423, "grad_norm": 0.29679644107818604, "learning_rate": 3.460482610228392e-06, "loss": 0.3414, "step": 7630 }, { "epoch": 1.9192655935613683, "grad_norm": 0.3199297785758972, "learning_rate": 3.459090516629514e-06, "loss": 0.3571, "step": 7631 }, { "epoch": 1.919517102615694, "grad_norm": 0.3305947482585907, "learning_rate": 3.457698554999893e-06, "loss": 0.353, "step": 7632 }, { "epoch": 1.91976861167002, "grad_norm": 0.31420543789863586, "learning_rate": 3.4563067254587424e-06, "loss": 0.3556, "step": 7633 }, { "epoch": 1.9200201207243461, "grad_norm": 0.2856319844722748, "learning_rate": 3.4549150281252635e-06, "loss": 0.3611, "step": 7634 }, { "epoch": 1.920271629778672, "grad_norm": 0.30721721053123474, "learning_rate": 3.4535234631186466e-06, "loss": 0.324, "step": 7635 }, { "epoch": 1.920523138832998, "grad_norm": 0.3075389266014099, "learning_rate": 3.4521320305580697e-06, "loss": 0.3438, "step": 7636 }, { "epoch": 1.920774647887324, "grad_norm": 0.32247766852378845, "learning_rate": 3.4507407305627018e-06, "loss": 0.3569, "step": 7637 }, { "epoch": 1.9210261569416498, "grad_norm": 0.32027557492256165, "learning_rate": 3.449349563251697e-06, "loss": 0.3591, "step": 7638 }, { "epoch": 1.9212776659959758, "grad_norm": 0.292683482170105, "learning_rate": 3.4479585287442025e-06, "loss": 0.3264, "step": 7639 }, { "epoch": 1.9215291750503019, "grad_norm": 0.3263826072216034, "learning_rate": 3.4465676271593495e-06, "loss": 0.3507, "step": 7640 }, { "epoch": 1.9217806841046277, "grad_norm": 0.3174888491630554, "learning_rate": 3.445176858616262e-06, "loss": 0.346, "step": 7641 }, { "epoch": 1.9220321931589537, "grad_norm": 0.301901638507843, "learning_rate": 3.443786223234048e-06, "loss": 0.357, "step": 7642 }, { "epoch": 1.9222837022132797, "grad_norm": 0.3539835810661316, "learning_rate": 3.4423957211318092e-06, "loss": 0.3506, "step": 7643 }, { "epoch": 1.9225352112676055, "grad_norm": 0.3143352270126343, "learning_rate": 3.441005352428633e-06, "loss": 0.3439, "step": 7644 }, { "epoch": 1.9227867203219315, "grad_norm": 0.30168989300727844, "learning_rate": 3.4396151172435954e-06, "loss": 0.3677, "step": 7645 }, { "epoch": 1.9230382293762576, "grad_norm": 0.3380453884601593, "learning_rate": 3.4382250156957607e-06, "loss": 0.3338, "step": 7646 }, { "epoch": 1.9232897384305834, "grad_norm": 0.2964382767677307, "learning_rate": 3.4368350479041836e-06, "loss": 0.3358, "step": 7647 }, { "epoch": 1.9235412474849096, "grad_norm": 0.2817126512527466, "learning_rate": 3.4354452139879044e-06, "loss": 0.3568, "step": 7648 }, { "epoch": 1.9237927565392354, "grad_norm": 0.32317665219306946, "learning_rate": 3.434055514065956e-06, "loss": 0.358, "step": 7649 }, { "epoch": 1.9240442655935612, "grad_norm": 0.3008497953414917, "learning_rate": 3.4326659482573556e-06, "loss": 0.3482, "step": 7650 }, { "epoch": 1.9242957746478875, "grad_norm": 0.29205819964408875, "learning_rate": 3.431276516681112e-06, "loss": 0.3532, "step": 7651 }, { "epoch": 1.9245472837022133, "grad_norm": 0.3241797983646393, "learning_rate": 3.4298872194562203e-06, "loss": 0.3549, "step": 7652 }, { "epoch": 1.924798792756539, "grad_norm": 0.31223297119140625, "learning_rate": 3.428498056701665e-06, "loss": 0.3619, "step": 7653 }, { "epoch": 1.9250503018108653, "grad_norm": 0.3212898075580597, "learning_rate": 3.4271090285364216e-06, "loss": 0.367, "step": 7654 }, { "epoch": 1.9253018108651911, "grad_norm": 0.29246649146080017, "learning_rate": 3.4257201350794487e-06, "loss": 0.3404, "step": 7655 }, { "epoch": 1.925553319919517, "grad_norm": 0.33508625626564026, "learning_rate": 3.424331376449699e-06, "loss": 0.3407, "step": 7656 }, { "epoch": 1.9258048289738432, "grad_norm": 0.3068259358406067, "learning_rate": 3.4229427527661074e-06, "loss": 0.3378, "step": 7657 }, { "epoch": 1.926056338028169, "grad_norm": 0.2951129972934723, "learning_rate": 3.4215542641476053e-06, "loss": 0.3404, "step": 7658 }, { "epoch": 1.9263078470824948, "grad_norm": 0.28135162591934204, "learning_rate": 3.4201659107131036e-06, "loss": 0.3448, "step": 7659 }, { "epoch": 1.926559356136821, "grad_norm": 0.2802700698375702, "learning_rate": 3.4187776925815103e-06, "loss": 0.3163, "step": 7660 }, { "epoch": 1.9268108651911469, "grad_norm": 0.29353809356689453, "learning_rate": 3.4173896098717134e-06, "loss": 0.3673, "step": 7661 }, { "epoch": 1.9270623742454729, "grad_norm": 0.3133329153060913, "learning_rate": 3.4160016627025976e-06, "loss": 0.3452, "step": 7662 }, { "epoch": 1.927313883299799, "grad_norm": 0.29814791679382324, "learning_rate": 3.414613851193028e-06, "loss": 0.3653, "step": 7663 }, { "epoch": 1.9275653923541247, "grad_norm": 0.28661277890205383, "learning_rate": 3.4132261754618646e-06, "loss": 0.3484, "step": 7664 }, { "epoch": 1.9278169014084507, "grad_norm": 0.3048580586910248, "learning_rate": 3.411838635627953e-06, "loss": 0.3434, "step": 7665 }, { "epoch": 1.9280684104627768, "grad_norm": 0.3117848336696625, "learning_rate": 3.4104512318101256e-06, "loss": 0.3471, "step": 7666 }, { "epoch": 1.9283199195171026, "grad_norm": 0.31257185339927673, "learning_rate": 3.4090639641272085e-06, "loss": 0.3702, "step": 7667 }, { "epoch": 1.9285714285714286, "grad_norm": 0.30880677700042725, "learning_rate": 3.407676832698007e-06, "loss": 0.337, "step": 7668 }, { "epoch": 1.9288229376257546, "grad_norm": 0.294489324092865, "learning_rate": 3.4062898376413257e-06, "loss": 0.3519, "step": 7669 }, { "epoch": 1.9290744466800804, "grad_norm": 0.2875889539718628, "learning_rate": 3.404902979075948e-06, "loss": 0.3557, "step": 7670 }, { "epoch": 1.9293259557344065, "grad_norm": 0.33200985193252563, "learning_rate": 3.4035162571206528e-06, "loss": 0.364, "step": 7671 }, { "epoch": 1.9295774647887325, "grad_norm": 0.31349629163742065, "learning_rate": 3.4021296718942006e-06, "loss": 0.3433, "step": 7672 }, { "epoch": 1.9298289738430583, "grad_norm": 0.32347166538238525, "learning_rate": 3.400743223515348e-06, "loss": 0.3675, "step": 7673 }, { "epoch": 1.9300804828973843, "grad_norm": 0.30907049775123596, "learning_rate": 3.3993569121028306e-06, "loss": 0.3577, "step": 7674 }, { "epoch": 1.9303319919517103, "grad_norm": 0.28248360753059387, "learning_rate": 3.397970737775381e-06, "loss": 0.3327, "step": 7675 }, { "epoch": 1.9305835010060362, "grad_norm": 0.3216158449649811, "learning_rate": 3.396584700651717e-06, "loss": 0.3264, "step": 7676 }, { "epoch": 1.9308350100603622, "grad_norm": 0.3292233943939209, "learning_rate": 3.395198800850541e-06, "loss": 0.368, "step": 7677 }, { "epoch": 1.9310865191146882, "grad_norm": 0.30121901631355286, "learning_rate": 3.3938130384905495e-06, "loss": 0.3396, "step": 7678 }, { "epoch": 1.931338028169014, "grad_norm": 0.32539746165275574, "learning_rate": 3.3924274136904214e-06, "loss": 0.3689, "step": 7679 }, { "epoch": 1.93158953722334, "grad_norm": 0.3132033944129944, "learning_rate": 3.39104192656883e-06, "loss": 0.3617, "step": 7680 }, { "epoch": 1.931841046277666, "grad_norm": 0.30390465259552, "learning_rate": 3.3896565772444303e-06, "loss": 0.3432, "step": 7681 }, { "epoch": 1.9320925553319919, "grad_norm": 0.3210819959640503, "learning_rate": 3.3882713658358716e-06, "loss": 0.3315, "step": 7682 }, { "epoch": 1.932344064386318, "grad_norm": 0.2830940783023834, "learning_rate": 3.3868862924617862e-06, "loss": 0.319, "step": 7683 }, { "epoch": 1.932595573440644, "grad_norm": 0.3306945264339447, "learning_rate": 3.385501357240798e-06, "loss": 0.3682, "step": 7684 }, { "epoch": 1.9328470824949697, "grad_norm": 0.3089801073074341, "learning_rate": 3.3841165602915206e-06, "loss": 0.3496, "step": 7685 }, { "epoch": 1.9330985915492958, "grad_norm": 0.31166577339172363, "learning_rate": 3.3827319017325486e-06, "loss": 0.3641, "step": 7686 }, { "epoch": 1.9333501006036218, "grad_norm": 0.3063109517097473, "learning_rate": 3.3813473816824743e-06, "loss": 0.3454, "step": 7687 }, { "epoch": 1.9336016096579476, "grad_norm": 0.29401347041130066, "learning_rate": 3.3799630002598683e-06, "loss": 0.3354, "step": 7688 }, { "epoch": 1.9338531187122736, "grad_norm": 0.2971067726612091, "learning_rate": 3.3785787575832974e-06, "loss": 0.3436, "step": 7689 }, { "epoch": 1.9341046277665996, "grad_norm": 0.28379392623901367, "learning_rate": 3.377194653771311e-06, "loss": 0.3655, "step": 7690 }, { "epoch": 1.9343561368209254, "grad_norm": 0.3166361451148987, "learning_rate": 3.3758106889424526e-06, "loss": 0.3555, "step": 7691 }, { "epoch": 1.9346076458752515, "grad_norm": 0.2777943015098572, "learning_rate": 3.3744268632152454e-06, "loss": 0.3448, "step": 7692 }, { "epoch": 1.9348591549295775, "grad_norm": 0.3278443217277527, "learning_rate": 3.37304317670821e-06, "loss": 0.373, "step": 7693 }, { "epoch": 1.9351106639839033, "grad_norm": 0.30939531326293945, "learning_rate": 3.371659629539846e-06, "loss": 0.3366, "step": 7694 }, { "epoch": 1.9353621730382293, "grad_norm": 0.3135591447353363, "learning_rate": 3.3702762218286487e-06, "loss": 0.337, "step": 7695 }, { "epoch": 1.9356136820925554, "grad_norm": 0.3024998903274536, "learning_rate": 3.368892953693098e-06, "loss": 0.3488, "step": 7696 }, { "epoch": 1.9358651911468812, "grad_norm": 0.2925957143306732, "learning_rate": 3.367509825251662e-06, "loss": 0.3415, "step": 7697 }, { "epoch": 1.9361167002012074, "grad_norm": 0.26913854479789734, "learning_rate": 3.366126836622796e-06, "loss": 0.3147, "step": 7698 }, { "epoch": 1.9363682092555332, "grad_norm": 0.331188827753067, "learning_rate": 3.3647439879249453e-06, "loss": 0.3634, "step": 7699 }, { "epoch": 1.936619718309859, "grad_norm": 0.3402562439441681, "learning_rate": 3.363361279276541e-06, "loss": 0.356, "step": 7700 }, { "epoch": 1.9368712273641853, "grad_norm": 0.3227146863937378, "learning_rate": 3.3619787107960054e-06, "loss": 0.3238, "step": 7701 }, { "epoch": 1.937122736418511, "grad_norm": 0.31124117970466614, "learning_rate": 3.3605962826017457e-06, "loss": 0.3313, "step": 7702 }, { "epoch": 1.9373742454728369, "grad_norm": 0.303874671459198, "learning_rate": 3.359213994812158e-06, "loss": 0.338, "step": 7703 }, { "epoch": 1.9376257545271631, "grad_norm": 0.3281771242618561, "learning_rate": 3.357831847545627e-06, "loss": 0.3628, "step": 7704 }, { "epoch": 1.937877263581489, "grad_norm": 0.3109123408794403, "learning_rate": 3.356449840920525e-06, "loss": 0.3361, "step": 7705 }, { "epoch": 1.9381287726358147, "grad_norm": 0.30376991629600525, "learning_rate": 3.3550679750552107e-06, "loss": 0.3625, "step": 7706 }, { "epoch": 1.938380281690141, "grad_norm": 0.29432567954063416, "learning_rate": 3.3536862500680354e-06, "loss": 0.3326, "step": 7707 }, { "epoch": 1.9386317907444668, "grad_norm": 0.3185214102268219, "learning_rate": 3.3523046660773327e-06, "loss": 0.3599, "step": 7708 }, { "epoch": 1.9388832997987926, "grad_norm": 0.35394561290740967, "learning_rate": 3.3509232232014287e-06, "loss": 0.376, "step": 7709 }, { "epoch": 1.9391348088531188, "grad_norm": 0.3000744581222534, "learning_rate": 3.3495419215586324e-06, "loss": 0.3304, "step": 7710 }, { "epoch": 1.9393863179074446, "grad_norm": 0.29905176162719727, "learning_rate": 3.3481607612672464e-06, "loss": 0.3643, "step": 7711 }, { "epoch": 1.9396378269617707, "grad_norm": 0.3088197410106659, "learning_rate": 3.346779742445556e-06, "loss": 0.3307, "step": 7712 }, { "epoch": 1.9398893360160967, "grad_norm": 0.2897097170352936, "learning_rate": 3.3453988652118398e-06, "loss": 0.3296, "step": 7713 }, { "epoch": 1.9401408450704225, "grad_norm": 0.31739160418510437, "learning_rate": 3.344018129684358e-06, "loss": 0.3387, "step": 7714 }, { "epoch": 1.9403923541247485, "grad_norm": 0.315039724111557, "learning_rate": 3.3426375359813655e-06, "loss": 0.3463, "step": 7715 }, { "epoch": 1.9406438631790746, "grad_norm": 0.28398001194000244, "learning_rate": 3.341257084221098e-06, "loss": 0.378, "step": 7716 }, { "epoch": 1.9408953722334004, "grad_norm": 0.3012690246105194, "learning_rate": 3.339876774521783e-06, "loss": 0.3328, "step": 7717 }, { "epoch": 1.9411468812877264, "grad_norm": 0.3159896731376648, "learning_rate": 3.33849660700164e-06, "loss": 0.3328, "step": 7718 }, { "epoch": 1.9413983903420524, "grad_norm": 0.3031710088253021, "learning_rate": 3.3371165817788655e-06, "loss": 0.336, "step": 7719 }, { "epoch": 1.9416498993963782, "grad_norm": 0.3224204182624817, "learning_rate": 3.3357366989716544e-06, "loss": 0.3553, "step": 7720 }, { "epoch": 1.9419014084507042, "grad_norm": 0.31610408425331116, "learning_rate": 3.3343569586981823e-06, "loss": 0.307, "step": 7721 }, { "epoch": 1.9421529175050303, "grad_norm": 0.32398226857185364, "learning_rate": 3.332977361076618e-06, "loss": 0.375, "step": 7722 }, { "epoch": 1.942404426559356, "grad_norm": 0.3373188078403473, "learning_rate": 3.331597906225112e-06, "loss": 0.3535, "step": 7723 }, { "epoch": 1.942655935613682, "grad_norm": 0.3140803575515747, "learning_rate": 3.330218594261809e-06, "loss": 0.3512, "step": 7724 }, { "epoch": 1.9429074446680081, "grad_norm": 0.29968270659446716, "learning_rate": 3.3288394253048365e-06, "loss": 0.3476, "step": 7725 }, { "epoch": 1.943158953722334, "grad_norm": 0.29010340571403503, "learning_rate": 3.3274603994723144e-06, "loss": 0.3188, "step": 7726 }, { "epoch": 1.94341046277666, "grad_norm": 0.3180834949016571, "learning_rate": 3.3260815168823433e-06, "loss": 0.3317, "step": 7727 }, { "epoch": 1.943661971830986, "grad_norm": 0.3009601831436157, "learning_rate": 3.3247027776530183e-06, "loss": 0.3546, "step": 7728 }, { "epoch": 1.9439134808853118, "grad_norm": 0.29234665632247925, "learning_rate": 3.323324181902422e-06, "loss": 0.3357, "step": 7729 }, { "epoch": 1.9441649899396378, "grad_norm": 0.2999419867992401, "learning_rate": 3.321945729748618e-06, "loss": 0.3627, "step": 7730 }, { "epoch": 1.9444164989939638, "grad_norm": 0.2987615466117859, "learning_rate": 3.3205674213096662e-06, "loss": 0.3133, "step": 7731 }, { "epoch": 1.9446680080482897, "grad_norm": 0.30129966139793396, "learning_rate": 3.3191892567036065e-06, "loss": 0.3278, "step": 7732 }, { "epoch": 1.9449195171026157, "grad_norm": 0.2908097505569458, "learning_rate": 3.317811236048474e-06, "loss": 0.3454, "step": 7733 }, { "epoch": 1.9451710261569417, "grad_norm": 0.30645039677619934, "learning_rate": 3.316433359462283e-06, "loss": 0.336, "step": 7734 }, { "epoch": 1.9454225352112675, "grad_norm": 0.30733421444892883, "learning_rate": 3.315055627063045e-06, "loss": 0.3457, "step": 7735 }, { "epoch": 1.9456740442655935, "grad_norm": 0.3086591362953186, "learning_rate": 3.313678038968749e-06, "loss": 0.3393, "step": 7736 }, { "epoch": 1.9459255533199196, "grad_norm": 0.3118712902069092, "learning_rate": 3.312300595297382e-06, "loss": 0.3467, "step": 7737 }, { "epoch": 1.9461770623742454, "grad_norm": 0.29625868797302246, "learning_rate": 3.310923296166908e-06, "loss": 0.3222, "step": 7738 }, { "epoch": 1.9464285714285714, "grad_norm": 0.3383052349090576, "learning_rate": 3.309546141695287e-06, "loss": 0.3333, "step": 7739 }, { "epoch": 1.9466800804828974, "grad_norm": 0.29828548431396484, "learning_rate": 3.308169132000466e-06, "loss": 0.3462, "step": 7740 }, { "epoch": 1.9469315895372232, "grad_norm": 0.31392189860343933, "learning_rate": 3.3067922672003727e-06, "loss": 0.3538, "step": 7741 }, { "epoch": 1.9471830985915493, "grad_norm": 0.28369051218032837, "learning_rate": 3.3054155474129306e-06, "loss": 0.3093, "step": 7742 }, { "epoch": 1.9474346076458753, "grad_norm": 0.3025501072406769, "learning_rate": 3.304038972756044e-06, "loss": 0.3583, "step": 7743 }, { "epoch": 1.947686116700201, "grad_norm": 0.3110698461532593, "learning_rate": 3.3026625433476112e-06, "loss": 0.3538, "step": 7744 }, { "epoch": 1.9479376257545271, "grad_norm": 0.3204960823059082, "learning_rate": 3.301286259305511e-06, "loss": 0.3606, "step": 7745 }, { "epoch": 1.9481891348088531, "grad_norm": 0.28646546602249146, "learning_rate": 3.299910120747618e-06, "loss": 0.3416, "step": 7746 }, { "epoch": 1.948440643863179, "grad_norm": 0.31837978959083557, "learning_rate": 3.298534127791785e-06, "loss": 0.3218, "step": 7747 }, { "epoch": 1.9486921529175052, "grad_norm": 0.2954898476600647, "learning_rate": 3.2971582805558622e-06, "loss": 0.37, "step": 7748 }, { "epoch": 1.948943661971831, "grad_norm": 0.3241606652736664, "learning_rate": 3.295782579157677e-06, "loss": 0.3637, "step": 7749 }, { "epoch": 1.9491951710261568, "grad_norm": 0.3172752559185028, "learning_rate": 3.294407023715053e-06, "loss": 0.3458, "step": 7750 }, { "epoch": 1.949446680080483, "grad_norm": 0.3054000735282898, "learning_rate": 3.2930316143457984e-06, "loss": 0.3666, "step": 7751 }, { "epoch": 1.9496981891348089, "grad_norm": 0.27285468578338623, "learning_rate": 3.2916563511677057e-06, "loss": 0.3397, "step": 7752 }, { "epoch": 1.9499496981891347, "grad_norm": 0.32400456070899963, "learning_rate": 3.2902812342985613e-06, "loss": 0.3159, "step": 7753 }, { "epoch": 1.950201207243461, "grad_norm": 0.30845338106155396, "learning_rate": 3.2889062638561313e-06, "loss": 0.3652, "step": 7754 }, { "epoch": 1.9504527162977867, "grad_norm": 0.2901265621185303, "learning_rate": 3.287531439958177e-06, "loss": 0.3283, "step": 7755 }, { "epoch": 1.9507042253521125, "grad_norm": 0.32865509390830994, "learning_rate": 3.28615676272244e-06, "loss": 0.3355, "step": 7756 }, { "epoch": 1.9509557344064388, "grad_norm": 0.28665393590927124, "learning_rate": 3.2847822322666564e-06, "loss": 0.3413, "step": 7757 }, { "epoch": 1.9512072434607646, "grad_norm": 0.3147144019603729, "learning_rate": 3.283407848708542e-06, "loss": 0.3519, "step": 7758 }, { "epoch": 1.9514587525150904, "grad_norm": 0.32015910744667053, "learning_rate": 3.2820336121658084e-06, "loss": 0.3478, "step": 7759 }, { "epoch": 1.9517102615694166, "grad_norm": 0.29439136385917664, "learning_rate": 3.2806595227561464e-06, "loss": 0.3274, "step": 7760 }, { "epoch": 1.9519617706237424, "grad_norm": 0.303396075963974, "learning_rate": 3.279285580597241e-06, "loss": 0.3506, "step": 7761 }, { "epoch": 1.9522132796780685, "grad_norm": 0.2776741087436676, "learning_rate": 3.277911785806761e-06, "loss": 0.344, "step": 7762 }, { "epoch": 1.9524647887323945, "grad_norm": 0.3228535056114197, "learning_rate": 3.2765381385023638e-06, "loss": 0.3522, "step": 7763 }, { "epoch": 1.9527162977867203, "grad_norm": 0.3100894093513489, "learning_rate": 3.2751646388016924e-06, "loss": 0.3582, "step": 7764 }, { "epoch": 1.9529678068410463, "grad_norm": 0.3215596079826355, "learning_rate": 3.27379128682238e-06, "loss": 0.3395, "step": 7765 }, { "epoch": 1.9532193158953723, "grad_norm": 0.30445247888565063, "learning_rate": 3.2724180826820436e-06, "loss": 0.3348, "step": 7766 }, { "epoch": 1.9534708249496981, "grad_norm": 0.29653722047805786, "learning_rate": 3.2710450264982906e-06, "loss": 0.3616, "step": 7767 }, { "epoch": 1.9537223340040242, "grad_norm": 0.31945815682411194, "learning_rate": 3.269672118388716e-06, "loss": 0.3714, "step": 7768 }, { "epoch": 1.9539738430583502, "grad_norm": 0.3002147972583771, "learning_rate": 3.2682993584708988e-06, "loss": 0.3323, "step": 7769 }, { "epoch": 1.954225352112676, "grad_norm": 0.3391788601875305, "learning_rate": 3.2669267468624077e-06, "loss": 0.3334, "step": 7770 }, { "epoch": 1.954476861167002, "grad_norm": 0.28778892755508423, "learning_rate": 3.2655542836807998e-06, "loss": 0.3374, "step": 7771 }, { "epoch": 1.954728370221328, "grad_norm": 0.3348093032836914, "learning_rate": 3.264181969043615e-06, "loss": 0.3448, "step": 7772 }, { "epoch": 1.9549798792756539, "grad_norm": 0.3041500449180603, "learning_rate": 3.2628098030683873e-06, "loss": 0.3534, "step": 7773 }, { "epoch": 1.95523138832998, "grad_norm": 0.32908380031585693, "learning_rate": 3.26143778587263e-06, "loss": 0.3498, "step": 7774 }, { "epoch": 1.955482897384306, "grad_norm": 0.33852681517601013, "learning_rate": 3.2600659175738524e-06, "loss": 0.3642, "step": 7775 }, { "epoch": 1.9557344064386317, "grad_norm": 0.3201028108596802, "learning_rate": 3.2586941982895414e-06, "loss": 0.3265, "step": 7776 }, { "epoch": 1.9559859154929577, "grad_norm": 0.2781035602092743, "learning_rate": 3.2573226281371817e-06, "loss": 0.336, "step": 7777 }, { "epoch": 1.9562374245472838, "grad_norm": 0.30619314312934875, "learning_rate": 3.2559512072342342e-06, "loss": 0.3426, "step": 7778 }, { "epoch": 1.9564889336016096, "grad_norm": 0.31019851565361023, "learning_rate": 3.2545799356981566e-06, "loss": 0.3383, "step": 7779 }, { "epoch": 1.9567404426559356, "grad_norm": 0.32283902168273926, "learning_rate": 3.2532088136463867e-06, "loss": 0.3363, "step": 7780 }, { "epoch": 1.9569919517102616, "grad_norm": 0.2992129325866699, "learning_rate": 3.2518378411963565e-06, "loss": 0.328, "step": 7781 }, { "epoch": 1.9572434607645874, "grad_norm": 0.2824527323246002, "learning_rate": 3.2504670184654764e-06, "loss": 0.3299, "step": 7782 }, { "epoch": 1.9574949698189135, "grad_norm": 0.3210359811782837, "learning_rate": 3.2490963455711506e-06, "loss": 0.3574, "step": 7783 }, { "epoch": 1.9577464788732395, "grad_norm": 0.3022248446941376, "learning_rate": 3.2477258226307716e-06, "loss": 0.3502, "step": 7784 }, { "epoch": 1.9579979879275653, "grad_norm": 0.292578786611557, "learning_rate": 3.2463554497617113e-06, "loss": 0.3535, "step": 7785 }, { "epoch": 1.9582494969818913, "grad_norm": 0.2895413935184479, "learning_rate": 3.2449852270813386e-06, "loss": 0.3292, "step": 7786 }, { "epoch": 1.9585010060362174, "grad_norm": 0.3182920515537262, "learning_rate": 3.243615154706999e-06, "loss": 0.336, "step": 7787 }, { "epoch": 1.9587525150905432, "grad_norm": 0.3026027977466583, "learning_rate": 3.242245232756036e-06, "loss": 0.345, "step": 7788 }, { "epoch": 1.9590040241448692, "grad_norm": 0.28621864318847656, "learning_rate": 3.2408754613457703e-06, "loss": 0.3604, "step": 7789 }, { "epoch": 1.9592555331991952, "grad_norm": 0.2878339886665344, "learning_rate": 3.2395058405935186e-06, "loss": 0.3392, "step": 7790 }, { "epoch": 1.959507042253521, "grad_norm": 0.3039291203022003, "learning_rate": 3.238136370616576e-06, "loss": 0.3317, "step": 7791 }, { "epoch": 1.959758551307847, "grad_norm": 0.30178239941596985, "learning_rate": 3.2367670515322324e-06, "loss": 0.3338, "step": 7792 }, { "epoch": 1.960010060362173, "grad_norm": 0.2928571403026581, "learning_rate": 3.2353978834577587e-06, "loss": 0.3344, "step": 7793 }, { "epoch": 1.9602615694164989, "grad_norm": 0.3058040142059326, "learning_rate": 3.2340288665104167e-06, "loss": 0.3786, "step": 7794 }, { "epoch": 1.960513078470825, "grad_norm": 0.3003310263156891, "learning_rate": 3.232660000807457e-06, "loss": 0.351, "step": 7795 }, { "epoch": 1.960764587525151, "grad_norm": 0.30328527092933655, "learning_rate": 3.231291286466109e-06, "loss": 0.3547, "step": 7796 }, { "epoch": 1.9610160965794767, "grad_norm": 0.2882510721683502, "learning_rate": 3.2299227236035996e-06, "loss": 0.3301, "step": 7797 }, { "epoch": 1.961267605633803, "grad_norm": 0.3310767114162445, "learning_rate": 3.2285543123371333e-06, "loss": 0.3515, "step": 7798 }, { "epoch": 1.9615191146881288, "grad_norm": 0.31823989748954773, "learning_rate": 3.2271860527839104e-06, "loss": 0.3701, "step": 7799 }, { "epoch": 1.9617706237424546, "grad_norm": 0.28764277696609497, "learning_rate": 3.2258179450611086e-06, "loss": 0.3319, "step": 7800 }, { "epoch": 1.9620221327967808, "grad_norm": 0.26832863688468933, "learning_rate": 3.2244499892859032e-06, "loss": 0.3329, "step": 7801 }, { "epoch": 1.9622736418511066, "grad_norm": 0.29785555601119995, "learning_rate": 3.2230821855754464e-06, "loss": 0.3667, "step": 7802 }, { "epoch": 1.9625251509054324, "grad_norm": 0.3114742934703827, "learning_rate": 3.221714534046886e-06, "loss": 0.3327, "step": 7803 }, { "epoch": 1.9627766599597587, "grad_norm": 0.32278159260749817, "learning_rate": 3.2203470348173483e-06, "loss": 0.3631, "step": 7804 }, { "epoch": 1.9630281690140845, "grad_norm": 0.283717542886734, "learning_rate": 3.2189796880039535e-06, "loss": 0.3244, "step": 7805 }, { "epoch": 1.9632796780684103, "grad_norm": 0.3093319833278656, "learning_rate": 3.2176124937238094e-06, "loss": 0.3288, "step": 7806 }, { "epoch": 1.9635311871227366, "grad_norm": 0.295280396938324, "learning_rate": 3.2162454520940024e-06, "loss": 0.3391, "step": 7807 }, { "epoch": 1.9637826961770624, "grad_norm": 0.2879166007041931, "learning_rate": 3.214878563231615e-06, "loss": 0.3515, "step": 7808 }, { "epoch": 1.9640342052313882, "grad_norm": 0.32011473178863525, "learning_rate": 3.2135118272537093e-06, "loss": 0.3262, "step": 7809 }, { "epoch": 1.9642857142857144, "grad_norm": 0.305389940738678, "learning_rate": 3.2121452442773405e-06, "loss": 0.3509, "step": 7810 }, { "epoch": 1.9645372233400402, "grad_norm": 0.3013128936290741, "learning_rate": 3.210778814419545e-06, "loss": 0.3232, "step": 7811 }, { "epoch": 1.9647887323943662, "grad_norm": 0.28125154972076416, "learning_rate": 3.2094125377973534e-06, "loss": 0.3342, "step": 7812 }, { "epoch": 1.9650402414486923, "grad_norm": 0.30323272943496704, "learning_rate": 3.2080464145277736e-06, "loss": 0.3375, "step": 7813 }, { "epoch": 1.965291750503018, "grad_norm": 0.29133641719818115, "learning_rate": 3.20668044472781e-06, "loss": 0.3335, "step": 7814 }, { "epoch": 1.965543259557344, "grad_norm": 0.30253052711486816, "learning_rate": 3.2053146285144456e-06, "loss": 0.3318, "step": 7815 }, { "epoch": 1.9657947686116701, "grad_norm": 0.2923412024974823, "learning_rate": 3.2039489660046565e-06, "loss": 0.33, "step": 7816 }, { "epoch": 1.966046277665996, "grad_norm": 0.30035942792892456, "learning_rate": 3.2025834573154025e-06, "loss": 0.3195, "step": 7817 }, { "epoch": 1.966297786720322, "grad_norm": 0.30417102575302124, "learning_rate": 3.2012181025636303e-06, "loss": 0.3386, "step": 7818 }, { "epoch": 1.966549295774648, "grad_norm": 0.288310170173645, "learning_rate": 3.1998529018662748e-06, "loss": 0.3736, "step": 7819 }, { "epoch": 1.9668008048289738, "grad_norm": 0.2810823321342468, "learning_rate": 3.1984878553402566e-06, "loss": 0.322, "step": 7820 }, { "epoch": 1.9670523138832998, "grad_norm": 0.28224313259124756, "learning_rate": 3.1971229631024836e-06, "loss": 0.3359, "step": 7821 }, { "epoch": 1.9673038229376258, "grad_norm": 0.3008633553981781, "learning_rate": 3.19575822526985e-06, "loss": 0.3528, "step": 7822 }, { "epoch": 1.9675553319919517, "grad_norm": 0.3109699487686157, "learning_rate": 3.194393641959237e-06, "loss": 0.335, "step": 7823 }, { "epoch": 1.9678068410462777, "grad_norm": 0.2929900288581848, "learning_rate": 3.193029213287513e-06, "loss": 0.3471, "step": 7824 }, { "epoch": 1.9680583501006037, "grad_norm": 0.3091834485530853, "learning_rate": 3.1916649393715314e-06, "loss": 0.3395, "step": 7825 }, { "epoch": 1.9683098591549295, "grad_norm": 0.2836105525493622, "learning_rate": 3.190300820328135e-06, "loss": 0.3434, "step": 7826 }, { "epoch": 1.9685613682092555, "grad_norm": 0.331920325756073, "learning_rate": 3.1889368562741527e-06, "loss": 0.3478, "step": 7827 }, { "epoch": 1.9688128772635816, "grad_norm": 0.34226587414741516, "learning_rate": 3.187573047326398e-06, "loss": 0.3698, "step": 7828 }, { "epoch": 1.9690643863179074, "grad_norm": 0.31301939487457275, "learning_rate": 3.186209393601674e-06, "loss": 0.3542, "step": 7829 }, { "epoch": 1.9693158953722334, "grad_norm": 0.31900009512901306, "learning_rate": 3.184845895216768e-06, "loss": 0.3532, "step": 7830 }, { "epoch": 1.9695674044265594, "grad_norm": 0.29443228244781494, "learning_rate": 3.183482552288456e-06, "loss": 0.3383, "step": 7831 }, { "epoch": 1.9698189134808852, "grad_norm": 0.31553661823272705, "learning_rate": 3.1821193649334993e-06, "loss": 0.3484, "step": 7832 }, { "epoch": 1.9700704225352113, "grad_norm": 0.2916860580444336, "learning_rate": 3.180756333268646e-06, "loss": 0.3332, "step": 7833 }, { "epoch": 1.9703219315895373, "grad_norm": 0.3077201247215271, "learning_rate": 3.1793934574106317e-06, "loss": 0.3442, "step": 7834 }, { "epoch": 1.970573440643863, "grad_norm": 0.32148656249046326, "learning_rate": 3.1780307374761777e-06, "loss": 0.3529, "step": 7835 }, { "epoch": 1.970824949698189, "grad_norm": 0.30224621295928955, "learning_rate": 3.1766681735819926e-06, "loss": 0.3389, "step": 7836 }, { "epoch": 1.9710764587525151, "grad_norm": 0.3319030702114105, "learning_rate": 3.1753057658447726e-06, "loss": 0.3552, "step": 7837 }, { "epoch": 1.971327967806841, "grad_norm": 0.28931450843811035, "learning_rate": 3.173943514381198e-06, "loss": 0.326, "step": 7838 }, { "epoch": 1.971579476861167, "grad_norm": 0.3092653453350067, "learning_rate": 3.1725814193079384e-06, "loss": 0.3511, "step": 7839 }, { "epoch": 1.971830985915493, "grad_norm": 0.2999701499938965, "learning_rate": 3.171219480741646e-06, "loss": 0.3566, "step": 7840 }, { "epoch": 1.9720824949698188, "grad_norm": 0.29582321643829346, "learning_rate": 3.1698576987989672e-06, "loss": 0.3297, "step": 7841 }, { "epoch": 1.9723340040241448, "grad_norm": 0.288314551115036, "learning_rate": 3.168496073596524e-06, "loss": 0.3499, "step": 7842 }, { "epoch": 1.9725855130784709, "grad_norm": 0.292122483253479, "learning_rate": 3.167134605250938e-06, "loss": 0.3154, "step": 7843 }, { "epoch": 1.9728370221327967, "grad_norm": 0.3173863887786865, "learning_rate": 3.1657732938788033e-06, "loss": 0.3427, "step": 7844 }, { "epoch": 1.9730885311871227, "grad_norm": 0.3070237934589386, "learning_rate": 3.164412139596713e-06, "loss": 0.3399, "step": 7845 }, { "epoch": 1.9733400402414487, "grad_norm": 0.30909422039985657, "learning_rate": 3.163051142521238e-06, "loss": 0.3436, "step": 7846 }, { "epoch": 1.9735915492957745, "grad_norm": 0.3138062059879303, "learning_rate": 3.1616903027689407e-06, "loss": 0.3271, "step": 7847 }, { "epoch": 1.9738430583501008, "grad_norm": 0.30243563652038574, "learning_rate": 3.1603296204563707e-06, "loss": 0.3389, "step": 7848 }, { "epoch": 1.9740945674044266, "grad_norm": 0.31402406096458435, "learning_rate": 3.158969095700057e-06, "loss": 0.3529, "step": 7849 }, { "epoch": 1.9743460764587524, "grad_norm": 0.32227790355682373, "learning_rate": 3.157608728616525e-06, "loss": 0.3545, "step": 7850 }, { "epoch": 1.9745975855130786, "grad_norm": 0.301570862531662, "learning_rate": 3.156248519322278e-06, "loss": 0.337, "step": 7851 }, { "epoch": 1.9748490945674044, "grad_norm": 0.27347323298454285, "learning_rate": 3.154888467933812e-06, "loss": 0.328, "step": 7852 }, { "epoch": 1.9751006036217302, "grad_norm": 0.2885780930519104, "learning_rate": 3.153528574567605e-06, "loss": 0.3225, "step": 7853 }, { "epoch": 1.9753521126760565, "grad_norm": 0.30733510851860046, "learning_rate": 3.152168839340125e-06, "loss": 0.348, "step": 7854 }, { "epoch": 1.9756036217303823, "grad_norm": 0.30902665853500366, "learning_rate": 3.1508092623678223e-06, "loss": 0.3374, "step": 7855 }, { "epoch": 1.975855130784708, "grad_norm": 0.28816184401512146, "learning_rate": 3.14944984376714e-06, "loss": 0.3434, "step": 7856 }, { "epoch": 1.9761066398390343, "grad_norm": 0.315157026052475, "learning_rate": 3.1480905836544996e-06, "loss": 0.3565, "step": 7857 }, { "epoch": 1.9763581488933601, "grad_norm": 0.3467448651790619, "learning_rate": 3.1467314821463147e-06, "loss": 0.3529, "step": 7858 }, { "epoch": 1.9766096579476862, "grad_norm": 0.3209885358810425, "learning_rate": 3.145372539358987e-06, "loss": 0.34, "step": 7859 }, { "epoch": 1.9768611670020122, "grad_norm": 0.3004145920276642, "learning_rate": 3.1440137554088957e-06, "loss": 0.3567, "step": 7860 }, { "epoch": 1.977112676056338, "grad_norm": 0.31923210620880127, "learning_rate": 3.1426551304124187e-06, "loss": 0.3249, "step": 7861 }, { "epoch": 1.977364185110664, "grad_norm": 0.2933943569660187, "learning_rate": 3.1412966644859073e-06, "loss": 0.3495, "step": 7862 }, { "epoch": 1.97761569416499, "grad_norm": 0.31692373752593994, "learning_rate": 3.139938357745711e-06, "loss": 0.3705, "step": 7863 }, { "epoch": 1.9778672032193159, "grad_norm": 0.30593782663345337, "learning_rate": 3.138580210308155e-06, "loss": 0.3254, "step": 7864 }, { "epoch": 1.9781187122736419, "grad_norm": 0.34505024552345276, "learning_rate": 3.137222222289562e-06, "loss": 0.3704, "step": 7865 }, { "epoch": 1.978370221327968, "grad_norm": 0.322317898273468, "learning_rate": 3.1358643938062295e-06, "loss": 0.3434, "step": 7866 }, { "epoch": 1.9786217303822937, "grad_norm": 0.2983636260032654, "learning_rate": 3.134506724974452e-06, "loss": 0.3634, "step": 7867 }, { "epoch": 1.9788732394366197, "grad_norm": 0.2886245846748352, "learning_rate": 3.1331492159105007e-06, "loss": 0.3471, "step": 7868 }, { "epoch": 1.9791247484909458, "grad_norm": 0.2910291254520416, "learning_rate": 3.1317918667306406e-06, "loss": 0.3388, "step": 7869 }, { "epoch": 1.9793762575452716, "grad_norm": 0.32111358642578125, "learning_rate": 3.130434677551122e-06, "loss": 0.3532, "step": 7870 }, { "epoch": 1.9796277665995976, "grad_norm": 0.3142852783203125, "learning_rate": 3.129077648488174e-06, "loss": 0.3354, "step": 7871 }, { "epoch": 1.9798792756539236, "grad_norm": 0.29093170166015625, "learning_rate": 3.1277207796580237e-06, "loss": 0.3475, "step": 7872 }, { "epoch": 1.9801307847082494, "grad_norm": 0.3069877624511719, "learning_rate": 3.126364071176874e-06, "loss": 0.3158, "step": 7873 }, { "epoch": 1.9803822937625755, "grad_norm": 0.3028368353843689, "learning_rate": 3.125007523160921e-06, "loss": 0.3464, "step": 7874 }, { "epoch": 1.9806338028169015, "grad_norm": 0.31962573528289795, "learning_rate": 3.123651135726343e-06, "loss": 0.3508, "step": 7875 }, { "epoch": 1.9808853118712273, "grad_norm": 0.28765198588371277, "learning_rate": 3.1222949089893085e-06, "loss": 0.3446, "step": 7876 }, { "epoch": 1.9811368209255533, "grad_norm": 0.30417191982269287, "learning_rate": 3.120938843065966e-06, "loss": 0.3452, "step": 7877 }, { "epoch": 1.9813883299798793, "grad_norm": 0.2888725996017456, "learning_rate": 3.1195829380724585e-06, "loss": 0.348, "step": 7878 }, { "epoch": 1.9816398390342052, "grad_norm": 0.3087455630302429, "learning_rate": 3.1182271941249054e-06, "loss": 0.361, "step": 7879 }, { "epoch": 1.9818913480885312, "grad_norm": 0.2925972044467926, "learning_rate": 3.1168716113394224e-06, "loss": 0.3458, "step": 7880 }, { "epoch": 1.9821428571428572, "grad_norm": 0.3094989061355591, "learning_rate": 3.1155161898321064e-06, "loss": 0.3298, "step": 7881 }, { "epoch": 1.982394366197183, "grad_norm": 0.29126372933387756, "learning_rate": 3.114160929719038e-06, "loss": 0.3506, "step": 7882 }, { "epoch": 1.982645875251509, "grad_norm": 0.29920557141304016, "learning_rate": 3.1128058311162885e-06, "loss": 0.3524, "step": 7883 }, { "epoch": 1.982897384305835, "grad_norm": 0.3097069263458252, "learning_rate": 3.1114508941399135e-06, "loss": 0.3685, "step": 7884 }, { "epoch": 1.9831488933601609, "grad_norm": 0.28832173347473145, "learning_rate": 3.110096118905954e-06, "loss": 0.3692, "step": 7885 }, { "epoch": 1.983400402414487, "grad_norm": 0.30503520369529724, "learning_rate": 3.1087415055304392e-06, "loss": 0.3686, "step": 7886 }, { "epoch": 1.983651911468813, "grad_norm": 0.2966339886188507, "learning_rate": 3.1073870541293834e-06, "loss": 0.3295, "step": 7887 }, { "epoch": 1.9839034205231387, "grad_norm": 0.30514541268348694, "learning_rate": 3.1060327648187855e-06, "loss": 0.3639, "step": 7888 }, { "epoch": 1.9841549295774648, "grad_norm": 0.3116115629673004, "learning_rate": 3.1046786377146332e-06, "loss": 0.3457, "step": 7889 }, { "epoch": 1.9844064386317908, "grad_norm": 0.31909826397895813, "learning_rate": 3.103324672932898e-06, "loss": 0.3306, "step": 7890 }, { "epoch": 1.9846579476861166, "grad_norm": 0.3044300973415375, "learning_rate": 3.101970870589538e-06, "loss": 0.3527, "step": 7891 }, { "epoch": 1.9849094567404426, "grad_norm": 0.29158785939216614, "learning_rate": 3.1006172308005012e-06, "loss": 0.3387, "step": 7892 }, { "epoch": 1.9851609657947686, "grad_norm": 0.308321475982666, "learning_rate": 3.099263753681714e-06, "loss": 0.3492, "step": 7893 }, { "epoch": 1.9854124748490944, "grad_norm": 0.2938082218170166, "learning_rate": 3.0979104393490965e-06, "loss": 0.3443, "step": 7894 }, { "epoch": 1.9856639839034205, "grad_norm": 0.30831268429756165, "learning_rate": 3.0965572879185495e-06, "loss": 0.354, "step": 7895 }, { "epoch": 1.9859154929577465, "grad_norm": 0.32580068707466125, "learning_rate": 3.095204299505965e-06, "loss": 0.3326, "step": 7896 }, { "epoch": 1.9861670020120723, "grad_norm": 0.2983664572238922, "learning_rate": 3.093851474227213e-06, "loss": 0.3367, "step": 7897 }, { "epoch": 1.9864185110663986, "grad_norm": 0.2757643759250641, "learning_rate": 3.0924988121981604e-06, "loss": 0.3575, "step": 7898 }, { "epoch": 1.9866700201207244, "grad_norm": 0.2955786883831024, "learning_rate": 3.0911463135346486e-06, "loss": 0.3523, "step": 7899 }, { "epoch": 1.9869215291750502, "grad_norm": 0.285170316696167, "learning_rate": 3.0897939783525156e-06, "loss": 0.3152, "step": 7900 }, { "epoch": 1.9871730382293764, "grad_norm": 0.28338173031806946, "learning_rate": 3.0884418067675755e-06, "loss": 0.3367, "step": 7901 }, { "epoch": 1.9874245472837022, "grad_norm": 0.3066754937171936, "learning_rate": 3.0870897988956362e-06, "loss": 0.3347, "step": 7902 }, { "epoch": 1.987676056338028, "grad_norm": 0.30550864338874817, "learning_rate": 3.0857379548524914e-06, "loss": 0.3358, "step": 7903 }, { "epoch": 1.9879275653923543, "grad_norm": 0.32733213901519775, "learning_rate": 3.0843862747539123e-06, "loss": 0.3602, "step": 7904 }, { "epoch": 1.98817907444668, "grad_norm": 0.2825915813446045, "learning_rate": 3.0830347587156667e-06, "loss": 0.3433, "step": 7905 }, { "epoch": 1.9884305835010059, "grad_norm": 0.3268314003944397, "learning_rate": 3.0816834068534994e-06, "loss": 0.3502, "step": 7906 }, { "epoch": 1.9886820925553321, "grad_norm": 0.3060625195503235, "learning_rate": 3.0803322192831496e-06, "loss": 0.3397, "step": 7907 }, { "epoch": 1.988933601609658, "grad_norm": 0.28180480003356934, "learning_rate": 3.0789811961203342e-06, "loss": 0.3592, "step": 7908 }, { "epoch": 1.989185110663984, "grad_norm": 0.28377094864845276, "learning_rate": 3.077630337480764e-06, "loss": 0.3577, "step": 7909 }, { "epoch": 1.98943661971831, "grad_norm": 0.28484049439430237, "learning_rate": 3.076279643480126e-06, "loss": 0.3419, "step": 7910 }, { "epoch": 1.9896881287726358, "grad_norm": 0.3092370927333832, "learning_rate": 3.0749291142341037e-06, "loss": 0.3364, "step": 7911 }, { "epoch": 1.9899396378269618, "grad_norm": 0.29064294695854187, "learning_rate": 3.073578749858358e-06, "loss": 0.3469, "step": 7912 }, { "epoch": 1.9901911468812878, "grad_norm": 0.2736586630344391, "learning_rate": 3.0722285504685405e-06, "loss": 0.3434, "step": 7913 }, { "epoch": 1.9904426559356136, "grad_norm": 0.3202298581600189, "learning_rate": 3.0708785161802902e-06, "loss": 0.3541, "step": 7914 }, { "epoch": 1.9906941649899397, "grad_norm": 0.2941378355026245, "learning_rate": 3.0695286471092235e-06, "loss": 0.3528, "step": 7915 }, { "epoch": 1.9909456740442657, "grad_norm": 0.28272390365600586, "learning_rate": 3.0681789433709535e-06, "loss": 0.3324, "step": 7916 }, { "epoch": 1.9911971830985915, "grad_norm": 0.3015698492527008, "learning_rate": 3.06682940508107e-06, "loss": 0.3348, "step": 7917 }, { "epoch": 1.9914486921529175, "grad_norm": 0.3570209741592407, "learning_rate": 3.065480032355156e-06, "loss": 0.3712, "step": 7918 }, { "epoch": 1.9917002012072436, "grad_norm": 0.29426610469818115, "learning_rate": 3.0641308253087722e-06, "loss": 0.3465, "step": 7919 }, { "epoch": 1.9919517102615694, "grad_norm": 0.3420696258544922, "learning_rate": 3.0627817840574747e-06, "loss": 0.3412, "step": 7920 }, { "epoch": 1.9922032193158954, "grad_norm": 0.3334300220012665, "learning_rate": 3.061432908716797e-06, "loss": 0.3811, "step": 7921 }, { "epoch": 1.9924547283702214, "grad_norm": 0.3178221881389618, "learning_rate": 3.0600841994022645e-06, "loss": 0.3462, "step": 7922 }, { "epoch": 1.9927062374245472, "grad_norm": 0.2942996919155121, "learning_rate": 3.058735656229382e-06, "loss": 0.3455, "step": 7923 }, { "epoch": 1.9929577464788732, "grad_norm": 0.3083072006702423, "learning_rate": 3.057387279313646e-06, "loss": 0.353, "step": 7924 }, { "epoch": 1.9932092555331993, "grad_norm": 0.30187562108039856, "learning_rate": 3.056039068770539e-06, "loss": 0.3473, "step": 7925 }, { "epoch": 1.993460764587525, "grad_norm": 0.3180212080478668, "learning_rate": 3.0546910247155224e-06, "loss": 0.34, "step": 7926 }, { "epoch": 1.993712273641851, "grad_norm": 0.3074866235256195, "learning_rate": 3.053343147264052e-06, "loss": 0.3273, "step": 7927 }, { "epoch": 1.9939637826961771, "grad_norm": 0.3176601231098175, "learning_rate": 3.0519954365315595e-06, "loss": 0.3411, "step": 7928 }, { "epoch": 1.994215291750503, "grad_norm": 0.31003740429878235, "learning_rate": 3.050647892633474e-06, "loss": 0.3533, "step": 7929 }, { "epoch": 1.994466800804829, "grad_norm": 0.2907339632511139, "learning_rate": 3.0493005156851997e-06, "loss": 0.3365, "step": 7930 }, { "epoch": 1.994718309859155, "grad_norm": 0.31127139925956726, "learning_rate": 3.0479533058021345e-06, "loss": 0.3386, "step": 7931 }, { "epoch": 1.9949698189134808, "grad_norm": 0.30205103754997253, "learning_rate": 3.046606263099654e-06, "loss": 0.3306, "step": 7932 }, { "epoch": 1.9952213279678068, "grad_norm": 0.30589067935943604, "learning_rate": 3.0452593876931296e-06, "loss": 0.378, "step": 7933 }, { "epoch": 1.9954728370221329, "grad_norm": 0.3329513669013977, "learning_rate": 3.0439126796979074e-06, "loss": 0.3734, "step": 7934 }, { "epoch": 1.9957243460764587, "grad_norm": 0.2932153046131134, "learning_rate": 3.042566139229327e-06, "loss": 0.3653, "step": 7935 }, { "epoch": 1.9959758551307847, "grad_norm": 0.2880392074584961, "learning_rate": 3.041219766402713e-06, "loss": 0.3714, "step": 7936 }, { "epoch": 1.9962273641851107, "grad_norm": 0.31852367520332336, "learning_rate": 3.03987356133337e-06, "loss": 0.3305, "step": 7937 }, { "epoch": 1.9964788732394365, "grad_norm": 0.3372204899787903, "learning_rate": 3.0385275241365965e-06, "loss": 0.334, "step": 7938 }, { "epoch": 1.9967303822937625, "grad_norm": 0.3119423985481262, "learning_rate": 3.0371816549276667e-06, "loss": 0.3488, "step": 7939 }, { "epoch": 1.9969818913480886, "grad_norm": 0.2920767366886139, "learning_rate": 3.035835953821851e-06, "loss": 0.3238, "step": 7940 }, { "epoch": 1.9972334004024144, "grad_norm": 0.3219856321811676, "learning_rate": 3.0344904209343962e-06, "loss": 0.3492, "step": 7941 }, { "epoch": 1.9974849094567404, "grad_norm": 0.2855657935142517, "learning_rate": 3.0331450563805433e-06, "loss": 0.3, "step": 7942 }, { "epoch": 1.9977364185110664, "grad_norm": 0.3001307547092438, "learning_rate": 3.0317998602755087e-06, "loss": 0.3469, "step": 7943 }, { "epoch": 1.9979879275653922, "grad_norm": 0.29662996530532837, "learning_rate": 3.0304548327345056e-06, "loss": 0.3425, "step": 7944 }, { "epoch": 1.9982394366197183, "grad_norm": 0.28861871361732483, "learning_rate": 3.0291099738727226e-06, "loss": 0.3553, "step": 7945 }, { "epoch": 1.9984909456740443, "grad_norm": 0.30145004391670227, "learning_rate": 3.0277652838053416e-06, "loss": 0.3693, "step": 7946 }, { "epoch": 1.99874245472837, "grad_norm": 0.32663676142692566, "learning_rate": 3.0264207626475254e-06, "loss": 0.343, "step": 7947 }, { "epoch": 1.9989939637826963, "grad_norm": 0.3272598385810852, "learning_rate": 3.025076410514425e-06, "loss": 0.3422, "step": 7948 }, { "epoch": 1.9992454728370221, "grad_norm": 0.3103751540184021, "learning_rate": 3.023732227521174e-06, "loss": 0.3515, "step": 7949 }, { "epoch": 1.999496981891348, "grad_norm": 0.297633558511734, "learning_rate": 3.0223882137828947e-06, "loss": 0.3398, "step": 7950 }, { "epoch": 1.9997484909456742, "grad_norm": 0.3038352131843567, "learning_rate": 3.021044369414693e-06, "loss": 0.3405, "step": 7951 }, { "epoch": 2.0, "grad_norm": 0.3005836606025696, "learning_rate": 3.0197006945316604e-06, "loss": 0.3303, "step": 7952 }, { "epoch": 2.000251509054326, "grad_norm": 0.3221258819103241, "learning_rate": 3.018357189248875e-06, "loss": 0.3005, "step": 7953 }, { "epoch": 2.000503018108652, "grad_norm": 0.31759047508239746, "learning_rate": 3.0170138536813984e-06, "loss": 0.3198, "step": 7954 }, { "epoch": 2.000754527162978, "grad_norm": 0.327646940946579, "learning_rate": 3.015670687944281e-06, "loss": 0.3294, "step": 7955 }, { "epoch": 2.0010060362173037, "grad_norm": 0.2987602949142456, "learning_rate": 3.014327692152554e-06, "loss": 0.3277, "step": 7956 }, { "epoch": 2.00125754527163, "grad_norm": 0.31388530135154724, "learning_rate": 3.012984866421238e-06, "loss": 0.333, "step": 7957 }, { "epoch": 2.0015090543259557, "grad_norm": 0.29405900835990906, "learning_rate": 3.0116422108653387e-06, "loss": 0.3272, "step": 7958 }, { "epoch": 2.0017605633802815, "grad_norm": 0.3055470287799835, "learning_rate": 3.0102997255998433e-06, "loss": 0.3186, "step": 7959 }, { "epoch": 2.0020120724346078, "grad_norm": 0.308438777923584, "learning_rate": 3.0089574107397306e-06, "loss": 0.3175, "step": 7960 }, { "epoch": 2.0022635814889336, "grad_norm": 0.3100303113460541, "learning_rate": 3.007615266399958e-06, "loss": 0.2978, "step": 7961 }, { "epoch": 2.0025150905432594, "grad_norm": 0.28166472911834717, "learning_rate": 3.006273292695475e-06, "loss": 0.3129, "step": 7962 }, { "epoch": 2.0027665995975856, "grad_norm": 0.2950986921787262, "learning_rate": 3.0049314897412106e-06, "loss": 0.3231, "step": 7963 }, { "epoch": 2.0030181086519114, "grad_norm": 0.3130662441253662, "learning_rate": 3.0035898576520844e-06, "loss": 0.3189, "step": 7964 }, { "epoch": 2.0032696177062372, "grad_norm": 0.3303288221359253, "learning_rate": 3.002248396542996e-06, "loss": 0.3069, "step": 7965 }, { "epoch": 2.0035211267605635, "grad_norm": 0.30254948139190674, "learning_rate": 3.000907106528836e-06, "loss": 0.3085, "step": 7966 }, { "epoch": 2.0037726358148893, "grad_norm": 0.3039596676826477, "learning_rate": 2.9995659877244736e-06, "loss": 0.3117, "step": 7967 }, { "epoch": 2.004024144869215, "grad_norm": 0.30226194858551025, "learning_rate": 2.9982250402447706e-06, "loss": 0.3352, "step": 7968 }, { "epoch": 2.0042756539235413, "grad_norm": 0.29361942410469055, "learning_rate": 2.9968842642045713e-06, "loss": 0.332, "step": 7969 }, { "epoch": 2.004527162977867, "grad_norm": 0.2824215888977051, "learning_rate": 2.9955436597187016e-06, "loss": 0.3018, "step": 7970 }, { "epoch": 2.004778672032193, "grad_norm": 0.3032889664173126, "learning_rate": 2.9942032269019792e-06, "loss": 0.3291, "step": 7971 }, { "epoch": 2.005030181086519, "grad_norm": 0.2901354432106018, "learning_rate": 2.9928629658692006e-06, "loss": 0.2918, "step": 7972 }, { "epoch": 2.005281690140845, "grad_norm": 0.3217187225818634, "learning_rate": 2.991522876735154e-06, "loss": 0.3387, "step": 7973 }, { "epoch": 2.005533199195171, "grad_norm": 0.326134592294693, "learning_rate": 2.9901829596146057e-06, "loss": 0.3327, "step": 7974 }, { "epoch": 2.005784708249497, "grad_norm": 0.30647000670433044, "learning_rate": 2.9888432146223167e-06, "loss": 0.3048, "step": 7975 }, { "epoch": 2.006036217303823, "grad_norm": 0.2938840389251709, "learning_rate": 2.9875036418730218e-06, "loss": 0.3367, "step": 7976 }, { "epoch": 2.006287726358149, "grad_norm": 0.3004414737224579, "learning_rate": 2.9861642414814502e-06, "loss": 0.3116, "step": 7977 }, { "epoch": 2.006539235412475, "grad_norm": 0.3037022650241852, "learning_rate": 2.984825013562315e-06, "loss": 0.3275, "step": 7978 }, { "epoch": 2.0067907444668007, "grad_norm": 0.3147777318954468, "learning_rate": 2.983485958230308e-06, "loss": 0.3348, "step": 7979 }, { "epoch": 2.007042253521127, "grad_norm": 0.30514058470726013, "learning_rate": 2.9821470756001148e-06, "loss": 0.3299, "step": 7980 }, { "epoch": 2.0072937625754528, "grad_norm": 0.33660414814949036, "learning_rate": 2.9808083657863994e-06, "loss": 0.3422, "step": 7981 }, { "epoch": 2.0075452716297786, "grad_norm": 0.29854559898376465, "learning_rate": 2.9794698289038183e-06, "loss": 0.319, "step": 7982 }, { "epoch": 2.007796780684105, "grad_norm": 0.31165847182273865, "learning_rate": 2.9781314650670033e-06, "loss": 0.3134, "step": 7983 }, { "epoch": 2.0080482897384306, "grad_norm": 0.3158718943595886, "learning_rate": 2.9767932743905813e-06, "loss": 0.3041, "step": 7984 }, { "epoch": 2.0082997987927564, "grad_norm": 0.3166303336620331, "learning_rate": 2.9754552569891566e-06, "loss": 0.3155, "step": 7985 }, { "epoch": 2.0085513078470827, "grad_norm": 0.3297767639160156, "learning_rate": 2.9741174129773253e-06, "loss": 0.3683, "step": 7986 }, { "epoch": 2.0088028169014085, "grad_norm": 0.2979738414287567, "learning_rate": 2.972779742469662e-06, "loss": 0.2971, "step": 7987 }, { "epoch": 2.0090543259557343, "grad_norm": 0.2994316518306732, "learning_rate": 2.971442245580731e-06, "loss": 0.3391, "step": 7988 }, { "epoch": 2.0093058350100605, "grad_norm": 0.31496062874794006, "learning_rate": 2.970104922425084e-06, "loss": 0.3329, "step": 7989 }, { "epoch": 2.0095573440643864, "grad_norm": 0.3337770700454712, "learning_rate": 2.9687677731172486e-06, "loss": 0.3214, "step": 7990 }, { "epoch": 2.009808853118712, "grad_norm": 0.31632599234580994, "learning_rate": 2.9674307977717486e-06, "loss": 0.3217, "step": 7991 }, { "epoch": 2.0100603621730384, "grad_norm": 0.33396008610725403, "learning_rate": 2.9660939965030826e-06, "loss": 0.3301, "step": 7992 }, { "epoch": 2.010311871227364, "grad_norm": 0.3123500347137451, "learning_rate": 2.9647573694257436e-06, "loss": 0.3126, "step": 7993 }, { "epoch": 2.01056338028169, "grad_norm": 0.31851232051849365, "learning_rate": 2.963420916654202e-06, "loss": 0.351, "step": 7994 }, { "epoch": 2.0108148893360163, "grad_norm": 0.31659290194511414, "learning_rate": 2.96208463830292e-06, "loss": 0.3098, "step": 7995 }, { "epoch": 2.011066398390342, "grad_norm": 0.31261491775512695, "learning_rate": 2.9607485344863375e-06, "loss": 0.3452, "step": 7996 }, { "epoch": 2.011317907444668, "grad_norm": 0.3377598822116852, "learning_rate": 2.9594126053188874e-06, "loss": 0.3072, "step": 7997 }, { "epoch": 2.011569416498994, "grad_norm": 0.3177874684333801, "learning_rate": 2.95807685091498e-06, "loss": 0.3158, "step": 7998 }, { "epoch": 2.01182092555332, "grad_norm": 0.3485296368598938, "learning_rate": 2.9567412713890163e-06, "loss": 0.3123, "step": 7999 }, { "epoch": 2.0120724346076457, "grad_norm": 0.3071430027484894, "learning_rate": 2.955405866855381e-06, "loss": 0.3191, "step": 8000 }, { "epoch": 2.012323943661972, "grad_norm": 0.30622658133506775, "learning_rate": 2.9540706374284423e-06, "loss": 0.3353, "step": 8001 }, { "epoch": 2.012575452716298, "grad_norm": 0.32716530561447144, "learning_rate": 2.9527355832225542e-06, "loss": 0.3129, "step": 8002 }, { "epoch": 2.0128269617706236, "grad_norm": 0.31558316946029663, "learning_rate": 2.9514007043520555e-06, "loss": 0.3257, "step": 8003 }, { "epoch": 2.01307847082495, "grad_norm": 0.3272193968296051, "learning_rate": 2.9500660009312698e-06, "loss": 0.3263, "step": 8004 }, { "epoch": 2.0133299798792756, "grad_norm": 0.30284756422042847, "learning_rate": 2.9487314730745075e-06, "loss": 0.3047, "step": 8005 }, { "epoch": 2.0135814889336014, "grad_norm": 0.3058840036392212, "learning_rate": 2.947397120896062e-06, "loss": 0.3387, "step": 8006 }, { "epoch": 2.0138329979879277, "grad_norm": 0.31466108560562134, "learning_rate": 2.9460629445102106e-06, "loss": 0.3362, "step": 8007 }, { "epoch": 2.0140845070422535, "grad_norm": 0.3358847200870514, "learning_rate": 2.944728944031221e-06, "loss": 0.3371, "step": 8008 }, { "epoch": 2.0143360160965793, "grad_norm": 0.28841134905815125, "learning_rate": 2.9433951195733374e-06, "loss": 0.302, "step": 8009 }, { "epoch": 2.0145875251509056, "grad_norm": 0.32336318492889404, "learning_rate": 2.9420614712507966e-06, "loss": 0.3229, "step": 8010 }, { "epoch": 2.0148390342052314, "grad_norm": 0.3032439053058624, "learning_rate": 2.940727999177817e-06, "loss": 0.3102, "step": 8011 }, { "epoch": 2.015090543259557, "grad_norm": 0.29863134026527405, "learning_rate": 2.939394703468601e-06, "loss": 0.3196, "step": 8012 }, { "epoch": 2.0153420523138834, "grad_norm": 0.30820432305336, "learning_rate": 2.9380615842373372e-06, "loss": 0.3429, "step": 8013 }, { "epoch": 2.015593561368209, "grad_norm": 0.32752588391304016, "learning_rate": 2.9367286415982e-06, "loss": 0.3339, "step": 8014 }, { "epoch": 2.015845070422535, "grad_norm": 0.3171319365501404, "learning_rate": 2.935395875665346e-06, "loss": 0.3269, "step": 8015 }, { "epoch": 2.0160965794768613, "grad_norm": 0.3042650520801544, "learning_rate": 2.9340632865529194e-06, "loss": 0.3174, "step": 8016 }, { "epoch": 2.016348088531187, "grad_norm": 0.3020657002925873, "learning_rate": 2.9327308743750483e-06, "loss": 0.3195, "step": 8017 }, { "epoch": 2.016599597585513, "grad_norm": 0.298667311668396, "learning_rate": 2.931398639245845e-06, "loss": 0.3408, "step": 8018 }, { "epoch": 2.016851106639839, "grad_norm": 0.30726972222328186, "learning_rate": 2.9300665812794073e-06, "loss": 0.2885, "step": 8019 }, { "epoch": 2.017102615694165, "grad_norm": 0.2989729642868042, "learning_rate": 2.9287347005898162e-06, "loss": 0.3166, "step": 8020 }, { "epoch": 2.0173541247484907, "grad_norm": 0.305683970451355, "learning_rate": 2.9274029972911404e-06, "loss": 0.3279, "step": 8021 }, { "epoch": 2.017605633802817, "grad_norm": 0.30576571822166443, "learning_rate": 2.926071471497434e-06, "loss": 0.3285, "step": 8022 }, { "epoch": 2.017857142857143, "grad_norm": 0.3315543830394745, "learning_rate": 2.9247401233227285e-06, "loss": 0.336, "step": 8023 }, { "epoch": 2.0181086519114686, "grad_norm": 0.3203072249889374, "learning_rate": 2.923408952881051e-06, "loss": 0.3441, "step": 8024 }, { "epoch": 2.018360160965795, "grad_norm": 0.3002256751060486, "learning_rate": 2.9220779602864035e-06, "loss": 0.3027, "step": 8025 }, { "epoch": 2.0186116700201207, "grad_norm": 0.3109540641307831, "learning_rate": 2.920747145652782e-06, "loss": 0.3432, "step": 8026 }, { "epoch": 2.0188631790744465, "grad_norm": 0.3139438033103943, "learning_rate": 2.9194165090941575e-06, "loss": 0.321, "step": 8027 }, { "epoch": 2.0191146881287727, "grad_norm": 0.32630038261413574, "learning_rate": 2.9180860507244936e-06, "loss": 0.3284, "step": 8028 }, { "epoch": 2.0193661971830985, "grad_norm": 0.31174954771995544, "learning_rate": 2.916755770657733e-06, "loss": 0.2986, "step": 8029 }, { "epoch": 2.0196177062374248, "grad_norm": 0.3617261052131653, "learning_rate": 2.915425669007812e-06, "loss": 0.323, "step": 8030 }, { "epoch": 2.0198692152917506, "grad_norm": 0.28990018367767334, "learning_rate": 2.914095745888638e-06, "loss": 0.3193, "step": 8031 }, { "epoch": 2.0201207243460764, "grad_norm": 0.2929520905017853, "learning_rate": 2.912766001414116e-06, "loss": 0.3319, "step": 8032 }, { "epoch": 2.0203722334004026, "grad_norm": 0.3127540647983551, "learning_rate": 2.9114364356981274e-06, "loss": 0.3179, "step": 8033 }, { "epoch": 2.0206237424547284, "grad_norm": 0.3118465840816498, "learning_rate": 2.9101070488545424e-06, "loss": 0.3215, "step": 8034 }, { "epoch": 2.0208752515090542, "grad_norm": 0.31171420216560364, "learning_rate": 2.9087778409972132e-06, "loss": 0.3351, "step": 8035 }, { "epoch": 2.0211267605633805, "grad_norm": 0.3322293758392334, "learning_rate": 2.9074488122399813e-06, "loss": 0.3329, "step": 8036 }, { "epoch": 2.0213782696177063, "grad_norm": 0.28532707691192627, "learning_rate": 2.906119962696666e-06, "loss": 0.3257, "step": 8037 }, { "epoch": 2.021629778672032, "grad_norm": 0.2843002676963806, "learning_rate": 2.9047912924810786e-06, "loss": 0.3388, "step": 8038 }, { "epoch": 2.0218812877263583, "grad_norm": 0.3153465688228607, "learning_rate": 2.9034628017070064e-06, "loss": 0.3385, "step": 8039 }, { "epoch": 2.022132796780684, "grad_norm": 0.29112154245376587, "learning_rate": 2.9021344904882324e-06, "loss": 0.3294, "step": 8040 }, { "epoch": 2.02238430583501, "grad_norm": 0.307882696390152, "learning_rate": 2.9008063589385127e-06, "loss": 0.2989, "step": 8041 }, { "epoch": 2.022635814889336, "grad_norm": 0.3012144863605499, "learning_rate": 2.899478407171598e-06, "loss": 0.3385, "step": 8042 }, { "epoch": 2.022887323943662, "grad_norm": 0.26836854219436646, "learning_rate": 2.8981506353012145e-06, "loss": 0.3189, "step": 8043 }, { "epoch": 2.023138832997988, "grad_norm": 0.29367396235466003, "learning_rate": 2.896823043441083e-06, "loss": 0.3315, "step": 8044 }, { "epoch": 2.023390342052314, "grad_norm": 0.29954785108566284, "learning_rate": 2.895495631704898e-06, "loss": 0.3305, "step": 8045 }, { "epoch": 2.02364185110664, "grad_norm": 0.28079143166542053, "learning_rate": 2.8941684002063473e-06, "loss": 0.3255, "step": 8046 }, { "epoch": 2.0238933601609657, "grad_norm": 0.28768080472946167, "learning_rate": 2.892841349059098e-06, "loss": 0.3205, "step": 8047 }, { "epoch": 2.024144869215292, "grad_norm": 0.29650044441223145, "learning_rate": 2.8915144783768047e-06, "loss": 0.3387, "step": 8048 }, { "epoch": 2.0243963782696177, "grad_norm": 0.2993822991847992, "learning_rate": 2.8901877882731076e-06, "loss": 0.293, "step": 8049 }, { "epoch": 2.0246478873239435, "grad_norm": 0.29370445013046265, "learning_rate": 2.8888612788616256e-06, "loss": 0.3055, "step": 8050 }, { "epoch": 2.0248993963782698, "grad_norm": 0.29673752188682556, "learning_rate": 2.887534950255969e-06, "loss": 0.2896, "step": 8051 }, { "epoch": 2.0251509054325956, "grad_norm": 0.2987542748451233, "learning_rate": 2.886208802569728e-06, "loss": 0.31, "step": 8052 }, { "epoch": 2.0254024144869214, "grad_norm": 0.30512362718582153, "learning_rate": 2.8848828359164797e-06, "loss": 0.3223, "step": 8053 }, { "epoch": 2.0256539235412476, "grad_norm": 0.3173840343952179, "learning_rate": 2.883557050409783e-06, "loss": 0.3265, "step": 8054 }, { "epoch": 2.0259054325955734, "grad_norm": 0.3171699047088623, "learning_rate": 2.882231446163187e-06, "loss": 0.3339, "step": 8055 }, { "epoch": 2.0261569416498992, "grad_norm": 0.3157178461551666, "learning_rate": 2.8809060232902165e-06, "loss": 0.3051, "step": 8056 }, { "epoch": 2.0264084507042255, "grad_norm": 0.3298434615135193, "learning_rate": 2.8795807819043898e-06, "loss": 0.3176, "step": 8057 }, { "epoch": 2.0266599597585513, "grad_norm": 0.290964812040329, "learning_rate": 2.878255722119202e-06, "loss": 0.3387, "step": 8058 }, { "epoch": 2.026911468812877, "grad_norm": 0.3040768504142761, "learning_rate": 2.87693084404814e-06, "loss": 0.3093, "step": 8059 }, { "epoch": 2.0271629778672033, "grad_norm": 0.3241004943847656, "learning_rate": 2.875606147804667e-06, "loss": 0.313, "step": 8060 }, { "epoch": 2.027414486921529, "grad_norm": 0.296888142824173, "learning_rate": 2.87428163350224e-06, "loss": 0.3126, "step": 8061 }, { "epoch": 2.027665995975855, "grad_norm": 0.29444047808647156, "learning_rate": 2.87295730125429e-06, "loss": 0.3275, "step": 8062 }, { "epoch": 2.027917505030181, "grad_norm": 0.2897621989250183, "learning_rate": 2.871633151174243e-06, "loss": 0.3072, "step": 8063 }, { "epoch": 2.028169014084507, "grad_norm": 0.31098416447639465, "learning_rate": 2.8703091833754993e-06, "loss": 0.3159, "step": 8064 }, { "epoch": 2.028420523138833, "grad_norm": 0.2953088581562042, "learning_rate": 2.8689853979714505e-06, "loss": 0.314, "step": 8065 }, { "epoch": 2.028672032193159, "grad_norm": 0.2950742840766907, "learning_rate": 2.8676617950754733e-06, "loss": 0.3236, "step": 8066 }, { "epoch": 2.028923541247485, "grad_norm": 0.29954662919044495, "learning_rate": 2.866338374800921e-06, "loss": 0.3318, "step": 8067 }, { "epoch": 2.0291750503018107, "grad_norm": 0.28929510712623596, "learning_rate": 2.8650151372611414e-06, "loss": 0.341, "step": 8068 }, { "epoch": 2.029426559356137, "grad_norm": 0.32224392890930176, "learning_rate": 2.8636920825694557e-06, "loss": 0.3236, "step": 8069 }, { "epoch": 2.0296780684104627, "grad_norm": 0.2966301739215851, "learning_rate": 2.8623692108391808e-06, "loss": 0.3327, "step": 8070 }, { "epoch": 2.0299295774647885, "grad_norm": 0.2904188930988312, "learning_rate": 2.8610465221836094e-06, "loss": 0.3303, "step": 8071 }, { "epoch": 2.0301810865191148, "grad_norm": 0.3125072419643402, "learning_rate": 2.859724016716022e-06, "loss": 0.3256, "step": 8072 }, { "epoch": 2.0304325955734406, "grad_norm": 0.3028983771800995, "learning_rate": 2.858401694549683e-06, "loss": 0.3228, "step": 8073 }, { "epoch": 2.0306841046277664, "grad_norm": 0.30204835534095764, "learning_rate": 2.8570795557978413e-06, "loss": 0.3266, "step": 8074 }, { "epoch": 2.0309356136820926, "grad_norm": 0.29210224747657776, "learning_rate": 2.8557576005737286e-06, "loss": 0.3212, "step": 8075 }, { "epoch": 2.0311871227364184, "grad_norm": 0.32311296463012695, "learning_rate": 2.854435828990563e-06, "loss": 0.3015, "step": 8076 }, { "epoch": 2.0314386317907447, "grad_norm": 0.3028305470943451, "learning_rate": 2.853114241161549e-06, "loss": 0.3349, "step": 8077 }, { "epoch": 2.0316901408450705, "grad_norm": 0.3027438223361969, "learning_rate": 2.851792837199866e-06, "loss": 0.3319, "step": 8078 }, { "epoch": 2.0319416498993963, "grad_norm": 0.30897650122642517, "learning_rate": 2.85047161721869e-06, "loss": 0.3249, "step": 8079 }, { "epoch": 2.0321931589537225, "grad_norm": 0.30179494619369507, "learning_rate": 2.849150581331169e-06, "loss": 0.3486, "step": 8080 }, { "epoch": 2.0324446680080483, "grad_norm": 0.3074062466621399, "learning_rate": 2.8478297296504487e-06, "loss": 0.3201, "step": 8081 }, { "epoch": 2.032696177062374, "grad_norm": 0.29912763833999634, "learning_rate": 2.846509062289646e-06, "loss": 0.3105, "step": 8082 }, { "epoch": 2.0329476861167004, "grad_norm": 0.32092058658599854, "learning_rate": 2.8451885793618716e-06, "loss": 0.3287, "step": 8083 }, { "epoch": 2.033199195171026, "grad_norm": 0.3172852694988251, "learning_rate": 2.8438682809802133e-06, "loss": 0.3266, "step": 8084 }, { "epoch": 2.033450704225352, "grad_norm": 0.2866196036338806, "learning_rate": 2.8425481672577494e-06, "loss": 0.3152, "step": 8085 }, { "epoch": 2.0337022132796783, "grad_norm": 0.3131830394268036, "learning_rate": 2.8412282383075362e-06, "loss": 0.3061, "step": 8086 }, { "epoch": 2.033953722334004, "grad_norm": 0.308741956949234, "learning_rate": 2.8399084942426193e-06, "loss": 0.3321, "step": 8087 }, { "epoch": 2.03420523138833, "grad_norm": 0.29598724842071533, "learning_rate": 2.8385889351760283e-06, "loss": 0.3332, "step": 8088 }, { "epoch": 2.034456740442656, "grad_norm": 0.2867431640625, "learning_rate": 2.8372695612207715e-06, "loss": 0.3383, "step": 8089 }, { "epoch": 2.034708249496982, "grad_norm": 0.30722376704216003, "learning_rate": 2.8359503724898485e-06, "loss": 0.3236, "step": 8090 }, { "epoch": 2.0349597585513077, "grad_norm": 0.305992990732193, "learning_rate": 2.8346313690962358e-06, "loss": 0.3416, "step": 8091 }, { "epoch": 2.035211267605634, "grad_norm": 0.30428627133369446, "learning_rate": 2.8333125511529012e-06, "loss": 0.3309, "step": 8092 }, { "epoch": 2.03546277665996, "grad_norm": 0.3013574779033661, "learning_rate": 2.8319939187727913e-06, "loss": 0.3321, "step": 8093 }, { "epoch": 2.0357142857142856, "grad_norm": 0.2905578315258026, "learning_rate": 2.83067547206884e-06, "loss": 0.2902, "step": 8094 }, { "epoch": 2.035965794768612, "grad_norm": 0.3262609839439392, "learning_rate": 2.8293572111539625e-06, "loss": 0.3395, "step": 8095 }, { "epoch": 2.0362173038229376, "grad_norm": 0.31515759229660034, "learning_rate": 2.8280391361410614e-06, "loss": 0.347, "step": 8096 }, { "epoch": 2.0364688128772634, "grad_norm": 0.3180714547634125, "learning_rate": 2.826721247143018e-06, "loss": 0.3489, "step": 8097 }, { "epoch": 2.0367203219315897, "grad_norm": 0.30598127841949463, "learning_rate": 2.825403544272706e-06, "loss": 0.3448, "step": 8098 }, { "epoch": 2.0369718309859155, "grad_norm": 0.3095637261867523, "learning_rate": 2.824086027642976e-06, "loss": 0.3007, "step": 8099 }, { "epoch": 2.0372233400402413, "grad_norm": 0.32037293910980225, "learning_rate": 2.822768697366664e-06, "loss": 0.3288, "step": 8100 }, { "epoch": 2.0374748490945676, "grad_norm": 0.32354459166526794, "learning_rate": 2.8214515535565946e-06, "loss": 0.3091, "step": 8101 }, { "epoch": 2.0377263581488934, "grad_norm": 0.28764376044273376, "learning_rate": 2.820134596325568e-06, "loss": 0.3336, "step": 8102 }, { "epoch": 2.037977867203219, "grad_norm": 0.31268617510795593, "learning_rate": 2.8188178257863784e-06, "loss": 0.3147, "step": 8103 }, { "epoch": 2.0382293762575454, "grad_norm": 0.3202211856842041, "learning_rate": 2.8175012420517954e-06, "loss": 0.3116, "step": 8104 }, { "epoch": 2.038480885311871, "grad_norm": 0.32938629388809204, "learning_rate": 2.8161848452345784e-06, "loss": 0.3222, "step": 8105 }, { "epoch": 2.038732394366197, "grad_norm": 0.31832754611968994, "learning_rate": 2.8148686354474657e-06, "loss": 0.3221, "step": 8106 }, { "epoch": 2.0389839034205233, "grad_norm": 0.29280537366867065, "learning_rate": 2.8135526128031864e-06, "loss": 0.3317, "step": 8107 }, { "epoch": 2.039235412474849, "grad_norm": 0.2781398594379425, "learning_rate": 2.8122367774144454e-06, "loss": 0.3139, "step": 8108 }, { "epoch": 2.039486921529175, "grad_norm": 0.33072832226753235, "learning_rate": 2.8109211293939376e-06, "loss": 0.3226, "step": 8109 }, { "epoch": 2.039738430583501, "grad_norm": 0.32349687814712524, "learning_rate": 2.809605668854343e-06, "loss": 0.3172, "step": 8110 }, { "epoch": 2.039989939637827, "grad_norm": 0.2853679358959198, "learning_rate": 2.8082903959083165e-06, "loss": 0.3007, "step": 8111 }, { "epoch": 2.0402414486921527, "grad_norm": 0.2898256778717041, "learning_rate": 2.8069753106685093e-06, "loss": 0.3371, "step": 8112 }, { "epoch": 2.040492957746479, "grad_norm": 0.31160539388656616, "learning_rate": 2.8056604132475445e-06, "loss": 0.3333, "step": 8113 }, { "epoch": 2.040744466800805, "grad_norm": 0.2927866578102112, "learning_rate": 2.80434570375804e-06, "loss": 0.307, "step": 8114 }, { "epoch": 2.0409959758551306, "grad_norm": 0.34478503465652466, "learning_rate": 2.8030311823125877e-06, "loss": 0.3505, "step": 8115 }, { "epoch": 2.041247484909457, "grad_norm": 0.29528501629829407, "learning_rate": 2.8017168490237735e-06, "loss": 0.328, "step": 8116 }, { "epoch": 2.0414989939637826, "grad_norm": 0.30806538462638855, "learning_rate": 2.8004027040041555e-06, "loss": 0.3352, "step": 8117 }, { "epoch": 2.0417505030181085, "grad_norm": 0.34178322553634644, "learning_rate": 2.7990887473662875e-06, "loss": 0.3195, "step": 8118 }, { "epoch": 2.0420020120724347, "grad_norm": 0.3116021454334259, "learning_rate": 2.7977749792226978e-06, "loss": 0.3202, "step": 8119 }, { "epoch": 2.0422535211267605, "grad_norm": 0.33535781502723694, "learning_rate": 2.7964613996859037e-06, "loss": 0.3406, "step": 8120 }, { "epoch": 2.0425050301810863, "grad_norm": 0.3147551417350769, "learning_rate": 2.795148008868408e-06, "loss": 0.312, "step": 8121 }, { "epoch": 2.0427565392354126, "grad_norm": 0.30535510182380676, "learning_rate": 2.7938348068826893e-06, "loss": 0.3357, "step": 8122 }, { "epoch": 2.0430080482897384, "grad_norm": 0.27879422903060913, "learning_rate": 2.79252179384122e-06, "loss": 0.2919, "step": 8123 }, { "epoch": 2.043259557344064, "grad_norm": 0.29287123680114746, "learning_rate": 2.791208969856447e-06, "loss": 0.3204, "step": 8124 }, { "epoch": 2.0435110663983904, "grad_norm": 0.30926448106765747, "learning_rate": 2.7898963350408093e-06, "loss": 0.3161, "step": 8125 }, { "epoch": 2.0437625754527162, "grad_norm": 0.29730355739593506, "learning_rate": 2.788583889506722e-06, "loss": 0.3248, "step": 8126 }, { "epoch": 2.044014084507042, "grad_norm": 0.3108483850955963, "learning_rate": 2.7872716333665928e-06, "loss": 0.3357, "step": 8127 }, { "epoch": 2.0442655935613683, "grad_norm": 0.3125215470790863, "learning_rate": 2.7859595667328027e-06, "loss": 0.3266, "step": 8128 }, { "epoch": 2.044517102615694, "grad_norm": 0.31513240933418274, "learning_rate": 2.784647689717725e-06, "loss": 0.3317, "step": 8129 }, { "epoch": 2.0447686116700203, "grad_norm": 0.344375342130661, "learning_rate": 2.7833360024337152e-06, "loss": 0.3385, "step": 8130 }, { "epoch": 2.045020120724346, "grad_norm": 0.309125691652298, "learning_rate": 2.782024504993108e-06, "loss": 0.337, "step": 8131 }, { "epoch": 2.045271629778672, "grad_norm": 0.3255023956298828, "learning_rate": 2.780713197508228e-06, "loss": 0.3483, "step": 8132 }, { "epoch": 2.045523138832998, "grad_norm": 0.3013308644294739, "learning_rate": 2.779402080091377e-06, "loss": 0.3441, "step": 8133 }, { "epoch": 2.045774647887324, "grad_norm": 0.34748101234436035, "learning_rate": 2.778091152854847e-06, "loss": 0.3485, "step": 8134 }, { "epoch": 2.04602615694165, "grad_norm": 0.3031235337257385, "learning_rate": 2.776780415910908e-06, "loss": 0.3231, "step": 8135 }, { "epoch": 2.046277665995976, "grad_norm": 0.3195328116416931, "learning_rate": 2.7754698693718206e-06, "loss": 0.308, "step": 8136 }, { "epoch": 2.046529175050302, "grad_norm": 0.2759738266468048, "learning_rate": 2.77415951334982e-06, "loss": 0.3014, "step": 8137 }, { "epoch": 2.0467806841046277, "grad_norm": 0.3147771656513214, "learning_rate": 2.772849347957134e-06, "loss": 0.3274, "step": 8138 }, { "epoch": 2.047032193158954, "grad_norm": 0.2973617911338806, "learning_rate": 2.7715393733059657e-06, "loss": 0.3163, "step": 8139 }, { "epoch": 2.0472837022132797, "grad_norm": 0.3099863529205322, "learning_rate": 2.7702295895085097e-06, "loss": 0.3241, "step": 8140 }, { "epoch": 2.0475352112676055, "grad_norm": 0.2844668924808502, "learning_rate": 2.768919996676942e-06, "loss": 0.3176, "step": 8141 }, { "epoch": 2.0477867203219318, "grad_norm": 0.34302541613578796, "learning_rate": 2.7676105949234168e-06, "loss": 0.3257, "step": 8142 }, { "epoch": 2.0480382293762576, "grad_norm": 0.32527220249176025, "learning_rate": 2.7663013843600805e-06, "loss": 0.3046, "step": 8143 }, { "epoch": 2.0482897384305834, "grad_norm": 0.31658607721328735, "learning_rate": 2.764992365099054e-06, "loss": 0.3138, "step": 8144 }, { "epoch": 2.0485412474849096, "grad_norm": 0.31021907925605774, "learning_rate": 2.7636835372524516e-06, "loss": 0.3358, "step": 8145 }, { "epoch": 2.0487927565392354, "grad_norm": 0.3204159736633301, "learning_rate": 2.7623749009323626e-06, "loss": 0.3216, "step": 8146 }, { "epoch": 2.0490442655935612, "grad_norm": 0.2946203947067261, "learning_rate": 2.761066456250866e-06, "loss": 0.309, "step": 8147 }, { "epoch": 2.0492957746478875, "grad_norm": 0.29937252402305603, "learning_rate": 2.759758203320019e-06, "loss": 0.326, "step": 8148 }, { "epoch": 2.0495472837022133, "grad_norm": 0.30016854405403137, "learning_rate": 2.7584501422518696e-06, "loss": 0.3309, "step": 8149 }, { "epoch": 2.049798792756539, "grad_norm": 0.3086334466934204, "learning_rate": 2.75714227315844e-06, "loss": 0.3039, "step": 8150 }, { "epoch": 2.0500503018108653, "grad_norm": 0.30553144216537476, "learning_rate": 2.7558345961517422e-06, "loss": 0.3332, "step": 8151 }, { "epoch": 2.050301810865191, "grad_norm": 0.32114413380622864, "learning_rate": 2.754527111343775e-06, "loss": 0.3279, "step": 8152 }, { "epoch": 2.050553319919517, "grad_norm": 0.3155249357223511, "learning_rate": 2.753219818846511e-06, "loss": 0.3356, "step": 8153 }, { "epoch": 2.050804828973843, "grad_norm": 0.30092206597328186, "learning_rate": 2.751912718771915e-06, "loss": 0.3133, "step": 8154 }, { "epoch": 2.051056338028169, "grad_norm": 0.2950682044029236, "learning_rate": 2.75060581123193e-06, "loss": 0.3122, "step": 8155 }, { "epoch": 2.051307847082495, "grad_norm": 0.3297138214111328, "learning_rate": 2.749299096338486e-06, "loss": 0.3259, "step": 8156 }, { "epoch": 2.051559356136821, "grad_norm": 0.29814019799232483, "learning_rate": 2.7479925742034926e-06, "loss": 0.3146, "step": 8157 }, { "epoch": 2.051810865191147, "grad_norm": 0.29181772470474243, "learning_rate": 2.7466862449388483e-06, "loss": 0.3268, "step": 8158 }, { "epoch": 2.0520623742454727, "grad_norm": 0.31455108523368835, "learning_rate": 2.7453801086564284e-06, "loss": 0.3424, "step": 8159 }, { "epoch": 2.052313883299799, "grad_norm": 0.2971799969673157, "learning_rate": 2.7440741654680995e-06, "loss": 0.3083, "step": 8160 }, { "epoch": 2.0525653923541247, "grad_norm": 0.3010028898715973, "learning_rate": 2.7427684154857036e-06, "loss": 0.3197, "step": 8161 }, { "epoch": 2.0528169014084505, "grad_norm": 0.30157679319381714, "learning_rate": 2.7414628588210736e-06, "loss": 0.3374, "step": 8162 }, { "epoch": 2.0530684104627768, "grad_norm": 0.3262830078601837, "learning_rate": 2.7401574955860177e-06, "loss": 0.332, "step": 8163 }, { "epoch": 2.0533199195171026, "grad_norm": 0.3156241178512573, "learning_rate": 2.7388523258923373e-06, "loss": 0.351, "step": 8164 }, { "epoch": 2.0535714285714284, "grad_norm": 0.28493210673332214, "learning_rate": 2.737547349851808e-06, "loss": 0.3083, "step": 8165 }, { "epoch": 2.0538229376257546, "grad_norm": 0.3349147140979767, "learning_rate": 2.7362425675761955e-06, "loss": 0.3097, "step": 8166 }, { "epoch": 2.0540744466800804, "grad_norm": 0.32758259773254395, "learning_rate": 2.7349379791772434e-06, "loss": 0.3391, "step": 8167 }, { "epoch": 2.0543259557344062, "grad_norm": 0.3020482361316681, "learning_rate": 2.733633584766685e-06, "loss": 0.3545, "step": 8168 }, { "epoch": 2.0545774647887325, "grad_norm": 0.31516534090042114, "learning_rate": 2.7323293844562305e-06, "loss": 0.3087, "step": 8169 }, { "epoch": 2.0548289738430583, "grad_norm": 0.2857208549976349, "learning_rate": 2.731025378357579e-06, "loss": 0.316, "step": 8170 }, { "epoch": 2.055080482897384, "grad_norm": 0.3077753186225891, "learning_rate": 2.729721566582407e-06, "loss": 0.3417, "step": 8171 }, { "epoch": 2.0553319919517103, "grad_norm": 0.31536969542503357, "learning_rate": 2.7284179492423825e-06, "loss": 0.3375, "step": 8172 }, { "epoch": 2.055583501006036, "grad_norm": 0.3076031804084778, "learning_rate": 2.7271145264491473e-06, "loss": 0.3405, "step": 8173 }, { "epoch": 2.055835010060362, "grad_norm": 0.30033233761787415, "learning_rate": 2.725811298314336e-06, "loss": 0.3091, "step": 8174 }, { "epoch": 2.056086519114688, "grad_norm": 0.28484421968460083, "learning_rate": 2.724508264949558e-06, "loss": 0.3294, "step": 8175 }, { "epoch": 2.056338028169014, "grad_norm": 0.29345014691352844, "learning_rate": 2.723205426466413e-06, "loss": 0.3031, "step": 8176 }, { "epoch": 2.0565895372233403, "grad_norm": 0.3001652657985687, "learning_rate": 2.7219027829764777e-06, "loss": 0.3259, "step": 8177 }, { "epoch": 2.056841046277666, "grad_norm": 0.29563117027282715, "learning_rate": 2.72060033459132e-06, "loss": 0.3285, "step": 8178 }, { "epoch": 2.057092555331992, "grad_norm": 0.28922346234321594, "learning_rate": 2.719298081422481e-06, "loss": 0.323, "step": 8179 }, { "epoch": 2.057344064386318, "grad_norm": 0.3013193905353546, "learning_rate": 2.7179960235814963e-06, "loss": 0.307, "step": 8180 }, { "epoch": 2.057595573440644, "grad_norm": 0.301833838224411, "learning_rate": 2.716694161179873e-06, "loss": 0.3321, "step": 8181 }, { "epoch": 2.0578470824949697, "grad_norm": 0.3148650825023651, "learning_rate": 2.7153924943291125e-06, "loss": 0.2919, "step": 8182 }, { "epoch": 2.058098591549296, "grad_norm": 0.3130474090576172, "learning_rate": 2.7140910231406915e-06, "loss": 0.3154, "step": 8183 }, { "epoch": 2.058350100603622, "grad_norm": 0.30896395444869995, "learning_rate": 2.7127897477260723e-06, "loss": 0.2989, "step": 8184 }, { "epoch": 2.0586016096579476, "grad_norm": 0.3000273108482361, "learning_rate": 2.711488668196706e-06, "loss": 0.3157, "step": 8185 }, { "epoch": 2.058853118712274, "grad_norm": 0.32448527216911316, "learning_rate": 2.710187784664015e-06, "loss": 0.3304, "step": 8186 }, { "epoch": 2.0591046277665996, "grad_norm": 0.327384889125824, "learning_rate": 2.708887097239418e-06, "loss": 0.3002, "step": 8187 }, { "epoch": 2.0593561368209254, "grad_norm": 0.340179443359375, "learning_rate": 2.7075866060343057e-06, "loss": 0.3246, "step": 8188 }, { "epoch": 2.0596076458752517, "grad_norm": 0.35086414217948914, "learning_rate": 2.706286311160061e-06, "loss": 0.3246, "step": 8189 }, { "epoch": 2.0598591549295775, "grad_norm": 0.2781917154788971, "learning_rate": 2.704986212728043e-06, "loss": 0.3393, "step": 8190 }, { "epoch": 2.0601106639839033, "grad_norm": 0.3145841360092163, "learning_rate": 2.7036863108495996e-06, "loss": 0.2988, "step": 8191 }, { "epoch": 2.0603621730382295, "grad_norm": 0.32019394636154175, "learning_rate": 2.702386605636057e-06, "loss": 0.3337, "step": 8192 }, { "epoch": 2.0606136820925554, "grad_norm": 0.2975797951221466, "learning_rate": 2.701087097198729e-06, "loss": 0.296, "step": 8193 }, { "epoch": 2.060865191146881, "grad_norm": 0.2773379385471344, "learning_rate": 2.6997877856489073e-06, "loss": 0.32, "step": 8194 }, { "epoch": 2.0611167002012074, "grad_norm": 0.31435322761535645, "learning_rate": 2.698488671097872e-06, "loss": 0.3106, "step": 8195 }, { "epoch": 2.061368209255533, "grad_norm": 0.32029151916503906, "learning_rate": 2.6971897536568853e-06, "loss": 0.3177, "step": 8196 }, { "epoch": 2.061619718309859, "grad_norm": 0.3515484035015106, "learning_rate": 2.695891033437188e-06, "loss": 0.3305, "step": 8197 }, { "epoch": 2.0618712273641853, "grad_norm": 0.29757028818130493, "learning_rate": 2.6945925105500117e-06, "loss": 0.3383, "step": 8198 }, { "epoch": 2.062122736418511, "grad_norm": 0.32565537095069885, "learning_rate": 2.693294185106562e-06, "loss": 0.3371, "step": 8199 }, { "epoch": 2.062374245472837, "grad_norm": 0.3038772642612457, "learning_rate": 2.691996057218036e-06, "loss": 0.3313, "step": 8200 }, { "epoch": 2.062625754527163, "grad_norm": 0.3129657506942749, "learning_rate": 2.6906981269956077e-06, "loss": 0.3235, "step": 8201 }, { "epoch": 2.062877263581489, "grad_norm": 0.3186679780483246, "learning_rate": 2.6894003945504393e-06, "loss": 0.3371, "step": 8202 }, { "epoch": 2.0631287726358147, "grad_norm": 0.29854926466941833, "learning_rate": 2.6881028599936705e-06, "loss": 0.3208, "step": 8203 }, { "epoch": 2.063380281690141, "grad_norm": 0.28703761100769043, "learning_rate": 2.6868055234364304e-06, "loss": 0.3137, "step": 8204 }, { "epoch": 2.063631790744467, "grad_norm": 0.3135847747325897, "learning_rate": 2.685508384989824e-06, "loss": 0.3263, "step": 8205 }, { "epoch": 2.0638832997987926, "grad_norm": 0.32722026109695435, "learning_rate": 2.684211444764945e-06, "loss": 0.3446, "step": 8206 }, { "epoch": 2.064134808853119, "grad_norm": 0.2794181704521179, "learning_rate": 2.6829147028728695e-06, "loss": 0.3239, "step": 8207 }, { "epoch": 2.0643863179074446, "grad_norm": 0.29036521911621094, "learning_rate": 2.6816181594246534e-06, "loss": 0.3363, "step": 8208 }, { "epoch": 2.0646378269617705, "grad_norm": 0.29604461789131165, "learning_rate": 2.6803218145313392e-06, "loss": 0.2908, "step": 8209 }, { "epoch": 2.0648893360160967, "grad_norm": 0.3000899851322174, "learning_rate": 2.6790256683039485e-06, "loss": 0.314, "step": 8210 }, { "epoch": 2.0651408450704225, "grad_norm": 0.3037608563899994, "learning_rate": 2.6777297208534903e-06, "loss": 0.3712, "step": 8211 }, { "epoch": 2.0653923541247483, "grad_norm": 0.30880311131477356, "learning_rate": 2.6764339722909523e-06, "loss": 0.3349, "step": 8212 }, { "epoch": 2.0656438631790746, "grad_norm": 0.3168122470378876, "learning_rate": 2.67513842272731e-06, "loss": 0.307, "step": 8213 }, { "epoch": 2.0658953722334004, "grad_norm": 0.29152563214302063, "learning_rate": 2.6738430722735155e-06, "loss": 0.3272, "step": 8214 }, { "epoch": 2.066146881287726, "grad_norm": 0.29437023401260376, "learning_rate": 2.6725479210405114e-06, "loss": 0.3171, "step": 8215 }, { "epoch": 2.0663983903420524, "grad_norm": 0.2880735397338867, "learning_rate": 2.671252969139216e-06, "loss": 0.3026, "step": 8216 }, { "epoch": 2.066649899396378, "grad_norm": 0.3284808099269867, "learning_rate": 2.669958216680535e-06, "loss": 0.3173, "step": 8217 }, { "epoch": 2.066901408450704, "grad_norm": 0.30216220021247864, "learning_rate": 2.668663663775357e-06, "loss": 0.3389, "step": 8218 }, { "epoch": 2.0671529175050303, "grad_norm": 0.3077835738658905, "learning_rate": 2.6673693105345506e-06, "loss": 0.318, "step": 8219 }, { "epoch": 2.067404426559356, "grad_norm": 0.3287010192871094, "learning_rate": 2.6660751570689715e-06, "loss": 0.3402, "step": 8220 }, { "epoch": 2.067655935613682, "grad_norm": 0.28192028403282166, "learning_rate": 2.6647812034894516e-06, "loss": 0.301, "step": 8221 }, { "epoch": 2.067907444668008, "grad_norm": 0.3122316598892212, "learning_rate": 2.6634874499068154e-06, "loss": 0.3272, "step": 8222 }, { "epoch": 2.068158953722334, "grad_norm": 0.2841907739639282, "learning_rate": 2.6621938964318593e-06, "loss": 0.3069, "step": 8223 }, { "epoch": 2.0684104627766597, "grad_norm": 0.2972943186759949, "learning_rate": 2.6609005431753733e-06, "loss": 0.3083, "step": 8224 }, { "epoch": 2.068661971830986, "grad_norm": 0.29851382970809937, "learning_rate": 2.65960739024812e-06, "loss": 0.3166, "step": 8225 }, { "epoch": 2.068913480885312, "grad_norm": 0.32675349712371826, "learning_rate": 2.658314437760855e-06, "loss": 0.3267, "step": 8226 }, { "epoch": 2.0691649899396376, "grad_norm": 0.2977888584136963, "learning_rate": 2.6570216858243057e-06, "loss": 0.3502, "step": 8227 }, { "epoch": 2.069416498993964, "grad_norm": 0.30917125940322876, "learning_rate": 2.655729134549192e-06, "loss": 0.3134, "step": 8228 }, { "epoch": 2.0696680080482897, "grad_norm": 0.3009907603263855, "learning_rate": 2.654436784046214e-06, "loss": 0.3215, "step": 8229 }, { "epoch": 2.069919517102616, "grad_norm": 0.29445791244506836, "learning_rate": 2.6531446344260503e-06, "loss": 0.3027, "step": 8230 }, { "epoch": 2.0701710261569417, "grad_norm": 0.2821619212627411, "learning_rate": 2.651852685799368e-06, "loss": 0.2985, "step": 8231 }, { "epoch": 2.0704225352112675, "grad_norm": 0.28280389308929443, "learning_rate": 2.6505609382768117e-06, "loss": 0.3142, "step": 8232 }, { "epoch": 2.0706740442655938, "grad_norm": 0.3183631896972656, "learning_rate": 2.649269391969015e-06, "loss": 0.3409, "step": 8233 }, { "epoch": 2.0709255533199196, "grad_norm": 0.30372148752212524, "learning_rate": 2.6479780469865864e-06, "loss": 0.3102, "step": 8234 }, { "epoch": 2.0711770623742454, "grad_norm": 0.29154670238494873, "learning_rate": 2.646686903440126e-06, "loss": 0.3123, "step": 8235 }, { "epoch": 2.0714285714285716, "grad_norm": 0.30079391598701477, "learning_rate": 2.645395961440208e-06, "loss": 0.3023, "step": 8236 }, { "epoch": 2.0716800804828974, "grad_norm": 0.304799348115921, "learning_rate": 2.6441052210973974e-06, "loss": 0.3398, "step": 8237 }, { "epoch": 2.0719315895372232, "grad_norm": 0.2910962998867035, "learning_rate": 2.6428146825222344e-06, "loss": 0.3074, "step": 8238 }, { "epoch": 2.0721830985915495, "grad_norm": 0.28885748982429504, "learning_rate": 2.641524345825248e-06, "loss": 0.3215, "step": 8239 }, { "epoch": 2.0724346076458753, "grad_norm": 0.28982120752334595, "learning_rate": 2.6402342111169476e-06, "loss": 0.324, "step": 8240 }, { "epoch": 2.072686116700201, "grad_norm": 0.30088311433792114, "learning_rate": 2.6389442785078227e-06, "loss": 0.325, "step": 8241 }, { "epoch": 2.0729376257545273, "grad_norm": 0.30647531151771545, "learning_rate": 2.637654548108352e-06, "loss": 0.3286, "step": 8242 }, { "epoch": 2.073189134808853, "grad_norm": 0.3003426492214203, "learning_rate": 2.636365020028988e-06, "loss": 0.3363, "step": 8243 }, { "epoch": 2.073440643863179, "grad_norm": 0.30914726853370667, "learning_rate": 2.635075694380176e-06, "loss": 0.3139, "step": 8244 }, { "epoch": 2.073692152917505, "grad_norm": 0.3180095851421356, "learning_rate": 2.633786571272333e-06, "loss": 0.3075, "step": 8245 }, { "epoch": 2.073943661971831, "grad_norm": 0.31122735142707825, "learning_rate": 2.6324976508158697e-06, "loss": 0.3222, "step": 8246 }, { "epoch": 2.074195171026157, "grad_norm": 0.30536743998527527, "learning_rate": 2.6312089331211693e-06, "loss": 0.3163, "step": 8247 }, { "epoch": 2.074446680080483, "grad_norm": 0.32808443903923035, "learning_rate": 2.6299204182986072e-06, "loss": 0.3442, "step": 8248 }, { "epoch": 2.074698189134809, "grad_norm": 0.2878994941711426, "learning_rate": 2.6286321064585315e-06, "loss": 0.3282, "step": 8249 }, { "epoch": 2.0749496981891347, "grad_norm": 0.303153395652771, "learning_rate": 2.6273439977112803e-06, "loss": 0.3128, "step": 8250 }, { "epoch": 2.075201207243461, "grad_norm": 0.29553067684173584, "learning_rate": 2.626056092167175e-06, "loss": 0.3316, "step": 8251 }, { "epoch": 2.0754527162977867, "grad_norm": 0.3366231918334961, "learning_rate": 2.6247683899365117e-06, "loss": 0.3172, "step": 8252 }, { "epoch": 2.0757042253521125, "grad_norm": 0.33030417561531067, "learning_rate": 2.623480891129579e-06, "loss": 0.3245, "step": 8253 }, { "epoch": 2.0759557344064388, "grad_norm": 0.30303242802619934, "learning_rate": 2.622193595856638e-06, "loss": 0.3321, "step": 8254 }, { "epoch": 2.0762072434607646, "grad_norm": 0.29723554849624634, "learning_rate": 2.6209065042279426e-06, "loss": 0.3069, "step": 8255 }, { "epoch": 2.0764587525150904, "grad_norm": 0.294047087430954, "learning_rate": 2.619619616353719e-06, "loss": 0.3519, "step": 8256 }, { "epoch": 2.0767102615694166, "grad_norm": 0.31295087933540344, "learning_rate": 2.618332932344185e-06, "loss": 0.3249, "step": 8257 }, { "epoch": 2.0769617706237424, "grad_norm": 0.3490563929080963, "learning_rate": 2.617046452309535e-06, "loss": 0.326, "step": 8258 }, { "epoch": 2.0772132796780682, "grad_norm": 0.30121055245399475, "learning_rate": 2.6157601763599504e-06, "loss": 0.3128, "step": 8259 }, { "epoch": 2.0774647887323945, "grad_norm": 0.29791775345802307, "learning_rate": 2.614474104605589e-06, "loss": 0.3224, "step": 8260 }, { "epoch": 2.0777162977867203, "grad_norm": 0.3081105947494507, "learning_rate": 2.613188237156596e-06, "loss": 0.3093, "step": 8261 }, { "epoch": 2.077967806841046, "grad_norm": 0.30400124192237854, "learning_rate": 2.6119025741231007e-06, "loss": 0.3172, "step": 8262 }, { "epoch": 2.0782193158953723, "grad_norm": 0.3303559422492981, "learning_rate": 2.610617115615208e-06, "loss": 0.3163, "step": 8263 }, { "epoch": 2.078470824949698, "grad_norm": 0.2974235415458679, "learning_rate": 2.609331861743014e-06, "loss": 0.3333, "step": 8264 }, { "epoch": 2.078722334004024, "grad_norm": 0.32158637046813965, "learning_rate": 2.608046812616588e-06, "loss": 0.328, "step": 8265 }, { "epoch": 2.07897384305835, "grad_norm": 0.3166216313838959, "learning_rate": 2.6067619683459904e-06, "loss": 0.317, "step": 8266 }, { "epoch": 2.079225352112676, "grad_norm": 0.2718227803707123, "learning_rate": 2.605477329041256e-06, "loss": 0.3305, "step": 8267 }, { "epoch": 2.079476861167002, "grad_norm": 0.33164918422698975, "learning_rate": 2.6041928948124107e-06, "loss": 0.322, "step": 8268 }, { "epoch": 2.079728370221328, "grad_norm": 0.3085431158542633, "learning_rate": 2.6029086657694537e-06, "loss": 0.3276, "step": 8269 }, { "epoch": 2.079979879275654, "grad_norm": 0.30075356364250183, "learning_rate": 2.6016246420223744e-06, "loss": 0.325, "step": 8270 }, { "epoch": 2.0802313883299797, "grad_norm": 0.3089926540851593, "learning_rate": 2.600340823681139e-06, "loss": 0.3003, "step": 8271 }, { "epoch": 2.080482897384306, "grad_norm": 0.3290434777736664, "learning_rate": 2.5990572108557e-06, "loss": 0.3306, "step": 8272 }, { "epoch": 2.0807344064386317, "grad_norm": 0.3045661747455597, "learning_rate": 2.597773803655993e-06, "loss": 0.3234, "step": 8273 }, { "epoch": 2.080985915492958, "grad_norm": 0.2929016053676605, "learning_rate": 2.596490602191929e-06, "loss": 0.312, "step": 8274 }, { "epoch": 2.0812374245472838, "grad_norm": 0.2901984453201294, "learning_rate": 2.59520760657341e-06, "loss": 0.3147, "step": 8275 }, { "epoch": 2.0814889336016096, "grad_norm": 0.33070576190948486, "learning_rate": 2.5939248169103136e-06, "loss": 0.3309, "step": 8276 }, { "epoch": 2.081740442655936, "grad_norm": 0.3062666654586792, "learning_rate": 2.5926422333125066e-06, "loss": 0.3098, "step": 8277 }, { "epoch": 2.0819919517102616, "grad_norm": 0.31025469303131104, "learning_rate": 2.59135985588983e-06, "loss": 0.3265, "step": 8278 }, { "epoch": 2.0822434607645874, "grad_norm": 0.29651591181755066, "learning_rate": 2.5900776847521148e-06, "loss": 0.3186, "step": 8279 }, { "epoch": 2.0824949698189137, "grad_norm": 0.31347307562828064, "learning_rate": 2.588795720009168e-06, "loss": 0.3182, "step": 8280 }, { "epoch": 2.0827464788732395, "grad_norm": 0.3125113546848297, "learning_rate": 2.587513961770785e-06, "loss": 0.339, "step": 8281 }, { "epoch": 2.0829979879275653, "grad_norm": 0.3104122281074524, "learning_rate": 2.586232410146737e-06, "loss": 0.3019, "step": 8282 }, { "epoch": 2.0832494969818915, "grad_norm": 0.7815317511558533, "learning_rate": 2.584951065246784e-06, "loss": 0.3071, "step": 8283 }, { "epoch": 2.0835010060362174, "grad_norm": 0.32731983065605164, "learning_rate": 2.583669927180662e-06, "loss": 0.3127, "step": 8284 }, { "epoch": 2.083752515090543, "grad_norm": 0.3034411668777466, "learning_rate": 2.5823889960580967e-06, "loss": 0.3335, "step": 8285 }, { "epoch": 2.0840040241448694, "grad_norm": 0.30236032605171204, "learning_rate": 2.581108271988787e-06, "loss": 0.3237, "step": 8286 }, { "epoch": 2.084255533199195, "grad_norm": 0.3101537525653839, "learning_rate": 2.5798277550824238e-06, "loss": 0.2826, "step": 8287 }, { "epoch": 2.084507042253521, "grad_norm": 0.2881515324115753, "learning_rate": 2.5785474454486696e-06, "loss": 0.3407, "step": 8288 }, { "epoch": 2.0847585513078473, "grad_norm": 0.31731492280960083, "learning_rate": 2.5772673431971805e-06, "loss": 0.3017, "step": 8289 }, { "epoch": 2.085010060362173, "grad_norm": 0.29667213559150696, "learning_rate": 2.575987448437586e-06, "loss": 0.3199, "step": 8290 }, { "epoch": 2.085261569416499, "grad_norm": 0.29330283403396606, "learning_rate": 2.574707761279503e-06, "loss": 0.3388, "step": 8291 }, { "epoch": 2.085513078470825, "grad_norm": 0.2838901877403259, "learning_rate": 2.5734282818325256e-06, "loss": 0.3097, "step": 8292 }, { "epoch": 2.085764587525151, "grad_norm": 0.3035966753959656, "learning_rate": 2.5721490102062373e-06, "loss": 0.3277, "step": 8293 }, { "epoch": 2.0860160965794767, "grad_norm": 0.32544568181037903, "learning_rate": 2.570869946510196e-06, "loss": 0.347, "step": 8294 }, { "epoch": 2.086267605633803, "grad_norm": 0.30072399973869324, "learning_rate": 2.5695910908539494e-06, "loss": 0.3299, "step": 8295 }, { "epoch": 2.086519114688129, "grad_norm": 0.3020775318145752, "learning_rate": 2.568312443347019e-06, "loss": 0.3178, "step": 8296 }, { "epoch": 2.0867706237424546, "grad_norm": 0.3153866231441498, "learning_rate": 2.567034004098917e-06, "loss": 0.3386, "step": 8297 }, { "epoch": 2.087022132796781, "grad_norm": 0.29892367124557495, "learning_rate": 2.565755773219131e-06, "loss": 0.3268, "step": 8298 }, { "epoch": 2.0872736418511066, "grad_norm": 0.30764761567115784, "learning_rate": 2.564477750817135e-06, "loss": 0.3481, "step": 8299 }, { "epoch": 2.0875251509054324, "grad_norm": 0.2927669882774353, "learning_rate": 2.563199937002382e-06, "loss": 0.3064, "step": 8300 }, { "epoch": 2.0877766599597587, "grad_norm": 0.30873027443885803, "learning_rate": 2.561922331884311e-06, "loss": 0.3375, "step": 8301 }, { "epoch": 2.0880281690140845, "grad_norm": 0.30623045563697815, "learning_rate": 2.560644935572338e-06, "loss": 0.3257, "step": 8302 }, { "epoch": 2.0882796780684103, "grad_norm": 0.29799333214759827, "learning_rate": 2.559367748175867e-06, "loss": 0.3113, "step": 8303 }, { "epoch": 2.0885311871227366, "grad_norm": 0.31221136450767517, "learning_rate": 2.5580907698042802e-06, "loss": 0.3235, "step": 8304 }, { "epoch": 2.0887826961770624, "grad_norm": 0.30643758177757263, "learning_rate": 2.5568140005669414e-06, "loss": 0.3348, "step": 8305 }, { "epoch": 2.089034205231388, "grad_norm": 0.3108401894569397, "learning_rate": 2.5555374405732e-06, "loss": 0.3332, "step": 8306 }, { "epoch": 2.0892857142857144, "grad_norm": 0.31820324063301086, "learning_rate": 2.5542610899323826e-06, "loss": 0.3098, "step": 8307 }, { "epoch": 2.08953722334004, "grad_norm": 0.30521127581596375, "learning_rate": 2.552984948753805e-06, "loss": 0.3333, "step": 8308 }, { "epoch": 2.089788732394366, "grad_norm": 0.304718941450119, "learning_rate": 2.5517090171467557e-06, "loss": 0.3161, "step": 8309 }, { "epoch": 2.0900402414486923, "grad_norm": 0.33663034439086914, "learning_rate": 2.550433295220515e-06, "loss": 0.3282, "step": 8310 }, { "epoch": 2.090291750503018, "grad_norm": 0.3158341646194458, "learning_rate": 2.549157783084335e-06, "loss": 0.3157, "step": 8311 }, { "epoch": 2.090543259557344, "grad_norm": 0.3130324184894562, "learning_rate": 2.5478824808474613e-06, "loss": 0.3192, "step": 8312 }, { "epoch": 2.09079476861167, "grad_norm": 0.3025255799293518, "learning_rate": 2.546607388619111e-06, "loss": 0.3188, "step": 8313 }, { "epoch": 2.091046277665996, "grad_norm": 0.3400377333164215, "learning_rate": 2.5453325065084887e-06, "loss": 0.3063, "step": 8314 }, { "epoch": 2.0912977867203217, "grad_norm": 0.300741046667099, "learning_rate": 2.5440578346247834e-06, "loss": 0.3058, "step": 8315 }, { "epoch": 2.091549295774648, "grad_norm": 0.31331273913383484, "learning_rate": 2.5427833730771577e-06, "loss": 0.3098, "step": 8316 }, { "epoch": 2.091800804828974, "grad_norm": 0.2970178425312042, "learning_rate": 2.541509121974766e-06, "loss": 0.3035, "step": 8317 }, { "epoch": 2.0920523138832996, "grad_norm": 0.31109151244163513, "learning_rate": 2.5402350814267364e-06, "loss": 0.3272, "step": 8318 }, { "epoch": 2.092303822937626, "grad_norm": 0.29535481333732605, "learning_rate": 2.538961251542185e-06, "loss": 0.3169, "step": 8319 }, { "epoch": 2.0925553319919517, "grad_norm": 0.30848875641822815, "learning_rate": 2.5376876324302045e-06, "loss": 0.3327, "step": 8320 }, { "epoch": 2.0928068410462775, "grad_norm": 0.28316324949264526, "learning_rate": 2.5364142241998755e-06, "loss": 0.3399, "step": 8321 }, { "epoch": 2.0930583501006037, "grad_norm": 0.3024502992630005, "learning_rate": 2.535141026960255e-06, "loss": 0.3063, "step": 8322 }, { "epoch": 2.0933098591549295, "grad_norm": 0.33002567291259766, "learning_rate": 2.5338680408203875e-06, "loss": 0.3255, "step": 8323 }, { "epoch": 2.0935613682092553, "grad_norm": 0.2928857207298279, "learning_rate": 2.5325952658892916e-06, "loss": 0.3213, "step": 8324 }, { "epoch": 2.0938128772635816, "grad_norm": 0.30271077156066895, "learning_rate": 2.531322702275976e-06, "loss": 0.3324, "step": 8325 }, { "epoch": 2.0940643863179074, "grad_norm": 0.2813225984573364, "learning_rate": 2.530050350089428e-06, "loss": 0.3231, "step": 8326 }, { "epoch": 2.094315895372233, "grad_norm": 0.3095519542694092, "learning_rate": 2.528778209438614e-06, "loss": 0.3165, "step": 8327 }, { "epoch": 2.0945674044265594, "grad_norm": 0.30373334884643555, "learning_rate": 2.527506280432488e-06, "loss": 0.3349, "step": 8328 }, { "epoch": 2.0948189134808852, "grad_norm": 0.30294981598854065, "learning_rate": 2.5262345631799794e-06, "loss": 0.3178, "step": 8329 }, { "epoch": 2.0950704225352115, "grad_norm": 0.3043777644634247, "learning_rate": 2.524963057790007e-06, "loss": 0.3115, "step": 8330 }, { "epoch": 2.0953219315895373, "grad_norm": 0.288493275642395, "learning_rate": 2.5236917643714628e-06, "loss": 0.3383, "step": 8331 }, { "epoch": 2.095573440643863, "grad_norm": 0.29598721861839294, "learning_rate": 2.5224206830332286e-06, "loss": 0.3202, "step": 8332 }, { "epoch": 2.0958249496981893, "grad_norm": 0.299293577671051, "learning_rate": 2.521149813884162e-06, "loss": 0.3185, "step": 8333 }, { "epoch": 2.096076458752515, "grad_norm": 0.2833903729915619, "learning_rate": 2.5198791570331083e-06, "loss": 0.3333, "step": 8334 }, { "epoch": 2.096327967806841, "grad_norm": 0.33051058650016785, "learning_rate": 2.5186087125888863e-06, "loss": 0.3338, "step": 8335 }, { "epoch": 2.096579476861167, "grad_norm": 0.293345183134079, "learning_rate": 2.5173384806603052e-06, "loss": 0.3244, "step": 8336 }, { "epoch": 2.096830985915493, "grad_norm": 0.2857302725315094, "learning_rate": 2.516068461356154e-06, "loss": 0.2933, "step": 8337 }, { "epoch": 2.097082494969819, "grad_norm": 0.3116956949234009, "learning_rate": 2.514798654785197e-06, "loss": 0.315, "step": 8338 }, { "epoch": 2.097334004024145, "grad_norm": 0.278083473443985, "learning_rate": 2.51352906105619e-06, "loss": 0.3115, "step": 8339 }, { "epoch": 2.097585513078471, "grad_norm": 0.30503949522972107, "learning_rate": 2.512259680277862e-06, "loss": 0.3144, "step": 8340 }, { "epoch": 2.0978370221327967, "grad_norm": 0.3300677537918091, "learning_rate": 2.510990512558931e-06, "loss": 0.3183, "step": 8341 }, { "epoch": 2.098088531187123, "grad_norm": 0.3177797496318817, "learning_rate": 2.509721558008089e-06, "loss": 0.3577, "step": 8342 }, { "epoch": 2.0983400402414487, "grad_norm": 0.3060501515865326, "learning_rate": 2.508452816734019e-06, "loss": 0.3328, "step": 8343 }, { "epoch": 2.0985915492957745, "grad_norm": 0.3117055594921112, "learning_rate": 2.507184288845376e-06, "loss": 0.314, "step": 8344 }, { "epoch": 2.0988430583501008, "grad_norm": 0.31024450063705444, "learning_rate": 2.5059159744508055e-06, "loss": 0.2982, "step": 8345 }, { "epoch": 2.0990945674044266, "grad_norm": 0.33131489157676697, "learning_rate": 2.5046478736589264e-06, "loss": 0.3093, "step": 8346 }, { "epoch": 2.0993460764587524, "grad_norm": 0.3035459816455841, "learning_rate": 2.503379986578347e-06, "loss": 0.321, "step": 8347 }, { "epoch": 2.0995975855130786, "grad_norm": 0.3351083993911743, "learning_rate": 2.502112313317654e-06, "loss": 0.3098, "step": 8348 }, { "epoch": 2.0998490945674044, "grad_norm": 0.3084772825241089, "learning_rate": 2.5008448539854134e-06, "loss": 0.3327, "step": 8349 }, { "epoch": 2.1001006036217302, "grad_norm": 0.3130403459072113, "learning_rate": 2.499577608690178e-06, "loss": 0.3342, "step": 8350 }, { "epoch": 2.1003521126760565, "grad_norm": 0.32867324352264404, "learning_rate": 2.498310577540476e-06, "loss": 0.3216, "step": 8351 }, { "epoch": 2.1006036217303823, "grad_norm": 0.3281696140766144, "learning_rate": 2.4970437606448245e-06, "loss": 0.3087, "step": 8352 }, { "epoch": 2.100855130784708, "grad_norm": 0.3127487897872925, "learning_rate": 2.495777158111714e-06, "loss": 0.3194, "step": 8353 }, { "epoch": 2.1011066398390343, "grad_norm": 0.29324328899383545, "learning_rate": 2.4945107700496263e-06, "loss": 0.3323, "step": 8354 }, { "epoch": 2.10135814889336, "grad_norm": 0.27764442563056946, "learning_rate": 2.4932445965670145e-06, "loss": 0.2898, "step": 8355 }, { "epoch": 2.101609657947686, "grad_norm": 0.3088850677013397, "learning_rate": 2.4919786377723225e-06, "loss": 0.3096, "step": 8356 }, { "epoch": 2.101861167002012, "grad_norm": 0.29290542006492615, "learning_rate": 2.490712893773968e-06, "loss": 0.3038, "step": 8357 }, { "epoch": 2.102112676056338, "grad_norm": 0.30638036131858826, "learning_rate": 2.489447364680357e-06, "loss": 0.3554, "step": 8358 }, { "epoch": 2.102364185110664, "grad_norm": 0.3109561800956726, "learning_rate": 2.4881820505998743e-06, "loss": 0.319, "step": 8359 }, { "epoch": 2.10261569416499, "grad_norm": 0.2938198745250702, "learning_rate": 2.486916951640884e-06, "loss": 0.319, "step": 8360 }, { "epoch": 2.102867203219316, "grad_norm": 0.32792484760284424, "learning_rate": 2.4856520679117357e-06, "loss": 0.3347, "step": 8361 }, { "epoch": 2.1031187122736417, "grad_norm": 0.3298247158527374, "learning_rate": 2.4843873995207567e-06, "loss": 0.3105, "step": 8362 }, { "epoch": 2.103370221327968, "grad_norm": 0.32708612084388733, "learning_rate": 2.483122946576262e-06, "loss": 0.3043, "step": 8363 }, { "epoch": 2.1036217303822937, "grad_norm": 0.31453192234039307, "learning_rate": 2.4818587091865386e-06, "loss": 0.3036, "step": 8364 }, { "epoch": 2.1038732394366195, "grad_norm": 0.2795354425907135, "learning_rate": 2.480594687459865e-06, "loss": 0.3167, "step": 8365 }, { "epoch": 2.1041247484909458, "grad_norm": 0.29401612281799316, "learning_rate": 2.4793308815044943e-06, "loss": 0.3266, "step": 8366 }, { "epoch": 2.1043762575452716, "grad_norm": 0.3023243844509125, "learning_rate": 2.4780672914286652e-06, "loss": 0.322, "step": 8367 }, { "epoch": 2.1046277665995974, "grad_norm": 0.3229201138019562, "learning_rate": 2.476803917340594e-06, "loss": 0.3315, "step": 8368 }, { "epoch": 2.1048792756539236, "grad_norm": 0.3104352355003357, "learning_rate": 2.475540759348482e-06, "loss": 0.3132, "step": 8369 }, { "epoch": 2.1051307847082494, "grad_norm": 0.30430683493614197, "learning_rate": 2.474277817560513e-06, "loss": 0.3303, "step": 8370 }, { "epoch": 2.1053822937625752, "grad_norm": 0.296283483505249, "learning_rate": 2.473015092084846e-06, "loss": 0.3168, "step": 8371 }, { "epoch": 2.1056338028169015, "grad_norm": 0.31060439348220825, "learning_rate": 2.4717525830296295e-06, "loss": 0.3165, "step": 8372 }, { "epoch": 2.1058853118712273, "grad_norm": 0.3210805654525757, "learning_rate": 2.470490290502986e-06, "loss": 0.3222, "step": 8373 }, { "epoch": 2.1061368209255535, "grad_norm": 0.3033636212348938, "learning_rate": 2.4692282146130266e-06, "loss": 0.3395, "step": 8374 }, { "epoch": 2.1063883299798793, "grad_norm": 0.3115089535713196, "learning_rate": 2.4679663554678357e-06, "loss": 0.3245, "step": 8375 }, { "epoch": 2.106639839034205, "grad_norm": 0.319614976644516, "learning_rate": 2.4667047131754884e-06, "loss": 0.3062, "step": 8376 }, { "epoch": 2.1068913480885314, "grad_norm": 0.30483153462409973, "learning_rate": 2.465443287844032e-06, "loss": 0.3157, "step": 8377 }, { "epoch": 2.107142857142857, "grad_norm": 0.30842599272727966, "learning_rate": 2.464182079581504e-06, "loss": 0.3497, "step": 8378 }, { "epoch": 2.107394366197183, "grad_norm": 0.30280083417892456, "learning_rate": 2.462921088495915e-06, "loss": 0.3356, "step": 8379 }, { "epoch": 2.1076458752515093, "grad_norm": 0.2851159870624542, "learning_rate": 2.4616603146952628e-06, "loss": 0.3035, "step": 8380 }, { "epoch": 2.107897384305835, "grad_norm": 0.3186687231063843, "learning_rate": 2.4603997582875266e-06, "loss": 0.322, "step": 8381 }, { "epoch": 2.108148893360161, "grad_norm": 0.2962452471256256, "learning_rate": 2.4591394193806615e-06, "loss": 0.3278, "step": 8382 }, { "epoch": 2.108400402414487, "grad_norm": 0.29877567291259766, "learning_rate": 2.4578792980826114e-06, "loss": 0.3511, "step": 8383 }, { "epoch": 2.108651911468813, "grad_norm": 0.2987919747829437, "learning_rate": 2.4566193945012946e-06, "loss": 0.3165, "step": 8384 }, { "epoch": 2.1089034205231387, "grad_norm": 0.28243282437324524, "learning_rate": 2.4553597087446163e-06, "loss": 0.3505, "step": 8385 }, { "epoch": 2.109154929577465, "grad_norm": 0.31294065713882446, "learning_rate": 2.4541002409204584e-06, "loss": 0.3179, "step": 8386 }, { "epoch": 2.109406438631791, "grad_norm": 0.2762846052646637, "learning_rate": 2.4528409911366897e-06, "loss": 0.3258, "step": 8387 }, { "epoch": 2.1096579476861166, "grad_norm": 0.28698083758354187, "learning_rate": 2.4515819595011532e-06, "loss": 0.3219, "step": 8388 }, { "epoch": 2.109909456740443, "grad_norm": 0.32928720116615295, "learning_rate": 2.450323146121681e-06, "loss": 0.3351, "step": 8389 }, { "epoch": 2.1101609657947686, "grad_norm": 0.30943191051483154, "learning_rate": 2.4490645511060784e-06, "loss": 0.3292, "step": 8390 }, { "epoch": 2.1104124748490944, "grad_norm": 0.2983676493167877, "learning_rate": 2.4478061745621383e-06, "loss": 0.3354, "step": 8391 }, { "epoch": 2.1106639839034207, "grad_norm": 0.29229018092155457, "learning_rate": 2.446548016597635e-06, "loss": 0.3172, "step": 8392 }, { "epoch": 2.1109154929577465, "grad_norm": 0.30713313817977905, "learning_rate": 2.4452900773203182e-06, "loss": 0.3452, "step": 8393 }, { "epoch": 2.1111670020120723, "grad_norm": 0.3006666600704193, "learning_rate": 2.4440323568379255e-06, "loss": 0.2936, "step": 8394 }, { "epoch": 2.1114185110663986, "grad_norm": 0.30126234889030457, "learning_rate": 2.4427748552581694e-06, "loss": 0.3048, "step": 8395 }, { "epoch": 2.1116700201207244, "grad_norm": 0.2818053960800171, "learning_rate": 2.4415175726887513e-06, "loss": 0.3103, "step": 8396 }, { "epoch": 2.11192152917505, "grad_norm": 0.2906033992767334, "learning_rate": 2.440260509237345e-06, "loss": 0.3261, "step": 8397 }, { "epoch": 2.1121730382293764, "grad_norm": 0.28882506489753723, "learning_rate": 2.4390036650116144e-06, "loss": 0.3137, "step": 8398 }, { "epoch": 2.112424547283702, "grad_norm": 0.31607574224472046, "learning_rate": 2.4377470401191965e-06, "loss": 0.3077, "step": 8399 }, { "epoch": 2.112676056338028, "grad_norm": 0.3000749945640564, "learning_rate": 2.436490634667717e-06, "loss": 0.3225, "step": 8400 }, { "epoch": 2.1129275653923543, "grad_norm": 0.29554128646850586, "learning_rate": 2.4352344487647755e-06, "loss": 0.3399, "step": 8401 }, { "epoch": 2.11317907444668, "grad_norm": 0.308362752199173, "learning_rate": 2.4339784825179606e-06, "loss": 0.3279, "step": 8402 }, { "epoch": 2.113430583501006, "grad_norm": 0.28427183628082275, "learning_rate": 2.4327227360348333e-06, "loss": 0.3256, "step": 8403 }, { "epoch": 2.113682092555332, "grad_norm": 0.3028574287891388, "learning_rate": 2.431467209422945e-06, "loss": 0.3003, "step": 8404 }, { "epoch": 2.113933601609658, "grad_norm": 0.32093802094459534, "learning_rate": 2.4302119027898195e-06, "loss": 0.3018, "step": 8405 }, { "epoch": 2.1141851106639837, "grad_norm": 0.3018190562725067, "learning_rate": 2.42895681624297e-06, "loss": 0.3397, "step": 8406 }, { "epoch": 2.11443661971831, "grad_norm": 0.30792275071144104, "learning_rate": 2.427701949889883e-06, "loss": 0.3254, "step": 8407 }, { "epoch": 2.114688128772636, "grad_norm": 0.2988547384738922, "learning_rate": 2.426447303838033e-06, "loss": 0.3281, "step": 8408 }, { "epoch": 2.1149396378269616, "grad_norm": 0.2943371832370758, "learning_rate": 2.4251928781948704e-06, "loss": 0.3151, "step": 8409 }, { "epoch": 2.115191146881288, "grad_norm": 0.31441688537597656, "learning_rate": 2.423938673067831e-06, "loss": 0.3286, "step": 8410 }, { "epoch": 2.1154426559356136, "grad_norm": 0.31938257813453674, "learning_rate": 2.422684688564327e-06, "loss": 0.3401, "step": 8411 }, { "epoch": 2.1156941649899395, "grad_norm": 0.29643604159355164, "learning_rate": 2.4214309247917558e-06, "loss": 0.3549, "step": 8412 }, { "epoch": 2.1159456740442657, "grad_norm": 0.30069106817245483, "learning_rate": 2.4201773818574956e-06, "loss": 0.3058, "step": 8413 }, { "epoch": 2.1161971830985915, "grad_norm": 0.2893010079860687, "learning_rate": 2.4189240598689025e-06, "loss": 0.3402, "step": 8414 }, { "epoch": 2.1164486921529173, "grad_norm": 0.30323195457458496, "learning_rate": 2.4176709589333173e-06, "loss": 0.3333, "step": 8415 }, { "epoch": 2.1167002012072436, "grad_norm": 0.29671111702919006, "learning_rate": 2.4164180791580584e-06, "loss": 0.3183, "step": 8416 }, { "epoch": 2.1169517102615694, "grad_norm": 0.2824574112892151, "learning_rate": 2.41516542065043e-06, "loss": 0.3322, "step": 8417 }, { "epoch": 2.117203219315895, "grad_norm": 0.30182933807373047, "learning_rate": 2.4139129835177104e-06, "loss": 0.313, "step": 8418 }, { "epoch": 2.1174547283702214, "grad_norm": 0.325040727853775, "learning_rate": 2.4126607678671672e-06, "loss": 0.3391, "step": 8419 }, { "epoch": 2.1177062374245472, "grad_norm": 0.3018413484096527, "learning_rate": 2.411408773806041e-06, "loss": 0.316, "step": 8420 }, { "epoch": 2.117957746478873, "grad_norm": 0.29537421464920044, "learning_rate": 2.410157001441561e-06, "loss": 0.3042, "step": 8421 }, { "epoch": 2.1182092555331993, "grad_norm": 0.2987903952598572, "learning_rate": 2.40890545088093e-06, "loss": 0.3171, "step": 8422 }, { "epoch": 2.118460764587525, "grad_norm": 0.30431345105171204, "learning_rate": 2.407654122231339e-06, "loss": 0.3215, "step": 8423 }, { "epoch": 2.118712273641851, "grad_norm": 0.277266263961792, "learning_rate": 2.4064030155999534e-06, "loss": 0.3201, "step": 8424 }, { "epoch": 2.118963782696177, "grad_norm": 0.3193032443523407, "learning_rate": 2.4051521310939258e-06, "loss": 0.3517, "step": 8425 }, { "epoch": 2.119215291750503, "grad_norm": 0.31553915143013, "learning_rate": 2.4039014688203825e-06, "loss": 0.3249, "step": 8426 }, { "epoch": 2.119466800804829, "grad_norm": 0.2827959656715393, "learning_rate": 2.4026510288864396e-06, "loss": 0.3281, "step": 8427 }, { "epoch": 2.119718309859155, "grad_norm": 0.28876793384552, "learning_rate": 2.4014008113991855e-06, "loss": 0.3331, "step": 8428 }, { "epoch": 2.119969818913481, "grad_norm": 0.2849387526512146, "learning_rate": 2.400150816465696e-06, "loss": 0.3234, "step": 8429 }, { "epoch": 2.120221327967807, "grad_norm": 0.3101980984210968, "learning_rate": 2.398901044193023e-06, "loss": 0.34, "step": 8430 }, { "epoch": 2.120472837022133, "grad_norm": 0.2996865212917328, "learning_rate": 2.3976514946882057e-06, "loss": 0.3353, "step": 8431 }, { "epoch": 2.1207243460764587, "grad_norm": 0.2950674593448639, "learning_rate": 2.396402168058255e-06, "loss": 0.3319, "step": 8432 }, { "epoch": 2.120975855130785, "grad_norm": 0.28375566005706787, "learning_rate": 2.395153064410171e-06, "loss": 0.34, "step": 8433 }, { "epoch": 2.1212273641851107, "grad_norm": 0.33272629976272583, "learning_rate": 2.3939041838509324e-06, "loss": 0.3399, "step": 8434 }, { "epoch": 2.1214788732394365, "grad_norm": 0.28813275694847107, "learning_rate": 2.3926555264874956e-06, "loss": 0.3091, "step": 8435 }, { "epoch": 2.1217303822937628, "grad_norm": 0.3202812075614929, "learning_rate": 2.391407092426803e-06, "loss": 0.3143, "step": 8436 }, { "epoch": 2.1219818913480886, "grad_norm": 0.2992876470088959, "learning_rate": 2.390158881775772e-06, "loss": 0.3201, "step": 8437 }, { "epoch": 2.1222334004024144, "grad_norm": 0.3007931113243103, "learning_rate": 2.388910894641307e-06, "loss": 0.322, "step": 8438 }, { "epoch": 2.1224849094567406, "grad_norm": 0.2985740303993225, "learning_rate": 2.387663131130288e-06, "loss": 0.324, "step": 8439 }, { "epoch": 2.1227364185110664, "grad_norm": 0.29932522773742676, "learning_rate": 2.3864155913495803e-06, "loss": 0.3135, "step": 8440 }, { "epoch": 2.1229879275653922, "grad_norm": 0.3008473813533783, "learning_rate": 2.3851682754060247e-06, "loss": 0.3353, "step": 8441 }, { "epoch": 2.1232394366197185, "grad_norm": 0.3180418610572815, "learning_rate": 2.3839211834064496e-06, "loss": 0.2981, "step": 8442 }, { "epoch": 2.1234909456740443, "grad_norm": 0.29798197746276855, "learning_rate": 2.3826743154576576e-06, "loss": 0.333, "step": 8443 }, { "epoch": 2.12374245472837, "grad_norm": 0.29840409755706787, "learning_rate": 2.3814276716664365e-06, "loss": 0.3147, "step": 8444 }, { "epoch": 2.1239939637826963, "grad_norm": 0.30087023973464966, "learning_rate": 2.3801812521395557e-06, "loss": 0.3251, "step": 8445 }, { "epoch": 2.124245472837022, "grad_norm": 0.27059686183929443, "learning_rate": 2.3789350569837588e-06, "loss": 0.3134, "step": 8446 }, { "epoch": 2.124496981891348, "grad_norm": 0.2948879599571228, "learning_rate": 2.377689086305779e-06, "loss": 0.3309, "step": 8447 }, { "epoch": 2.124748490945674, "grad_norm": 0.30460086464881897, "learning_rate": 2.3764433402123223e-06, "loss": 0.3201, "step": 8448 }, { "epoch": 2.125, "grad_norm": 0.2858458161354065, "learning_rate": 2.3751978188100816e-06, "loss": 0.3085, "step": 8449 }, { "epoch": 2.125251509054326, "grad_norm": 0.2818026542663574, "learning_rate": 2.3739525222057257e-06, "loss": 0.3384, "step": 8450 }, { "epoch": 2.125503018108652, "grad_norm": 0.2949872612953186, "learning_rate": 2.37270745050591e-06, "loss": 0.3338, "step": 8451 }, { "epoch": 2.125754527162978, "grad_norm": 0.33432409167289734, "learning_rate": 2.3714626038172623e-06, "loss": 0.3343, "step": 8452 }, { "epoch": 2.1260060362173037, "grad_norm": 0.32601264119148254, "learning_rate": 2.3702179822464006e-06, "loss": 0.3158, "step": 8453 }, { "epoch": 2.12625754527163, "grad_norm": 0.31106406450271606, "learning_rate": 2.3689735858999152e-06, "loss": 0.3377, "step": 8454 }, { "epoch": 2.1265090543259557, "grad_norm": 0.3100818991661072, "learning_rate": 2.367729414884383e-06, "loss": 0.31, "step": 8455 }, { "epoch": 2.1267605633802815, "grad_norm": 0.29893726110458374, "learning_rate": 2.3664854693063598e-06, "loss": 0.3122, "step": 8456 }, { "epoch": 2.1270120724346078, "grad_norm": 0.2957155108451843, "learning_rate": 2.3652417492723795e-06, "loss": 0.3108, "step": 8457 }, { "epoch": 2.1272635814889336, "grad_norm": 0.3101329803466797, "learning_rate": 2.3639982548889623e-06, "loss": 0.3182, "step": 8458 }, { "epoch": 2.1275150905432594, "grad_norm": 0.29328593611717224, "learning_rate": 2.3627549862626014e-06, "loss": 0.324, "step": 8459 }, { "epoch": 2.1277665995975856, "grad_norm": 0.30886930227279663, "learning_rate": 2.3615119434997803e-06, "loss": 0.312, "step": 8460 }, { "epoch": 2.1280181086519114, "grad_norm": 0.339282751083374, "learning_rate": 2.360269126706952e-06, "loss": 0.3421, "step": 8461 }, { "epoch": 2.1282696177062372, "grad_norm": 0.3135293126106262, "learning_rate": 2.359026535990561e-06, "loss": 0.2935, "step": 8462 }, { "epoch": 2.1285211267605635, "grad_norm": 0.3130805492401123, "learning_rate": 2.357784171457024e-06, "loss": 0.3256, "step": 8463 }, { "epoch": 2.1287726358148893, "grad_norm": 0.3069337010383606, "learning_rate": 2.3565420332127447e-06, "loss": 0.3241, "step": 8464 }, { "epoch": 2.129024144869215, "grad_norm": 0.3093840181827545, "learning_rate": 2.355300121364101e-06, "loss": 0.31, "step": 8465 }, { "epoch": 2.1292756539235413, "grad_norm": 0.2900824546813965, "learning_rate": 2.354058436017456e-06, "loss": 0.355, "step": 8466 }, { "epoch": 2.129527162977867, "grad_norm": 0.3184683322906494, "learning_rate": 2.352816977279156e-06, "loss": 0.3043, "step": 8467 }, { "epoch": 2.129778672032193, "grad_norm": 0.29197773337364197, "learning_rate": 2.351575745255519e-06, "loss": 0.3052, "step": 8468 }, { "epoch": 2.130030181086519, "grad_norm": 0.2952864468097687, "learning_rate": 2.350334740052852e-06, "loss": 0.319, "step": 8469 }, { "epoch": 2.130281690140845, "grad_norm": 0.29490870237350464, "learning_rate": 2.349093961777437e-06, "loss": 0.3397, "step": 8470 }, { "epoch": 2.1305331991951713, "grad_norm": 0.33022499084472656, "learning_rate": 2.3478534105355423e-06, "loss": 0.3102, "step": 8471 }, { "epoch": 2.130784708249497, "grad_norm": 0.3145847022533417, "learning_rate": 2.3466130864334085e-06, "loss": 0.3431, "step": 8472 }, { "epoch": 2.131036217303823, "grad_norm": 0.2934006154537201, "learning_rate": 2.345372989577267e-06, "loss": 0.3187, "step": 8473 }, { "epoch": 2.131287726358149, "grad_norm": 0.29067620635032654, "learning_rate": 2.344133120073319e-06, "loss": 0.2961, "step": 8474 }, { "epoch": 2.131539235412475, "grad_norm": 0.28378283977508545, "learning_rate": 2.3428934780277567e-06, "loss": 0.3215, "step": 8475 }, { "epoch": 2.1317907444668007, "grad_norm": 0.2955572009086609, "learning_rate": 2.341654063546743e-06, "loss": 0.3309, "step": 8476 }, { "epoch": 2.132042253521127, "grad_norm": 0.31690630316734314, "learning_rate": 2.340414876736429e-06, "loss": 0.3254, "step": 8477 }, { "epoch": 2.1322937625754528, "grad_norm": 0.297673761844635, "learning_rate": 2.339175917702943e-06, "loss": 0.3179, "step": 8478 }, { "epoch": 2.1325452716297786, "grad_norm": 0.31934690475463867, "learning_rate": 2.3379371865523926e-06, "loss": 0.3356, "step": 8479 }, { "epoch": 2.132796780684105, "grad_norm": 0.2896483540534973, "learning_rate": 2.33669868339087e-06, "loss": 0.3258, "step": 8480 }, { "epoch": 2.1330482897384306, "grad_norm": 0.2923082113265991, "learning_rate": 2.335460408324442e-06, "loss": 0.326, "step": 8481 }, { "epoch": 2.1332997987927564, "grad_norm": 0.30978333950042725, "learning_rate": 2.3342223614591623e-06, "loss": 0.3199, "step": 8482 }, { "epoch": 2.1335513078470827, "grad_norm": 0.2992372512817383, "learning_rate": 2.332984542901059e-06, "loss": 0.3611, "step": 8483 }, { "epoch": 2.1338028169014085, "grad_norm": 0.3130771517753601, "learning_rate": 2.331746952756146e-06, "loss": 0.3089, "step": 8484 }, { "epoch": 2.1340543259557343, "grad_norm": 0.31406500935554504, "learning_rate": 2.3305095911304123e-06, "loss": 0.3132, "step": 8485 }, { "epoch": 2.1343058350100605, "grad_norm": 0.30472251772880554, "learning_rate": 2.3292724581298338e-06, "loss": 0.3216, "step": 8486 }, { "epoch": 2.1345573440643864, "grad_norm": 0.31913644075393677, "learning_rate": 2.328035553860359e-06, "loss": 0.3051, "step": 8487 }, { "epoch": 2.134808853118712, "grad_norm": 0.3231859803199768, "learning_rate": 2.326798878427924e-06, "loss": 0.3293, "step": 8488 }, { "epoch": 2.1350603621730384, "grad_norm": 0.2944040298461914, "learning_rate": 2.325562431938442e-06, "loss": 0.3373, "step": 8489 }, { "epoch": 2.135311871227364, "grad_norm": 0.2909225523471832, "learning_rate": 2.3243262144978063e-06, "loss": 0.3285, "step": 8490 }, { "epoch": 2.13556338028169, "grad_norm": 0.283944696187973, "learning_rate": 2.3230902262118922e-06, "loss": 0.3177, "step": 8491 }, { "epoch": 2.1358148893360163, "grad_norm": 0.31086745858192444, "learning_rate": 2.321854467186552e-06, "loss": 0.3492, "step": 8492 }, { "epoch": 2.136066398390342, "grad_norm": 0.3098077178001404, "learning_rate": 2.320618937527624e-06, "loss": 0.3144, "step": 8493 }, { "epoch": 2.136317907444668, "grad_norm": 0.31440508365631104, "learning_rate": 2.31938363734092e-06, "loss": 0.3136, "step": 8494 }, { "epoch": 2.136569416498994, "grad_norm": 0.3078277111053467, "learning_rate": 2.3181485667322397e-06, "loss": 0.3126, "step": 8495 }, { "epoch": 2.13682092555332, "grad_norm": 0.3191978633403778, "learning_rate": 2.316913725807355e-06, "loss": 0.2969, "step": 8496 }, { "epoch": 2.1370724346076457, "grad_norm": 0.2999849319458008, "learning_rate": 2.3156791146720266e-06, "loss": 0.3327, "step": 8497 }, { "epoch": 2.137323943661972, "grad_norm": 0.2959345281124115, "learning_rate": 2.3144447334319866e-06, "loss": 0.3355, "step": 8498 }, { "epoch": 2.137575452716298, "grad_norm": 0.29375961422920227, "learning_rate": 2.313210582192954e-06, "loss": 0.3151, "step": 8499 }, { "epoch": 2.1378269617706236, "grad_norm": 0.2988927960395813, "learning_rate": 2.3119766610606293e-06, "loss": 0.3479, "step": 8500 }, { "epoch": 2.13807847082495, "grad_norm": 0.32058557868003845, "learning_rate": 2.3107429701406845e-06, "loss": 0.3118, "step": 8501 }, { "epoch": 2.1383299798792756, "grad_norm": 0.31546249985694885, "learning_rate": 2.309509509538783e-06, "loss": 0.3289, "step": 8502 }, { "epoch": 2.1385814889336014, "grad_norm": 0.3331628739833832, "learning_rate": 2.3082762793605582e-06, "loss": 0.3276, "step": 8503 }, { "epoch": 2.1388329979879277, "grad_norm": 0.3143128752708435, "learning_rate": 2.307043279711633e-06, "loss": 0.3154, "step": 8504 }, { "epoch": 2.1390845070422535, "grad_norm": 0.33894532918930054, "learning_rate": 2.3058105106976013e-06, "loss": 0.3275, "step": 8505 }, { "epoch": 2.1393360160965793, "grad_norm": 0.30815622210502625, "learning_rate": 2.3045779724240468e-06, "loss": 0.3163, "step": 8506 }, { "epoch": 2.1395875251509056, "grad_norm": 0.29994967579841614, "learning_rate": 2.3033456649965246e-06, "loss": 0.3283, "step": 8507 }, { "epoch": 2.1398390342052314, "grad_norm": 0.3090510666370392, "learning_rate": 2.302113588520578e-06, "loss": 0.3087, "step": 8508 }, { "epoch": 2.140090543259557, "grad_norm": 0.3417157530784607, "learning_rate": 2.3008817431017225e-06, "loss": 0.3144, "step": 8509 }, { "epoch": 2.1403420523138834, "grad_norm": 0.3093573749065399, "learning_rate": 2.2996501288454606e-06, "loss": 0.3099, "step": 8510 }, { "epoch": 2.140593561368209, "grad_norm": 0.2987154722213745, "learning_rate": 2.2984187458572727e-06, "loss": 0.3293, "step": 8511 }, { "epoch": 2.140845070422535, "grad_norm": 0.31684228777885437, "learning_rate": 2.297187594242617e-06, "loss": 0.3171, "step": 8512 }, { "epoch": 2.1410965794768613, "grad_norm": 0.2938196063041687, "learning_rate": 2.2959566741069365e-06, "loss": 0.3196, "step": 8513 }, { "epoch": 2.141348088531187, "grad_norm": 0.30638089776039124, "learning_rate": 2.2947259855556493e-06, "loss": 0.3256, "step": 8514 }, { "epoch": 2.141599597585513, "grad_norm": 0.2842661738395691, "learning_rate": 2.2934955286941583e-06, "loss": 0.3199, "step": 8515 }, { "epoch": 2.141851106639839, "grad_norm": 0.30369311571121216, "learning_rate": 2.292265303627842e-06, "loss": 0.3241, "step": 8516 }, { "epoch": 2.142102615694165, "grad_norm": 0.3095688223838806, "learning_rate": 2.2910353104620647e-06, "loss": 0.3437, "step": 8517 }, { "epoch": 2.1423541247484907, "grad_norm": 0.29922717809677124, "learning_rate": 2.2898055493021644e-06, "loss": 0.3326, "step": 8518 }, { "epoch": 2.142605633802817, "grad_norm": 0.30769720673561096, "learning_rate": 2.288576020253465e-06, "loss": 0.3249, "step": 8519 }, { "epoch": 2.142857142857143, "grad_norm": 0.2776482403278351, "learning_rate": 2.2873467234212654e-06, "loss": 0.327, "step": 8520 }, { "epoch": 2.1431086519114686, "grad_norm": 0.30429205298423767, "learning_rate": 2.2861176589108487e-06, "loss": 0.3109, "step": 8521 }, { "epoch": 2.143360160965795, "grad_norm": 0.2737998366355896, "learning_rate": 2.284888826827478e-06, "loss": 0.3143, "step": 8522 }, { "epoch": 2.1436116700201207, "grad_norm": 0.29733067750930786, "learning_rate": 2.2836602272763924e-06, "loss": 0.3157, "step": 8523 }, { "epoch": 2.1438631790744465, "grad_norm": 0.32914045453071594, "learning_rate": 2.2824318603628163e-06, "loss": 0.3138, "step": 8524 }, { "epoch": 2.1441146881287727, "grad_norm": 0.3098900020122528, "learning_rate": 2.2812037261919483e-06, "loss": 0.3287, "step": 8525 }, { "epoch": 2.1443661971830985, "grad_norm": 0.3037329316139221, "learning_rate": 2.2799758248689747e-06, "loss": 0.3236, "step": 8526 }, { "epoch": 2.1446177062374243, "grad_norm": 0.334514856338501, "learning_rate": 2.2787481564990533e-06, "loss": 0.3309, "step": 8527 }, { "epoch": 2.1448692152917506, "grad_norm": 0.28961655497550964, "learning_rate": 2.27752072118733e-06, "loss": 0.3469, "step": 8528 }, { "epoch": 2.1451207243460764, "grad_norm": 0.31919920444488525, "learning_rate": 2.2762935190389233e-06, "loss": 0.3104, "step": 8529 }, { "epoch": 2.1453722334004026, "grad_norm": 0.29657045006752014, "learning_rate": 2.275066550158939e-06, "loss": 0.3139, "step": 8530 }, { "epoch": 2.1456237424547284, "grad_norm": 0.29360705614089966, "learning_rate": 2.273839814652456e-06, "loss": 0.3169, "step": 8531 }, { "epoch": 2.1458752515090542, "grad_norm": 0.331571102142334, "learning_rate": 2.27261331262454e-06, "loss": 0.3003, "step": 8532 }, { "epoch": 2.1461267605633805, "grad_norm": 0.28920167684555054, "learning_rate": 2.2713870441802287e-06, "loss": 0.3175, "step": 8533 }, { "epoch": 2.1463782696177063, "grad_norm": 0.27948108315467834, "learning_rate": 2.2701610094245496e-06, "loss": 0.2903, "step": 8534 }, { "epoch": 2.146629778672032, "grad_norm": 0.2875995934009552, "learning_rate": 2.2689352084625e-06, "loss": 0.3286, "step": 8535 }, { "epoch": 2.1468812877263583, "grad_norm": 0.2751031219959259, "learning_rate": 2.267709641399066e-06, "loss": 0.2945, "step": 8536 }, { "epoch": 2.147132796780684, "grad_norm": 0.3058602213859558, "learning_rate": 2.2664843083392063e-06, "loss": 0.3593, "step": 8537 }, { "epoch": 2.14738430583501, "grad_norm": 0.31076836585998535, "learning_rate": 2.265259209387867e-06, "loss": 0.332, "step": 8538 }, { "epoch": 2.147635814889336, "grad_norm": 0.2831321954727173, "learning_rate": 2.2640343446499656e-06, "loss": 0.3137, "step": 8539 }, { "epoch": 2.147887323943662, "grad_norm": 0.2820286452770233, "learning_rate": 2.2628097142304083e-06, "loss": 0.3312, "step": 8540 }, { "epoch": 2.148138832997988, "grad_norm": 0.29765576124191284, "learning_rate": 2.2615853182340737e-06, "loss": 0.3077, "step": 8541 }, { "epoch": 2.148390342052314, "grad_norm": 0.3157854676246643, "learning_rate": 2.2603611567658267e-06, "loss": 0.3455, "step": 8542 }, { "epoch": 2.14864185110664, "grad_norm": 0.30632486939430237, "learning_rate": 2.259137229930506e-06, "loss": 0.3374, "step": 8543 }, { "epoch": 2.1488933601609657, "grad_norm": 0.312173068523407, "learning_rate": 2.2579135378329357e-06, "loss": 0.3233, "step": 8544 }, { "epoch": 2.149144869215292, "grad_norm": 0.2874966263771057, "learning_rate": 2.2566900805779157e-06, "loss": 0.3079, "step": 8545 }, { "epoch": 2.1493963782696177, "grad_norm": 0.30250847339630127, "learning_rate": 2.2554668582702294e-06, "loss": 0.3519, "step": 8546 }, { "epoch": 2.1496478873239435, "grad_norm": 0.2963038384914398, "learning_rate": 2.2542438710146354e-06, "loss": 0.3195, "step": 8547 }, { "epoch": 2.1498993963782698, "grad_norm": 0.299123615026474, "learning_rate": 2.253021118915878e-06, "loss": 0.3204, "step": 8548 }, { "epoch": 2.1501509054325956, "grad_norm": 0.28678908944129944, "learning_rate": 2.2517986020786745e-06, "loss": 0.308, "step": 8549 }, { "epoch": 2.1504024144869214, "grad_norm": 0.2993088662624359, "learning_rate": 2.2505763206077306e-06, "loss": 0.306, "step": 8550 }, { "epoch": 2.1506539235412476, "grad_norm": 0.30504879355430603, "learning_rate": 2.249354274607723e-06, "loss": 0.3226, "step": 8551 }, { "epoch": 2.1509054325955734, "grad_norm": 0.33471235632896423, "learning_rate": 2.2481324641833146e-06, "loss": 0.3101, "step": 8552 }, { "epoch": 2.1511569416498992, "grad_norm": 0.30294346809387207, "learning_rate": 2.246910889439144e-06, "loss": 0.3211, "step": 8553 }, { "epoch": 2.1514084507042255, "grad_norm": 0.2953152656555176, "learning_rate": 2.2456895504798322e-06, "loss": 0.3226, "step": 8554 }, { "epoch": 2.1516599597585513, "grad_norm": 0.2956666052341461, "learning_rate": 2.2444684474099807e-06, "loss": 0.3312, "step": 8555 }, { "epoch": 2.151911468812877, "grad_norm": 0.3139225244522095, "learning_rate": 2.2432475803341674e-06, "loss": 0.3122, "step": 8556 }, { "epoch": 2.1521629778672033, "grad_norm": 0.3028757572174072, "learning_rate": 2.242026949356954e-06, "loss": 0.3361, "step": 8557 }, { "epoch": 2.152414486921529, "grad_norm": 0.31237322092056274, "learning_rate": 2.2408065545828765e-06, "loss": 0.338, "step": 8558 }, { "epoch": 2.152665995975855, "grad_norm": 0.3021441400051117, "learning_rate": 2.239586396116458e-06, "loss": 0.2959, "step": 8559 }, { "epoch": 2.152917505030181, "grad_norm": 0.29930517077445984, "learning_rate": 2.238366474062194e-06, "loss": 0.3408, "step": 8560 }, { "epoch": 2.153169014084507, "grad_norm": 0.2866148352622986, "learning_rate": 2.2371467885245667e-06, "loss": 0.3145, "step": 8561 }, { "epoch": 2.153420523138833, "grad_norm": 0.3019120693206787, "learning_rate": 2.2359273396080306e-06, "loss": 0.3423, "step": 8562 }, { "epoch": 2.153672032193159, "grad_norm": 0.31220588088035583, "learning_rate": 2.2347081274170273e-06, "loss": 0.3306, "step": 8563 }, { "epoch": 2.153923541247485, "grad_norm": 0.31017324328422546, "learning_rate": 2.233489152055972e-06, "loss": 0.3089, "step": 8564 }, { "epoch": 2.1541750503018107, "grad_norm": 0.2911800146102905, "learning_rate": 2.2322704136292632e-06, "loss": 0.3092, "step": 8565 }, { "epoch": 2.154426559356137, "grad_norm": 0.2987365424633026, "learning_rate": 2.231051912241281e-06, "loss": 0.3316, "step": 8566 }, { "epoch": 2.1546780684104627, "grad_norm": 0.30714505910873413, "learning_rate": 2.229833647996378e-06, "loss": 0.2903, "step": 8567 }, { "epoch": 2.1549295774647885, "grad_norm": 0.31266728043556213, "learning_rate": 2.228615620998894e-06, "loss": 0.3185, "step": 8568 }, { "epoch": 2.1551810865191148, "grad_norm": 0.27466437220573425, "learning_rate": 2.2273978313531436e-06, "loss": 0.294, "step": 8569 }, { "epoch": 2.1554325955734406, "grad_norm": 0.3066222071647644, "learning_rate": 2.2261802791634245e-06, "loss": 0.3285, "step": 8570 }, { "epoch": 2.155684104627767, "grad_norm": 0.31608250737190247, "learning_rate": 2.2249629645340104e-06, "loss": 0.3345, "step": 8571 }, { "epoch": 2.1559356136820926, "grad_norm": 0.3242324888706207, "learning_rate": 2.2237458875691592e-06, "loss": 0.2912, "step": 8572 }, { "epoch": 2.1561871227364184, "grad_norm": 0.2953372895717621, "learning_rate": 2.2225290483731034e-06, "loss": 0.3208, "step": 8573 }, { "epoch": 2.1564386317907447, "grad_norm": 0.3065500259399414, "learning_rate": 2.221312447050058e-06, "loss": 0.333, "step": 8574 }, { "epoch": 2.1566901408450705, "grad_norm": 0.2919135093688965, "learning_rate": 2.2200960837042202e-06, "loss": 0.3145, "step": 8575 }, { "epoch": 2.1569416498993963, "grad_norm": 0.2694363594055176, "learning_rate": 2.2188799584397604e-06, "loss": 0.3248, "step": 8576 }, { "epoch": 2.1571931589537225, "grad_norm": 0.29707813262939453, "learning_rate": 2.2176640713608345e-06, "loss": 0.2982, "step": 8577 }, { "epoch": 2.1574446680080483, "grad_norm": 0.3023638129234314, "learning_rate": 2.2164484225715734e-06, "loss": 0.3199, "step": 8578 }, { "epoch": 2.157696177062374, "grad_norm": 0.29433873295783997, "learning_rate": 2.215233012176093e-06, "loss": 0.3304, "step": 8579 }, { "epoch": 2.1579476861167004, "grad_norm": 0.2980920672416687, "learning_rate": 2.2140178402784814e-06, "loss": 0.3326, "step": 8580 }, { "epoch": 2.158199195171026, "grad_norm": 0.3132590651512146, "learning_rate": 2.212802906982815e-06, "loss": 0.3246, "step": 8581 }, { "epoch": 2.158450704225352, "grad_norm": 0.29961881041526794, "learning_rate": 2.2115882123931403e-06, "loss": 0.3313, "step": 8582 }, { "epoch": 2.1587022132796783, "grad_norm": 0.3205260634422302, "learning_rate": 2.210373756613494e-06, "loss": 0.3257, "step": 8583 }, { "epoch": 2.158953722334004, "grad_norm": 0.2848275601863861, "learning_rate": 2.209159539747881e-06, "loss": 0.3306, "step": 8584 }, { "epoch": 2.15920523138833, "grad_norm": 0.3031473457813263, "learning_rate": 2.2079455619002936e-06, "loss": 0.3369, "step": 8585 }, { "epoch": 2.159456740442656, "grad_norm": 0.3208923935890198, "learning_rate": 2.2067318231747047e-06, "loss": 0.3092, "step": 8586 }, { "epoch": 2.159708249496982, "grad_norm": 0.31306546926498413, "learning_rate": 2.2055183236750577e-06, "loss": 0.3051, "step": 8587 }, { "epoch": 2.1599597585513077, "grad_norm": 0.3087550103664398, "learning_rate": 2.2043050635052866e-06, "loss": 0.3105, "step": 8588 }, { "epoch": 2.160211267605634, "grad_norm": 0.2876415252685547, "learning_rate": 2.2030920427692947e-06, "loss": 0.3006, "step": 8589 }, { "epoch": 2.16046277665996, "grad_norm": 0.3287197947502136, "learning_rate": 2.201879261570974e-06, "loss": 0.3298, "step": 8590 }, { "epoch": 2.1607142857142856, "grad_norm": 0.30581316351890564, "learning_rate": 2.2006667200141877e-06, "loss": 0.3205, "step": 8591 }, { "epoch": 2.160965794768612, "grad_norm": 0.30748024582862854, "learning_rate": 2.199454418202786e-06, "loss": 0.3159, "step": 8592 }, { "epoch": 2.1612173038229376, "grad_norm": 0.32226288318634033, "learning_rate": 2.1982423562405915e-06, "loss": 0.3129, "step": 8593 }, { "epoch": 2.1614688128772634, "grad_norm": 0.2897011935710907, "learning_rate": 2.1970305342314135e-06, "loss": 0.336, "step": 8594 }, { "epoch": 2.1617203219315897, "grad_norm": 0.3219929039478302, "learning_rate": 2.1958189522790325e-06, "loss": 0.3294, "step": 8595 }, { "epoch": 2.1619718309859155, "grad_norm": 0.3067570626735687, "learning_rate": 2.194607610487216e-06, "loss": 0.3422, "step": 8596 }, { "epoch": 2.1622233400402413, "grad_norm": 0.2897886633872986, "learning_rate": 2.1933965089597087e-06, "loss": 0.3096, "step": 8597 }, { "epoch": 2.1624748490945676, "grad_norm": 0.2907717525959015, "learning_rate": 2.1921856478002302e-06, "loss": 0.3176, "step": 8598 }, { "epoch": 2.1627263581488934, "grad_norm": 0.31856968998908997, "learning_rate": 2.1909750271124873e-06, "loss": 0.324, "step": 8599 }, { "epoch": 2.162977867203219, "grad_norm": 0.30686262249946594, "learning_rate": 2.1897646470001588e-06, "loss": 0.3286, "step": 8600 }, { "epoch": 2.1632293762575454, "grad_norm": 0.30653515458106995, "learning_rate": 2.188554507566909e-06, "loss": 0.3303, "step": 8601 }, { "epoch": 2.163480885311871, "grad_norm": 0.3120279908180237, "learning_rate": 2.187344608916375e-06, "loss": 0.3467, "step": 8602 }, { "epoch": 2.163732394366197, "grad_norm": 0.32078132033348083, "learning_rate": 2.1861349511521817e-06, "loss": 0.3224, "step": 8603 }, { "epoch": 2.1639839034205233, "grad_norm": 0.2890268862247467, "learning_rate": 2.1849255343779246e-06, "loss": 0.3249, "step": 8604 }, { "epoch": 2.164235412474849, "grad_norm": 0.3207026720046997, "learning_rate": 2.183716358697186e-06, "loss": 0.3349, "step": 8605 }, { "epoch": 2.164486921529175, "grad_norm": 0.30875587463378906, "learning_rate": 2.1825074242135206e-06, "loss": 0.2937, "step": 8606 }, { "epoch": 2.164738430583501, "grad_norm": 0.27507445216178894, "learning_rate": 2.181298731030469e-06, "loss": 0.3357, "step": 8607 }, { "epoch": 2.164989939637827, "grad_norm": 0.2858785092830658, "learning_rate": 2.180090279251548e-06, "loss": 0.3203, "step": 8608 }, { "epoch": 2.1652414486921527, "grad_norm": 0.3202469050884247, "learning_rate": 2.1788820689802524e-06, "loss": 0.329, "step": 8609 }, { "epoch": 2.165492957746479, "grad_norm": 0.30295151472091675, "learning_rate": 2.1776741003200603e-06, "loss": 0.3369, "step": 8610 }, { "epoch": 2.165744466800805, "grad_norm": 0.3162943720817566, "learning_rate": 2.1764663733744234e-06, "loss": 0.3167, "step": 8611 }, { "epoch": 2.1659959758551306, "grad_norm": 0.2848318815231323, "learning_rate": 2.175258888246779e-06, "loss": 0.311, "step": 8612 }, { "epoch": 2.166247484909457, "grad_norm": 0.31551775336265564, "learning_rate": 2.174051645040538e-06, "loss": 0.3374, "step": 8613 }, { "epoch": 2.1664989939637826, "grad_norm": 0.2893933057785034, "learning_rate": 2.172844643859096e-06, "loss": 0.3212, "step": 8614 }, { "epoch": 2.1667505030181085, "grad_norm": 0.295808881521225, "learning_rate": 2.1716378848058217e-06, "loss": 0.3155, "step": 8615 }, { "epoch": 2.1670020120724347, "grad_norm": 0.3089081346988678, "learning_rate": 2.1704313679840706e-06, "loss": 0.3483, "step": 8616 }, { "epoch": 2.1672535211267605, "grad_norm": 0.3219684064388275, "learning_rate": 2.169225093497169e-06, "loss": 0.3358, "step": 8617 }, { "epoch": 2.1675050301810863, "grad_norm": 0.3217262029647827, "learning_rate": 2.1680190614484292e-06, "loss": 0.3385, "step": 8618 }, { "epoch": 2.1677565392354126, "grad_norm": 0.28920111060142517, "learning_rate": 2.166813271941141e-06, "loss": 0.3109, "step": 8619 }, { "epoch": 2.1680080482897384, "grad_norm": 0.32903382182121277, "learning_rate": 2.165607725078571e-06, "loss": 0.3133, "step": 8620 }, { "epoch": 2.168259557344064, "grad_norm": 0.3116111159324646, "learning_rate": 2.1644024209639687e-06, "loss": 0.3415, "step": 8621 }, { "epoch": 2.1685110663983904, "grad_norm": 0.30731502175331116, "learning_rate": 2.1631973597005574e-06, "loss": 0.3426, "step": 8622 }, { "epoch": 2.1687625754527162, "grad_norm": 0.29497724771499634, "learning_rate": 2.1619925413915475e-06, "loss": 0.3216, "step": 8623 }, { "epoch": 2.169014084507042, "grad_norm": 0.30117177963256836, "learning_rate": 2.16078796614012e-06, "loss": 0.3228, "step": 8624 }, { "epoch": 2.1692655935613683, "grad_norm": 0.29439517855644226, "learning_rate": 2.159583634049443e-06, "loss": 0.3107, "step": 8625 }, { "epoch": 2.169517102615694, "grad_norm": 0.2719261944293976, "learning_rate": 2.1583795452226563e-06, "loss": 0.2919, "step": 8626 }, { "epoch": 2.16976861167002, "grad_norm": 0.32442960143089294, "learning_rate": 2.157175699762886e-06, "loss": 0.3018, "step": 8627 }, { "epoch": 2.170020120724346, "grad_norm": 0.2717970013618469, "learning_rate": 2.155972097773231e-06, "loss": 0.3407, "step": 8628 }, { "epoch": 2.170271629778672, "grad_norm": 0.2874637842178345, "learning_rate": 2.1547687393567736e-06, "loss": 0.2937, "step": 8629 }, { "epoch": 2.170523138832998, "grad_norm": 0.28952640295028687, "learning_rate": 2.153565624616576e-06, "loss": 0.3032, "step": 8630 }, { "epoch": 2.170774647887324, "grad_norm": 0.30068325996398926, "learning_rate": 2.1523627536556728e-06, "loss": 0.344, "step": 8631 }, { "epoch": 2.17102615694165, "grad_norm": 0.2989504933357239, "learning_rate": 2.1511601265770876e-06, "loss": 0.3452, "step": 8632 }, { "epoch": 2.171277665995976, "grad_norm": 0.30707207322120667, "learning_rate": 2.1499577434838132e-06, "loss": 0.2972, "step": 8633 }, { "epoch": 2.171529175050302, "grad_norm": 0.30241820216178894, "learning_rate": 2.14875560447883e-06, "loss": 0.3093, "step": 8634 }, { "epoch": 2.1717806841046277, "grad_norm": 0.28262701630592346, "learning_rate": 2.14755370966509e-06, "loss": 0.3204, "step": 8635 }, { "epoch": 2.172032193158954, "grad_norm": 0.3182533085346222, "learning_rate": 2.146352059145532e-06, "loss": 0.3132, "step": 8636 }, { "epoch": 2.1722837022132797, "grad_norm": 0.30099421739578247, "learning_rate": 2.1451506530230654e-06, "loss": 0.3211, "step": 8637 }, { "epoch": 2.1725352112676055, "grad_norm": 0.28418776392936707, "learning_rate": 2.1439494914005877e-06, "loss": 0.3228, "step": 8638 }, { "epoch": 2.1727867203219318, "grad_norm": 0.31524962186813354, "learning_rate": 2.1427485743809667e-06, "loss": 0.3276, "step": 8639 }, { "epoch": 2.1730382293762576, "grad_norm": 0.28617793321609497, "learning_rate": 2.141547902067056e-06, "loss": 0.3266, "step": 8640 }, { "epoch": 2.1732897384305834, "grad_norm": 0.2999754548072815, "learning_rate": 2.1403474745616863e-06, "loss": 0.3383, "step": 8641 }, { "epoch": 2.1735412474849096, "grad_norm": 0.3075141906738281, "learning_rate": 2.1391472919676637e-06, "loss": 0.313, "step": 8642 }, { "epoch": 2.1737927565392354, "grad_norm": 0.3021905720233917, "learning_rate": 2.13794735438778e-06, "loss": 0.3151, "step": 8643 }, { "epoch": 2.1740442655935612, "grad_norm": 0.3037421703338623, "learning_rate": 2.136747661924799e-06, "loss": 0.3213, "step": 8644 }, { "epoch": 2.1742957746478875, "grad_norm": 0.3154190480709076, "learning_rate": 2.1355482146814693e-06, "loss": 0.3147, "step": 8645 }, { "epoch": 2.1745472837022133, "grad_norm": 0.3106088638305664, "learning_rate": 2.1343490127605136e-06, "loss": 0.3102, "step": 8646 }, { "epoch": 2.174798792756539, "grad_norm": 0.3053724765777588, "learning_rate": 2.1331500562646396e-06, "loss": 0.3177, "step": 8647 }, { "epoch": 2.1750503018108653, "grad_norm": 0.30620327591896057, "learning_rate": 2.1319513452965264e-06, "loss": 0.314, "step": 8648 }, { "epoch": 2.175301810865191, "grad_norm": 0.291100412607193, "learning_rate": 2.13075287995884e-06, "loss": 0.3018, "step": 8649 }, { "epoch": 2.175553319919517, "grad_norm": 0.3090902864933014, "learning_rate": 2.129554660354217e-06, "loss": 0.3476, "step": 8650 }, { "epoch": 2.175804828973843, "grad_norm": 0.27171772718429565, "learning_rate": 2.1283566865852824e-06, "loss": 0.3119, "step": 8651 }, { "epoch": 2.176056338028169, "grad_norm": 0.31004947423934937, "learning_rate": 2.1271589587546303e-06, "loss": 0.3367, "step": 8652 }, { "epoch": 2.176307847082495, "grad_norm": 0.2942144274711609, "learning_rate": 2.1259614769648434e-06, "loss": 0.3417, "step": 8653 }, { "epoch": 2.176559356136821, "grad_norm": 0.29157233238220215, "learning_rate": 2.124764241318474e-06, "loss": 0.3177, "step": 8654 }, { "epoch": 2.176810865191147, "grad_norm": 0.34676676988601685, "learning_rate": 2.1235672519180615e-06, "loss": 0.3086, "step": 8655 }, { "epoch": 2.1770623742454727, "grad_norm": 0.30454134941101074, "learning_rate": 2.1223705088661174e-06, "loss": 0.3145, "step": 8656 }, { "epoch": 2.177313883299799, "grad_norm": 0.2989632487297058, "learning_rate": 2.121174012265138e-06, "loss": 0.3293, "step": 8657 }, { "epoch": 2.1775653923541247, "grad_norm": 0.3130037784576416, "learning_rate": 2.119977762217594e-06, "loss": 0.3147, "step": 8658 }, { "epoch": 2.1778169014084505, "grad_norm": 0.28079167008399963, "learning_rate": 2.118781758825938e-06, "loss": 0.3225, "step": 8659 }, { "epoch": 2.1780684104627768, "grad_norm": 0.30156850814819336, "learning_rate": 2.117586002192598e-06, "loss": 0.3157, "step": 8660 }, { "epoch": 2.1783199195171026, "grad_norm": 0.3073710799217224, "learning_rate": 2.1163904924199865e-06, "loss": 0.3166, "step": 8661 }, { "epoch": 2.1785714285714284, "grad_norm": 0.3458421528339386, "learning_rate": 2.1151952296104876e-06, "loss": 0.3273, "step": 8662 }, { "epoch": 2.1788229376257546, "grad_norm": 0.3005492687225342, "learning_rate": 2.1140002138664718e-06, "loss": 0.3192, "step": 8663 }, { "epoch": 2.1790744466800804, "grad_norm": 0.301626056432724, "learning_rate": 2.1128054452902812e-06, "loss": 0.2996, "step": 8664 }, { "epoch": 2.1793259557344062, "grad_norm": 0.28979891538619995, "learning_rate": 2.111610923984244e-06, "loss": 0.3323, "step": 8665 }, { "epoch": 2.1795774647887325, "grad_norm": 0.32507914304733276, "learning_rate": 2.1104166500506596e-06, "loss": 0.3134, "step": 8666 }, { "epoch": 2.1798289738430583, "grad_norm": 0.30215296149253845, "learning_rate": 2.1092226235918135e-06, "loss": 0.343, "step": 8667 }, { "epoch": 2.1800804828973845, "grad_norm": 0.3291065990924835, "learning_rate": 2.1080288447099635e-06, "loss": 0.3444, "step": 8668 }, { "epoch": 2.1803319919517103, "grad_norm": 0.29154881834983826, "learning_rate": 2.106835313507352e-06, "loss": 0.3347, "step": 8669 }, { "epoch": 2.180583501006036, "grad_norm": 0.3173246383666992, "learning_rate": 2.1056420300861953e-06, "loss": 0.3234, "step": 8670 }, { "epoch": 2.1808350100603624, "grad_norm": 0.29975295066833496, "learning_rate": 2.104448994548693e-06, "loss": 0.3172, "step": 8671 }, { "epoch": 2.181086519114688, "grad_norm": 0.29570671916007996, "learning_rate": 2.103256206997018e-06, "loss": 0.3228, "step": 8672 }, { "epoch": 2.181338028169014, "grad_norm": 0.3010045289993286, "learning_rate": 2.1020636675333273e-06, "loss": 0.3299, "step": 8673 }, { "epoch": 2.1815895372233403, "grad_norm": 0.32294243574142456, "learning_rate": 2.1008713762597554e-06, "loss": 0.3119, "step": 8674 }, { "epoch": 2.181841046277666, "grad_norm": 0.3337547779083252, "learning_rate": 2.0996793332784116e-06, "loss": 0.3317, "step": 8675 }, { "epoch": 2.182092555331992, "grad_norm": 0.3114114999771118, "learning_rate": 2.0984875386913904e-06, "loss": 0.3526, "step": 8676 }, { "epoch": 2.182344064386318, "grad_norm": 0.31616508960723877, "learning_rate": 2.097295992600758e-06, "loss": 0.2981, "step": 8677 }, { "epoch": 2.182595573440644, "grad_norm": 0.2805394232273102, "learning_rate": 2.0961046951085662e-06, "loss": 0.3403, "step": 8678 }, { "epoch": 2.1828470824949697, "grad_norm": 0.28356558084487915, "learning_rate": 2.094913646316839e-06, "loss": 0.3279, "step": 8679 }, { "epoch": 2.183098591549296, "grad_norm": 0.2977490723133087, "learning_rate": 2.0937228463275854e-06, "loss": 0.3316, "step": 8680 }, { "epoch": 2.183350100603622, "grad_norm": 0.27247682213783264, "learning_rate": 2.092532295242787e-06, "loss": 0.3144, "step": 8681 }, { "epoch": 2.1836016096579476, "grad_norm": 0.307102233171463, "learning_rate": 2.0913419931644095e-06, "loss": 0.3271, "step": 8682 }, { "epoch": 2.183853118712274, "grad_norm": 0.2990267276763916, "learning_rate": 2.0901519401943924e-06, "loss": 0.2964, "step": 8683 }, { "epoch": 2.1841046277665996, "grad_norm": 0.30906108021736145, "learning_rate": 2.088962136434658e-06, "loss": 0.3318, "step": 8684 }, { "epoch": 2.1843561368209254, "grad_norm": 0.2764362394809723, "learning_rate": 2.0877725819871065e-06, "loss": 0.3023, "step": 8685 }, { "epoch": 2.1846076458752517, "grad_norm": 0.3090154826641083, "learning_rate": 2.0865832769536125e-06, "loss": 0.346, "step": 8686 }, { "epoch": 2.1848591549295775, "grad_norm": 0.3105575442314148, "learning_rate": 2.0853942214360365e-06, "loss": 0.3426, "step": 8687 }, { "epoch": 2.1851106639839033, "grad_norm": 0.2777365744113922, "learning_rate": 2.0842054155362105e-06, "loss": 0.3196, "step": 8688 }, { "epoch": 2.1853621730382295, "grad_norm": 0.29215675592422485, "learning_rate": 2.0830168593559513e-06, "loss": 0.3114, "step": 8689 }, { "epoch": 2.1856136820925554, "grad_norm": 0.29614973068237305, "learning_rate": 2.081828552997047e-06, "loss": 0.3229, "step": 8690 }, { "epoch": 2.185865191146881, "grad_norm": 0.30876439809799194, "learning_rate": 2.0806404965612737e-06, "loss": 0.3022, "step": 8691 }, { "epoch": 2.1861167002012074, "grad_norm": 0.3041656017303467, "learning_rate": 2.0794526901503757e-06, "loss": 0.3057, "step": 8692 }, { "epoch": 2.186368209255533, "grad_norm": 0.29282814264297485, "learning_rate": 2.0782651338660862e-06, "loss": 0.3257, "step": 8693 }, { "epoch": 2.186619718309859, "grad_norm": 0.3103329837322235, "learning_rate": 2.077077827810108e-06, "loss": 0.3163, "step": 8694 }, { "epoch": 2.1868712273641853, "grad_norm": 0.28378820419311523, "learning_rate": 2.075890772084128e-06, "loss": 0.3226, "step": 8695 }, { "epoch": 2.187122736418511, "grad_norm": 0.30652692914009094, "learning_rate": 2.074703966789812e-06, "loss": 0.3087, "step": 8696 }, { "epoch": 2.187374245472837, "grad_norm": 0.3030233681201935, "learning_rate": 2.073517412028799e-06, "loss": 0.3096, "step": 8697 }, { "epoch": 2.187625754527163, "grad_norm": 0.28076255321502686, "learning_rate": 2.072331107902713e-06, "loss": 0.3153, "step": 8698 }, { "epoch": 2.187877263581489, "grad_norm": 0.2828161418437958, "learning_rate": 2.0711450545131505e-06, "loss": 0.3112, "step": 8699 }, { "epoch": 2.1881287726358147, "grad_norm": 0.29777970910072327, "learning_rate": 2.0699592519616934e-06, "loss": 0.3253, "step": 8700 }, { "epoch": 2.188380281690141, "grad_norm": 0.32333430647850037, "learning_rate": 2.0687737003498944e-06, "loss": 0.3359, "step": 8701 }, { "epoch": 2.188631790744467, "grad_norm": 0.30716678500175476, "learning_rate": 2.0675883997792913e-06, "loss": 0.3126, "step": 8702 }, { "epoch": 2.1888832997987926, "grad_norm": 0.28720593452453613, "learning_rate": 2.0664033503513953e-06, "loss": 0.3196, "step": 8703 }, { "epoch": 2.189134808853119, "grad_norm": 0.29374513030052185, "learning_rate": 2.0652185521677016e-06, "loss": 0.3285, "step": 8704 }, { "epoch": 2.1893863179074446, "grad_norm": 0.30496105551719666, "learning_rate": 2.064034005329677e-06, "loss": 0.3008, "step": 8705 }, { "epoch": 2.1896378269617705, "grad_norm": 0.29388949275016785, "learning_rate": 2.0628497099387727e-06, "loss": 0.3009, "step": 8706 }, { "epoch": 2.1898893360160967, "grad_norm": 0.3176174461841583, "learning_rate": 2.061665666096418e-06, "loss": 0.3277, "step": 8707 }, { "epoch": 2.1901408450704225, "grad_norm": 0.3095850646495819, "learning_rate": 2.0604818739040143e-06, "loss": 0.3092, "step": 8708 }, { "epoch": 2.1903923541247483, "grad_norm": 0.28293004631996155, "learning_rate": 2.0592983334629506e-06, "loss": 0.3213, "step": 8709 }, { "epoch": 2.1906438631790746, "grad_norm": 0.2949610650539398, "learning_rate": 2.0581150448745863e-06, "loss": 0.3194, "step": 8710 }, { "epoch": 2.1908953722334004, "grad_norm": 0.299869179725647, "learning_rate": 2.0569320082402654e-06, "loss": 0.3376, "step": 8711 }, { "epoch": 2.191146881287726, "grad_norm": 0.28059983253479004, "learning_rate": 2.055749223661305e-06, "loss": 0.3159, "step": 8712 }, { "epoch": 2.1913983903420524, "grad_norm": 0.3038329780101776, "learning_rate": 2.0545666912390053e-06, "loss": 0.3033, "step": 8713 }, { "epoch": 2.191649899396378, "grad_norm": 0.3102431893348694, "learning_rate": 2.053384411074641e-06, "loss": 0.3305, "step": 8714 }, { "epoch": 2.191901408450704, "grad_norm": 0.30974650382995605, "learning_rate": 2.0522023832694694e-06, "loss": 0.3425, "step": 8715 }, { "epoch": 2.1921529175050303, "grad_norm": 0.31163522601127625, "learning_rate": 2.051020607924721e-06, "loss": 0.3385, "step": 8716 }, { "epoch": 2.192404426559356, "grad_norm": 0.29782193899154663, "learning_rate": 2.049839085141608e-06, "loss": 0.3237, "step": 8717 }, { "epoch": 2.192655935613682, "grad_norm": 0.3062836229801178, "learning_rate": 2.048657815021323e-06, "loss": 0.3187, "step": 8718 }, { "epoch": 2.192907444668008, "grad_norm": 0.32062938809394836, "learning_rate": 2.0474767976650313e-06, "loss": 0.2985, "step": 8719 }, { "epoch": 2.193158953722334, "grad_norm": 0.3006410300731659, "learning_rate": 2.0462960331738824e-06, "loss": 0.3242, "step": 8720 }, { "epoch": 2.1934104627766597, "grad_norm": 0.28875410556793213, "learning_rate": 2.0451155216489983e-06, "loss": 0.3338, "step": 8721 }, { "epoch": 2.193661971830986, "grad_norm": 0.3076934218406677, "learning_rate": 2.043935263191486e-06, "loss": 0.3354, "step": 8722 }, { "epoch": 2.193913480885312, "grad_norm": 0.301831990480423, "learning_rate": 2.0427552579024234e-06, "loss": 0.3407, "step": 8723 }, { "epoch": 2.1941649899396376, "grad_norm": 0.31272050738334656, "learning_rate": 2.041575505882874e-06, "loss": 0.3296, "step": 8724 }, { "epoch": 2.194416498993964, "grad_norm": 0.29415902495384216, "learning_rate": 2.040396007233873e-06, "loss": 0.315, "step": 8725 }, { "epoch": 2.1946680080482897, "grad_norm": 0.32999396324157715, "learning_rate": 2.039216762056439e-06, "loss": 0.3289, "step": 8726 }, { "epoch": 2.1949195171026155, "grad_norm": 0.3093070089817047, "learning_rate": 2.0380377704515687e-06, "loss": 0.3259, "step": 8727 }, { "epoch": 2.1951710261569417, "grad_norm": 0.31014779210090637, "learning_rate": 2.0368590325202315e-06, "loss": 0.318, "step": 8728 }, { "epoch": 2.1954225352112675, "grad_norm": 0.29741162061691284, "learning_rate": 2.035680548363382e-06, "loss": 0.3241, "step": 8729 }, { "epoch": 2.1956740442655938, "grad_norm": 0.27723386883735657, "learning_rate": 2.0345023180819474e-06, "loss": 0.3286, "step": 8730 }, { "epoch": 2.1959255533199196, "grad_norm": 0.31631845235824585, "learning_rate": 2.033324341776839e-06, "loss": 0.2967, "step": 8731 }, { "epoch": 2.1961770623742454, "grad_norm": 0.3349016606807709, "learning_rate": 2.03214661954894e-06, "loss": 0.3171, "step": 8732 }, { "epoch": 2.1964285714285716, "grad_norm": 0.3033004701137543, "learning_rate": 2.030969151499117e-06, "loss": 0.3221, "step": 8733 }, { "epoch": 2.1966800804828974, "grad_norm": 0.29788362979888916, "learning_rate": 2.0297919377282106e-06, "loss": 0.3317, "step": 8734 }, { "epoch": 2.1969315895372232, "grad_norm": 0.2985301911830902, "learning_rate": 2.0286149783370453e-06, "loss": 0.3315, "step": 8735 }, { "epoch": 2.1971830985915495, "grad_norm": 0.30903440713882446, "learning_rate": 2.027438273426416e-06, "loss": 0.3339, "step": 8736 }, { "epoch": 2.1974346076458753, "grad_norm": 0.33624449372291565, "learning_rate": 2.0262618230971023e-06, "loss": 0.3379, "step": 8737 }, { "epoch": 2.197686116700201, "grad_norm": 0.31500041484832764, "learning_rate": 2.0250856274498617e-06, "loss": 0.3312, "step": 8738 }, { "epoch": 2.1979376257545273, "grad_norm": 0.3018837571144104, "learning_rate": 2.0239096865854243e-06, "loss": 0.3333, "step": 8739 }, { "epoch": 2.198189134808853, "grad_norm": 0.305803120136261, "learning_rate": 2.0227340006045056e-06, "loss": 0.3403, "step": 8740 }, { "epoch": 2.198440643863179, "grad_norm": 0.2978799045085907, "learning_rate": 2.021558569607792e-06, "loss": 0.3093, "step": 8741 }, { "epoch": 2.198692152917505, "grad_norm": 0.2844066023826599, "learning_rate": 2.020383393695956e-06, "loss": 0.3106, "step": 8742 }, { "epoch": 2.198943661971831, "grad_norm": 0.30605459213256836, "learning_rate": 2.01920847296964e-06, "loss": 0.3195, "step": 8743 }, { "epoch": 2.199195171026157, "grad_norm": 0.30778858065605164, "learning_rate": 2.0180338075294726e-06, "loss": 0.3221, "step": 8744 }, { "epoch": 2.199446680080483, "grad_norm": 0.29119858145713806, "learning_rate": 2.016859397476052e-06, "loss": 0.3172, "step": 8745 }, { "epoch": 2.199698189134809, "grad_norm": 0.3088303804397583, "learning_rate": 2.0156852429099638e-06, "loss": 0.3166, "step": 8746 }, { "epoch": 2.1999496981891347, "grad_norm": 0.281408429145813, "learning_rate": 2.014511343931763e-06, "loss": 0.3352, "step": 8747 }, { "epoch": 2.200201207243461, "grad_norm": 0.30251288414001465, "learning_rate": 2.0133377006419885e-06, "loss": 0.3042, "step": 8748 }, { "epoch": 2.2004527162977867, "grad_norm": 0.3207908868789673, "learning_rate": 2.0121643131411568e-06, "loss": 0.3338, "step": 8749 }, { "epoch": 2.2007042253521125, "grad_norm": 0.3297126889228821, "learning_rate": 2.0109911815297585e-06, "loss": 0.3362, "step": 8750 }, { "epoch": 2.2009557344064388, "grad_norm": 0.29778042435646057, "learning_rate": 2.0098183059082675e-06, "loss": 0.3456, "step": 8751 }, { "epoch": 2.2012072434607646, "grad_norm": 0.30129680037498474, "learning_rate": 2.008645686377131e-06, "loss": 0.3625, "step": 8752 }, { "epoch": 2.2014587525150904, "grad_norm": 0.2970339059829712, "learning_rate": 2.007473323036779e-06, "loss": 0.3177, "step": 8753 }, { "epoch": 2.2017102615694166, "grad_norm": 0.3131774961948395, "learning_rate": 2.0063012159876138e-06, "loss": 0.3363, "step": 8754 }, { "epoch": 2.2019617706237424, "grad_norm": 0.31431764364242554, "learning_rate": 2.005129365330023e-06, "loss": 0.3446, "step": 8755 }, { "epoch": 2.2022132796780682, "grad_norm": 0.2815970182418823, "learning_rate": 2.0039577711643642e-06, "loss": 0.3325, "step": 8756 }, { "epoch": 2.2024647887323945, "grad_norm": 0.2978397309780121, "learning_rate": 2.002786433590981e-06, "loss": 0.3338, "step": 8757 }, { "epoch": 2.2027162977867203, "grad_norm": 0.3014795780181885, "learning_rate": 2.001615352710188e-06, "loss": 0.3291, "step": 8758 }, { "epoch": 2.202967806841046, "grad_norm": 0.3280890882015228, "learning_rate": 2.0004445286222818e-06, "loss": 0.3253, "step": 8759 }, { "epoch": 2.2032193158953723, "grad_norm": 0.3170720636844635, "learning_rate": 1.999273961427538e-06, "loss": 0.3304, "step": 8760 }, { "epoch": 2.203470824949698, "grad_norm": 0.31641075015068054, "learning_rate": 1.9981036512262054e-06, "loss": 0.3233, "step": 8761 }, { "epoch": 2.203722334004024, "grad_norm": 0.30692046880722046, "learning_rate": 1.9969335981185173e-06, "loss": 0.3543, "step": 8762 }, { "epoch": 2.20397384305835, "grad_norm": 0.31193432211875916, "learning_rate": 1.9957638022046773e-06, "loss": 0.3305, "step": 8763 }, { "epoch": 2.204225352112676, "grad_norm": 0.3167397677898407, "learning_rate": 1.9945942635848745e-06, "loss": 0.3031, "step": 8764 }, { "epoch": 2.204476861167002, "grad_norm": 0.29315993189811707, "learning_rate": 1.9934249823592703e-06, "loss": 0.3366, "step": 8765 }, { "epoch": 2.204728370221328, "grad_norm": 0.2992209196090698, "learning_rate": 1.992255958628009e-06, "loss": 0.3327, "step": 8766 }, { "epoch": 2.204979879275654, "grad_norm": 0.29840826988220215, "learning_rate": 1.9910871924912063e-06, "loss": 0.3068, "step": 8767 }, { "epoch": 2.20523138832998, "grad_norm": 0.318357914686203, "learning_rate": 1.989918684048964e-06, "loss": 0.3294, "step": 8768 }, { "epoch": 2.205482897384306, "grad_norm": 0.28239601850509644, "learning_rate": 1.9887504334013534e-06, "loss": 0.3296, "step": 8769 }, { "epoch": 2.2057344064386317, "grad_norm": 0.32247522473335266, "learning_rate": 1.9875824406484318e-06, "loss": 0.332, "step": 8770 }, { "epoch": 2.205985915492958, "grad_norm": 0.2952177822589874, "learning_rate": 1.986414705890226e-06, "loss": 0.3287, "step": 8771 }, { "epoch": 2.2062374245472838, "grad_norm": 0.3215632438659668, "learning_rate": 1.9852472292267505e-06, "loss": 0.3458, "step": 8772 }, { "epoch": 2.2064889336016096, "grad_norm": 0.30382493138313293, "learning_rate": 1.9840800107579872e-06, "loss": 0.3165, "step": 8773 }, { "epoch": 2.206740442655936, "grad_norm": 0.31115108728408813, "learning_rate": 1.9829130505839058e-06, "loss": 0.3306, "step": 8774 }, { "epoch": 2.2069919517102616, "grad_norm": 0.29950037598609924, "learning_rate": 1.9817463488044446e-06, "loss": 0.3125, "step": 8775 }, { "epoch": 2.2072434607645874, "grad_norm": 0.2986210882663727, "learning_rate": 1.9805799055195264e-06, "loss": 0.3062, "step": 8776 }, { "epoch": 2.2074949698189137, "grad_norm": 0.3211692273616791, "learning_rate": 1.9794137208290516e-06, "loss": 0.3129, "step": 8777 }, { "epoch": 2.2077464788732395, "grad_norm": 0.29951202869415283, "learning_rate": 1.9782477948328933e-06, "loss": 0.3337, "step": 8778 }, { "epoch": 2.2079979879275653, "grad_norm": 0.3311370611190796, "learning_rate": 1.9770821276309093e-06, "loss": 0.3185, "step": 8779 }, { "epoch": 2.2082494969818915, "grad_norm": 0.31037113070487976, "learning_rate": 1.9759167193229277e-06, "loss": 0.3226, "step": 8780 }, { "epoch": 2.2085010060362174, "grad_norm": 0.315602570772171, "learning_rate": 1.974751570008762e-06, "loss": 0.334, "step": 8781 }, { "epoch": 2.208752515090543, "grad_norm": 0.2930223345756531, "learning_rate": 1.9735866797881977e-06, "loss": 0.2912, "step": 8782 }, { "epoch": 2.2090040241448694, "grad_norm": 0.2838136553764343, "learning_rate": 1.972422048761002e-06, "loss": 0.3148, "step": 8783 }, { "epoch": 2.209255533199195, "grad_norm": 0.3084590435028076, "learning_rate": 1.9712576770269155e-06, "loss": 0.3255, "step": 8784 }, { "epoch": 2.209507042253521, "grad_norm": 0.3039499819278717, "learning_rate": 1.9700935646856634e-06, "loss": 0.341, "step": 8785 }, { "epoch": 2.2097585513078473, "grad_norm": 0.30746522545814514, "learning_rate": 1.9689297118369403e-06, "loss": 0.3122, "step": 8786 }, { "epoch": 2.210010060362173, "grad_norm": 0.31201913952827454, "learning_rate": 1.967766118580427e-06, "loss": 0.2904, "step": 8787 }, { "epoch": 2.210261569416499, "grad_norm": 0.3182002007961273, "learning_rate": 1.9666027850157745e-06, "loss": 0.3133, "step": 8788 }, { "epoch": 2.210513078470825, "grad_norm": 0.3027884066104889, "learning_rate": 1.965439711242618e-06, "loss": 0.307, "step": 8789 }, { "epoch": 2.210764587525151, "grad_norm": 0.3081648051738739, "learning_rate": 1.964276897360565e-06, "loss": 0.3231, "step": 8790 }, { "epoch": 2.2110160965794767, "grad_norm": 0.30565327405929565, "learning_rate": 1.9631143434692054e-06, "loss": 0.3237, "step": 8791 }, { "epoch": 2.211267605633803, "grad_norm": 0.2992105484008789, "learning_rate": 1.9619520496681015e-06, "loss": 0.3361, "step": 8792 }, { "epoch": 2.211519114688129, "grad_norm": 0.28107115626335144, "learning_rate": 1.960790016056801e-06, "loss": 0.3208, "step": 8793 }, { "epoch": 2.2117706237424546, "grad_norm": 0.31731727719306946, "learning_rate": 1.9596282427348206e-06, "loss": 0.3032, "step": 8794 }, { "epoch": 2.212022132796781, "grad_norm": 0.30339574813842773, "learning_rate": 1.958466729801662e-06, "loss": 0.3053, "step": 8795 }, { "epoch": 2.2122736418511066, "grad_norm": 0.29197800159454346, "learning_rate": 1.957305477356799e-06, "loss": 0.3513, "step": 8796 }, { "epoch": 2.2125251509054324, "grad_norm": 0.2851543426513672, "learning_rate": 1.956144485499688e-06, "loss": 0.3059, "step": 8797 }, { "epoch": 2.2127766599597587, "grad_norm": 0.2786988615989685, "learning_rate": 1.9549837543297585e-06, "loss": 0.3259, "step": 8798 }, { "epoch": 2.2130281690140845, "grad_norm": 0.2844609022140503, "learning_rate": 1.953823283946422e-06, "loss": 0.3198, "step": 8799 }, { "epoch": 2.2132796780684103, "grad_norm": 0.32058948278427124, "learning_rate": 1.952663074449063e-06, "loss": 0.3098, "step": 8800 }, { "epoch": 2.2135311871227366, "grad_norm": 0.2962084412574768, "learning_rate": 1.9515031259370493e-06, "loss": 0.3343, "step": 8801 }, { "epoch": 2.2137826961770624, "grad_norm": 0.2991352081298828, "learning_rate": 1.95034343850972e-06, "loss": 0.3374, "step": 8802 }, { "epoch": 2.214034205231388, "grad_norm": 0.3038289248943329, "learning_rate": 1.9491840122663965e-06, "loss": 0.3438, "step": 8803 }, { "epoch": 2.2142857142857144, "grad_norm": 0.3067372143268585, "learning_rate": 1.948024847306378e-06, "loss": 0.302, "step": 8804 }, { "epoch": 2.21453722334004, "grad_norm": 0.27954307198524475, "learning_rate": 1.946865943728936e-06, "loss": 0.3406, "step": 8805 }, { "epoch": 2.214788732394366, "grad_norm": 0.2837284207344055, "learning_rate": 1.945707301633328e-06, "loss": 0.3044, "step": 8806 }, { "epoch": 2.2150402414486923, "grad_norm": 0.29118677973747253, "learning_rate": 1.94454892111878e-06, "loss": 0.3169, "step": 8807 }, { "epoch": 2.215291750503018, "grad_norm": 0.301509827375412, "learning_rate": 1.9433908022845046e-06, "loss": 0.3166, "step": 8808 }, { "epoch": 2.215543259557344, "grad_norm": 0.3074178099632263, "learning_rate": 1.9422329452296825e-06, "loss": 0.3188, "step": 8809 }, { "epoch": 2.21579476861167, "grad_norm": 0.2955106496810913, "learning_rate": 1.941075350053481e-06, "loss": 0.3116, "step": 8810 }, { "epoch": 2.216046277665996, "grad_norm": 0.2759988009929657, "learning_rate": 1.9399180168550374e-06, "loss": 0.311, "step": 8811 }, { "epoch": 2.2162977867203217, "grad_norm": 0.3274937570095062, "learning_rate": 1.9387609457334734e-06, "loss": 0.3431, "step": 8812 }, { "epoch": 2.216549295774648, "grad_norm": 0.2869795858860016, "learning_rate": 1.937604136787882e-06, "loss": 0.3158, "step": 8813 }, { "epoch": 2.216800804828974, "grad_norm": 0.33137309551239014, "learning_rate": 1.936447590117338e-06, "loss": 0.3242, "step": 8814 }, { "epoch": 2.2170523138832996, "grad_norm": 0.2800137996673584, "learning_rate": 1.935291305820894e-06, "loss": 0.3108, "step": 8815 }, { "epoch": 2.217303822937626, "grad_norm": 0.29524585604667664, "learning_rate": 1.9341352839975753e-06, "loss": 0.3126, "step": 8816 }, { "epoch": 2.2175553319919517, "grad_norm": 0.30382615327835083, "learning_rate": 1.9329795247463913e-06, "loss": 0.3184, "step": 8817 }, { "epoch": 2.2178068410462775, "grad_norm": 0.3121460974216461, "learning_rate": 1.9318240281663215e-06, "loss": 0.3162, "step": 8818 }, { "epoch": 2.2180583501006037, "grad_norm": 0.28468015789985657, "learning_rate": 1.930668794356331e-06, "loss": 0.3268, "step": 8819 }, { "epoch": 2.2183098591549295, "grad_norm": 0.3230193257331848, "learning_rate": 1.929513823415356e-06, "loss": 0.309, "step": 8820 }, { "epoch": 2.2185613682092553, "grad_norm": 0.30588406324386597, "learning_rate": 1.928359115442314e-06, "loss": 0.3162, "step": 8821 }, { "epoch": 2.2188128772635816, "grad_norm": 0.2859831750392914, "learning_rate": 1.9272046705360958e-06, "loss": 0.3161, "step": 8822 }, { "epoch": 2.2190643863179074, "grad_norm": 0.2836097478866577, "learning_rate": 1.926050488795576e-06, "loss": 0.3085, "step": 8823 }, { "epoch": 2.219315895372233, "grad_norm": 0.29898855090141296, "learning_rate": 1.9248965703196e-06, "loss": 0.3115, "step": 8824 }, { "epoch": 2.2195674044265594, "grad_norm": 0.3057454228401184, "learning_rate": 1.9237429152069948e-06, "loss": 0.2977, "step": 8825 }, { "epoch": 2.2198189134808852, "grad_norm": 0.30841681361198425, "learning_rate": 1.922589523556565e-06, "loss": 0.3491, "step": 8826 }, { "epoch": 2.2200704225352115, "grad_norm": 0.3082001507282257, "learning_rate": 1.9214363954670895e-06, "loss": 0.3165, "step": 8827 }, { "epoch": 2.2203219315895373, "grad_norm": 0.3125540614128113, "learning_rate": 1.9202835310373285e-06, "loss": 0.3291, "step": 8828 }, { "epoch": 2.220573440643863, "grad_norm": 0.3130517303943634, "learning_rate": 1.9191309303660145e-06, "loss": 0.3229, "step": 8829 }, { "epoch": 2.2208249496981893, "grad_norm": 0.3235696852207184, "learning_rate": 1.9179785935518647e-06, "loss": 0.328, "step": 8830 }, { "epoch": 2.221076458752515, "grad_norm": 0.30910971760749817, "learning_rate": 1.9168265206935655e-06, "loss": 0.3185, "step": 8831 }, { "epoch": 2.221327967806841, "grad_norm": 0.282240092754364, "learning_rate": 1.9156747118897878e-06, "loss": 0.32, "step": 8832 }, { "epoch": 2.221579476861167, "grad_norm": 0.30179286003112793, "learning_rate": 1.914523167239174e-06, "loss": 0.3176, "step": 8833 }, { "epoch": 2.221830985915493, "grad_norm": 0.3033350110054016, "learning_rate": 1.91337188684035e-06, "loss": 0.3161, "step": 8834 }, { "epoch": 2.222082494969819, "grad_norm": 0.30857256054878235, "learning_rate": 1.9122208707919125e-06, "loss": 0.3243, "step": 8835 }, { "epoch": 2.222334004024145, "grad_norm": 0.3050358295440674, "learning_rate": 1.9110701191924403e-06, "loss": 0.3213, "step": 8836 }, { "epoch": 2.222585513078471, "grad_norm": 0.3051794171333313, "learning_rate": 1.9099196321404895e-06, "loss": 0.3034, "step": 8837 }, { "epoch": 2.2228370221327967, "grad_norm": 0.30538561940193176, "learning_rate": 1.9087694097345895e-06, "loss": 0.3213, "step": 8838 }, { "epoch": 2.223088531187123, "grad_norm": 0.3018161952495575, "learning_rate": 1.9076194520732523e-06, "loss": 0.3263, "step": 8839 }, { "epoch": 2.2233400402414487, "grad_norm": 0.3152303397655487, "learning_rate": 1.9064697592549613e-06, "loss": 0.323, "step": 8840 }, { "epoch": 2.2235915492957745, "grad_norm": 0.2975950539112091, "learning_rate": 1.9053203313781843e-06, "loss": 0.3556, "step": 8841 }, { "epoch": 2.2238430583501008, "grad_norm": 0.3171286880970001, "learning_rate": 1.9041711685413588e-06, "loss": 0.3144, "step": 8842 }, { "epoch": 2.2240945674044266, "grad_norm": 0.2995269298553467, "learning_rate": 1.903022270842907e-06, "loss": 0.3153, "step": 8843 }, { "epoch": 2.2243460764587524, "grad_norm": 0.3209341764450073, "learning_rate": 1.9018736383812214e-06, "loss": 0.3278, "step": 8844 }, { "epoch": 2.2245975855130786, "grad_norm": 0.3014548718929291, "learning_rate": 1.9007252712546786e-06, "loss": 0.3342, "step": 8845 }, { "epoch": 2.2248490945674044, "grad_norm": 0.3181338310241699, "learning_rate": 1.8995771695616255e-06, "loss": 0.3352, "step": 8846 }, { "epoch": 2.2251006036217302, "grad_norm": 0.28974583745002747, "learning_rate": 1.8984293334003917e-06, "loss": 0.3346, "step": 8847 }, { "epoch": 2.2253521126760565, "grad_norm": 0.30787721276283264, "learning_rate": 1.897281762869284e-06, "loss": 0.3173, "step": 8848 }, { "epoch": 2.2256036217303823, "grad_norm": 0.29022449254989624, "learning_rate": 1.8961344580665808e-06, "loss": 0.3134, "step": 8849 }, { "epoch": 2.225855130784708, "grad_norm": 0.2811683416366577, "learning_rate": 1.8949874190905453e-06, "loss": 0.2889, "step": 8850 }, { "epoch": 2.2261066398390343, "grad_norm": 0.2896963059902191, "learning_rate": 1.893840646039411e-06, "loss": 0.3004, "step": 8851 }, { "epoch": 2.22635814889336, "grad_norm": 0.2802124619483948, "learning_rate": 1.8926941390113946e-06, "loss": 0.3045, "step": 8852 }, { "epoch": 2.226609657947686, "grad_norm": 0.293868750333786, "learning_rate": 1.8915478981046847e-06, "loss": 0.3134, "step": 8853 }, { "epoch": 2.226861167002012, "grad_norm": 0.28831586241722107, "learning_rate": 1.8904019234174526e-06, "loss": 0.3105, "step": 8854 }, { "epoch": 2.227112676056338, "grad_norm": 0.2980342507362366, "learning_rate": 1.88925621504784e-06, "loss": 0.3228, "step": 8855 }, { "epoch": 2.227364185110664, "grad_norm": 0.30006951093673706, "learning_rate": 1.8881107730939734e-06, "loss": 0.3245, "step": 8856 }, { "epoch": 2.22761569416499, "grad_norm": 0.2884455919265747, "learning_rate": 1.8869655976539502e-06, "loss": 0.3142, "step": 8857 }, { "epoch": 2.227867203219316, "grad_norm": 0.32939469814300537, "learning_rate": 1.885820688825848e-06, "loss": 0.3118, "step": 8858 }, { "epoch": 2.2281187122736417, "grad_norm": 0.30000847578048706, "learning_rate": 1.8846760467077236e-06, "loss": 0.3248, "step": 8859 }, { "epoch": 2.228370221327968, "grad_norm": 0.29850441217422485, "learning_rate": 1.8835316713976043e-06, "loss": 0.3101, "step": 8860 }, { "epoch": 2.2286217303822937, "grad_norm": 0.2936405837535858, "learning_rate": 1.882387562993503e-06, "loss": 0.3186, "step": 8861 }, { "epoch": 2.2288732394366195, "grad_norm": 0.2933601438999176, "learning_rate": 1.881243721593401e-06, "loss": 0.3265, "step": 8862 }, { "epoch": 2.2291247484909458, "grad_norm": 0.2859182059764862, "learning_rate": 1.8801001472952651e-06, "loss": 0.3317, "step": 8863 }, { "epoch": 2.2293762575452716, "grad_norm": 0.2935737371444702, "learning_rate": 1.878956840197032e-06, "loss": 0.3221, "step": 8864 }, { "epoch": 2.2296277665995974, "grad_norm": 0.31956246495246887, "learning_rate": 1.8778138003966218e-06, "loss": 0.3337, "step": 8865 }, { "epoch": 2.2298792756539236, "grad_norm": 0.3022806644439697, "learning_rate": 1.876671027991926e-06, "loss": 0.3045, "step": 8866 }, { "epoch": 2.2301307847082494, "grad_norm": 0.29735061526298523, "learning_rate": 1.8755285230808185e-06, "loss": 0.3275, "step": 8867 }, { "epoch": 2.2303822937625757, "grad_norm": 0.3103269934654236, "learning_rate": 1.874386285761145e-06, "loss": 0.3247, "step": 8868 }, { "epoch": 2.2306338028169015, "grad_norm": 0.3051782250404358, "learning_rate": 1.8732443161307323e-06, "loss": 0.3151, "step": 8869 }, { "epoch": 2.2308853118712273, "grad_norm": 0.2836689352989197, "learning_rate": 1.8721026142873843e-06, "loss": 0.2943, "step": 8870 }, { "epoch": 2.2311368209255535, "grad_norm": 0.31249019503593445, "learning_rate": 1.8709611803288779e-06, "loss": 0.3151, "step": 8871 }, { "epoch": 2.2313883299798793, "grad_norm": 0.289578378200531, "learning_rate": 1.8698200143529733e-06, "loss": 0.321, "step": 8872 }, { "epoch": 2.231639839034205, "grad_norm": 0.3105667531490326, "learning_rate": 1.8686791164573997e-06, "loss": 0.3235, "step": 8873 }, { "epoch": 2.2318913480885314, "grad_norm": 0.31519603729248047, "learning_rate": 1.8675384867398722e-06, "loss": 0.3308, "step": 8874 }, { "epoch": 2.232142857142857, "grad_norm": 0.30213382840156555, "learning_rate": 1.866398125298075e-06, "loss": 0.2995, "step": 8875 }, { "epoch": 2.232394366197183, "grad_norm": 0.30496177077293396, "learning_rate": 1.8652580322296766e-06, "loss": 0.3019, "step": 8876 }, { "epoch": 2.2326458752515093, "grad_norm": 0.30280569195747375, "learning_rate": 1.864118207632315e-06, "loss": 0.355, "step": 8877 }, { "epoch": 2.232897384305835, "grad_norm": 0.3069732189178467, "learning_rate": 1.8629786516036109e-06, "loss": 0.3714, "step": 8878 }, { "epoch": 2.233148893360161, "grad_norm": 0.3117799162864685, "learning_rate": 1.861839364241162e-06, "loss": 0.3448, "step": 8879 }, { "epoch": 2.233400402414487, "grad_norm": 0.29441627860069275, "learning_rate": 1.860700345642537e-06, "loss": 0.324, "step": 8880 }, { "epoch": 2.233651911468813, "grad_norm": 0.29074686765670776, "learning_rate": 1.8595615959052905e-06, "loss": 0.323, "step": 8881 }, { "epoch": 2.2339034205231387, "grad_norm": 0.2976393699645996, "learning_rate": 1.8584231151269444e-06, "loss": 0.3263, "step": 8882 }, { "epoch": 2.234154929577465, "grad_norm": 0.3090425133705139, "learning_rate": 1.8572849034050066e-06, "loss": 0.347, "step": 8883 }, { "epoch": 2.234406438631791, "grad_norm": 0.2783370316028595, "learning_rate": 1.8561469608369547e-06, "loss": 0.3137, "step": 8884 }, { "epoch": 2.2346579476861166, "grad_norm": 0.2869153320789337, "learning_rate": 1.8550092875202497e-06, "loss": 0.3286, "step": 8885 }, { "epoch": 2.234909456740443, "grad_norm": 0.3169291317462921, "learning_rate": 1.8538718835523217e-06, "loss": 0.3386, "step": 8886 }, { "epoch": 2.2351609657947686, "grad_norm": 0.2993890643119812, "learning_rate": 1.852734749030587e-06, "loss": 0.3145, "step": 8887 }, { "epoch": 2.2354124748490944, "grad_norm": 0.3018781244754791, "learning_rate": 1.8515978840524302e-06, "loss": 0.3272, "step": 8888 }, { "epoch": 2.2356639839034207, "grad_norm": 0.29869964718818665, "learning_rate": 1.850461288715218e-06, "loss": 0.3214, "step": 8889 }, { "epoch": 2.2359154929577465, "grad_norm": 0.2824086844921112, "learning_rate": 1.8493249631162947e-06, "loss": 0.3178, "step": 8890 }, { "epoch": 2.2361670020120723, "grad_norm": 0.2874877452850342, "learning_rate": 1.8481889073529762e-06, "loss": 0.3227, "step": 8891 }, { "epoch": 2.2364185110663986, "grad_norm": 0.29150328040122986, "learning_rate": 1.8470531215225617e-06, "loss": 0.3245, "step": 8892 }, { "epoch": 2.2366700201207244, "grad_norm": 0.28248754143714905, "learning_rate": 1.845917605722321e-06, "loss": 0.325, "step": 8893 }, { "epoch": 2.23692152917505, "grad_norm": 0.28792303800582886, "learning_rate": 1.8447823600495068e-06, "loss": 0.3175, "step": 8894 }, { "epoch": 2.2371730382293764, "grad_norm": 0.28080111742019653, "learning_rate": 1.8436473846013432e-06, "loss": 0.3382, "step": 8895 }, { "epoch": 2.237424547283702, "grad_norm": 0.28693875670433044, "learning_rate": 1.842512679475037e-06, "loss": 0.3263, "step": 8896 }, { "epoch": 2.237676056338028, "grad_norm": 0.2953447103500366, "learning_rate": 1.8413782447677641e-06, "loss": 0.3322, "step": 8897 }, { "epoch": 2.2379275653923543, "grad_norm": 0.3262675404548645, "learning_rate": 1.8402440805766863e-06, "loss": 0.317, "step": 8898 }, { "epoch": 2.23817907444668, "grad_norm": 0.28162655234336853, "learning_rate": 1.8391101869989341e-06, "loss": 0.3345, "step": 8899 }, { "epoch": 2.238430583501006, "grad_norm": 0.27264997363090515, "learning_rate": 1.8379765641316216e-06, "loss": 0.2932, "step": 8900 }, { "epoch": 2.238682092555332, "grad_norm": 0.30955466628074646, "learning_rate": 1.8368432120718332e-06, "loss": 0.3388, "step": 8901 }, { "epoch": 2.238933601609658, "grad_norm": 0.29133230447769165, "learning_rate": 1.8357101309166364e-06, "loss": 0.3317, "step": 8902 }, { "epoch": 2.2391851106639837, "grad_norm": 0.3130471110343933, "learning_rate": 1.8345773207630696e-06, "loss": 0.31, "step": 8903 }, { "epoch": 2.23943661971831, "grad_norm": 0.30992329120635986, "learning_rate": 1.833444781708154e-06, "loss": 0.3204, "step": 8904 }, { "epoch": 2.239688128772636, "grad_norm": 0.2960168719291687, "learning_rate": 1.8323125138488818e-06, "loss": 0.3244, "step": 8905 }, { "epoch": 2.2399396378269616, "grad_norm": 0.31481972336769104, "learning_rate": 1.8311805172822272e-06, "loss": 0.3318, "step": 8906 }, { "epoch": 2.240191146881288, "grad_norm": 0.3201163113117218, "learning_rate": 1.8300487921051352e-06, "loss": 0.3213, "step": 8907 }, { "epoch": 2.2404426559356136, "grad_norm": 0.31222665309906006, "learning_rate": 1.8289173384145354e-06, "loss": 0.2939, "step": 8908 }, { "epoch": 2.2406941649899395, "grad_norm": 0.3136330246925354, "learning_rate": 1.8277861563073252e-06, "loss": 0.3286, "step": 8909 }, { "epoch": 2.2409456740442657, "grad_norm": 0.3031679391860962, "learning_rate": 1.8266552458803872e-06, "loss": 0.3048, "step": 8910 }, { "epoch": 2.2411971830985915, "grad_norm": 0.29609373211860657, "learning_rate": 1.8255246072305727e-06, "loss": 0.2845, "step": 8911 }, { "epoch": 2.2414486921529173, "grad_norm": 0.32585108280181885, "learning_rate": 1.8243942404547183e-06, "loss": 0.3267, "step": 8912 }, { "epoch": 2.2417002012072436, "grad_norm": 0.30860912799835205, "learning_rate": 1.8232641456496292e-06, "loss": 0.3092, "step": 8913 }, { "epoch": 2.2419517102615694, "grad_norm": 0.30172842741012573, "learning_rate": 1.822134322912094e-06, "loss": 0.2914, "step": 8914 }, { "epoch": 2.242203219315895, "grad_norm": 0.30393847823143005, "learning_rate": 1.8210047723388718e-06, "loss": 0.3236, "step": 8915 }, { "epoch": 2.2424547283702214, "grad_norm": 0.2871536314487457, "learning_rate": 1.8198754940267044e-06, "loss": 0.3247, "step": 8916 }, { "epoch": 2.2427062374245472, "grad_norm": 0.31319135427474976, "learning_rate": 1.818746488072305e-06, "loss": 0.3169, "step": 8917 }, { "epoch": 2.242957746478873, "grad_norm": 0.3419165015220642, "learning_rate": 1.8176177545723683e-06, "loss": 0.3388, "step": 8918 }, { "epoch": 2.2432092555331993, "grad_norm": 0.308427631855011, "learning_rate": 1.8164892936235602e-06, "loss": 0.3216, "step": 8919 }, { "epoch": 2.243460764587525, "grad_norm": 0.33541321754455566, "learning_rate": 1.81536110532253e-06, "loss": 0.3406, "step": 8920 }, { "epoch": 2.243712273641851, "grad_norm": 0.3217279314994812, "learning_rate": 1.8142331897658967e-06, "loss": 0.3293, "step": 8921 }, { "epoch": 2.243963782696177, "grad_norm": 0.30454570055007935, "learning_rate": 1.8131055470502601e-06, "loss": 0.3415, "step": 8922 }, { "epoch": 2.244215291750503, "grad_norm": 0.30971288681030273, "learning_rate": 1.8119781772721984e-06, "loss": 0.3416, "step": 8923 }, { "epoch": 2.2444668008048287, "grad_norm": 0.3124922513961792, "learning_rate": 1.8108510805282598e-06, "loss": 0.3392, "step": 8924 }, { "epoch": 2.244718309859155, "grad_norm": 0.29240474104881287, "learning_rate": 1.8097242569149765e-06, "loss": 0.3084, "step": 8925 }, { "epoch": 2.244969818913481, "grad_norm": 0.2906413674354553, "learning_rate": 1.8085977065288502e-06, "loss": 0.3244, "step": 8926 }, { "epoch": 2.245221327967807, "grad_norm": 0.27822086215019226, "learning_rate": 1.807471429466367e-06, "loss": 0.3249, "step": 8927 }, { "epoch": 2.245472837022133, "grad_norm": 0.3114204704761505, "learning_rate": 1.8063454258239821e-06, "loss": 0.3361, "step": 8928 }, { "epoch": 2.2457243460764587, "grad_norm": 0.3120749890804291, "learning_rate": 1.8052196956981333e-06, "loss": 0.3021, "step": 8929 }, { "epoch": 2.245975855130785, "grad_norm": 0.31990909576416016, "learning_rate": 1.8040942391852296e-06, "loss": 0.3188, "step": 8930 }, { "epoch": 2.2462273641851107, "grad_norm": 0.3102615475654602, "learning_rate": 1.8029690563816626e-06, "loss": 0.3298, "step": 8931 }, { "epoch": 2.2464788732394365, "grad_norm": 0.29477155208587646, "learning_rate": 1.8018441473837934e-06, "loss": 0.3149, "step": 8932 }, { "epoch": 2.2467303822937628, "grad_norm": 0.2773665189743042, "learning_rate": 1.8007195122879656e-06, "loss": 0.3093, "step": 8933 }, { "epoch": 2.2469818913480886, "grad_norm": 0.31135937571525574, "learning_rate": 1.7995951511904985e-06, "loss": 0.334, "step": 8934 }, { "epoch": 2.2472334004024144, "grad_norm": 0.28784048557281494, "learning_rate": 1.7984710641876829e-06, "loss": 0.3057, "step": 8935 }, { "epoch": 2.2474849094567406, "grad_norm": 0.3066675364971161, "learning_rate": 1.7973472513757945e-06, "loss": 0.3211, "step": 8936 }, { "epoch": 2.2477364185110664, "grad_norm": 0.30136793851852417, "learning_rate": 1.7962237128510761e-06, "loss": 0.3269, "step": 8937 }, { "epoch": 2.2479879275653922, "grad_norm": 0.30192577838897705, "learning_rate": 1.7951004487097557e-06, "loss": 0.3124, "step": 8938 }, { "epoch": 2.2482394366197185, "grad_norm": 0.29201701283454895, "learning_rate": 1.7939774590480301e-06, "loss": 0.3281, "step": 8939 }, { "epoch": 2.2484909456740443, "grad_norm": 0.29174402356147766, "learning_rate": 1.7928547439620808e-06, "loss": 0.3287, "step": 8940 }, { "epoch": 2.24874245472837, "grad_norm": 0.3136996924877167, "learning_rate": 1.7917323035480567e-06, "loss": 0.3081, "step": 8941 }, { "epoch": 2.2489939637826963, "grad_norm": 0.2897532880306244, "learning_rate": 1.7906101379020912e-06, "loss": 0.3015, "step": 8942 }, { "epoch": 2.249245472837022, "grad_norm": 0.30037471652030945, "learning_rate": 1.7894882471202884e-06, "loss": 0.344, "step": 8943 }, { "epoch": 2.249496981891348, "grad_norm": 0.32366058230400085, "learning_rate": 1.7883666312987319e-06, "loss": 0.328, "step": 8944 }, { "epoch": 2.249748490945674, "grad_norm": 0.295149564743042, "learning_rate": 1.7872452905334836e-06, "loss": 0.3338, "step": 8945 }, { "epoch": 2.25, "grad_norm": 0.31807345151901245, "learning_rate": 1.7861242249205752e-06, "loss": 0.3177, "step": 8946 }, { "epoch": 2.250251509054326, "grad_norm": 0.31685301661491394, "learning_rate": 1.785003434556023e-06, "loss": 0.3012, "step": 8947 }, { "epoch": 2.250503018108652, "grad_norm": 0.3292388617992401, "learning_rate": 1.783882919535812e-06, "loss": 0.3129, "step": 8948 }, { "epoch": 2.250754527162978, "grad_norm": 0.3227696716785431, "learning_rate": 1.7827626799559105e-06, "loss": 0.3408, "step": 8949 }, { "epoch": 2.2510060362173037, "grad_norm": 0.2962954044342041, "learning_rate": 1.7816427159122569e-06, "loss": 0.339, "step": 8950 }, { "epoch": 2.25125754527163, "grad_norm": 0.3421285152435303, "learning_rate": 1.7805230275007724e-06, "loss": 0.3196, "step": 8951 }, { "epoch": 2.2515090543259557, "grad_norm": 0.3049623668193817, "learning_rate": 1.7794036148173477e-06, "loss": 0.344, "step": 8952 }, { "epoch": 2.2517605633802815, "grad_norm": 0.29739803075790405, "learning_rate": 1.7782844779578574e-06, "loss": 0.3286, "step": 8953 }, { "epoch": 2.2520120724346078, "grad_norm": 0.31580233573913574, "learning_rate": 1.7771656170181445e-06, "loss": 0.3177, "step": 8954 }, { "epoch": 2.2522635814889336, "grad_norm": 0.321826308965683, "learning_rate": 1.7760470320940348e-06, "loss": 0.3254, "step": 8955 }, { "epoch": 2.2525150905432594, "grad_norm": 0.2998609244823456, "learning_rate": 1.7749287232813296e-06, "loss": 0.3193, "step": 8956 }, { "epoch": 2.2527665995975856, "grad_norm": 0.29203975200653076, "learning_rate": 1.7738106906758013e-06, "loss": 0.3187, "step": 8957 }, { "epoch": 2.2530181086519114, "grad_norm": 0.30509132146835327, "learning_rate": 1.7726929343732059e-06, "loss": 0.3282, "step": 8958 }, { "epoch": 2.2532696177062372, "grad_norm": 0.29113608598709106, "learning_rate": 1.7715754544692692e-06, "loss": 0.3167, "step": 8959 }, { "epoch": 2.2535211267605635, "grad_norm": 0.322614848613739, "learning_rate": 1.7704582510596996e-06, "loss": 0.3343, "step": 8960 }, { "epoch": 2.2537726358148893, "grad_norm": 0.31486186385154724, "learning_rate": 1.7693413242401753e-06, "loss": 0.3303, "step": 8961 }, { "epoch": 2.2540241448692155, "grad_norm": 0.30907559394836426, "learning_rate": 1.7682246741063568e-06, "loss": 0.3308, "step": 8962 }, { "epoch": 2.2542756539235413, "grad_norm": 0.3086276054382324, "learning_rate": 1.7671083007538765e-06, "loss": 0.3373, "step": 8963 }, { "epoch": 2.254527162977867, "grad_norm": 0.31900346279144287, "learning_rate": 1.7659922042783463e-06, "loss": 0.3326, "step": 8964 }, { "epoch": 2.2547786720321934, "grad_norm": 0.29411938786506653, "learning_rate": 1.7648763847753497e-06, "loss": 0.3219, "step": 8965 }, { "epoch": 2.255030181086519, "grad_norm": 0.2990444004535675, "learning_rate": 1.7637608423404524e-06, "loss": 0.3265, "step": 8966 }, { "epoch": 2.255281690140845, "grad_norm": 0.3038404881954193, "learning_rate": 1.7626455770691947e-06, "loss": 0.2958, "step": 8967 }, { "epoch": 2.2555331991951713, "grad_norm": 0.2940180003643036, "learning_rate": 1.7615305890570888e-06, "loss": 0.3148, "step": 8968 }, { "epoch": 2.255784708249497, "grad_norm": 0.31063640117645264, "learning_rate": 1.76041587839963e-06, "loss": 0.3319, "step": 8969 }, { "epoch": 2.256036217303823, "grad_norm": 0.2945641875267029, "learning_rate": 1.759301445192283e-06, "loss": 0.3332, "step": 8970 }, { "epoch": 2.256287726358149, "grad_norm": 0.2893473505973816, "learning_rate": 1.7581872895304947e-06, "loss": 0.3137, "step": 8971 }, { "epoch": 2.256539235412475, "grad_norm": 0.2820425033569336, "learning_rate": 1.7570734115096827e-06, "loss": 0.3094, "step": 8972 }, { "epoch": 2.2567907444668007, "grad_norm": 0.27644819021224976, "learning_rate": 1.7559598112252475e-06, "loss": 0.326, "step": 8973 }, { "epoch": 2.257042253521127, "grad_norm": 0.276979923248291, "learning_rate": 1.7548464887725576e-06, "loss": 0.3127, "step": 8974 }, { "epoch": 2.2572937625754528, "grad_norm": 0.3153163492679596, "learning_rate": 1.753733444246966e-06, "loss": 0.3206, "step": 8975 }, { "epoch": 2.2575452716297786, "grad_norm": 0.286739706993103, "learning_rate": 1.7526206777437948e-06, "loss": 0.3228, "step": 8976 }, { "epoch": 2.257796780684105, "grad_norm": 0.3067798912525177, "learning_rate": 1.7515081893583469e-06, "loss": 0.3415, "step": 8977 }, { "epoch": 2.2580482897384306, "grad_norm": 0.28727710247039795, "learning_rate": 1.7503959791859016e-06, "loss": 0.3038, "step": 8978 }, { "epoch": 2.2582997987927564, "grad_norm": 0.29552116990089417, "learning_rate": 1.7492840473217099e-06, "loss": 0.3148, "step": 8979 }, { "epoch": 2.2585513078470827, "grad_norm": 0.2867220342159271, "learning_rate": 1.7481723938610045e-06, "loss": 0.3154, "step": 8980 }, { "epoch": 2.2588028169014085, "grad_norm": 0.2924220860004425, "learning_rate": 1.747061018898989e-06, "loss": 0.288, "step": 8981 }, { "epoch": 2.2590543259557343, "grad_norm": 0.29958319664001465, "learning_rate": 1.745949922530848e-06, "loss": 0.3132, "step": 8982 }, { "epoch": 2.2593058350100605, "grad_norm": 0.3001472055912018, "learning_rate": 1.7448391048517378e-06, "loss": 0.3353, "step": 8983 }, { "epoch": 2.2595573440643864, "grad_norm": 0.32707712054252625, "learning_rate": 1.7437285659567954e-06, "loss": 0.3129, "step": 8984 }, { "epoch": 2.259808853118712, "grad_norm": 0.2877408266067505, "learning_rate": 1.7426183059411284e-06, "loss": 0.334, "step": 8985 }, { "epoch": 2.2600603621730384, "grad_norm": 0.3208245635032654, "learning_rate": 1.7415083248998271e-06, "loss": 0.3472, "step": 8986 }, { "epoch": 2.260311871227364, "grad_norm": 0.31004029512405396, "learning_rate": 1.7403986229279506e-06, "loss": 0.3382, "step": 8987 }, { "epoch": 2.26056338028169, "grad_norm": 0.3009289503097534, "learning_rate": 1.7392892001205409e-06, "loss": 0.3078, "step": 8988 }, { "epoch": 2.2608148893360163, "grad_norm": 0.29705020785331726, "learning_rate": 1.7381800565726138e-06, "loss": 0.3133, "step": 8989 }, { "epoch": 2.261066398390342, "grad_norm": 0.30671584606170654, "learning_rate": 1.7370711923791567e-06, "loss": 0.3117, "step": 8990 }, { "epoch": 2.261317907444668, "grad_norm": 0.2912043333053589, "learning_rate": 1.735962607635141e-06, "loss": 0.3099, "step": 8991 }, { "epoch": 2.261569416498994, "grad_norm": 0.3244048058986664, "learning_rate": 1.7348543024355068e-06, "loss": 0.329, "step": 8992 }, { "epoch": 2.26182092555332, "grad_norm": 0.28510650992393494, "learning_rate": 1.7337462768751766e-06, "loss": 0.3192, "step": 8993 }, { "epoch": 2.2620724346076457, "grad_norm": 0.3037286698818207, "learning_rate": 1.7326385310490424e-06, "loss": 0.3109, "step": 8994 }, { "epoch": 2.262323943661972, "grad_norm": 0.3154160678386688, "learning_rate": 1.731531065051979e-06, "loss": 0.3267, "step": 8995 }, { "epoch": 2.262575452716298, "grad_norm": 0.2968575358390808, "learning_rate": 1.7304238789788308e-06, "loss": 0.3137, "step": 8996 }, { "epoch": 2.2628269617706236, "grad_norm": 0.2917513847351074, "learning_rate": 1.7293169729244247e-06, "loss": 0.2899, "step": 8997 }, { "epoch": 2.26307847082495, "grad_norm": 0.29885080456733704, "learning_rate": 1.728210346983557e-06, "loss": 0.3232, "step": 8998 }, { "epoch": 2.2633299798792756, "grad_norm": 0.2979438006877899, "learning_rate": 1.7271040012510044e-06, "loss": 0.3017, "step": 8999 }, { "epoch": 2.2635814889336014, "grad_norm": 0.2976253628730774, "learning_rate": 1.7259979358215213e-06, "loss": 0.3289, "step": 9000 }, { "epoch": 2.2638329979879277, "grad_norm": 0.2976281940937042, "learning_rate": 1.7248921507898304e-06, "loss": 0.3013, "step": 9001 }, { "epoch": 2.2640845070422535, "grad_norm": 0.31368160247802734, "learning_rate": 1.7237866462506398e-06, "loss": 0.2938, "step": 9002 }, { "epoch": 2.2643360160965793, "grad_norm": 0.30961135029792786, "learning_rate": 1.7226814222986255e-06, "loss": 0.3168, "step": 9003 }, { "epoch": 2.2645875251509056, "grad_norm": 0.2974013686180115, "learning_rate": 1.7215764790284462e-06, "loss": 0.3062, "step": 9004 }, { "epoch": 2.2648390342052314, "grad_norm": 0.2913673520088196, "learning_rate": 1.7204718165347301e-06, "loss": 0.3068, "step": 9005 }, { "epoch": 2.265090543259557, "grad_norm": 0.3057686686515808, "learning_rate": 1.7193674349120877e-06, "loss": 0.3078, "step": 9006 }, { "epoch": 2.2653420523138834, "grad_norm": 0.29526326060295105, "learning_rate": 1.7182633342550991e-06, "loss": 0.2993, "step": 9007 }, { "epoch": 2.265593561368209, "grad_norm": 0.30530017614364624, "learning_rate": 1.7171595146583275e-06, "loss": 0.3163, "step": 9008 }, { "epoch": 2.265845070422535, "grad_norm": 0.31532004475593567, "learning_rate": 1.7160559762163042e-06, "loss": 0.3276, "step": 9009 }, { "epoch": 2.2660965794768613, "grad_norm": 0.30409690737724304, "learning_rate": 1.714952719023542e-06, "loss": 0.3166, "step": 9010 }, { "epoch": 2.266348088531187, "grad_norm": 0.3003282845020294, "learning_rate": 1.7138497431745304e-06, "loss": 0.3452, "step": 9011 }, { "epoch": 2.266599597585513, "grad_norm": 0.29305264353752136, "learning_rate": 1.7127470487637282e-06, "loss": 0.3278, "step": 9012 }, { "epoch": 2.266851106639839, "grad_norm": 0.27770164608955383, "learning_rate": 1.7116446358855781e-06, "loss": 0.3196, "step": 9013 }, { "epoch": 2.267102615694165, "grad_norm": 0.2922765016555786, "learning_rate": 1.7105425046344914e-06, "loss": 0.3444, "step": 9014 }, { "epoch": 2.2673541247484907, "grad_norm": 0.32455912232398987, "learning_rate": 1.709440655104862e-06, "loss": 0.2895, "step": 9015 }, { "epoch": 2.267605633802817, "grad_norm": 0.2932599186897278, "learning_rate": 1.7083390873910533e-06, "loss": 0.3285, "step": 9016 }, { "epoch": 2.267857142857143, "grad_norm": 0.29908427596092224, "learning_rate": 1.7072378015874107e-06, "loss": 0.3236, "step": 9017 }, { "epoch": 2.2681086519114686, "grad_norm": 0.31389617919921875, "learning_rate": 1.7061367977882493e-06, "loss": 0.3431, "step": 9018 }, { "epoch": 2.268360160965795, "grad_norm": 0.2915651798248291, "learning_rate": 1.7050360760878665e-06, "loss": 0.3442, "step": 9019 }, { "epoch": 2.2686116700201207, "grad_norm": 0.3061988949775696, "learning_rate": 1.7039356365805286e-06, "loss": 0.3118, "step": 9020 }, { "epoch": 2.2688631790744465, "grad_norm": 0.3040686249732971, "learning_rate": 1.7028354793604857e-06, "loss": 0.3257, "step": 9021 }, { "epoch": 2.2691146881287727, "grad_norm": 0.3051724135875702, "learning_rate": 1.7017356045219545e-06, "loss": 0.3085, "step": 9022 }, { "epoch": 2.2693661971830985, "grad_norm": 0.32119080424308777, "learning_rate": 1.700636012159137e-06, "loss": 0.3363, "step": 9023 }, { "epoch": 2.2696177062374243, "grad_norm": 0.28925296664237976, "learning_rate": 1.699536702366203e-06, "loss": 0.3113, "step": 9024 }, { "epoch": 2.2698692152917506, "grad_norm": 0.30859729647636414, "learning_rate": 1.6984376752373038e-06, "loss": 0.2988, "step": 9025 }, { "epoch": 2.2701207243460764, "grad_norm": 0.3168920576572418, "learning_rate": 1.6973389308665617e-06, "loss": 0.3182, "step": 9026 }, { "epoch": 2.270372233400402, "grad_norm": 0.3211313784122467, "learning_rate": 1.6962404693480805e-06, "loss": 0.3277, "step": 9027 }, { "epoch": 2.2706237424547284, "grad_norm": 0.30997171998023987, "learning_rate": 1.6951422907759336e-06, "loss": 0.3244, "step": 9028 }, { "epoch": 2.2708752515090542, "grad_norm": 0.3075638711452484, "learning_rate": 1.6940443952441755e-06, "loss": 0.3263, "step": 9029 }, { "epoch": 2.2711267605633805, "grad_norm": 0.31429892778396606, "learning_rate": 1.6929467828468316e-06, "loss": 0.3171, "step": 9030 }, { "epoch": 2.2713782696177063, "grad_norm": 0.2928819954395294, "learning_rate": 1.6918494536779084e-06, "loss": 0.2967, "step": 9031 }, { "epoch": 2.271629778672032, "grad_norm": 0.2912546694278717, "learning_rate": 1.690752407831382e-06, "loss": 0.3185, "step": 9032 }, { "epoch": 2.2718812877263583, "grad_norm": 0.31666284799575806, "learning_rate": 1.689655645401211e-06, "loss": 0.351, "step": 9033 }, { "epoch": 2.272132796780684, "grad_norm": 0.30313748121261597, "learning_rate": 1.6885591664813228e-06, "loss": 0.3183, "step": 9034 }, { "epoch": 2.27238430583501, "grad_norm": 0.2988080084323883, "learning_rate": 1.6874629711656275e-06, "loss": 0.3295, "step": 9035 }, { "epoch": 2.272635814889336, "grad_norm": 0.2955930531024933, "learning_rate": 1.6863670595480042e-06, "loss": 0.3343, "step": 9036 }, { "epoch": 2.272887323943662, "grad_norm": 0.33139654994010925, "learning_rate": 1.6852714317223118e-06, "loss": 0.3049, "step": 9037 }, { "epoch": 2.273138832997988, "grad_norm": 0.29346272349357605, "learning_rate": 1.684176087782386e-06, "loss": 0.3158, "step": 9038 }, { "epoch": 2.273390342052314, "grad_norm": 0.3030097186565399, "learning_rate": 1.6830810278220327e-06, "loss": 0.3183, "step": 9039 }, { "epoch": 2.27364185110664, "grad_norm": 0.28629571199417114, "learning_rate": 1.681986251935041e-06, "loss": 0.3128, "step": 9040 }, { "epoch": 2.2738933601609657, "grad_norm": 0.31391236186027527, "learning_rate": 1.6808917602151676e-06, "loss": 0.3154, "step": 9041 }, { "epoch": 2.274144869215292, "grad_norm": 0.27841538190841675, "learning_rate": 1.6797975527561522e-06, "loss": 0.3176, "step": 9042 }, { "epoch": 2.2743963782696177, "grad_norm": 0.2939876914024353, "learning_rate": 1.6787036296517034e-06, "loss": 0.3298, "step": 9043 }, { "epoch": 2.2746478873239435, "grad_norm": 0.3155568540096283, "learning_rate": 1.677609990995513e-06, "loss": 0.3457, "step": 9044 }, { "epoch": 2.2748993963782698, "grad_norm": 0.30136945843696594, "learning_rate": 1.6765166368812403e-06, "loss": 0.3086, "step": 9045 }, { "epoch": 2.2751509054325956, "grad_norm": 0.2959335744380951, "learning_rate": 1.6754235674025271e-06, "loss": 0.3092, "step": 9046 }, { "epoch": 2.2754024144869214, "grad_norm": 0.2871185541152954, "learning_rate": 1.6743307826529858e-06, "loss": 0.3144, "step": 9047 }, { "epoch": 2.2756539235412476, "grad_norm": 0.30033811926841736, "learning_rate": 1.6732382827262089e-06, "loss": 0.31, "step": 9048 }, { "epoch": 2.2759054325955734, "grad_norm": 0.3050321936607361, "learning_rate": 1.6721460677157591e-06, "loss": 0.3331, "step": 9049 }, { "epoch": 2.2761569416498992, "grad_norm": 0.33643415570259094, "learning_rate": 1.6710541377151818e-06, "loss": 0.3273, "step": 9050 }, { "epoch": 2.2764084507042255, "grad_norm": 0.32750841975212097, "learning_rate": 1.6699624928179897e-06, "loss": 0.3269, "step": 9051 }, { "epoch": 2.2766599597585513, "grad_norm": 0.29840290546417236, "learning_rate": 1.6688711331176777e-06, "loss": 0.313, "step": 9052 }, { "epoch": 2.276911468812877, "grad_norm": 0.3018534481525421, "learning_rate": 1.6677800587077153e-06, "loss": 0.3041, "step": 9053 }, { "epoch": 2.2771629778672033, "grad_norm": 0.297966867685318, "learning_rate": 1.6666892696815428e-06, "loss": 0.3085, "step": 9054 }, { "epoch": 2.277414486921529, "grad_norm": 0.29202499985694885, "learning_rate": 1.6655987661325835e-06, "loss": 0.313, "step": 9055 }, { "epoch": 2.277665995975855, "grad_norm": 0.27722373604774475, "learning_rate": 1.6645085481542273e-06, "loss": 0.3335, "step": 9056 }, { "epoch": 2.277917505030181, "grad_norm": 0.3057959973812103, "learning_rate": 1.6634186158398496e-06, "loss": 0.3158, "step": 9057 }, { "epoch": 2.278169014084507, "grad_norm": 0.29990729689598083, "learning_rate": 1.6623289692827916e-06, "loss": 0.3162, "step": 9058 }, { "epoch": 2.278420523138833, "grad_norm": 0.3051862418651581, "learning_rate": 1.6612396085763794e-06, "loss": 0.2969, "step": 9059 }, { "epoch": 2.278672032193159, "grad_norm": 0.30120375752449036, "learning_rate": 1.660150533813905e-06, "loss": 0.3275, "step": 9060 }, { "epoch": 2.278923541247485, "grad_norm": 0.3087018132209778, "learning_rate": 1.6590617450886453e-06, "loss": 0.3146, "step": 9061 }, { "epoch": 2.279175050301811, "grad_norm": 0.3511127531528473, "learning_rate": 1.657973242493845e-06, "loss": 0.3205, "step": 9062 }, { "epoch": 2.279426559356137, "grad_norm": 0.29214683175086975, "learning_rate": 1.6568850261227282e-06, "loss": 0.3271, "step": 9063 }, { "epoch": 2.2796780684104627, "grad_norm": 0.3279742896556854, "learning_rate": 1.6557970960684965e-06, "loss": 0.3187, "step": 9064 }, { "epoch": 2.279929577464789, "grad_norm": 0.2921777367591858, "learning_rate": 1.6547094524243207e-06, "loss": 0.3241, "step": 9065 }, { "epoch": 2.2801810865191148, "grad_norm": 0.2994241714477539, "learning_rate": 1.6536220952833536e-06, "loss": 0.3162, "step": 9066 }, { "epoch": 2.2804325955734406, "grad_norm": 0.29721885919570923, "learning_rate": 1.6525350247387178e-06, "loss": 0.3329, "step": 9067 }, { "epoch": 2.280684104627767, "grad_norm": 0.3295932412147522, "learning_rate": 1.651448240883517e-06, "loss": 0.312, "step": 9068 }, { "epoch": 2.2809356136820926, "grad_norm": 0.3002535402774811, "learning_rate": 1.6503617438108243e-06, "loss": 0.3275, "step": 9069 }, { "epoch": 2.2811871227364184, "grad_norm": 0.2872998118400574, "learning_rate": 1.6492755336136945e-06, "loss": 0.306, "step": 9070 }, { "epoch": 2.2814386317907447, "grad_norm": 0.29634666442871094, "learning_rate": 1.6481896103851513e-06, "loss": 0.3245, "step": 9071 }, { "epoch": 2.2816901408450705, "grad_norm": 0.28420212864875793, "learning_rate": 1.647103974218201e-06, "loss": 0.3308, "step": 9072 }, { "epoch": 2.2819416498993963, "grad_norm": 0.3027746081352234, "learning_rate": 1.6460186252058181e-06, "loss": 0.3234, "step": 9073 }, { "epoch": 2.2821931589537225, "grad_norm": 0.3052777945995331, "learning_rate": 1.6449335634409575e-06, "loss": 0.3114, "step": 9074 }, { "epoch": 2.2824446680080483, "grad_norm": 0.299187570810318, "learning_rate": 1.6438487890165494e-06, "loss": 0.3206, "step": 9075 }, { "epoch": 2.282696177062374, "grad_norm": 0.2914719879627228, "learning_rate": 1.6427643020254951e-06, "loss": 0.3431, "step": 9076 }, { "epoch": 2.2829476861167004, "grad_norm": 0.29015323519706726, "learning_rate": 1.641680102560677e-06, "loss": 0.3297, "step": 9077 }, { "epoch": 2.283199195171026, "grad_norm": 0.28477156162261963, "learning_rate": 1.640596190714947e-06, "loss": 0.3376, "step": 9078 }, { "epoch": 2.283450704225352, "grad_norm": 0.2982317805290222, "learning_rate": 1.6395125665811385e-06, "loss": 0.3274, "step": 9079 }, { "epoch": 2.2837022132796783, "grad_norm": 0.3212500512599945, "learning_rate": 1.6384292302520537e-06, "loss": 0.3396, "step": 9080 }, { "epoch": 2.283953722334004, "grad_norm": 0.2954564392566681, "learning_rate": 1.6373461818204773e-06, "loss": 0.3362, "step": 9081 }, { "epoch": 2.28420523138833, "grad_norm": 0.306144118309021, "learning_rate": 1.6362634213791617e-06, "loss": 0.34, "step": 9082 }, { "epoch": 2.284456740442656, "grad_norm": 0.28161266446113586, "learning_rate": 1.6351809490208426e-06, "loss": 0.3189, "step": 9083 }, { "epoch": 2.284708249496982, "grad_norm": 0.3151404857635498, "learning_rate": 1.6340987648382233e-06, "loss": 0.3294, "step": 9084 }, { "epoch": 2.2849597585513077, "grad_norm": 0.28344619274139404, "learning_rate": 1.6330168689239879e-06, "loss": 0.317, "step": 9085 }, { "epoch": 2.285211267605634, "grad_norm": 0.2821776568889618, "learning_rate": 1.6319352613707956e-06, "loss": 0.3065, "step": 9086 }, { "epoch": 2.28546277665996, "grad_norm": 0.29392242431640625, "learning_rate": 1.6308539422712756e-06, "loss": 0.3496, "step": 9087 }, { "epoch": 2.2857142857142856, "grad_norm": 0.2900041937828064, "learning_rate": 1.62977291171804e-06, "loss": 0.303, "step": 9088 }, { "epoch": 2.285965794768612, "grad_norm": 0.30908921360969543, "learning_rate": 1.6286921698036685e-06, "loss": 0.3404, "step": 9089 }, { "epoch": 2.2862173038229376, "grad_norm": 0.28658002614974976, "learning_rate": 1.627611716620724e-06, "loss": 0.3158, "step": 9090 }, { "epoch": 2.2864688128772634, "grad_norm": 0.2914651930332184, "learning_rate": 1.6265315522617365e-06, "loss": 0.3237, "step": 9091 }, { "epoch": 2.2867203219315897, "grad_norm": 0.3053009808063507, "learning_rate": 1.6254516768192185e-06, "loss": 0.3383, "step": 9092 }, { "epoch": 2.2869718309859155, "grad_norm": 0.2992967367172241, "learning_rate": 1.6243720903856518e-06, "loss": 0.3272, "step": 9093 }, { "epoch": 2.2872233400402413, "grad_norm": 0.2941541075706482, "learning_rate": 1.6232927930534997e-06, "loss": 0.3164, "step": 9094 }, { "epoch": 2.2874748490945676, "grad_norm": 0.28971970081329346, "learning_rate": 1.6222137849151932e-06, "loss": 0.3291, "step": 9095 }, { "epoch": 2.2877263581488934, "grad_norm": 0.27451595664024353, "learning_rate": 1.6211350660631448e-06, "loss": 0.2967, "step": 9096 }, { "epoch": 2.287977867203219, "grad_norm": 0.2858002483844757, "learning_rate": 1.6200566365897414e-06, "loss": 0.3311, "step": 9097 }, { "epoch": 2.2882293762575454, "grad_norm": 0.28279751539230347, "learning_rate": 1.6189784965873407e-06, "loss": 0.3334, "step": 9098 }, { "epoch": 2.288480885311871, "grad_norm": 0.29283228516578674, "learning_rate": 1.617900646148282e-06, "loss": 0.3158, "step": 9099 }, { "epoch": 2.288732394366197, "grad_norm": 0.29870137572288513, "learning_rate": 1.6168230853648725e-06, "loss": 0.3458, "step": 9100 }, { "epoch": 2.2889839034205233, "grad_norm": 0.2867933213710785, "learning_rate": 1.615745814329403e-06, "loss": 0.3067, "step": 9101 }, { "epoch": 2.289235412474849, "grad_norm": 0.27168020606040955, "learning_rate": 1.6146688331341303e-06, "loss": 0.3111, "step": 9102 }, { "epoch": 2.289486921529175, "grad_norm": 0.2864697277545929, "learning_rate": 1.6135921418712959e-06, "loss": 0.3208, "step": 9103 }, { "epoch": 2.289738430583501, "grad_norm": 0.2777710556983948, "learning_rate": 1.6125157406331065e-06, "loss": 0.3309, "step": 9104 }, { "epoch": 2.289989939637827, "grad_norm": 0.27852359414100647, "learning_rate": 1.6114396295117547e-06, "loss": 0.3057, "step": 9105 }, { "epoch": 2.2902414486921527, "grad_norm": 0.30460140109062195, "learning_rate": 1.6103638085993972e-06, "loss": 0.3214, "step": 9106 }, { "epoch": 2.290492957746479, "grad_norm": 0.3020101487636566, "learning_rate": 1.6092882779881746e-06, "loss": 0.3219, "step": 9107 }, { "epoch": 2.290744466800805, "grad_norm": 0.29373854398727417, "learning_rate": 1.6082130377702004e-06, "loss": 0.3454, "step": 9108 }, { "epoch": 2.2909959758551306, "grad_norm": 0.27670902013778687, "learning_rate": 1.6071380880375586e-06, "loss": 0.3154, "step": 9109 }, { "epoch": 2.291247484909457, "grad_norm": 0.3227442502975464, "learning_rate": 1.6060634288823158e-06, "loss": 0.3082, "step": 9110 }, { "epoch": 2.2914989939637826, "grad_norm": 0.3054077923297882, "learning_rate": 1.6049890603965063e-06, "loss": 0.3121, "step": 9111 }, { "epoch": 2.2917505030181085, "grad_norm": 0.2847161293029785, "learning_rate": 1.6039149826721462e-06, "loss": 0.2923, "step": 9112 }, { "epoch": 2.2920020120724347, "grad_norm": 0.3265150487422943, "learning_rate": 1.6028411958012203e-06, "loss": 0.3319, "step": 9113 }, { "epoch": 2.2922535211267605, "grad_norm": 0.2997869551181793, "learning_rate": 1.6017676998756947e-06, "loss": 0.3079, "step": 9114 }, { "epoch": 2.2925050301810863, "grad_norm": 0.3061138391494751, "learning_rate": 1.6006944949875052e-06, "loss": 0.3217, "step": 9115 }, { "epoch": 2.2927565392354126, "grad_norm": 0.2861214876174927, "learning_rate": 1.5996215812285682e-06, "loss": 0.3015, "step": 9116 }, { "epoch": 2.2930080482897384, "grad_norm": 0.31791844964027405, "learning_rate": 1.5985489586907676e-06, "loss": 0.3303, "step": 9117 }, { "epoch": 2.293259557344064, "grad_norm": 0.2969111502170563, "learning_rate": 1.5974766274659697e-06, "loss": 0.3213, "step": 9118 }, { "epoch": 2.2935110663983904, "grad_norm": 0.2913528084754944, "learning_rate": 1.5964045876460143e-06, "loss": 0.3531, "step": 9119 }, { "epoch": 2.2937625754527162, "grad_norm": 0.30313563346862793, "learning_rate": 1.5953328393227113e-06, "loss": 0.3048, "step": 9120 }, { "epoch": 2.294014084507042, "grad_norm": 0.3050040304660797, "learning_rate": 1.5942613825878527e-06, "loss": 0.3299, "step": 9121 }, { "epoch": 2.2942655935613683, "grad_norm": 0.2819101810455322, "learning_rate": 1.5931902175331986e-06, "loss": 0.3041, "step": 9122 }, { "epoch": 2.294517102615694, "grad_norm": 0.28545475006103516, "learning_rate": 1.5921193442504918e-06, "loss": 0.3063, "step": 9123 }, { "epoch": 2.29476861167002, "grad_norm": 0.33305785059928894, "learning_rate": 1.5910487628314414e-06, "loss": 0.3053, "step": 9124 }, { "epoch": 2.295020120724346, "grad_norm": 0.30768680572509766, "learning_rate": 1.5899784733677397e-06, "loss": 0.3215, "step": 9125 }, { "epoch": 2.295271629778672, "grad_norm": 0.28684544563293457, "learning_rate": 1.588908475951047e-06, "loss": 0.3256, "step": 9126 }, { "epoch": 2.2955231388329977, "grad_norm": 0.2967512011528015, "learning_rate": 1.5878387706730053e-06, "loss": 0.3377, "step": 9127 }, { "epoch": 2.295774647887324, "grad_norm": 0.29876625537872314, "learning_rate": 1.5867693576252252e-06, "loss": 0.3176, "step": 9128 }, { "epoch": 2.29602615694165, "grad_norm": 0.3200327754020691, "learning_rate": 1.5857002368992963e-06, "loss": 0.3297, "step": 9129 }, { "epoch": 2.296277665995976, "grad_norm": 0.31204643845558167, "learning_rate": 1.5846314085867836e-06, "loss": 0.3412, "step": 9130 }, { "epoch": 2.296529175050302, "grad_norm": 0.29177501797676086, "learning_rate": 1.5835628727792228e-06, "loss": 0.3191, "step": 9131 }, { "epoch": 2.2967806841046277, "grad_norm": 0.2729922831058502, "learning_rate": 1.582494629568131e-06, "loss": 0.3133, "step": 9132 }, { "epoch": 2.297032193158954, "grad_norm": 0.3033842146396637, "learning_rate": 1.5814266790449922e-06, "loss": 0.313, "step": 9133 }, { "epoch": 2.2972837022132797, "grad_norm": 0.29062163829803467, "learning_rate": 1.5803590213012738e-06, "loss": 0.2842, "step": 9134 }, { "epoch": 2.2975352112676055, "grad_norm": 0.2890336215496063, "learning_rate": 1.57929165642841e-06, "loss": 0.3044, "step": 9135 }, { "epoch": 2.2977867203219318, "grad_norm": 0.2879352569580078, "learning_rate": 1.578224584517818e-06, "loss": 0.3019, "step": 9136 }, { "epoch": 2.2980382293762576, "grad_norm": 0.3032044768333435, "learning_rate": 1.5771578056608816e-06, "loss": 0.3173, "step": 9137 }, { "epoch": 2.2982897384305834, "grad_norm": 0.3006070852279663, "learning_rate": 1.5760913199489674e-06, "loss": 0.3308, "step": 9138 }, { "epoch": 2.2985412474849096, "grad_norm": 0.3287603259086609, "learning_rate": 1.5750251274734107e-06, "loss": 0.3182, "step": 9139 }, { "epoch": 2.2987927565392354, "grad_norm": 0.2758319675922394, "learning_rate": 1.5739592283255251e-06, "loss": 0.3166, "step": 9140 }, { "epoch": 2.2990442655935612, "grad_norm": 0.29488644003868103, "learning_rate": 1.5728936225966002e-06, "loss": 0.293, "step": 9141 }, { "epoch": 2.2992957746478875, "grad_norm": 0.31970569491386414, "learning_rate": 1.5718283103778941e-06, "loss": 0.313, "step": 9142 }, { "epoch": 2.2995472837022133, "grad_norm": 0.29525116086006165, "learning_rate": 1.5707632917606491e-06, "loss": 0.3212, "step": 9143 }, { "epoch": 2.299798792756539, "grad_norm": 0.29070043563842773, "learning_rate": 1.569698566836073e-06, "loss": 0.3151, "step": 9144 }, { "epoch": 2.3000503018108653, "grad_norm": 0.3086007237434387, "learning_rate": 1.5686341356953566e-06, "loss": 0.3343, "step": 9145 }, { "epoch": 2.300301810865191, "grad_norm": 0.3106892704963684, "learning_rate": 1.5675699984296584e-06, "loss": 0.3108, "step": 9146 }, { "epoch": 2.300553319919517, "grad_norm": 0.2823115289211273, "learning_rate": 1.5665061551301175e-06, "loss": 0.3255, "step": 9147 }, { "epoch": 2.300804828973843, "grad_norm": 0.3261948227882385, "learning_rate": 1.5654426058878436e-06, "loss": 0.3265, "step": 9148 }, { "epoch": 2.301056338028169, "grad_norm": 0.30333390831947327, "learning_rate": 1.5643793507939253e-06, "loss": 0.3367, "step": 9149 }, { "epoch": 2.301307847082495, "grad_norm": 0.28751394152641296, "learning_rate": 1.5633163899394211e-06, "loss": 0.3166, "step": 9150 }, { "epoch": 2.301559356136821, "grad_norm": 0.2967854142189026, "learning_rate": 1.5622537234153695e-06, "loss": 0.3093, "step": 9151 }, { "epoch": 2.301810865191147, "grad_norm": 0.2856552004814148, "learning_rate": 1.561191351312779e-06, "loss": 0.3194, "step": 9152 }, { "epoch": 2.3020623742454727, "grad_norm": 0.30799177289009094, "learning_rate": 1.560129273722637e-06, "loss": 0.313, "step": 9153 }, { "epoch": 2.302313883299799, "grad_norm": 0.2922762334346771, "learning_rate": 1.559067490735902e-06, "loss": 0.3232, "step": 9154 }, { "epoch": 2.3025653923541247, "grad_norm": 0.28383079171180725, "learning_rate": 1.5580060024435112e-06, "loss": 0.3039, "step": 9155 }, { "epoch": 2.3028169014084505, "grad_norm": 0.2966978847980499, "learning_rate": 1.556944808936372e-06, "loss": 0.3438, "step": 9156 }, { "epoch": 2.3030684104627768, "grad_norm": 0.3017922639846802, "learning_rate": 1.5558839103053713e-06, "loss": 0.3177, "step": 9157 }, { "epoch": 2.3033199195171026, "grad_norm": 0.2967662513256073, "learning_rate": 1.554823306641366e-06, "loss": 0.2977, "step": 9158 }, { "epoch": 2.3035714285714284, "grad_norm": 0.2964570224285126, "learning_rate": 1.5537629980351932e-06, "loss": 0.3134, "step": 9159 }, { "epoch": 2.3038229376257546, "grad_norm": 0.307412713766098, "learning_rate": 1.5527029845776587e-06, "loss": 0.3429, "step": 9160 }, { "epoch": 2.3040744466800804, "grad_norm": 0.32412174344062805, "learning_rate": 1.5516432663595483e-06, "loss": 0.3201, "step": 9161 }, { "epoch": 2.3043259557344067, "grad_norm": 0.3185696601867676, "learning_rate": 1.550583843471618e-06, "loss": 0.3388, "step": 9162 }, { "epoch": 2.3045774647887325, "grad_norm": 0.2888423502445221, "learning_rate": 1.5495247160046039e-06, "loss": 0.3304, "step": 9163 }, { "epoch": 2.3048289738430583, "grad_norm": 0.2889467477798462, "learning_rate": 1.54846588404921e-06, "loss": 0.3088, "step": 9164 }, { "epoch": 2.3050804828973845, "grad_norm": 0.3054656982421875, "learning_rate": 1.5474073476961216e-06, "loss": 0.3256, "step": 9165 }, { "epoch": 2.3053319919517103, "grad_norm": 0.30939677357673645, "learning_rate": 1.5463491070359937e-06, "loss": 0.3316, "step": 9166 }, { "epoch": 2.305583501006036, "grad_norm": 0.31722491979599, "learning_rate": 1.5452911621594596e-06, "loss": 0.314, "step": 9167 }, { "epoch": 2.3058350100603624, "grad_norm": 0.31227657198905945, "learning_rate": 1.544233513157124e-06, "loss": 0.3222, "step": 9168 }, { "epoch": 2.306086519114688, "grad_norm": 0.2999171018600464, "learning_rate": 1.54317616011957e-06, "loss": 0.3119, "step": 9169 }, { "epoch": 2.306338028169014, "grad_norm": 0.30300676822662354, "learning_rate": 1.54211910313735e-06, "loss": 0.2988, "step": 9170 }, { "epoch": 2.3065895372233403, "grad_norm": 0.31904926896095276, "learning_rate": 1.541062342300997e-06, "loss": 0.3285, "step": 9171 }, { "epoch": 2.306841046277666, "grad_norm": 0.29842498898506165, "learning_rate": 1.5400058777010169e-06, "loss": 0.3163, "step": 9172 }, { "epoch": 2.307092555331992, "grad_norm": 0.3134821951389313, "learning_rate": 1.5389497094278861e-06, "loss": 0.3168, "step": 9173 }, { "epoch": 2.307344064386318, "grad_norm": 0.3033539950847626, "learning_rate": 1.537893837572062e-06, "loss": 0.2992, "step": 9174 }, { "epoch": 2.307595573440644, "grad_norm": 0.2625449001789093, "learning_rate": 1.5368382622239703e-06, "loss": 0.3167, "step": 9175 }, { "epoch": 2.3078470824949697, "grad_norm": 0.3125070035457611, "learning_rate": 1.5357829834740174e-06, "loss": 0.3273, "step": 9176 }, { "epoch": 2.308098591549296, "grad_norm": 0.3032437264919281, "learning_rate": 1.5347280014125782e-06, "loss": 0.3069, "step": 9177 }, { "epoch": 2.308350100603622, "grad_norm": 0.3202145993709564, "learning_rate": 1.5336733161300088e-06, "loss": 0.3211, "step": 9178 }, { "epoch": 2.3086016096579476, "grad_norm": 0.30158209800720215, "learning_rate": 1.5326189277166325e-06, "loss": 0.3295, "step": 9179 }, { "epoch": 2.308853118712274, "grad_norm": 0.3015068769454956, "learning_rate": 1.5315648362627556e-06, "loss": 0.3257, "step": 9180 }, { "epoch": 2.3091046277665996, "grad_norm": 0.30036771297454834, "learning_rate": 1.53051104185865e-06, "loss": 0.3132, "step": 9181 }, { "epoch": 2.3093561368209254, "grad_norm": 0.2994939386844635, "learning_rate": 1.5294575445945687e-06, "loss": 0.3275, "step": 9182 }, { "epoch": 2.3096076458752517, "grad_norm": 0.3159419298171997, "learning_rate": 1.5284043445607383e-06, "loss": 0.2978, "step": 9183 }, { "epoch": 2.3098591549295775, "grad_norm": 0.3039325475692749, "learning_rate": 1.5273514418473566e-06, "loss": 0.3127, "step": 9184 }, { "epoch": 2.3101106639839033, "grad_norm": 0.32907170057296753, "learning_rate": 1.5262988365446002e-06, "loss": 0.3283, "step": 9185 }, { "epoch": 2.3103621730382295, "grad_norm": 0.30235663056373596, "learning_rate": 1.5252465287426154e-06, "loss": 0.3055, "step": 9186 }, { "epoch": 2.3106136820925554, "grad_norm": 0.30011188983917236, "learning_rate": 1.5241945185315292e-06, "loss": 0.3435, "step": 9187 }, { "epoch": 2.310865191146881, "grad_norm": 0.3140828311443329, "learning_rate": 1.5231428060014363e-06, "loss": 0.3229, "step": 9188 }, { "epoch": 2.3111167002012074, "grad_norm": 0.30959945917129517, "learning_rate": 1.5220913912424128e-06, "loss": 0.3224, "step": 9189 }, { "epoch": 2.311368209255533, "grad_norm": 0.2961542010307312, "learning_rate": 1.5210402743445018e-06, "loss": 0.343, "step": 9190 }, { "epoch": 2.311619718309859, "grad_norm": 0.31126269698143005, "learning_rate": 1.519989455397729e-06, "loss": 0.2833, "step": 9191 }, { "epoch": 2.3118712273641853, "grad_norm": 0.2786300480365753, "learning_rate": 1.518938934492087e-06, "loss": 0.3245, "step": 9192 }, { "epoch": 2.312122736418511, "grad_norm": 0.30616483092308044, "learning_rate": 1.5178887117175472e-06, "loss": 0.3253, "step": 9193 }, { "epoch": 2.312374245472837, "grad_norm": 0.27918025851249695, "learning_rate": 1.5168387871640572e-06, "loss": 0.325, "step": 9194 }, { "epoch": 2.312625754527163, "grad_norm": 0.2911374866962433, "learning_rate": 1.5157891609215331e-06, "loss": 0.3209, "step": 9195 }, { "epoch": 2.312877263581489, "grad_norm": 0.28203797340393066, "learning_rate": 1.5147398330798712e-06, "loss": 0.3063, "step": 9196 }, { "epoch": 2.3131287726358147, "grad_norm": 0.29836559295654297, "learning_rate": 1.5136908037289377e-06, "loss": 0.3154, "step": 9197 }, { "epoch": 2.313380281690141, "grad_norm": 0.3110038936138153, "learning_rate": 1.5126420729585784e-06, "loss": 0.3066, "step": 9198 }, { "epoch": 2.313631790744467, "grad_norm": 0.3166778087615967, "learning_rate": 1.5115936408586069e-06, "loss": 0.3624, "step": 9199 }, { "epoch": 2.3138832997987926, "grad_norm": 0.2844400107860565, "learning_rate": 1.510545507518818e-06, "loss": 0.3125, "step": 9200 }, { "epoch": 2.314134808853119, "grad_norm": 0.3151662051677704, "learning_rate": 1.5094976730289751e-06, "loss": 0.3391, "step": 9201 }, { "epoch": 2.3143863179074446, "grad_norm": 0.30355745553970337, "learning_rate": 1.5084501374788213e-06, "loss": 0.3132, "step": 9202 }, { "epoch": 2.3146378269617705, "grad_norm": 0.3106285631656647, "learning_rate": 1.5074029009580687e-06, "loss": 0.3235, "step": 9203 }, { "epoch": 2.3148893360160967, "grad_norm": 0.31202206015586853, "learning_rate": 1.5063559635564078e-06, "loss": 0.362, "step": 9204 }, { "epoch": 2.3151408450704225, "grad_norm": 0.2972472012042999, "learning_rate": 1.5053093253635043e-06, "loss": 0.3213, "step": 9205 }, { "epoch": 2.3153923541247483, "grad_norm": 0.29215607047080994, "learning_rate": 1.5042629864689927e-06, "loss": 0.3171, "step": 9206 }, { "epoch": 2.3156438631790746, "grad_norm": 0.3004598617553711, "learning_rate": 1.5032169469624892e-06, "loss": 0.3167, "step": 9207 }, { "epoch": 2.3158953722334004, "grad_norm": 0.2856040894985199, "learning_rate": 1.502171206933576e-06, "loss": 0.3203, "step": 9208 }, { "epoch": 2.316146881287726, "grad_norm": 0.2829803228378296, "learning_rate": 1.5011257664718187e-06, "loss": 0.315, "step": 9209 }, { "epoch": 2.3163983903420524, "grad_norm": 0.28959813714027405, "learning_rate": 1.500080625666749e-06, "loss": 0.3013, "step": 9210 }, { "epoch": 2.316649899396378, "grad_norm": 0.3246660530567169, "learning_rate": 1.4990357846078795e-06, "loss": 0.3109, "step": 9211 }, { "epoch": 2.316901408450704, "grad_norm": 0.30734169483184814, "learning_rate": 1.4979912433846917e-06, "loss": 0.3233, "step": 9212 }, { "epoch": 2.3171529175050303, "grad_norm": 0.3003253638744354, "learning_rate": 1.4969470020866467e-06, "loss": 0.3286, "step": 9213 }, { "epoch": 2.317404426559356, "grad_norm": 0.31494301557540894, "learning_rate": 1.4959030608031749e-06, "loss": 0.3244, "step": 9214 }, { "epoch": 2.317655935613682, "grad_norm": 0.3177368640899658, "learning_rate": 1.4948594196236838e-06, "loss": 0.2982, "step": 9215 }, { "epoch": 2.317907444668008, "grad_norm": 0.2995319664478302, "learning_rate": 1.4938160786375571e-06, "loss": 0.3289, "step": 9216 }, { "epoch": 2.318158953722334, "grad_norm": 0.32530516386032104, "learning_rate": 1.4927730379341476e-06, "loss": 0.3391, "step": 9217 }, { "epoch": 2.3184104627766597, "grad_norm": 0.2980848252773285, "learning_rate": 1.491730297602787e-06, "loss": 0.3546, "step": 9218 }, { "epoch": 2.318661971830986, "grad_norm": 0.2969314455986023, "learning_rate": 1.4906878577327776e-06, "loss": 0.3178, "step": 9219 }, { "epoch": 2.318913480885312, "grad_norm": 0.2775447368621826, "learning_rate": 1.4896457184134005e-06, "loss": 0.3162, "step": 9220 }, { "epoch": 2.3191649899396376, "grad_norm": 0.2855249047279358, "learning_rate": 1.4886038797339058e-06, "loss": 0.3313, "step": 9221 }, { "epoch": 2.319416498993964, "grad_norm": 0.3028770089149475, "learning_rate": 1.4875623417835227e-06, "loss": 0.3522, "step": 9222 }, { "epoch": 2.3196680080482897, "grad_norm": 0.2914048731327057, "learning_rate": 1.4865211046514494e-06, "loss": 0.2839, "step": 9223 }, { "epoch": 2.3199195171026155, "grad_norm": 0.3016049563884735, "learning_rate": 1.4854801684268655e-06, "loss": 0.3268, "step": 9224 }, { "epoch": 2.3201710261569417, "grad_norm": 0.3333890438079834, "learning_rate": 1.4844395331989164e-06, "loss": 0.324, "step": 9225 }, { "epoch": 2.3204225352112675, "grad_norm": 0.2845839858055115, "learning_rate": 1.4833991990567281e-06, "loss": 0.3258, "step": 9226 }, { "epoch": 2.3206740442655933, "grad_norm": 0.30603617429733276, "learning_rate": 1.4823591660894e-06, "loss": 0.3091, "step": 9227 }, { "epoch": 2.3209255533199196, "grad_norm": 0.2999797761440277, "learning_rate": 1.4813194343860015e-06, "loss": 0.3111, "step": 9228 }, { "epoch": 2.3211770623742454, "grad_norm": 0.29560771584510803, "learning_rate": 1.4802800040355825e-06, "loss": 0.3128, "step": 9229 }, { "epoch": 2.3214285714285716, "grad_norm": 0.28773796558380127, "learning_rate": 1.4792408751271603e-06, "loss": 0.3219, "step": 9230 }, { "epoch": 2.3216800804828974, "grad_norm": 0.3093893826007843, "learning_rate": 1.4782020477497328e-06, "loss": 0.3332, "step": 9231 }, { "epoch": 2.3219315895372232, "grad_norm": 0.3124389350414276, "learning_rate": 1.4771635219922658e-06, "loss": 0.3106, "step": 9232 }, { "epoch": 2.3221830985915495, "grad_norm": 0.29394763708114624, "learning_rate": 1.4761252979437062e-06, "loss": 0.3145, "step": 9233 }, { "epoch": 2.3224346076458753, "grad_norm": 0.29244881868362427, "learning_rate": 1.475087375692968e-06, "loss": 0.3295, "step": 9234 }, { "epoch": 2.322686116700201, "grad_norm": 0.3140466511249542, "learning_rate": 1.4740497553289456e-06, "loss": 0.3344, "step": 9235 }, { "epoch": 2.3229376257545273, "grad_norm": 0.2974216341972351, "learning_rate": 1.473012436940502e-06, "loss": 0.3238, "step": 9236 }, { "epoch": 2.323189134808853, "grad_norm": 0.29725712537765503, "learning_rate": 1.4719754206164782e-06, "loss": 0.3307, "step": 9237 }, { "epoch": 2.323440643863179, "grad_norm": 0.3167237937450409, "learning_rate": 1.4709387064456899e-06, "loss": 0.3255, "step": 9238 }, { "epoch": 2.323692152917505, "grad_norm": 0.28957289457321167, "learning_rate": 1.4699022945169221e-06, "loss": 0.3095, "step": 9239 }, { "epoch": 2.323943661971831, "grad_norm": 0.2918396294116974, "learning_rate": 1.4688661849189407e-06, "loss": 0.34, "step": 9240 }, { "epoch": 2.324195171026157, "grad_norm": 0.29980701208114624, "learning_rate": 1.4678303777404778e-06, "loss": 0.3276, "step": 9241 }, { "epoch": 2.324446680080483, "grad_norm": 0.2971269488334656, "learning_rate": 1.4667948730702474e-06, "loss": 0.3113, "step": 9242 }, { "epoch": 2.324698189134809, "grad_norm": 0.29910194873809814, "learning_rate": 1.4657596709969313e-06, "loss": 0.3286, "step": 9243 }, { "epoch": 2.3249496981891347, "grad_norm": 0.2877163887023926, "learning_rate": 1.46472477160919e-06, "loss": 0.3216, "step": 9244 }, { "epoch": 2.325201207243461, "grad_norm": 0.308214396238327, "learning_rate": 1.4636901749956544e-06, "loss": 0.3337, "step": 9245 }, { "epoch": 2.3254527162977867, "grad_norm": 0.3051459491252899, "learning_rate": 1.4626558812449337e-06, "loss": 0.3487, "step": 9246 }, { "epoch": 2.3257042253521125, "grad_norm": 0.29465314745903015, "learning_rate": 1.461621890445606e-06, "loss": 0.3044, "step": 9247 }, { "epoch": 2.3259557344064388, "grad_norm": 0.3071502447128296, "learning_rate": 1.4605882026862267e-06, "loss": 0.3422, "step": 9248 }, { "epoch": 2.3262072434607646, "grad_norm": 0.3024512529373169, "learning_rate": 1.4595548180553275e-06, "loss": 0.3562, "step": 9249 }, { "epoch": 2.3264587525150904, "grad_norm": 0.2800012528896332, "learning_rate": 1.4585217366414072e-06, "loss": 0.3417, "step": 9250 }, { "epoch": 2.3267102615694166, "grad_norm": 0.30198147892951965, "learning_rate": 1.4574889585329466e-06, "loss": 0.3352, "step": 9251 }, { "epoch": 2.3269617706237424, "grad_norm": 0.2976208031177521, "learning_rate": 1.456456483818393e-06, "loss": 0.3333, "step": 9252 }, { "epoch": 2.3272132796780682, "grad_norm": 0.30566921830177307, "learning_rate": 1.455424312586175e-06, "loss": 0.3108, "step": 9253 }, { "epoch": 2.3274647887323945, "grad_norm": 0.2987402379512787, "learning_rate": 1.4543924449246882e-06, "loss": 0.3167, "step": 9254 }, { "epoch": 2.3277162977867203, "grad_norm": 0.3012560307979584, "learning_rate": 1.4533608809223087e-06, "loss": 0.3464, "step": 9255 }, { "epoch": 2.327967806841046, "grad_norm": 0.3095744848251343, "learning_rate": 1.452329620667381e-06, "loss": 0.3325, "step": 9256 }, { "epoch": 2.3282193158953723, "grad_norm": 0.29700353741645813, "learning_rate": 1.4512986642482279e-06, "loss": 0.3131, "step": 9257 }, { "epoch": 2.328470824949698, "grad_norm": 0.2864930033683777, "learning_rate": 1.4502680117531425e-06, "loss": 0.334, "step": 9258 }, { "epoch": 2.328722334004024, "grad_norm": 0.3204570710659027, "learning_rate": 1.4492376632703947e-06, "loss": 0.345, "step": 9259 }, { "epoch": 2.32897384305835, "grad_norm": 0.2854207158088684, "learning_rate": 1.4482076188882293e-06, "loss": 0.3135, "step": 9260 }, { "epoch": 2.329225352112676, "grad_norm": 0.2826644480228424, "learning_rate": 1.4471778786948598e-06, "loss": 0.3473, "step": 9261 }, { "epoch": 2.3294768611670023, "grad_norm": 0.29891934990882874, "learning_rate": 1.44614844277848e-06, "loss": 0.3143, "step": 9262 }, { "epoch": 2.329728370221328, "grad_norm": 0.3028249144554138, "learning_rate": 1.4451193112272515e-06, "loss": 0.3143, "step": 9263 }, { "epoch": 2.329979879275654, "grad_norm": 0.30594244599342346, "learning_rate": 1.4440904841293168e-06, "loss": 0.3291, "step": 9264 }, { "epoch": 2.33023138832998, "grad_norm": 0.3240032196044922, "learning_rate": 1.4430619615727842e-06, "loss": 0.3143, "step": 9265 }, { "epoch": 2.330482897384306, "grad_norm": 0.2798107862472534, "learning_rate": 1.442033743645745e-06, "loss": 0.3249, "step": 9266 }, { "epoch": 2.3307344064386317, "grad_norm": 0.3072926104068756, "learning_rate": 1.4410058304362546e-06, "loss": 0.347, "step": 9267 }, { "epoch": 2.330985915492958, "grad_norm": 0.3058062493801117, "learning_rate": 1.4399782220323515e-06, "loss": 0.3252, "step": 9268 }, { "epoch": 2.3312374245472838, "grad_norm": 0.2971736490726471, "learning_rate": 1.4389509185220412e-06, "loss": 0.3435, "step": 9269 }, { "epoch": 2.3314889336016096, "grad_norm": 0.2837081253528595, "learning_rate": 1.4379239199933082e-06, "loss": 0.3381, "step": 9270 }, { "epoch": 2.331740442655936, "grad_norm": 0.3149973452091217, "learning_rate": 1.4368972265341052e-06, "loss": 0.3382, "step": 9271 }, { "epoch": 2.3319919517102616, "grad_norm": 0.3013942837715149, "learning_rate": 1.435870838232366e-06, "loss": 0.3117, "step": 9272 }, { "epoch": 2.3322434607645874, "grad_norm": 0.3119725286960602, "learning_rate": 1.4348447551759908e-06, "loss": 0.3132, "step": 9273 }, { "epoch": 2.3324949698189137, "grad_norm": 0.3200342059135437, "learning_rate": 1.4338189774528605e-06, "loss": 0.3183, "step": 9274 }, { "epoch": 2.3327464788732395, "grad_norm": 0.2716279923915863, "learning_rate": 1.432793505150823e-06, "loss": 0.3251, "step": 9275 }, { "epoch": 2.3329979879275653, "grad_norm": 0.31028926372528076, "learning_rate": 1.4317683383577074e-06, "loss": 0.3335, "step": 9276 }, { "epoch": 2.3332494969818915, "grad_norm": 0.2938306927680969, "learning_rate": 1.4307434771613087e-06, "loss": 0.3327, "step": 9277 }, { "epoch": 2.3335010060362174, "grad_norm": 0.30416056513786316, "learning_rate": 1.429718921649404e-06, "loss": 0.3397, "step": 9278 }, { "epoch": 2.333752515090543, "grad_norm": 0.3303104341030121, "learning_rate": 1.428694671909736e-06, "loss": 0.3197, "step": 9279 }, { "epoch": 2.3340040241448694, "grad_norm": 0.29095789790153503, "learning_rate": 1.4276707280300295e-06, "loss": 0.3101, "step": 9280 }, { "epoch": 2.334255533199195, "grad_norm": 0.308417946100235, "learning_rate": 1.4266470900979746e-06, "loss": 0.2987, "step": 9281 }, { "epoch": 2.334507042253521, "grad_norm": 0.33862265944480896, "learning_rate": 1.4256237582012433e-06, "loss": 0.3286, "step": 9282 }, { "epoch": 2.3347585513078473, "grad_norm": 0.28483128547668457, "learning_rate": 1.4246007324274747e-06, "loss": 0.3082, "step": 9283 }, { "epoch": 2.335010060362173, "grad_norm": 0.3114943504333496, "learning_rate": 1.4235780128642867e-06, "loss": 0.3211, "step": 9284 }, { "epoch": 2.335261569416499, "grad_norm": 0.30490976572036743, "learning_rate": 1.4225555995992668e-06, "loss": 0.3289, "step": 9285 }, { "epoch": 2.335513078470825, "grad_norm": 0.29261600971221924, "learning_rate": 1.4215334927199808e-06, "loss": 0.3234, "step": 9286 }, { "epoch": 2.335764587525151, "grad_norm": 0.30190035700798035, "learning_rate": 1.4205116923139628e-06, "loss": 0.3004, "step": 9287 }, { "epoch": 2.3360160965794767, "grad_norm": 0.30952534079551697, "learning_rate": 1.4194901984687266e-06, "loss": 0.3236, "step": 9288 }, { "epoch": 2.336267605633803, "grad_norm": 0.3120158910751343, "learning_rate": 1.4184690112717536e-06, "loss": 0.3316, "step": 9289 }, { "epoch": 2.336519114688129, "grad_norm": 0.2920396029949188, "learning_rate": 1.417448130810506e-06, "loss": 0.3116, "step": 9290 }, { "epoch": 2.3367706237424546, "grad_norm": 0.31028714776039124, "learning_rate": 1.4164275571724112e-06, "loss": 0.3198, "step": 9291 }, { "epoch": 2.337022132796781, "grad_norm": 0.33299851417541504, "learning_rate": 1.4154072904448778e-06, "loss": 0.3254, "step": 9292 }, { "epoch": 2.3372736418511066, "grad_norm": 0.2730899155139923, "learning_rate": 1.4143873307152867e-06, "loss": 0.3093, "step": 9293 }, { "epoch": 2.3375251509054324, "grad_norm": 0.30700913071632385, "learning_rate": 1.413367678070987e-06, "loss": 0.322, "step": 9294 }, { "epoch": 2.3377766599597587, "grad_norm": 0.31583860516548157, "learning_rate": 1.41234833259931e-06, "loss": 0.3232, "step": 9295 }, { "epoch": 2.3380281690140845, "grad_norm": 0.31791380047798157, "learning_rate": 1.4113292943875518e-06, "loss": 0.3248, "step": 9296 }, { "epoch": 2.3382796780684103, "grad_norm": 0.2840316593647003, "learning_rate": 1.4103105635229907e-06, "loss": 0.311, "step": 9297 }, { "epoch": 2.3385311871227366, "grad_norm": 0.31344839930534363, "learning_rate": 1.4092921400928717e-06, "loss": 0.3031, "step": 9298 }, { "epoch": 2.3387826961770624, "grad_norm": 0.29697540402412415, "learning_rate": 1.4082740241844185e-06, "loss": 0.3319, "step": 9299 }, { "epoch": 2.339034205231388, "grad_norm": 0.30100035667419434, "learning_rate": 1.4072562158848241e-06, "loss": 0.2954, "step": 9300 }, { "epoch": 2.3392857142857144, "grad_norm": 0.31279945373535156, "learning_rate": 1.4062387152812595e-06, "loss": 0.3215, "step": 9301 }, { "epoch": 2.33953722334004, "grad_norm": 0.29303503036499023, "learning_rate": 1.4052215224608656e-06, "loss": 0.3127, "step": 9302 }, { "epoch": 2.339788732394366, "grad_norm": 0.3038380444049835, "learning_rate": 1.4042046375107592e-06, "loss": 0.3176, "step": 9303 }, { "epoch": 2.3400402414486923, "grad_norm": 0.3016948103904724, "learning_rate": 1.4031880605180325e-06, "loss": 0.3134, "step": 9304 }, { "epoch": 2.340291750503018, "grad_norm": 0.3166118264198303, "learning_rate": 1.4021717915697448e-06, "loss": 0.3081, "step": 9305 }, { "epoch": 2.340543259557344, "grad_norm": 0.31366658210754395, "learning_rate": 1.4011558307529366e-06, "loss": 0.3166, "step": 9306 }, { "epoch": 2.34079476861167, "grad_norm": 0.3000653088092804, "learning_rate": 1.400140178154616e-06, "loss": 0.3153, "step": 9307 }, { "epoch": 2.341046277665996, "grad_norm": 0.2916873097419739, "learning_rate": 1.3991248338617697e-06, "loss": 0.3262, "step": 9308 }, { "epoch": 2.3412977867203217, "grad_norm": 0.29192009568214417, "learning_rate": 1.3981097979613528e-06, "loss": 0.323, "step": 9309 }, { "epoch": 2.341549295774648, "grad_norm": 0.31591734290122986, "learning_rate": 1.3970950705403003e-06, "loss": 0.3382, "step": 9310 }, { "epoch": 2.341800804828974, "grad_norm": 0.32917144894599915, "learning_rate": 1.3960806516855136e-06, "loss": 0.3127, "step": 9311 }, { "epoch": 2.3420523138832996, "grad_norm": 0.29949262738227844, "learning_rate": 1.3950665414838744e-06, "loss": 0.3415, "step": 9312 }, { "epoch": 2.342303822937626, "grad_norm": 0.31109297275543213, "learning_rate": 1.394052740022232e-06, "loss": 0.3125, "step": 9313 }, { "epoch": 2.3425553319919517, "grad_norm": 0.31198766827583313, "learning_rate": 1.3930392473874138e-06, "loss": 0.3121, "step": 9314 }, { "epoch": 2.3428068410462775, "grad_norm": 0.30766913294792175, "learning_rate": 1.3920260636662208e-06, "loss": 0.3147, "step": 9315 }, { "epoch": 2.3430583501006037, "grad_norm": 0.3334919512271881, "learning_rate": 1.391013188945422e-06, "loss": 0.3141, "step": 9316 }, { "epoch": 2.3433098591549295, "grad_norm": 0.3124428391456604, "learning_rate": 1.3900006233117675e-06, "loss": 0.341, "step": 9317 }, { "epoch": 2.3435613682092553, "grad_norm": 0.29679644107818604, "learning_rate": 1.3889883668519744e-06, "loss": 0.304, "step": 9318 }, { "epoch": 2.3438128772635816, "grad_norm": 0.2995522916316986, "learning_rate": 1.3879764196527384e-06, "loss": 0.321, "step": 9319 }, { "epoch": 2.3440643863179074, "grad_norm": 0.31305447220802307, "learning_rate": 1.3869647818007236e-06, "loss": 0.2999, "step": 9320 }, { "epoch": 2.344315895372233, "grad_norm": 0.2792012095451355, "learning_rate": 1.385953453382574e-06, "loss": 0.3074, "step": 9321 }, { "epoch": 2.3445674044265594, "grad_norm": 0.29924044013023376, "learning_rate": 1.3849424344849e-06, "loss": 0.2991, "step": 9322 }, { "epoch": 2.3448189134808852, "grad_norm": 0.30037921667099, "learning_rate": 1.3839317251942907e-06, "loss": 0.3035, "step": 9323 }, { "epoch": 2.345070422535211, "grad_norm": 0.3079024851322174, "learning_rate": 1.3829213255973089e-06, "loss": 0.321, "step": 9324 }, { "epoch": 2.3453219315895373, "grad_norm": 0.2921197712421417, "learning_rate": 1.3819112357804859e-06, "loss": 0.3223, "step": 9325 }, { "epoch": 2.345573440643863, "grad_norm": 0.28539982438087463, "learning_rate": 1.3809014558303319e-06, "loss": 0.321, "step": 9326 }, { "epoch": 2.345824949698189, "grad_norm": 0.2913080155849457, "learning_rate": 1.3798919858333254e-06, "loss": 0.3126, "step": 9327 }, { "epoch": 2.346076458752515, "grad_norm": 0.31199610233306885, "learning_rate": 1.3788828258759251e-06, "loss": 0.3611, "step": 9328 }, { "epoch": 2.346327967806841, "grad_norm": 0.301520973443985, "learning_rate": 1.3778739760445552e-06, "loss": 0.3339, "step": 9329 }, { "epoch": 2.346579476861167, "grad_norm": 0.29440373182296753, "learning_rate": 1.3768654364256212e-06, "loss": 0.3115, "step": 9330 }, { "epoch": 2.346830985915493, "grad_norm": 0.29134684801101685, "learning_rate": 1.375857207105495e-06, "loss": 0.3267, "step": 9331 }, { "epoch": 2.347082494969819, "grad_norm": 0.2973969280719757, "learning_rate": 1.3748492881705272e-06, "loss": 0.3272, "step": 9332 }, { "epoch": 2.347334004024145, "grad_norm": 0.30077022314071655, "learning_rate": 1.373841679707038e-06, "loss": 0.3198, "step": 9333 }, { "epoch": 2.347585513078471, "grad_norm": 0.27227941155433655, "learning_rate": 1.3728343818013233e-06, "loss": 0.282, "step": 9334 }, { "epoch": 2.3478370221327967, "grad_norm": 0.30526816844940186, "learning_rate": 1.3718273945396542e-06, "loss": 0.3189, "step": 9335 }, { "epoch": 2.348088531187123, "grad_norm": 0.27349480986595154, "learning_rate": 1.3708207180082694e-06, "loss": 0.3208, "step": 9336 }, { "epoch": 2.3483400402414487, "grad_norm": 0.30724623799324036, "learning_rate": 1.3698143522933876e-06, "loss": 0.3169, "step": 9337 }, { "epoch": 2.3485915492957745, "grad_norm": 0.2816196084022522, "learning_rate": 1.368808297481195e-06, "loss": 0.3152, "step": 9338 }, { "epoch": 2.3488430583501008, "grad_norm": 0.2888517379760742, "learning_rate": 1.3678025536578559e-06, "loss": 0.3093, "step": 9339 }, { "epoch": 2.3490945674044266, "grad_norm": 0.2849167287349701, "learning_rate": 1.3667971209095039e-06, "loss": 0.3124, "step": 9340 }, { "epoch": 2.3493460764587524, "grad_norm": 0.29156047105789185, "learning_rate": 1.3657919993222507e-06, "loss": 0.3193, "step": 9341 }, { "epoch": 2.3495975855130786, "grad_norm": 0.30401670932769775, "learning_rate": 1.3647871889821762e-06, "loss": 0.3127, "step": 9342 }, { "epoch": 2.3498490945674044, "grad_norm": 0.2958643436431885, "learning_rate": 1.363782689975338e-06, "loss": 0.3034, "step": 9343 }, { "epoch": 2.3501006036217302, "grad_norm": 0.2841004729270935, "learning_rate": 1.3627785023877633e-06, "loss": 0.3383, "step": 9344 }, { "epoch": 2.3503521126760565, "grad_norm": 0.2793041169643402, "learning_rate": 1.3617746263054548e-06, "loss": 0.3506, "step": 9345 }, { "epoch": 2.3506036217303823, "grad_norm": 0.29398274421691895, "learning_rate": 1.360771061814391e-06, "loss": 0.3331, "step": 9346 }, { "epoch": 2.350855130784708, "grad_norm": 0.3107350468635559, "learning_rate": 1.3597678090005168e-06, "loss": 0.3286, "step": 9347 }, { "epoch": 2.3511066398390343, "grad_norm": 0.2840394973754883, "learning_rate": 1.3587648679497583e-06, "loss": 0.3182, "step": 9348 }, { "epoch": 2.35135814889336, "grad_norm": 0.3232376277446747, "learning_rate": 1.3577622387480082e-06, "loss": 0.3361, "step": 9349 }, { "epoch": 2.351609657947686, "grad_norm": 0.30280598998069763, "learning_rate": 1.3567599214811379e-06, "loss": 0.3451, "step": 9350 }, { "epoch": 2.351861167002012, "grad_norm": 0.2855769693851471, "learning_rate": 1.3557579162349864e-06, "loss": 0.2765, "step": 9351 }, { "epoch": 2.352112676056338, "grad_norm": 0.30225083231925964, "learning_rate": 1.3547562230953726e-06, "loss": 0.3393, "step": 9352 }, { "epoch": 2.352364185110664, "grad_norm": 0.29712530970573425, "learning_rate": 1.353754842148083e-06, "loss": 0.316, "step": 9353 }, { "epoch": 2.35261569416499, "grad_norm": 0.3048205077648163, "learning_rate": 1.352753773478881e-06, "loss": 0.3233, "step": 9354 }, { "epoch": 2.352867203219316, "grad_norm": 0.3305099308490753, "learning_rate": 1.3517530171735e-06, "loss": 0.3363, "step": 9355 }, { "epoch": 2.3531187122736417, "grad_norm": 0.27165278792381287, "learning_rate": 1.3507525733176497e-06, "loss": 0.331, "step": 9356 }, { "epoch": 2.353370221327968, "grad_norm": 0.3076144754886627, "learning_rate": 1.3497524419970132e-06, "loss": 0.3293, "step": 9357 }, { "epoch": 2.3536217303822937, "grad_norm": 0.31551241874694824, "learning_rate": 1.348752623297243e-06, "loss": 0.3077, "step": 9358 }, { "epoch": 2.3538732394366195, "grad_norm": 0.3101344108581543, "learning_rate": 1.3477531173039697e-06, "loss": 0.3015, "step": 9359 }, { "epoch": 2.3541247484909458, "grad_norm": 0.3111810088157654, "learning_rate": 1.3467539241027922e-06, "loss": 0.3292, "step": 9360 }, { "epoch": 2.3543762575452716, "grad_norm": 0.32074761390686035, "learning_rate": 1.3457550437792876e-06, "loss": 0.3281, "step": 9361 }, { "epoch": 2.354627766599598, "grad_norm": 0.2921229600906372, "learning_rate": 1.344756476419002e-06, "loss": 0.3177, "step": 9362 }, { "epoch": 2.3548792756539236, "grad_norm": 0.28211191296577454, "learning_rate": 1.3437582221074574e-06, "loss": 0.3583, "step": 9363 }, { "epoch": 2.3551307847082494, "grad_norm": 0.31738191843032837, "learning_rate": 1.342760280930147e-06, "loss": 0.3441, "step": 9364 }, { "epoch": 2.3553822937625757, "grad_norm": 0.28729522228240967, "learning_rate": 1.3417626529725402e-06, "loss": 0.3326, "step": 9365 }, { "epoch": 2.3556338028169015, "grad_norm": 0.3042784333229065, "learning_rate": 1.3407653383200747e-06, "loss": 0.3347, "step": 9366 }, { "epoch": 2.3558853118712273, "grad_norm": 0.2783034145832062, "learning_rate": 1.339768337058166e-06, "loss": 0.2803, "step": 9367 }, { "epoch": 2.3561368209255535, "grad_norm": 0.2911806106567383, "learning_rate": 1.3387716492722025e-06, "loss": 0.303, "step": 9368 }, { "epoch": 2.3563883299798793, "grad_norm": 0.2815414071083069, "learning_rate": 1.337775275047541e-06, "loss": 0.3222, "step": 9369 }, { "epoch": 2.356639839034205, "grad_norm": 0.2898373007774353, "learning_rate": 1.336779214469518e-06, "loss": 0.3224, "step": 9370 }, { "epoch": 2.3568913480885314, "grad_norm": 0.3051788806915283, "learning_rate": 1.3357834676234366e-06, "loss": 0.3103, "step": 9371 }, { "epoch": 2.357142857142857, "grad_norm": 0.31733277440071106, "learning_rate": 1.3347880345945796e-06, "loss": 0.3446, "step": 9372 }, { "epoch": 2.357394366197183, "grad_norm": 0.30566954612731934, "learning_rate": 1.333792915468196e-06, "loss": 0.3109, "step": 9373 }, { "epoch": 2.3576458752515093, "grad_norm": 0.2991909682750702, "learning_rate": 1.3327981103295156e-06, "loss": 0.3123, "step": 9374 }, { "epoch": 2.357897384305835, "grad_norm": 0.2768964469432831, "learning_rate": 1.3318036192637334e-06, "loss": 0.3224, "step": 9375 }, { "epoch": 2.358148893360161, "grad_norm": 0.2982604205608368, "learning_rate": 1.3308094423560242e-06, "loss": 0.3151, "step": 9376 }, { "epoch": 2.358400402414487, "grad_norm": 0.31598472595214844, "learning_rate": 1.3298155796915307e-06, "loss": 0.3188, "step": 9377 }, { "epoch": 2.358651911468813, "grad_norm": 0.29461905360221863, "learning_rate": 1.3288220313553723e-06, "loss": 0.3171, "step": 9378 }, { "epoch": 2.3589034205231387, "grad_norm": 0.2900139391422272, "learning_rate": 1.3278287974326415e-06, "loss": 0.3277, "step": 9379 }, { "epoch": 2.359154929577465, "grad_norm": 0.2897052764892578, "learning_rate": 1.3268358780083995e-06, "loss": 0.3078, "step": 9380 }, { "epoch": 2.359406438631791, "grad_norm": 0.2946465015411377, "learning_rate": 1.3258432731676867e-06, "loss": 0.3077, "step": 9381 }, { "epoch": 2.3596579476861166, "grad_norm": 0.29683101177215576, "learning_rate": 1.324850982995511e-06, "loss": 0.3171, "step": 9382 }, { "epoch": 2.359909456740443, "grad_norm": 0.32809895277023315, "learning_rate": 1.3238590075768582e-06, "loss": 0.3467, "step": 9383 }, { "epoch": 2.3601609657947686, "grad_norm": 0.28296294808387756, "learning_rate": 1.3228673469966819e-06, "loss": 0.3348, "step": 9384 }, { "epoch": 2.3604124748490944, "grad_norm": 0.31060582399368286, "learning_rate": 1.321876001339915e-06, "loss": 0.317, "step": 9385 }, { "epoch": 2.3606639839034207, "grad_norm": 0.2970585525035858, "learning_rate": 1.3208849706914567e-06, "loss": 0.3378, "step": 9386 }, { "epoch": 2.3609154929577465, "grad_norm": 0.310907781124115, "learning_rate": 1.319894255136186e-06, "loss": 0.3151, "step": 9387 }, { "epoch": 2.3611670020120723, "grad_norm": 0.3074939250946045, "learning_rate": 1.3189038547589479e-06, "loss": 0.3049, "step": 9388 }, { "epoch": 2.3614185110663986, "grad_norm": 0.2910417318344116, "learning_rate": 1.317913769644567e-06, "loss": 0.325, "step": 9389 }, { "epoch": 2.3616700201207244, "grad_norm": 0.29934588074684143, "learning_rate": 1.3169239998778361e-06, "loss": 0.3217, "step": 9390 }, { "epoch": 2.36192152917505, "grad_norm": 0.3142791986465454, "learning_rate": 1.3159345455435241e-06, "loss": 0.3297, "step": 9391 }, { "epoch": 2.3621730382293764, "grad_norm": 0.30525627732276917, "learning_rate": 1.3149454067263696e-06, "loss": 0.3088, "step": 9392 }, { "epoch": 2.362424547283702, "grad_norm": 0.2881489396095276, "learning_rate": 1.3139565835110884e-06, "loss": 0.308, "step": 9393 }, { "epoch": 2.362676056338028, "grad_norm": 0.32259997725486755, "learning_rate": 1.3129680759823648e-06, "loss": 0.3287, "step": 9394 }, { "epoch": 2.3629275653923543, "grad_norm": 0.30433225631713867, "learning_rate": 1.311979884224861e-06, "loss": 0.3147, "step": 9395 }, { "epoch": 2.36317907444668, "grad_norm": 0.3119969964027405, "learning_rate": 1.3109920083232064e-06, "loss": 0.3184, "step": 9396 }, { "epoch": 2.363430583501006, "grad_norm": 0.2946832776069641, "learning_rate": 1.3100044483620094e-06, "loss": 0.318, "step": 9397 }, { "epoch": 2.363682092555332, "grad_norm": 0.282425194978714, "learning_rate": 1.3090172044258458e-06, "loss": 0.3321, "step": 9398 }, { "epoch": 2.363933601609658, "grad_norm": 0.3065638244152069, "learning_rate": 1.3080302765992692e-06, "loss": 0.3006, "step": 9399 }, { "epoch": 2.3641851106639837, "grad_norm": 0.2910194993019104, "learning_rate": 1.3070436649668006e-06, "loss": 0.3033, "step": 9400 }, { "epoch": 2.36443661971831, "grad_norm": 0.306165874004364, "learning_rate": 1.3060573696129396e-06, "loss": 0.3083, "step": 9401 }, { "epoch": 2.364688128772636, "grad_norm": 0.3192187249660492, "learning_rate": 1.305071390622157e-06, "loss": 0.302, "step": 9402 }, { "epoch": 2.3649396378269616, "grad_norm": 0.3046712577342987, "learning_rate": 1.3040857280788927e-06, "loss": 0.302, "step": 9403 }, { "epoch": 2.365191146881288, "grad_norm": 0.2873310446739197, "learning_rate": 1.3031003820675659e-06, "loss": 0.2988, "step": 9404 }, { "epoch": 2.3654426559356136, "grad_norm": 0.31300392746925354, "learning_rate": 1.3021153526725615e-06, "loss": 0.317, "step": 9405 }, { "epoch": 2.3656941649899395, "grad_norm": 0.29510292410850525, "learning_rate": 1.3011306399782458e-06, "loss": 0.2967, "step": 9406 }, { "epoch": 2.3659456740442657, "grad_norm": 0.3187848627567291, "learning_rate": 1.3001462440689488e-06, "loss": 0.3349, "step": 9407 }, { "epoch": 2.3661971830985915, "grad_norm": 0.2933845818042755, "learning_rate": 1.2991621650289809e-06, "loss": 0.2987, "step": 9408 }, { "epoch": 2.3664486921529173, "grad_norm": 0.2807075083255768, "learning_rate": 1.2981784029426203e-06, "loss": 0.2975, "step": 9409 }, { "epoch": 2.3667002012072436, "grad_norm": 0.2867673635482788, "learning_rate": 1.2971949578941217e-06, "loss": 0.3161, "step": 9410 }, { "epoch": 2.3669517102615694, "grad_norm": 0.2991779148578644, "learning_rate": 1.2962118299677095e-06, "loss": 0.3163, "step": 9411 }, { "epoch": 2.367203219315895, "grad_norm": 0.3035879135131836, "learning_rate": 1.2952290192475848e-06, "loss": 0.29, "step": 9412 }, { "epoch": 2.3674547283702214, "grad_norm": 0.3021244704723358, "learning_rate": 1.2942465258179155e-06, "loss": 0.3111, "step": 9413 }, { "epoch": 2.3677062374245472, "grad_norm": 0.2943519949913025, "learning_rate": 1.29326434976285e-06, "loss": 0.3085, "step": 9414 }, { "epoch": 2.367957746478873, "grad_norm": 0.29150792956352234, "learning_rate": 1.2922824911665021e-06, "loss": 0.3083, "step": 9415 }, { "epoch": 2.3682092555331993, "grad_norm": 0.3039129972457886, "learning_rate": 1.2913009501129653e-06, "loss": 0.3093, "step": 9416 }, { "epoch": 2.368460764587525, "grad_norm": 0.28910478949546814, "learning_rate": 1.2903197266862989e-06, "loss": 0.3038, "step": 9417 }, { "epoch": 2.368712273641851, "grad_norm": 0.29740893840789795, "learning_rate": 1.289338820970541e-06, "loss": 0.3274, "step": 9418 }, { "epoch": 2.368963782696177, "grad_norm": 0.2945566773414612, "learning_rate": 1.2883582330496986e-06, "loss": 0.3546, "step": 9419 }, { "epoch": 2.369215291750503, "grad_norm": 0.2992318272590637, "learning_rate": 1.287377963007755e-06, "loss": 0.3222, "step": 9420 }, { "epoch": 2.3694668008048287, "grad_norm": 0.2896125614643097, "learning_rate": 1.2863980109286605e-06, "loss": 0.3324, "step": 9421 }, { "epoch": 2.369718309859155, "grad_norm": 0.31392091512680054, "learning_rate": 1.2854183768963453e-06, "loss": 0.318, "step": 9422 }, { "epoch": 2.369969818913481, "grad_norm": 0.2963343560695648, "learning_rate": 1.2844390609947082e-06, "loss": 0.3077, "step": 9423 }, { "epoch": 2.3702213279678066, "grad_norm": 0.32114240527153015, "learning_rate": 1.2834600633076205e-06, "loss": 0.332, "step": 9424 }, { "epoch": 2.370472837022133, "grad_norm": 0.28790682554244995, "learning_rate": 1.2824813839189288e-06, "loss": 0.3242, "step": 9425 }, { "epoch": 2.3707243460764587, "grad_norm": 0.31781139969825745, "learning_rate": 1.2815030229124481e-06, "loss": 0.3148, "step": 9426 }, { "epoch": 2.3709758551307845, "grad_norm": 0.3021380305290222, "learning_rate": 1.2805249803719722e-06, "loss": 0.3122, "step": 9427 }, { "epoch": 2.3712273641851107, "grad_norm": 0.3446398973464966, "learning_rate": 1.2795472563812617e-06, "loss": 0.3437, "step": 9428 }, { "epoch": 2.3714788732394365, "grad_norm": 0.2881867587566376, "learning_rate": 1.2785698510240546e-06, "loss": 0.3348, "step": 9429 }, { "epoch": 2.3717303822937628, "grad_norm": 0.28368863463401794, "learning_rate": 1.2775927643840575e-06, "loss": 0.3034, "step": 9430 }, { "epoch": 2.3719818913480886, "grad_norm": 0.281934529542923, "learning_rate": 1.2766159965449543e-06, "loss": 0.3107, "step": 9431 }, { "epoch": 2.3722334004024144, "grad_norm": 0.2791815996170044, "learning_rate": 1.275639547590396e-06, "loss": 0.3286, "step": 9432 }, { "epoch": 2.3724849094567406, "grad_norm": 0.29985862970352173, "learning_rate": 1.2746634176040107e-06, "loss": 0.3342, "step": 9433 }, { "epoch": 2.3727364185110664, "grad_norm": 0.31482386589050293, "learning_rate": 1.2736876066693999e-06, "loss": 0.3313, "step": 9434 }, { "epoch": 2.3729879275653922, "grad_norm": 0.2999832332134247, "learning_rate": 1.2727121148701322e-06, "loss": 0.3334, "step": 9435 }, { "epoch": 2.3732394366197185, "grad_norm": 0.3035413920879364, "learning_rate": 1.2717369422897552e-06, "loss": 0.3196, "step": 9436 }, { "epoch": 2.3734909456740443, "grad_norm": 0.28583768010139465, "learning_rate": 1.270762089011784e-06, "loss": 0.3175, "step": 9437 }, { "epoch": 2.37374245472837, "grad_norm": 0.2991746962070465, "learning_rate": 1.2697875551197113e-06, "loss": 0.3391, "step": 9438 }, { "epoch": 2.3739939637826963, "grad_norm": 0.29370561242103577, "learning_rate": 1.268813340696997e-06, "loss": 0.3131, "step": 9439 }, { "epoch": 2.374245472837022, "grad_norm": 0.31026825308799744, "learning_rate": 1.2678394458270794e-06, "loss": 0.3207, "step": 9440 }, { "epoch": 2.374496981891348, "grad_norm": 0.31360548734664917, "learning_rate": 1.2668658705933628e-06, "loss": 0.314, "step": 9441 }, { "epoch": 2.374748490945674, "grad_norm": 0.31127217411994934, "learning_rate": 1.2658926150792321e-06, "loss": 0.3276, "step": 9442 }, { "epoch": 2.375, "grad_norm": 0.31766191124916077, "learning_rate": 1.2649196793680367e-06, "loss": 0.328, "step": 9443 }, { "epoch": 2.375251509054326, "grad_norm": 0.32087278366088867, "learning_rate": 1.2639470635431044e-06, "loss": 0.3228, "step": 9444 }, { "epoch": 2.375503018108652, "grad_norm": 0.29224544763565063, "learning_rate": 1.2629747676877347e-06, "loss": 0.3348, "step": 9445 }, { "epoch": 2.375754527162978, "grad_norm": 0.3009779155254364, "learning_rate": 1.2620027918851956e-06, "loss": 0.3244, "step": 9446 }, { "epoch": 2.3760060362173037, "grad_norm": 0.304565966129303, "learning_rate": 1.2610311362187343e-06, "loss": 0.3357, "step": 9447 }, { "epoch": 2.37625754527163, "grad_norm": 0.3026162087917328, "learning_rate": 1.260059800771564e-06, "loss": 0.3098, "step": 9448 }, { "epoch": 2.3765090543259557, "grad_norm": 0.3093135952949524, "learning_rate": 1.2590887856268764e-06, "loss": 0.3289, "step": 9449 }, { "epoch": 2.3767605633802815, "grad_norm": 0.2858860194683075, "learning_rate": 1.2581180908678291e-06, "loss": 0.3005, "step": 9450 }, { "epoch": 2.3770120724346078, "grad_norm": 0.28006115555763245, "learning_rate": 1.2571477165775597e-06, "loss": 0.3127, "step": 9451 }, { "epoch": 2.3772635814889336, "grad_norm": 0.2762000560760498, "learning_rate": 1.2561776628391725e-06, "loss": 0.3349, "step": 9452 }, { "epoch": 2.3775150905432594, "grad_norm": 0.3034118413925171, "learning_rate": 1.2552079297357478e-06, "loss": 0.318, "step": 9453 }, { "epoch": 2.3777665995975856, "grad_norm": 0.29778799414634705, "learning_rate": 1.2542385173503359e-06, "loss": 0.3035, "step": 9454 }, { "epoch": 2.3780181086519114, "grad_norm": 0.30300748348236084, "learning_rate": 1.2532694257659605e-06, "loss": 0.3182, "step": 9455 }, { "epoch": 2.3782696177062372, "grad_norm": 0.27301889657974243, "learning_rate": 1.252300655065622e-06, "loss": 0.3322, "step": 9456 }, { "epoch": 2.3785211267605635, "grad_norm": 0.3212936222553253, "learning_rate": 1.2513322053322847e-06, "loss": 0.3413, "step": 9457 }, { "epoch": 2.3787726358148893, "grad_norm": 0.3001827597618103, "learning_rate": 1.250364076648894e-06, "loss": 0.3137, "step": 9458 }, { "epoch": 2.3790241448692155, "grad_norm": 0.29002684354782104, "learning_rate": 1.2493962690983608e-06, "loss": 0.296, "step": 9459 }, { "epoch": 2.3792756539235413, "grad_norm": 0.30628740787506104, "learning_rate": 1.248428782763575e-06, "loss": 0.3288, "step": 9460 }, { "epoch": 2.379527162977867, "grad_norm": 0.30561593174934387, "learning_rate": 1.2474616177273928e-06, "loss": 0.3324, "step": 9461 }, { "epoch": 2.3797786720321934, "grad_norm": 0.3026500642299652, "learning_rate": 1.2464947740726491e-06, "loss": 0.3171, "step": 9462 }, { "epoch": 2.380030181086519, "grad_norm": 0.2937304377555847, "learning_rate": 1.2455282518821442e-06, "loss": 0.3395, "step": 9463 }, { "epoch": 2.380281690140845, "grad_norm": 0.30851301550865173, "learning_rate": 1.244562051238658e-06, "loss": 0.3576, "step": 9464 }, { "epoch": 2.3805331991951713, "grad_norm": 0.3095259666442871, "learning_rate": 1.2435961722249374e-06, "loss": 0.332, "step": 9465 }, { "epoch": 2.380784708249497, "grad_norm": 0.2995477616786957, "learning_rate": 1.2426306149237039e-06, "loss": 0.3151, "step": 9466 }, { "epoch": 2.381036217303823, "grad_norm": 0.2909550964832306, "learning_rate": 1.2416653794176542e-06, "loss": 0.3098, "step": 9467 }, { "epoch": 2.381287726358149, "grad_norm": 0.28925126791000366, "learning_rate": 1.2407004657894505e-06, "loss": 0.3065, "step": 9468 }, { "epoch": 2.381539235412475, "grad_norm": 0.28470999002456665, "learning_rate": 1.2397358741217359e-06, "loss": 0.3151, "step": 9469 }, { "epoch": 2.3817907444668007, "grad_norm": 0.3109789192676544, "learning_rate": 1.2387716044971181e-06, "loss": 0.3137, "step": 9470 }, { "epoch": 2.382042253521127, "grad_norm": 0.3258463740348816, "learning_rate": 1.2378076569981833e-06, "loss": 0.3246, "step": 9471 }, { "epoch": 2.3822937625754528, "grad_norm": 0.3138425052165985, "learning_rate": 1.2368440317074854e-06, "loss": 0.3248, "step": 9472 }, { "epoch": 2.3825452716297786, "grad_norm": 0.31474605202674866, "learning_rate": 1.2358807287075553e-06, "loss": 0.3273, "step": 9473 }, { "epoch": 2.382796780684105, "grad_norm": 0.30715155601501465, "learning_rate": 1.234917748080891e-06, "loss": 0.3247, "step": 9474 }, { "epoch": 2.3830482897384306, "grad_norm": 0.29947811365127563, "learning_rate": 1.2339550899099673e-06, "loss": 0.319, "step": 9475 }, { "epoch": 2.3832997987927564, "grad_norm": 0.27288398146629333, "learning_rate": 1.2329927542772314e-06, "loss": 0.338, "step": 9476 }, { "epoch": 2.3835513078470827, "grad_norm": 0.30378079414367676, "learning_rate": 1.2320307412650978e-06, "loss": 0.3123, "step": 9477 }, { "epoch": 2.3838028169014085, "grad_norm": 0.2979941666126251, "learning_rate": 1.2310690509559609e-06, "loss": 0.3077, "step": 9478 }, { "epoch": 2.3840543259557343, "grad_norm": 0.3131090998649597, "learning_rate": 1.2301076834321796e-06, "loss": 0.3274, "step": 9479 }, { "epoch": 2.3843058350100605, "grad_norm": 0.2883933186531067, "learning_rate": 1.2291466387760925e-06, "loss": 0.3344, "step": 9480 }, { "epoch": 2.3845573440643864, "grad_norm": 0.3163345158100128, "learning_rate": 1.2281859170700039e-06, "loss": 0.3107, "step": 9481 }, { "epoch": 2.384808853118712, "grad_norm": 0.2901724874973297, "learning_rate": 1.2272255183961968e-06, "loss": 0.3282, "step": 9482 }, { "epoch": 2.3850603621730384, "grad_norm": 0.31056809425354004, "learning_rate": 1.2262654428369198e-06, "loss": 0.3323, "step": 9483 }, { "epoch": 2.385311871227364, "grad_norm": 0.299684077501297, "learning_rate": 1.225305690474401e-06, "loss": 0.3099, "step": 9484 }, { "epoch": 2.38556338028169, "grad_norm": 0.31283077597618103, "learning_rate": 1.2243462613908336e-06, "loss": 0.3079, "step": 9485 }, { "epoch": 2.3858148893360163, "grad_norm": 0.28932955861091614, "learning_rate": 1.2233871556683891e-06, "loss": 0.2907, "step": 9486 }, { "epoch": 2.386066398390342, "grad_norm": 0.30000898241996765, "learning_rate": 1.222428373389209e-06, "loss": 0.3257, "step": 9487 }, { "epoch": 2.386317907444668, "grad_norm": 0.3037915825843811, "learning_rate": 1.2214699146354054e-06, "loss": 0.3359, "step": 9488 }, { "epoch": 2.386569416498994, "grad_norm": 0.30737778544425964, "learning_rate": 1.2205117794890665e-06, "loss": 0.3159, "step": 9489 }, { "epoch": 2.38682092555332, "grad_norm": 0.318010538816452, "learning_rate": 1.2195539680322476e-06, "loss": 0.3197, "step": 9490 }, { "epoch": 2.3870724346076457, "grad_norm": 0.32174649834632874, "learning_rate": 1.2185964803469824e-06, "loss": 0.3168, "step": 9491 }, { "epoch": 2.387323943661972, "grad_norm": 0.28798073530197144, "learning_rate": 1.2176393165152712e-06, "loss": 0.3204, "step": 9492 }, { "epoch": 2.387575452716298, "grad_norm": 0.3134564757347107, "learning_rate": 1.2166824766190916e-06, "loss": 0.3338, "step": 9493 }, { "epoch": 2.3878269617706236, "grad_norm": 0.2944769859313965, "learning_rate": 1.2157259607403877e-06, "loss": 0.3366, "step": 9494 }, { "epoch": 2.38807847082495, "grad_norm": 0.28981396555900574, "learning_rate": 1.2147697689610826e-06, "loss": 0.3254, "step": 9495 }, { "epoch": 2.3883299798792756, "grad_norm": 0.28675341606140137, "learning_rate": 1.213813901363065e-06, "loss": 0.3123, "step": 9496 }, { "epoch": 2.3885814889336014, "grad_norm": 0.29373446106910706, "learning_rate": 1.2128583580282005e-06, "loss": 0.3495, "step": 9497 }, { "epoch": 2.3888329979879277, "grad_norm": 0.286771684885025, "learning_rate": 1.2119031390383268e-06, "loss": 0.3153, "step": 9498 }, { "epoch": 2.3890845070422535, "grad_norm": 0.29528728127479553, "learning_rate": 1.210948244475249e-06, "loss": 0.3318, "step": 9499 }, { "epoch": 2.3893360160965793, "grad_norm": 0.2970670759677887, "learning_rate": 1.209993674420752e-06, "loss": 0.3444, "step": 9500 }, { "epoch": 2.3895875251509056, "grad_norm": 0.2939586639404297, "learning_rate": 1.2090394289565849e-06, "loss": 0.3132, "step": 9501 }, { "epoch": 2.3898390342052314, "grad_norm": 0.2839318513870239, "learning_rate": 1.208085508164476e-06, "loss": 0.3193, "step": 9502 }, { "epoch": 2.390090543259557, "grad_norm": 0.31266748905181885, "learning_rate": 1.2071319121261194e-06, "loss": 0.3341, "step": 9503 }, { "epoch": 2.3903420523138834, "grad_norm": 0.29940277338027954, "learning_rate": 1.2061786409231884e-06, "loss": 0.3222, "step": 9504 }, { "epoch": 2.390593561368209, "grad_norm": 0.2878960967063904, "learning_rate": 1.2052256946373209e-06, "loss": 0.3252, "step": 9505 }, { "epoch": 2.390845070422535, "grad_norm": 0.3191094398498535, "learning_rate": 1.204273073350134e-06, "loss": 0.3082, "step": 9506 }, { "epoch": 2.3910965794768613, "grad_norm": 0.3221496343612671, "learning_rate": 1.203320777143211e-06, "loss": 0.3168, "step": 9507 }, { "epoch": 2.391348088531187, "grad_norm": 0.30812880396842957, "learning_rate": 1.202368806098112e-06, "loss": 0.3156, "step": 9508 }, { "epoch": 2.391599597585513, "grad_norm": 0.28987714648246765, "learning_rate": 1.2014171602963676e-06, "loss": 0.3236, "step": 9509 }, { "epoch": 2.391851106639839, "grad_norm": 0.2956389784812927, "learning_rate": 1.2004658398194786e-06, "loss": 0.3101, "step": 9510 }, { "epoch": 2.392102615694165, "grad_norm": 0.2824747860431671, "learning_rate": 1.199514844748922e-06, "loss": 0.3145, "step": 9511 }, { "epoch": 2.3923541247484907, "grad_norm": 0.3140904903411865, "learning_rate": 1.1985641751661415e-06, "loss": 0.3101, "step": 9512 }, { "epoch": 2.392605633802817, "grad_norm": 0.288084477186203, "learning_rate": 1.1976138311525592e-06, "loss": 0.3343, "step": 9513 }, { "epoch": 2.392857142857143, "grad_norm": 0.27887189388275146, "learning_rate": 1.196663812789563e-06, "loss": 0.3242, "step": 9514 }, { "epoch": 2.3931086519114686, "grad_norm": 0.30040696263313293, "learning_rate": 1.1957141201585193e-06, "loss": 0.3303, "step": 9515 }, { "epoch": 2.393360160965795, "grad_norm": 0.2744455635547638, "learning_rate": 1.1947647533407602e-06, "loss": 0.3199, "step": 9516 }, { "epoch": 2.3936116700201207, "grad_norm": 0.3067541718482971, "learning_rate": 1.1938157124175959e-06, "loss": 0.3125, "step": 9517 }, { "epoch": 2.3938631790744465, "grad_norm": 0.29640766978263855, "learning_rate": 1.1928669974703033e-06, "loss": 0.3321, "step": 9518 }, { "epoch": 2.3941146881287727, "grad_norm": 0.30596280097961426, "learning_rate": 1.191918608580136e-06, "loss": 0.3078, "step": 9519 }, { "epoch": 2.3943661971830985, "grad_norm": 0.27518826723098755, "learning_rate": 1.1909705458283155e-06, "loss": 0.3197, "step": 9520 }, { "epoch": 2.3946177062374243, "grad_norm": 0.27755483984947205, "learning_rate": 1.1900228092960398e-06, "loss": 0.3256, "step": 9521 }, { "epoch": 2.3948692152917506, "grad_norm": 0.30145710706710815, "learning_rate": 1.1890753990644738e-06, "loss": 0.3207, "step": 9522 }, { "epoch": 2.3951207243460764, "grad_norm": 0.3164133131504059, "learning_rate": 1.1881283152147606e-06, "loss": 0.3314, "step": 9523 }, { "epoch": 2.395372233400402, "grad_norm": 0.3069988787174225, "learning_rate": 1.1871815578280083e-06, "loss": 0.3108, "step": 9524 }, { "epoch": 2.3956237424547284, "grad_norm": 0.3104003965854645, "learning_rate": 1.186235126985304e-06, "loss": 0.3224, "step": 9525 }, { "epoch": 2.3958752515090542, "grad_norm": 0.2976137399673462, "learning_rate": 1.185289022767701e-06, "loss": 0.3175, "step": 9526 }, { "epoch": 2.3961267605633805, "grad_norm": 0.3276063799858093, "learning_rate": 1.1843432452562303e-06, "loss": 0.3342, "step": 9527 }, { "epoch": 2.3963782696177063, "grad_norm": 0.3153345584869385, "learning_rate": 1.183397794531888e-06, "loss": 0.3397, "step": 9528 }, { "epoch": 2.396629778672032, "grad_norm": 0.3022550940513611, "learning_rate": 1.18245267067565e-06, "loss": 0.3021, "step": 9529 }, { "epoch": 2.3968812877263583, "grad_norm": 0.3086456060409546, "learning_rate": 1.1815078737684566e-06, "loss": 0.3211, "step": 9530 }, { "epoch": 2.397132796780684, "grad_norm": 0.3157150447368622, "learning_rate": 1.1805634038912268e-06, "loss": 0.3374, "step": 9531 }, { "epoch": 2.39738430583501, "grad_norm": 0.2981681823730469, "learning_rate": 1.1796192611248452e-06, "loss": 0.3354, "step": 9532 }, { "epoch": 2.397635814889336, "grad_norm": 0.2994699478149414, "learning_rate": 1.1786754455501759e-06, "loss": 0.3359, "step": 9533 }, { "epoch": 2.397887323943662, "grad_norm": 0.3035818338394165, "learning_rate": 1.1777319572480468e-06, "loss": 0.3408, "step": 9534 }, { "epoch": 2.398138832997988, "grad_norm": 0.2822018265724182, "learning_rate": 1.1767887962992647e-06, "loss": 0.3129, "step": 9535 }, { "epoch": 2.398390342052314, "grad_norm": 0.2857167720794678, "learning_rate": 1.1758459627846031e-06, "loss": 0.3171, "step": 9536 }, { "epoch": 2.39864185110664, "grad_norm": 0.2896609306335449, "learning_rate": 1.1749034567848122e-06, "loss": 0.3178, "step": 9537 }, { "epoch": 2.3988933601609657, "grad_norm": 0.3051433563232422, "learning_rate": 1.1739612783806092e-06, "loss": 0.3435, "step": 9538 }, { "epoch": 2.399144869215292, "grad_norm": 0.3050413727760315, "learning_rate": 1.1730194276526885e-06, "loss": 0.3223, "step": 9539 }, { "epoch": 2.3993963782696177, "grad_norm": 0.30371037125587463, "learning_rate": 1.1720779046817104e-06, "loss": 0.3187, "step": 9540 }, { "epoch": 2.3996478873239435, "grad_norm": 0.28884759545326233, "learning_rate": 1.1711367095483134e-06, "loss": 0.3246, "step": 9541 }, { "epoch": 2.3998993963782698, "grad_norm": 0.3120008409023285, "learning_rate": 1.1701958423331044e-06, "loss": 0.3344, "step": 9542 }, { "epoch": 2.4001509054325956, "grad_norm": 0.28870880603790283, "learning_rate": 1.1692553031166616e-06, "loss": 0.3225, "step": 9543 }, { "epoch": 2.4004024144869214, "grad_norm": 0.28078019618988037, "learning_rate": 1.1683150919795378e-06, "loss": 0.2904, "step": 9544 }, { "epoch": 2.4006539235412476, "grad_norm": 0.28186681866645813, "learning_rate": 1.1673752090022544e-06, "loss": 0.3233, "step": 9545 }, { "epoch": 2.4009054325955734, "grad_norm": 0.2807130813598633, "learning_rate": 1.1664356542653088e-06, "loss": 0.3163, "step": 9546 }, { "epoch": 2.4011569416498992, "grad_norm": 0.28840017318725586, "learning_rate": 1.1654964278491653e-06, "loss": 0.3297, "step": 9547 }, { "epoch": 2.4014084507042255, "grad_norm": 0.2861834466457367, "learning_rate": 1.1645575298342659e-06, "loss": 0.3246, "step": 9548 }, { "epoch": 2.4016599597585513, "grad_norm": 0.2845926582813263, "learning_rate": 1.1636189603010179e-06, "loss": 0.3242, "step": 9549 }, { "epoch": 2.401911468812877, "grad_norm": 0.28834056854248047, "learning_rate": 1.1626807193298073e-06, "loss": 0.3234, "step": 9550 }, { "epoch": 2.4021629778672033, "grad_norm": 0.31181180477142334, "learning_rate": 1.161742807000985e-06, "loss": 0.3505, "step": 9551 }, { "epoch": 2.402414486921529, "grad_norm": 0.30017805099487305, "learning_rate": 1.1608052233948797e-06, "loss": 0.332, "step": 9552 }, { "epoch": 2.402665995975855, "grad_norm": 0.2945021986961365, "learning_rate": 1.1598679685917901e-06, "loss": 0.3185, "step": 9553 }, { "epoch": 2.402917505030181, "grad_norm": 0.29077422618865967, "learning_rate": 1.158931042671984e-06, "loss": 0.3215, "step": 9554 }, { "epoch": 2.403169014084507, "grad_norm": 0.28947049379348755, "learning_rate": 1.157994445715706e-06, "loss": 0.3264, "step": 9555 }, { "epoch": 2.403420523138833, "grad_norm": 0.2847462296485901, "learning_rate": 1.1570581778031665e-06, "loss": 0.3298, "step": 9556 }, { "epoch": 2.403672032193159, "grad_norm": 0.29877012968063354, "learning_rate": 1.1561222390145543e-06, "loss": 0.324, "step": 9557 }, { "epoch": 2.403923541247485, "grad_norm": 0.2849181294441223, "learning_rate": 1.1551866294300234e-06, "loss": 0.321, "step": 9558 }, { "epoch": 2.404175050301811, "grad_norm": 0.3224382698535919, "learning_rate": 1.1542513491297063e-06, "loss": 0.3194, "step": 9559 }, { "epoch": 2.404426559356137, "grad_norm": 0.2912129759788513, "learning_rate": 1.1533163981937012e-06, "loss": 0.3128, "step": 9560 }, { "epoch": 2.4046780684104627, "grad_norm": 0.2958779036998749, "learning_rate": 1.1523817767020829e-06, "loss": 0.3136, "step": 9561 }, { "epoch": 2.404929577464789, "grad_norm": 0.300225168466568, "learning_rate": 1.1514474847348934e-06, "loss": 0.338, "step": 9562 }, { "epoch": 2.4051810865191148, "grad_norm": 0.31059449911117554, "learning_rate": 1.1505135223721498e-06, "loss": 0.3326, "step": 9563 }, { "epoch": 2.4054325955734406, "grad_norm": 0.293338805437088, "learning_rate": 1.1495798896938426e-06, "loss": 0.3294, "step": 9564 }, { "epoch": 2.405684104627767, "grad_norm": 0.3122572898864746, "learning_rate": 1.1486465867799284e-06, "loss": 0.3319, "step": 9565 }, { "epoch": 2.4059356136820926, "grad_norm": 0.2884367108345032, "learning_rate": 1.147713613710341e-06, "loss": 0.2962, "step": 9566 }, { "epoch": 2.4061871227364184, "grad_norm": 0.3003442585468292, "learning_rate": 1.1467809705649817e-06, "loss": 0.3139, "step": 9567 }, { "epoch": 2.4064386317907447, "grad_norm": 0.2922108471393585, "learning_rate": 1.1458486574237281e-06, "loss": 0.3144, "step": 9568 }, { "epoch": 2.4066901408450705, "grad_norm": 0.2977858781814575, "learning_rate": 1.144916674366424e-06, "loss": 0.3182, "step": 9569 }, { "epoch": 2.4069416498993963, "grad_norm": 0.2883601486682892, "learning_rate": 1.1439850214728908e-06, "loss": 0.3377, "step": 9570 }, { "epoch": 2.4071931589537225, "grad_norm": 0.28815150260925293, "learning_rate": 1.1430536988229157e-06, "loss": 0.3322, "step": 9571 }, { "epoch": 2.4074446680080483, "grad_norm": 0.3212835192680359, "learning_rate": 1.1421227064962641e-06, "loss": 0.303, "step": 9572 }, { "epoch": 2.407696177062374, "grad_norm": 0.2907494008541107, "learning_rate": 1.1411920445726666e-06, "loss": 0.3007, "step": 9573 }, { "epoch": 2.4079476861167004, "grad_norm": 0.3153952360153198, "learning_rate": 1.1402617131318295e-06, "loss": 0.3223, "step": 9574 }, { "epoch": 2.408199195171026, "grad_norm": 0.2907307744026184, "learning_rate": 1.1393317122534315e-06, "loss": 0.3325, "step": 9575 }, { "epoch": 2.408450704225352, "grad_norm": 0.30540433526039124, "learning_rate": 1.1384020420171194e-06, "loss": 0.3305, "step": 9576 }, { "epoch": 2.4087022132796783, "grad_norm": 0.2847049832344055, "learning_rate": 1.137472702502515e-06, "loss": 0.3229, "step": 9577 }, { "epoch": 2.408953722334004, "grad_norm": 0.29284361004829407, "learning_rate": 1.1365436937892082e-06, "loss": 0.3176, "step": 9578 }, { "epoch": 2.40920523138833, "grad_norm": 0.28661638498306274, "learning_rate": 1.1356150159567664e-06, "loss": 0.338, "step": 9579 }, { "epoch": 2.409456740442656, "grad_norm": 0.2953725755214691, "learning_rate": 1.1346866690847214e-06, "loss": 0.3218, "step": 9580 }, { "epoch": 2.409708249496982, "grad_norm": 0.30523139238357544, "learning_rate": 1.133758653252583e-06, "loss": 0.3014, "step": 9581 }, { "epoch": 2.4099597585513077, "grad_norm": 0.2769249379634857, "learning_rate": 1.1328309685398275e-06, "loss": 0.3114, "step": 9582 }, { "epoch": 2.410211267605634, "grad_norm": 0.2869117856025696, "learning_rate": 1.1319036150259078e-06, "loss": 0.339, "step": 9583 }, { "epoch": 2.41046277665996, "grad_norm": 0.2989134192466736, "learning_rate": 1.1309765927902439e-06, "loss": 0.2997, "step": 9584 }, { "epoch": 2.4107142857142856, "grad_norm": 0.29306572675704956, "learning_rate": 1.1300499019122295e-06, "loss": 0.321, "step": 9585 }, { "epoch": 2.410965794768612, "grad_norm": 0.27259159088134766, "learning_rate": 1.1291235424712328e-06, "loss": 0.3093, "step": 9586 }, { "epoch": 2.4112173038229376, "grad_norm": 0.3309045732021332, "learning_rate": 1.1281975145465867e-06, "loss": 0.316, "step": 9587 }, { "epoch": 2.4114688128772634, "grad_norm": 0.3107389211654663, "learning_rate": 1.1272718182176034e-06, "loss": 0.3225, "step": 9588 }, { "epoch": 2.4117203219315897, "grad_norm": 0.29425039887428284, "learning_rate": 1.1263464535635594e-06, "loss": 0.336, "step": 9589 }, { "epoch": 2.4119718309859155, "grad_norm": 0.29913732409477234, "learning_rate": 1.1254214206637099e-06, "loss": 0.3135, "step": 9590 }, { "epoch": 2.4122233400402413, "grad_norm": 0.2878449261188507, "learning_rate": 1.1244967195972745e-06, "loss": 0.3308, "step": 9591 }, { "epoch": 2.4124748490945676, "grad_norm": 0.2862037122249603, "learning_rate": 1.123572350443452e-06, "loss": 0.3186, "step": 9592 }, { "epoch": 2.4127263581488934, "grad_norm": 0.30815479159355164, "learning_rate": 1.1226483132814048e-06, "loss": 0.3305, "step": 9593 }, { "epoch": 2.412977867203219, "grad_norm": 0.29880401492118835, "learning_rate": 1.121724608190275e-06, "loss": 0.3288, "step": 9594 }, { "epoch": 2.4132293762575454, "grad_norm": 0.3055490553379059, "learning_rate": 1.1208012352491681e-06, "loss": 0.3273, "step": 9595 }, { "epoch": 2.413480885311871, "grad_norm": 0.31553953886032104, "learning_rate": 1.1198781945371673e-06, "loss": 0.2943, "step": 9596 }, { "epoch": 2.413732394366197, "grad_norm": 0.293268084526062, "learning_rate": 1.118955486133327e-06, "loss": 0.323, "step": 9597 }, { "epoch": 2.4139839034205233, "grad_norm": 0.304035484790802, "learning_rate": 1.1180331101166675e-06, "loss": 0.3229, "step": 9598 }, { "epoch": 2.414235412474849, "grad_norm": 0.2874673008918762, "learning_rate": 1.1171110665661888e-06, "loss": 0.3199, "step": 9599 }, { "epoch": 2.414486921529175, "grad_norm": 0.2967674434185028, "learning_rate": 1.1161893555608538e-06, "loss": 0.3396, "step": 9600 }, { "epoch": 2.414738430583501, "grad_norm": 0.3168094754219055, "learning_rate": 1.1152679771796054e-06, "loss": 0.3291, "step": 9601 }, { "epoch": 2.414989939637827, "grad_norm": 0.3081880509853363, "learning_rate": 1.1143469315013505e-06, "loss": 0.3123, "step": 9602 }, { "epoch": 2.4152414486921527, "grad_norm": 0.3019590377807617, "learning_rate": 1.1134262186049732e-06, "loss": 0.3103, "step": 9603 }, { "epoch": 2.415492957746479, "grad_norm": 0.3133490979671478, "learning_rate": 1.1125058385693255e-06, "loss": 0.3362, "step": 9604 }, { "epoch": 2.415744466800805, "grad_norm": 0.2901817262172699, "learning_rate": 1.1115857914732332e-06, "loss": 0.3293, "step": 9605 }, { "epoch": 2.4159959758551306, "grad_norm": 0.2860255241394043, "learning_rate": 1.1106660773954908e-06, "loss": 0.3074, "step": 9606 }, { "epoch": 2.416247484909457, "grad_norm": 0.29888466000556946, "learning_rate": 1.109746696414868e-06, "loss": 0.3458, "step": 9607 }, { "epoch": 2.4164989939637826, "grad_norm": 0.30433139204978943, "learning_rate": 1.1088276486101034e-06, "loss": 0.3235, "step": 9608 }, { "epoch": 2.4167505030181085, "grad_norm": 0.30524951219558716, "learning_rate": 1.107908934059907e-06, "loss": 0.3208, "step": 9609 }, { "epoch": 2.4170020120724347, "grad_norm": 0.29082682728767395, "learning_rate": 1.1069905528429631e-06, "loss": 0.3234, "step": 9610 }, { "epoch": 2.4172535211267605, "grad_norm": 0.2806370258331299, "learning_rate": 1.1060725050379223e-06, "loss": 0.3113, "step": 9611 }, { "epoch": 2.4175050301810863, "grad_norm": 0.2872363328933716, "learning_rate": 1.1051547907234122e-06, "loss": 0.3013, "step": 9612 }, { "epoch": 2.4177565392354126, "grad_norm": 0.2908620834350586, "learning_rate": 1.1042374099780268e-06, "loss": 0.3258, "step": 9613 }, { "epoch": 2.4180080482897384, "grad_norm": 0.2825472354888916, "learning_rate": 1.1033203628803369e-06, "loss": 0.3147, "step": 9614 }, { "epoch": 2.418259557344064, "grad_norm": 0.29791221022605896, "learning_rate": 1.1024036495088792e-06, "loss": 0.3144, "step": 9615 }, { "epoch": 2.4185110663983904, "grad_norm": 0.3021547198295593, "learning_rate": 1.1014872699421669e-06, "loss": 0.3039, "step": 9616 }, { "epoch": 2.4187625754527162, "grad_norm": 0.2768807113170624, "learning_rate": 1.100571224258679e-06, "loss": 0.3238, "step": 9617 }, { "epoch": 2.419014084507042, "grad_norm": 0.29537469148635864, "learning_rate": 1.099655512536872e-06, "loss": 0.3087, "step": 9618 }, { "epoch": 2.4192655935613683, "grad_norm": 0.28753364086151123, "learning_rate": 1.0987401348551702e-06, "loss": 0.3266, "step": 9619 }, { "epoch": 2.419517102615694, "grad_norm": 0.30412012338638306, "learning_rate": 1.097825091291969e-06, "loss": 0.3196, "step": 9620 }, { "epoch": 2.41976861167002, "grad_norm": 0.29906362295150757, "learning_rate": 1.0969103819256377e-06, "loss": 0.3135, "step": 9621 }, { "epoch": 2.420020120724346, "grad_norm": 0.3171037435531616, "learning_rate": 1.0959960068345139e-06, "loss": 0.3056, "step": 9622 }, { "epoch": 2.420271629778672, "grad_norm": 0.2972031831741333, "learning_rate": 1.0950819660969092e-06, "loss": 0.3344, "step": 9623 }, { "epoch": 2.4205231388329977, "grad_norm": 0.29488030076026917, "learning_rate": 1.094168259791104e-06, "loss": 0.3244, "step": 9624 }, { "epoch": 2.420774647887324, "grad_norm": 0.290930837392807, "learning_rate": 1.0932548879953543e-06, "loss": 0.3018, "step": 9625 }, { "epoch": 2.42102615694165, "grad_norm": 0.29524073004722595, "learning_rate": 1.0923418507878807e-06, "loss": 0.3232, "step": 9626 }, { "epoch": 2.421277665995976, "grad_norm": 0.30457374453544617, "learning_rate": 1.091429148246882e-06, "loss": 0.3264, "step": 9627 }, { "epoch": 2.421529175050302, "grad_norm": 0.27748963236808777, "learning_rate": 1.0905167804505263e-06, "loss": 0.3311, "step": 9628 }, { "epoch": 2.4217806841046277, "grad_norm": 0.29854872822761536, "learning_rate": 1.0896047474769489e-06, "loss": 0.3514, "step": 9629 }, { "epoch": 2.422032193158954, "grad_norm": 0.3030698597431183, "learning_rate": 1.088693049404263e-06, "loss": 0.3006, "step": 9630 }, { "epoch": 2.4222837022132797, "grad_norm": 0.31721991300582886, "learning_rate": 1.0877816863105473e-06, "loss": 0.3031, "step": 9631 }, { "epoch": 2.4225352112676055, "grad_norm": 0.2863737642765045, "learning_rate": 1.0868706582738563e-06, "loss": 0.3057, "step": 9632 }, { "epoch": 2.4227867203219318, "grad_norm": 0.303460031747818, "learning_rate": 1.085959965372212e-06, "loss": 0.3319, "step": 9633 }, { "epoch": 2.4230382293762576, "grad_norm": 0.29836776852607727, "learning_rate": 1.085049607683612e-06, "loss": 0.3197, "step": 9634 }, { "epoch": 2.4232897384305834, "grad_norm": 0.2951622009277344, "learning_rate": 1.0841395852860194e-06, "loss": 0.2886, "step": 9635 }, { "epoch": 2.4235412474849096, "grad_norm": 0.29916274547576904, "learning_rate": 1.0832298982573753e-06, "loss": 0.3294, "step": 9636 }, { "epoch": 2.4237927565392354, "grad_norm": 0.29172125458717346, "learning_rate": 1.0823205466755858e-06, "loss": 0.3011, "step": 9637 }, { "epoch": 2.4240442655935612, "grad_norm": 0.28251975774765015, "learning_rate": 1.0814115306185342e-06, "loss": 0.3193, "step": 9638 }, { "epoch": 2.4242957746478875, "grad_norm": 0.31814032793045044, "learning_rate": 1.0805028501640686e-06, "loss": 0.3236, "step": 9639 }, { "epoch": 2.4245472837022133, "grad_norm": 0.313716858625412, "learning_rate": 1.0795945053900153e-06, "loss": 0.2972, "step": 9640 }, { "epoch": 2.424798792756539, "grad_norm": 0.29684916138648987, "learning_rate": 1.078686496374165e-06, "loss": 0.3449, "step": 9641 }, { "epoch": 2.4250503018108653, "grad_norm": 0.2841853201389313, "learning_rate": 1.0777788231942859e-06, "loss": 0.3192, "step": 9642 }, { "epoch": 2.425301810865191, "grad_norm": 0.29728415608406067, "learning_rate": 1.076871485928112e-06, "loss": 0.3058, "step": 9643 }, { "epoch": 2.425553319919517, "grad_norm": 0.29951390624046326, "learning_rate": 1.075964484653354e-06, "loss": 0.3084, "step": 9644 }, { "epoch": 2.425804828973843, "grad_norm": 0.3010987937450409, "learning_rate": 1.0750578194476875e-06, "loss": 0.3049, "step": 9645 }, { "epoch": 2.426056338028169, "grad_norm": 0.29705509543418884, "learning_rate": 1.0741514903887657e-06, "loss": 0.3206, "step": 9646 }, { "epoch": 2.426307847082495, "grad_norm": 0.28296321630477905, "learning_rate": 1.0732454975542079e-06, "loss": 0.3216, "step": 9647 }, { "epoch": 2.426559356136821, "grad_norm": 0.2731419503688812, "learning_rate": 1.0723398410216085e-06, "loss": 0.3335, "step": 9648 }, { "epoch": 2.426810865191147, "grad_norm": 0.29805314540863037, "learning_rate": 1.071434520868529e-06, "loss": 0.3268, "step": 9649 }, { "epoch": 2.4270623742454727, "grad_norm": 0.27815183997154236, "learning_rate": 1.0705295371725066e-06, "loss": 0.3374, "step": 9650 }, { "epoch": 2.427313883299799, "grad_norm": 0.31619152426719666, "learning_rate": 1.0696248900110461e-06, "loss": 0.332, "step": 9651 }, { "epoch": 2.4275653923541247, "grad_norm": 0.27347874641418457, "learning_rate": 1.0687205794616262e-06, "loss": 0.2954, "step": 9652 }, { "epoch": 2.4278169014084505, "grad_norm": 0.323209285736084, "learning_rate": 1.0678166056016936e-06, "loss": 0.3199, "step": 9653 }, { "epoch": 2.4280684104627768, "grad_norm": 0.2880942225456238, "learning_rate": 1.0669129685086705e-06, "loss": 0.3555, "step": 9654 }, { "epoch": 2.4283199195171026, "grad_norm": 0.30394867062568665, "learning_rate": 1.0660096682599453e-06, "loss": 0.3058, "step": 9655 }, { "epoch": 2.4285714285714284, "grad_norm": 0.29747632145881653, "learning_rate": 1.0651067049328818e-06, "loss": 0.324, "step": 9656 }, { "epoch": 2.4288229376257546, "grad_norm": 0.305722177028656, "learning_rate": 1.0642040786048113e-06, "loss": 0.3026, "step": 9657 }, { "epoch": 2.4290744466800804, "grad_norm": 0.30282852053642273, "learning_rate": 1.0633017893530407e-06, "loss": 0.3272, "step": 9658 }, { "epoch": 2.4293259557344067, "grad_norm": 0.29805997014045715, "learning_rate": 1.0623998372548423e-06, "loss": 0.3306, "step": 9659 }, { "epoch": 2.4295774647887325, "grad_norm": 0.2963772118091583, "learning_rate": 1.0614982223874642e-06, "loss": 0.3272, "step": 9660 }, { "epoch": 2.4298289738430583, "grad_norm": 0.3043665587902069, "learning_rate": 1.0605969448281257e-06, "loss": 0.3119, "step": 9661 }, { "epoch": 2.4300804828973845, "grad_norm": 0.2912227511405945, "learning_rate": 1.0596960046540129e-06, "loss": 0.3362, "step": 9662 }, { "epoch": 2.4303319919517103, "grad_norm": 0.3193990886211395, "learning_rate": 1.0587954019422874e-06, "loss": 0.3238, "step": 9663 }, { "epoch": 2.430583501006036, "grad_norm": 0.2979458272457123, "learning_rate": 1.057895136770079e-06, "loss": 0.3141, "step": 9664 }, { "epoch": 2.4308350100603624, "grad_norm": 0.2856670916080475, "learning_rate": 1.056995209214492e-06, "loss": 0.3083, "step": 9665 }, { "epoch": 2.431086519114688, "grad_norm": 0.3085869550704956, "learning_rate": 1.0560956193525961e-06, "loss": 0.3193, "step": 9666 }, { "epoch": 2.431338028169014, "grad_norm": 0.30326932668685913, "learning_rate": 1.0551963672614385e-06, "loss": 0.3145, "step": 9667 }, { "epoch": 2.4315895372233403, "grad_norm": 0.30177420377731323, "learning_rate": 1.0542974530180327e-06, "loss": 0.3422, "step": 9668 }, { "epoch": 2.431841046277666, "grad_norm": 0.28768646717071533, "learning_rate": 1.0533988766993668e-06, "loss": 0.3213, "step": 9669 }, { "epoch": 2.432092555331992, "grad_norm": 0.3230489194393158, "learning_rate": 1.052500638382396e-06, "loss": 0.3326, "step": 9670 }, { "epoch": 2.432344064386318, "grad_norm": 0.3124891519546509, "learning_rate": 1.0516027381440502e-06, "loss": 0.3219, "step": 9671 }, { "epoch": 2.432595573440644, "grad_norm": 0.2979585826396942, "learning_rate": 1.0507051760612302e-06, "loss": 0.304, "step": 9672 }, { "epoch": 2.4328470824949697, "grad_norm": 0.2868764400482178, "learning_rate": 1.0498079522108034e-06, "loss": 0.349, "step": 9673 }, { "epoch": 2.433098591549296, "grad_norm": 0.32363319396972656, "learning_rate": 1.0489110666696144e-06, "loss": 0.3378, "step": 9674 }, { "epoch": 2.433350100603622, "grad_norm": 0.28327086567878723, "learning_rate": 1.0480145195144736e-06, "loss": 0.3041, "step": 9675 }, { "epoch": 2.4336016096579476, "grad_norm": 0.30430731177330017, "learning_rate": 1.0471183108221673e-06, "loss": 0.3099, "step": 9676 }, { "epoch": 2.433853118712274, "grad_norm": 0.3252510130405426, "learning_rate": 1.0462224406694471e-06, "loss": 0.3302, "step": 9677 }, { "epoch": 2.4341046277665996, "grad_norm": 0.29767587780952454, "learning_rate": 1.045326909133041e-06, "loss": 0.2966, "step": 9678 }, { "epoch": 2.4343561368209254, "grad_norm": 0.3064614534378052, "learning_rate": 1.0444317162896433e-06, "loss": 0.3123, "step": 9679 }, { "epoch": 2.4346076458752517, "grad_norm": 0.30733683705329895, "learning_rate": 1.0435368622159254e-06, "loss": 0.2995, "step": 9680 }, { "epoch": 2.4348591549295775, "grad_norm": 0.31988590955734253, "learning_rate": 1.0426423469885216e-06, "loss": 0.3176, "step": 9681 }, { "epoch": 2.4351106639839033, "grad_norm": 0.29145556688308716, "learning_rate": 1.0417481706840439e-06, "loss": 0.3056, "step": 9682 }, { "epoch": 2.4353621730382295, "grad_norm": 0.2926798462867737, "learning_rate": 1.0408543333790738e-06, "loss": 0.3149, "step": 9683 }, { "epoch": 2.4356136820925554, "grad_norm": 0.2882416844367981, "learning_rate": 1.0399608351501606e-06, "loss": 0.3247, "step": 9684 }, { "epoch": 2.435865191146881, "grad_norm": 0.3121548593044281, "learning_rate": 1.0390676760738289e-06, "loss": 0.336, "step": 9685 }, { "epoch": 2.4361167002012074, "grad_norm": 0.2849773168563843, "learning_rate": 1.0381748562265704e-06, "loss": 0.3229, "step": 9686 }, { "epoch": 2.436368209255533, "grad_norm": 0.30011287331581116, "learning_rate": 1.037282375684851e-06, "loss": 0.3302, "step": 9687 }, { "epoch": 2.436619718309859, "grad_norm": 0.298657089471817, "learning_rate": 1.0363902345251048e-06, "loss": 0.3117, "step": 9688 }, { "epoch": 2.4368712273641853, "grad_norm": 0.3116760551929474, "learning_rate": 1.0354984328237399e-06, "loss": 0.3293, "step": 9689 }, { "epoch": 2.437122736418511, "grad_norm": 0.30939438939094543, "learning_rate": 1.034606970657131e-06, "loss": 0.3208, "step": 9690 }, { "epoch": 2.437374245472837, "grad_norm": 0.3038461208343506, "learning_rate": 1.0337158481016285e-06, "loss": 0.3762, "step": 9691 }, { "epoch": 2.437625754527163, "grad_norm": 0.2885994613170624, "learning_rate": 1.0328250652335497e-06, "loss": 0.3434, "step": 9692 }, { "epoch": 2.437877263581489, "grad_norm": 0.28599095344543457, "learning_rate": 1.031934622129186e-06, "loss": 0.3283, "step": 9693 }, { "epoch": 2.4381287726358147, "grad_norm": 0.2979143559932709, "learning_rate": 1.0310445188647983e-06, "loss": 0.3298, "step": 9694 }, { "epoch": 2.438380281690141, "grad_norm": 0.300065815448761, "learning_rate": 1.030154755516617e-06, "loss": 0.3153, "step": 9695 }, { "epoch": 2.438631790744467, "grad_norm": 0.29073888063430786, "learning_rate": 1.029265332160847e-06, "loss": 0.3157, "step": 9696 }, { "epoch": 2.4388832997987926, "grad_norm": 0.28464123606681824, "learning_rate": 1.0283762488736588e-06, "loss": 0.2994, "step": 9697 }, { "epoch": 2.439134808853119, "grad_norm": 0.299254447221756, "learning_rate": 1.0274875057312001e-06, "loss": 0.3058, "step": 9698 }, { "epoch": 2.4393863179074446, "grad_norm": 0.29035767912864685, "learning_rate": 1.0265991028095828e-06, "loss": 0.3407, "step": 9699 }, { "epoch": 2.4396378269617705, "grad_norm": 0.30013376474380493, "learning_rate": 1.0257110401848963e-06, "loss": 0.3296, "step": 9700 }, { "epoch": 2.4398893360160967, "grad_norm": 0.30098024010658264, "learning_rate": 1.0248233179331952e-06, "loss": 0.3096, "step": 9701 }, { "epoch": 2.4401408450704225, "grad_norm": 0.3151046633720398, "learning_rate": 1.0239359361305091e-06, "loss": 0.2879, "step": 9702 }, { "epoch": 2.4403923541247483, "grad_norm": 0.273389607667923, "learning_rate": 1.023048894852835e-06, "loss": 0.3119, "step": 9703 }, { "epoch": 2.4406438631790746, "grad_norm": 0.31045857071876526, "learning_rate": 1.0221621941761428e-06, "loss": 0.3154, "step": 9704 }, { "epoch": 2.4408953722334004, "grad_norm": 0.27846378087997437, "learning_rate": 1.0212758341763752e-06, "loss": 0.3276, "step": 9705 }, { "epoch": 2.441146881287726, "grad_norm": 0.3135693371295929, "learning_rate": 1.02038981492944e-06, "loss": 0.3199, "step": 9706 }, { "epoch": 2.4413983903420524, "grad_norm": 0.30014142394065857, "learning_rate": 1.0195041365112224e-06, "loss": 0.3388, "step": 9707 }, { "epoch": 2.441649899396378, "grad_norm": 0.30678755044937134, "learning_rate": 1.0186187989975722e-06, "loss": 0.3294, "step": 9708 }, { "epoch": 2.441901408450704, "grad_norm": 0.2942301332950592, "learning_rate": 1.0177338024643157e-06, "loss": 0.3288, "step": 9709 }, { "epoch": 2.4421529175050303, "grad_norm": 0.2993881404399872, "learning_rate": 1.0168491469872444e-06, "loss": 0.3219, "step": 9710 }, { "epoch": 2.442404426559356, "grad_norm": 0.322645366191864, "learning_rate": 1.0159648326421268e-06, "loss": 0.2956, "step": 9711 }, { "epoch": 2.442655935613682, "grad_norm": 0.2843986749649048, "learning_rate": 1.0150808595046963e-06, "loss": 0.3088, "step": 9712 }, { "epoch": 2.442907444668008, "grad_norm": 0.2960312068462372, "learning_rate": 1.0141972276506612e-06, "loss": 0.3394, "step": 9713 }, { "epoch": 2.443158953722334, "grad_norm": 0.27722686529159546, "learning_rate": 1.013313937155697e-06, "loss": 0.3286, "step": 9714 }, { "epoch": 2.4434104627766597, "grad_norm": 0.31136950850486755, "learning_rate": 1.012430988095454e-06, "loss": 0.2942, "step": 9715 }, { "epoch": 2.443661971830986, "grad_norm": 0.28949692845344543, "learning_rate": 1.0115483805455522e-06, "loss": 0.3309, "step": 9716 }, { "epoch": 2.443913480885312, "grad_norm": 0.29630956053733826, "learning_rate": 1.0106661145815783e-06, "loss": 0.3099, "step": 9717 }, { "epoch": 2.4441649899396376, "grad_norm": 0.3057538866996765, "learning_rate": 1.0097841902790955e-06, "loss": 0.3265, "step": 9718 }, { "epoch": 2.444416498993964, "grad_norm": 0.2971940338611603, "learning_rate": 1.0089026077136333e-06, "loss": 0.3257, "step": 9719 }, { "epoch": 2.4446680080482897, "grad_norm": 0.30409669876098633, "learning_rate": 1.0080213669606958e-06, "loss": 0.3457, "step": 9720 }, { "epoch": 2.4449195171026155, "grad_norm": 0.30436453223228455, "learning_rate": 1.007140468095753e-06, "loss": 0.3052, "step": 9721 }, { "epoch": 2.4451710261569417, "grad_norm": 0.28796908259391785, "learning_rate": 1.006259911194251e-06, "loss": 0.3238, "step": 9722 }, { "epoch": 2.4454225352112675, "grad_norm": 0.3076968491077423, "learning_rate": 1.005379696331602e-06, "loss": 0.3161, "step": 9723 }, { "epoch": 2.4456740442655933, "grad_norm": 0.2974853813648224, "learning_rate": 1.0044998235831927e-06, "loss": 0.3177, "step": 9724 }, { "epoch": 2.4459255533199196, "grad_norm": 0.2881965637207031, "learning_rate": 1.0036202930243766e-06, "loss": 0.3106, "step": 9725 }, { "epoch": 2.4461770623742454, "grad_norm": 0.2931132912635803, "learning_rate": 1.0027411047304808e-06, "loss": 0.319, "step": 9726 }, { "epoch": 2.4464285714285716, "grad_norm": 0.2949577569961548, "learning_rate": 1.001862258776804e-06, "loss": 0.3085, "step": 9727 }, { "epoch": 2.4466800804828974, "grad_norm": 0.2667643129825592, "learning_rate": 1.0009837552386114e-06, "loss": 0.3312, "step": 9728 }, { "epoch": 2.4469315895372232, "grad_norm": 0.30988672375679016, "learning_rate": 1.0001055941911437e-06, "loss": 0.3215, "step": 9729 }, { "epoch": 2.4471830985915495, "grad_norm": 0.3098249137401581, "learning_rate": 9.992277757096069e-07, "loss": 0.303, "step": 9730 }, { "epoch": 2.4474346076458753, "grad_norm": 0.27878841757774353, "learning_rate": 9.98350299869184e-07, "loss": 0.322, "step": 9731 }, { "epoch": 2.447686116700201, "grad_norm": 0.306535005569458, "learning_rate": 9.974731667450227e-07, "loss": 0.3044, "step": 9732 }, { "epoch": 2.4479376257545273, "grad_norm": 0.3038245737552643, "learning_rate": 9.965963764122455e-07, "loss": 0.342, "step": 9733 }, { "epoch": 2.448189134808853, "grad_norm": 0.2671661972999573, "learning_rate": 9.957199289459424e-07, "loss": 0.3162, "step": 9734 }, { "epoch": 2.448440643863179, "grad_norm": 0.3104800879955292, "learning_rate": 9.948438244211784e-07, "loss": 0.3096, "step": 9735 }, { "epoch": 2.448692152917505, "grad_norm": 0.3143140971660614, "learning_rate": 9.939680629129828e-07, "loss": 0.318, "step": 9736 }, { "epoch": 2.448943661971831, "grad_norm": 0.2797510325908661, "learning_rate": 9.930926444963612e-07, "loss": 0.311, "step": 9737 }, { "epoch": 2.449195171026157, "grad_norm": 0.2918142080307007, "learning_rate": 9.922175692462887e-07, "loss": 0.3394, "step": 9738 }, { "epoch": 2.449446680080483, "grad_norm": 0.3032245337963104, "learning_rate": 9.913428372377077e-07, "loss": 0.3326, "step": 9739 }, { "epoch": 2.449698189134809, "grad_norm": 0.2985784411430359, "learning_rate": 9.904684485455358e-07, "loss": 0.3224, "step": 9740 }, { "epoch": 2.4499496981891347, "grad_norm": 0.30142176151275635, "learning_rate": 9.895944032446563e-07, "loss": 0.335, "step": 9741 }, { "epoch": 2.450201207243461, "grad_norm": 0.3030637204647064, "learning_rate": 9.887207014099288e-07, "loss": 0.3076, "step": 9742 }, { "epoch": 2.4504527162977867, "grad_norm": 0.2667844593524933, "learning_rate": 9.878473431161767e-07, "loss": 0.3327, "step": 9743 }, { "epoch": 2.4507042253521125, "grad_norm": 0.3067658245563507, "learning_rate": 9.869743284382016e-07, "loss": 0.3115, "step": 9744 }, { "epoch": 2.4509557344064388, "grad_norm": 0.2902940511703491, "learning_rate": 9.861016574507686e-07, "loss": 0.3084, "step": 9745 }, { "epoch": 2.4512072434607646, "grad_norm": 0.28992483019828796, "learning_rate": 9.852293302286186e-07, "loss": 0.3156, "step": 9746 }, { "epoch": 2.4514587525150904, "grad_norm": 0.3165602385997772, "learning_rate": 9.843573468464596e-07, "loss": 0.3362, "step": 9747 }, { "epoch": 2.4517102615694166, "grad_norm": 0.2829355299472809, "learning_rate": 9.834857073789716e-07, "loss": 0.3233, "step": 9748 }, { "epoch": 2.4519617706237424, "grad_norm": 0.3075374960899353, "learning_rate": 9.826144119008068e-07, "loss": 0.3281, "step": 9749 }, { "epoch": 2.4522132796780682, "grad_norm": 0.32207030057907104, "learning_rate": 9.817434604865833e-07, "loss": 0.332, "step": 9750 }, { "epoch": 2.4524647887323945, "grad_norm": 0.29507938027381897, "learning_rate": 9.808728532108963e-07, "loss": 0.3202, "step": 9751 }, { "epoch": 2.4527162977867203, "grad_norm": 0.29577136039733887, "learning_rate": 9.80002590148304e-07, "loss": 0.349, "step": 9752 }, { "epoch": 2.452967806841046, "grad_norm": 0.2732732594013214, "learning_rate": 9.791326713733424e-07, "loss": 0.3364, "step": 9753 }, { "epoch": 2.4532193158953723, "grad_norm": 0.2873142957687378, "learning_rate": 9.782630969605116e-07, "loss": 0.3449, "step": 9754 }, { "epoch": 2.453470824949698, "grad_norm": 0.28171807527542114, "learning_rate": 9.77393866984288e-07, "loss": 0.3179, "step": 9755 }, { "epoch": 2.453722334004024, "grad_norm": 0.28104138374328613, "learning_rate": 9.76524981519113e-07, "loss": 0.3137, "step": 9756 }, { "epoch": 2.45397384305835, "grad_norm": 0.3065677881240845, "learning_rate": 9.756564406394042e-07, "loss": 0.3079, "step": 9757 }, { "epoch": 2.454225352112676, "grad_norm": 0.2809961438179016, "learning_rate": 9.747882444195434e-07, "loss": 0.3275, "step": 9758 }, { "epoch": 2.4544768611670023, "grad_norm": 0.2860454022884369, "learning_rate": 9.739203929338892e-07, "loss": 0.3225, "step": 9759 }, { "epoch": 2.454728370221328, "grad_norm": 0.29092496633529663, "learning_rate": 9.730528862567645e-07, "loss": 0.3164, "step": 9760 }, { "epoch": 2.454979879275654, "grad_norm": 0.31237539649009705, "learning_rate": 9.721857244624695e-07, "loss": 0.3227, "step": 9761 }, { "epoch": 2.45523138832998, "grad_norm": 0.28859943151474, "learning_rate": 9.713189076252676e-07, "loss": 0.3339, "step": 9762 }, { "epoch": 2.455482897384306, "grad_norm": 0.2893824875354767, "learning_rate": 9.704524358193996e-07, "loss": 0.3166, "step": 9763 }, { "epoch": 2.4557344064386317, "grad_norm": 0.2801450788974762, "learning_rate": 9.6958630911907e-07, "loss": 0.3077, "step": 9764 }, { "epoch": 2.455985915492958, "grad_norm": 0.2814953029155731, "learning_rate": 9.68720527598459e-07, "loss": 0.3253, "step": 9765 }, { "epoch": 2.4562374245472838, "grad_norm": 0.2909027338027954, "learning_rate": 9.678550913317169e-07, "loss": 0.3334, "step": 9766 }, { "epoch": 2.4564889336016096, "grad_norm": 0.3029521703720093, "learning_rate": 9.669900003929595e-07, "loss": 0.321, "step": 9767 }, { "epoch": 2.456740442655936, "grad_norm": 0.2950665056705475, "learning_rate": 9.661252548562794e-07, "loss": 0.3358, "step": 9768 }, { "epoch": 2.4569919517102616, "grad_norm": 0.2943587005138397, "learning_rate": 9.652608547957343e-07, "loss": 0.3022, "step": 9769 }, { "epoch": 2.4572434607645874, "grad_norm": 0.2907446622848511, "learning_rate": 9.643968002853566e-07, "loss": 0.3282, "step": 9770 }, { "epoch": 2.4574949698189137, "grad_norm": 0.2860673666000366, "learning_rate": 9.635330913991453e-07, "loss": 0.3258, "step": 9771 }, { "epoch": 2.4577464788732395, "grad_norm": 0.2849615514278412, "learning_rate": 9.626697282110743e-07, "loss": 0.3418, "step": 9772 }, { "epoch": 2.4579979879275653, "grad_norm": 0.2882292866706848, "learning_rate": 9.61806710795082e-07, "loss": 0.3055, "step": 9773 }, { "epoch": 2.4582494969818915, "grad_norm": 0.28438448905944824, "learning_rate": 9.609440392250829e-07, "loss": 0.3548, "step": 9774 }, { "epoch": 2.4585010060362174, "grad_norm": 0.28415539860725403, "learning_rate": 9.600817135749579e-07, "loss": 0.3328, "step": 9775 }, { "epoch": 2.458752515090543, "grad_norm": 0.2894395887851715, "learning_rate": 9.592197339185617e-07, "loss": 0.3477, "step": 9776 }, { "epoch": 2.4590040241448694, "grad_norm": 0.2790740728378296, "learning_rate": 9.583581003297148e-07, "loss": 0.326, "step": 9777 }, { "epoch": 2.459255533199195, "grad_norm": 0.29626601934432983, "learning_rate": 9.574968128822138e-07, "loss": 0.3037, "step": 9778 }, { "epoch": 2.459507042253521, "grad_norm": 0.29274749755859375, "learning_rate": 9.566358716498192e-07, "loss": 0.3609, "step": 9779 }, { "epoch": 2.4597585513078473, "grad_norm": 0.2905212640762329, "learning_rate": 9.557752767062683e-07, "loss": 0.3246, "step": 9780 }, { "epoch": 2.460010060362173, "grad_norm": 0.2953580915927887, "learning_rate": 9.549150281252633e-07, "loss": 0.3493, "step": 9781 }, { "epoch": 2.460261569416499, "grad_norm": 0.2971348464488983, "learning_rate": 9.540551259804814e-07, "loss": 0.3454, "step": 9782 }, { "epoch": 2.460513078470825, "grad_norm": 0.2992364168167114, "learning_rate": 9.531955703455654e-07, "loss": 0.3294, "step": 9783 }, { "epoch": 2.460764587525151, "grad_norm": 0.3017844259738922, "learning_rate": 9.523363612941333e-07, "loss": 0.3127, "step": 9784 }, { "epoch": 2.4610160965794767, "grad_norm": 0.2864784300327301, "learning_rate": 9.514774988997683e-07, "loss": 0.3399, "step": 9785 }, { "epoch": 2.461267605633803, "grad_norm": 0.29324662685394287, "learning_rate": 9.506189832360296e-07, "loss": 0.3104, "step": 9786 }, { "epoch": 2.461519114688129, "grad_norm": 0.30676597356796265, "learning_rate": 9.497608143764403e-07, "loss": 0.298, "step": 9787 }, { "epoch": 2.4617706237424546, "grad_norm": 0.2943531274795532, "learning_rate": 9.489029923945009e-07, "loss": 0.3265, "step": 9788 }, { "epoch": 2.462022132796781, "grad_norm": 0.30594533681869507, "learning_rate": 9.480455173636754e-07, "loss": 0.3198, "step": 9789 }, { "epoch": 2.4622736418511066, "grad_norm": 0.2777751684188843, "learning_rate": 9.471883893574019e-07, "loss": 0.3289, "step": 9790 }, { "epoch": 2.4625251509054324, "grad_norm": 0.3079993426799774, "learning_rate": 9.463316084490903e-07, "loss": 0.3278, "step": 9791 }, { "epoch": 2.4627766599597587, "grad_norm": 0.29290157556533813, "learning_rate": 9.454751747121149e-07, "loss": 0.3278, "step": 9792 }, { "epoch": 2.4630281690140845, "grad_norm": 0.2852899432182312, "learning_rate": 9.446190882198275e-07, "loss": 0.3066, "step": 9793 }, { "epoch": 2.4632796780684103, "grad_norm": 0.2716057300567627, "learning_rate": 9.437633490455434e-07, "loss": 0.3427, "step": 9794 }, { "epoch": 2.4635311871227366, "grad_norm": 0.2801603674888611, "learning_rate": 9.429079572625543e-07, "loss": 0.331, "step": 9795 }, { "epoch": 2.4637826961770624, "grad_norm": 0.2798592150211334, "learning_rate": 9.420529129441159e-07, "loss": 0.3137, "step": 9796 }, { "epoch": 2.464034205231388, "grad_norm": 0.2886095345020294, "learning_rate": 9.411982161634603e-07, "loss": 0.2978, "step": 9797 }, { "epoch": 2.4642857142857144, "grad_norm": 0.31059303879737854, "learning_rate": 9.403438669937848e-07, "loss": 0.3069, "step": 9798 }, { "epoch": 2.46453722334004, "grad_norm": 0.2892918884754181, "learning_rate": 9.394898655082607e-07, "loss": 0.3412, "step": 9799 }, { "epoch": 2.464788732394366, "grad_norm": 0.29814252257347107, "learning_rate": 9.386362117800262e-07, "loss": 0.313, "step": 9800 }, { "epoch": 2.4650402414486923, "grad_norm": 0.30424758791923523, "learning_rate": 9.377829058821924e-07, "loss": 0.3106, "step": 9801 }, { "epoch": 2.465291750503018, "grad_norm": 0.3049749732017517, "learning_rate": 9.369299478878408e-07, "loss": 0.3122, "step": 9802 }, { "epoch": 2.465543259557344, "grad_norm": 0.2787761092185974, "learning_rate": 9.360773378700194e-07, "loss": 0.319, "step": 9803 }, { "epoch": 2.46579476861167, "grad_norm": 0.3029969334602356, "learning_rate": 9.352250759017517e-07, "loss": 0.3046, "step": 9804 }, { "epoch": 2.466046277665996, "grad_norm": 0.2986536920070648, "learning_rate": 9.343731620560254e-07, "loss": 0.3045, "step": 9805 }, { "epoch": 2.4662977867203217, "grad_norm": 0.3007206618785858, "learning_rate": 9.335215964058047e-07, "loss": 0.3246, "step": 9806 }, { "epoch": 2.466549295774648, "grad_norm": 0.31535202264785767, "learning_rate": 9.326703790240183e-07, "loss": 0.3106, "step": 9807 }, { "epoch": 2.466800804828974, "grad_norm": 0.29556044936180115, "learning_rate": 9.318195099835697e-07, "loss": 0.3332, "step": 9808 }, { "epoch": 2.4670523138832996, "grad_norm": 0.2860323190689087, "learning_rate": 9.309689893573287e-07, "loss": 0.3225, "step": 9809 }, { "epoch": 2.467303822937626, "grad_norm": 0.3032556176185608, "learning_rate": 9.301188172181391e-07, "loss": 0.2963, "step": 9810 }, { "epoch": 2.4675553319919517, "grad_norm": 0.29713839292526245, "learning_rate": 9.292689936388106e-07, "loss": 0.327, "step": 9811 }, { "epoch": 2.4678068410462775, "grad_norm": 0.2727225124835968, "learning_rate": 9.284195186921263e-07, "loss": 0.3128, "step": 9812 }, { "epoch": 2.4680583501006037, "grad_norm": 0.2951027452945709, "learning_rate": 9.2757039245084e-07, "loss": 0.3165, "step": 9813 }, { "epoch": 2.4683098591549295, "grad_norm": 0.31366926431655884, "learning_rate": 9.267216149876712e-07, "loss": 0.3337, "step": 9814 }, { "epoch": 2.4685613682092553, "grad_norm": 0.30813443660736084, "learning_rate": 9.258731863753145e-07, "loss": 0.3278, "step": 9815 }, { "epoch": 2.4688128772635816, "grad_norm": 0.30502599477767944, "learning_rate": 9.250251066864308e-07, "loss": 0.3154, "step": 9816 }, { "epoch": 2.4690643863179074, "grad_norm": 0.2972979247570038, "learning_rate": 9.241773759936551e-07, "loss": 0.3481, "step": 9817 }, { "epoch": 2.469315895372233, "grad_norm": 0.32507577538490295, "learning_rate": 9.233299943695878e-07, "loss": 0.3105, "step": 9818 }, { "epoch": 2.4695674044265594, "grad_norm": 0.30700263381004333, "learning_rate": 9.224829618868037e-07, "loss": 0.3226, "step": 9819 }, { "epoch": 2.4698189134808852, "grad_norm": 0.3043920397758484, "learning_rate": 9.21636278617844e-07, "loss": 0.3128, "step": 9820 }, { "epoch": 2.470070422535211, "grad_norm": 0.2863234579563141, "learning_rate": 9.207899446352242e-07, "loss": 0.3091, "step": 9821 }, { "epoch": 2.4703219315895373, "grad_norm": 0.3094150722026825, "learning_rate": 9.199439600114247e-07, "loss": 0.3213, "step": 9822 }, { "epoch": 2.470573440643863, "grad_norm": 0.2926952838897705, "learning_rate": 9.190983248189006e-07, "loss": 0.3025, "step": 9823 }, { "epoch": 2.470824949698189, "grad_norm": 0.3109200596809387, "learning_rate": 9.18253039130076e-07, "loss": 0.3124, "step": 9824 }, { "epoch": 2.471076458752515, "grad_norm": 0.32803231477737427, "learning_rate": 9.174081030173421e-07, "loss": 0.2898, "step": 9825 }, { "epoch": 2.471327967806841, "grad_norm": 0.30386537313461304, "learning_rate": 9.165635165530645e-07, "loss": 0.3086, "step": 9826 }, { "epoch": 2.471579476861167, "grad_norm": 0.27869728207588196, "learning_rate": 9.157192798095748e-07, "loss": 0.341, "step": 9827 }, { "epoch": 2.471830985915493, "grad_norm": 0.2737056612968445, "learning_rate": 9.148753928591791e-07, "loss": 0.318, "step": 9828 }, { "epoch": 2.472082494969819, "grad_norm": 0.3147308826446533, "learning_rate": 9.140318557741479e-07, "loss": 0.309, "step": 9829 }, { "epoch": 2.472334004024145, "grad_norm": 0.324812650680542, "learning_rate": 9.131886686267277e-07, "loss": 0.3185, "step": 9830 }, { "epoch": 2.472585513078471, "grad_norm": 0.29894959926605225, "learning_rate": 9.123458314891304e-07, "loss": 0.3313, "step": 9831 }, { "epoch": 2.4728370221327967, "grad_norm": 0.278921514749527, "learning_rate": 9.115033444335408e-07, "loss": 0.3172, "step": 9832 }, { "epoch": 2.473088531187123, "grad_norm": 0.27208349108695984, "learning_rate": 9.106612075321114e-07, "loss": 0.3138, "step": 9833 }, { "epoch": 2.4733400402414487, "grad_norm": 0.27421247959136963, "learning_rate": 9.098194208569666e-07, "loss": 0.3112, "step": 9834 }, { "epoch": 2.4735915492957745, "grad_norm": 0.3077579736709595, "learning_rate": 9.089779844802016e-07, "loss": 0.3335, "step": 9835 }, { "epoch": 2.4738430583501008, "grad_norm": 0.2903458774089813, "learning_rate": 9.081368984738781e-07, "loss": 0.3324, "step": 9836 }, { "epoch": 2.4740945674044266, "grad_norm": 0.2951434254646301, "learning_rate": 9.072961629100313e-07, "loss": 0.3341, "step": 9837 }, { "epoch": 2.4743460764587524, "grad_norm": 0.28142881393432617, "learning_rate": 9.064557778606631e-07, "loss": 0.3054, "step": 9838 }, { "epoch": 2.4745975855130786, "grad_norm": 0.2758212387561798, "learning_rate": 9.056157433977497e-07, "loss": 0.3227, "step": 9839 }, { "epoch": 2.4748490945674044, "grad_norm": 0.2981395125389099, "learning_rate": 9.04776059593232e-07, "loss": 0.3408, "step": 9840 }, { "epoch": 2.4751006036217302, "grad_norm": 0.2696145176887512, "learning_rate": 9.039367265190268e-07, "loss": 0.3244, "step": 9841 }, { "epoch": 2.4753521126760565, "grad_norm": 0.2982428967952728, "learning_rate": 9.030977442470146e-07, "loss": 0.3094, "step": 9842 }, { "epoch": 2.4756036217303823, "grad_norm": 0.2985045611858368, "learning_rate": 9.022591128490521e-07, "loss": 0.3323, "step": 9843 }, { "epoch": 2.475855130784708, "grad_norm": 0.2910267114639282, "learning_rate": 9.014208323969598e-07, "loss": 0.3386, "step": 9844 }, { "epoch": 2.4761066398390343, "grad_norm": 0.290507972240448, "learning_rate": 9.005829029625324e-07, "loss": 0.3003, "step": 9845 }, { "epoch": 2.47635814889336, "grad_norm": 0.2682448923587799, "learning_rate": 8.997453246175347e-07, "loss": 0.3272, "step": 9846 }, { "epoch": 2.476609657947686, "grad_norm": 0.2868577241897583, "learning_rate": 8.989080974336972e-07, "loss": 0.3217, "step": 9847 }, { "epoch": 2.476861167002012, "grad_norm": 0.28790926933288574, "learning_rate": 8.98071221482727e-07, "loss": 0.3042, "step": 9848 }, { "epoch": 2.477112676056338, "grad_norm": 0.29729732871055603, "learning_rate": 8.972346968362932e-07, "loss": 0.304, "step": 9849 }, { "epoch": 2.477364185110664, "grad_norm": 0.28235840797424316, "learning_rate": 8.963985235660422e-07, "loss": 0.325, "step": 9850 }, { "epoch": 2.47761569416499, "grad_norm": 0.2943761646747589, "learning_rate": 8.955627017435841e-07, "loss": 0.3345, "step": 9851 }, { "epoch": 2.477867203219316, "grad_norm": 0.29644346237182617, "learning_rate": 8.947272314405048e-07, "loss": 0.326, "step": 9852 }, { "epoch": 2.4781187122736417, "grad_norm": 0.29010266065597534, "learning_rate": 8.938921127283545e-07, "loss": 0.337, "step": 9853 }, { "epoch": 2.478370221327968, "grad_norm": 0.31179898977279663, "learning_rate": 8.930573456786584e-07, "loss": 0.3329, "step": 9854 }, { "epoch": 2.4786217303822937, "grad_norm": 0.30490022897720337, "learning_rate": 8.922229303629059e-07, "loss": 0.3509, "step": 9855 }, { "epoch": 2.4788732394366195, "grad_norm": 0.2934214472770691, "learning_rate": 8.913888668525616e-07, "loss": 0.3173, "step": 9856 }, { "epoch": 2.4791247484909458, "grad_norm": 0.29519298672676086, "learning_rate": 8.905551552190589e-07, "loss": 0.3348, "step": 9857 }, { "epoch": 2.4793762575452716, "grad_norm": 0.2930413484573364, "learning_rate": 8.897217955337967e-07, "loss": 0.3564, "step": 9858 }, { "epoch": 2.479627766599598, "grad_norm": 0.2853700816631317, "learning_rate": 8.888887878681507e-07, "loss": 0.3438, "step": 9859 }, { "epoch": 2.4798792756539236, "grad_norm": 0.30569860339164734, "learning_rate": 8.880561322934595e-07, "loss": 0.3153, "step": 9860 }, { "epoch": 2.4801307847082494, "grad_norm": 0.30227360129356384, "learning_rate": 8.872238288810375e-07, "loss": 0.3207, "step": 9861 }, { "epoch": 2.4803822937625757, "grad_norm": 0.2963835895061493, "learning_rate": 8.86391877702164e-07, "loss": 0.3277, "step": 9862 }, { "epoch": 2.4806338028169015, "grad_norm": 0.29063740372657776, "learning_rate": 8.855602788280926e-07, "loss": 0.3126, "step": 9863 }, { "epoch": 2.4808853118712273, "grad_norm": 0.2818237245082855, "learning_rate": 8.847290323300423e-07, "loss": 0.3289, "step": 9864 }, { "epoch": 2.4811368209255535, "grad_norm": 0.2938326299190521, "learning_rate": 8.838981382792067e-07, "loss": 0.3429, "step": 9865 }, { "epoch": 2.4813883299798793, "grad_norm": 0.30507832765579224, "learning_rate": 8.830675967467439e-07, "loss": 0.3219, "step": 9866 }, { "epoch": 2.481639839034205, "grad_norm": 0.2869739234447479, "learning_rate": 8.822374078037859e-07, "loss": 0.305, "step": 9867 }, { "epoch": 2.4818913480885314, "grad_norm": 0.3017198145389557, "learning_rate": 8.814075715214348e-07, "loss": 0.3455, "step": 9868 }, { "epoch": 2.482142857142857, "grad_norm": 0.2792239189147949, "learning_rate": 8.805780879707582e-07, "loss": 0.3077, "step": 9869 }, { "epoch": 2.482394366197183, "grad_norm": 0.2923182249069214, "learning_rate": 8.797489572227985e-07, "loss": 0.3207, "step": 9870 }, { "epoch": 2.4826458752515093, "grad_norm": 0.3037014901638031, "learning_rate": 8.78920179348563e-07, "loss": 0.3091, "step": 9871 }, { "epoch": 2.482897384305835, "grad_norm": 0.312212735414505, "learning_rate": 8.780917544190337e-07, "loss": 0.3114, "step": 9872 }, { "epoch": 2.483148893360161, "grad_norm": 0.29248368740081787, "learning_rate": 8.772636825051584e-07, "loss": 0.3165, "step": 9873 }, { "epoch": 2.483400402414487, "grad_norm": 0.291723370552063, "learning_rate": 8.76435963677858e-07, "loss": 0.3269, "step": 9874 }, { "epoch": 2.483651911468813, "grad_norm": 0.26432013511657715, "learning_rate": 8.756085980080193e-07, "loss": 0.3193, "step": 9875 }, { "epoch": 2.4839034205231387, "grad_norm": 0.2941466271877289, "learning_rate": 8.747815855665026e-07, "loss": 0.3385, "step": 9876 }, { "epoch": 2.484154929577465, "grad_norm": 0.2962441146373749, "learning_rate": 8.739549264241353e-07, "loss": 0.3287, "step": 9877 }, { "epoch": 2.484406438631791, "grad_norm": 0.29611530900001526, "learning_rate": 8.731286206517158e-07, "loss": 0.3197, "step": 9878 }, { "epoch": 2.4846579476861166, "grad_norm": 0.2853570878505707, "learning_rate": 8.723026683200136e-07, "loss": 0.3227, "step": 9879 }, { "epoch": 2.484909456740443, "grad_norm": 0.3189356029033661, "learning_rate": 8.714770694997637e-07, "loss": 0.351, "step": 9880 }, { "epoch": 2.4851609657947686, "grad_norm": 0.2843788266181946, "learning_rate": 8.706518242616762e-07, "loss": 0.357, "step": 9881 }, { "epoch": 2.4854124748490944, "grad_norm": 0.2974276542663574, "learning_rate": 8.698269326764258e-07, "loss": 0.3195, "step": 9882 }, { "epoch": 2.4856639839034207, "grad_norm": 0.28935566544532776, "learning_rate": 8.690023948146614e-07, "loss": 0.3518, "step": 9883 }, { "epoch": 2.4859154929577465, "grad_norm": 0.29735007882118225, "learning_rate": 8.681782107469971e-07, "loss": 0.3126, "step": 9884 }, { "epoch": 2.4861670020120723, "grad_norm": 0.29163575172424316, "learning_rate": 8.673543805440216e-07, "loss": 0.3077, "step": 9885 }, { "epoch": 2.4864185110663986, "grad_norm": 0.29763737320899963, "learning_rate": 8.665309042762888e-07, "loss": 0.3097, "step": 9886 }, { "epoch": 2.4866700201207244, "grad_norm": 0.3058236539363861, "learning_rate": 8.657077820143262e-07, "loss": 0.296, "step": 9887 }, { "epoch": 2.48692152917505, "grad_norm": 0.2766462564468384, "learning_rate": 8.648850138286263e-07, "loss": 0.3218, "step": 9888 }, { "epoch": 2.4871730382293764, "grad_norm": 0.3000984489917755, "learning_rate": 8.640625997896579e-07, "loss": 0.3407, "step": 9889 }, { "epoch": 2.487424547283702, "grad_norm": 0.3003200888633728, "learning_rate": 8.632405399678518e-07, "loss": 0.3151, "step": 9890 }, { "epoch": 2.487676056338028, "grad_norm": 0.288412481546402, "learning_rate": 8.624188344336148e-07, "loss": 0.3449, "step": 9891 }, { "epoch": 2.4879275653923543, "grad_norm": 0.3167831599712372, "learning_rate": 8.615974832573193e-07, "loss": 0.3157, "step": 9892 }, { "epoch": 2.48817907444668, "grad_norm": 0.27706781029701233, "learning_rate": 8.607764865093104e-07, "loss": 0.3028, "step": 9893 }, { "epoch": 2.488430583501006, "grad_norm": 0.2621653079986572, "learning_rate": 8.599558442598998e-07, "loss": 0.3303, "step": 9894 }, { "epoch": 2.488682092555332, "grad_norm": 0.29237544536590576, "learning_rate": 8.591355565793724e-07, "loss": 0.3163, "step": 9895 }, { "epoch": 2.488933601609658, "grad_norm": 0.31671497225761414, "learning_rate": 8.583156235379774e-07, "loss": 0.3258, "step": 9896 }, { "epoch": 2.4891851106639837, "grad_norm": 0.3017057478427887, "learning_rate": 8.574960452059411e-07, "loss": 0.3216, "step": 9897 }, { "epoch": 2.48943661971831, "grad_norm": 0.2894316017627716, "learning_rate": 8.566768216534516e-07, "loss": 0.3205, "step": 9898 }, { "epoch": 2.489688128772636, "grad_norm": 0.28005433082580566, "learning_rate": 8.558579529506728e-07, "loss": 0.3178, "step": 9899 }, { "epoch": 2.4899396378269616, "grad_norm": 0.3050450086593628, "learning_rate": 8.55039439167733e-07, "loss": 0.3008, "step": 9900 }, { "epoch": 2.490191146881288, "grad_norm": 0.2795218527317047, "learning_rate": 8.542212803747363e-07, "loss": 0.3197, "step": 9901 }, { "epoch": 2.4904426559356136, "grad_norm": 0.2750023901462555, "learning_rate": 8.534034766417498e-07, "loss": 0.3155, "step": 9902 }, { "epoch": 2.4906941649899395, "grad_norm": 0.30676746368408203, "learning_rate": 8.525860280388154e-07, "loss": 0.31, "step": 9903 }, { "epoch": 2.4909456740442657, "grad_norm": 0.28755173087120056, "learning_rate": 8.517689346359409e-07, "loss": 0.3182, "step": 9904 }, { "epoch": 2.4911971830985915, "grad_norm": 0.30206596851348877, "learning_rate": 8.509521965031064e-07, "loss": 0.299, "step": 9905 }, { "epoch": 2.4914486921529173, "grad_norm": 0.28528329730033875, "learning_rate": 8.501358137102589e-07, "loss": 0.3261, "step": 9906 }, { "epoch": 2.4917002012072436, "grad_norm": 0.27517518401145935, "learning_rate": 8.493197863273189e-07, "loss": 0.3225, "step": 9907 }, { "epoch": 2.4919517102615694, "grad_norm": 0.29692593216896057, "learning_rate": 8.485041144241712e-07, "loss": 0.3461, "step": 9908 }, { "epoch": 2.492203219315895, "grad_norm": 0.288084477186203, "learning_rate": 8.476887980706761e-07, "loss": 0.3184, "step": 9909 }, { "epoch": 2.4924547283702214, "grad_norm": 0.2816907465457916, "learning_rate": 8.468738373366569e-07, "loss": 0.3151, "step": 9910 }, { "epoch": 2.4927062374245472, "grad_norm": 0.3093920946121216, "learning_rate": 8.46059232291912e-07, "loss": 0.3172, "step": 9911 }, { "epoch": 2.492957746478873, "grad_norm": 0.30304044485092163, "learning_rate": 8.452449830062082e-07, "loss": 0.3061, "step": 9912 }, { "epoch": 2.4932092555331993, "grad_norm": 0.2883797287940979, "learning_rate": 8.44431089549278e-07, "loss": 0.3292, "step": 9913 }, { "epoch": 2.493460764587525, "grad_norm": 0.2927064597606659, "learning_rate": 8.436175519908291e-07, "loss": 0.3416, "step": 9914 }, { "epoch": 2.493712273641851, "grad_norm": 0.29678410291671753, "learning_rate": 8.428043704005334e-07, "loss": 0.3239, "step": 9915 }, { "epoch": 2.493963782696177, "grad_norm": 0.299121230840683, "learning_rate": 8.419915448480376e-07, "loss": 0.3275, "step": 9916 }, { "epoch": 2.494215291750503, "grad_norm": 0.2699803411960602, "learning_rate": 8.41179075402952e-07, "loss": 0.3286, "step": 9917 }, { "epoch": 2.4944668008048287, "grad_norm": 0.29386457800865173, "learning_rate": 8.40366962134862e-07, "loss": 0.3343, "step": 9918 }, { "epoch": 2.494718309859155, "grad_norm": 0.29827818274497986, "learning_rate": 8.395552051133182e-07, "loss": 0.3185, "step": 9919 }, { "epoch": 2.494969818913481, "grad_norm": 0.3134204149246216, "learning_rate": 8.387438044078439e-07, "loss": 0.3283, "step": 9920 }, { "epoch": 2.4952213279678066, "grad_norm": 0.2914014458656311, "learning_rate": 8.379327600879306e-07, "loss": 0.3361, "step": 9921 }, { "epoch": 2.495472837022133, "grad_norm": 0.29312896728515625, "learning_rate": 8.371220722230378e-07, "loss": 0.3204, "step": 9922 }, { "epoch": 2.4957243460764587, "grad_norm": 0.29510068893432617, "learning_rate": 8.363117408825972e-07, "loss": 0.3093, "step": 9923 }, { "epoch": 2.4959758551307845, "grad_norm": 0.29542407393455505, "learning_rate": 8.355017661360077e-07, "loss": 0.3169, "step": 9924 }, { "epoch": 2.4962273641851107, "grad_norm": 0.3483355939388275, "learning_rate": 8.346921480526393e-07, "loss": 0.3323, "step": 9925 }, { "epoch": 2.4964788732394365, "grad_norm": 0.302891343832016, "learning_rate": 8.338828867018295e-07, "loss": 0.3261, "step": 9926 }, { "epoch": 2.4967303822937628, "grad_norm": 0.28868502378463745, "learning_rate": 8.330739821528888e-07, "loss": 0.3307, "step": 9927 }, { "epoch": 2.4969818913480886, "grad_norm": 0.2844146192073822, "learning_rate": 8.322654344750914e-07, "loss": 0.3226, "step": 9928 }, { "epoch": 2.4972334004024144, "grad_norm": 0.30544909834861755, "learning_rate": 8.314572437376883e-07, "loss": 0.3061, "step": 9929 }, { "epoch": 2.4974849094567406, "grad_norm": 0.2997104525566101, "learning_rate": 8.306494100098922e-07, "loss": 0.3298, "step": 9930 }, { "epoch": 2.4977364185110664, "grad_norm": 0.311212956905365, "learning_rate": 8.298419333608909e-07, "loss": 0.3136, "step": 9931 }, { "epoch": 2.4979879275653922, "grad_norm": 0.3107351064682007, "learning_rate": 8.290348138598408e-07, "loss": 0.3482, "step": 9932 }, { "epoch": 2.4982394366197185, "grad_norm": 0.3184583783149719, "learning_rate": 8.282280515758639e-07, "loss": 0.321, "step": 9933 }, { "epoch": 2.4984909456740443, "grad_norm": 0.3004460036754608, "learning_rate": 8.274216465780577e-07, "loss": 0.3046, "step": 9934 }, { "epoch": 2.49874245472837, "grad_norm": 0.26762136816978455, "learning_rate": 8.266155989354823e-07, "loss": 0.3174, "step": 9935 }, { "epoch": 2.4989939637826963, "grad_norm": 0.31421777606010437, "learning_rate": 8.258099087171734e-07, "loss": 0.2931, "step": 9936 }, { "epoch": 2.499245472837022, "grad_norm": 0.311367005109787, "learning_rate": 8.25004575992131e-07, "loss": 0.3506, "step": 9937 }, { "epoch": 2.499496981891348, "grad_norm": 0.28332027792930603, "learning_rate": 8.241996008293295e-07, "loss": 0.307, "step": 9938 }, { "epoch": 2.499748490945674, "grad_norm": 0.2985660433769226, "learning_rate": 8.233949832977067e-07, "loss": 0.3112, "step": 9939 }, { "epoch": 2.5, "grad_norm": 0.30060288310050964, "learning_rate": 8.225907234661767e-07, "loss": 0.344, "step": 9940 }, { "epoch": 2.500251509054326, "grad_norm": 0.2916584610939026, "learning_rate": 8.217868214036156e-07, "loss": 0.3283, "step": 9941 }, { "epoch": 2.500503018108652, "grad_norm": 0.3100978434085846, "learning_rate": 8.20983277178875e-07, "loss": 0.3318, "step": 9942 }, { "epoch": 2.500754527162978, "grad_norm": 0.27890169620513916, "learning_rate": 8.201800908607738e-07, "loss": 0.3292, "step": 9943 }, { "epoch": 2.5010060362173037, "grad_norm": 0.29491671919822693, "learning_rate": 8.193772625180974e-07, "loss": 0.3101, "step": 9944 }, { "epoch": 2.50125754527163, "grad_norm": 0.2769851088523865, "learning_rate": 8.185747922196063e-07, "loss": 0.3225, "step": 9945 }, { "epoch": 2.5015090543259557, "grad_norm": 0.2963421046733856, "learning_rate": 8.177726800340235e-07, "loss": 0.318, "step": 9946 }, { "epoch": 2.501760563380282, "grad_norm": 0.2658748924732208, "learning_rate": 8.169709260300485e-07, "loss": 0.319, "step": 9947 }, { "epoch": 2.5020120724346078, "grad_norm": 0.2909032106399536, "learning_rate": 8.161695302763434e-07, "loss": 0.3245, "step": 9948 }, { "epoch": 2.5022635814889336, "grad_norm": 0.28727254271507263, "learning_rate": 8.15368492841545e-07, "loss": 0.3029, "step": 9949 }, { "epoch": 2.50251509054326, "grad_norm": 0.29924967885017395, "learning_rate": 8.145678137942553e-07, "loss": 0.3283, "step": 9950 }, { "epoch": 2.5027665995975856, "grad_norm": 0.28090178966522217, "learning_rate": 8.137674932030499e-07, "loss": 0.3244, "step": 9951 }, { "epoch": 2.5030181086519114, "grad_norm": 0.29318609833717346, "learning_rate": 8.129675311364682e-07, "loss": 0.334, "step": 9952 }, { "epoch": 2.5032696177062377, "grad_norm": 0.28759893774986267, "learning_rate": 8.121679276630235e-07, "loss": 0.3267, "step": 9953 }, { "epoch": 2.5035211267605635, "grad_norm": 0.3086696267127991, "learning_rate": 8.113686828511974e-07, "loss": 0.3411, "step": 9954 }, { "epoch": 2.5037726358148893, "grad_norm": 0.298218697309494, "learning_rate": 8.105697967694393e-07, "loss": 0.3243, "step": 9955 }, { "epoch": 2.5040241448692155, "grad_norm": 0.31472504138946533, "learning_rate": 8.097712694861698e-07, "loss": 0.3349, "step": 9956 }, { "epoch": 2.5042756539235413, "grad_norm": 0.3033403754234314, "learning_rate": 8.08973101069776e-07, "loss": 0.3157, "step": 9957 }, { "epoch": 2.504527162977867, "grad_norm": 0.25785255432128906, "learning_rate": 8.081752915886182e-07, "loss": 0.3112, "step": 9958 }, { "epoch": 2.5047786720321934, "grad_norm": 0.29093462228775024, "learning_rate": 8.073778411110216e-07, "loss": 0.33, "step": 9959 }, { "epoch": 2.505030181086519, "grad_norm": 0.28467297554016113, "learning_rate": 8.065807497052852e-07, "loss": 0.3359, "step": 9960 }, { "epoch": 2.505281690140845, "grad_norm": 0.28814148902893066, "learning_rate": 8.057840174396725e-07, "loss": 0.3229, "step": 9961 }, { "epoch": 2.5055331991951713, "grad_norm": 0.301241934299469, "learning_rate": 8.049876443824212e-07, "loss": 0.3187, "step": 9962 }, { "epoch": 2.505784708249497, "grad_norm": 0.2975336015224457, "learning_rate": 8.041916306017322e-07, "loss": 0.3204, "step": 9963 }, { "epoch": 2.506036217303823, "grad_norm": 0.30471301078796387, "learning_rate": 8.033959761657817e-07, "loss": 0.3381, "step": 9964 }, { "epoch": 2.506287726358149, "grad_norm": 0.2744012176990509, "learning_rate": 8.026006811427134e-07, "loss": 0.3128, "step": 9965 }, { "epoch": 2.506539235412475, "grad_norm": 0.2855711877346039, "learning_rate": 8.018057456006362e-07, "loss": 0.32, "step": 9966 }, { "epoch": 2.5067907444668007, "grad_norm": 0.28882476687431335, "learning_rate": 8.010111696076344e-07, "loss": 0.3253, "step": 9967 }, { "epoch": 2.507042253521127, "grad_norm": 0.29218101501464844, "learning_rate": 8.002169532317566e-07, "loss": 0.3323, "step": 9968 }, { "epoch": 2.5072937625754528, "grad_norm": 0.3086097836494446, "learning_rate": 7.994230965410232e-07, "loss": 0.3173, "step": 9969 }, { "epoch": 2.5075452716297786, "grad_norm": 0.2888603210449219, "learning_rate": 7.986295996034221e-07, "loss": 0.3351, "step": 9970 }, { "epoch": 2.507796780684105, "grad_norm": 0.27783140540122986, "learning_rate": 7.978364624869134e-07, "loss": 0.3177, "step": 9971 }, { "epoch": 2.5080482897384306, "grad_norm": 0.2933598756790161, "learning_rate": 7.970436852594221e-07, "loss": 0.3139, "step": 9972 }, { "epoch": 2.5082997987927564, "grad_norm": 0.2918246388435364, "learning_rate": 7.962512679888462e-07, "loss": 0.3066, "step": 9973 }, { "epoch": 2.5085513078470827, "grad_norm": 0.28106623888015747, "learning_rate": 7.954592107430498e-07, "loss": 0.3278, "step": 9974 }, { "epoch": 2.5088028169014085, "grad_norm": 0.3085310459136963, "learning_rate": 7.946675135898679e-07, "loss": 0.2984, "step": 9975 }, { "epoch": 2.5090543259557343, "grad_norm": 0.2928948402404785, "learning_rate": 7.938761765971065e-07, "loss": 0.3113, "step": 9976 }, { "epoch": 2.5093058350100605, "grad_norm": 0.29120776057243347, "learning_rate": 7.930851998325362e-07, "loss": 0.3081, "step": 9977 }, { "epoch": 2.5095573440643864, "grad_norm": 0.289899080991745, "learning_rate": 7.922945833639012e-07, "loss": 0.3236, "step": 9978 }, { "epoch": 2.509808853118712, "grad_norm": 0.27034732699394226, "learning_rate": 7.915043272589106e-07, "loss": 0.3194, "step": 9979 }, { "epoch": 2.5100603621730384, "grad_norm": 0.2954181730747223, "learning_rate": 7.907144315852472e-07, "loss": 0.3179, "step": 9980 }, { "epoch": 2.510311871227364, "grad_norm": 0.2821013033390045, "learning_rate": 7.899248964105583e-07, "loss": 0.3298, "step": 9981 }, { "epoch": 2.51056338028169, "grad_norm": 0.2877882122993469, "learning_rate": 7.891357218024653e-07, "loss": 0.3265, "step": 9982 }, { "epoch": 2.5108148893360163, "grad_norm": 0.32784876227378845, "learning_rate": 7.883469078285533e-07, "loss": 0.3191, "step": 9983 }, { "epoch": 2.511066398390342, "grad_norm": 0.28723639249801636, "learning_rate": 7.875584545563819e-07, "loss": 0.346, "step": 9984 }, { "epoch": 2.511317907444668, "grad_norm": 0.2926671504974365, "learning_rate": 7.867703620534744e-07, "loss": 0.2905, "step": 9985 }, { "epoch": 2.511569416498994, "grad_norm": 0.2825871706008911, "learning_rate": 7.85982630387328e-07, "loss": 0.3087, "step": 9986 }, { "epoch": 2.51182092555332, "grad_norm": 0.28117263317108154, "learning_rate": 7.851952596254076e-07, "loss": 0.3221, "step": 9987 }, { "epoch": 2.5120724346076457, "grad_norm": 0.2999492883682251, "learning_rate": 7.844082498351441e-07, "loss": 0.3078, "step": 9988 }, { "epoch": 2.512323943661972, "grad_norm": 0.2990661859512329, "learning_rate": 7.836216010839426e-07, "loss": 0.3162, "step": 9989 }, { "epoch": 2.512575452716298, "grad_norm": 0.30873674154281616, "learning_rate": 7.82835313439172e-07, "loss": 0.312, "step": 9990 }, { "epoch": 2.5128269617706236, "grad_norm": 0.300627201795578, "learning_rate": 7.820493869681761e-07, "loss": 0.318, "step": 9991 }, { "epoch": 2.51307847082495, "grad_norm": 0.30525755882263184, "learning_rate": 7.812638217382612e-07, "loss": 0.3613, "step": 9992 }, { "epoch": 2.5133299798792756, "grad_norm": 0.30198240280151367, "learning_rate": 7.804786178167085e-07, "loss": 0.3029, "step": 9993 }, { "epoch": 2.5135814889336014, "grad_norm": 0.3166106343269348, "learning_rate": 7.796937752707639e-07, "loss": 0.3309, "step": 9994 }, { "epoch": 2.5138329979879277, "grad_norm": 0.3065146207809448, "learning_rate": 7.789092941676468e-07, "loss": 0.345, "step": 9995 }, { "epoch": 2.5140845070422535, "grad_norm": 0.3031245470046997, "learning_rate": 7.781251745745405e-07, "loss": 0.3172, "step": 9996 }, { "epoch": 2.5143360160965793, "grad_norm": 0.29413849115371704, "learning_rate": 7.773414165586007e-07, "loss": 0.322, "step": 9997 }, { "epoch": 2.5145875251509056, "grad_norm": 0.29748326539993286, "learning_rate": 7.765580201869527e-07, "loss": 0.2931, "step": 9998 }, { "epoch": 2.5148390342052314, "grad_norm": 0.3018585443496704, "learning_rate": 7.757749855266878e-07, "loss": 0.3339, "step": 9999 }, { "epoch": 2.515090543259557, "grad_norm": 0.30707988142967224, "learning_rate": 7.749923126448694e-07, "loss": 0.3281, "step": 10000 }, { "epoch": 2.5153420523138834, "grad_norm": 0.3032652735710144, "learning_rate": 7.742100016085269e-07, "loss": 0.3183, "step": 10001 }, { "epoch": 2.515593561368209, "grad_norm": 0.30448129773139954, "learning_rate": 7.734280524846627e-07, "loss": 0.298, "step": 10002 }, { "epoch": 2.515845070422535, "grad_norm": 0.3087362051010132, "learning_rate": 7.726464653402432e-07, "loss": 0.3155, "step": 10003 }, { "epoch": 2.5160965794768613, "grad_norm": 0.2881805896759033, "learning_rate": 7.718652402422088e-07, "loss": 0.3456, "step": 10004 }, { "epoch": 2.516348088531187, "grad_norm": 0.301011323928833, "learning_rate": 7.710843772574644e-07, "loss": 0.3288, "step": 10005 }, { "epoch": 2.516599597585513, "grad_norm": 0.31224364042282104, "learning_rate": 7.70303876452888e-07, "loss": 0.3128, "step": 10006 }, { "epoch": 2.516851106639839, "grad_norm": 0.2763179838657379, "learning_rate": 7.695237378953224e-07, "loss": 0.3003, "step": 10007 }, { "epoch": 2.517102615694165, "grad_norm": 0.3021027445793152, "learning_rate": 7.687439616515846e-07, "loss": 0.3198, "step": 10008 }, { "epoch": 2.5173541247484907, "grad_norm": 0.2829072177410126, "learning_rate": 7.67964547788454e-07, "loss": 0.2887, "step": 10009 }, { "epoch": 2.517605633802817, "grad_norm": 0.31430134177207947, "learning_rate": 7.67185496372686e-07, "loss": 0.3293, "step": 10010 }, { "epoch": 2.517857142857143, "grad_norm": 0.30119824409484863, "learning_rate": 7.664068074709985e-07, "loss": 0.3175, "step": 10011 }, { "epoch": 2.5181086519114686, "grad_norm": 0.2803148329257965, "learning_rate": 7.656284811500842e-07, "loss": 0.3335, "step": 10012 }, { "epoch": 2.518360160965795, "grad_norm": 0.28627124428749084, "learning_rate": 7.648505174765986e-07, "loss": 0.3127, "step": 10013 }, { "epoch": 2.5186116700201207, "grad_norm": 0.2865979075431824, "learning_rate": 7.640729165171723e-07, "loss": 0.2974, "step": 10014 }, { "epoch": 2.5188631790744465, "grad_norm": 0.2760888636112213, "learning_rate": 7.632956783383999e-07, "loss": 0.3136, "step": 10015 }, { "epoch": 2.5191146881287727, "grad_norm": 0.2837379276752472, "learning_rate": 7.625188030068492e-07, "loss": 0.324, "step": 10016 }, { "epoch": 2.5193661971830985, "grad_norm": 0.3180212676525116, "learning_rate": 7.617422905890521e-07, "loss": 0.3085, "step": 10017 }, { "epoch": 2.5196177062374243, "grad_norm": 0.29850107431411743, "learning_rate": 7.609661411515146e-07, "loss": 0.3149, "step": 10018 }, { "epoch": 2.5198692152917506, "grad_norm": 0.32031044363975525, "learning_rate": 7.601903547607064e-07, "loss": 0.3302, "step": 10019 }, { "epoch": 2.5201207243460764, "grad_norm": 0.30878177285194397, "learning_rate": 7.594149314830717e-07, "loss": 0.3003, "step": 10020 }, { "epoch": 2.520372233400402, "grad_norm": 0.2950962781906128, "learning_rate": 7.586398713850179e-07, "loss": 0.327, "step": 10021 }, { "epoch": 2.5206237424547284, "grad_norm": 0.29020336270332336, "learning_rate": 7.578651745329263e-07, "loss": 0.3268, "step": 10022 }, { "epoch": 2.5208752515090542, "grad_norm": 0.3015232980251312, "learning_rate": 7.570908409931427e-07, "loss": 0.3087, "step": 10023 }, { "epoch": 2.52112676056338, "grad_norm": 0.29202380776405334, "learning_rate": 7.563168708319857e-07, "loss": 0.3301, "step": 10024 }, { "epoch": 2.5213782696177063, "grad_norm": 0.2775900065898895, "learning_rate": 7.555432641157396e-07, "loss": 0.3149, "step": 10025 }, { "epoch": 2.521629778672032, "grad_norm": 0.2708669900894165, "learning_rate": 7.547700209106606e-07, "loss": 0.3238, "step": 10026 }, { "epoch": 2.521881287726358, "grad_norm": 0.30167317390441895, "learning_rate": 7.539971412829705e-07, "loss": 0.3001, "step": 10027 }, { "epoch": 2.522132796780684, "grad_norm": 0.3165789246559143, "learning_rate": 7.532246252988617e-07, "loss": 0.3382, "step": 10028 }, { "epoch": 2.52238430583501, "grad_norm": 0.29492056369781494, "learning_rate": 7.524524730244975e-07, "loss": 0.3383, "step": 10029 }, { "epoch": 2.5226358148893357, "grad_norm": 0.2822633981704712, "learning_rate": 7.516806845260055e-07, "loss": 0.3003, "step": 10030 }, { "epoch": 2.522887323943662, "grad_norm": 0.3063894212245941, "learning_rate": 7.509092598694861e-07, "loss": 0.3138, "step": 10031 }, { "epoch": 2.523138832997988, "grad_norm": 0.32523077726364136, "learning_rate": 7.501381991210061e-07, "loss": 0.3164, "step": 10032 }, { "epoch": 2.5233903420523136, "grad_norm": 0.28941982984542847, "learning_rate": 7.493675023466024e-07, "loss": 0.3347, "step": 10033 }, { "epoch": 2.52364185110664, "grad_norm": 0.29666292667388916, "learning_rate": 7.485971696122796e-07, "loss": 0.324, "step": 10034 }, { "epoch": 2.5238933601609657, "grad_norm": 0.3100225031375885, "learning_rate": 7.478272009840137e-07, "loss": 0.3254, "step": 10035 }, { "epoch": 2.524144869215292, "grad_norm": 0.28848564624786377, "learning_rate": 7.470575965277455e-07, "loss": 0.3452, "step": 10036 }, { "epoch": 2.5243963782696177, "grad_norm": 0.2774829864501953, "learning_rate": 7.462883563093887e-07, "loss": 0.3218, "step": 10037 }, { "epoch": 2.5246478873239435, "grad_norm": 0.3099637031555176, "learning_rate": 7.455194803948218e-07, "loss": 0.2875, "step": 10038 }, { "epoch": 2.5248993963782698, "grad_norm": 0.28145939111709595, "learning_rate": 7.447509688498971e-07, "loss": 0.3332, "step": 10039 }, { "epoch": 2.5251509054325956, "grad_norm": 0.296622633934021, "learning_rate": 7.439828217404293e-07, "loss": 0.3478, "step": 10040 }, { "epoch": 2.5254024144869214, "grad_norm": 0.27812138199806213, "learning_rate": 7.432150391322079e-07, "loss": 0.3302, "step": 10041 }, { "epoch": 2.5256539235412476, "grad_norm": 0.26120641827583313, "learning_rate": 7.424476210909893e-07, "loss": 0.2886, "step": 10042 }, { "epoch": 2.5259054325955734, "grad_norm": 0.29901203513145447, "learning_rate": 7.41680567682495e-07, "loss": 0.3021, "step": 10043 }, { "epoch": 2.5261569416498992, "grad_norm": 0.28617119789123535, "learning_rate": 7.409138789724213e-07, "loss": 0.321, "step": 10044 }, { "epoch": 2.5264084507042255, "grad_norm": 0.28953617811203003, "learning_rate": 7.401475550264286e-07, "loss": 0.346, "step": 10045 }, { "epoch": 2.5266599597585513, "grad_norm": 0.28805407881736755, "learning_rate": 7.393815959101491e-07, "loss": 0.3206, "step": 10046 }, { "epoch": 2.5269114688128775, "grad_norm": 0.29841816425323486, "learning_rate": 7.386160016891802e-07, "loss": 0.3242, "step": 10047 }, { "epoch": 2.5271629778672033, "grad_norm": 0.2983440160751343, "learning_rate": 7.378507724290929e-07, "loss": 0.3347, "step": 10048 }, { "epoch": 2.527414486921529, "grad_norm": 0.2765296697616577, "learning_rate": 7.370859081954219e-07, "loss": 0.3093, "step": 10049 }, { "epoch": 2.5276659959758554, "grad_norm": 0.2982845902442932, "learning_rate": 7.363214090536752e-07, "loss": 0.3454, "step": 10050 }, { "epoch": 2.527917505030181, "grad_norm": 0.2902871370315552, "learning_rate": 7.355572750693252e-07, "loss": 0.3436, "step": 10051 }, { "epoch": 2.528169014084507, "grad_norm": 0.319418340921402, "learning_rate": 7.347935063078165e-07, "loss": 0.3195, "step": 10052 }, { "epoch": 2.5284205231388333, "grad_norm": 0.2917003631591797, "learning_rate": 7.34030102834562e-07, "loss": 0.3125, "step": 10053 }, { "epoch": 2.528672032193159, "grad_norm": 0.30017781257629395, "learning_rate": 7.332670647149398e-07, "loss": 0.3478, "step": 10054 }, { "epoch": 2.528923541247485, "grad_norm": 0.3094007968902588, "learning_rate": 7.325043920143027e-07, "loss": 0.3284, "step": 10055 }, { "epoch": 2.529175050301811, "grad_norm": 0.3049699366092682, "learning_rate": 7.317420847979656e-07, "loss": 0.3051, "step": 10056 }, { "epoch": 2.529426559356137, "grad_norm": 0.29030466079711914, "learning_rate": 7.30980143131218e-07, "loss": 0.3203, "step": 10057 }, { "epoch": 2.5296780684104627, "grad_norm": 0.2822028398513794, "learning_rate": 7.302185670793132e-07, "loss": 0.3209, "step": 10058 }, { "epoch": 2.529929577464789, "grad_norm": 0.28473353385925293, "learning_rate": 7.294573567074776e-07, "loss": 0.3205, "step": 10059 }, { "epoch": 2.5301810865191148, "grad_norm": 0.2997729778289795, "learning_rate": 7.286965120809014e-07, "loss": 0.3134, "step": 10060 }, { "epoch": 2.5304325955734406, "grad_norm": 0.2938189208507538, "learning_rate": 7.279360332647495e-07, "loss": 0.313, "step": 10061 }, { "epoch": 2.530684104627767, "grad_norm": 0.291318416595459, "learning_rate": 7.271759203241485e-07, "loss": 0.2945, "step": 10062 }, { "epoch": 2.5309356136820926, "grad_norm": 0.29242825508117676, "learning_rate": 7.264161733241998e-07, "loss": 0.2912, "step": 10063 }, { "epoch": 2.5311871227364184, "grad_norm": 0.2778705358505249, "learning_rate": 7.256567923299712e-07, "loss": 0.3547, "step": 10064 }, { "epoch": 2.5314386317907447, "grad_norm": 0.29071563482284546, "learning_rate": 7.248977774064975e-07, "loss": 0.3357, "step": 10065 }, { "epoch": 2.5316901408450705, "grad_norm": 0.2635411322116852, "learning_rate": 7.241391286187849e-07, "loss": 0.3257, "step": 10066 }, { "epoch": 2.5319416498993963, "grad_norm": 0.2990947961807251, "learning_rate": 7.233808460318054e-07, "loss": 0.3175, "step": 10067 }, { "epoch": 2.5321931589537225, "grad_norm": 0.2650429904460907, "learning_rate": 7.226229297105031e-07, "loss": 0.3322, "step": 10068 }, { "epoch": 2.5324446680080483, "grad_norm": 0.2894965410232544, "learning_rate": 7.218653797197861e-07, "loss": 0.307, "step": 10069 }, { "epoch": 2.532696177062374, "grad_norm": 0.26556655764579773, "learning_rate": 7.21108196124537e-07, "loss": 0.3372, "step": 10070 }, { "epoch": 2.5329476861167004, "grad_norm": 0.3001581132411957, "learning_rate": 7.20351378989601e-07, "loss": 0.3338, "step": 10071 }, { "epoch": 2.533199195171026, "grad_norm": 0.3141966164112091, "learning_rate": 7.195949283797959e-07, "loss": 0.3287, "step": 10072 }, { "epoch": 2.533450704225352, "grad_norm": 0.2945497930049896, "learning_rate": 7.188388443599081e-07, "loss": 0.3309, "step": 10073 }, { "epoch": 2.5337022132796783, "grad_norm": 0.3076508641242981, "learning_rate": 7.180831269946898e-07, "loss": 0.335, "step": 10074 }, { "epoch": 2.533953722334004, "grad_norm": 0.2986814081668854, "learning_rate": 7.173277763488646e-07, "loss": 0.3138, "step": 10075 }, { "epoch": 2.53420523138833, "grad_norm": 0.2792844772338867, "learning_rate": 7.165727924871224e-07, "loss": 0.3147, "step": 10076 }, { "epoch": 2.534456740442656, "grad_norm": 0.3163931667804718, "learning_rate": 7.158181754741239e-07, "loss": 0.3245, "step": 10077 }, { "epoch": 2.534708249496982, "grad_norm": 0.2911807894706726, "learning_rate": 7.150639253744967e-07, "loss": 0.3191, "step": 10078 }, { "epoch": 2.5349597585513077, "grad_norm": 0.284397691488266, "learning_rate": 7.143100422528382e-07, "loss": 0.3186, "step": 10079 }, { "epoch": 2.535211267605634, "grad_norm": 0.2803077697753906, "learning_rate": 7.135565261737126e-07, "loss": 0.3276, "step": 10080 }, { "epoch": 2.53546277665996, "grad_norm": 0.27358850836753845, "learning_rate": 7.128033772016557e-07, "loss": 0.3073, "step": 10081 }, { "epoch": 2.5357142857142856, "grad_norm": 0.30038154125213623, "learning_rate": 7.120505954011681e-07, "loss": 0.3076, "step": 10082 }, { "epoch": 2.535965794768612, "grad_norm": 0.3066074848175049, "learning_rate": 7.112981808367214e-07, "loss": 0.3354, "step": 10083 }, { "epoch": 2.5362173038229376, "grad_norm": 0.30170679092407227, "learning_rate": 7.105461335727564e-07, "loss": 0.3102, "step": 10084 }, { "epoch": 2.5364688128772634, "grad_norm": 0.3071909546852112, "learning_rate": 7.097944536736795e-07, "loss": 0.3112, "step": 10085 }, { "epoch": 2.5367203219315897, "grad_norm": 0.2948451638221741, "learning_rate": 7.09043141203869e-07, "loss": 0.3097, "step": 10086 }, { "epoch": 2.5369718309859155, "grad_norm": 0.31133005023002625, "learning_rate": 7.082921962276684e-07, "loss": 0.3196, "step": 10087 }, { "epoch": 2.5372233400402413, "grad_norm": 0.29081660509109497, "learning_rate": 7.075416188093936e-07, "loss": 0.2977, "step": 10088 }, { "epoch": 2.5374748490945676, "grad_norm": 0.2829488515853882, "learning_rate": 7.067914090133244e-07, "loss": 0.3158, "step": 10089 }, { "epoch": 2.5377263581488934, "grad_norm": 0.2930876612663269, "learning_rate": 7.060415669037135e-07, "loss": 0.3419, "step": 10090 }, { "epoch": 2.537977867203219, "grad_norm": 0.32287168502807617, "learning_rate": 7.052920925447792e-07, "loss": 0.3263, "step": 10091 }, { "epoch": 2.5382293762575454, "grad_norm": 0.31697362661361694, "learning_rate": 7.045429860007102e-07, "loss": 0.2973, "step": 10092 }, { "epoch": 2.538480885311871, "grad_norm": 0.28679776191711426, "learning_rate": 7.037942473356607e-07, "loss": 0.3167, "step": 10093 }, { "epoch": 2.538732394366197, "grad_norm": 0.3053102195262909, "learning_rate": 7.030458766137577e-07, "loss": 0.3345, "step": 10094 }, { "epoch": 2.5389839034205233, "grad_norm": 0.2991425395011902, "learning_rate": 7.022978738990943e-07, "loss": 0.3406, "step": 10095 }, { "epoch": 2.539235412474849, "grad_norm": 0.3642655909061432, "learning_rate": 7.015502392557305e-07, "loss": 0.3412, "step": 10096 }, { "epoch": 2.539486921529175, "grad_norm": 0.31416499614715576, "learning_rate": 7.00802972747699e-07, "loss": 0.3052, "step": 10097 }, { "epoch": 2.539738430583501, "grad_norm": 0.296009361743927, "learning_rate": 7.000560744389962e-07, "loss": 0.3015, "step": 10098 }, { "epoch": 2.539989939637827, "grad_norm": 0.31365057826042175, "learning_rate": 6.99309544393591e-07, "loss": 0.3134, "step": 10099 }, { "epoch": 2.5402414486921527, "grad_norm": 0.2986956536769867, "learning_rate": 6.985633826754173e-07, "loss": 0.3184, "step": 10100 }, { "epoch": 2.540492957746479, "grad_norm": 0.2894130349159241, "learning_rate": 6.978175893483812e-07, "loss": 0.312, "step": 10101 }, { "epoch": 2.540744466800805, "grad_norm": 0.29604774713516235, "learning_rate": 6.970721644763534e-07, "loss": 0.3198, "step": 10102 }, { "epoch": 2.5409959758551306, "grad_norm": 0.28080493211746216, "learning_rate": 6.963271081231765e-07, "loss": 0.3098, "step": 10103 }, { "epoch": 2.541247484909457, "grad_norm": 0.2949555218219757, "learning_rate": 6.955824203526585e-07, "loss": 0.3102, "step": 10104 }, { "epoch": 2.5414989939637826, "grad_norm": 0.304453581571579, "learning_rate": 6.948381012285771e-07, "loss": 0.3332, "step": 10105 }, { "epoch": 2.5417505030181085, "grad_norm": 0.3298734128475189, "learning_rate": 6.940941508146809e-07, "loss": 0.3199, "step": 10106 }, { "epoch": 2.5420020120724347, "grad_norm": 0.2873101830482483, "learning_rate": 6.933505691746816e-07, "loss": 0.3262, "step": 10107 }, { "epoch": 2.5422535211267605, "grad_norm": 0.2953130900859833, "learning_rate": 6.926073563722652e-07, "loss": 0.3264, "step": 10108 }, { "epoch": 2.5425050301810863, "grad_norm": 0.3112882971763611, "learning_rate": 6.918645124710805e-07, "loss": 0.3349, "step": 10109 }, { "epoch": 2.5427565392354126, "grad_norm": 0.28245386481285095, "learning_rate": 6.911220375347499e-07, "loss": 0.2928, "step": 10110 }, { "epoch": 2.5430080482897384, "grad_norm": 0.30176132917404175, "learning_rate": 6.903799316268595e-07, "loss": 0.3107, "step": 10111 }, { "epoch": 2.543259557344064, "grad_norm": 0.29093387722969055, "learning_rate": 6.896381948109682e-07, "loss": 0.2997, "step": 10112 }, { "epoch": 2.5435110663983904, "grad_norm": 0.2985793650150299, "learning_rate": 6.888968271505986e-07, "loss": 0.317, "step": 10113 }, { "epoch": 2.5437625754527162, "grad_norm": 0.3139978349208832, "learning_rate": 6.881558287092466e-07, "loss": 0.3097, "step": 10114 }, { "epoch": 2.544014084507042, "grad_norm": 0.30869972705841064, "learning_rate": 6.87415199550372e-07, "loss": 0.3236, "step": 10115 }, { "epoch": 2.5442655935613683, "grad_norm": 0.2910330593585968, "learning_rate": 6.866749397374062e-07, "loss": 0.303, "step": 10116 }, { "epoch": 2.544517102615694, "grad_norm": 0.31802523136138916, "learning_rate": 6.859350493337491e-07, "loss": 0.3032, "step": 10117 }, { "epoch": 2.54476861167002, "grad_norm": 0.2831653356552124, "learning_rate": 6.85195528402765e-07, "loss": 0.3226, "step": 10118 }, { "epoch": 2.545020120724346, "grad_norm": 0.29345449805259705, "learning_rate": 6.844563770077917e-07, "loss": 0.3364, "step": 10119 }, { "epoch": 2.545271629778672, "grad_norm": 0.30557841062545776, "learning_rate": 6.837175952121305e-07, "loss": 0.3013, "step": 10120 }, { "epoch": 2.5455231388329977, "grad_norm": 0.2902613580226898, "learning_rate": 6.829791830790555e-07, "loss": 0.3354, "step": 10121 }, { "epoch": 2.545774647887324, "grad_norm": 0.3239326775074005, "learning_rate": 6.82241140671806e-07, "loss": 0.3009, "step": 10122 }, { "epoch": 2.54602615694165, "grad_norm": 0.30113399028778076, "learning_rate": 6.815034680535915e-07, "loss": 0.3348, "step": 10123 }, { "epoch": 2.5462776659959756, "grad_norm": 0.30587083101272583, "learning_rate": 6.807661652875875e-07, "loss": 0.3163, "step": 10124 }, { "epoch": 2.546529175050302, "grad_norm": 0.2883608639240265, "learning_rate": 6.800292324369417e-07, "loss": 0.3324, "step": 10125 }, { "epoch": 2.5467806841046277, "grad_norm": 0.30463874340057373, "learning_rate": 6.79292669564765e-07, "loss": 0.3423, "step": 10126 }, { "epoch": 2.5470321931589535, "grad_norm": 0.28268906474113464, "learning_rate": 6.785564767341423e-07, "loss": 0.301, "step": 10127 }, { "epoch": 2.5472837022132797, "grad_norm": 0.29041969776153564, "learning_rate": 6.778206540081211e-07, "loss": 0.3189, "step": 10128 }, { "epoch": 2.5475352112676055, "grad_norm": 0.2949490547180176, "learning_rate": 6.77085201449722e-07, "loss": 0.3236, "step": 10129 }, { "epoch": 2.5477867203219313, "grad_norm": 0.31995368003845215, "learning_rate": 6.763501191219319e-07, "loss": 0.3262, "step": 10130 }, { "epoch": 2.5480382293762576, "grad_norm": 0.29409512877464294, "learning_rate": 6.756154070877047e-07, "loss": 0.3152, "step": 10131 }, { "epoch": 2.5482897384305834, "grad_norm": 0.3013671040534973, "learning_rate": 6.748810654099652e-07, "loss": 0.3187, "step": 10132 }, { "epoch": 2.5485412474849096, "grad_norm": 0.3068297207355499, "learning_rate": 6.741470941516043e-07, "loss": 0.302, "step": 10133 }, { "epoch": 2.5487927565392354, "grad_norm": 0.2889004945755005, "learning_rate": 6.734134933754826e-07, "loss": 0.3019, "step": 10134 }, { "epoch": 2.5490442655935612, "grad_norm": 0.3107074499130249, "learning_rate": 6.726802631444274e-07, "loss": 0.3373, "step": 10135 }, { "epoch": 2.5492957746478875, "grad_norm": 0.30381691455841064, "learning_rate": 6.719474035212376e-07, "loss": 0.3, "step": 10136 }, { "epoch": 2.5495472837022133, "grad_norm": 0.29392245411872864, "learning_rate": 6.712149145686748e-07, "loss": 0.3328, "step": 10137 }, { "epoch": 2.549798792756539, "grad_norm": 0.2984176576137543, "learning_rate": 6.704827963494748e-07, "loss": 0.2996, "step": 10138 }, { "epoch": 2.5500503018108653, "grad_norm": 0.31401655077934265, "learning_rate": 6.697510489263371e-07, "loss": 0.3237, "step": 10139 }, { "epoch": 2.550301810865191, "grad_norm": 0.2747150659561157, "learning_rate": 6.69019672361933e-07, "loss": 0.323, "step": 10140 }, { "epoch": 2.550553319919517, "grad_norm": 0.29566800594329834, "learning_rate": 6.682886667188987e-07, "loss": 0.3063, "step": 10141 }, { "epoch": 2.550804828973843, "grad_norm": 0.3180108964443207, "learning_rate": 6.675580320598418e-07, "loss": 0.3439, "step": 10142 }, { "epoch": 2.551056338028169, "grad_norm": 0.2787119150161743, "learning_rate": 6.668277684473346e-07, "loss": 0.3225, "step": 10143 }, { "epoch": 2.551307847082495, "grad_norm": 0.2893412709236145, "learning_rate": 6.660978759439219e-07, "loss": 0.3334, "step": 10144 }, { "epoch": 2.551559356136821, "grad_norm": 0.30765849351882935, "learning_rate": 6.653683546121126e-07, "loss": 0.3181, "step": 10145 }, { "epoch": 2.551810865191147, "grad_norm": 0.29015520215034485, "learning_rate": 6.646392045143868e-07, "loss": 0.327, "step": 10146 }, { "epoch": 2.552062374245473, "grad_norm": 0.29602178931236267, "learning_rate": 6.639104257131907e-07, "loss": 0.3178, "step": 10147 }, { "epoch": 2.552313883299799, "grad_norm": 0.3179301917552948, "learning_rate": 6.631820182709409e-07, "loss": 0.334, "step": 10148 }, { "epoch": 2.5525653923541247, "grad_norm": 0.315472811460495, "learning_rate": 6.624539822500192e-07, "loss": 0.3132, "step": 10149 }, { "epoch": 2.552816901408451, "grad_norm": 0.3242339491844177, "learning_rate": 6.617263177127797e-07, "loss": 0.3154, "step": 10150 }, { "epoch": 2.5530684104627768, "grad_norm": 0.26861920952796936, "learning_rate": 6.609990247215393e-07, "loss": 0.3406, "step": 10151 }, { "epoch": 2.5533199195171026, "grad_norm": 0.29673323035240173, "learning_rate": 6.602721033385889e-07, "loss": 0.322, "step": 10152 }, { "epoch": 2.553571428571429, "grad_norm": 0.32738587260246277, "learning_rate": 6.59545553626183e-07, "loss": 0.3157, "step": 10153 }, { "epoch": 2.5538229376257546, "grad_norm": 0.3015281558036804, "learning_rate": 6.588193756465472e-07, "loss": 0.3277, "step": 10154 }, { "epoch": 2.5540744466800804, "grad_norm": 0.2911074161529541, "learning_rate": 6.580935694618728e-07, "loss": 0.3128, "step": 10155 }, { "epoch": 2.5543259557344067, "grad_norm": 0.29950693249702454, "learning_rate": 6.573681351343226e-07, "loss": 0.3063, "step": 10156 }, { "epoch": 2.5545774647887325, "grad_norm": 0.28311774134635925, "learning_rate": 6.566430727260226e-07, "loss": 0.2949, "step": 10157 }, { "epoch": 2.5548289738430583, "grad_norm": 0.27676114439964294, "learning_rate": 6.559183822990727e-07, "loss": 0.3299, "step": 10158 }, { "epoch": 2.5550804828973845, "grad_norm": 0.2759498059749603, "learning_rate": 6.551940639155357e-07, "loss": 0.3193, "step": 10159 }, { "epoch": 2.5553319919517103, "grad_norm": 0.29512062668800354, "learning_rate": 6.544701176374462e-07, "loss": 0.3159, "step": 10160 }, { "epoch": 2.555583501006036, "grad_norm": 0.2883027195930481, "learning_rate": 6.537465435268065e-07, "loss": 0.3145, "step": 10161 }, { "epoch": 2.5558350100603624, "grad_norm": 0.2832835614681244, "learning_rate": 6.530233416455845e-07, "loss": 0.3234, "step": 10162 }, { "epoch": 2.556086519114688, "grad_norm": 0.28715407848358154, "learning_rate": 6.523005120557197e-07, "loss": 0.3323, "step": 10163 }, { "epoch": 2.556338028169014, "grad_norm": 0.3091478645801544, "learning_rate": 6.515780548191159e-07, "loss": 0.3248, "step": 10164 }, { "epoch": 2.5565895372233403, "grad_norm": 0.2894414961338043, "learning_rate": 6.508559699976486e-07, "loss": 0.3113, "step": 10165 }, { "epoch": 2.556841046277666, "grad_norm": 0.2907220721244812, "learning_rate": 6.501342576531589e-07, "loss": 0.3341, "step": 10166 }, { "epoch": 2.557092555331992, "grad_norm": 0.31087636947631836, "learning_rate": 6.494129178474579e-07, "loss": 0.338, "step": 10167 }, { "epoch": 2.557344064386318, "grad_norm": 0.2967066168785095, "learning_rate": 6.486919506423228e-07, "loss": 0.3225, "step": 10168 }, { "epoch": 2.557595573440644, "grad_norm": 0.31147968769073486, "learning_rate": 6.47971356099501e-07, "loss": 0.3556, "step": 10169 }, { "epoch": 2.5578470824949697, "grad_norm": 0.3024539053440094, "learning_rate": 6.472511342807052e-07, "loss": 0.3199, "step": 10170 }, { "epoch": 2.558098591549296, "grad_norm": 0.28230634331703186, "learning_rate": 6.465312852476197e-07, "loss": 0.3253, "step": 10171 }, { "epoch": 2.558350100603622, "grad_norm": 0.30267390608787537, "learning_rate": 6.458118090618948e-07, "loss": 0.3296, "step": 10172 }, { "epoch": 2.5586016096579476, "grad_norm": 0.27803835272789, "learning_rate": 6.450927057851481e-07, "loss": 0.296, "step": 10173 }, { "epoch": 2.558853118712274, "grad_norm": 0.2754371166229248, "learning_rate": 6.44373975478968e-07, "loss": 0.3001, "step": 10174 }, { "epoch": 2.5591046277665996, "grad_norm": 0.2934453785419464, "learning_rate": 6.436556182049069e-07, "loss": 0.3301, "step": 10175 }, { "epoch": 2.5593561368209254, "grad_norm": 0.29431700706481934, "learning_rate": 6.429376340244897e-07, "loss": 0.3223, "step": 10176 }, { "epoch": 2.5596076458752517, "grad_norm": 0.26826512813568115, "learning_rate": 6.42220022999206e-07, "loss": 0.3048, "step": 10177 }, { "epoch": 2.5598591549295775, "grad_norm": 0.2905735671520233, "learning_rate": 6.415027851905159e-07, "loss": 0.3369, "step": 10178 }, { "epoch": 2.5601106639839033, "grad_norm": 0.2780333161354065, "learning_rate": 6.407859206598443e-07, "loss": 0.3325, "step": 10179 }, { "epoch": 2.5603621730382295, "grad_norm": 0.3008924126625061, "learning_rate": 6.400694294685889e-07, "loss": 0.3048, "step": 10180 }, { "epoch": 2.5606136820925554, "grad_norm": 0.2865042984485626, "learning_rate": 6.393533116781098e-07, "loss": 0.3104, "step": 10181 }, { "epoch": 2.560865191146881, "grad_norm": 0.28145965933799744, "learning_rate": 6.386375673497397e-07, "loss": 0.3036, "step": 10182 }, { "epoch": 2.5611167002012074, "grad_norm": 0.2906494438648224, "learning_rate": 6.379221965447785e-07, "loss": 0.312, "step": 10183 }, { "epoch": 2.561368209255533, "grad_norm": 0.2934057116508484, "learning_rate": 6.37207199324491e-07, "loss": 0.3203, "step": 10184 }, { "epoch": 2.561619718309859, "grad_norm": 0.3017788827419281, "learning_rate": 6.364925757501139e-07, "loss": 0.3339, "step": 10185 }, { "epoch": 2.5618712273641853, "grad_norm": 0.2982634902000427, "learning_rate": 6.357783258828493e-07, "loss": 0.3241, "step": 10186 }, { "epoch": 2.562122736418511, "grad_norm": 0.28276070952415466, "learning_rate": 6.350644497838692e-07, "loss": 0.3201, "step": 10187 }, { "epoch": 2.562374245472837, "grad_norm": 0.27246373891830444, "learning_rate": 6.343509475143112e-07, "loss": 0.3196, "step": 10188 }, { "epoch": 2.562625754527163, "grad_norm": 0.2898280918598175, "learning_rate": 6.336378191352838e-07, "loss": 0.3311, "step": 10189 }, { "epoch": 2.562877263581489, "grad_norm": 0.29259827733039856, "learning_rate": 6.329250647078605e-07, "loss": 0.3205, "step": 10190 }, { "epoch": 2.5631287726358147, "grad_norm": 0.3051588237285614, "learning_rate": 6.322126842930864e-07, "loss": 0.3302, "step": 10191 }, { "epoch": 2.563380281690141, "grad_norm": 0.28429844975471497, "learning_rate": 6.3150067795197e-07, "loss": 0.3169, "step": 10192 }, { "epoch": 2.563631790744467, "grad_norm": 0.2904362082481384, "learning_rate": 6.307890457454907e-07, "loss": 0.3111, "step": 10193 }, { "epoch": 2.5638832997987926, "grad_norm": 0.28321024775505066, "learning_rate": 6.300777877345976e-07, "loss": 0.3277, "step": 10194 }, { "epoch": 2.564134808853119, "grad_norm": 0.2912009656429291, "learning_rate": 6.293669039802025e-07, "loss": 0.3177, "step": 10195 }, { "epoch": 2.5643863179074446, "grad_norm": 0.29499733448028564, "learning_rate": 6.286563945431906e-07, "loss": 0.3204, "step": 10196 }, { "epoch": 2.5646378269617705, "grad_norm": 0.2899915874004364, "learning_rate": 6.279462594844105e-07, "loss": 0.3003, "step": 10197 }, { "epoch": 2.5648893360160967, "grad_norm": 0.29533299803733826, "learning_rate": 6.272364988646828e-07, "loss": 0.3252, "step": 10198 }, { "epoch": 2.5651408450704225, "grad_norm": 0.2786716818809509, "learning_rate": 6.26527112744792e-07, "loss": 0.3268, "step": 10199 }, { "epoch": 2.5653923541247483, "grad_norm": 0.28114524483680725, "learning_rate": 6.258181011854947e-07, "loss": 0.3183, "step": 10200 }, { "epoch": 2.5656438631790746, "grad_norm": 0.30359089374542236, "learning_rate": 6.251094642475108e-07, "loss": 0.3293, "step": 10201 }, { "epoch": 2.5658953722334004, "grad_norm": 0.295454204082489, "learning_rate": 6.244012019915335e-07, "loss": 0.3227, "step": 10202 }, { "epoch": 2.566146881287726, "grad_norm": 0.29742130637168884, "learning_rate": 6.236933144782187e-07, "loss": 0.3293, "step": 10203 }, { "epoch": 2.5663983903420524, "grad_norm": 0.29131677746772766, "learning_rate": 6.229858017681933e-07, "loss": 0.3234, "step": 10204 }, { "epoch": 2.566649899396378, "grad_norm": 0.280320942401886, "learning_rate": 6.222786639220524e-07, "loss": 0.3104, "step": 10205 }, { "epoch": 2.566901408450704, "grad_norm": 0.33856505155563354, "learning_rate": 6.215719010003557e-07, "loss": 0.3169, "step": 10206 }, { "epoch": 2.5671529175050303, "grad_norm": 0.2823481559753418, "learning_rate": 6.208655130636354e-07, "loss": 0.3126, "step": 10207 }, { "epoch": 2.567404426559356, "grad_norm": 0.28553295135498047, "learning_rate": 6.201595001723876e-07, "loss": 0.3226, "step": 10208 }, { "epoch": 2.567655935613682, "grad_norm": 0.28577616810798645, "learning_rate": 6.194538623870794e-07, "loss": 0.3168, "step": 10209 }, { "epoch": 2.567907444668008, "grad_norm": 0.29407092928886414, "learning_rate": 6.187485997681419e-07, "loss": 0.31, "step": 10210 }, { "epoch": 2.568158953722334, "grad_norm": 0.29761141538619995, "learning_rate": 6.180437123759786e-07, "loss": 0.2955, "step": 10211 }, { "epoch": 2.5684104627766597, "grad_norm": 0.27446889877319336, "learning_rate": 6.173392002709572e-07, "loss": 0.3325, "step": 10212 }, { "epoch": 2.568661971830986, "grad_norm": 0.2877654731273651, "learning_rate": 6.166350635134166e-07, "loss": 0.3209, "step": 10213 }, { "epoch": 2.568913480885312, "grad_norm": 0.30045285820961, "learning_rate": 6.159313021636593e-07, "loss": 0.3168, "step": 10214 }, { "epoch": 2.5691649899396376, "grad_norm": 0.28202614188194275, "learning_rate": 6.152279162819597e-07, "loss": 0.3216, "step": 10215 }, { "epoch": 2.569416498993964, "grad_norm": 0.33453133702278137, "learning_rate": 6.145249059285585e-07, "loss": 0.3074, "step": 10216 }, { "epoch": 2.5696680080482897, "grad_norm": 0.28775471448898315, "learning_rate": 6.138222711636632e-07, "loss": 0.3357, "step": 10217 }, { "epoch": 2.5699195171026155, "grad_norm": 0.29132527112960815, "learning_rate": 6.131200120474512e-07, "loss": 0.3223, "step": 10218 }, { "epoch": 2.5701710261569417, "grad_norm": 0.29309695959091187, "learning_rate": 6.124181286400649e-07, "loss": 0.3157, "step": 10219 }, { "epoch": 2.5704225352112675, "grad_norm": 0.2960768938064575, "learning_rate": 6.117166210016184e-07, "loss": 0.3223, "step": 10220 }, { "epoch": 2.5706740442655933, "grad_norm": 0.2980702817440033, "learning_rate": 6.110154891921894e-07, "loss": 0.3371, "step": 10221 }, { "epoch": 2.5709255533199196, "grad_norm": 0.2795586884021759, "learning_rate": 6.103147332718274e-07, "loss": 0.3141, "step": 10222 }, { "epoch": 2.5711770623742454, "grad_norm": 0.2727452516555786, "learning_rate": 6.096143533005455e-07, "loss": 0.3137, "step": 10223 }, { "epoch": 2.571428571428571, "grad_norm": 0.28229624032974243, "learning_rate": 6.089143493383281e-07, "loss": 0.3211, "step": 10224 }, { "epoch": 2.5716800804828974, "grad_norm": 0.3388809263706207, "learning_rate": 6.082147214451272e-07, "loss": 0.3339, "step": 10225 }, { "epoch": 2.5719315895372232, "grad_norm": 0.2950616776943207, "learning_rate": 6.075154696808594e-07, "loss": 0.3158, "step": 10226 }, { "epoch": 2.572183098591549, "grad_norm": 0.2900426983833313, "learning_rate": 6.068165941054133e-07, "loss": 0.3371, "step": 10227 }, { "epoch": 2.5724346076458753, "grad_norm": 0.29088470339775085, "learning_rate": 6.061180947786411e-07, "loss": 0.3193, "step": 10228 }, { "epoch": 2.572686116700201, "grad_norm": 0.30321744084358215, "learning_rate": 6.054199717603671e-07, "loss": 0.3315, "step": 10229 }, { "epoch": 2.572937625754527, "grad_norm": 0.3024834990501404, "learning_rate": 6.047222251103796e-07, "loss": 0.3233, "step": 10230 }, { "epoch": 2.573189134808853, "grad_norm": 0.2948141396045685, "learning_rate": 6.04024854888437e-07, "loss": 0.3318, "step": 10231 }, { "epoch": 2.573440643863179, "grad_norm": 0.323097825050354, "learning_rate": 6.033278611542642e-07, "loss": 0.3179, "step": 10232 }, { "epoch": 2.573692152917505, "grad_norm": 0.2814940810203552, "learning_rate": 6.026312439675553e-07, "loss": 0.3333, "step": 10233 }, { "epoch": 2.573943661971831, "grad_norm": 0.2813478708267212, "learning_rate": 6.01935003387969e-07, "loss": 0.3158, "step": 10234 }, { "epoch": 2.574195171026157, "grad_norm": 0.27595117688179016, "learning_rate": 6.01239139475136e-07, "loss": 0.2883, "step": 10235 }, { "epoch": 2.574446680080483, "grad_norm": 0.278484970331192, "learning_rate": 6.005436522886532e-07, "loss": 0.308, "step": 10236 }, { "epoch": 2.574698189134809, "grad_norm": 0.285978227853775, "learning_rate": 5.998485418880822e-07, "loss": 0.3014, "step": 10237 }, { "epoch": 2.5749496981891347, "grad_norm": 0.3101520538330078, "learning_rate": 5.991538083329579e-07, "loss": 0.3149, "step": 10238 }, { "epoch": 2.575201207243461, "grad_norm": 0.2651280462741852, "learning_rate": 5.984594516827769e-07, "loss": 0.3223, "step": 10239 }, { "epoch": 2.5754527162977867, "grad_norm": 0.307229608297348, "learning_rate": 5.977654719970088e-07, "loss": 0.3224, "step": 10240 }, { "epoch": 2.5757042253521125, "grad_norm": 0.28054678440093994, "learning_rate": 5.970718693350874e-07, "loss": 0.3274, "step": 10241 }, { "epoch": 2.5759557344064388, "grad_norm": 0.2816450297832489, "learning_rate": 5.963786437564161e-07, "loss": 0.3536, "step": 10242 }, { "epoch": 2.5762072434607646, "grad_norm": 0.281474769115448, "learning_rate": 5.956857953203643e-07, "loss": 0.324, "step": 10243 }, { "epoch": 2.5764587525150904, "grad_norm": 0.3121716380119324, "learning_rate": 5.949933240862715e-07, "loss": 0.3248, "step": 10244 }, { "epoch": 2.5767102615694166, "grad_norm": 0.28747543692588806, "learning_rate": 5.94301230113442e-07, "loss": 0.3225, "step": 10245 }, { "epoch": 2.5769617706237424, "grad_norm": 0.29833775758743286, "learning_rate": 5.936095134611508e-07, "loss": 0.2994, "step": 10246 }, { "epoch": 2.5772132796780687, "grad_norm": 0.2817133069038391, "learning_rate": 5.929181741886386e-07, "loss": 0.3039, "step": 10247 }, { "epoch": 2.5774647887323945, "grad_norm": 0.28056350350379944, "learning_rate": 5.922272123551137e-07, "loss": 0.3196, "step": 10248 }, { "epoch": 2.5777162977867203, "grad_norm": 0.28009307384490967, "learning_rate": 5.915366280197537e-07, "loss": 0.3259, "step": 10249 }, { "epoch": 2.5779678068410465, "grad_norm": 0.303177148103714, "learning_rate": 5.908464212417014e-07, "loss": 0.3336, "step": 10250 }, { "epoch": 2.5782193158953723, "grad_norm": 0.2899198532104492, "learning_rate": 5.901565920800711e-07, "loss": 0.2883, "step": 10251 }, { "epoch": 2.578470824949698, "grad_norm": 0.28011229634284973, "learning_rate": 5.894671405939389e-07, "loss": 0.331, "step": 10252 }, { "epoch": 2.5787223340040244, "grad_norm": 0.2783331274986267, "learning_rate": 5.887780668423553e-07, "loss": 0.3173, "step": 10253 }, { "epoch": 2.57897384305835, "grad_norm": 0.31792569160461426, "learning_rate": 5.880893708843332e-07, "loss": 0.3361, "step": 10254 }, { "epoch": 2.579225352112676, "grad_norm": 0.2781875729560852, "learning_rate": 5.87401052778856e-07, "loss": 0.3333, "step": 10255 }, { "epoch": 2.5794768611670023, "grad_norm": 0.29065370559692383, "learning_rate": 5.867131125848729e-07, "loss": 0.3436, "step": 10256 }, { "epoch": 2.579728370221328, "grad_norm": 0.2745533585548401, "learning_rate": 5.860255503613033e-07, "loss": 0.3263, "step": 10257 }, { "epoch": 2.579979879275654, "grad_norm": 0.2924485206604004, "learning_rate": 5.853383661670303e-07, "loss": 0.3291, "step": 10258 }, { "epoch": 2.58023138832998, "grad_norm": 0.288885235786438, "learning_rate": 5.846515600609093e-07, "loss": 0.3057, "step": 10259 }, { "epoch": 2.580482897384306, "grad_norm": 0.2955508530139923, "learning_rate": 5.839651321017586e-07, "loss": 0.3173, "step": 10260 }, { "epoch": 2.5807344064386317, "grad_norm": 0.289850115776062, "learning_rate": 5.832790823483691e-07, "loss": 0.3112, "step": 10261 }, { "epoch": 2.580985915492958, "grad_norm": 0.30250605940818787, "learning_rate": 5.825934108594938e-07, "loss": 0.3184, "step": 10262 }, { "epoch": 2.5812374245472838, "grad_norm": 0.28620943427085876, "learning_rate": 5.81908117693859e-07, "loss": 0.3132, "step": 10263 }, { "epoch": 2.5814889336016096, "grad_norm": 0.2834034264087677, "learning_rate": 5.812232029101533e-07, "loss": 0.3057, "step": 10264 }, { "epoch": 2.581740442655936, "grad_norm": 0.2939947545528412, "learning_rate": 5.805386665670376e-07, "loss": 0.3077, "step": 10265 }, { "epoch": 2.5819919517102616, "grad_norm": 0.3035759925842285, "learning_rate": 5.79854508723136e-07, "loss": 0.3171, "step": 10266 }, { "epoch": 2.5822434607645874, "grad_norm": 0.28236502408981323, "learning_rate": 5.791707294370447e-07, "loss": 0.3288, "step": 10267 }, { "epoch": 2.5824949698189137, "grad_norm": 0.2845684587955475, "learning_rate": 5.784873287673226e-07, "loss": 0.3225, "step": 10268 }, { "epoch": 2.5827464788732395, "grad_norm": 0.31281614303588867, "learning_rate": 5.778043067725009e-07, "loss": 0.3177, "step": 10269 }, { "epoch": 2.5829979879275653, "grad_norm": 0.33208024501800537, "learning_rate": 5.771216635110738e-07, "loss": 0.2962, "step": 10270 }, { "epoch": 2.5832494969818915, "grad_norm": 0.28506436944007874, "learning_rate": 5.764393990415079e-07, "loss": 0.3374, "step": 10271 }, { "epoch": 2.5835010060362174, "grad_norm": 0.29046157002449036, "learning_rate": 5.757575134222332e-07, "loss": 0.3216, "step": 10272 }, { "epoch": 2.583752515090543, "grad_norm": 0.2891055941581726, "learning_rate": 5.750760067116501e-07, "loss": 0.3129, "step": 10273 }, { "epoch": 2.5840040241448694, "grad_norm": 0.2932874858379364, "learning_rate": 5.743948789681236e-07, "loss": 0.2921, "step": 10274 }, { "epoch": 2.584255533199195, "grad_norm": 0.3003051280975342, "learning_rate": 5.73714130249991e-07, "loss": 0.3158, "step": 10275 }, { "epoch": 2.584507042253521, "grad_norm": 0.278804749250412, "learning_rate": 5.730337606155506e-07, "loss": 0.3074, "step": 10276 }, { "epoch": 2.5847585513078473, "grad_norm": 0.297124445438385, "learning_rate": 5.723537701230747e-07, "loss": 0.3316, "step": 10277 }, { "epoch": 2.585010060362173, "grad_norm": 0.28858664631843567, "learning_rate": 5.716741588307983e-07, "loss": 0.3064, "step": 10278 }, { "epoch": 2.585261569416499, "grad_norm": 0.30511143803596497, "learning_rate": 5.709949267969267e-07, "loss": 0.3197, "step": 10279 }, { "epoch": 2.585513078470825, "grad_norm": 0.2882242500782013, "learning_rate": 5.703160740796332e-07, "loss": 0.3344, "step": 10280 }, { "epoch": 2.585764587525151, "grad_norm": 0.32658329606056213, "learning_rate": 5.696376007370541e-07, "loss": 0.3257, "step": 10281 }, { "epoch": 2.5860160965794767, "grad_norm": 0.3152960538864136, "learning_rate": 5.689595068273002e-07, "loss": 0.3103, "step": 10282 }, { "epoch": 2.586267605633803, "grad_norm": 0.2786172330379486, "learning_rate": 5.682817924084422e-07, "loss": 0.3174, "step": 10283 }, { "epoch": 2.586519114688129, "grad_norm": 0.29683375358581543, "learning_rate": 5.676044575385254e-07, "loss": 0.3426, "step": 10284 }, { "epoch": 2.5867706237424546, "grad_norm": 0.28705403208732605, "learning_rate": 5.669275022755566e-07, "loss": 0.3055, "step": 10285 }, { "epoch": 2.587022132796781, "grad_norm": 0.29597151279449463, "learning_rate": 5.662509266775151e-07, "loss": 0.3117, "step": 10286 }, { "epoch": 2.5872736418511066, "grad_norm": 0.29061004519462585, "learning_rate": 5.655747308023434e-07, "loss": 0.3414, "step": 10287 }, { "epoch": 2.5875251509054324, "grad_norm": 0.3225204646587372, "learning_rate": 5.648989147079553e-07, "loss": 0.3319, "step": 10288 }, { "epoch": 2.5877766599597587, "grad_norm": 0.27866053581237793, "learning_rate": 5.642234784522282e-07, "loss": 0.3036, "step": 10289 }, { "epoch": 2.5880281690140845, "grad_norm": 0.3126038610935211, "learning_rate": 5.635484220930098e-07, "loss": 0.3094, "step": 10290 }, { "epoch": 2.5882796780684103, "grad_norm": 0.2966510057449341, "learning_rate": 5.628737456881161e-07, "loss": 0.3066, "step": 10291 }, { "epoch": 2.5885311871227366, "grad_norm": 0.2831152677536011, "learning_rate": 5.621994492953264e-07, "loss": 0.3296, "step": 10292 }, { "epoch": 2.5887826961770624, "grad_norm": 0.2895983159542084, "learning_rate": 5.615255329723917e-07, "loss": 0.3145, "step": 10293 }, { "epoch": 2.589034205231388, "grad_norm": 0.29133400321006775, "learning_rate": 5.608519967770276e-07, "loss": 0.3194, "step": 10294 }, { "epoch": 2.5892857142857144, "grad_norm": 0.2729475200176239, "learning_rate": 5.601788407669196e-07, "loss": 0.3218, "step": 10295 }, { "epoch": 2.58953722334004, "grad_norm": 0.2915710508823395, "learning_rate": 5.595060649997175e-07, "loss": 0.3297, "step": 10296 }, { "epoch": 2.589788732394366, "grad_norm": 0.31030383706092834, "learning_rate": 5.588336695330421e-07, "loss": 0.3357, "step": 10297 }, { "epoch": 2.5900402414486923, "grad_norm": 0.3029539883136749, "learning_rate": 5.581616544244778e-07, "loss": 0.2825, "step": 10298 }, { "epoch": 2.590291750503018, "grad_norm": 0.30874502658843994, "learning_rate": 5.574900197315814e-07, "loss": 0.3133, "step": 10299 }, { "epoch": 2.590543259557344, "grad_norm": 0.3056495785713196, "learning_rate": 5.568187655118712e-07, "loss": 0.3328, "step": 10300 }, { "epoch": 2.59079476861167, "grad_norm": 0.29649117588996887, "learning_rate": 5.561478918228369e-07, "loss": 0.31, "step": 10301 }, { "epoch": 2.591046277665996, "grad_norm": 0.2896314263343811, "learning_rate": 5.55477398721937e-07, "loss": 0.3237, "step": 10302 }, { "epoch": 2.5912977867203217, "grad_norm": 0.2920903265476227, "learning_rate": 5.548072862665909e-07, "loss": 0.2982, "step": 10303 }, { "epoch": 2.591549295774648, "grad_norm": 0.309803307056427, "learning_rate": 5.541375545141936e-07, "loss": 0.3153, "step": 10304 }, { "epoch": 2.591800804828974, "grad_norm": 0.3066963255405426, "learning_rate": 5.534682035221e-07, "loss": 0.3182, "step": 10305 }, { "epoch": 2.5920523138832996, "grad_norm": 0.27324411273002625, "learning_rate": 5.527992333476389e-07, "loss": 0.3304, "step": 10306 }, { "epoch": 2.592303822937626, "grad_norm": 0.3093414604663849, "learning_rate": 5.521306440481005e-07, "loss": 0.331, "step": 10307 }, { "epoch": 2.5925553319919517, "grad_norm": 0.29584071040153503, "learning_rate": 5.514624356807474e-07, "loss": 0.3306, "step": 10308 }, { "epoch": 2.5928068410462775, "grad_norm": 0.27764448523521423, "learning_rate": 5.507946083028059e-07, "loss": 0.3037, "step": 10309 }, { "epoch": 2.5930583501006037, "grad_norm": 0.30795976519584656, "learning_rate": 5.501271619714732e-07, "loss": 0.311, "step": 10310 }, { "epoch": 2.5933098591549295, "grad_norm": 0.29350659251213074, "learning_rate": 5.494600967439095e-07, "loss": 0.3163, "step": 10311 }, { "epoch": 2.5935613682092553, "grad_norm": 0.3054122030735016, "learning_rate": 5.487934126772465e-07, "loss": 0.327, "step": 10312 }, { "epoch": 2.5938128772635816, "grad_norm": 0.29255709052085876, "learning_rate": 5.481271098285818e-07, "loss": 0.3198, "step": 10313 }, { "epoch": 2.5940643863179074, "grad_norm": 0.26492074131965637, "learning_rate": 5.474611882549785e-07, "loss": 0.3185, "step": 10314 }, { "epoch": 2.594315895372233, "grad_norm": 0.306562215089798, "learning_rate": 5.4679564801347e-07, "loss": 0.3028, "step": 10315 }, { "epoch": 2.5945674044265594, "grad_norm": 0.3125389814376831, "learning_rate": 5.461304891610541e-07, "loss": 0.3101, "step": 10316 }, { "epoch": 2.5948189134808852, "grad_norm": 0.31623390316963196, "learning_rate": 5.454657117546996e-07, "loss": 0.3141, "step": 10317 }, { "epoch": 2.595070422535211, "grad_norm": 0.2654491066932678, "learning_rate": 5.448013158513388e-07, "loss": 0.3211, "step": 10318 }, { "epoch": 2.5953219315895373, "grad_norm": 0.30861228704452515, "learning_rate": 5.441373015078744e-07, "loss": 0.3259, "step": 10319 }, { "epoch": 2.595573440643863, "grad_norm": 0.26960867643356323, "learning_rate": 5.434736687811731e-07, "loss": 0.3155, "step": 10320 }, { "epoch": 2.595824949698189, "grad_norm": 0.26485928893089294, "learning_rate": 5.428104177280735e-07, "loss": 0.3251, "step": 10321 }, { "epoch": 2.596076458752515, "grad_norm": 0.29853057861328125, "learning_rate": 5.421475484053762e-07, "loss": 0.3669, "step": 10322 }, { "epoch": 2.596327967806841, "grad_norm": 0.29327720403671265, "learning_rate": 5.414850608698535e-07, "loss": 0.3355, "step": 10323 }, { "epoch": 2.5965794768611667, "grad_norm": 0.2685185372829437, "learning_rate": 5.408229551782435e-07, "loss": 0.3319, "step": 10324 }, { "epoch": 2.596830985915493, "grad_norm": 0.3062201142311096, "learning_rate": 5.4016123138725e-07, "loss": 0.3081, "step": 10325 }, { "epoch": 2.597082494969819, "grad_norm": 0.3145555555820465, "learning_rate": 5.394998895535475e-07, "loss": 0.312, "step": 10326 }, { "epoch": 2.5973340040241446, "grad_norm": 0.30714958906173706, "learning_rate": 5.388389297337737e-07, "loss": 0.3083, "step": 10327 }, { "epoch": 2.597585513078471, "grad_norm": 0.28293806314468384, "learning_rate": 5.381783519845374e-07, "loss": 0.3066, "step": 10328 }, { "epoch": 2.5978370221327967, "grad_norm": 0.2907370626926422, "learning_rate": 5.375181563624116e-07, "loss": 0.3577, "step": 10329 }, { "epoch": 2.5980885311871225, "grad_norm": 0.3050544559955597, "learning_rate": 5.368583429239394e-07, "loss": 0.3285, "step": 10330 }, { "epoch": 2.5983400402414487, "grad_norm": 0.3092591166496277, "learning_rate": 5.361989117256277e-07, "loss": 0.3384, "step": 10331 }, { "epoch": 2.5985915492957745, "grad_norm": 0.27648666501045227, "learning_rate": 5.355398628239544e-07, "loss": 0.3422, "step": 10332 }, { "epoch": 2.5988430583501008, "grad_norm": 0.3023586869239807, "learning_rate": 5.34881196275362e-07, "loss": 0.3052, "step": 10333 }, { "epoch": 2.5990945674044266, "grad_norm": 0.2839665710926056, "learning_rate": 5.342229121362607e-07, "loss": 0.2988, "step": 10334 }, { "epoch": 2.5993460764587524, "grad_norm": 0.3002241849899292, "learning_rate": 5.335650104630308e-07, "loss": 0.3427, "step": 10335 }, { "epoch": 2.5995975855130786, "grad_norm": 0.27492570877075195, "learning_rate": 5.329074913120141e-07, "loss": 0.3233, "step": 10336 }, { "epoch": 2.5998490945674044, "grad_norm": 0.29533281922340393, "learning_rate": 5.322503547395263e-07, "loss": 0.3129, "step": 10337 }, { "epoch": 2.6001006036217302, "grad_norm": 0.285576194524765, "learning_rate": 5.31593600801844e-07, "loss": 0.314, "step": 10338 }, { "epoch": 2.6003521126760565, "grad_norm": 0.2748103439807892, "learning_rate": 5.309372295552173e-07, "loss": 0.3299, "step": 10339 }, { "epoch": 2.6006036217303823, "grad_norm": 0.2951182425022125, "learning_rate": 5.302812410558567e-07, "loss": 0.3202, "step": 10340 }, { "epoch": 2.600855130784708, "grad_norm": 0.30300065875053406, "learning_rate": 5.296256353599466e-07, "loss": 0.3153, "step": 10341 }, { "epoch": 2.6011066398390343, "grad_norm": 0.3175194561481476, "learning_rate": 5.289704125236333e-07, "loss": 0.3119, "step": 10342 }, { "epoch": 2.60135814889336, "grad_norm": 0.2759941518306732, "learning_rate": 5.283155726030348e-07, "loss": 0.3194, "step": 10343 }, { "epoch": 2.6016096579476864, "grad_norm": 0.30253252387046814, "learning_rate": 5.276611156542316e-07, "loss": 0.2918, "step": 10344 }, { "epoch": 2.601861167002012, "grad_norm": 0.2967860698699951, "learning_rate": 5.270070417332745e-07, "loss": 0.3106, "step": 10345 }, { "epoch": 2.602112676056338, "grad_norm": 0.2763741612434387, "learning_rate": 5.263533508961827e-07, "loss": 0.3111, "step": 10346 }, { "epoch": 2.6023641851106643, "grad_norm": 0.28950735926628113, "learning_rate": 5.257000431989384e-07, "loss": 0.3368, "step": 10347 }, { "epoch": 2.60261569416499, "grad_norm": 0.2865469753742218, "learning_rate": 5.250471186974954e-07, "loss": 0.3163, "step": 10348 }, { "epoch": 2.602867203219316, "grad_norm": 0.27980104088783264, "learning_rate": 5.243945774477699e-07, "loss": 0.333, "step": 10349 }, { "epoch": 2.603118712273642, "grad_norm": 0.2943830192089081, "learning_rate": 5.237424195056512e-07, "loss": 0.3181, "step": 10350 }, { "epoch": 2.603370221327968, "grad_norm": 0.2930108606815338, "learning_rate": 5.230906449269895e-07, "loss": 0.3005, "step": 10351 }, { "epoch": 2.6036217303822937, "grad_norm": 0.291398823261261, "learning_rate": 5.224392537676077e-07, "loss": 0.3232, "step": 10352 }, { "epoch": 2.60387323943662, "grad_norm": 0.30126404762268066, "learning_rate": 5.217882460832912e-07, "loss": 0.3264, "step": 10353 }, { "epoch": 2.6041247484909458, "grad_norm": 0.28334009647369385, "learning_rate": 5.21137621929797e-07, "loss": 0.3262, "step": 10354 }, { "epoch": 2.6043762575452716, "grad_norm": 0.2912385165691376, "learning_rate": 5.204873813628447e-07, "loss": 0.3388, "step": 10355 }, { "epoch": 2.604627766599598, "grad_norm": 0.29965826869010925, "learning_rate": 5.198375244381243e-07, "loss": 0.324, "step": 10356 }, { "epoch": 2.6048792756539236, "grad_norm": 0.2777501046657562, "learning_rate": 5.191880512112934e-07, "loss": 0.3292, "step": 10357 }, { "epoch": 2.6051307847082494, "grad_norm": 0.2996978163719177, "learning_rate": 5.185389617379727e-07, "loss": 0.337, "step": 10358 }, { "epoch": 2.6053822937625757, "grad_norm": 0.2818640470504761, "learning_rate": 5.178902560737554e-07, "loss": 0.3316, "step": 10359 }, { "epoch": 2.6056338028169015, "grad_norm": 0.2935522794723511, "learning_rate": 5.172419342741963e-07, "loss": 0.3255, "step": 10360 }, { "epoch": 2.6058853118712273, "grad_norm": 0.27610835433006287, "learning_rate": 5.165939963948225e-07, "loss": 0.3366, "step": 10361 }, { "epoch": 2.6061368209255535, "grad_norm": 0.3033030331134796, "learning_rate": 5.159464424911242e-07, "loss": 0.3233, "step": 10362 }, { "epoch": 2.6063883299798793, "grad_norm": 0.29915034770965576, "learning_rate": 5.152992726185619e-07, "loss": 0.3066, "step": 10363 }, { "epoch": 2.606639839034205, "grad_norm": 0.3014090955257416, "learning_rate": 5.146524868325592e-07, "loss": 0.3178, "step": 10364 }, { "epoch": 2.6068913480885314, "grad_norm": 0.31432896852493286, "learning_rate": 5.140060851885109e-07, "loss": 0.3133, "step": 10365 }, { "epoch": 2.607142857142857, "grad_norm": 0.28758054971694946, "learning_rate": 5.133600677417782e-07, "loss": 0.3389, "step": 10366 }, { "epoch": 2.607394366197183, "grad_norm": 0.2802176773548126, "learning_rate": 5.127144345476865e-07, "loss": 0.2957, "step": 10367 }, { "epoch": 2.6076458752515093, "grad_norm": 0.30097344517707825, "learning_rate": 5.120691856615323e-07, "loss": 0.3412, "step": 10368 }, { "epoch": 2.607897384305835, "grad_norm": 0.28366386890411377, "learning_rate": 5.114243211385744e-07, "loss": 0.3119, "step": 10369 }, { "epoch": 2.608148893360161, "grad_norm": 0.2719464898109436, "learning_rate": 5.107798410340442e-07, "loss": 0.3054, "step": 10370 }, { "epoch": 2.608400402414487, "grad_norm": 0.2885161340236664, "learning_rate": 5.101357454031352e-07, "loss": 0.3178, "step": 10371 }, { "epoch": 2.608651911468813, "grad_norm": 0.29005712270736694, "learning_rate": 5.094920343010124e-07, "loss": 0.3128, "step": 10372 }, { "epoch": 2.6089034205231387, "grad_norm": 0.27925872802734375, "learning_rate": 5.08848707782803e-07, "loss": 0.3333, "step": 10373 }, { "epoch": 2.609154929577465, "grad_norm": 0.272777795791626, "learning_rate": 5.082057659036061e-07, "loss": 0.3407, "step": 10374 }, { "epoch": 2.609406438631791, "grad_norm": 0.2825217843055725, "learning_rate": 5.07563208718484e-07, "loss": 0.3213, "step": 10375 }, { "epoch": 2.6096579476861166, "grad_norm": 0.2770220637321472, "learning_rate": 5.069210362824694e-07, "loss": 0.3095, "step": 10376 }, { "epoch": 2.609909456740443, "grad_norm": 0.296953946352005, "learning_rate": 5.062792486505586e-07, "loss": 0.3356, "step": 10377 }, { "epoch": 2.6101609657947686, "grad_norm": 0.305620402097702, "learning_rate": 5.056378458777183e-07, "loss": 0.3265, "step": 10378 }, { "epoch": 2.6104124748490944, "grad_norm": 0.2921883463859558, "learning_rate": 5.049968280188788e-07, "loss": 0.3275, "step": 10379 }, { "epoch": 2.6106639839034207, "grad_norm": 0.31497353315353394, "learning_rate": 5.043561951289411e-07, "loss": 0.3168, "step": 10380 }, { "epoch": 2.6109154929577465, "grad_norm": 0.2880299687385559, "learning_rate": 5.0371594726277e-07, "loss": 0.3245, "step": 10381 }, { "epoch": 2.6111670020120723, "grad_norm": 0.2965189814567566, "learning_rate": 5.030760844752003e-07, "loss": 0.3289, "step": 10382 }, { "epoch": 2.6114185110663986, "grad_norm": 0.29246169328689575, "learning_rate": 5.024366068210307e-07, "loss": 0.3146, "step": 10383 }, { "epoch": 2.6116700201207244, "grad_norm": 0.2918647229671478, "learning_rate": 5.017975143550296e-07, "loss": 0.3293, "step": 10384 }, { "epoch": 2.61192152917505, "grad_norm": 0.30444636940956116, "learning_rate": 5.011588071319295e-07, "loss": 0.3029, "step": 10385 }, { "epoch": 2.6121730382293764, "grad_norm": 0.3097932040691376, "learning_rate": 5.005204852064344e-07, "loss": 0.3443, "step": 10386 }, { "epoch": 2.612424547283702, "grad_norm": 0.2824104428291321, "learning_rate": 4.998825486332098e-07, "loss": 0.3114, "step": 10387 }, { "epoch": 2.612676056338028, "grad_norm": 0.27352190017700195, "learning_rate": 4.992449974668933e-07, "loss": 0.2861, "step": 10388 }, { "epoch": 2.6129275653923543, "grad_norm": 0.31323835253715515, "learning_rate": 4.986078317620852e-07, "loss": 0.3132, "step": 10389 }, { "epoch": 2.61317907444668, "grad_norm": 0.3092406690120697, "learning_rate": 4.979710515733566e-07, "loss": 0.2916, "step": 10390 }, { "epoch": 2.613430583501006, "grad_norm": 0.28336817026138306, "learning_rate": 4.973346569552417e-07, "loss": 0.314, "step": 10391 }, { "epoch": 2.613682092555332, "grad_norm": 0.27843061089515686, "learning_rate": 4.966986479622454e-07, "loss": 0.3101, "step": 10392 }, { "epoch": 2.613933601609658, "grad_norm": 0.2896105945110321, "learning_rate": 4.96063024648838e-07, "loss": 0.3245, "step": 10393 }, { "epoch": 2.6141851106639837, "grad_norm": 0.2984098792076111, "learning_rate": 4.954277870694552e-07, "loss": 0.3126, "step": 10394 }, { "epoch": 2.61443661971831, "grad_norm": 0.3082030117511749, "learning_rate": 4.947929352785024e-07, "loss": 0.298, "step": 10395 }, { "epoch": 2.614688128772636, "grad_norm": 0.3350622355937958, "learning_rate": 4.941584693303497e-07, "loss": 0.3467, "step": 10396 }, { "epoch": 2.6149396378269616, "grad_norm": 0.2979256510734558, "learning_rate": 4.935243892793362e-07, "loss": 0.3165, "step": 10397 }, { "epoch": 2.615191146881288, "grad_norm": 0.3071826100349426, "learning_rate": 4.928906951797657e-07, "loss": 0.3366, "step": 10398 }, { "epoch": 2.6154426559356136, "grad_norm": 0.2692726254463196, "learning_rate": 4.922573870859115e-07, "loss": 0.2949, "step": 10399 }, { "epoch": 2.6156941649899395, "grad_norm": 0.2713705003261566, "learning_rate": 4.916244650520108e-07, "loss": 0.3231, "step": 10400 }, { "epoch": 2.6159456740442657, "grad_norm": 0.282787561416626, "learning_rate": 4.909919291322718e-07, "loss": 0.3244, "step": 10401 }, { "epoch": 2.6161971830985915, "grad_norm": 0.2967943549156189, "learning_rate": 4.90359779380864e-07, "loss": 0.3158, "step": 10402 }, { "epoch": 2.6164486921529173, "grad_norm": 0.30457451939582825, "learning_rate": 4.897280158519307e-07, "loss": 0.3176, "step": 10403 }, { "epoch": 2.6167002012072436, "grad_norm": 0.3144105076789856, "learning_rate": 4.890966385995754e-07, "loss": 0.3444, "step": 10404 }, { "epoch": 2.6169517102615694, "grad_norm": 0.28179723024368286, "learning_rate": 4.884656476778738e-07, "loss": 0.3197, "step": 10405 }, { "epoch": 2.617203219315895, "grad_norm": 0.2976565361022949, "learning_rate": 4.878350431408641e-07, "loss": 0.32, "step": 10406 }, { "epoch": 2.6174547283702214, "grad_norm": 0.2884789705276489, "learning_rate": 4.872048250425565e-07, "loss": 0.3244, "step": 10407 }, { "epoch": 2.6177062374245472, "grad_norm": 0.30798858404159546, "learning_rate": 4.865749934369224e-07, "loss": 0.3102, "step": 10408 }, { "epoch": 2.617957746478873, "grad_norm": 0.2883943021297455, "learning_rate": 4.859455483779041e-07, "loss": 0.3245, "step": 10409 }, { "epoch": 2.6182092555331993, "grad_norm": 0.28204745054244995, "learning_rate": 4.853164899194107e-07, "loss": 0.3222, "step": 10410 }, { "epoch": 2.618460764587525, "grad_norm": 0.28429892659187317, "learning_rate": 4.846878181153153e-07, "loss": 0.3085, "step": 10411 }, { "epoch": 2.618712273641851, "grad_norm": 0.2938331663608551, "learning_rate": 4.840595330194614e-07, "loss": 0.3001, "step": 10412 }, { "epoch": 2.618963782696177, "grad_norm": 0.28804659843444824, "learning_rate": 4.834316346856565e-07, "loss": 0.352, "step": 10413 }, { "epoch": 2.619215291750503, "grad_norm": 0.2770189642906189, "learning_rate": 4.828041231676766e-07, "loss": 0.3067, "step": 10414 }, { "epoch": 2.6194668008048287, "grad_norm": 0.2969238758087158, "learning_rate": 4.821769985192637e-07, "loss": 0.3361, "step": 10415 }, { "epoch": 2.619718309859155, "grad_norm": 0.2775512933731079, "learning_rate": 4.815502607941286e-07, "loss": 0.3031, "step": 10416 }, { "epoch": 2.619969818913481, "grad_norm": 0.287936806678772, "learning_rate": 4.809239100459451e-07, "loss": 0.3292, "step": 10417 }, { "epoch": 2.6202213279678066, "grad_norm": 0.2935434877872467, "learning_rate": 4.80297946328358e-07, "loss": 0.2995, "step": 10418 }, { "epoch": 2.620472837022133, "grad_norm": 0.3020229637622833, "learning_rate": 4.796723696949762e-07, "loss": 0.3263, "step": 10419 }, { "epoch": 2.6207243460764587, "grad_norm": 0.29107797145843506, "learning_rate": 4.790471801993768e-07, "loss": 0.2952, "step": 10420 }, { "epoch": 2.6209758551307845, "grad_norm": 0.29532748460769653, "learning_rate": 4.784223778951042e-07, "loss": 0.3259, "step": 10421 }, { "epoch": 2.6212273641851107, "grad_norm": 0.29053303599357605, "learning_rate": 4.777979628356672e-07, "loss": 0.3274, "step": 10422 }, { "epoch": 2.6214788732394365, "grad_norm": 0.3015473783016205, "learning_rate": 4.771739350745447e-07, "loss": 0.3069, "step": 10423 }, { "epoch": 2.6217303822937623, "grad_norm": 0.29153281450271606, "learning_rate": 4.7655029466517897e-07, "loss": 0.3218, "step": 10424 }, { "epoch": 2.6219818913480886, "grad_norm": 0.30175068974494934, "learning_rate": 4.759270416609829e-07, "loss": 0.3358, "step": 10425 }, { "epoch": 2.6222334004024144, "grad_norm": 0.2931424081325531, "learning_rate": 4.753041761153326e-07, "loss": 0.3102, "step": 10426 }, { "epoch": 2.62248490945674, "grad_norm": 0.2768230438232422, "learning_rate": 4.746816980815738e-07, "loss": 0.3365, "step": 10427 }, { "epoch": 2.6227364185110664, "grad_norm": 0.2788086235523224, "learning_rate": 4.7405960761301606e-07, "loss": 0.3253, "step": 10428 }, { "epoch": 2.6229879275653922, "grad_norm": 0.27975523471832275, "learning_rate": 4.7343790476294005e-07, "loss": 0.3132, "step": 10429 }, { "epoch": 2.623239436619718, "grad_norm": 0.28822508454322815, "learning_rate": 4.7281658958458877e-07, "loss": 0.3114, "step": 10430 }, { "epoch": 2.6234909456740443, "grad_norm": 0.29682502150535583, "learning_rate": 4.7219566213117406e-07, "loss": 0.326, "step": 10431 }, { "epoch": 2.62374245472837, "grad_norm": 0.290158748626709, "learning_rate": 4.7157512245587623e-07, "loss": 0.3218, "step": 10432 }, { "epoch": 2.6239939637826963, "grad_norm": 0.3001720607280731, "learning_rate": 4.7095497061183826e-07, "loss": 0.3339, "step": 10433 }, { "epoch": 2.624245472837022, "grad_norm": 0.2894241511821747, "learning_rate": 4.703352066521749e-07, "loss": 0.3424, "step": 10434 }, { "epoch": 2.624496981891348, "grad_norm": 0.2992819547653198, "learning_rate": 4.697158306299621e-07, "loss": 0.2913, "step": 10435 }, { "epoch": 2.624748490945674, "grad_norm": 0.2854137420654297, "learning_rate": 4.6909684259824785e-07, "loss": 0.3385, "step": 10436 }, { "epoch": 2.625, "grad_norm": 0.28658536076545715, "learning_rate": 4.6847824261004313e-07, "loss": 0.3146, "step": 10437 }, { "epoch": 2.625251509054326, "grad_norm": 0.2668541371822357, "learning_rate": 4.6786003071832895e-07, "loss": 0.3246, "step": 10438 }, { "epoch": 2.625503018108652, "grad_norm": 0.3052661120891571, "learning_rate": 4.6724220697604904e-07, "loss": 0.3195, "step": 10439 }, { "epoch": 2.625754527162978, "grad_norm": 0.2899504005908966, "learning_rate": 4.666247714361183e-07, "loss": 0.3266, "step": 10440 }, { "epoch": 2.6260060362173037, "grad_norm": 0.3089952766895294, "learning_rate": 4.6600772415141437e-07, "loss": 0.3129, "step": 10441 }, { "epoch": 2.62625754527163, "grad_norm": 0.281039834022522, "learning_rate": 4.6539106517478394e-07, "loss": 0.2899, "step": 10442 }, { "epoch": 2.6265090543259557, "grad_norm": 0.2719120383262634, "learning_rate": 4.647747945590414e-07, "loss": 0.3245, "step": 10443 }, { "epoch": 2.626760563380282, "grad_norm": 0.2826182544231415, "learning_rate": 4.6415891235696453e-07, "loss": 0.3268, "step": 10444 }, { "epoch": 2.6270120724346078, "grad_norm": 0.30722248554229736, "learning_rate": 4.635434186213017e-07, "loss": 0.3347, "step": 10445 }, { "epoch": 2.6272635814889336, "grad_norm": 0.2918623685836792, "learning_rate": 4.6292831340476406e-07, "loss": 0.3351, "step": 10446 }, { "epoch": 2.62751509054326, "grad_norm": 0.29145464301109314, "learning_rate": 4.623135967600334e-07, "loss": 0.3048, "step": 10447 }, { "epoch": 2.6277665995975856, "grad_norm": 0.298068106174469, "learning_rate": 4.616992687397548e-07, "loss": 0.2789, "step": 10448 }, { "epoch": 2.6280181086519114, "grad_norm": 0.3069632649421692, "learning_rate": 4.610853293965434e-07, "loss": 0.3214, "step": 10449 }, { "epoch": 2.6282696177062377, "grad_norm": 0.28947851061820984, "learning_rate": 4.6047177878297654e-07, "loss": 0.3121, "step": 10450 }, { "epoch": 2.6285211267605635, "grad_norm": 0.2965468168258667, "learning_rate": 4.5985861695160393e-07, "loss": 0.3294, "step": 10451 }, { "epoch": 2.6287726358148893, "grad_norm": 0.2832191288471222, "learning_rate": 4.592458439549369e-07, "loss": 0.3213, "step": 10452 }, { "epoch": 2.6290241448692155, "grad_norm": 0.3064347803592682, "learning_rate": 4.586334598454567e-07, "loss": 0.3124, "step": 10453 }, { "epoch": 2.6292756539235413, "grad_norm": 0.3119833469390869, "learning_rate": 4.5802146467561035e-07, "loss": 0.3171, "step": 10454 }, { "epoch": 2.629527162977867, "grad_norm": 0.2910575568675995, "learning_rate": 4.574098584978104e-07, "loss": 0.2897, "step": 10455 }, { "epoch": 2.6297786720321934, "grad_norm": 0.30219748616218567, "learning_rate": 4.5679864136443874e-07, "loss": 0.3135, "step": 10456 }, { "epoch": 2.630030181086519, "grad_norm": 0.30393391847610474, "learning_rate": 4.5618781332784026e-07, "loss": 0.343, "step": 10457 }, { "epoch": 2.630281690140845, "grad_norm": 0.3024074137210846, "learning_rate": 4.5557737444033025e-07, "loss": 0.3359, "step": 10458 }, { "epoch": 2.6305331991951713, "grad_norm": 0.2811712324619293, "learning_rate": 4.549673247541875e-07, "loss": 0.3059, "step": 10459 }, { "epoch": 2.630784708249497, "grad_norm": 0.2942906320095062, "learning_rate": 4.543576643216607e-07, "loss": 0.3207, "step": 10460 }, { "epoch": 2.631036217303823, "grad_norm": 0.2751673758029938, "learning_rate": 4.5374839319496156e-07, "loss": 0.3229, "step": 10461 }, { "epoch": 2.631287726358149, "grad_norm": 0.2935747504234314, "learning_rate": 4.531395114262721e-07, "loss": 0.3112, "step": 10462 }, { "epoch": 2.631539235412475, "grad_norm": 0.27842676639556885, "learning_rate": 4.525310190677379e-07, "loss": 0.3132, "step": 10463 }, { "epoch": 2.6317907444668007, "grad_norm": 0.27436089515686035, "learning_rate": 4.5192291617147274e-07, "loss": 0.3149, "step": 10464 }, { "epoch": 2.632042253521127, "grad_norm": 0.27890241146087646, "learning_rate": 4.5131520278955785e-07, "loss": 0.2893, "step": 10465 }, { "epoch": 2.6322937625754528, "grad_norm": 0.27693304419517517, "learning_rate": 4.507078789740388e-07, "loss": 0.315, "step": 10466 }, { "epoch": 2.6325452716297786, "grad_norm": 0.309488445520401, "learning_rate": 4.5010094477693057e-07, "loss": 0.3331, "step": 10467 }, { "epoch": 2.632796780684105, "grad_norm": 0.29617559909820557, "learning_rate": 4.4949440025021105e-07, "loss": 0.3234, "step": 10468 }, { "epoch": 2.6330482897384306, "grad_norm": 0.2881315350532532, "learning_rate": 4.488882454458299e-07, "loss": 0.3431, "step": 10469 }, { "epoch": 2.6332997987927564, "grad_norm": 0.3251035213470459, "learning_rate": 4.482824804156971e-07, "loss": 0.3014, "step": 10470 }, { "epoch": 2.6335513078470827, "grad_norm": 0.3029896020889282, "learning_rate": 4.476771052116957e-07, "loss": 0.2974, "step": 10471 }, { "epoch": 2.6338028169014085, "grad_norm": 0.2852858901023865, "learning_rate": 4.4707211988567036e-07, "loss": 0.2965, "step": 10472 }, { "epoch": 2.6340543259557343, "grad_norm": 0.2950040400028229, "learning_rate": 4.464675244894351e-07, "loss": 0.3049, "step": 10473 }, { "epoch": 2.6343058350100605, "grad_norm": 0.3013119399547577, "learning_rate": 4.458633190747691e-07, "loss": 0.3099, "step": 10474 }, { "epoch": 2.6345573440643864, "grad_norm": 0.30011510848999023, "learning_rate": 4.452595036934193e-07, "loss": 0.3137, "step": 10475 }, { "epoch": 2.634808853118712, "grad_norm": 0.2907141149044037, "learning_rate": 4.4465607839709934e-07, "loss": 0.3165, "step": 10476 }, { "epoch": 2.6350603621730384, "grad_norm": 0.28316307067871094, "learning_rate": 4.440530432374873e-07, "loss": 0.3043, "step": 10477 }, { "epoch": 2.635311871227364, "grad_norm": 0.315193772315979, "learning_rate": 4.434503982662314e-07, "loss": 0.3224, "step": 10478 }, { "epoch": 2.63556338028169, "grad_norm": 0.27484557032585144, "learning_rate": 4.4284814353494187e-07, "loss": 0.2935, "step": 10479 }, { "epoch": 2.6358148893360163, "grad_norm": 0.30151981115341187, "learning_rate": 4.4224627909520034e-07, "loss": 0.3119, "step": 10480 }, { "epoch": 2.636066398390342, "grad_norm": 0.2999078631401062, "learning_rate": 4.4164480499855114e-07, "loss": 0.3054, "step": 10481 }, { "epoch": 2.636317907444668, "grad_norm": 0.30388715863227844, "learning_rate": 4.410437212965085e-07, "loss": 0.3041, "step": 10482 }, { "epoch": 2.636569416498994, "grad_norm": 0.2887566387653351, "learning_rate": 4.404430280405492e-07, "loss": 0.2967, "step": 10483 }, { "epoch": 2.63682092555332, "grad_norm": 0.27546751499176025, "learning_rate": 4.3984272528212077e-07, "loss": 0.3144, "step": 10484 }, { "epoch": 2.6370724346076457, "grad_norm": 0.2938852310180664, "learning_rate": 4.3924281307263397e-07, "loss": 0.3284, "step": 10485 }, { "epoch": 2.637323943661972, "grad_norm": 0.30135706067085266, "learning_rate": 4.3864329146346804e-07, "loss": 0.3305, "step": 10486 }, { "epoch": 2.637575452716298, "grad_norm": 0.2923177182674408, "learning_rate": 4.3804416050596933e-07, "loss": 0.3229, "step": 10487 }, { "epoch": 2.6378269617706236, "grad_norm": 0.28316444158554077, "learning_rate": 4.374454202514483e-07, "loss": 0.3427, "step": 10488 }, { "epoch": 2.63807847082495, "grad_norm": 0.28909602761268616, "learning_rate": 4.3684707075118403e-07, "loss": 0.3228, "step": 10489 }, { "epoch": 2.6383299798792756, "grad_norm": 0.28920796513557434, "learning_rate": 4.362491120564205e-07, "loss": 0.3346, "step": 10490 }, { "epoch": 2.6385814889336014, "grad_norm": 0.2695193290710449, "learning_rate": 4.356515442183712e-07, "loss": 0.3424, "step": 10491 }, { "epoch": 2.6388329979879277, "grad_norm": 0.28552255034446716, "learning_rate": 4.3505436728821125e-07, "loss": 0.3054, "step": 10492 }, { "epoch": 2.6390845070422535, "grad_norm": 0.27092212438583374, "learning_rate": 4.344575813170876e-07, "loss": 0.3152, "step": 10493 }, { "epoch": 2.6393360160965793, "grad_norm": 0.29936766624450684, "learning_rate": 4.3386118635610875e-07, "loss": 0.3311, "step": 10494 }, { "epoch": 2.6395875251509056, "grad_norm": 0.2581084966659546, "learning_rate": 4.3326518245635494e-07, "loss": 0.3198, "step": 10495 }, { "epoch": 2.6398390342052314, "grad_norm": 0.27823901176452637, "learning_rate": 4.326695696688682e-07, "loss": 0.3174, "step": 10496 }, { "epoch": 2.640090543259557, "grad_norm": 0.2890242636203766, "learning_rate": 4.320743480446593e-07, "loss": 0.3019, "step": 10497 }, { "epoch": 2.6403420523138834, "grad_norm": 0.2868576645851135, "learning_rate": 4.31479517634707e-07, "loss": 0.3138, "step": 10498 }, { "epoch": 2.640593561368209, "grad_norm": 0.3102249205112457, "learning_rate": 4.308850784899521e-07, "loss": 0.3264, "step": 10499 }, { "epoch": 2.640845070422535, "grad_norm": 0.27739331126213074, "learning_rate": 4.3029103066130673e-07, "loss": 0.3361, "step": 10500 }, { "epoch": 2.6410965794768613, "grad_norm": 0.28932255506515503, "learning_rate": 4.296973741996463e-07, "loss": 0.3229, "step": 10501 }, { "epoch": 2.641348088531187, "grad_norm": 0.2557661235332489, "learning_rate": 4.291041091558146e-07, "loss": 0.3158, "step": 10502 }, { "epoch": 2.641599597585513, "grad_norm": 0.282735675573349, "learning_rate": 4.2851123558061927e-07, "loss": 0.3341, "step": 10503 }, { "epoch": 2.641851106639839, "grad_norm": 0.2887149155139923, "learning_rate": 4.2791875352483857e-07, "loss": 0.3283, "step": 10504 }, { "epoch": 2.642102615694165, "grad_norm": 0.29206418991088867, "learning_rate": 4.273266630392131e-07, "loss": 0.3085, "step": 10505 }, { "epoch": 2.6423541247484907, "grad_norm": 0.26142215728759766, "learning_rate": 4.267349641744534e-07, "loss": 0.3334, "step": 10506 }, { "epoch": 2.642605633802817, "grad_norm": 0.2744672894477844, "learning_rate": 4.261436569812322e-07, "loss": 0.3323, "step": 10507 }, { "epoch": 2.642857142857143, "grad_norm": 0.2745734453201294, "learning_rate": 4.2555274151019456e-07, "loss": 0.335, "step": 10508 }, { "epoch": 2.6431086519114686, "grad_norm": 0.2822088897228241, "learning_rate": 4.2496221781194557e-07, "loss": 0.3042, "step": 10509 }, { "epoch": 2.643360160965795, "grad_norm": 0.2912130653858185, "learning_rate": 4.2437208593706204e-07, "loss": 0.3195, "step": 10510 }, { "epoch": 2.6436116700201207, "grad_norm": 0.284824401140213, "learning_rate": 4.2378234593608346e-07, "loss": 0.3012, "step": 10511 }, { "epoch": 2.6438631790744465, "grad_norm": 0.28413233160972595, "learning_rate": 4.231929978595195e-07, "loss": 0.3298, "step": 10512 }, { "epoch": 2.6441146881287727, "grad_norm": 0.3148362636566162, "learning_rate": 4.226040417578414e-07, "loss": 0.3313, "step": 10513 }, { "epoch": 2.6443661971830985, "grad_norm": 0.3029150366783142, "learning_rate": 4.2201547768149277e-07, "loss": 0.319, "step": 10514 }, { "epoch": 2.6446177062374243, "grad_norm": 0.3129327595233917, "learning_rate": 4.214273056808771e-07, "loss": 0.2861, "step": 10515 }, { "epoch": 2.6448692152917506, "grad_norm": 0.2981908917427063, "learning_rate": 4.208395258063702e-07, "loss": 0.3343, "step": 10516 }, { "epoch": 2.6451207243460764, "grad_norm": 0.2968314588069916, "learning_rate": 4.202521381083102e-07, "loss": 0.3344, "step": 10517 }, { "epoch": 2.645372233400402, "grad_norm": 0.30850720405578613, "learning_rate": 4.196651426370041e-07, "loss": 0.3308, "step": 10518 }, { "epoch": 2.6456237424547284, "grad_norm": 0.2996951937675476, "learning_rate": 4.190785394427238e-07, "loss": 0.3206, "step": 10519 }, { "epoch": 2.6458752515090542, "grad_norm": 0.2981989085674286, "learning_rate": 4.184923285757092e-07, "loss": 0.334, "step": 10520 }, { "epoch": 2.64612676056338, "grad_norm": 0.26357463002204895, "learning_rate": 4.1790651008616343e-07, "loss": 0.335, "step": 10521 }, { "epoch": 2.6463782696177063, "grad_norm": 0.2741795480251312, "learning_rate": 4.1732108402426087e-07, "loss": 0.3304, "step": 10522 }, { "epoch": 2.646629778672032, "grad_norm": 0.292894572019577, "learning_rate": 4.167360504401374e-07, "loss": 0.3099, "step": 10523 }, { "epoch": 2.646881287726358, "grad_norm": 0.30272337794303894, "learning_rate": 4.161514093838992e-07, "loss": 0.3106, "step": 10524 }, { "epoch": 2.647132796780684, "grad_norm": 0.31396594643592834, "learning_rate": 4.155671609056156e-07, "loss": 0.314, "step": 10525 }, { "epoch": 2.64738430583501, "grad_norm": 0.29578897356987, "learning_rate": 4.1498330505532533e-07, "loss": 0.3251, "step": 10526 }, { "epoch": 2.6476358148893357, "grad_norm": 0.3138672709465027, "learning_rate": 4.1439984188303027e-07, "loss": 0.3185, "step": 10527 }, { "epoch": 2.647887323943662, "grad_norm": 0.28872033953666687, "learning_rate": 4.1381677143870147e-07, "loss": 0.3277, "step": 10528 }, { "epoch": 2.648138832997988, "grad_norm": 0.28270837664604187, "learning_rate": 4.1323409377227565e-07, "loss": 0.3312, "step": 10529 }, { "epoch": 2.6483903420523136, "grad_norm": 0.3057926595211029, "learning_rate": 4.1265180893365453e-07, "loss": 0.3222, "step": 10530 }, { "epoch": 2.64864185110664, "grad_norm": 0.30965495109558105, "learning_rate": 4.1206991697270825e-07, "loss": 0.3254, "step": 10531 }, { "epoch": 2.6488933601609657, "grad_norm": 0.2973712086677551, "learning_rate": 4.114884179392709e-07, "loss": 0.332, "step": 10532 }, { "epoch": 2.649144869215292, "grad_norm": 0.26278215646743774, "learning_rate": 4.1090731188314583e-07, "loss": 0.32, "step": 10533 }, { "epoch": 2.6493963782696177, "grad_norm": 0.28730037808418274, "learning_rate": 4.103265988540989e-07, "loss": 0.3395, "step": 10534 }, { "epoch": 2.6496478873239435, "grad_norm": 0.2832060754299164, "learning_rate": 4.0974627890186745e-07, "loss": 0.2925, "step": 10535 }, { "epoch": 2.6498993963782698, "grad_norm": 0.28094425797462463, "learning_rate": 4.0916635207614906e-07, "loss": 0.3131, "step": 10536 }, { "epoch": 2.6501509054325956, "grad_norm": 0.2945941984653473, "learning_rate": 4.0858681842661395e-07, "loss": 0.3151, "step": 10537 }, { "epoch": 2.6504024144869214, "grad_norm": 0.28159478306770325, "learning_rate": 4.080076780028924e-07, "loss": 0.3046, "step": 10538 }, { "epoch": 2.6506539235412476, "grad_norm": 0.3074738085269928, "learning_rate": 4.0742893085458644e-07, "loss": 0.3226, "step": 10539 }, { "epoch": 2.6509054325955734, "grad_norm": 0.28362908959388733, "learning_rate": 4.068505770312625e-07, "loss": 0.3322, "step": 10540 }, { "epoch": 2.6511569416498992, "grad_norm": 0.3007882833480835, "learning_rate": 4.062726165824504e-07, "loss": 0.3386, "step": 10541 }, { "epoch": 2.6514084507042255, "grad_norm": 0.3021930158138275, "learning_rate": 4.0569504955765227e-07, "loss": 0.3079, "step": 10542 }, { "epoch": 2.6516599597585513, "grad_norm": 0.2768537700176239, "learning_rate": 4.0511787600632955e-07, "loss": 0.3495, "step": 10543 }, { "epoch": 2.6519114688128775, "grad_norm": 0.2638303339481354, "learning_rate": 4.045410959779167e-07, "loss": 0.3116, "step": 10544 }, { "epoch": 2.6521629778672033, "grad_norm": 0.27994871139526367, "learning_rate": 4.0396470952180857e-07, "loss": 0.3143, "step": 10545 }, { "epoch": 2.652414486921529, "grad_norm": 0.286160409450531, "learning_rate": 4.033887166873712e-07, "loss": 0.3084, "step": 10546 }, { "epoch": 2.6526659959758554, "grad_norm": 0.28356102108955383, "learning_rate": 4.028131175239336e-07, "loss": 0.3367, "step": 10547 }, { "epoch": 2.652917505030181, "grad_norm": 0.31378769874572754, "learning_rate": 4.022379120807929e-07, "loss": 0.3136, "step": 10548 }, { "epoch": 2.653169014084507, "grad_norm": 0.29306289553642273, "learning_rate": 4.016631004072108e-07, "loss": 0.3319, "step": 10549 }, { "epoch": 2.6534205231388333, "grad_norm": 0.2860982418060303, "learning_rate": 4.010886825524174e-07, "loss": 0.3403, "step": 10550 }, { "epoch": 2.653672032193159, "grad_norm": 0.29843586683273315, "learning_rate": 4.0051465856560836e-07, "loss": 0.2994, "step": 10551 }, { "epoch": 2.653923541247485, "grad_norm": 0.29828014969825745, "learning_rate": 3.999410284959432e-07, "loss": 0.3126, "step": 10552 }, { "epoch": 2.654175050301811, "grad_norm": 0.2804122567176819, "learning_rate": 3.9936779239255207e-07, "loss": 0.3178, "step": 10553 }, { "epoch": 2.654426559356137, "grad_norm": 0.29577210545539856, "learning_rate": 3.987949503045274e-07, "loss": 0.326, "step": 10554 }, { "epoch": 2.6546780684104627, "grad_norm": 0.2835969030857086, "learning_rate": 3.982225022809311e-07, "loss": 0.3026, "step": 10555 }, { "epoch": 2.654929577464789, "grad_norm": 0.27092668414115906, "learning_rate": 3.9765044837078825e-07, "loss": 0.2987, "step": 10556 }, { "epoch": 2.6551810865191148, "grad_norm": 0.28689372539520264, "learning_rate": 3.970787886230926e-07, "loss": 0.3423, "step": 10557 }, { "epoch": 2.6554325955734406, "grad_norm": 0.294495165348053, "learning_rate": 3.965075230868026e-07, "loss": 0.3245, "step": 10558 }, { "epoch": 2.655684104627767, "grad_norm": 0.2807731628417969, "learning_rate": 3.9593665181084427e-07, "loss": 0.3214, "step": 10559 }, { "epoch": 2.6559356136820926, "grad_norm": 0.29218292236328125, "learning_rate": 3.953661748441079e-07, "loss": 0.2978, "step": 10560 }, { "epoch": 2.6561871227364184, "grad_norm": 0.3011685013771057, "learning_rate": 3.947960922354527e-07, "loss": 0.3007, "step": 10561 }, { "epoch": 2.6564386317907447, "grad_norm": 0.2898184359073639, "learning_rate": 3.9422640403370236e-07, "loss": 0.3316, "step": 10562 }, { "epoch": 2.6566901408450705, "grad_norm": 0.28175485134124756, "learning_rate": 3.936571102876463e-07, "loss": 0.319, "step": 10563 }, { "epoch": 2.6569416498993963, "grad_norm": 0.2649247944355011, "learning_rate": 3.930882110460421e-07, "loss": 0.3151, "step": 10564 }, { "epoch": 2.6571931589537225, "grad_norm": 0.2886830270290375, "learning_rate": 3.925197063576114e-07, "loss": 0.3035, "step": 10565 }, { "epoch": 2.6574446680080483, "grad_norm": 0.28880345821380615, "learning_rate": 3.9195159627104465e-07, "loss": 0.3268, "step": 10566 }, { "epoch": 2.657696177062374, "grad_norm": 0.2972525954246521, "learning_rate": 3.913838808349946e-07, "loss": 0.3078, "step": 10567 }, { "epoch": 2.6579476861167004, "grad_norm": 0.298945814371109, "learning_rate": 3.908165600980845e-07, "loss": 0.3142, "step": 10568 }, { "epoch": 2.658199195171026, "grad_norm": 0.3075450658798218, "learning_rate": 3.9024963410890015e-07, "loss": 0.3152, "step": 10569 }, { "epoch": 2.658450704225352, "grad_norm": 0.31689146161079407, "learning_rate": 3.8968310291599753e-07, "loss": 0.3172, "step": 10570 }, { "epoch": 2.6587022132796783, "grad_norm": 0.2927457094192505, "learning_rate": 3.89116966567894e-07, "loss": 0.3291, "step": 10571 }, { "epoch": 2.658953722334004, "grad_norm": 0.2956581115722656, "learning_rate": 3.885512251130763e-07, "loss": 0.3041, "step": 10572 }, { "epoch": 2.65920523138833, "grad_norm": 0.29633039236068726, "learning_rate": 3.879858785999979e-07, "loss": 0.3256, "step": 10573 }, { "epoch": 2.659456740442656, "grad_norm": 0.2878909707069397, "learning_rate": 3.874209270770762e-07, "loss": 0.3356, "step": 10574 }, { "epoch": 2.659708249496982, "grad_norm": 0.3077471852302551, "learning_rate": 3.8685637059269587e-07, "loss": 0.3131, "step": 10575 }, { "epoch": 2.6599597585513077, "grad_norm": 0.28095880150794983, "learning_rate": 3.862922091952076e-07, "loss": 0.3165, "step": 10576 }, { "epoch": 2.660211267605634, "grad_norm": 0.3052351772785187, "learning_rate": 3.857284429329289e-07, "loss": 0.3114, "step": 10577 }, { "epoch": 2.66046277665996, "grad_norm": 0.3149491250514984, "learning_rate": 3.851650718541411e-07, "loss": 0.3186, "step": 10578 }, { "epoch": 2.6607142857142856, "grad_norm": 0.30292919278144836, "learning_rate": 3.846020960070956e-07, "loss": 0.3441, "step": 10579 }, { "epoch": 2.660965794768612, "grad_norm": 0.3036801218986511, "learning_rate": 3.8403951544000617e-07, "loss": 0.3152, "step": 10580 }, { "epoch": 2.6612173038229376, "grad_norm": 0.2941226661205292, "learning_rate": 3.834773302010553e-07, "loss": 0.3112, "step": 10581 }, { "epoch": 2.6614688128772634, "grad_norm": 0.30887505412101746, "learning_rate": 3.829155403383894e-07, "loss": 0.3293, "step": 10582 }, { "epoch": 2.6617203219315897, "grad_norm": 0.28087976574897766, "learning_rate": 3.823541459001234e-07, "loss": 0.3246, "step": 10583 }, { "epoch": 2.6619718309859155, "grad_norm": 0.28773432970046997, "learning_rate": 3.8179314693433775e-07, "loss": 0.3033, "step": 10584 }, { "epoch": 2.6622233400402413, "grad_norm": 0.27361011505126953, "learning_rate": 3.8123254348907676e-07, "loss": 0.3338, "step": 10585 }, { "epoch": 2.6624748490945676, "grad_norm": 0.28717923164367676, "learning_rate": 3.806723356123543e-07, "loss": 0.3316, "step": 10586 }, { "epoch": 2.6627263581488934, "grad_norm": 0.2967682182788849, "learning_rate": 3.801125233521469e-07, "loss": 0.3631, "step": 10587 }, { "epoch": 2.662977867203219, "grad_norm": 0.25790077447891235, "learning_rate": 3.7955310675640066e-07, "loss": 0.3155, "step": 10588 }, { "epoch": 2.6632293762575454, "grad_norm": 0.25773194432258606, "learning_rate": 3.789940858730251e-07, "loss": 0.2978, "step": 10589 }, { "epoch": 2.663480885311871, "grad_norm": 0.295448899269104, "learning_rate": 3.7843546074989747e-07, "loss": 0.3467, "step": 10590 }, { "epoch": 2.663732394366197, "grad_norm": 0.27285072207450867, "learning_rate": 3.778772314348594e-07, "loss": 0.3165, "step": 10591 }, { "epoch": 2.6639839034205233, "grad_norm": 0.2875916361808777, "learning_rate": 3.773193979757217e-07, "loss": 0.2996, "step": 10592 }, { "epoch": 2.664235412474849, "grad_norm": 0.3071140646934509, "learning_rate": 3.7676196042025715e-07, "loss": 0.3157, "step": 10593 }, { "epoch": 2.664486921529175, "grad_norm": 0.3103017807006836, "learning_rate": 3.7620491881620814e-07, "loss": 0.3411, "step": 10594 }, { "epoch": 2.664738430583501, "grad_norm": 0.3201421797275543, "learning_rate": 3.7564827321128203e-07, "loss": 0.3409, "step": 10595 }, { "epoch": 2.664989939637827, "grad_norm": 0.2804863154888153, "learning_rate": 3.750920236531502e-07, "loss": 0.3188, "step": 10596 }, { "epoch": 2.6652414486921527, "grad_norm": 0.30754902958869934, "learning_rate": 3.7453617018945453e-07, "loss": 0.3183, "step": 10597 }, { "epoch": 2.665492957746479, "grad_norm": 0.2903851866722107, "learning_rate": 3.739807128677986e-07, "loss": 0.3096, "step": 10598 }, { "epoch": 2.665744466800805, "grad_norm": 0.2837523818016052, "learning_rate": 3.734256517357543e-07, "loss": 0.3035, "step": 10599 }, { "epoch": 2.6659959758551306, "grad_norm": 0.3047906160354614, "learning_rate": 3.7287098684085867e-07, "loss": 0.3157, "step": 10600 }, { "epoch": 2.666247484909457, "grad_norm": 0.2783104181289673, "learning_rate": 3.72316718230617e-07, "loss": 0.3306, "step": 10601 }, { "epoch": 2.6664989939637826, "grad_norm": 0.27863505482673645, "learning_rate": 3.717628459524963e-07, "loss": 0.3356, "step": 10602 }, { "epoch": 2.6667505030181085, "grad_norm": 0.2805593013763428, "learning_rate": 3.7120937005393487e-07, "loss": 0.3201, "step": 10603 }, { "epoch": 2.6670020120724347, "grad_norm": 0.30254265666007996, "learning_rate": 3.7065629058233245e-07, "loss": 0.2921, "step": 10604 }, { "epoch": 2.6672535211267605, "grad_norm": 0.30693769454956055, "learning_rate": 3.701036075850578e-07, "loss": 0.3277, "step": 10605 }, { "epoch": 2.6675050301810863, "grad_norm": 0.29164475202560425, "learning_rate": 3.695513211094448e-07, "loss": 0.3082, "step": 10606 }, { "epoch": 2.6677565392354126, "grad_norm": 0.2975282371044159, "learning_rate": 3.689994312027928e-07, "loss": 0.3147, "step": 10607 }, { "epoch": 2.6680080482897384, "grad_norm": 0.2964397072792053, "learning_rate": 3.6844793791236897e-07, "loss": 0.3245, "step": 10608 }, { "epoch": 2.668259557344064, "grad_norm": 0.29004722833633423, "learning_rate": 3.678968412854034e-07, "loss": 0.3261, "step": 10609 }, { "epoch": 2.6685110663983904, "grad_norm": 0.27896592020988464, "learning_rate": 3.67346141369096e-07, "loss": 0.3172, "step": 10610 }, { "epoch": 2.6687625754527162, "grad_norm": 0.2943844497203827, "learning_rate": 3.6679583821060904e-07, "loss": 0.3059, "step": 10611 }, { "epoch": 2.669014084507042, "grad_norm": 0.3017599880695343, "learning_rate": 3.662459318570738e-07, "loss": 0.3441, "step": 10612 }, { "epoch": 2.6692655935613683, "grad_norm": 0.29490116238594055, "learning_rate": 3.656964223555848e-07, "loss": 0.3213, "step": 10613 }, { "epoch": 2.669517102615694, "grad_norm": 0.30418360233306885, "learning_rate": 3.6514730975320657e-07, "loss": 0.3226, "step": 10614 }, { "epoch": 2.66976861167002, "grad_norm": 0.2704751789569855, "learning_rate": 3.645985940969643e-07, "loss": 0.3592, "step": 10615 }, { "epoch": 2.670020120724346, "grad_norm": 0.27038201689720154, "learning_rate": 3.6405027543385374e-07, "loss": 0.3128, "step": 10616 }, { "epoch": 2.670271629778672, "grad_norm": 0.3021993637084961, "learning_rate": 3.6350235381083563e-07, "loss": 0.3064, "step": 10617 }, { "epoch": 2.6705231388329977, "grad_norm": 0.2776448428630829, "learning_rate": 3.629548292748342e-07, "loss": 0.3311, "step": 10618 }, { "epoch": 2.670774647887324, "grad_norm": 0.2895011901855469, "learning_rate": 3.624077018727429e-07, "loss": 0.2909, "step": 10619 }, { "epoch": 2.67102615694165, "grad_norm": 0.27862903475761414, "learning_rate": 3.6186097165141875e-07, "loss": 0.3213, "step": 10620 }, { "epoch": 2.6712776659959756, "grad_norm": 0.3124517798423767, "learning_rate": 3.6131463865768714e-07, "loss": 0.3084, "step": 10621 }, { "epoch": 2.671529175050302, "grad_norm": 0.28285762667655945, "learning_rate": 3.607687029383361e-07, "loss": 0.3213, "step": 10622 }, { "epoch": 2.6717806841046277, "grad_norm": 0.30333417654037476, "learning_rate": 3.6022316454012387e-07, "loss": 0.3243, "step": 10623 }, { "epoch": 2.6720321931589535, "grad_norm": 0.2860763370990753, "learning_rate": 3.5967802350977024e-07, "loss": 0.3055, "step": 10624 }, { "epoch": 2.6722837022132797, "grad_norm": 0.3174950182437897, "learning_rate": 3.591332798939651e-07, "loss": 0.3369, "step": 10625 }, { "epoch": 2.6725352112676055, "grad_norm": 0.3028983175754547, "learning_rate": 3.5858893373936e-07, "loss": 0.3352, "step": 10626 }, { "epoch": 2.6727867203219313, "grad_norm": 0.27620789408683777, "learning_rate": 3.5804498509257766e-07, "loss": 0.3241, "step": 10627 }, { "epoch": 2.6730382293762576, "grad_norm": 0.2942667007446289, "learning_rate": 3.575014340002009e-07, "loss": 0.3004, "step": 10628 }, { "epoch": 2.6732897384305834, "grad_norm": 0.300465852022171, "learning_rate": 3.56958280508784e-07, "loss": 0.3163, "step": 10629 }, { "epoch": 2.6735412474849096, "grad_norm": 0.30546244978904724, "learning_rate": 3.564155246648432e-07, "loss": 0.3497, "step": 10630 }, { "epoch": 2.6737927565392354, "grad_norm": 0.27925771474838257, "learning_rate": 3.558731665148629e-07, "loss": 0.3144, "step": 10631 }, { "epoch": 2.6740442655935612, "grad_norm": 0.2924439013004303, "learning_rate": 3.5533120610529157e-07, "loss": 0.3182, "step": 10632 }, { "epoch": 2.6742957746478875, "grad_norm": 0.27821362018585205, "learning_rate": 3.547896434825465e-07, "loss": 0.3016, "step": 10633 }, { "epoch": 2.6745472837022133, "grad_norm": 0.3052501976490021, "learning_rate": 3.542484786930073e-07, "loss": 0.3394, "step": 10634 }, { "epoch": 2.674798792756539, "grad_norm": 0.3052050769329071, "learning_rate": 3.537077117830229e-07, "loss": 0.2972, "step": 10635 }, { "epoch": 2.6750503018108653, "grad_norm": 0.2863207757472992, "learning_rate": 3.531673427989046e-07, "loss": 0.3262, "step": 10636 }, { "epoch": 2.675301810865191, "grad_norm": 0.26985007524490356, "learning_rate": 3.526273717869344e-07, "loss": 0.3168, "step": 10637 }, { "epoch": 2.675553319919517, "grad_norm": 0.31778794527053833, "learning_rate": 3.5208779879335465e-07, "loss": 0.3115, "step": 10638 }, { "epoch": 2.675804828973843, "grad_norm": 0.2770874500274658, "learning_rate": 3.5154862386437894e-07, "loss": 0.3377, "step": 10639 }, { "epoch": 2.676056338028169, "grad_norm": 0.2914242148399353, "learning_rate": 3.5100984704618145e-07, "loss": 0.3152, "step": 10640 }, { "epoch": 2.676307847082495, "grad_norm": 0.313717246055603, "learning_rate": 3.504714683849081e-07, "loss": 0.3311, "step": 10641 }, { "epoch": 2.676559356136821, "grad_norm": 0.31820422410964966, "learning_rate": 3.499334879266653e-07, "loss": 0.3342, "step": 10642 }, { "epoch": 2.676810865191147, "grad_norm": 0.2878066897392273, "learning_rate": 3.4939590571752893e-07, "loss": 0.3092, "step": 10643 }, { "epoch": 2.677062374245473, "grad_norm": 0.32320746779441833, "learning_rate": 3.488587218035383e-07, "loss": 0.3276, "step": 10644 }, { "epoch": 2.677313883299799, "grad_norm": 0.26483622193336487, "learning_rate": 3.4832193623070167e-07, "loss": 0.2976, "step": 10645 }, { "epoch": 2.6775653923541247, "grad_norm": 0.29713913798332214, "learning_rate": 3.4778554904498996e-07, "loss": 0.3227, "step": 10646 }, { "epoch": 2.677816901408451, "grad_norm": 0.2784908711910248, "learning_rate": 3.47249560292342e-07, "loss": 0.3209, "step": 10647 }, { "epoch": 2.6780684104627768, "grad_norm": 0.3123219609260559, "learning_rate": 3.4671397001866116e-07, "loss": 0.3209, "step": 10648 }, { "epoch": 2.6783199195171026, "grad_norm": 0.27715906500816345, "learning_rate": 3.4617877826981785e-07, "loss": 0.3067, "step": 10649 }, { "epoch": 2.678571428571429, "grad_norm": 0.2937963902950287, "learning_rate": 3.4564398509164877e-07, "loss": 0.3111, "step": 10650 }, { "epoch": 2.6788229376257546, "grad_norm": 0.2874857783317566, "learning_rate": 3.451095905299545e-07, "loss": 0.3135, "step": 10651 }, { "epoch": 2.6790744466800804, "grad_norm": 0.2816859781742096, "learning_rate": 3.4457559463050293e-07, "loss": 0.3099, "step": 10652 }, { "epoch": 2.6793259557344067, "grad_norm": 0.28699028491973877, "learning_rate": 3.440419974390269e-07, "loss": 0.3174, "step": 10653 }, { "epoch": 2.6795774647887325, "grad_norm": 0.30524781346321106, "learning_rate": 3.43508799001227e-07, "loss": 0.3135, "step": 10654 }, { "epoch": 2.6798289738430583, "grad_norm": 0.2902679741382599, "learning_rate": 3.429759993627674e-07, "loss": 0.3229, "step": 10655 }, { "epoch": 2.6800804828973845, "grad_norm": 0.331782728433609, "learning_rate": 3.424435985692792e-07, "loss": 0.3208, "step": 10656 }, { "epoch": 2.6803319919517103, "grad_norm": 0.2915107309818268, "learning_rate": 3.419115966663583e-07, "loss": 0.3151, "step": 10657 }, { "epoch": 2.680583501006036, "grad_norm": 0.3128476142883301, "learning_rate": 3.413799936995699e-07, "loss": 0.3463, "step": 10658 }, { "epoch": 2.6808350100603624, "grad_norm": 0.2803356349468231, "learning_rate": 3.4084878971443914e-07, "loss": 0.3348, "step": 10659 }, { "epoch": 2.681086519114688, "grad_norm": 0.2853963375091553, "learning_rate": 3.403179847564625e-07, "loss": 0.3282, "step": 10660 }, { "epoch": 2.681338028169014, "grad_norm": 0.28760576248168945, "learning_rate": 3.397875788711003e-07, "loss": 0.3046, "step": 10661 }, { "epoch": 2.6815895372233403, "grad_norm": 0.298556923866272, "learning_rate": 3.3925757210377664e-07, "loss": 0.2909, "step": 10662 }, { "epoch": 2.681841046277666, "grad_norm": 0.2910586893558502, "learning_rate": 3.387279644998853e-07, "loss": 0.3091, "step": 10663 }, { "epoch": 2.682092555331992, "grad_norm": 0.3114005923271179, "learning_rate": 3.3819875610478225e-07, "loss": 0.3163, "step": 10664 }, { "epoch": 2.682344064386318, "grad_norm": 0.29191139340400696, "learning_rate": 3.3766994696379283e-07, "loss": 0.3394, "step": 10665 }, { "epoch": 2.682595573440644, "grad_norm": 0.2941957116127014, "learning_rate": 3.3714153712220364e-07, "loss": 0.3197, "step": 10666 }, { "epoch": 2.6828470824949697, "grad_norm": 0.269555002450943, "learning_rate": 3.3661352662527234e-07, "loss": 0.2953, "step": 10667 }, { "epoch": 2.683098591549296, "grad_norm": 0.30141013860702515, "learning_rate": 3.3608591551821724e-07, "loss": 0.3079, "step": 10668 }, { "epoch": 2.683350100603622, "grad_norm": 0.3198448121547699, "learning_rate": 3.35558703846226e-07, "loss": 0.3246, "step": 10669 }, { "epoch": 2.6836016096579476, "grad_norm": 0.3114519715309143, "learning_rate": 3.3503189165445205e-07, "loss": 0.3223, "step": 10670 }, { "epoch": 2.683853118712274, "grad_norm": 0.2955555319786072, "learning_rate": 3.34505478988012e-07, "loss": 0.3222, "step": 10671 }, { "epoch": 2.6841046277665996, "grad_norm": 0.31283295154571533, "learning_rate": 3.339794658919915e-07, "loss": 0.3082, "step": 10672 }, { "epoch": 2.6843561368209254, "grad_norm": 0.2927229702472687, "learning_rate": 3.334538524114378e-07, "loss": 0.3144, "step": 10673 }, { "epoch": 2.6846076458752517, "grad_norm": 0.2944906949996948, "learning_rate": 3.329286385913688e-07, "loss": 0.3193, "step": 10674 }, { "epoch": 2.6848591549295775, "grad_norm": 0.2984817326068878, "learning_rate": 3.3240382447676413e-07, "loss": 0.3145, "step": 10675 }, { "epoch": 2.6851106639839033, "grad_norm": 0.28597378730773926, "learning_rate": 3.3187941011257217e-07, "loss": 0.3065, "step": 10676 }, { "epoch": 2.6853621730382295, "grad_norm": 0.2896621525287628, "learning_rate": 3.313553955437049e-07, "loss": 0.3322, "step": 10677 }, { "epoch": 2.6856136820925554, "grad_norm": 0.29271525144577026, "learning_rate": 3.308317808150413e-07, "loss": 0.3155, "step": 10678 }, { "epoch": 2.685865191146881, "grad_norm": 0.2880185544490814, "learning_rate": 3.303085659714245e-07, "loss": 0.3204, "step": 10679 }, { "epoch": 2.6861167002012074, "grad_norm": 0.29037216305732727, "learning_rate": 3.297857510576658e-07, "loss": 0.3253, "step": 10680 }, { "epoch": 2.686368209255533, "grad_norm": 0.2900163531303406, "learning_rate": 3.29263336118541e-07, "loss": 0.3198, "step": 10681 }, { "epoch": 2.686619718309859, "grad_norm": 0.28541356325149536, "learning_rate": 3.287413211987911e-07, "loss": 0.3171, "step": 10682 }, { "epoch": 2.6868712273641853, "grad_norm": 0.3070562183856964, "learning_rate": 3.2821970634312474e-07, "loss": 0.3107, "step": 10683 }, { "epoch": 2.687122736418511, "grad_norm": 0.296577513217926, "learning_rate": 3.276984915962128e-07, "loss": 0.3507, "step": 10684 }, { "epoch": 2.687374245472837, "grad_norm": 0.286769300699234, "learning_rate": 3.271776770026963e-07, "loss": 0.3122, "step": 10685 }, { "epoch": 2.687625754527163, "grad_norm": 0.31535083055496216, "learning_rate": 3.266572626071773e-07, "loss": 0.3166, "step": 10686 }, { "epoch": 2.687877263581489, "grad_norm": 0.2796669602394104, "learning_rate": 3.261372484542291e-07, "loss": 0.3233, "step": 10687 }, { "epoch": 2.6881287726358147, "grad_norm": 0.29072704911231995, "learning_rate": 3.2561763458838436e-07, "loss": 0.3206, "step": 10688 }, { "epoch": 2.688380281690141, "grad_norm": 0.291552871465683, "learning_rate": 3.25098421054148e-07, "loss": 0.3319, "step": 10689 }, { "epoch": 2.688631790744467, "grad_norm": 0.2940013110637665, "learning_rate": 3.2457960789598453e-07, "loss": 0.3319, "step": 10690 }, { "epoch": 2.6888832997987926, "grad_norm": 0.2845515310764313, "learning_rate": 3.2406119515832777e-07, "loss": 0.3211, "step": 10691 }, { "epoch": 2.689134808853119, "grad_norm": 0.30485305190086365, "learning_rate": 3.235431828855784e-07, "loss": 0.3121, "step": 10692 }, { "epoch": 2.6893863179074446, "grad_norm": 0.2907279431819916, "learning_rate": 3.230255711220992e-07, "loss": 0.3204, "step": 10693 }, { "epoch": 2.6896378269617705, "grad_norm": 0.2880786061286926, "learning_rate": 3.2250835991222084e-07, "loss": 0.3015, "step": 10694 }, { "epoch": 2.6898893360160967, "grad_norm": 0.28251487016677856, "learning_rate": 3.2199154930023903e-07, "loss": 0.3232, "step": 10695 }, { "epoch": 2.6901408450704225, "grad_norm": 0.27719196677207947, "learning_rate": 3.214751393304155e-07, "loss": 0.3025, "step": 10696 }, { "epoch": 2.6903923541247483, "grad_norm": 0.2866508364677429, "learning_rate": 3.209591300469772e-07, "loss": 0.3137, "step": 10697 }, { "epoch": 2.6906438631790746, "grad_norm": 0.269786536693573, "learning_rate": 3.204435214941182e-07, "loss": 0.3266, "step": 10698 }, { "epoch": 2.6908953722334004, "grad_norm": 0.27028945088386536, "learning_rate": 3.1992831371599584e-07, "loss": 0.3135, "step": 10699 }, { "epoch": 2.691146881287726, "grad_norm": 0.2978735864162445, "learning_rate": 3.1941350675673557e-07, "loss": 0.3323, "step": 10700 }, { "epoch": 2.6913983903420524, "grad_norm": 0.3069797456264496, "learning_rate": 3.1889910066042587e-07, "loss": 0.3128, "step": 10701 }, { "epoch": 2.691649899396378, "grad_norm": 0.2964140474796295, "learning_rate": 3.183850954711232e-07, "loss": 0.3123, "step": 10702 }, { "epoch": 2.691901408450704, "grad_norm": 0.3021068572998047, "learning_rate": 3.178714912328501e-07, "loss": 0.287, "step": 10703 }, { "epoch": 2.6921529175050303, "grad_norm": 0.2956176698207855, "learning_rate": 3.1735828798959146e-07, "loss": 0.3147, "step": 10704 }, { "epoch": 2.692404426559356, "grad_norm": 0.27685198187828064, "learning_rate": 3.168454857853015e-07, "loss": 0.3162, "step": 10705 }, { "epoch": 2.692655935613682, "grad_norm": 0.285156786441803, "learning_rate": 3.163330846638979e-07, "loss": 0.324, "step": 10706 }, { "epoch": 2.692907444668008, "grad_norm": 0.31111207604408264, "learning_rate": 3.1582108466926497e-07, "loss": 0.3137, "step": 10707 }, { "epoch": 2.693158953722334, "grad_norm": 0.32243606448173523, "learning_rate": 3.15309485845251e-07, "loss": 0.3186, "step": 10708 }, { "epoch": 2.6934104627766597, "grad_norm": 0.2985250651836395, "learning_rate": 3.147982882356732e-07, "loss": 0.3265, "step": 10709 }, { "epoch": 2.693661971830986, "grad_norm": 0.2669808566570282, "learning_rate": 3.1428749188431086e-07, "loss": 0.3053, "step": 10710 }, { "epoch": 2.693913480885312, "grad_norm": 0.3001328706741333, "learning_rate": 3.1377709683491185e-07, "loss": 0.3213, "step": 10711 }, { "epoch": 2.6941649899396376, "grad_norm": 0.2827167809009552, "learning_rate": 3.132671031311868e-07, "loss": 0.3212, "step": 10712 }, { "epoch": 2.694416498993964, "grad_norm": 0.27102166414260864, "learning_rate": 3.127575108168146e-07, "loss": 0.3306, "step": 10713 }, { "epoch": 2.6946680080482897, "grad_norm": 0.2781563997268677, "learning_rate": 3.1224831993543867e-07, "loss": 0.3274, "step": 10714 }, { "epoch": 2.6949195171026155, "grad_norm": 0.3081703782081604, "learning_rate": 3.11739530530667e-07, "loss": 0.3186, "step": 10715 }, { "epoch": 2.6951710261569417, "grad_norm": 0.31926673650741577, "learning_rate": 3.1123114264607515e-07, "loss": 0.3128, "step": 10716 }, { "epoch": 2.6954225352112675, "grad_norm": 0.28970199823379517, "learning_rate": 3.1072315632520277e-07, "loss": 0.2909, "step": 10717 }, { "epoch": 2.6956740442655933, "grad_norm": 0.28923845291137695, "learning_rate": 3.102155716115568e-07, "loss": 0.3278, "step": 10718 }, { "epoch": 2.6959255533199196, "grad_norm": 0.3150719106197357, "learning_rate": 3.097083885486074e-07, "loss": 0.3314, "step": 10719 }, { "epoch": 2.6961770623742454, "grad_norm": 0.2829613983631134, "learning_rate": 3.0920160717979264e-07, "loss": 0.3328, "step": 10720 }, { "epoch": 2.696428571428571, "grad_norm": 0.2828682065010071, "learning_rate": 3.0869522754851387e-07, "loss": 0.3164, "step": 10721 }, { "epoch": 2.6966800804828974, "grad_norm": 0.2709435522556305, "learning_rate": 3.081892496981409e-07, "loss": 0.3154, "step": 10722 }, { "epoch": 2.6969315895372232, "grad_norm": 0.3398783504962921, "learning_rate": 3.0768367367200624e-07, "loss": 0.3052, "step": 10723 }, { "epoch": 2.697183098591549, "grad_norm": 0.30523836612701416, "learning_rate": 3.0717849951340973e-07, "loss": 0.3241, "step": 10724 }, { "epoch": 2.6974346076458753, "grad_norm": 0.2881937623023987, "learning_rate": 3.066737272656173e-07, "loss": 0.3551, "step": 10725 }, { "epoch": 2.697686116700201, "grad_norm": 0.28372272849082947, "learning_rate": 3.061693569718577e-07, "loss": 0.3136, "step": 10726 }, { "epoch": 2.697937625754527, "grad_norm": 0.2901321053504944, "learning_rate": 3.056653886753297e-07, "loss": 0.3264, "step": 10727 }, { "epoch": 2.698189134808853, "grad_norm": 0.2705398499965668, "learning_rate": 3.051618224191921e-07, "loss": 0.3243, "step": 10728 }, { "epoch": 2.698440643863179, "grad_norm": 0.2847079336643219, "learning_rate": 3.046586582465744e-07, "loss": 0.3083, "step": 10729 }, { "epoch": 2.698692152917505, "grad_norm": 0.29345259070396423, "learning_rate": 3.0415589620056807e-07, "loss": 0.3141, "step": 10730 }, { "epoch": 2.698943661971831, "grad_norm": 0.2997452914714813, "learning_rate": 3.036535363242327e-07, "loss": 0.2915, "step": 10731 }, { "epoch": 2.699195171026157, "grad_norm": 0.2906875014305115, "learning_rate": 3.0315157866059154e-07, "loss": 0.3336, "step": 10732 }, { "epoch": 2.699446680080483, "grad_norm": 0.2780359387397766, "learning_rate": 3.0265002325263415e-07, "loss": 0.3313, "step": 10733 }, { "epoch": 2.699698189134809, "grad_norm": 0.2860005497932434, "learning_rate": 3.021488701433156e-07, "loss": 0.3464, "step": 10734 }, { "epoch": 2.6999496981891347, "grad_norm": 0.30452266335487366, "learning_rate": 3.016481193755566e-07, "loss": 0.3243, "step": 10735 }, { "epoch": 2.700201207243461, "grad_norm": 0.28733357787132263, "learning_rate": 3.011477709922439e-07, "loss": 0.3178, "step": 10736 }, { "epoch": 2.7004527162977867, "grad_norm": 0.26170480251312256, "learning_rate": 3.0064782503622827e-07, "loss": 0.3241, "step": 10737 }, { "epoch": 2.7007042253521125, "grad_norm": 0.27204710245132446, "learning_rate": 3.001482815503276e-07, "loss": 0.3139, "step": 10738 }, { "epoch": 2.7009557344064388, "grad_norm": 0.3037412464618683, "learning_rate": 2.9964914057732386e-07, "loss": 0.3181, "step": 10739 }, { "epoch": 2.7012072434607646, "grad_norm": 0.27614688873291016, "learning_rate": 2.9915040215996726e-07, "loss": 0.3135, "step": 10740 }, { "epoch": 2.7014587525150904, "grad_norm": 0.2942797839641571, "learning_rate": 2.986520663409687e-07, "loss": 0.3279, "step": 10741 }, { "epoch": 2.7017102615694166, "grad_norm": 0.2742573320865631, "learning_rate": 2.9815413316301055e-07, "loss": 0.3289, "step": 10742 }, { "epoch": 2.7019617706237424, "grad_norm": 0.27266111969947815, "learning_rate": 2.97656602668735e-07, "loss": 0.3331, "step": 10743 }, { "epoch": 2.7022132796780687, "grad_norm": 0.29972144961357117, "learning_rate": 2.9715947490075447e-07, "loss": 0.3313, "step": 10744 }, { "epoch": 2.7024647887323945, "grad_norm": 0.30087292194366455, "learning_rate": 2.9666274990164336e-07, "loss": 0.3195, "step": 10745 }, { "epoch": 2.7027162977867203, "grad_norm": 0.28101980686187744, "learning_rate": 2.961664277139448e-07, "loss": 0.3034, "step": 10746 }, { "epoch": 2.7029678068410465, "grad_norm": 0.2926780879497528, "learning_rate": 2.956705083801636e-07, "loss": 0.3363, "step": 10747 }, { "epoch": 2.7032193158953723, "grad_norm": 0.30072760581970215, "learning_rate": 2.951749919427738e-07, "loss": 0.3311, "step": 10748 }, { "epoch": 2.703470824949698, "grad_norm": 0.3085022568702698, "learning_rate": 2.9467987844421173e-07, "loss": 0.3272, "step": 10749 }, { "epoch": 2.7037223340040244, "grad_norm": 0.28719770908355713, "learning_rate": 2.941851679268831e-07, "loss": 0.3186, "step": 10750 }, { "epoch": 2.70397384305835, "grad_norm": 0.30047592520713806, "learning_rate": 2.93690860433154e-07, "loss": 0.308, "step": 10751 }, { "epoch": 2.704225352112676, "grad_norm": 0.29841721057891846, "learning_rate": 2.93196956005361e-07, "loss": 0.315, "step": 10752 }, { "epoch": 2.7044768611670023, "grad_norm": 0.3019961416721344, "learning_rate": 2.927034546858026e-07, "loss": 0.3273, "step": 10753 }, { "epoch": 2.704728370221328, "grad_norm": 0.2860671579837799, "learning_rate": 2.92210356516745e-07, "loss": 0.3405, "step": 10754 }, { "epoch": 2.704979879275654, "grad_norm": 0.2890463173389435, "learning_rate": 2.917176615404177e-07, "loss": 0.3264, "step": 10755 }, { "epoch": 2.70523138832998, "grad_norm": 0.27873101830482483, "learning_rate": 2.9122536979901807e-07, "loss": 0.3134, "step": 10756 }, { "epoch": 2.705482897384306, "grad_norm": 0.28098541498184204, "learning_rate": 2.9073348133470844e-07, "loss": 0.3087, "step": 10757 }, { "epoch": 2.7057344064386317, "grad_norm": 0.27469319105148315, "learning_rate": 2.902419961896136e-07, "loss": 0.3253, "step": 10758 }, { "epoch": 2.705985915492958, "grad_norm": 0.295611172914505, "learning_rate": 2.8975091440582915e-07, "loss": 0.3224, "step": 10759 }, { "epoch": 2.7062374245472838, "grad_norm": 0.2876059114933014, "learning_rate": 2.892602360254104e-07, "loss": 0.3465, "step": 10760 }, { "epoch": 2.7064889336016096, "grad_norm": 0.285182923078537, "learning_rate": 2.887699610903827e-07, "loss": 0.3092, "step": 10761 }, { "epoch": 2.706740442655936, "grad_norm": 0.2798616290092468, "learning_rate": 2.8828008964273404e-07, "loss": 0.3096, "step": 10762 }, { "epoch": 2.7069919517102616, "grad_norm": 0.2900584042072296, "learning_rate": 2.877906217244203e-07, "loss": 0.3103, "step": 10763 }, { "epoch": 2.7072434607645874, "grad_norm": 0.2769961357116699, "learning_rate": 2.873015573773591e-07, "loss": 0.3217, "step": 10764 }, { "epoch": 2.7074949698189137, "grad_norm": 0.2770218849182129, "learning_rate": 2.86812896643438e-07, "loss": 0.3292, "step": 10765 }, { "epoch": 2.7077464788732395, "grad_norm": 0.29273056983947754, "learning_rate": 2.8632463956450583e-07, "loss": 0.3286, "step": 10766 }, { "epoch": 2.7079979879275653, "grad_norm": 0.2939911186695099, "learning_rate": 2.858367861823802e-07, "loss": 0.3022, "step": 10767 }, { "epoch": 2.7082494969818915, "grad_norm": 0.3015715777873993, "learning_rate": 2.8534933653884157e-07, "loss": 0.3051, "step": 10768 }, { "epoch": 2.7085010060362174, "grad_norm": 0.28284311294555664, "learning_rate": 2.848622906756382e-07, "loss": 0.3136, "step": 10769 }, { "epoch": 2.708752515090543, "grad_norm": 0.2819089889526367, "learning_rate": 2.843756486344812e-07, "loss": 0.3146, "step": 10770 }, { "epoch": 2.7090040241448694, "grad_norm": 0.3178308606147766, "learning_rate": 2.8388941045705e-07, "loss": 0.3401, "step": 10771 }, { "epoch": 2.709255533199195, "grad_norm": 0.2823493182659149, "learning_rate": 2.834035761849857e-07, "loss": 0.3334, "step": 10772 }, { "epoch": 2.709507042253521, "grad_norm": 0.2783769369125366, "learning_rate": 2.8291814585989894e-07, "loss": 0.3232, "step": 10773 }, { "epoch": 2.7097585513078473, "grad_norm": 0.2950974404811859, "learning_rate": 2.824331195233626e-07, "loss": 0.3237, "step": 10774 }, { "epoch": 2.710010060362173, "grad_norm": 0.2713472247123718, "learning_rate": 2.8194849721691673e-07, "loss": 0.2991, "step": 10775 }, { "epoch": 2.710261569416499, "grad_norm": 0.2863728702068329, "learning_rate": 2.81464278982066e-07, "loss": 0.3167, "step": 10776 }, { "epoch": 2.710513078470825, "grad_norm": 0.285849392414093, "learning_rate": 2.80980464860281e-07, "loss": 0.3086, "step": 10777 }, { "epoch": 2.710764587525151, "grad_norm": 0.31550291180610657, "learning_rate": 2.804970548929964e-07, "loss": 0.3127, "step": 10778 }, { "epoch": 2.7110160965794767, "grad_norm": 0.29624804854393005, "learning_rate": 2.8001404912161413e-07, "loss": 0.3238, "step": 10779 }, { "epoch": 2.711267605633803, "grad_norm": 0.2810332179069519, "learning_rate": 2.7953144758750107e-07, "loss": 0.3298, "step": 10780 }, { "epoch": 2.711519114688129, "grad_norm": 0.29689785838127136, "learning_rate": 2.790492503319875e-07, "loss": 0.3364, "step": 10781 }, { "epoch": 2.7117706237424546, "grad_norm": 0.30866745114326477, "learning_rate": 2.7856745739637206e-07, "loss": 0.3216, "step": 10782 }, { "epoch": 2.712022132796781, "grad_norm": 0.26995787024497986, "learning_rate": 2.7808606882191615e-07, "loss": 0.328, "step": 10783 }, { "epoch": 2.7122736418511066, "grad_norm": 0.2834140956401825, "learning_rate": 2.7760508464984904e-07, "loss": 0.3015, "step": 10784 }, { "epoch": 2.7125251509054324, "grad_norm": 0.28610095381736755, "learning_rate": 2.771245049213628e-07, "loss": 0.3174, "step": 10785 }, { "epoch": 2.7127766599597587, "grad_norm": 0.2732725441455841, "learning_rate": 2.7664432967761667e-07, "loss": 0.3077, "step": 10786 }, { "epoch": 2.7130281690140845, "grad_norm": 0.2708452641963959, "learning_rate": 2.7616455895973393e-07, "loss": 0.3036, "step": 10787 }, { "epoch": 2.7132796780684103, "grad_norm": 0.28488287329673767, "learning_rate": 2.756851928088056e-07, "loss": 0.3226, "step": 10788 }, { "epoch": 2.7135311871227366, "grad_norm": 0.3026772737503052, "learning_rate": 2.752062312658838e-07, "loss": 0.3404, "step": 10789 }, { "epoch": 2.7137826961770624, "grad_norm": 0.28403767943382263, "learning_rate": 2.7472767437199067e-07, "loss": 0.325, "step": 10790 }, { "epoch": 2.714034205231388, "grad_norm": 0.2924075424671173, "learning_rate": 2.742495221681113e-07, "loss": 0.322, "step": 10791 }, { "epoch": 2.7142857142857144, "grad_norm": 0.28228023648262024, "learning_rate": 2.7377177469519565e-07, "loss": 0.3184, "step": 10792 }, { "epoch": 2.71453722334004, "grad_norm": 0.29336363077163696, "learning_rate": 2.7329443199416105e-07, "loss": 0.3155, "step": 10793 }, { "epoch": 2.714788732394366, "grad_norm": 0.31586167216300964, "learning_rate": 2.7281749410588753e-07, "loss": 0.3242, "step": 10794 }, { "epoch": 2.7150402414486923, "grad_norm": 0.30801641941070557, "learning_rate": 2.7234096107122357e-07, "loss": 0.3315, "step": 10795 }, { "epoch": 2.715291750503018, "grad_norm": 0.29662755131721497, "learning_rate": 2.7186483293097863e-07, "loss": 0.3071, "step": 10796 }, { "epoch": 2.715543259557344, "grad_norm": 0.28053396940231323, "learning_rate": 2.71389109725933e-07, "loss": 0.3257, "step": 10797 }, { "epoch": 2.71579476861167, "grad_norm": 0.29082953929901123, "learning_rate": 2.7091379149682683e-07, "loss": 0.2968, "step": 10798 }, { "epoch": 2.716046277665996, "grad_norm": 0.30169904232025146, "learning_rate": 2.7043887828437033e-07, "loss": 0.3146, "step": 10799 }, { "epoch": 2.7162977867203217, "grad_norm": 0.3031311333179474, "learning_rate": 2.699643701292348e-07, "loss": 0.3383, "step": 10800 }, { "epoch": 2.716549295774648, "grad_norm": 0.28714466094970703, "learning_rate": 2.694902670720606e-07, "loss": 0.3333, "step": 10801 }, { "epoch": 2.716800804828974, "grad_norm": 0.2786526679992676, "learning_rate": 2.690165691534513e-07, "loss": 0.3273, "step": 10802 }, { "epoch": 2.7170523138832996, "grad_norm": 0.2807994484901428, "learning_rate": 2.6854327641397504e-07, "loss": 0.2979, "step": 10803 }, { "epoch": 2.717303822937626, "grad_norm": 0.3144852817058563, "learning_rate": 2.6807038889416824e-07, "loss": 0.3153, "step": 10804 }, { "epoch": 2.7175553319919517, "grad_norm": 0.2780417501926422, "learning_rate": 2.675979066345291e-07, "loss": 0.3198, "step": 10805 }, { "epoch": 2.7178068410462775, "grad_norm": 0.27267181873321533, "learning_rate": 2.671258296755241e-07, "loss": 0.2898, "step": 10806 }, { "epoch": 2.7180583501006037, "grad_norm": 0.29780054092407227, "learning_rate": 2.6665415805758264e-07, "loss": 0.3345, "step": 10807 }, { "epoch": 2.7183098591549295, "grad_norm": 0.286492258310318, "learning_rate": 2.661828918211012e-07, "loss": 0.3259, "step": 10808 }, { "epoch": 2.7185613682092553, "grad_norm": 0.2818504571914673, "learning_rate": 2.657120310064393e-07, "loss": 0.32, "step": 10809 }, { "epoch": 2.7188128772635816, "grad_norm": 0.28963735699653625, "learning_rate": 2.6524157565392506e-07, "loss": 0.3339, "step": 10810 }, { "epoch": 2.7190643863179074, "grad_norm": 0.268822580575943, "learning_rate": 2.647715258038497e-07, "loss": 0.3148, "step": 10811 }, { "epoch": 2.719315895372233, "grad_norm": 0.304812490940094, "learning_rate": 2.643018814964687e-07, "loss": 0.3164, "step": 10812 }, { "epoch": 2.7195674044265594, "grad_norm": 0.28156810998916626, "learning_rate": 2.6383264277200616e-07, "loss": 0.3312, "step": 10813 }, { "epoch": 2.7198189134808852, "grad_norm": 0.2927444875240326, "learning_rate": 2.6336380967064754e-07, "loss": 0.3059, "step": 10814 }, { "epoch": 2.720070422535211, "grad_norm": 0.2759106159210205, "learning_rate": 2.6289538223254695e-07, "loss": 0.3408, "step": 10815 }, { "epoch": 2.7203219315895373, "grad_norm": 0.2832070589065552, "learning_rate": 2.624273604978211e-07, "loss": 0.3276, "step": 10816 }, { "epoch": 2.720573440643863, "grad_norm": 0.27949801087379456, "learning_rate": 2.6195974450655415e-07, "loss": 0.3654, "step": 10817 }, { "epoch": 2.720824949698189, "grad_norm": 0.29671213030815125, "learning_rate": 2.6149253429879397e-07, "loss": 0.3069, "step": 10818 }, { "epoch": 2.721076458752515, "grad_norm": 0.31909680366516113, "learning_rate": 2.610257299145541e-07, "loss": 0.3114, "step": 10819 }, { "epoch": 2.721327967806841, "grad_norm": 0.3081822395324707, "learning_rate": 2.6055933139381315e-07, "loss": 0.3447, "step": 10820 }, { "epoch": 2.7215794768611667, "grad_norm": 0.30881619453430176, "learning_rate": 2.600933387765159e-07, "loss": 0.2926, "step": 10821 }, { "epoch": 2.721830985915493, "grad_norm": 0.28827062249183655, "learning_rate": 2.596277521025714e-07, "loss": 0.326, "step": 10822 }, { "epoch": 2.722082494969819, "grad_norm": 0.30842483043670654, "learning_rate": 2.5916257141185395e-07, "loss": 0.3216, "step": 10823 }, { "epoch": 2.7223340040241446, "grad_norm": 0.3006271421909332, "learning_rate": 2.586977967442045e-07, "loss": 0.3144, "step": 10824 }, { "epoch": 2.722585513078471, "grad_norm": 0.2951270341873169, "learning_rate": 2.5823342813942665e-07, "loss": 0.329, "step": 10825 }, { "epoch": 2.7228370221327967, "grad_norm": 0.2893744111061096, "learning_rate": 2.57769465637292e-07, "loss": 0.3137, "step": 10826 }, { "epoch": 2.7230885311871225, "grad_norm": 0.2886696457862854, "learning_rate": 2.573059092775343e-07, "loss": 0.3488, "step": 10827 }, { "epoch": 2.7233400402414487, "grad_norm": 0.3092022240161896, "learning_rate": 2.568427590998557e-07, "loss": 0.3293, "step": 10828 }, { "epoch": 2.7235915492957745, "grad_norm": 0.2914911210536957, "learning_rate": 2.563800151439216e-07, "loss": 0.3183, "step": 10829 }, { "epoch": 2.7238430583501008, "grad_norm": 0.29686030745506287, "learning_rate": 2.559176774493638e-07, "loss": 0.3196, "step": 10830 }, { "epoch": 2.7240945674044266, "grad_norm": 0.303915411233902, "learning_rate": 2.554557460557772e-07, "loss": 0.3234, "step": 10831 }, { "epoch": 2.7243460764587524, "grad_norm": 0.3081934452056885, "learning_rate": 2.54994221002724e-07, "loss": 0.3263, "step": 10832 }, { "epoch": 2.7245975855130786, "grad_norm": 0.290477454662323, "learning_rate": 2.5453310232973205e-07, "loss": 0.3441, "step": 10833 }, { "epoch": 2.7248490945674044, "grad_norm": 0.2770725190639496, "learning_rate": 2.5407239007629145e-07, "loss": 0.3166, "step": 10834 }, { "epoch": 2.7251006036217302, "grad_norm": 0.28399229049682617, "learning_rate": 2.536120842818612e-07, "loss": 0.2891, "step": 10835 }, { "epoch": 2.7253521126760565, "grad_norm": 0.2806536853313446, "learning_rate": 2.53152184985862e-07, "loss": 0.3178, "step": 10836 }, { "epoch": 2.7256036217303823, "grad_norm": 0.27946868538856506, "learning_rate": 2.5269269222768234e-07, "loss": 0.3259, "step": 10837 }, { "epoch": 2.725855130784708, "grad_norm": 0.2891778349876404, "learning_rate": 2.5223360604667404e-07, "loss": 0.3271, "step": 10838 }, { "epoch": 2.7261066398390343, "grad_norm": 0.2895258069038391, "learning_rate": 2.517749264821556e-07, "loss": 0.3193, "step": 10839 }, { "epoch": 2.72635814889336, "grad_norm": 0.2947310507297516, "learning_rate": 2.5131665357340963e-07, "loss": 0.3362, "step": 10840 }, { "epoch": 2.7266096579476864, "grad_norm": 0.28745022416114807, "learning_rate": 2.508587873596857e-07, "loss": 0.2958, "step": 10841 }, { "epoch": 2.726861167002012, "grad_norm": 0.2851249873638153, "learning_rate": 2.504013278801948e-07, "loss": 0.327, "step": 10842 }, { "epoch": 2.727112676056338, "grad_norm": 0.28858688473701477, "learning_rate": 2.499442751741171e-07, "loss": 0.3361, "step": 10843 }, { "epoch": 2.7273641851106643, "grad_norm": 0.29522156715393066, "learning_rate": 2.4948762928059647e-07, "loss": 0.3238, "step": 10844 }, { "epoch": 2.72761569416499, "grad_norm": 0.2840255796909332, "learning_rate": 2.490313902387409e-07, "loss": 0.3464, "step": 10845 }, { "epoch": 2.727867203219316, "grad_norm": 0.29043489694595337, "learning_rate": 2.485755580876248e-07, "loss": 0.3275, "step": 10846 }, { "epoch": 2.728118712273642, "grad_norm": 0.29880261421203613, "learning_rate": 2.4812013286628747e-07, "loss": 0.3568, "step": 10847 }, { "epoch": 2.728370221327968, "grad_norm": 0.31489261984825134, "learning_rate": 2.4766511461373324e-07, "loss": 0.3404, "step": 10848 }, { "epoch": 2.7286217303822937, "grad_norm": 0.28683874011039734, "learning_rate": 2.4721050336893094e-07, "loss": 0.3227, "step": 10849 }, { "epoch": 2.72887323943662, "grad_norm": 0.27683448791503906, "learning_rate": 2.467562991708161e-07, "loss": 0.3275, "step": 10850 }, { "epoch": 2.7291247484909458, "grad_norm": 0.28187745809555054, "learning_rate": 2.4630250205828767e-07, "loss": 0.331, "step": 10851 }, { "epoch": 2.7293762575452716, "grad_norm": 0.30056044459342957, "learning_rate": 2.458491120702117e-07, "loss": 0.2948, "step": 10852 }, { "epoch": 2.729627766599598, "grad_norm": 0.287810742855072, "learning_rate": 2.453961292454166e-07, "loss": 0.327, "step": 10853 }, { "epoch": 2.7298792756539236, "grad_norm": 0.2935195565223694, "learning_rate": 2.4494355362269796e-07, "loss": 0.3268, "step": 10854 }, { "epoch": 2.7301307847082494, "grad_norm": 0.2871311902999878, "learning_rate": 2.444913852408176e-07, "loss": 0.3373, "step": 10855 }, { "epoch": 2.7303822937625757, "grad_norm": 0.2980871796607971, "learning_rate": 2.440396241384985e-07, "loss": 0.3115, "step": 10856 }, { "epoch": 2.7306338028169015, "grad_norm": 0.29173511266708374, "learning_rate": 2.435882703544334e-07, "loss": 0.3228, "step": 10857 }, { "epoch": 2.7308853118712273, "grad_norm": 0.3014184534549713, "learning_rate": 2.4313732392727664e-07, "loss": 0.3397, "step": 10858 }, { "epoch": 2.7311368209255535, "grad_norm": 0.29081061482429504, "learning_rate": 2.4268678489564935e-07, "loss": 0.3183, "step": 10859 }, { "epoch": 2.7313883299798793, "grad_norm": 0.3027068078517914, "learning_rate": 2.422366532981368e-07, "loss": 0.3241, "step": 10860 }, { "epoch": 2.731639839034205, "grad_norm": 0.29450279474258423, "learning_rate": 2.4178692917329106e-07, "loss": 0.3406, "step": 10861 }, { "epoch": 2.7318913480885314, "grad_norm": 0.26403874158859253, "learning_rate": 2.413376125596267e-07, "loss": 0.2853, "step": 10862 }, { "epoch": 2.732142857142857, "grad_norm": 0.3012741804122925, "learning_rate": 2.4088870349562644e-07, "loss": 0.3511, "step": 10863 }, { "epoch": 2.732394366197183, "grad_norm": 0.2905932068824768, "learning_rate": 2.404402020197355e-07, "loss": 0.303, "step": 10864 }, { "epoch": 2.7326458752515093, "grad_norm": 0.2958056926727295, "learning_rate": 2.399921081703654e-07, "loss": 0.3264, "step": 10865 }, { "epoch": 2.732897384305835, "grad_norm": 0.2873068153858185, "learning_rate": 2.3954442198589334e-07, "loss": 0.2966, "step": 10866 }, { "epoch": 2.733148893360161, "grad_norm": 0.3035353720188141, "learning_rate": 2.390971435046596e-07, "loss": 0.3086, "step": 10867 }, { "epoch": 2.733400402414487, "grad_norm": 0.3155568838119507, "learning_rate": 2.38650272764972e-07, "loss": 0.3157, "step": 10868 }, { "epoch": 2.733651911468813, "grad_norm": 0.2832005023956299, "learning_rate": 2.3820380980510093e-07, "loss": 0.3133, "step": 10869 }, { "epoch": 2.7339034205231387, "grad_norm": 0.28932392597198486, "learning_rate": 2.3775775466328422e-07, "loss": 0.3163, "step": 10870 }, { "epoch": 2.734154929577465, "grad_norm": 0.30421337485313416, "learning_rate": 2.3731210737772293e-07, "loss": 0.3355, "step": 10871 }, { "epoch": 2.734406438631791, "grad_norm": 0.2934373915195465, "learning_rate": 2.3686686798658543e-07, "loss": 0.3051, "step": 10872 }, { "epoch": 2.7346579476861166, "grad_norm": 0.28928595781326294, "learning_rate": 2.364220365280012e-07, "loss": 0.3199, "step": 10873 }, { "epoch": 2.734909456740443, "grad_norm": 0.30236467719078064, "learning_rate": 2.3597761304006984e-07, "loss": 0.3511, "step": 10874 }, { "epoch": 2.7351609657947686, "grad_norm": 0.29965662956237793, "learning_rate": 2.3553359756085192e-07, "loss": 0.2945, "step": 10875 }, { "epoch": 2.7354124748490944, "grad_norm": 0.31335750222206116, "learning_rate": 2.3508999012837484e-07, "loss": 0.3408, "step": 10876 }, { "epoch": 2.7356639839034207, "grad_norm": 0.28357815742492676, "learning_rate": 2.3464679078063102e-07, "loss": 0.3215, "step": 10877 }, { "epoch": 2.7359154929577465, "grad_norm": 0.30339914560317993, "learning_rate": 2.3420399955557782e-07, "loss": 0.3115, "step": 10878 }, { "epoch": 2.7361670020120723, "grad_norm": 0.31013423204421997, "learning_rate": 2.337616164911366e-07, "loss": 0.3253, "step": 10879 }, { "epoch": 2.7364185110663986, "grad_norm": 0.30664506554603577, "learning_rate": 2.333196416251965e-07, "loss": 0.306, "step": 10880 }, { "epoch": 2.7366700201207244, "grad_norm": 0.27958711981773376, "learning_rate": 2.3287807499560777e-07, "loss": 0.3085, "step": 10881 }, { "epoch": 2.73692152917505, "grad_norm": 0.27122312784194946, "learning_rate": 2.3243691664018964e-07, "loss": 0.3313, "step": 10882 }, { "epoch": 2.7371730382293764, "grad_norm": 0.2907143533229828, "learning_rate": 2.3199616659672352e-07, "loss": 0.3293, "step": 10883 }, { "epoch": 2.737424547283702, "grad_norm": 0.29304689168930054, "learning_rate": 2.315558249029576e-07, "loss": 0.3059, "step": 10884 }, { "epoch": 2.737676056338028, "grad_norm": 0.2985115945339203, "learning_rate": 2.311158915966033e-07, "loss": 0.3035, "step": 10885 }, { "epoch": 2.7379275653923543, "grad_norm": 0.3043895661830902, "learning_rate": 2.3067636671533944e-07, "loss": 0.3192, "step": 10886 }, { "epoch": 2.73817907444668, "grad_norm": 0.2754703462123871, "learning_rate": 2.30237250296807e-07, "loss": 0.3498, "step": 10887 }, { "epoch": 2.738430583501006, "grad_norm": 0.27213260531425476, "learning_rate": 2.2979854237861588e-07, "loss": 0.3159, "step": 10888 }, { "epoch": 2.738682092555332, "grad_norm": 0.29739394783973694, "learning_rate": 2.2936024299833605e-07, "loss": 0.3277, "step": 10889 }, { "epoch": 2.738933601609658, "grad_norm": 0.2812947928905487, "learning_rate": 2.2892235219350745e-07, "loss": 0.3192, "step": 10890 }, { "epoch": 2.7391851106639837, "grad_norm": 0.29224130511283875, "learning_rate": 2.2848487000163067e-07, "loss": 0.3248, "step": 10891 }, { "epoch": 2.73943661971831, "grad_norm": 0.2976722717285156, "learning_rate": 2.2804779646017517e-07, "loss": 0.3271, "step": 10892 }, { "epoch": 2.739688128772636, "grad_norm": 0.28829827904701233, "learning_rate": 2.276111316065721e-07, "loss": 0.3171, "step": 10893 }, { "epoch": 2.7399396378269616, "grad_norm": 0.29329490661621094, "learning_rate": 2.2717487547821992e-07, "loss": 0.3202, "step": 10894 }, { "epoch": 2.740191146881288, "grad_norm": 0.29280152916908264, "learning_rate": 2.267390281124804e-07, "loss": 0.305, "step": 10895 }, { "epoch": 2.7404426559356136, "grad_norm": 0.2917034924030304, "learning_rate": 2.2630358954668253e-07, "loss": 0.3526, "step": 10896 }, { "epoch": 2.7406941649899395, "grad_norm": 0.2969276010990143, "learning_rate": 2.258685598181176e-07, "loss": 0.333, "step": 10897 }, { "epoch": 2.7409456740442657, "grad_norm": 0.2692883610725403, "learning_rate": 2.2543393896404308e-07, "loss": 0.329, "step": 10898 }, { "epoch": 2.7411971830985915, "grad_norm": 0.29979148507118225, "learning_rate": 2.2499972702168304e-07, "loss": 0.3182, "step": 10899 }, { "epoch": 2.7414486921529173, "grad_norm": 0.2951495349407196, "learning_rate": 2.245659240282233e-07, "loss": 0.3015, "step": 10900 }, { "epoch": 2.7417002012072436, "grad_norm": 0.2918504774570465, "learning_rate": 2.2413253002081803e-07, "loss": 0.3195, "step": 10901 }, { "epoch": 2.7419517102615694, "grad_norm": 0.28916507959365845, "learning_rate": 2.2369954503658308e-07, "loss": 0.3268, "step": 10902 }, { "epoch": 2.742203219315895, "grad_norm": 0.29391786456108093, "learning_rate": 2.2326696911260215e-07, "loss": 0.3068, "step": 10903 }, { "epoch": 2.7424547283702214, "grad_norm": 0.28322654962539673, "learning_rate": 2.2283480228592168e-07, "loss": 0.3066, "step": 10904 }, { "epoch": 2.7427062374245472, "grad_norm": 0.3049914538860321, "learning_rate": 2.2240304459355544e-07, "loss": 0.3235, "step": 10905 }, { "epoch": 2.742957746478873, "grad_norm": 0.26875460147857666, "learning_rate": 2.2197169607247882e-07, "loss": 0.3386, "step": 10906 }, { "epoch": 2.7432092555331993, "grad_norm": 0.29541999101638794, "learning_rate": 2.2154075675963617e-07, "loss": 0.3232, "step": 10907 }, { "epoch": 2.743460764587525, "grad_norm": 0.26899954676628113, "learning_rate": 2.2111022669193293e-07, "loss": 0.3177, "step": 10908 }, { "epoch": 2.743712273641851, "grad_norm": 0.27981895208358765, "learning_rate": 2.206801059062419e-07, "loss": 0.3132, "step": 10909 }, { "epoch": 2.743963782696177, "grad_norm": 0.28188595175743103, "learning_rate": 2.2025039443940134e-07, "loss": 0.3502, "step": 10910 }, { "epoch": 2.744215291750503, "grad_norm": 0.27629154920578003, "learning_rate": 2.198210923282118e-07, "loss": 0.324, "step": 10911 }, { "epoch": 2.7444668008048287, "grad_norm": 0.2914685904979706, "learning_rate": 2.1939219960944168e-07, "loss": 0.3242, "step": 10912 }, { "epoch": 2.744718309859155, "grad_norm": 0.2767096757888794, "learning_rate": 2.1896371631982162e-07, "loss": 0.3063, "step": 10913 }, { "epoch": 2.744969818913481, "grad_norm": 0.28286007046699524, "learning_rate": 2.1853564249604996e-07, "loss": 0.3, "step": 10914 }, { "epoch": 2.7452213279678066, "grad_norm": 0.28479135036468506, "learning_rate": 2.181079781747869e-07, "loss": 0.3035, "step": 10915 }, { "epoch": 2.745472837022133, "grad_norm": 0.27827152609825134, "learning_rate": 2.176807233926609e-07, "loss": 0.3327, "step": 10916 }, { "epoch": 2.7457243460764587, "grad_norm": 0.29502740502357483, "learning_rate": 2.172538781862621e-07, "loss": 0.3055, "step": 10917 }, { "epoch": 2.7459758551307845, "grad_norm": 0.2725423276424408, "learning_rate": 2.168274425921485e-07, "loss": 0.3063, "step": 10918 }, { "epoch": 2.7462273641851107, "grad_norm": 0.29838913679122925, "learning_rate": 2.1640141664684034e-07, "loss": 0.2981, "step": 10919 }, { "epoch": 2.7464788732394365, "grad_norm": 0.29055625200271606, "learning_rate": 2.1597580038682453e-07, "loss": 0.32, "step": 10920 }, { "epoch": 2.7467303822937623, "grad_norm": 0.27832192182540894, "learning_rate": 2.1555059384855358e-07, "loss": 0.3281, "step": 10921 }, { "epoch": 2.7469818913480886, "grad_norm": 0.27420729398727417, "learning_rate": 2.1512579706844227e-07, "loss": 0.3345, "step": 10922 }, { "epoch": 2.7472334004024144, "grad_norm": 0.3057540953159332, "learning_rate": 2.1470141008287316e-07, "loss": 0.3079, "step": 10923 }, { "epoch": 2.74748490945674, "grad_norm": 0.27829062938690186, "learning_rate": 2.1427743292819047e-07, "loss": 0.3347, "step": 10924 }, { "epoch": 2.7477364185110664, "grad_norm": 0.3032315969467163, "learning_rate": 2.1385386564070688e-07, "loss": 0.3091, "step": 10925 }, { "epoch": 2.7479879275653922, "grad_norm": 0.30215707421302795, "learning_rate": 2.1343070825669776e-07, "loss": 0.3233, "step": 10926 }, { "epoch": 2.748239436619718, "grad_norm": 0.3079783320426941, "learning_rate": 2.130079608124036e-07, "loss": 0.3209, "step": 10927 }, { "epoch": 2.7484909456740443, "grad_norm": 0.3027101457118988, "learning_rate": 2.1258562334402987e-07, "loss": 0.3379, "step": 10928 }, { "epoch": 2.74874245472837, "grad_norm": 0.3107944130897522, "learning_rate": 2.121636958877482e-07, "loss": 0.3205, "step": 10929 }, { "epoch": 2.7489939637826963, "grad_norm": 0.2983773946762085, "learning_rate": 2.1174217847969302e-07, "loss": 0.3062, "step": 10930 }, { "epoch": 2.749245472837022, "grad_norm": 0.29391881823539734, "learning_rate": 2.1132107115596434e-07, "loss": 0.3116, "step": 10931 }, { "epoch": 2.749496981891348, "grad_norm": 0.3001260459423065, "learning_rate": 2.1090037395262941e-07, "loss": 0.3488, "step": 10932 }, { "epoch": 2.749748490945674, "grad_norm": 0.3142930269241333, "learning_rate": 2.104800869057161e-07, "loss": 0.3339, "step": 10933 }, { "epoch": 2.75, "grad_norm": 0.28879308700561523, "learning_rate": 2.1006021005122057e-07, "loss": 0.291, "step": 10934 }, { "epoch": 2.750251509054326, "grad_norm": 0.3108893930912018, "learning_rate": 2.0964074342510187e-07, "loss": 0.3433, "step": 10935 }, { "epoch": 2.750503018108652, "grad_norm": 0.30376648902893066, "learning_rate": 2.0922168706328572e-07, "loss": 0.3144, "step": 10936 }, { "epoch": 2.750754527162978, "grad_norm": 0.2809644639492035, "learning_rate": 2.0880304100166004e-07, "loss": 0.3439, "step": 10937 }, { "epoch": 2.7510060362173037, "grad_norm": 0.29834315180778503, "learning_rate": 2.0838480527608118e-07, "loss": 0.3044, "step": 10938 }, { "epoch": 2.75125754527163, "grad_norm": 0.28084221482276917, "learning_rate": 2.0796697992236713e-07, "loss": 0.3288, "step": 10939 }, { "epoch": 2.7515090543259557, "grad_norm": 0.2842169404029846, "learning_rate": 2.0754956497630262e-07, "loss": 0.3257, "step": 10940 }, { "epoch": 2.751760563380282, "grad_norm": 0.2855300009250641, "learning_rate": 2.0713256047363573e-07, "loss": 0.3319, "step": 10941 }, { "epoch": 2.7520120724346078, "grad_norm": 0.2749643921852112, "learning_rate": 2.067159664500812e-07, "loss": 0.3204, "step": 10942 }, { "epoch": 2.7522635814889336, "grad_norm": 0.2852446436882019, "learning_rate": 2.0629978294131824e-07, "loss": 0.3053, "step": 10943 }, { "epoch": 2.75251509054326, "grad_norm": 0.2823444604873657, "learning_rate": 2.058840099829884e-07, "loss": 0.3115, "step": 10944 }, { "epoch": 2.7527665995975856, "grad_norm": 0.28557664155960083, "learning_rate": 2.0546864761070262e-07, "loss": 0.3398, "step": 10945 }, { "epoch": 2.7530181086519114, "grad_norm": 0.28932204842567444, "learning_rate": 2.050536958600313e-07, "loss": 0.3197, "step": 10946 }, { "epoch": 2.7532696177062377, "grad_norm": 0.27391183376312256, "learning_rate": 2.0463915476651496e-07, "loss": 0.3173, "step": 10947 }, { "epoch": 2.7535211267605635, "grad_norm": 0.28741297125816345, "learning_rate": 2.0422502436565462e-07, "loss": 0.3155, "step": 10948 }, { "epoch": 2.7537726358148893, "grad_norm": 0.2882639169692993, "learning_rate": 2.038113046929191e-07, "loss": 0.3166, "step": 10949 }, { "epoch": 2.7540241448692155, "grad_norm": 0.27754950523376465, "learning_rate": 2.0339799578373954e-07, "loss": 0.3292, "step": 10950 }, { "epoch": 2.7542756539235413, "grad_norm": 0.2686426043510437, "learning_rate": 2.0298509767351538e-07, "loss": 0.3227, "step": 10951 }, { "epoch": 2.754527162977867, "grad_norm": 0.3171316981315613, "learning_rate": 2.0257261039760667e-07, "loss": 0.351, "step": 10952 }, { "epoch": 2.7547786720321934, "grad_norm": 0.3160313367843628, "learning_rate": 2.0216053399134127e-07, "loss": 0.3276, "step": 10953 }, { "epoch": 2.755030181086519, "grad_norm": 0.3004869520664215, "learning_rate": 2.0174886849001207e-07, "loss": 0.2926, "step": 10954 }, { "epoch": 2.755281690140845, "grad_norm": 0.2918458580970764, "learning_rate": 2.0133761392887308e-07, "loss": 0.3214, "step": 10955 }, { "epoch": 2.7555331991951713, "grad_norm": 0.279135137796402, "learning_rate": 2.0092677034314834e-07, "loss": 0.3276, "step": 10956 }, { "epoch": 2.755784708249497, "grad_norm": 0.28750714659690857, "learning_rate": 2.0051633776802192e-07, "loss": 0.3329, "step": 10957 }, { "epoch": 2.756036217303823, "grad_norm": 0.29169023036956787, "learning_rate": 2.001063162386463e-07, "loss": 0.3052, "step": 10958 }, { "epoch": 2.756287726358149, "grad_norm": 0.3238070011138916, "learning_rate": 1.996967057901361e-07, "loss": 0.339, "step": 10959 }, { "epoch": 2.756539235412475, "grad_norm": 0.31088271737098694, "learning_rate": 1.9928750645757332e-07, "loss": 0.3617, "step": 10960 }, { "epoch": 2.7567907444668007, "grad_norm": 0.28064385056495667, "learning_rate": 1.9887871827600158e-07, "loss": 0.3263, "step": 10961 }, { "epoch": 2.757042253521127, "grad_norm": 0.2962087094783783, "learning_rate": 1.9847034128043175e-07, "loss": 0.3306, "step": 10962 }, { "epoch": 2.7572937625754528, "grad_norm": 0.2970735728740692, "learning_rate": 1.9806237550583974e-07, "loss": 0.3042, "step": 10963 }, { "epoch": 2.7575452716297786, "grad_norm": 0.30512532591819763, "learning_rate": 1.976548209871637e-07, "loss": 0.2982, "step": 10964 }, { "epoch": 2.757796780684105, "grad_norm": 0.3158949315547943, "learning_rate": 1.9724767775930965e-07, "loss": 0.3253, "step": 10965 }, { "epoch": 2.7580482897384306, "grad_norm": 0.30506396293640137, "learning_rate": 1.9684094585714575e-07, "loss": 0.3229, "step": 10966 }, { "epoch": 2.7582997987927564, "grad_norm": 0.2968839704990387, "learning_rate": 1.964346253155064e-07, "loss": 0.3332, "step": 10967 }, { "epoch": 2.7585513078470827, "grad_norm": 0.28158625960350037, "learning_rate": 1.9602871616918985e-07, "loss": 0.3014, "step": 10968 }, { "epoch": 2.7588028169014085, "grad_norm": 0.30511099100112915, "learning_rate": 1.9562321845296106e-07, "loss": 0.3046, "step": 10969 }, { "epoch": 2.7590543259557343, "grad_norm": 0.3017299175262451, "learning_rate": 1.9521813220154672e-07, "loss": 0.2923, "step": 10970 }, { "epoch": 2.7593058350100605, "grad_norm": 0.3004690706729889, "learning_rate": 1.948134574496413e-07, "loss": 0.3107, "step": 10971 }, { "epoch": 2.7595573440643864, "grad_norm": 0.28788694739341736, "learning_rate": 1.9440919423190208e-07, "loss": 0.3172, "step": 10972 }, { "epoch": 2.759808853118712, "grad_norm": 0.265638530254364, "learning_rate": 1.9400534258295078e-07, "loss": 0.3227, "step": 10973 }, { "epoch": 2.7600603621730384, "grad_norm": 0.32271093130111694, "learning_rate": 1.9360190253737698e-07, "loss": 0.297, "step": 10974 }, { "epoch": 2.760311871227364, "grad_norm": 0.30242830514907837, "learning_rate": 1.9319887412973083e-07, "loss": 0.3182, "step": 10975 }, { "epoch": 2.76056338028169, "grad_norm": 0.2825239300727844, "learning_rate": 1.9279625739453022e-07, "loss": 0.3349, "step": 10976 }, { "epoch": 2.7608148893360163, "grad_norm": 0.2780284285545349, "learning_rate": 1.923940523662554e-07, "loss": 0.3108, "step": 10977 }, { "epoch": 2.761066398390342, "grad_norm": 0.2897617816925049, "learning_rate": 1.9199225907935492e-07, "loss": 0.3242, "step": 10978 }, { "epoch": 2.761317907444668, "grad_norm": 0.287583589553833, "learning_rate": 1.9159087756823792e-07, "loss": 0.3197, "step": 10979 }, { "epoch": 2.761569416498994, "grad_norm": 0.27631574869155884, "learning_rate": 1.911899078672813e-07, "loss": 0.3144, "step": 10980 }, { "epoch": 2.76182092555332, "grad_norm": 0.2759281396865845, "learning_rate": 1.9078935001082487e-07, "loss": 0.3438, "step": 10981 }, { "epoch": 2.7620724346076457, "grad_norm": 0.30593448877334595, "learning_rate": 1.9038920403317507e-07, "loss": 0.3062, "step": 10982 }, { "epoch": 2.762323943661972, "grad_norm": 0.30097198486328125, "learning_rate": 1.8998946996860002e-07, "loss": 0.316, "step": 10983 }, { "epoch": 2.762575452716298, "grad_norm": 0.28567543625831604, "learning_rate": 1.8959014785133621e-07, "loss": 0.3111, "step": 10984 }, { "epoch": 2.7628269617706236, "grad_norm": 0.2774016857147217, "learning_rate": 1.8919123771558246e-07, "loss": 0.3204, "step": 10985 }, { "epoch": 2.76307847082495, "grad_norm": 0.317721962928772, "learning_rate": 1.8879273959550248e-07, "loss": 0.3089, "step": 10986 }, { "epoch": 2.7633299798792756, "grad_norm": 0.28425851464271545, "learning_rate": 1.8839465352522623e-07, "loss": 0.3328, "step": 10987 }, { "epoch": 2.7635814889336014, "grad_norm": 0.2964561879634857, "learning_rate": 1.8799697953884587e-07, "loss": 0.3114, "step": 10988 }, { "epoch": 2.7638329979879277, "grad_norm": 0.2809069752693176, "learning_rate": 1.8759971767042085e-07, "loss": 0.3146, "step": 10989 }, { "epoch": 2.7640845070422535, "grad_norm": 0.305742084980011, "learning_rate": 1.8720286795397335e-07, "loss": 0.3204, "step": 10990 }, { "epoch": 2.7643360160965793, "grad_norm": 0.298260360956192, "learning_rate": 1.868064304234918e-07, "loss": 0.3394, "step": 10991 }, { "epoch": 2.7645875251509056, "grad_norm": 0.2834104299545288, "learning_rate": 1.8641040511292786e-07, "loss": 0.3074, "step": 10992 }, { "epoch": 2.7648390342052314, "grad_norm": 0.2777165472507477, "learning_rate": 1.8601479205619945e-07, "loss": 0.3359, "step": 10993 }, { "epoch": 2.765090543259557, "grad_norm": 0.29171082377433777, "learning_rate": 1.856195912871872e-07, "loss": 0.3155, "step": 10994 }, { "epoch": 2.7653420523138834, "grad_norm": 0.29711124300956726, "learning_rate": 1.8522480283973908e-07, "loss": 0.3459, "step": 10995 }, { "epoch": 2.765593561368209, "grad_norm": 0.32558995485305786, "learning_rate": 1.8483042674766527e-07, "loss": 0.3161, "step": 10996 }, { "epoch": 2.765845070422535, "grad_norm": 0.29762402176856995, "learning_rate": 1.8443646304474206e-07, "loss": 0.3293, "step": 10997 }, { "epoch": 2.7660965794768613, "grad_norm": 0.2941772937774658, "learning_rate": 1.8404291176470857e-07, "loss": 0.3248, "step": 10998 }, { "epoch": 2.766348088531187, "grad_norm": 0.2893567681312561, "learning_rate": 1.836497729412723e-07, "loss": 0.3181, "step": 10999 }, { "epoch": 2.766599597585513, "grad_norm": 0.28409847617149353, "learning_rate": 1.8325704660810128e-07, "loss": 0.3346, "step": 11000 }, { "epoch": 2.766851106639839, "grad_norm": 0.2963768541812897, "learning_rate": 1.8286473279883142e-07, "loss": 0.3117, "step": 11001 }, { "epoch": 2.767102615694165, "grad_norm": 0.30031341314315796, "learning_rate": 1.8247283154706085e-07, "loss": 0.3261, "step": 11002 }, { "epoch": 2.7673541247484907, "grad_norm": 0.292661190032959, "learning_rate": 1.8208134288635438e-07, "loss": 0.3137, "step": 11003 }, { "epoch": 2.767605633802817, "grad_norm": 0.2892690598964691, "learning_rate": 1.816902668502396e-07, "loss": 0.3256, "step": 11004 }, { "epoch": 2.767857142857143, "grad_norm": 0.2964717745780945, "learning_rate": 1.812996034722103e-07, "loss": 0.3335, "step": 11005 }, { "epoch": 2.7681086519114686, "grad_norm": 0.26202383637428284, "learning_rate": 1.809093527857242e-07, "loss": 0.3231, "step": 11006 }, { "epoch": 2.768360160965795, "grad_norm": 0.26758965849876404, "learning_rate": 1.805195148242045e-07, "loss": 0.3121, "step": 11007 }, { "epoch": 2.7686116700201207, "grad_norm": 0.2875462472438812, "learning_rate": 1.8013008962103674e-07, "loss": 0.3221, "step": 11008 }, { "epoch": 2.7688631790744465, "grad_norm": 0.2862987220287323, "learning_rate": 1.7974107720957478e-07, "loss": 0.33, "step": 11009 }, { "epoch": 2.7691146881287727, "grad_norm": 0.289815753698349, "learning_rate": 1.7935247762313312e-07, "loss": 0.318, "step": 11010 }, { "epoch": 2.7693661971830985, "grad_norm": 0.2913775146007538, "learning_rate": 1.7896429089499455e-07, "loss": 0.3035, "step": 11011 }, { "epoch": 2.7696177062374243, "grad_norm": 0.2775018513202667, "learning_rate": 1.7857651705840419e-07, "loss": 0.318, "step": 11012 }, { "epoch": 2.7698692152917506, "grad_norm": 0.27448347210884094, "learning_rate": 1.781891561465726e-07, "loss": 0.339, "step": 11013 }, { "epoch": 2.7701207243460764, "grad_norm": 0.31805574893951416, "learning_rate": 1.778022081926739e-07, "loss": 0.3193, "step": 11014 }, { "epoch": 2.770372233400402, "grad_norm": 0.292459100484848, "learning_rate": 1.774156732298493e-07, "loss": 0.3134, "step": 11015 }, { "epoch": 2.7706237424547284, "grad_norm": 0.28790146112442017, "learning_rate": 1.7702955129120125e-07, "loss": 0.3145, "step": 11016 }, { "epoch": 2.7708752515090542, "grad_norm": 0.26574012637138367, "learning_rate": 1.7664384240979993e-07, "loss": 0.3131, "step": 11017 }, { "epoch": 2.77112676056338, "grad_norm": 0.2886962890625, "learning_rate": 1.7625854661867947e-07, "loss": 0.2903, "step": 11018 }, { "epoch": 2.7713782696177063, "grad_norm": 0.2845294773578644, "learning_rate": 1.7587366395083683e-07, "loss": 0.3289, "step": 11019 }, { "epoch": 2.771629778672032, "grad_norm": 0.27203091979026794, "learning_rate": 1.754891944392356e-07, "loss": 0.3224, "step": 11020 }, { "epoch": 2.771881287726358, "grad_norm": 0.27143409848213196, "learning_rate": 1.751051381168023e-07, "loss": 0.3132, "step": 11021 }, { "epoch": 2.772132796780684, "grad_norm": 0.2910093665122986, "learning_rate": 1.7472149501643e-07, "loss": 0.3269, "step": 11022 }, { "epoch": 2.77238430583501, "grad_norm": 0.29057279229164124, "learning_rate": 1.7433826517097407e-07, "loss": 0.3368, "step": 11023 }, { "epoch": 2.7726358148893357, "grad_norm": 0.2875846326351166, "learning_rate": 1.7395544861325718e-07, "loss": 0.3195, "step": 11024 }, { "epoch": 2.772887323943662, "grad_norm": 0.2830538749694824, "learning_rate": 1.7357304537606367e-07, "loss": 0.2901, "step": 11025 }, { "epoch": 2.773138832997988, "grad_norm": 0.29084324836730957, "learning_rate": 1.7319105549214564e-07, "loss": 0.3114, "step": 11026 }, { "epoch": 2.7733903420523136, "grad_norm": 0.29110777378082275, "learning_rate": 1.7280947899421695e-07, "loss": 0.3129, "step": 11027 }, { "epoch": 2.77364185110664, "grad_norm": 0.2726239562034607, "learning_rate": 1.7242831591495701e-07, "loss": 0.3164, "step": 11028 }, { "epoch": 2.7738933601609657, "grad_norm": 0.28240305185317993, "learning_rate": 1.7204756628701192e-07, "loss": 0.3127, "step": 11029 }, { "epoch": 2.774144869215292, "grad_norm": 0.29125747084617615, "learning_rate": 1.716672301429878e-07, "loss": 0.3089, "step": 11030 }, { "epoch": 2.7743963782696177, "grad_norm": 0.2842182219028473, "learning_rate": 1.7128730751546086e-07, "loss": 0.3137, "step": 11031 }, { "epoch": 2.7746478873239435, "grad_norm": 0.28499892354011536, "learning_rate": 1.709077984369667e-07, "loss": 0.3305, "step": 11032 }, { "epoch": 2.7748993963782698, "grad_norm": 0.29583901166915894, "learning_rate": 1.7052870294000933e-07, "loss": 0.3467, "step": 11033 }, { "epoch": 2.7751509054325956, "grad_norm": 0.2947319746017456, "learning_rate": 1.7015002105705502e-07, "loss": 0.3388, "step": 11034 }, { "epoch": 2.7754024144869214, "grad_norm": 0.28705930709838867, "learning_rate": 1.6977175282053672e-07, "loss": 0.3503, "step": 11035 }, { "epoch": 2.7756539235412476, "grad_norm": 0.27521228790283203, "learning_rate": 1.6939389826284903e-07, "loss": 0.3195, "step": 11036 }, { "epoch": 2.7759054325955734, "grad_norm": 0.2797527313232422, "learning_rate": 1.690164574163544e-07, "loss": 0.3026, "step": 11037 }, { "epoch": 2.7761569416498992, "grad_norm": 0.29513728618621826, "learning_rate": 1.686394303133776e-07, "loss": 0.3057, "step": 11038 }, { "epoch": 2.7764084507042255, "grad_norm": 0.27043822407722473, "learning_rate": 1.6826281698620827e-07, "loss": 0.3386, "step": 11039 }, { "epoch": 2.7766599597585513, "grad_norm": 0.2766996920108795, "learning_rate": 1.6788661746710178e-07, "loss": 0.3178, "step": 11040 }, { "epoch": 2.7769114688128775, "grad_norm": 0.2824966609477997, "learning_rate": 1.6751083178827675e-07, "loss": 0.3275, "step": 11041 }, { "epoch": 2.7771629778672033, "grad_norm": 0.27287888526916504, "learning_rate": 1.6713545998191748e-07, "loss": 0.3203, "step": 11042 }, { "epoch": 2.777414486921529, "grad_norm": 0.28277119994163513, "learning_rate": 1.6676050208017102e-07, "loss": 0.3313, "step": 11043 }, { "epoch": 2.7776659959758554, "grad_norm": 0.2877527177333832, "learning_rate": 1.663859581151517e-07, "loss": 0.3023, "step": 11044 }, { "epoch": 2.777917505030181, "grad_norm": 0.2927461266517639, "learning_rate": 1.6601182811893545e-07, "loss": 0.334, "step": 11045 }, { "epoch": 2.778169014084507, "grad_norm": 0.28662925958633423, "learning_rate": 1.6563811212356506e-07, "loss": 0.32, "step": 11046 }, { "epoch": 2.7784205231388333, "grad_norm": 0.300090491771698, "learning_rate": 1.6526481016104655e-07, "loss": 0.3101, "step": 11047 }, { "epoch": 2.778672032193159, "grad_norm": 0.28218507766723633, "learning_rate": 1.6489192226335104e-07, "loss": 0.3205, "step": 11048 }, { "epoch": 2.778923541247485, "grad_norm": 0.2887854278087616, "learning_rate": 1.6451944846241408e-07, "loss": 0.3097, "step": 11049 }, { "epoch": 2.779175050301811, "grad_norm": 0.28003057837486267, "learning_rate": 1.641473887901357e-07, "loss": 0.3098, "step": 11050 }, { "epoch": 2.779426559356137, "grad_norm": 0.2808135449886322, "learning_rate": 1.6377574327838041e-07, "loss": 0.319, "step": 11051 }, { "epoch": 2.7796780684104627, "grad_norm": 0.2711102366447449, "learning_rate": 1.6340451195897722e-07, "loss": 0.3376, "step": 11052 }, { "epoch": 2.779929577464789, "grad_norm": 0.30259403586387634, "learning_rate": 1.6303369486372067e-07, "loss": 0.3333, "step": 11053 }, { "epoch": 2.7801810865191148, "grad_norm": 0.27301356196403503, "learning_rate": 1.6266329202436758e-07, "loss": 0.318, "step": 11054 }, { "epoch": 2.7804325955734406, "grad_norm": 0.28594970703125, "learning_rate": 1.622933034726415e-07, "loss": 0.3366, "step": 11055 }, { "epoch": 2.780684104627767, "grad_norm": 0.2891000509262085, "learning_rate": 1.6192372924022925e-07, "loss": 0.3187, "step": 11056 }, { "epoch": 2.7809356136820926, "grad_norm": 0.299022376537323, "learning_rate": 1.6155456935878277e-07, "loss": 0.3196, "step": 11057 }, { "epoch": 2.7811871227364184, "grad_norm": 0.27598682045936584, "learning_rate": 1.6118582385991787e-07, "loss": 0.3121, "step": 11058 }, { "epoch": 2.7814386317907447, "grad_norm": 0.29095298051834106, "learning_rate": 1.6081749277521598e-07, "loss": 0.2999, "step": 11059 }, { "epoch": 2.7816901408450705, "grad_norm": 0.28317791223526, "learning_rate": 1.604495761362218e-07, "loss": 0.3121, "step": 11060 }, { "epoch": 2.7819416498993963, "grad_norm": 0.2773258686065674, "learning_rate": 1.600820739744452e-07, "loss": 0.3309, "step": 11061 }, { "epoch": 2.7821931589537225, "grad_norm": 0.28628113865852356, "learning_rate": 1.5971498632136096e-07, "loss": 0.3194, "step": 11062 }, { "epoch": 2.7824446680080483, "grad_norm": 0.28664273023605347, "learning_rate": 1.5934831320840672e-07, "loss": 0.3022, "step": 11063 }, { "epoch": 2.782696177062374, "grad_norm": 0.2975166141986847, "learning_rate": 1.5898205466698736e-07, "loss": 0.3185, "step": 11064 }, { "epoch": 2.7829476861167004, "grad_norm": 0.27436116337776184, "learning_rate": 1.586162107284689e-07, "loss": 0.3214, "step": 11065 }, { "epoch": 2.783199195171026, "grad_norm": 0.2933892607688904, "learning_rate": 1.5825078142418516e-07, "loss": 0.3318, "step": 11066 }, { "epoch": 2.783450704225352, "grad_norm": 0.2979046702384949, "learning_rate": 1.578857667854311e-07, "loss": 0.3252, "step": 11067 }, { "epoch": 2.7837022132796783, "grad_norm": 0.2885100841522217, "learning_rate": 1.5752116684347008e-07, "loss": 0.3235, "step": 11068 }, { "epoch": 2.783953722334004, "grad_norm": 0.2883824110031128, "learning_rate": 1.5715698162952597e-07, "loss": 0.3425, "step": 11069 }, { "epoch": 2.78420523138833, "grad_norm": 0.2968808114528656, "learning_rate": 1.5679321117478995e-07, "loss": 0.3058, "step": 11070 }, { "epoch": 2.784456740442656, "grad_norm": 0.27541083097457886, "learning_rate": 1.56429855510416e-07, "loss": 0.3413, "step": 11071 }, { "epoch": 2.784708249496982, "grad_norm": 0.26949572563171387, "learning_rate": 1.5606691466752366e-07, "loss": 0.3173, "step": 11072 }, { "epoch": 2.7849597585513077, "grad_norm": 0.28457048535346985, "learning_rate": 1.5570438867719695e-07, "loss": 0.3225, "step": 11073 }, { "epoch": 2.785211267605634, "grad_norm": 0.27355846762657166, "learning_rate": 1.553422775704827e-07, "loss": 0.322, "step": 11074 }, { "epoch": 2.78546277665996, "grad_norm": 0.2736510634422302, "learning_rate": 1.5498058137839555e-07, "loss": 0.3038, "step": 11075 }, { "epoch": 2.7857142857142856, "grad_norm": 0.2820262908935547, "learning_rate": 1.5461930013191018e-07, "loss": 0.3123, "step": 11076 }, { "epoch": 2.785965794768612, "grad_norm": 0.2771058976650238, "learning_rate": 1.5425843386197015e-07, "loss": 0.3065, "step": 11077 }, { "epoch": 2.7862173038229376, "grad_norm": 0.2988390326499939, "learning_rate": 1.5389798259947908e-07, "loss": 0.3116, "step": 11078 }, { "epoch": 2.7864688128772634, "grad_norm": 0.27741125226020813, "learning_rate": 1.5353794637531005e-07, "loss": 0.305, "step": 11079 }, { "epoch": 2.7867203219315897, "grad_norm": 0.3002147078514099, "learning_rate": 1.5317832522029563e-07, "loss": 0.3438, "step": 11080 }, { "epoch": 2.7869718309859155, "grad_norm": 0.2924443483352661, "learning_rate": 1.5281911916523672e-07, "loss": 0.3429, "step": 11081 }, { "epoch": 2.7872233400402413, "grad_norm": 0.2645252048969269, "learning_rate": 1.524603282408954e-07, "loss": 0.3502, "step": 11082 }, { "epoch": 2.7874748490945676, "grad_norm": 0.26844504475593567, "learning_rate": 1.5210195247800153e-07, "loss": 0.3112, "step": 11083 }, { "epoch": 2.7877263581488934, "grad_norm": 0.2783125638961792, "learning_rate": 1.517439919072472e-07, "loss": 0.3137, "step": 11084 }, { "epoch": 2.787977867203219, "grad_norm": 0.29496461153030396, "learning_rate": 1.5138644655928848e-07, "loss": 0.3234, "step": 11085 }, { "epoch": 2.7882293762575454, "grad_norm": 0.3056720793247223, "learning_rate": 1.5102931646474917e-07, "loss": 0.3139, "step": 11086 }, { "epoch": 2.788480885311871, "grad_norm": 0.2965206801891327, "learning_rate": 1.506726016542126e-07, "loss": 0.3233, "step": 11087 }, { "epoch": 2.788732394366197, "grad_norm": 0.27822843194007874, "learning_rate": 1.50316302158231e-07, "loss": 0.3161, "step": 11088 }, { "epoch": 2.7889839034205233, "grad_norm": 0.2970999479293823, "learning_rate": 1.4996041800731832e-07, "loss": 0.3139, "step": 11089 }, { "epoch": 2.789235412474849, "grad_norm": 0.2687007188796997, "learning_rate": 1.4960494923195457e-07, "loss": 0.3199, "step": 11090 }, { "epoch": 2.789486921529175, "grad_norm": 0.31234511733055115, "learning_rate": 1.4924989586258265e-07, "loss": 0.3306, "step": 11091 }, { "epoch": 2.789738430583501, "grad_norm": 0.312133252620697, "learning_rate": 1.4889525792961103e-07, "loss": 0.3264, "step": 11092 }, { "epoch": 2.789989939637827, "grad_norm": 0.28549107909202576, "learning_rate": 1.4854103546341204e-07, "loss": 0.3121, "step": 11093 }, { "epoch": 2.7902414486921527, "grad_norm": 0.28895583748817444, "learning_rate": 1.4818722849432253e-07, "loss": 0.3081, "step": 11094 }, { "epoch": 2.790492957746479, "grad_norm": 0.2902131974697113, "learning_rate": 1.4783383705264444e-07, "loss": 0.3254, "step": 11095 }, { "epoch": 2.790744466800805, "grad_norm": 0.2827642858028412, "learning_rate": 1.474808611686429e-07, "loss": 0.3218, "step": 11096 }, { "epoch": 2.7909959758551306, "grad_norm": 0.3049061894416809, "learning_rate": 1.4712830087254825e-07, "loss": 0.3173, "step": 11097 }, { "epoch": 2.791247484909457, "grad_norm": 0.2864426076412201, "learning_rate": 1.467761561945552e-07, "loss": 0.326, "step": 11098 }, { "epoch": 2.7914989939637826, "grad_norm": 0.28067395091056824, "learning_rate": 1.4642442716482298e-07, "loss": 0.3218, "step": 11099 }, { "epoch": 2.7917505030181085, "grad_norm": 0.28718701004981995, "learning_rate": 1.4607311381347467e-07, "loss": 0.3006, "step": 11100 }, { "epoch": 2.7920020120724347, "grad_norm": 0.30900463461875916, "learning_rate": 1.457222161705979e-07, "loss": 0.3336, "step": 11101 }, { "epoch": 2.7922535211267605, "grad_norm": 0.31647589802742004, "learning_rate": 1.453717342662453e-07, "loss": 0.3224, "step": 11102 }, { "epoch": 2.7925050301810863, "grad_norm": 0.27756398916244507, "learning_rate": 1.4502166813043283e-07, "loss": 0.3268, "step": 11103 }, { "epoch": 2.7927565392354126, "grad_norm": 0.2882534861564636, "learning_rate": 1.446720177931421e-07, "loss": 0.3331, "step": 11104 }, { "epoch": 2.7930080482897384, "grad_norm": 0.2910817861557007, "learning_rate": 1.4432278328431748e-07, "loss": 0.3222, "step": 11105 }, { "epoch": 2.793259557344064, "grad_norm": 0.26894426345825195, "learning_rate": 1.4397396463387059e-07, "loss": 0.3128, "step": 11106 }, { "epoch": 2.7935110663983904, "grad_norm": 0.28713172674179077, "learning_rate": 1.4362556187167365e-07, "loss": 0.3278, "step": 11107 }, { "epoch": 2.7937625754527162, "grad_norm": 0.28895851969718933, "learning_rate": 1.4327757502756668e-07, "loss": 0.3292, "step": 11108 }, { "epoch": 2.794014084507042, "grad_norm": 0.285756379365921, "learning_rate": 1.4293000413135084e-07, "loss": 0.3139, "step": 11109 }, { "epoch": 2.7942655935613683, "grad_norm": 0.3005828857421875, "learning_rate": 1.4258284921279565e-07, "loss": 0.3371, "step": 11110 }, { "epoch": 2.794517102615694, "grad_norm": 0.282633900642395, "learning_rate": 1.4223611030163064e-07, "loss": 0.3343, "step": 11111 }, { "epoch": 2.79476861167002, "grad_norm": 0.29902184009552, "learning_rate": 1.4188978742755322e-07, "loss": 0.2928, "step": 11112 }, { "epoch": 2.795020120724346, "grad_norm": 0.3080330491065979, "learning_rate": 1.415438806202224e-07, "loss": 0.3043, "step": 11113 }, { "epoch": 2.795271629778672, "grad_norm": 0.2750524878501892, "learning_rate": 1.4119838990926448e-07, "loss": 0.3241, "step": 11114 }, { "epoch": 2.7955231388329977, "grad_norm": 0.27586209774017334, "learning_rate": 1.4085331532426748e-07, "loss": 0.326, "step": 11115 }, { "epoch": 2.795774647887324, "grad_norm": 0.29278305172920227, "learning_rate": 1.405086568947861e-07, "loss": 0.3371, "step": 11116 }, { "epoch": 2.79602615694165, "grad_norm": 0.30196496844291687, "learning_rate": 1.401644146503367e-07, "loss": 0.3354, "step": 11117 }, { "epoch": 2.7962776659959756, "grad_norm": 0.2952038645744324, "learning_rate": 1.3982058862040238e-07, "loss": 0.2915, "step": 11118 }, { "epoch": 2.796529175050302, "grad_norm": 0.29630571603775024, "learning_rate": 1.3947717883442903e-07, "loss": 0.3196, "step": 11119 }, { "epoch": 2.7967806841046277, "grad_norm": 0.27728769183158875, "learning_rate": 1.391341853218281e-07, "loss": 0.3405, "step": 11120 }, { "epoch": 2.7970321931589535, "grad_norm": 0.272607684135437, "learning_rate": 1.3879160811197556e-07, "loss": 0.3274, "step": 11121 }, { "epoch": 2.7972837022132797, "grad_norm": 0.29455098509788513, "learning_rate": 1.3844944723420906e-07, "loss": 0.3242, "step": 11122 }, { "epoch": 2.7975352112676055, "grad_norm": 0.2698933184146881, "learning_rate": 1.3810770271783457e-07, "loss": 0.3153, "step": 11123 }, { "epoch": 2.7977867203219313, "grad_norm": 0.2703859806060791, "learning_rate": 1.3776637459211872e-07, "loss": 0.3044, "step": 11124 }, { "epoch": 2.7980382293762576, "grad_norm": 0.29078856110572815, "learning_rate": 1.3742546288629532e-07, "loss": 0.3257, "step": 11125 }, { "epoch": 2.7982897384305834, "grad_norm": 0.2826457917690277, "learning_rate": 1.3708496762956047e-07, "loss": 0.3087, "step": 11126 }, { "epoch": 2.7985412474849096, "grad_norm": 0.2665698826313019, "learning_rate": 1.367448888510764e-07, "loss": 0.3194, "step": 11127 }, { "epoch": 2.7987927565392354, "grad_norm": 0.27445247769355774, "learning_rate": 1.3640522657996757e-07, "loss": 0.3108, "step": 11128 }, { "epoch": 2.7990442655935612, "grad_norm": 0.2883208096027374, "learning_rate": 1.3606598084532517e-07, "loss": 0.3259, "step": 11129 }, { "epoch": 2.7992957746478875, "grad_norm": 0.2990553677082062, "learning_rate": 1.357271516762021e-07, "loss": 0.3369, "step": 11130 }, { "epoch": 2.7995472837022133, "grad_norm": 0.2815016806125641, "learning_rate": 1.3538873910161788e-07, "loss": 0.3235, "step": 11131 }, { "epoch": 2.799798792756539, "grad_norm": 0.2700446844100952, "learning_rate": 1.3505074315055545e-07, "loss": 0.3025, "step": 11132 }, { "epoch": 2.8000503018108653, "grad_norm": 0.2885153889656067, "learning_rate": 1.347131638519622e-07, "loss": 0.3216, "step": 11133 }, { "epoch": 2.800301810865191, "grad_norm": 0.2758094370365143, "learning_rate": 1.3437600123474837e-07, "loss": 0.3108, "step": 11134 }, { "epoch": 2.800553319919517, "grad_norm": 0.2886246144771576, "learning_rate": 1.3403925532779137e-07, "loss": 0.3076, "step": 11135 }, { "epoch": 2.800804828973843, "grad_norm": 0.28727173805236816, "learning_rate": 1.3370292615993098e-07, "loss": 0.3063, "step": 11136 }, { "epoch": 2.801056338028169, "grad_norm": 0.2814168632030487, "learning_rate": 1.333670137599713e-07, "loss": 0.3179, "step": 11137 }, { "epoch": 2.801307847082495, "grad_norm": 0.28780585527420044, "learning_rate": 1.3303151815668103e-07, "loss": 0.3431, "step": 11138 }, { "epoch": 2.801559356136821, "grad_norm": 0.296987384557724, "learning_rate": 1.3269643937879384e-07, "loss": 0.3119, "step": 11139 }, { "epoch": 2.801810865191147, "grad_norm": 0.2830405831336975, "learning_rate": 1.323617774550068e-07, "loss": 0.3119, "step": 11140 }, { "epoch": 2.802062374245473, "grad_norm": 0.29808276891708374, "learning_rate": 1.3202753241398192e-07, "loss": 0.3218, "step": 11141 }, { "epoch": 2.802313883299799, "grad_norm": 0.27036961913108826, "learning_rate": 1.316937042843447e-07, "loss": 0.3352, "step": 11142 }, { "epoch": 2.8025653923541247, "grad_norm": 0.2818096876144409, "learning_rate": 1.3136029309468612e-07, "loss": 0.3312, "step": 11143 }, { "epoch": 2.802816901408451, "grad_norm": 0.3205699026584625, "learning_rate": 1.3102729887355947e-07, "loss": 0.3306, "step": 11144 }, { "epoch": 2.8030684104627768, "grad_norm": 0.28567755222320557, "learning_rate": 1.3069472164948526e-07, "loss": 0.3176, "step": 11145 }, { "epoch": 2.8033199195171026, "grad_norm": 0.30168774724006653, "learning_rate": 1.3036256145094516e-07, "loss": 0.3242, "step": 11146 }, { "epoch": 2.803571428571429, "grad_norm": 0.304684579372406, "learning_rate": 1.3003081830638752e-07, "loss": 0.3341, "step": 11147 }, { "epoch": 2.8038229376257546, "grad_norm": 0.29443493485450745, "learning_rate": 1.2969949224422407e-07, "loss": 0.3268, "step": 11148 }, { "epoch": 2.8040744466800804, "grad_norm": 0.2812149226665497, "learning_rate": 1.2936858329283043e-07, "loss": 0.3193, "step": 11149 }, { "epoch": 2.8043259557344067, "grad_norm": 0.3072992265224457, "learning_rate": 1.290380914805478e-07, "loss": 0.3232, "step": 11150 }, { "epoch": 2.8045774647887325, "grad_norm": 0.2899894416332245, "learning_rate": 1.2870801683567913e-07, "loss": 0.3289, "step": 11151 }, { "epoch": 2.8048289738430583, "grad_norm": 0.29002153873443604, "learning_rate": 1.2837835938649456e-07, "loss": 0.3479, "step": 11152 }, { "epoch": 2.8050804828973845, "grad_norm": 0.30149370431900024, "learning_rate": 1.2804911916122596e-07, "loss": 0.305, "step": 11153 }, { "epoch": 2.8053319919517103, "grad_norm": 0.2866424322128296, "learning_rate": 1.2772029618807247e-07, "loss": 0.3309, "step": 11154 }, { "epoch": 2.805583501006036, "grad_norm": 0.3057340383529663, "learning_rate": 1.2739189049519429e-07, "loss": 0.2923, "step": 11155 }, { "epoch": 2.8058350100603624, "grad_norm": 0.27844423055648804, "learning_rate": 1.2706390211071784e-07, "loss": 0.3382, "step": 11156 }, { "epoch": 2.806086519114688, "grad_norm": 0.28191810846328735, "learning_rate": 1.2673633106273286e-07, "loss": 0.3181, "step": 11157 }, { "epoch": 2.806338028169014, "grad_norm": 0.2744201719760895, "learning_rate": 1.2640917737929414e-07, "loss": 0.3168, "step": 11158 }, { "epoch": 2.8065895372233403, "grad_norm": 0.28745532035827637, "learning_rate": 1.2608244108842094e-07, "loss": 0.3212, "step": 11159 }, { "epoch": 2.806841046277666, "grad_norm": 0.2886742055416107, "learning_rate": 1.2575612221809476e-07, "loss": 0.3253, "step": 11160 }, { "epoch": 2.807092555331992, "grad_norm": 0.2701511085033417, "learning_rate": 1.2543022079626376e-07, "loss": 0.3295, "step": 11161 }, { "epoch": 2.807344064386318, "grad_norm": 0.2751189172267914, "learning_rate": 1.25104736850839e-07, "loss": 0.3468, "step": 11162 }, { "epoch": 2.807595573440644, "grad_norm": 0.2670914828777313, "learning_rate": 1.2477967040969708e-07, "loss": 0.3246, "step": 11163 }, { "epoch": 2.8078470824949697, "grad_norm": 0.27149543166160583, "learning_rate": 1.2445502150067623e-07, "loss": 0.3093, "step": 11164 }, { "epoch": 2.808098591549296, "grad_norm": 0.3040313124656677, "learning_rate": 1.2413079015158202e-07, "loss": 0.3027, "step": 11165 }, { "epoch": 2.808350100603622, "grad_norm": 0.2989967465400696, "learning_rate": 1.238069763901817e-07, "loss": 0.3109, "step": 11166 }, { "epoch": 2.8086016096579476, "grad_norm": 0.2808802127838135, "learning_rate": 1.2348358024420914e-07, "loss": 0.3019, "step": 11167 }, { "epoch": 2.808853118712274, "grad_norm": 0.28532177209854126, "learning_rate": 1.2316060174136e-07, "loss": 0.3205, "step": 11168 }, { "epoch": 2.8091046277665996, "grad_norm": 0.29730409383773804, "learning_rate": 1.2283804090929608e-07, "loss": 0.3281, "step": 11169 }, { "epoch": 2.8093561368209254, "grad_norm": 0.28388866782188416, "learning_rate": 1.2251589777564242e-07, "loss": 0.3285, "step": 11170 }, { "epoch": 2.8096076458752517, "grad_norm": 0.2799801230430603, "learning_rate": 1.221941723679887e-07, "loss": 0.3403, "step": 11171 }, { "epoch": 2.8098591549295775, "grad_norm": 0.2923330068588257, "learning_rate": 1.2187286471388893e-07, "loss": 0.36, "step": 11172 }, { "epoch": 2.8101106639839033, "grad_norm": 0.31289908289909363, "learning_rate": 1.2155197484086055e-07, "loss": 0.3146, "step": 11173 }, { "epoch": 2.8103621730382295, "grad_norm": 0.30059537291526794, "learning_rate": 1.2123150277638662e-07, "loss": 0.3224, "step": 11174 }, { "epoch": 2.8106136820925554, "grad_norm": 0.27516844868659973, "learning_rate": 1.2091144854791237e-07, "loss": 0.3201, "step": 11175 }, { "epoch": 2.810865191146881, "grad_norm": 0.2999837398529053, "learning_rate": 1.2059181218284922e-07, "loss": 0.3061, "step": 11176 }, { "epoch": 2.8111167002012074, "grad_norm": 0.2801435887813568, "learning_rate": 1.2027259370857193e-07, "loss": 0.3218, "step": 11177 }, { "epoch": 2.811368209255533, "grad_norm": 0.28390711545944214, "learning_rate": 1.1995379315241972e-07, "loss": 0.3263, "step": 11178 }, { "epoch": 2.811619718309859, "grad_norm": 0.28144189715385437, "learning_rate": 1.1963541054169526e-07, "loss": 0.3042, "step": 11179 }, { "epoch": 2.8118712273641853, "grad_norm": 0.3061021566390991, "learning_rate": 1.193174459036661e-07, "loss": 0.3118, "step": 11180 }, { "epoch": 2.812122736418511, "grad_norm": 0.267780601978302, "learning_rate": 1.1899989926556498e-07, "loss": 0.334, "step": 11181 }, { "epoch": 2.812374245472837, "grad_norm": 0.301758736371994, "learning_rate": 1.1868277065458677e-07, "loss": 0.3052, "step": 11182 }, { "epoch": 2.812625754527163, "grad_norm": 0.2828764319419861, "learning_rate": 1.1836606009789198e-07, "loss": 0.3152, "step": 11183 }, { "epoch": 2.812877263581489, "grad_norm": 0.27735185623168945, "learning_rate": 1.1804976762260445e-07, "loss": 0.3104, "step": 11184 }, { "epoch": 2.8131287726358147, "grad_norm": 0.2653750777244568, "learning_rate": 1.1773389325581363e-07, "loss": 0.3207, "step": 11185 }, { "epoch": 2.813380281690141, "grad_norm": 0.2903718948364258, "learning_rate": 1.1741843702457068e-07, "loss": 0.3296, "step": 11186 }, { "epoch": 2.813631790744467, "grad_norm": 0.2752559185028076, "learning_rate": 1.171033989558934e-07, "loss": 0.3011, "step": 11187 }, { "epoch": 2.8138832997987926, "grad_norm": 0.3056381642818451, "learning_rate": 1.16788779076763e-07, "loss": 0.3116, "step": 11188 }, { "epoch": 2.814134808853119, "grad_norm": 0.28672274947166443, "learning_rate": 1.164745774141246e-07, "loss": 0.3503, "step": 11189 }, { "epoch": 2.8143863179074446, "grad_norm": 0.2924738824367523, "learning_rate": 1.1616079399488667e-07, "loss": 0.3123, "step": 11190 }, { "epoch": 2.8146378269617705, "grad_norm": 0.2763361930847168, "learning_rate": 1.1584742884592382e-07, "loss": 0.2954, "step": 11191 }, { "epoch": 2.8148893360160967, "grad_norm": 0.28140226006507874, "learning_rate": 1.1553448199407346e-07, "loss": 0.3254, "step": 11192 }, { "epoch": 2.8151408450704225, "grad_norm": 0.281065434217453, "learning_rate": 1.1522195346613752e-07, "loss": 0.3213, "step": 11193 }, { "epoch": 2.8153923541247483, "grad_norm": 0.2887207269668579, "learning_rate": 1.1490984328888288e-07, "loss": 0.3029, "step": 11194 }, { "epoch": 2.8156438631790746, "grad_norm": 0.26352542638778687, "learning_rate": 1.1459815148903819e-07, "loss": 0.3126, "step": 11195 }, { "epoch": 2.8158953722334004, "grad_norm": 0.2894093692302704, "learning_rate": 1.1428687809329986e-07, "loss": 0.3023, "step": 11196 }, { "epoch": 2.816146881287726, "grad_norm": 0.285803884267807, "learning_rate": 1.139760231283249e-07, "loss": 0.3372, "step": 11197 }, { "epoch": 2.8163983903420524, "grad_norm": 0.2947225868701935, "learning_rate": 1.1366558662073646e-07, "loss": 0.3078, "step": 11198 }, { "epoch": 2.816649899396378, "grad_norm": 0.3021707236766815, "learning_rate": 1.1335556859712216e-07, "loss": 0.3293, "step": 11199 }, { "epoch": 2.816901408450704, "grad_norm": 0.27112025022506714, "learning_rate": 1.1304596908403243e-07, "loss": 0.3064, "step": 11200 }, { "epoch": 2.8171529175050303, "grad_norm": 0.2846040427684784, "learning_rate": 1.1273678810798272e-07, "loss": 0.3141, "step": 11201 }, { "epoch": 2.817404426559356, "grad_norm": 0.28327932953834534, "learning_rate": 1.1242802569545241e-07, "loss": 0.334, "step": 11202 }, { "epoch": 2.817655935613682, "grad_norm": 0.2747574746608734, "learning_rate": 1.1211968187288535e-07, "loss": 0.3375, "step": 11203 }, { "epoch": 2.817907444668008, "grad_norm": 0.2933041751384735, "learning_rate": 1.1181175666668931e-07, "loss": 0.3076, "step": 11204 }, { "epoch": 2.818158953722334, "grad_norm": 0.288150817155838, "learning_rate": 1.1150425010323541e-07, "loss": 0.3362, "step": 11205 }, { "epoch": 2.8184104627766597, "grad_norm": 0.27248165011405945, "learning_rate": 1.1119716220886034e-07, "loss": 0.3292, "step": 11206 }, { "epoch": 2.818661971830986, "grad_norm": 0.2919750511646271, "learning_rate": 1.1089049300986421e-07, "loss": 0.3198, "step": 11207 }, { "epoch": 2.818913480885312, "grad_norm": 0.28943032026290894, "learning_rate": 1.1058424253251099e-07, "loss": 0.3201, "step": 11208 }, { "epoch": 2.8191649899396376, "grad_norm": 0.28583675622940063, "learning_rate": 1.102784108030297e-07, "loss": 0.3293, "step": 11209 }, { "epoch": 2.819416498993964, "grad_norm": 0.2858569025993347, "learning_rate": 1.0997299784761218e-07, "loss": 0.3149, "step": 11210 }, { "epoch": 2.8196680080482897, "grad_norm": 0.3017948269844055, "learning_rate": 1.0966800369241526e-07, "loss": 0.3415, "step": 11211 }, { "epoch": 2.8199195171026155, "grad_norm": 0.2693357765674591, "learning_rate": 1.0936342836356028e-07, "loss": 0.3366, "step": 11212 }, { "epoch": 2.8201710261569417, "grad_norm": 0.29551607370376587, "learning_rate": 1.0905927188713195e-07, "loss": 0.3274, "step": 11213 }, { "epoch": 2.8204225352112675, "grad_norm": 0.3079835772514343, "learning_rate": 1.0875553428917939e-07, "loss": 0.3176, "step": 11214 }, { "epoch": 2.8206740442655933, "grad_norm": 0.28076887130737305, "learning_rate": 1.0845221559571517e-07, "loss": 0.3163, "step": 11215 }, { "epoch": 2.8209255533199196, "grad_norm": 0.2994937598705292, "learning_rate": 1.0814931583271848e-07, "loss": 0.3265, "step": 11216 }, { "epoch": 2.8211770623742454, "grad_norm": 0.293682724237442, "learning_rate": 1.0784683502612858e-07, "loss": 0.3358, "step": 11217 }, { "epoch": 2.821428571428571, "grad_norm": 0.3048734664916992, "learning_rate": 1.0754477320185253e-07, "loss": 0.3298, "step": 11218 }, { "epoch": 2.8216800804828974, "grad_norm": 0.27152693271636963, "learning_rate": 1.0724313038575906e-07, "loss": 0.3252, "step": 11219 }, { "epoch": 2.8219315895372232, "grad_norm": 0.3005521893501282, "learning_rate": 1.0694190660368309e-07, "loss": 0.3093, "step": 11220 }, { "epoch": 2.822183098591549, "grad_norm": 0.281059205532074, "learning_rate": 1.0664110188142118e-07, "loss": 0.3335, "step": 11221 }, { "epoch": 2.8224346076458753, "grad_norm": 0.2841849625110626, "learning_rate": 1.0634071624473719e-07, "loss": 0.3297, "step": 11222 }, { "epoch": 2.822686116700201, "grad_norm": 0.29039302468299866, "learning_rate": 1.0604074971935497e-07, "loss": 0.323, "step": 11223 }, { "epoch": 2.822937625754527, "grad_norm": 0.2728997766971588, "learning_rate": 1.0574120233096674e-07, "loss": 0.3357, "step": 11224 }, { "epoch": 2.823189134808853, "grad_norm": 0.2747051417827606, "learning_rate": 1.0544207410522644e-07, "loss": 0.324, "step": 11225 }, { "epoch": 2.823440643863179, "grad_norm": 0.286855548620224, "learning_rate": 1.0514336506775135e-07, "loss": 0.2887, "step": 11226 }, { "epoch": 2.823692152917505, "grad_norm": 0.28169578313827515, "learning_rate": 1.0484507524412602e-07, "loss": 0.3123, "step": 11227 }, { "epoch": 2.823943661971831, "grad_norm": 0.2875922918319702, "learning_rate": 1.0454720465989498e-07, "loss": 0.3339, "step": 11228 }, { "epoch": 2.824195171026157, "grad_norm": 0.31413838267326355, "learning_rate": 1.0424975334057064e-07, "loss": 0.3335, "step": 11229 }, { "epoch": 2.824446680080483, "grad_norm": 0.2952876091003418, "learning_rate": 1.0395272131162703e-07, "loss": 0.3128, "step": 11230 }, { "epoch": 2.824698189134809, "grad_norm": 0.2815072238445282, "learning_rate": 1.0365610859850328e-07, "loss": 0.324, "step": 11231 }, { "epoch": 2.8249496981891347, "grad_norm": 0.2810538709163666, "learning_rate": 1.0335991522660239e-07, "loss": 0.3117, "step": 11232 }, { "epoch": 2.825201207243461, "grad_norm": 0.2817157804965973, "learning_rate": 1.0306414122129127e-07, "loss": 0.3148, "step": 11233 }, { "epoch": 2.8254527162977867, "grad_norm": 0.2859732210636139, "learning_rate": 1.0276878660790135e-07, "loss": 0.3146, "step": 11234 }, { "epoch": 2.8257042253521125, "grad_norm": 0.2922956347465515, "learning_rate": 1.0247385141172794e-07, "loss": 0.3036, "step": 11235 }, { "epoch": 2.8259557344064388, "grad_norm": 0.2939150631427765, "learning_rate": 1.0217933565803085e-07, "loss": 0.3114, "step": 11236 }, { "epoch": 2.8262072434607646, "grad_norm": 0.2930050492286682, "learning_rate": 1.018852393720321e-07, "loss": 0.3278, "step": 11237 }, { "epoch": 2.8264587525150904, "grad_norm": 0.2771678566932678, "learning_rate": 1.01591562578921e-07, "loss": 0.3314, "step": 11238 }, { "epoch": 2.8267102615694166, "grad_norm": 0.2838783264160156, "learning_rate": 1.0129830530384743e-07, "loss": 0.302, "step": 11239 }, { "epoch": 2.8269617706237424, "grad_norm": 0.28282177448272705, "learning_rate": 1.0100546757192853e-07, "loss": 0.3397, "step": 11240 }, { "epoch": 2.8272132796780687, "grad_norm": 0.28225719928741455, "learning_rate": 1.0071304940824255e-07, "loss": 0.3246, "step": 11241 }, { "epoch": 2.8274647887323945, "grad_norm": 0.29289302229881287, "learning_rate": 1.0042105083783449e-07, "loss": 0.312, "step": 11242 }, { "epoch": 2.8277162977867203, "grad_norm": 0.2851783335208893, "learning_rate": 1.0012947188571154e-07, "loss": 0.3026, "step": 11243 }, { "epoch": 2.8279678068410465, "grad_norm": 0.3011089265346527, "learning_rate": 9.983831257684651e-08, "loss": 0.3134, "step": 11244 }, { "epoch": 2.8282193158953723, "grad_norm": 0.28961464762687683, "learning_rate": 9.95475729361739e-08, "loss": 0.3119, "step": 11245 }, { "epoch": 2.828470824949698, "grad_norm": 0.30916205048561096, "learning_rate": 9.92572529885949e-08, "loss": 0.3098, "step": 11246 }, { "epoch": 2.8287223340040244, "grad_norm": 0.29237234592437744, "learning_rate": 9.896735275897296e-08, "loss": 0.3082, "step": 11247 }, { "epoch": 2.82897384305835, "grad_norm": 0.3051128387451172, "learning_rate": 9.867787227213655e-08, "loss": 0.3121, "step": 11248 }, { "epoch": 2.829225352112676, "grad_norm": 0.2796797454357147, "learning_rate": 9.838881155287806e-08, "loss": 0.3058, "step": 11249 }, { "epoch": 2.8294768611670023, "grad_norm": 0.28661051392555237, "learning_rate": 9.810017062595322e-08, "loss": 0.3158, "step": 11250 }, { "epoch": 2.829728370221328, "grad_norm": 0.28525805473327637, "learning_rate": 9.781194951608286e-08, "loss": 0.3155, "step": 11251 }, { "epoch": 2.829979879275654, "grad_norm": 0.2928963601589203, "learning_rate": 9.75241482479511e-08, "loss": 0.3174, "step": 11252 }, { "epoch": 2.83023138832998, "grad_norm": 0.28580477833747864, "learning_rate": 9.723676684620542e-08, "loss": 0.3443, "step": 11253 }, { "epoch": 2.830482897384306, "grad_norm": 0.29559531807899475, "learning_rate": 9.694980533546005e-08, "loss": 0.3303, "step": 11254 }, { "epoch": 2.8307344064386317, "grad_norm": 0.27034255862236023, "learning_rate": 9.666326374028979e-08, "loss": 0.3093, "step": 11255 }, { "epoch": 2.830985915492958, "grad_norm": 0.29185721278190613, "learning_rate": 9.637714208523664e-08, "loss": 0.331, "step": 11256 }, { "epoch": 2.8312374245472838, "grad_norm": 0.2903611660003662, "learning_rate": 9.609144039480323e-08, "loss": 0.3167, "step": 11257 }, { "epoch": 2.8314889336016096, "grad_norm": 0.28841114044189453, "learning_rate": 9.580615869345999e-08, "loss": 0.3126, "step": 11258 }, { "epoch": 2.831740442655936, "grad_norm": 0.2874075174331665, "learning_rate": 9.55212970056385e-08, "loss": 0.3075, "step": 11259 }, { "epoch": 2.8319919517102616, "grad_norm": 0.2626476287841797, "learning_rate": 9.52368553557359e-08, "loss": 0.3212, "step": 11260 }, { "epoch": 2.8322434607645874, "grad_norm": 0.28571265935897827, "learning_rate": 9.49528337681116e-08, "loss": 0.3141, "step": 11261 }, { "epoch": 2.8324949698189137, "grad_norm": 0.27694886922836304, "learning_rate": 9.466923226709223e-08, "loss": 0.3197, "step": 11262 }, { "epoch": 2.8327464788732395, "grad_norm": 0.30933791399002075, "learning_rate": 9.43860508769645e-08, "loss": 0.333, "step": 11263 }, { "epoch": 2.8329979879275653, "grad_norm": 0.28589534759521484, "learning_rate": 9.410328962198289e-08, "loss": 0.3427, "step": 11264 }, { "epoch": 2.8332494969818915, "grad_norm": 0.2972225248813629, "learning_rate": 9.382094852636303e-08, "loss": 0.3231, "step": 11265 }, { "epoch": 2.8335010060362174, "grad_norm": 0.2830668091773987, "learning_rate": 9.353902761428557e-08, "loss": 0.3156, "step": 11266 }, { "epoch": 2.833752515090543, "grad_norm": 0.30200186371803284, "learning_rate": 9.325752690989676e-08, "loss": 0.3334, "step": 11267 }, { "epoch": 2.8340040241448694, "grad_norm": 0.27993977069854736, "learning_rate": 9.297644643730342e-08, "loss": 0.3043, "step": 11268 }, { "epoch": 2.834255533199195, "grad_norm": 0.27513226866722107, "learning_rate": 9.269578622057962e-08, "loss": 0.3329, "step": 11269 }, { "epoch": 2.834507042253521, "grad_norm": 0.2780003249645233, "learning_rate": 9.241554628376115e-08, "loss": 0.3006, "step": 11270 }, { "epoch": 2.8347585513078473, "grad_norm": 0.2845858633518219, "learning_rate": 9.213572665084991e-08, "loss": 0.3041, "step": 11271 }, { "epoch": 2.835010060362173, "grad_norm": 0.27587416768074036, "learning_rate": 9.185632734581008e-08, "loss": 0.3032, "step": 11272 }, { "epoch": 2.835261569416499, "grad_norm": 0.28815388679504395, "learning_rate": 9.157734839257026e-08, "loss": 0.3105, "step": 11273 }, { "epoch": 2.835513078470825, "grad_norm": 0.279449999332428, "learning_rate": 9.12987898150236e-08, "loss": 0.3063, "step": 11274 }, { "epoch": 2.835764587525151, "grad_norm": 0.2929026484489441, "learning_rate": 9.102065163702767e-08, "loss": 0.3263, "step": 11275 }, { "epoch": 2.8360160965794767, "grad_norm": 0.28392016887664795, "learning_rate": 9.074293388240118e-08, "loss": 0.3048, "step": 11276 }, { "epoch": 2.836267605633803, "grad_norm": 0.27088797092437744, "learning_rate": 9.046563657493068e-08, "loss": 0.3295, "step": 11277 }, { "epoch": 2.836519114688129, "grad_norm": 0.28667935729026794, "learning_rate": 9.018875973836493e-08, "loss": 0.3453, "step": 11278 }, { "epoch": 2.8367706237424546, "grad_norm": 0.29696646332740784, "learning_rate": 8.991230339641554e-08, "loss": 0.286, "step": 11279 }, { "epoch": 2.837022132796781, "grad_norm": 0.2868374288082123, "learning_rate": 8.963626757276078e-08, "loss": 0.3175, "step": 11280 }, { "epoch": 2.8372736418511066, "grad_norm": 0.2921113967895508, "learning_rate": 8.936065229104007e-08, "loss": 0.3318, "step": 11281 }, { "epoch": 2.8375251509054324, "grad_norm": 0.2965297996997833, "learning_rate": 8.908545757485843e-08, "loss": 0.3135, "step": 11282 }, { "epoch": 2.8377766599597587, "grad_norm": 0.2960835099220276, "learning_rate": 8.881068344778476e-08, "loss": 0.3107, "step": 11283 }, { "epoch": 2.8380281690140845, "grad_norm": 0.27558842301368713, "learning_rate": 8.853632993335248e-08, "loss": 0.3091, "step": 11284 }, { "epoch": 2.8382796780684103, "grad_norm": 0.28575974702835083, "learning_rate": 8.826239705505668e-08, "loss": 0.3177, "step": 11285 }, { "epoch": 2.8385311871227366, "grad_norm": 0.2759111225605011, "learning_rate": 8.79888848363597e-08, "loss": 0.3333, "step": 11286 }, { "epoch": 2.8387826961770624, "grad_norm": 0.2808525264263153, "learning_rate": 8.771579330068447e-08, "loss": 0.3325, "step": 11287 }, { "epoch": 2.839034205231388, "grad_norm": 0.2843991816043854, "learning_rate": 8.744312247142005e-08, "loss": 0.297, "step": 11288 }, { "epoch": 2.8392857142857144, "grad_norm": 0.28934431076049805, "learning_rate": 8.717087237192057e-08, "loss": 0.3257, "step": 11289 }, { "epoch": 2.83953722334004, "grad_norm": 0.3172331750392914, "learning_rate": 8.689904302550011e-08, "loss": 0.3271, "step": 11290 }, { "epoch": 2.839788732394366, "grad_norm": 0.27618086338043213, "learning_rate": 8.662763445544121e-08, "loss": 0.34, "step": 11291 }, { "epoch": 2.8400402414486923, "grad_norm": 0.29746612906455994, "learning_rate": 8.635664668498744e-08, "loss": 0.3051, "step": 11292 }, { "epoch": 2.840291750503018, "grad_norm": 0.3068857789039612, "learning_rate": 8.608607973734695e-08, "loss": 0.3271, "step": 11293 }, { "epoch": 2.840543259557344, "grad_norm": 0.28964129090309143, "learning_rate": 8.58159336356923e-08, "loss": 0.3228, "step": 11294 }, { "epoch": 2.84079476861167, "grad_norm": 0.28352829813957214, "learning_rate": 8.554620840315997e-08, "loss": 0.3148, "step": 11295 }, { "epoch": 2.841046277665996, "grad_norm": 0.3184901475906372, "learning_rate": 8.527690406285039e-08, "loss": 0.3446, "step": 11296 }, { "epoch": 2.8412977867203217, "grad_norm": 0.2846779525279999, "learning_rate": 8.500802063782732e-08, "loss": 0.3211, "step": 11297 }, { "epoch": 2.841549295774648, "grad_norm": 0.2861534655094147, "learning_rate": 8.4739558151119e-08, "loss": 0.3138, "step": 11298 }, { "epoch": 2.841800804828974, "grad_norm": 0.3019251227378845, "learning_rate": 8.447151662571762e-08, "loss": 0.3251, "step": 11299 }, { "epoch": 2.8420523138832996, "grad_norm": 0.27594611048698425, "learning_rate": 8.420389608458035e-08, "loss": 0.3178, "step": 11300 }, { "epoch": 2.842303822937626, "grad_norm": 0.28519585728645325, "learning_rate": 8.393669655062553e-08, "loss": 0.3354, "step": 11301 }, { "epoch": 2.8425553319919517, "grad_norm": 0.31750738620758057, "learning_rate": 8.366991804673818e-08, "loss": 0.3143, "step": 11302 }, { "epoch": 2.8428068410462775, "grad_norm": 0.3031100332736969, "learning_rate": 8.340356059576505e-08, "loss": 0.3148, "step": 11303 }, { "epoch": 2.8430583501006037, "grad_norm": 0.2872277498245239, "learning_rate": 8.313762422052008e-08, "loss": 0.321, "step": 11304 }, { "epoch": 2.8433098591549295, "grad_norm": 0.2903568744659424, "learning_rate": 8.287210894377673e-08, "loss": 0.312, "step": 11305 }, { "epoch": 2.8435613682092553, "grad_norm": 0.2857918441295624, "learning_rate": 8.260701478827626e-08, "loss": 0.3355, "step": 11306 }, { "epoch": 2.8438128772635816, "grad_norm": 0.2868918180465698, "learning_rate": 8.234234177672217e-08, "loss": 0.3188, "step": 11307 }, { "epoch": 2.8440643863179074, "grad_norm": 0.2801552414894104, "learning_rate": 8.207808993178134e-08, "loss": 0.3317, "step": 11308 }, { "epoch": 2.844315895372233, "grad_norm": 0.28546252846717834, "learning_rate": 8.181425927608566e-08, "loss": 0.3225, "step": 11309 }, { "epoch": 2.8445674044265594, "grad_norm": 0.3166808485984802, "learning_rate": 8.155084983223038e-08, "loss": 0.3293, "step": 11310 }, { "epoch": 2.8448189134808852, "grad_norm": 0.287341445684433, "learning_rate": 8.12878616227758e-08, "loss": 0.3176, "step": 11311 }, { "epoch": 2.845070422535211, "grad_norm": 0.27534937858581543, "learning_rate": 8.102529467024389e-08, "loss": 0.3313, "step": 11312 }, { "epoch": 2.8453219315895373, "grad_norm": 0.28979775309562683, "learning_rate": 8.076314899712279e-08, "loss": 0.3389, "step": 11313 }, { "epoch": 2.845573440643863, "grad_norm": 0.2847757339477539, "learning_rate": 8.050142462586285e-08, "loss": 0.3058, "step": 11314 }, { "epoch": 2.845824949698189, "grad_norm": 0.2654733657836914, "learning_rate": 8.024012157888062e-08, "loss": 0.3408, "step": 11315 }, { "epoch": 2.846076458752515, "grad_norm": 0.2785297632217407, "learning_rate": 7.997923987855316e-08, "loss": 0.3114, "step": 11316 }, { "epoch": 2.846327967806841, "grad_norm": 0.27078497409820557, "learning_rate": 7.971877954722429e-08, "loss": 0.305, "step": 11317 }, { "epoch": 2.8465794768611667, "grad_norm": 0.29301923513412476, "learning_rate": 7.945874060720116e-08, "loss": 0.3411, "step": 11318 }, { "epoch": 2.846830985915493, "grad_norm": 0.2895153760910034, "learning_rate": 7.919912308075428e-08, "loss": 0.2994, "step": 11319 }, { "epoch": 2.847082494969819, "grad_norm": 0.3017180263996124, "learning_rate": 7.893992699011754e-08, "loss": 0.3418, "step": 11320 }, { "epoch": 2.8473340040241446, "grad_norm": 0.3073883652687073, "learning_rate": 7.868115235748986e-08, "loss": 0.3158, "step": 11321 }, { "epoch": 2.847585513078471, "grad_norm": 0.28526899218559265, "learning_rate": 7.842279920503404e-08, "loss": 0.3253, "step": 11322 }, { "epoch": 2.8478370221327967, "grad_norm": 0.2872920036315918, "learning_rate": 7.81648675548763e-08, "loss": 0.3285, "step": 11323 }, { "epoch": 2.8480885311871225, "grad_norm": 0.2851753532886505, "learning_rate": 7.790735742910671e-08, "loss": 0.3204, "step": 11324 }, { "epoch": 2.8483400402414487, "grad_norm": 0.28143948316574097, "learning_rate": 7.765026884977934e-08, "loss": 0.3232, "step": 11325 }, { "epoch": 2.8485915492957745, "grad_norm": 0.28405773639678955, "learning_rate": 7.739360183891265e-08, "loss": 0.3175, "step": 11326 }, { "epoch": 2.8488430583501008, "grad_norm": 0.3014451265335083, "learning_rate": 7.713735641848796e-08, "loss": 0.3247, "step": 11327 }, { "epoch": 2.8490945674044266, "grad_norm": 0.2807544469833374, "learning_rate": 7.688153261045161e-08, "loss": 0.3086, "step": 11328 }, { "epoch": 2.8493460764587524, "grad_norm": 0.2870287001132965, "learning_rate": 7.662613043671274e-08, "loss": 0.3077, "step": 11329 }, { "epoch": 2.8495975855130786, "grad_norm": 0.2920602560043335, "learning_rate": 7.637114991914552e-08, "loss": 0.3255, "step": 11330 }, { "epoch": 2.8498490945674044, "grad_norm": 0.29726293683052063, "learning_rate": 7.611659107958692e-08, "loss": 0.3174, "step": 11331 }, { "epoch": 2.8501006036217302, "grad_norm": 0.2930297553539276, "learning_rate": 7.586245393983837e-08, "loss": 0.3149, "step": 11332 }, { "epoch": 2.8503521126760565, "grad_norm": 0.29084110260009766, "learning_rate": 7.560873852166584e-08, "loss": 0.3169, "step": 11333 }, { "epoch": 2.8506036217303823, "grad_norm": 0.29585593938827515, "learning_rate": 7.535544484679747e-08, "loss": 0.3074, "step": 11334 }, { "epoch": 2.850855130784708, "grad_norm": 0.29283884167671204, "learning_rate": 7.510257293692702e-08, "loss": 0.3137, "step": 11335 }, { "epoch": 2.8511066398390343, "grad_norm": 0.31102699041366577, "learning_rate": 7.485012281371107e-08, "loss": 0.3049, "step": 11336 }, { "epoch": 2.85135814889336, "grad_norm": 0.3003372848033905, "learning_rate": 7.45980944987712e-08, "loss": 0.3204, "step": 11337 }, { "epoch": 2.8516096579476864, "grad_norm": 0.2767249643802643, "learning_rate": 7.434648801369015e-08, "loss": 0.3144, "step": 11338 }, { "epoch": 2.851861167002012, "grad_norm": 0.2756442725658417, "learning_rate": 7.409530338001846e-08, "loss": 0.3237, "step": 11339 }, { "epoch": 2.852112676056338, "grad_norm": 0.29652678966522217, "learning_rate": 7.384454061926727e-08, "loss": 0.3038, "step": 11340 }, { "epoch": 2.8523641851106643, "grad_norm": 0.266886830329895, "learning_rate": 7.359419975291326e-08, "loss": 0.3405, "step": 11341 }, { "epoch": 2.85261569416499, "grad_norm": 0.284145325422287, "learning_rate": 7.334428080239653e-08, "loss": 0.3099, "step": 11342 }, { "epoch": 2.852867203219316, "grad_norm": 0.2697547674179077, "learning_rate": 7.309478378912105e-08, "loss": 0.309, "step": 11343 }, { "epoch": 2.853118712273642, "grad_norm": 0.29870113730430603, "learning_rate": 7.28457087344553e-08, "loss": 0.3209, "step": 11344 }, { "epoch": 2.853370221327968, "grad_norm": 0.2862483561038971, "learning_rate": 7.259705565972941e-08, "loss": 0.3286, "step": 11345 }, { "epoch": 2.8536217303822937, "grad_norm": 0.29490405321121216, "learning_rate": 7.234882458624081e-08, "loss": 0.3179, "step": 11346 }, { "epoch": 2.85387323943662, "grad_norm": 0.2868093252182007, "learning_rate": 7.210101553524751e-08, "loss": 0.3436, "step": 11347 }, { "epoch": 2.8541247484909458, "grad_norm": 0.2712860703468323, "learning_rate": 7.185362852797417e-08, "loss": 0.331, "step": 11348 }, { "epoch": 2.8543762575452716, "grad_norm": 0.2829849421977997, "learning_rate": 7.160666358560664e-08, "loss": 0.32, "step": 11349 }, { "epoch": 2.854627766599598, "grad_norm": 0.28862541913986206, "learning_rate": 7.136012072929632e-08, "loss": 0.3101, "step": 11350 }, { "epoch": 2.8548792756539236, "grad_norm": 0.27747079730033875, "learning_rate": 7.111399998015856e-08, "loss": 0.3098, "step": 11351 }, { "epoch": 2.8551307847082494, "grad_norm": 0.2933838963508606, "learning_rate": 7.086830135927148e-08, "loss": 0.3195, "step": 11352 }, { "epoch": 2.8553822937625757, "grad_norm": 0.2843649685382843, "learning_rate": 7.06230248876777e-08, "loss": 0.3176, "step": 11353 }, { "epoch": 2.8556338028169015, "grad_norm": 0.2857533395290375, "learning_rate": 7.037817058638375e-08, "loss": 0.2968, "step": 11354 }, { "epoch": 2.8558853118712273, "grad_norm": 0.27660998702049255, "learning_rate": 7.013373847636007e-08, "loss": 0.3313, "step": 11355 }, { "epoch": 2.8561368209255535, "grad_norm": 0.28614291548728943, "learning_rate": 6.988972857853993e-08, "loss": 0.3525, "step": 11356 }, { "epoch": 2.8563883299798793, "grad_norm": 0.2815568447113037, "learning_rate": 6.964614091382272e-08, "loss": 0.318, "step": 11357 }, { "epoch": 2.856639839034205, "grad_norm": 0.2796972393989563, "learning_rate": 6.940297550306895e-08, "loss": 0.3181, "step": 11358 }, { "epoch": 2.8568913480885314, "grad_norm": 0.2727015018463135, "learning_rate": 6.916023236710478e-08, "loss": 0.339, "step": 11359 }, { "epoch": 2.857142857142857, "grad_norm": 0.2959345281124115, "learning_rate": 6.891791152671912e-08, "loss": 0.3253, "step": 11360 }, { "epoch": 2.857394366197183, "grad_norm": 0.29117295145988464, "learning_rate": 6.867601300266647e-08, "loss": 0.3153, "step": 11361 }, { "epoch": 2.8576458752515093, "grad_norm": 0.31557777523994446, "learning_rate": 6.843453681566192e-08, "loss": 0.3248, "step": 11362 }, { "epoch": 2.857897384305835, "grad_norm": 0.28794246912002563, "learning_rate": 6.819348298638839e-08, "loss": 0.3368, "step": 11363 }, { "epoch": 2.858148893360161, "grad_norm": 0.2903740108013153, "learning_rate": 6.79528515354888e-08, "loss": 0.3016, "step": 11364 }, { "epoch": 2.858400402414487, "grad_norm": 0.2773711681365967, "learning_rate": 6.771264248357389e-08, "loss": 0.3324, "step": 11365 }, { "epoch": 2.858651911468813, "grad_norm": 0.3121846318244934, "learning_rate": 6.747285585121388e-08, "loss": 0.3131, "step": 11366 }, { "epoch": 2.8589034205231387, "grad_norm": 0.27057939767837524, "learning_rate": 6.723349165894621e-08, "loss": 0.3408, "step": 11367 }, { "epoch": 2.859154929577465, "grad_norm": 0.3090129494667053, "learning_rate": 6.699454992727061e-08, "loss": 0.3223, "step": 11368 }, { "epoch": 2.859406438631791, "grad_norm": 0.2833699882030487, "learning_rate": 6.675603067665182e-08, "loss": 0.3158, "step": 11369 }, { "epoch": 2.8596579476861166, "grad_norm": 0.29948365688323975, "learning_rate": 6.651793392751571e-08, "loss": 0.325, "step": 11370 }, { "epoch": 2.859909456740443, "grad_norm": 0.27591371536254883, "learning_rate": 6.628025970025542e-08, "loss": 0.3463, "step": 11371 }, { "epoch": 2.8601609657947686, "grad_norm": 0.3021274209022522, "learning_rate": 6.604300801522523e-08, "loss": 0.2923, "step": 11372 }, { "epoch": 2.8604124748490944, "grad_norm": 0.26336097717285156, "learning_rate": 6.580617889274498e-08, "loss": 0.3211, "step": 11373 }, { "epoch": 2.8606639839034207, "grad_norm": 0.26855531334877014, "learning_rate": 6.55697723530968e-08, "loss": 0.3167, "step": 11374 }, { "epoch": 2.8609154929577465, "grad_norm": 0.28841209411621094, "learning_rate": 6.533378841652893e-08, "loss": 0.3344, "step": 11375 }, { "epoch": 2.8611670020120723, "grad_norm": 0.28485679626464844, "learning_rate": 6.509822710324964e-08, "loss": 0.3074, "step": 11376 }, { "epoch": 2.8614185110663986, "grad_norm": 0.3013753294944763, "learning_rate": 6.486308843343558e-08, "loss": 0.3302, "step": 11377 }, { "epoch": 2.8616700201207244, "grad_norm": 0.29902198910713196, "learning_rate": 6.462837242722342e-08, "loss": 0.341, "step": 11378 }, { "epoch": 2.86192152917505, "grad_norm": 0.29432061314582825, "learning_rate": 6.43940791047154e-08, "loss": 0.3172, "step": 11379 }, { "epoch": 2.8621730382293764, "grad_norm": 0.28921979665756226, "learning_rate": 6.41602084859777e-08, "loss": 0.3307, "step": 11380 }, { "epoch": 2.862424547283702, "grad_norm": 0.3158515989780426, "learning_rate": 6.392676059103986e-08, "loss": 0.3208, "step": 11381 }, { "epoch": 2.862676056338028, "grad_norm": 0.27476269006729126, "learning_rate": 6.36937354398942e-08, "loss": 0.3132, "step": 11382 }, { "epoch": 2.8629275653923543, "grad_norm": 0.2913151681423187, "learning_rate": 6.346113305249923e-08, "loss": 0.2988, "step": 11383 }, { "epoch": 2.86317907444668, "grad_norm": 0.3120485544204712, "learning_rate": 6.322895344877566e-08, "loss": 0.3129, "step": 11384 }, { "epoch": 2.863430583501006, "grad_norm": 0.2824515402317047, "learning_rate": 6.299719664860704e-08, "loss": 0.301, "step": 11385 }, { "epoch": 2.863682092555332, "grad_norm": 0.28883475065231323, "learning_rate": 6.27658626718436e-08, "loss": 0.3207, "step": 11386 }, { "epoch": 2.863933601609658, "grad_norm": 0.3073241710662842, "learning_rate": 6.253495153829614e-08, "loss": 0.3431, "step": 11387 }, { "epoch": 2.8641851106639837, "grad_norm": 0.2900136709213257, "learning_rate": 6.230446326774165e-08, "loss": 0.3236, "step": 11388 }, { "epoch": 2.86443661971831, "grad_norm": 0.3047274351119995, "learning_rate": 6.207439787991986e-08, "loss": 0.3098, "step": 11389 }, { "epoch": 2.864688128772636, "grad_norm": 0.2926124930381775, "learning_rate": 6.184475539453394e-08, "loss": 0.3156, "step": 11390 }, { "epoch": 2.8649396378269616, "grad_norm": 0.290855348110199, "learning_rate": 6.161553583125202e-08, "loss": 0.3187, "step": 11391 }, { "epoch": 2.865191146881288, "grad_norm": 0.30072471499443054, "learning_rate": 6.138673920970505e-08, "loss": 0.3089, "step": 11392 }, { "epoch": 2.8654426559356136, "grad_norm": 0.2740122675895691, "learning_rate": 6.115836554948795e-08, "loss": 0.3369, "step": 11393 }, { "epoch": 2.8656941649899395, "grad_norm": 0.2824988067150116, "learning_rate": 6.09304148701595e-08, "loss": 0.3327, "step": 11394 }, { "epoch": 2.8659456740442657, "grad_norm": 0.28075578808784485, "learning_rate": 6.07028871912424e-08, "loss": 0.3295, "step": 11395 }, { "epoch": 2.8661971830985915, "grad_norm": 0.32692331075668335, "learning_rate": 6.047578253222274e-08, "loss": 0.3303, "step": 11396 }, { "epoch": 2.8664486921529173, "grad_norm": 0.27784934639930725, "learning_rate": 6.024910091255054e-08, "loss": 0.3183, "step": 11397 }, { "epoch": 2.8667002012072436, "grad_norm": 0.30311983823776245, "learning_rate": 6.002284235164024e-08, "loss": 0.3179, "step": 11398 }, { "epoch": 2.8669517102615694, "grad_norm": 0.2793339490890503, "learning_rate": 5.979700686886914e-08, "loss": 0.3189, "step": 11399 }, { "epoch": 2.867203219315895, "grad_norm": 0.3019621968269348, "learning_rate": 5.9571594483577855e-08, "loss": 0.3127, "step": 11400 }, { "epoch": 2.8674547283702214, "grad_norm": 0.28011688590049744, "learning_rate": 5.9346605215073185e-08, "loss": 0.3086, "step": 11401 }, { "epoch": 2.8677062374245472, "grad_norm": 0.31461960077285767, "learning_rate": 5.9122039082622486e-08, "loss": 0.3178, "step": 11402 }, { "epoch": 2.867957746478873, "grad_norm": 0.27287060022354126, "learning_rate": 5.889789610545982e-08, "loss": 0.3329, "step": 11403 }, { "epoch": 2.8682092555331993, "grad_norm": 0.28381481766700745, "learning_rate": 5.867417630277983e-08, "loss": 0.3345, "step": 11404 }, { "epoch": 2.868460764587525, "grad_norm": 0.29278185963630676, "learning_rate": 5.845087969374497e-08, "loss": 0.3284, "step": 11405 }, { "epoch": 2.868712273641851, "grad_norm": 0.27680209279060364, "learning_rate": 5.8228006297477156e-08, "loss": 0.3217, "step": 11406 }, { "epoch": 2.868963782696177, "grad_norm": 0.2894262373447418, "learning_rate": 5.8005556133065575e-08, "loss": 0.2931, "step": 11407 }, { "epoch": 2.869215291750503, "grad_norm": 0.26620855927467346, "learning_rate": 5.7783529219560544e-08, "loss": 0.3274, "step": 11408 }, { "epoch": 2.8694668008048287, "grad_norm": 0.2928333878517151, "learning_rate": 5.756192557597795e-08, "loss": 0.314, "step": 11409 }, { "epoch": 2.869718309859155, "grad_norm": 0.2882349193096161, "learning_rate": 5.734074522129707e-08, "loss": 0.34, "step": 11410 }, { "epoch": 2.869969818913481, "grad_norm": 0.2803259789943695, "learning_rate": 5.711998817445996e-08, "loss": 0.3263, "step": 11411 }, { "epoch": 2.8702213279678066, "grad_norm": 0.3050937354564667, "learning_rate": 5.689965445437318e-08, "loss": 0.3102, "step": 11412 }, { "epoch": 2.870472837022133, "grad_norm": 0.2841440439224243, "learning_rate": 5.6679744079907176e-08, "loss": 0.3356, "step": 11413 }, { "epoch": 2.8707243460764587, "grad_norm": 0.28401076793670654, "learning_rate": 5.646025706989577e-08, "loss": 0.3222, "step": 11414 }, { "epoch": 2.8709758551307845, "grad_norm": 0.3013293147087097, "learning_rate": 5.624119344313672e-08, "loss": 0.3261, "step": 11415 }, { "epoch": 2.8712273641851107, "grad_norm": 0.2837771773338318, "learning_rate": 5.6022553218391674e-08, "loss": 0.313, "step": 11416 }, { "epoch": 2.8714788732394365, "grad_norm": 0.28893938660621643, "learning_rate": 5.580433641438454e-08, "loss": 0.3093, "step": 11417 }, { "epoch": 2.8717303822937623, "grad_norm": 0.2678500711917877, "learning_rate": 5.558654304980593e-08, "loss": 0.3037, "step": 11418 }, { "epoch": 2.8719818913480886, "grad_norm": 0.29921528697013855, "learning_rate": 5.536917314330759e-08, "loss": 0.3199, "step": 11419 }, { "epoch": 2.8722334004024144, "grad_norm": 0.2949788570404053, "learning_rate": 5.5152226713506285e-08, "loss": 0.311, "step": 11420 }, { "epoch": 2.87248490945674, "grad_norm": 0.31055039167404175, "learning_rate": 5.493570377898161e-08, "loss": 0.3221, "step": 11421 }, { "epoch": 2.8727364185110664, "grad_norm": 0.28630343079566956, "learning_rate": 5.4719604358277615e-08, "loss": 0.33, "step": 11422 }, { "epoch": 2.8729879275653922, "grad_norm": 0.30379021167755127, "learning_rate": 5.450392846990227e-08, "loss": 0.3115, "step": 11423 }, { "epoch": 2.873239436619718, "grad_norm": 0.2785795032978058, "learning_rate": 5.428867613232636e-08, "loss": 0.2873, "step": 11424 }, { "epoch": 2.8734909456740443, "grad_norm": 0.271603524684906, "learning_rate": 5.407384736398513e-08, "loss": 0.3243, "step": 11425 }, { "epoch": 2.87374245472837, "grad_norm": 0.2870146930217743, "learning_rate": 5.385944218327721e-08, "loss": 0.3185, "step": 11426 }, { "epoch": 2.8739939637826963, "grad_norm": 0.2926969528198242, "learning_rate": 5.3645460608565124e-08, "loss": 0.301, "step": 11427 }, { "epoch": 2.874245472837022, "grad_norm": 0.2959466278553009, "learning_rate": 5.3431902658174776e-08, "loss": 0.3264, "step": 11428 }, { "epoch": 2.874496981891348, "grad_norm": 0.3116055130958557, "learning_rate": 5.321876835039652e-08, "loss": 0.2992, "step": 11429 }, { "epoch": 2.874748490945674, "grad_norm": 0.28724607825279236, "learning_rate": 5.30060577034841e-08, "loss": 0.3454, "step": 11430 }, { "epoch": 2.875, "grad_norm": 0.29540908336639404, "learning_rate": 5.279377073565406e-08, "loss": 0.2933, "step": 11431 }, { "epoch": 2.875251509054326, "grad_norm": 0.3000968098640442, "learning_rate": 5.258190746508796e-08, "loss": 0.3071, "step": 11432 }, { "epoch": 2.875503018108652, "grad_norm": 0.29470768570899963, "learning_rate": 5.237046790993072e-08, "loss": 0.324, "step": 11433 }, { "epoch": 2.875754527162978, "grad_norm": 0.29645785689353943, "learning_rate": 5.2159452088290654e-08, "loss": 0.3147, "step": 11434 }, { "epoch": 2.8760060362173037, "grad_norm": 0.27087098360061646, "learning_rate": 5.1948860018239954e-08, "loss": 0.3253, "step": 11435 }, { "epoch": 2.87625754527163, "grad_norm": 0.30059030652046204, "learning_rate": 5.17386917178142e-08, "loss": 0.3133, "step": 11436 }, { "epoch": 2.8765090543259557, "grad_norm": 0.27557048201560974, "learning_rate": 5.152894720501345e-08, "loss": 0.312, "step": 11437 }, { "epoch": 2.876760563380282, "grad_norm": 0.2941986322402954, "learning_rate": 5.131962649780109e-08, "loss": 0.3058, "step": 11438 }, { "epoch": 2.8770120724346078, "grad_norm": 0.2895359694957733, "learning_rate": 5.111072961410335e-08, "loss": 0.3238, "step": 11439 }, { "epoch": 2.8772635814889336, "grad_norm": 0.3026026785373688, "learning_rate": 5.090225657181203e-08, "loss": 0.3418, "step": 11440 }, { "epoch": 2.87751509054326, "grad_norm": 0.29785895347595215, "learning_rate": 5.069420738878061e-08, "loss": 0.3185, "step": 11441 }, { "epoch": 2.8777665995975856, "grad_norm": 0.3039131164550781, "learning_rate": 5.048658208282764e-08, "loss": 0.3096, "step": 11442 }, { "epoch": 2.8780181086519114, "grad_norm": 0.29077792167663574, "learning_rate": 5.027938067173499e-08, "loss": 0.3351, "step": 11443 }, { "epoch": 2.8782696177062377, "grad_norm": 0.2855316698551178, "learning_rate": 5.0072603173247914e-08, "loss": 0.3141, "step": 11444 }, { "epoch": 2.8785211267605635, "grad_norm": 0.2709047496318817, "learning_rate": 4.986624960507558e-08, "loss": 0.3076, "step": 11445 }, { "epoch": 2.8787726358148893, "grad_norm": 0.27553707361221313, "learning_rate": 4.966031998489107e-08, "loss": 0.3167, "step": 11446 }, { "epoch": 2.8790241448692155, "grad_norm": 0.29868531227111816, "learning_rate": 4.945481433033139e-08, "loss": 0.3118, "step": 11447 }, { "epoch": 2.8792756539235413, "grad_norm": 0.28663668036460876, "learning_rate": 4.92497326589958e-08, "loss": 0.3408, "step": 11448 }, { "epoch": 2.879527162977867, "grad_norm": 0.2877984046936035, "learning_rate": 4.9045074988449125e-08, "loss": 0.2728, "step": 11449 }, { "epoch": 2.8797786720321934, "grad_norm": 0.2943393290042877, "learning_rate": 4.884084133621847e-08, "loss": 0.3304, "step": 11450 }, { "epoch": 2.880030181086519, "grad_norm": 0.28473910689353943, "learning_rate": 4.863703171979539e-08, "loss": 0.3062, "step": 11451 }, { "epoch": 2.880281690140845, "grad_norm": 0.2870289981365204, "learning_rate": 4.843364615663537e-08, "loss": 0.3047, "step": 11452 }, { "epoch": 2.8805331991951713, "grad_norm": 0.2811976671218872, "learning_rate": 4.823068466415615e-08, "loss": 0.3187, "step": 11453 }, { "epoch": 2.880784708249497, "grad_norm": 0.2888941466808319, "learning_rate": 4.802814725974048e-08, "loss": 0.3171, "step": 11454 }, { "epoch": 2.881036217303823, "grad_norm": 0.2838338613510132, "learning_rate": 4.782603396073504e-08, "loss": 0.3013, "step": 11455 }, { "epoch": 2.881287726358149, "grad_norm": 0.29402175545692444, "learning_rate": 4.7624344784448774e-08, "loss": 0.3276, "step": 11456 }, { "epoch": 2.881539235412475, "grad_norm": 0.28359174728393555, "learning_rate": 4.742307974815563e-08, "loss": 0.3127, "step": 11457 }, { "epoch": 2.8817907444668007, "grad_norm": 0.3000732660293579, "learning_rate": 4.7222238869092386e-08, "loss": 0.3278, "step": 11458 }, { "epoch": 2.882042253521127, "grad_norm": 0.2840462327003479, "learning_rate": 4.702182216445972e-08, "loss": 0.3335, "step": 11459 }, { "epoch": 2.8822937625754528, "grad_norm": 0.3007800281047821, "learning_rate": 4.682182965142279e-08, "loss": 0.3129, "step": 11460 }, { "epoch": 2.8825452716297786, "grad_norm": 0.291432648897171, "learning_rate": 4.6622261347108456e-08, "loss": 0.3206, "step": 11461 }, { "epoch": 2.882796780684105, "grad_norm": 0.2831004858016968, "learning_rate": 4.6423117268609704e-08, "loss": 0.3142, "step": 11462 }, { "epoch": 2.8830482897384306, "grad_norm": 0.29433655738830566, "learning_rate": 4.622439743298124e-08, "loss": 0.3303, "step": 11463 }, { "epoch": 2.8832997987927564, "grad_norm": 0.28008735179901123, "learning_rate": 4.6026101857242765e-08, "loss": 0.3134, "step": 11464 }, { "epoch": 2.8835513078470827, "grad_norm": 0.26906129717826843, "learning_rate": 4.582823055837626e-08, "loss": 0.3519, "step": 11465 }, { "epoch": 2.8838028169014085, "grad_norm": 0.27299466729164124, "learning_rate": 4.563078355332873e-08, "loss": 0.3303, "step": 11466 }, { "epoch": 2.8840543259557343, "grad_norm": 0.29059502482414246, "learning_rate": 4.543376085901052e-08, "loss": 0.3287, "step": 11467 }, { "epoch": 2.8843058350100605, "grad_norm": 0.28941747546195984, "learning_rate": 4.523716249229426e-08, "loss": 0.3179, "step": 11468 }, { "epoch": 2.8845573440643864, "grad_norm": 0.2766706645488739, "learning_rate": 4.504098847001925e-08, "loss": 0.307, "step": 11469 }, { "epoch": 2.884808853118712, "grad_norm": 0.28143224120140076, "learning_rate": 4.484523880898428e-08, "loss": 0.3123, "step": 11470 }, { "epoch": 2.8850603621730384, "grad_norm": 0.30837175250053406, "learning_rate": 4.464991352595593e-08, "loss": 0.2922, "step": 11471 }, { "epoch": 2.885311871227364, "grad_norm": 0.26661986112594604, "learning_rate": 4.445501263766194e-08, "loss": 0.3325, "step": 11472 }, { "epoch": 2.88556338028169, "grad_norm": 0.2966688275337219, "learning_rate": 4.426053616079395e-08, "loss": 0.3129, "step": 11473 }, { "epoch": 2.8858148893360163, "grad_norm": 0.2732764184474945, "learning_rate": 4.406648411200809e-08, "loss": 0.2998, "step": 11474 }, { "epoch": 2.886066398390342, "grad_norm": 0.3051884174346924, "learning_rate": 4.3872856507923835e-08, "loss": 0.3071, "step": 11475 }, { "epoch": 2.886317907444668, "grad_norm": 0.2977435290813446, "learning_rate": 4.367965336512403e-08, "loss": 0.3247, "step": 11476 }, { "epoch": 2.886569416498994, "grad_norm": 0.27795737981796265, "learning_rate": 4.348687470015489e-08, "loss": 0.3144, "step": 11477 }, { "epoch": 2.88682092555332, "grad_norm": 0.29412925243377686, "learning_rate": 4.329452052952765e-08, "loss": 0.3354, "step": 11478 }, { "epoch": 2.8870724346076457, "grad_norm": 0.30797427892684937, "learning_rate": 4.3102590869715246e-08, "loss": 0.3209, "step": 11479 }, { "epoch": 2.887323943661972, "grad_norm": 0.3010835349559784, "learning_rate": 4.291108573715563e-08, "loss": 0.3181, "step": 11480 }, { "epoch": 2.887575452716298, "grad_norm": 0.27650344371795654, "learning_rate": 4.272000514825014e-08, "loss": 0.3232, "step": 11481 }, { "epoch": 2.8878269617706236, "grad_norm": 0.284249871969223, "learning_rate": 4.2529349119364014e-08, "loss": 0.3218, "step": 11482 }, { "epoch": 2.88807847082495, "grad_norm": 0.27574625611305237, "learning_rate": 4.233911766682475e-08, "loss": 0.3078, "step": 11483 }, { "epoch": 2.8883299798792756, "grad_norm": 0.31248676776885986, "learning_rate": 4.214931080692486e-08, "loss": 0.3008, "step": 11484 }, { "epoch": 2.8885814889336014, "grad_norm": 0.2914111316204071, "learning_rate": 4.195992855592079e-08, "loss": 0.3229, "step": 11485 }, { "epoch": 2.8888329979879277, "grad_norm": 0.31353798508644104, "learning_rate": 4.177097093003124e-08, "loss": 0.3246, "step": 11486 }, { "epoch": 2.8890845070422535, "grad_norm": 0.2765747308731079, "learning_rate": 4.158243794543992e-08, "loss": 0.3021, "step": 11487 }, { "epoch": 2.8893360160965793, "grad_norm": 0.2932724058628082, "learning_rate": 4.1394329618292265e-08, "loss": 0.3156, "step": 11488 }, { "epoch": 2.8895875251509056, "grad_norm": 0.2794465720653534, "learning_rate": 4.120664596469981e-08, "loss": 0.3164, "step": 11489 }, { "epoch": 2.8898390342052314, "grad_norm": 0.2837032973766327, "learning_rate": 4.101938700073582e-08, "loss": 0.3202, "step": 11490 }, { "epoch": 2.890090543259557, "grad_norm": 0.27074161171913147, "learning_rate": 4.083255274243858e-08, "loss": 0.3186, "step": 11491 }, { "epoch": 2.8903420523138834, "grad_norm": 0.290571391582489, "learning_rate": 4.0646143205808063e-08, "loss": 0.3222, "step": 11492 }, { "epoch": 2.890593561368209, "grad_norm": 0.2796350419521332, "learning_rate": 4.046015840680984e-08, "loss": 0.3166, "step": 11493 }, { "epoch": 2.890845070422535, "grad_norm": 0.30049675703048706, "learning_rate": 4.0274598361372266e-08, "loss": 0.3055, "step": 11494 }, { "epoch": 2.8910965794768613, "grad_norm": 0.3056521415710449, "learning_rate": 4.008946308538764e-08, "loss": 0.3097, "step": 11495 }, { "epoch": 2.891348088531187, "grad_norm": 0.2760073244571686, "learning_rate": 3.990475259471105e-08, "loss": 0.3126, "step": 11496 }, { "epoch": 2.891599597585513, "grad_norm": 0.2556244134902954, "learning_rate": 3.9720466905162625e-08, "loss": 0.3084, "step": 11497 }, { "epoch": 2.891851106639839, "grad_norm": 0.2833138704299927, "learning_rate": 3.953660603252474e-08, "loss": 0.3218, "step": 11498 }, { "epoch": 2.892102615694165, "grad_norm": 0.27805668115615845, "learning_rate": 3.9353169992543684e-08, "loss": 0.3332, "step": 11499 }, { "epoch": 2.8923541247484907, "grad_norm": 0.30059728026390076, "learning_rate": 3.917015880092967e-08, "loss": 0.3212, "step": 11500 }, { "epoch": 2.892605633802817, "grad_norm": 0.27901148796081543, "learning_rate": 3.898757247335738e-08, "loss": 0.3197, "step": 11501 }, { "epoch": 2.892857142857143, "grad_norm": 0.29673993587493896, "learning_rate": 3.8805411025463204e-08, "loss": 0.3502, "step": 11502 }, { "epoch": 2.8931086519114686, "grad_norm": 0.2697283923625946, "learning_rate": 3.862367447284854e-08, "loss": 0.2868, "step": 11503 }, { "epoch": 2.893360160965795, "grad_norm": 0.27969419956207275, "learning_rate": 3.8442362831077603e-08, "loss": 0.3168, "step": 11504 }, { "epoch": 2.8936116700201207, "grad_norm": 0.29848575592041016, "learning_rate": 3.82614761156791e-08, "loss": 0.3243, "step": 11505 }, { "epoch": 2.8938631790744465, "grad_norm": 0.2888948619365692, "learning_rate": 3.8081014342144506e-08, "loss": 0.3113, "step": 11506 }, { "epoch": 2.8941146881287727, "grad_norm": 0.2911056876182556, "learning_rate": 3.790097752592925e-08, "loss": 0.3319, "step": 11507 }, { "epoch": 2.8943661971830985, "grad_norm": 0.2891317903995514, "learning_rate": 3.772136568245266e-08, "loss": 0.3201, "step": 11508 }, { "epoch": 2.8946177062374243, "grad_norm": 0.3018838167190552, "learning_rate": 3.754217882709743e-08, "loss": 0.2983, "step": 11509 }, { "epoch": 2.8948692152917506, "grad_norm": 0.30178946256637573, "learning_rate": 3.7363416975209065e-08, "loss": 0.3322, "step": 11510 }, { "epoch": 2.8951207243460764, "grad_norm": 0.28581055998802185, "learning_rate": 3.718508014209809e-08, "loss": 0.3176, "step": 11511 }, { "epoch": 2.895372233400402, "grad_norm": 0.3123716711997986, "learning_rate": 3.700716834303786e-08, "loss": 0.328, "step": 11512 }, { "epoch": 2.8956237424547284, "grad_norm": 0.2836924195289612, "learning_rate": 3.682968159326505e-08, "loss": 0.3073, "step": 11513 }, { "epoch": 2.8958752515090542, "grad_norm": 0.2993314266204834, "learning_rate": 3.665261990798086e-08, "loss": 0.327, "step": 11514 }, { "epoch": 2.89612676056338, "grad_norm": 0.2848893105983734, "learning_rate": 3.6475983302348695e-08, "loss": 0.3163, "step": 11515 }, { "epoch": 2.8963782696177063, "grad_norm": 0.30268558859825134, "learning_rate": 3.629977179149702e-08, "loss": 0.3199, "step": 11516 }, { "epoch": 2.896629778672032, "grad_norm": 0.28077784180641174, "learning_rate": 3.6123985390517094e-08, "loss": 0.3212, "step": 11517 }, { "epoch": 2.896881287726358, "grad_norm": 0.31762558221817017, "learning_rate": 3.5948624114464094e-08, "loss": 0.3415, "step": 11518 }, { "epoch": 2.897132796780684, "grad_norm": 0.267600953578949, "learning_rate": 3.577368797835601e-08, "loss": 0.3174, "step": 11519 }, { "epoch": 2.89738430583501, "grad_norm": 0.29275646805763245, "learning_rate": 3.5599176997175853e-08, "loss": 0.308, "step": 11520 }, { "epoch": 2.8976358148893357, "grad_norm": 0.29874688386917114, "learning_rate": 3.54250911858689e-08, "loss": 0.2987, "step": 11521 }, { "epoch": 2.897887323943662, "grad_norm": 0.29817038774490356, "learning_rate": 3.525143055934488e-08, "loss": 0.3041, "step": 11522 }, { "epoch": 2.898138832997988, "grad_norm": 0.28878530859947205, "learning_rate": 3.507819513247579e-08, "loss": 0.3076, "step": 11523 }, { "epoch": 2.8983903420523136, "grad_norm": 0.2917855679988861, "learning_rate": 3.4905384920099204e-08, "loss": 0.3071, "step": 11524 }, { "epoch": 2.89864185110664, "grad_norm": 0.28223884105682373, "learning_rate": 3.473299993701496e-08, "loss": 0.3096, "step": 11525 }, { "epoch": 2.8988933601609657, "grad_norm": 0.2846827805042267, "learning_rate": 3.4561040197986785e-08, "loss": 0.3293, "step": 11526 }, { "epoch": 2.899144869215292, "grad_norm": 0.31608250737190247, "learning_rate": 3.4389505717741246e-08, "loss": 0.3281, "step": 11527 }, { "epoch": 2.8993963782696177, "grad_norm": 0.29066160321235657, "learning_rate": 3.421839651096992e-08, "loss": 0.3134, "step": 11528 }, { "epoch": 2.8996478873239435, "grad_norm": 0.28401198983192444, "learning_rate": 3.4047712592327753e-08, "loss": 0.3228, "step": 11529 }, { "epoch": 2.8998993963782698, "grad_norm": 0.27312031388282776, "learning_rate": 3.3877453976431386e-08, "loss": 0.3142, "step": 11530 }, { "epoch": 2.9001509054325956, "grad_norm": 0.27587342262268066, "learning_rate": 3.37076206778636e-08, "loss": 0.3202, "step": 11531 }, { "epoch": 2.9004024144869214, "grad_norm": 0.2901318073272705, "learning_rate": 3.353821271116886e-08, "loss": 0.3086, "step": 11532 }, { "epoch": 2.9006539235412476, "grad_norm": 0.28231751918792725, "learning_rate": 3.336923009085613e-08, "loss": 0.3257, "step": 11533 }, { "epoch": 2.9009054325955734, "grad_norm": 0.27121472358703613, "learning_rate": 3.320067283139772e-08, "loss": 0.3256, "step": 11534 }, { "epoch": 2.9011569416498992, "grad_norm": 0.2887016832828522, "learning_rate": 3.3032540947229296e-08, "loss": 0.3162, "step": 11535 }, { "epoch": 2.9014084507042255, "grad_norm": 0.2676198184490204, "learning_rate": 3.286483445275046e-08, "loss": 0.3181, "step": 11536 }, { "epoch": 2.9016599597585513, "grad_norm": 0.2852703630924225, "learning_rate": 3.269755336232472e-08, "loss": 0.2963, "step": 11537 }, { "epoch": 2.9019114688128775, "grad_norm": 0.2976604402065277, "learning_rate": 3.2530697690277835e-08, "loss": 0.348, "step": 11538 }, { "epoch": 2.9021629778672033, "grad_norm": 0.27679896354675293, "learning_rate": 3.236426745090004e-08, "loss": 0.3239, "step": 11539 }, { "epoch": 2.902414486921529, "grad_norm": 0.2862226068973541, "learning_rate": 3.219826265844606e-08, "loss": 0.3037, "step": 11540 }, { "epoch": 2.9026659959758554, "grad_norm": 0.27300092577934265, "learning_rate": 3.203268332713172e-08, "loss": 0.328, "step": 11541 }, { "epoch": 2.902917505030181, "grad_norm": 0.2864827513694763, "learning_rate": 3.1867529471139025e-08, "loss": 0.3227, "step": 11542 }, { "epoch": 2.903169014084507, "grad_norm": 0.2821025848388672, "learning_rate": 3.1702801104611655e-08, "loss": 0.312, "step": 11543 }, { "epoch": 2.9034205231388333, "grad_norm": 0.27062085270881653, "learning_rate": 3.153849824165778e-08, "loss": 0.2998, "step": 11544 }, { "epoch": 2.903672032193159, "grad_norm": 0.2897660434246063, "learning_rate": 3.1374620896348905e-08, "loss": 0.3393, "step": 11545 }, { "epoch": 2.903923541247485, "grad_norm": 0.3015936017036438, "learning_rate": 3.121116908272048e-08, "loss": 0.315, "step": 11546 }, { "epoch": 2.904175050301811, "grad_norm": 0.28271710872650146, "learning_rate": 3.10481428147702e-08, "loss": 0.3177, "step": 11547 }, { "epoch": 2.904426559356137, "grad_norm": 0.2824450135231018, "learning_rate": 3.088554210646133e-08, "loss": 0.3422, "step": 11548 }, { "epoch": 2.9046780684104627, "grad_norm": 0.289212703704834, "learning_rate": 3.072336697171885e-08, "loss": 0.3493, "step": 11549 }, { "epoch": 2.904929577464789, "grad_norm": 0.29587748646736145, "learning_rate": 3.05616174244322e-08, "loss": 0.2915, "step": 11550 }, { "epoch": 2.9051810865191148, "grad_norm": 0.26594677567481995, "learning_rate": 3.040029347845419e-08, "loss": 0.3406, "step": 11551 }, { "epoch": 2.9054325955734406, "grad_norm": 0.28409308195114136, "learning_rate": 3.0239395147601547e-08, "loss": 0.3331, "step": 11552 }, { "epoch": 2.905684104627767, "grad_norm": 0.29760369658470154, "learning_rate": 3.007892244565436e-08, "loss": 0.3406, "step": 11553 }, { "epoch": 2.9059356136820926, "grad_norm": 0.276475191116333, "learning_rate": 2.991887538635496e-08, "loss": 0.3105, "step": 11554 }, { "epoch": 2.9061871227364184, "grad_norm": 0.29626041650772095, "learning_rate": 2.975925398341184e-08, "loss": 0.3304, "step": 11555 }, { "epoch": 2.9064386317907447, "grad_norm": 0.29270699620246887, "learning_rate": 2.960005825049461e-08, "loss": 0.342, "step": 11556 }, { "epoch": 2.9066901408450705, "grad_norm": 0.2765859365463257, "learning_rate": 2.944128820123737e-08, "loss": 0.326, "step": 11557 }, { "epoch": 2.9069416498993963, "grad_norm": 0.2837342917919159, "learning_rate": 2.9282943849238687e-08, "loss": 0.3028, "step": 11558 }, { "epoch": 2.9071931589537225, "grad_norm": 0.3145996630191803, "learning_rate": 2.9125025208058823e-08, "loss": 0.3317, "step": 11559 }, { "epoch": 2.9074446680080483, "grad_norm": 0.28416410088539124, "learning_rate": 2.8967532291222512e-08, "loss": 0.3253, "step": 11560 }, { "epoch": 2.907696177062374, "grad_norm": 0.2724005877971649, "learning_rate": 2.8810465112218965e-08, "loss": 0.3177, "step": 11561 }, { "epoch": 2.9079476861167004, "grad_norm": 0.2829074561595917, "learning_rate": 2.8653823684499093e-08, "loss": 0.3286, "step": 11562 }, { "epoch": 2.908199195171026, "grad_norm": 0.2702285349369049, "learning_rate": 2.849760802147883e-08, "loss": 0.3161, "step": 11563 }, { "epoch": 2.908450704225352, "grad_norm": 0.2694869637489319, "learning_rate": 2.8341818136536915e-08, "loss": 0.3177, "step": 11564 }, { "epoch": 2.9087022132796783, "grad_norm": 0.302658349275589, "learning_rate": 2.8186454043014898e-08, "loss": 0.3182, "step": 11565 }, { "epoch": 2.908953722334004, "grad_norm": 0.2830633521080017, "learning_rate": 2.8031515754220473e-08, "loss": 0.3262, "step": 11566 }, { "epoch": 2.90920523138833, "grad_norm": 0.25605013966560364, "learning_rate": 2.7877003283421356e-08, "loss": 0.3138, "step": 11567 }, { "epoch": 2.909456740442656, "grad_norm": 0.30849385261535645, "learning_rate": 2.772291664385196e-08, "loss": 0.3408, "step": 11568 }, { "epoch": 2.909708249496982, "grad_norm": 0.2905694246292114, "learning_rate": 2.7569255848708397e-08, "loss": 0.332, "step": 11569 }, { "epoch": 2.9099597585513077, "grad_norm": 0.31673821806907654, "learning_rate": 2.7416020911150144e-08, "loss": 0.3283, "step": 11570 }, { "epoch": 2.910211267605634, "grad_norm": 0.28510385751724243, "learning_rate": 2.7263211844301695e-08, "loss": 0.3045, "step": 11571 }, { "epoch": 2.91046277665996, "grad_norm": 0.2822716534137726, "learning_rate": 2.7110828661249255e-08, "loss": 0.3522, "step": 11572 }, { "epoch": 2.9107142857142856, "grad_norm": 0.3001321256160736, "learning_rate": 2.6958871375044605e-08, "loss": 0.3117, "step": 11573 }, { "epoch": 2.910965794768612, "grad_norm": 0.2811189889907837, "learning_rate": 2.680733999870122e-08, "loss": 0.3131, "step": 11574 }, { "epoch": 2.9112173038229376, "grad_norm": 0.3301267921924591, "learning_rate": 2.6656234545197057e-08, "loss": 0.3362, "step": 11575 }, { "epoch": 2.9114688128772634, "grad_norm": 0.2918173372745514, "learning_rate": 2.6505555027472875e-08, "loss": 0.3034, "step": 11576 }, { "epoch": 2.9117203219315897, "grad_norm": 0.29059723019599915, "learning_rate": 2.6355301458434457e-08, "loss": 0.3262, "step": 11577 }, { "epoch": 2.9119718309859155, "grad_norm": 0.29665839672088623, "learning_rate": 2.620547385094929e-08, "loss": 0.3288, "step": 11578 }, { "epoch": 2.9122233400402413, "grad_norm": 0.275056391954422, "learning_rate": 2.6056072217848783e-08, "loss": 0.3151, "step": 11579 }, { "epoch": 2.9124748490945676, "grad_norm": 0.2742392122745514, "learning_rate": 2.5907096571929357e-08, "loss": 0.3128, "step": 11580 }, { "epoch": 2.9127263581488934, "grad_norm": 0.2982200086116791, "learning_rate": 2.5758546925949148e-08, "loss": 0.3303, "step": 11581 }, { "epoch": 2.912977867203219, "grad_norm": 0.284332275390625, "learning_rate": 2.5610423292630195e-08, "loss": 0.335, "step": 11582 }, { "epoch": 2.9132293762575454, "grad_norm": 0.289644718170166, "learning_rate": 2.5462725684659573e-08, "loss": 0.3368, "step": 11583 }, { "epoch": 2.913480885311871, "grad_norm": 0.27438437938690186, "learning_rate": 2.53154541146855e-08, "loss": 0.3279, "step": 11584 }, { "epoch": 2.913732394366197, "grad_norm": 0.2902214527130127, "learning_rate": 2.516860859532122e-08, "loss": 0.3175, "step": 11585 }, { "epoch": 2.9139839034205233, "grad_norm": 0.28719666600227356, "learning_rate": 2.5022189139143338e-08, "loss": 0.3358, "step": 11586 }, { "epoch": 2.914235412474849, "grad_norm": 0.298453688621521, "learning_rate": 2.487619575869127e-08, "loss": 0.3301, "step": 11587 }, { "epoch": 2.914486921529175, "grad_norm": 0.2861069142818451, "learning_rate": 2.4730628466468898e-08, "loss": 0.288, "step": 11588 }, { "epoch": 2.914738430583501, "grad_norm": 0.3006594479084015, "learning_rate": 2.4585487274942922e-08, "loss": 0.3185, "step": 11589 }, { "epoch": 2.914989939637827, "grad_norm": 0.2652274966239929, "learning_rate": 2.444077219654395e-08, "loss": 0.3174, "step": 11590 }, { "epoch": 2.9152414486921527, "grad_norm": 0.29787126183509827, "learning_rate": 2.4296483243665958e-08, "loss": 0.3119, "step": 11591 }, { "epoch": 2.915492957746479, "grad_norm": 0.26815494894981384, "learning_rate": 2.4152620428666284e-08, "loss": 0.3128, "step": 11592 }, { "epoch": 2.915744466800805, "grad_norm": 0.28812792897224426, "learning_rate": 2.400918376386563e-08, "loss": 0.3161, "step": 11593 }, { "epoch": 2.9159959758551306, "grad_norm": 0.27825263142585754, "learning_rate": 2.386617326154861e-08, "loss": 0.2885, "step": 11594 }, { "epoch": 2.916247484909457, "grad_norm": 0.30951130390167236, "learning_rate": 2.372358893396376e-08, "loss": 0.3131, "step": 11595 }, { "epoch": 2.9164989939637826, "grad_norm": 0.3053039610385895, "learning_rate": 2.358143079332187e-08, "loss": 0.3242, "step": 11596 }, { "epoch": 2.9167505030181085, "grad_norm": 0.2770836651325226, "learning_rate": 2.3439698851797643e-08, "loss": 0.33, "step": 11597 }, { "epoch": 2.9170020120724347, "grad_norm": 0.29645803570747375, "learning_rate": 2.329839312153026e-08, "loss": 0.3375, "step": 11598 }, { "epoch": 2.9172535211267605, "grad_norm": 0.26674768328666687, "learning_rate": 2.3157513614621706e-08, "loss": 0.3247, "step": 11599 }, { "epoch": 2.9175050301810863, "grad_norm": 0.2916862666606903, "learning_rate": 2.3017060343136223e-08, "loss": 0.322, "step": 11600 }, { "epoch": 2.9177565392354126, "grad_norm": 0.2905628979206085, "learning_rate": 2.287703331910418e-08, "loss": 0.2903, "step": 11601 }, { "epoch": 2.9180080482897384, "grad_norm": 0.28413960337638855, "learning_rate": 2.273743255451766e-08, "loss": 0.3085, "step": 11602 }, { "epoch": 2.918259557344064, "grad_norm": 0.290414035320282, "learning_rate": 2.2598258061331536e-08, "loss": 0.3394, "step": 11603 }, { "epoch": 2.9185110663983904, "grad_norm": 0.2848317623138428, "learning_rate": 2.2459509851466833e-08, "loss": 0.3058, "step": 11604 }, { "epoch": 2.9187625754527162, "grad_norm": 0.2922884225845337, "learning_rate": 2.232118793680571e-08, "loss": 0.3092, "step": 11605 }, { "epoch": 2.919014084507042, "grad_norm": 0.2839473485946655, "learning_rate": 2.218329232919425e-08, "loss": 0.3318, "step": 11606 }, { "epoch": 2.9192655935613683, "grad_norm": 0.28309670090675354, "learning_rate": 2.2045823040443005e-08, "loss": 0.31, "step": 11607 }, { "epoch": 2.919517102615694, "grad_norm": 0.2878527045249939, "learning_rate": 2.1908780082324777e-08, "loss": 0.3089, "step": 11608 }, { "epoch": 2.91976861167002, "grad_norm": 0.3030252158641815, "learning_rate": 2.177216346657629e-08, "loss": 0.2988, "step": 11609 }, { "epoch": 2.920020120724346, "grad_norm": 0.2812774181365967, "learning_rate": 2.1635973204899296e-08, "loss": 0.3232, "step": 11610 }, { "epoch": 2.920271629778672, "grad_norm": 0.2975054383277893, "learning_rate": 2.1500209308956132e-08, "loss": 0.3178, "step": 11611 }, { "epoch": 2.9205231388329977, "grad_norm": 0.262926310300827, "learning_rate": 2.136487179037472e-08, "loss": 0.3276, "step": 11612 }, { "epoch": 2.920774647887324, "grad_norm": 0.30193185806274414, "learning_rate": 2.122996066074523e-08, "loss": 0.3339, "step": 11613 }, { "epoch": 2.92102615694165, "grad_norm": 0.28565213084220886, "learning_rate": 2.1095475931623422e-08, "loss": 0.3043, "step": 11614 }, { "epoch": 2.9212776659959756, "grad_norm": 0.32027721405029297, "learning_rate": 2.0961417614525638e-08, "loss": 0.2899, "step": 11615 }, { "epoch": 2.921529175050302, "grad_norm": 0.29877468943595886, "learning_rate": 2.0827785720933803e-08, "loss": 0.3295, "step": 11616 }, { "epoch": 2.9217806841046277, "grad_norm": 0.291421502828598, "learning_rate": 2.0694580262292096e-08, "loss": 0.3109, "step": 11617 }, { "epoch": 2.9220321931589535, "grad_norm": 0.2898477613925934, "learning_rate": 2.0561801250009727e-08, "loss": 0.3352, "step": 11618 }, { "epoch": 2.9222837022132797, "grad_norm": 0.27962225675582886, "learning_rate": 2.04294486954576e-08, "loss": 0.329, "step": 11619 }, { "epoch": 2.9225352112676055, "grad_norm": 0.31196466088294983, "learning_rate": 2.0297522609971087e-08, "loss": 0.3335, "step": 11620 }, { "epoch": 2.9227867203219313, "grad_norm": 0.2565595507621765, "learning_rate": 2.0166023004848934e-08, "loss": 0.2932, "step": 11621 }, { "epoch": 2.9230382293762576, "grad_norm": 0.2930534780025482, "learning_rate": 2.003494989135324e-08, "loss": 0.329, "step": 11622 }, { "epoch": 2.9232897384305834, "grad_norm": 0.2888554632663727, "learning_rate": 1.990430328070947e-08, "loss": 0.3193, "step": 11623 }, { "epoch": 2.9235412474849096, "grad_norm": 0.27373868227005005, "learning_rate": 1.977408318410645e-08, "loss": 0.3213, "step": 11624 }, { "epoch": 2.9237927565392354, "grad_norm": 0.2846567630767822, "learning_rate": 1.9644289612697487e-08, "loss": 0.3173, "step": 11625 }, { "epoch": 2.9240442655935612, "grad_norm": 0.25684884190559387, "learning_rate": 1.951492257759757e-08, "loss": 0.3325, "step": 11626 }, { "epoch": 2.9242957746478875, "grad_norm": 0.2954108417034149, "learning_rate": 1.938598208988729e-08, "loss": 0.3258, "step": 11627 }, { "epoch": 2.9245472837022133, "grad_norm": 0.28941529989242554, "learning_rate": 1.9257468160608917e-08, "loss": 0.3436, "step": 11628 }, { "epoch": 2.924798792756539, "grad_norm": 0.310966432094574, "learning_rate": 1.912938080076865e-08, "loss": 0.3186, "step": 11629 }, { "epoch": 2.9250503018108653, "grad_norm": 0.29559624195098877, "learning_rate": 1.90017200213366e-08, "loss": 0.3081, "step": 11630 }, { "epoch": 2.925301810865191, "grad_norm": 0.2723540663719177, "learning_rate": 1.887448583324625e-08, "loss": 0.3291, "step": 11631 }, { "epoch": 2.925553319919517, "grad_norm": 0.290840208530426, "learning_rate": 1.8747678247394984e-08, "loss": 0.3344, "step": 11632 }, { "epoch": 2.925804828973843, "grad_norm": 0.31380322575569153, "learning_rate": 1.8621297274641904e-08, "loss": 0.2994, "step": 11633 }, { "epoch": 2.926056338028169, "grad_norm": 0.279624342918396, "learning_rate": 1.8495342925811122e-08, "loss": 0.3212, "step": 11634 }, { "epoch": 2.926307847082495, "grad_norm": 0.27431485056877136, "learning_rate": 1.8369815211690678e-08, "loss": 0.2976, "step": 11635 }, { "epoch": 2.926559356136821, "grad_norm": 0.27514395117759705, "learning_rate": 1.8244714143029752e-08, "loss": 0.305, "step": 11636 }, { "epoch": 2.926810865191147, "grad_norm": 0.27842938899993896, "learning_rate": 1.8120039730544214e-08, "loss": 0.3311, "step": 11637 }, { "epoch": 2.927062374245473, "grad_norm": 0.26175856590270996, "learning_rate": 1.799579198490997e-08, "loss": 0.323, "step": 11638 }, { "epoch": 2.927313883299799, "grad_norm": 0.3041093647480011, "learning_rate": 1.7871970916769067e-08, "loss": 0.3209, "step": 11639 }, { "epoch": 2.9275653923541247, "grad_norm": 0.28131407499313354, "learning_rate": 1.774857653672579e-08, "loss": 0.3049, "step": 11640 }, { "epoch": 2.927816901408451, "grad_norm": 0.3082786500453949, "learning_rate": 1.7625608855348365e-08, "loss": 0.302, "step": 11641 }, { "epoch": 2.9280684104627768, "grad_norm": 0.28668394684791565, "learning_rate": 1.7503067883167247e-08, "loss": 0.3034, "step": 11642 }, { "epoch": 2.9283199195171026, "grad_norm": 0.2656381130218506, "learning_rate": 1.7380953630678488e-08, "loss": 0.3191, "step": 11643 }, { "epoch": 2.928571428571429, "grad_norm": 0.30544763803482056, "learning_rate": 1.7259266108339833e-08, "loss": 0.3299, "step": 11644 }, { "epoch": 2.9288229376257546, "grad_norm": 0.2998693287372589, "learning_rate": 1.713800532657295e-08, "loss": 0.3154, "step": 11645 }, { "epoch": 2.9290744466800804, "grad_norm": 0.2941758334636688, "learning_rate": 1.7017171295763412e-08, "loss": 0.3054, "step": 11646 }, { "epoch": 2.9293259557344067, "grad_norm": 0.29245221614837646, "learning_rate": 1.6896764026259616e-08, "loss": 0.326, "step": 11647 }, { "epoch": 2.9295774647887325, "grad_norm": 0.2588028907775879, "learning_rate": 1.6776783528373864e-08, "loss": 0.2949, "step": 11648 }, { "epoch": 2.9298289738430583, "grad_norm": 0.27226272225379944, "learning_rate": 1.665722981238127e-08, "loss": 0.3068, "step": 11649 }, { "epoch": 2.9300804828973845, "grad_norm": 0.27903351187705994, "learning_rate": 1.6538102888521423e-08, "loss": 0.3074, "step": 11650 }, { "epoch": 2.9303319919517103, "grad_norm": 0.2830352187156677, "learning_rate": 1.6419402766996717e-08, "loss": 0.3179, "step": 11651 }, { "epoch": 2.930583501006036, "grad_norm": 0.29141873121261597, "learning_rate": 1.630112945797291e-08, "loss": 0.3243, "step": 11652 }, { "epoch": 2.9308350100603624, "grad_norm": 0.28866273164749146, "learning_rate": 1.6183282971579673e-08, "loss": 0.3409, "step": 11653 }, { "epoch": 2.931086519114688, "grad_norm": 0.27328038215637207, "learning_rate": 1.606586331790949e-08, "loss": 0.3436, "step": 11654 }, { "epoch": 2.931338028169014, "grad_norm": 0.2887606620788574, "learning_rate": 1.5948870507018766e-08, "loss": 0.3276, "step": 11655 }, { "epoch": 2.9315895372233403, "grad_norm": 0.2820006012916565, "learning_rate": 1.5832304548926703e-08, "loss": 0.2983, "step": 11656 }, { "epoch": 2.931841046277666, "grad_norm": 0.2761184573173523, "learning_rate": 1.571616545361754e-08, "loss": 0.3168, "step": 11657 }, { "epoch": 2.932092555331992, "grad_norm": 0.3011208474636078, "learning_rate": 1.560045323103665e-08, "loss": 0.3198, "step": 11658 }, { "epoch": 2.932344064386318, "grad_norm": 0.27800092101097107, "learning_rate": 1.5485167891095e-08, "loss": 0.321, "step": 11659 }, { "epoch": 2.932595573440644, "grad_norm": 0.2734206020832062, "learning_rate": 1.53703094436658e-08, "loss": 0.3157, "step": 11660 }, { "epoch": 2.9328470824949697, "grad_norm": 0.26894569396972656, "learning_rate": 1.5255877898585624e-08, "loss": 0.3178, "step": 11661 }, { "epoch": 2.933098591549296, "grad_norm": 0.2924859821796417, "learning_rate": 1.5141873265654973e-08, "loss": 0.3364, "step": 11662 }, { "epoch": 2.933350100603622, "grad_norm": 0.2777864634990692, "learning_rate": 1.5028295554637695e-08, "loss": 0.3277, "step": 11663 }, { "epoch": 2.9336016096579476, "grad_norm": 0.3020499348640442, "learning_rate": 1.4915144775261014e-08, "loss": 0.3211, "step": 11664 }, { "epoch": 2.933853118712274, "grad_norm": 0.2888851761817932, "learning_rate": 1.4802420937216066e-08, "loss": 0.3298, "step": 11665 }, { "epoch": 2.9341046277665996, "grad_norm": 0.27491968870162964, "learning_rate": 1.4690124050155686e-08, "loss": 0.3193, "step": 11666 }, { "epoch": 2.9343561368209254, "grad_norm": 0.29931673407554626, "learning_rate": 1.4578254123698844e-08, "loss": 0.299, "step": 11667 }, { "epoch": 2.9346076458752517, "grad_norm": 0.2893234193325043, "learning_rate": 1.4466811167425655e-08, "loss": 0.3203, "step": 11668 }, { "epoch": 2.9348591549295775, "grad_norm": 0.2912534475326538, "learning_rate": 1.4355795190880707e-08, "loss": 0.3138, "step": 11669 }, { "epoch": 2.9351106639839033, "grad_norm": 0.28769242763519287, "learning_rate": 1.4245206203571393e-08, "loss": 0.3178, "step": 11670 }, { "epoch": 2.9353621730382295, "grad_norm": 0.31816333532333374, "learning_rate": 1.4135044214969585e-08, "loss": 0.3455, "step": 11671 }, { "epoch": 2.9356136820925554, "grad_norm": 0.3172784149646759, "learning_rate": 1.4025309234510509e-08, "loss": 0.3227, "step": 11672 }, { "epoch": 2.935865191146881, "grad_norm": 0.31336894631385803, "learning_rate": 1.391600127159054e-08, "loss": 0.3134, "step": 11673 }, { "epoch": 2.9361167002012074, "grad_norm": 0.2920610010623932, "learning_rate": 1.3807120335572743e-08, "loss": 0.3017, "step": 11674 }, { "epoch": 2.936368209255533, "grad_norm": 0.27723726630210876, "learning_rate": 1.3698666435781327e-08, "loss": 0.3341, "step": 11675 }, { "epoch": 2.936619718309859, "grad_norm": 0.28226420283317566, "learning_rate": 1.3590639581504971e-08, "loss": 0.3079, "step": 11676 }, { "epoch": 2.9368712273641853, "grad_norm": 0.3186614215373993, "learning_rate": 1.3483039781995721e-08, "loss": 0.3145, "step": 11677 }, { "epoch": 2.937122736418511, "grad_norm": 0.27414506673812866, "learning_rate": 1.3375867046468427e-08, "loss": 0.2952, "step": 11678 }, { "epoch": 2.937374245472837, "grad_norm": 0.28623631596565247, "learning_rate": 1.3269121384101857e-08, "loss": 0.34, "step": 11679 }, { "epoch": 2.937625754527163, "grad_norm": 0.29475000500679016, "learning_rate": 1.316280280403759e-08, "loss": 0.3194, "step": 11680 }, { "epoch": 2.937877263581489, "grad_norm": 0.3122510015964508, "learning_rate": 1.3056911315382226e-08, "loss": 0.3142, "step": 11681 }, { "epoch": 2.9381287726358147, "grad_norm": 0.29271435737609863, "learning_rate": 1.2951446927204625e-08, "loss": 0.3127, "step": 11682 }, { "epoch": 2.938380281690141, "grad_norm": 0.3412143886089325, "learning_rate": 1.2846409648535896e-08, "loss": 0.3269, "step": 11683 }, { "epoch": 2.938631790744467, "grad_norm": 0.2909950613975525, "learning_rate": 1.2741799488373285e-08, "loss": 0.3061, "step": 11684 }, { "epoch": 2.9388832997987926, "grad_norm": 0.276928573846817, "learning_rate": 1.2637616455675183e-08, "loss": 0.3038, "step": 11685 }, { "epoch": 2.939134808853119, "grad_norm": 0.2644422948360443, "learning_rate": 1.2533860559363897e-08, "loss": 0.332, "step": 11686 }, { "epoch": 2.9393863179074446, "grad_norm": 0.28948256373405457, "learning_rate": 1.2430531808326763e-08, "loss": 0.325, "step": 11687 }, { "epoch": 2.9396378269617705, "grad_norm": 0.2958967387676239, "learning_rate": 1.23276302114117e-08, "loss": 0.3028, "step": 11688 }, { "epoch": 2.9398893360160967, "grad_norm": 0.2886587977409363, "learning_rate": 1.2225155777432773e-08, "loss": 0.3312, "step": 11689 }, { "epoch": 2.9401408450704225, "grad_norm": 0.3025323748588562, "learning_rate": 1.2123108515165738e-08, "loss": 0.3017, "step": 11690 }, { "epoch": 2.9403923541247483, "grad_norm": 0.28794747591018677, "learning_rate": 1.202148843335027e-08, "loss": 0.3094, "step": 11691 }, { "epoch": 2.9406438631790746, "grad_norm": 0.287518709897995, "learning_rate": 1.1920295540689964e-08, "loss": 0.3325, "step": 11692 }, { "epoch": 2.9408953722334004, "grad_norm": 0.3010087013244629, "learning_rate": 1.1819529845850664e-08, "loss": 0.3359, "step": 11693 }, { "epoch": 2.941146881287726, "grad_norm": 0.28999802470207214, "learning_rate": 1.1719191357463245e-08, "loss": 0.3375, "step": 11694 }, { "epoch": 2.9413983903420524, "grad_norm": 0.2955481708049774, "learning_rate": 1.1619280084119722e-08, "loss": 0.3295, "step": 11695 }, { "epoch": 2.941649899396378, "grad_norm": 0.28115126490592957, "learning_rate": 1.151979603437825e-08, "loss": 0.3115, "step": 11696 }, { "epoch": 2.941901408450704, "grad_norm": 0.2838100492954254, "learning_rate": 1.1420739216758125e-08, "loss": 0.3389, "step": 11697 }, { "epoch": 2.9421529175050303, "grad_norm": 0.291797935962677, "learning_rate": 1.1322109639743117e-08, "loss": 0.3357, "step": 11698 }, { "epoch": 2.942404426559356, "grad_norm": 0.2834652066230774, "learning_rate": 1.1223907311780358e-08, "loss": 0.303, "step": 11699 }, { "epoch": 2.942655935613682, "grad_norm": 0.2929726243019104, "learning_rate": 1.1126132241280342e-08, "loss": 0.3303, "step": 11700 }, { "epoch": 2.942907444668008, "grad_norm": 0.290096253156662, "learning_rate": 1.1028784436616923e-08, "loss": 0.323, "step": 11701 }, { "epoch": 2.943158953722334, "grad_norm": 0.2806238830089569, "learning_rate": 1.0931863906127327e-08, "loss": 0.3357, "step": 11702 }, { "epoch": 2.9434104627766597, "grad_norm": 0.2727678716182709, "learning_rate": 1.0835370658111577e-08, "loss": 0.3191, "step": 11703 }, { "epoch": 2.943661971830986, "grad_norm": 0.3112581670284271, "learning_rate": 1.073930470083473e-08, "loss": 0.314, "step": 11704 }, { "epoch": 2.943913480885312, "grad_norm": 0.29780250787734985, "learning_rate": 1.0643666042523537e-08, "loss": 0.3292, "step": 11705 }, { "epoch": 2.9441649899396376, "grad_norm": 0.283771276473999, "learning_rate": 1.054845469136867e-08, "loss": 0.3173, "step": 11706 }, { "epoch": 2.944416498993964, "grad_norm": 0.2684139311313629, "learning_rate": 1.0453670655525273e-08, "loss": 0.3129, "step": 11707 }, { "epoch": 2.9446680080482897, "grad_norm": 0.29727786779403687, "learning_rate": 1.0359313943110183e-08, "loss": 0.3189, "step": 11708 }, { "epoch": 2.9449195171026155, "grad_norm": 0.28367939591407776, "learning_rate": 1.0265384562205272e-08, "loss": 0.3273, "step": 11709 }, { "epoch": 2.9451710261569417, "grad_norm": 0.28062888979911804, "learning_rate": 1.0171882520853548e-08, "loss": 0.3059, "step": 11710 }, { "epoch": 2.9454225352112675, "grad_norm": 0.28939807415008545, "learning_rate": 1.0078807827064718e-08, "loss": 0.3318, "step": 11711 }, { "epoch": 2.9456740442655933, "grad_norm": 0.27794498205184937, "learning_rate": 9.986160488808517e-09, "loss": 0.3177, "step": 11712 }, { "epoch": 2.9459255533199196, "grad_norm": 0.3011113107204437, "learning_rate": 9.893940514020817e-09, "loss": 0.3293, "step": 11713 }, { "epoch": 2.9461770623742454, "grad_norm": 0.2947550117969513, "learning_rate": 9.802147910598637e-09, "loss": 0.3097, "step": 11714 }, { "epoch": 2.946428571428571, "grad_norm": 0.2753145694732666, "learning_rate": 9.71078268640402e-09, "loss": 0.3261, "step": 11715 }, { "epoch": 2.9466800804828974, "grad_norm": 0.29693037271499634, "learning_rate": 9.619844849261816e-09, "loss": 0.3104, "step": 11716 }, { "epoch": 2.9469315895372232, "grad_norm": 0.2914050221443176, "learning_rate": 9.529334406960245e-09, "loss": 0.3201, "step": 11717 }, { "epoch": 2.947183098591549, "grad_norm": 0.3129412829875946, "learning_rate": 9.439251367250879e-09, "loss": 0.3052, "step": 11718 }, { "epoch": 2.9474346076458753, "grad_norm": 0.26742586493492126, "learning_rate": 9.349595737848105e-09, "loss": 0.3316, "step": 11719 }, { "epoch": 2.947686116700201, "grad_norm": 0.2630806565284729, "learning_rate": 9.260367526431891e-09, "loss": 0.3085, "step": 11720 }, { "epoch": 2.947937625754527, "grad_norm": 0.28567931056022644, "learning_rate": 9.17156674064279e-09, "loss": 0.3279, "step": 11721 }, { "epoch": 2.948189134808853, "grad_norm": 0.2721531391143799, "learning_rate": 9.083193388086941e-09, "loss": 0.3058, "step": 11722 }, { "epoch": 2.948440643863179, "grad_norm": 0.31495407223701477, "learning_rate": 8.99524747633218e-09, "loss": 0.3336, "step": 11723 }, { "epoch": 2.948692152917505, "grad_norm": 0.26969239115715027, "learning_rate": 8.907729012910814e-09, "loss": 0.3234, "step": 11724 }, { "epoch": 2.948943661971831, "grad_norm": 0.3003351092338562, "learning_rate": 8.820638005317961e-09, "loss": 0.3474, "step": 11725 }, { "epoch": 2.949195171026157, "grad_norm": 0.2833467125892639, "learning_rate": 8.733974461013207e-09, "loss": 0.3048, "step": 11726 }, { "epoch": 2.949446680080483, "grad_norm": 0.2887994050979614, "learning_rate": 8.647738387418391e-09, "loss": 0.3077, "step": 11727 }, { "epoch": 2.949698189134809, "grad_norm": 0.2787559926509857, "learning_rate": 8.561929791918722e-09, "loss": 0.3414, "step": 11728 }, { "epoch": 2.9499496981891347, "grad_norm": 0.2879605293273926, "learning_rate": 8.476548681863316e-09, "loss": 0.3144, "step": 11729 }, { "epoch": 2.950201207243461, "grad_norm": 0.2889518141746521, "learning_rate": 8.391595064564661e-09, "loss": 0.3077, "step": 11730 }, { "epoch": 2.9504527162977867, "grad_norm": 0.2559886872768402, "learning_rate": 8.307068947299157e-09, "loss": 0.3284, "step": 11731 }, { "epoch": 2.9507042253521125, "grad_norm": 0.3104651868343353, "learning_rate": 8.222970337304347e-09, "loss": 0.3091, "step": 11732 }, { "epoch": 2.9509557344064388, "grad_norm": 0.2882402241230011, "learning_rate": 8.139299241783916e-09, "loss": 0.3232, "step": 11733 }, { "epoch": 2.9512072434607646, "grad_norm": 0.2959415018558502, "learning_rate": 8.056055667903795e-09, "loss": 0.3191, "step": 11734 }, { "epoch": 2.9514587525150904, "grad_norm": 0.2943558394908905, "learning_rate": 7.973239622792728e-09, "loss": 0.3241, "step": 11735 }, { "epoch": 2.9517102615694166, "grad_norm": 0.271892786026001, "learning_rate": 7.89085111354393e-09, "loss": 0.3026, "step": 11736 }, { "epoch": 2.9519617706237424, "grad_norm": 0.2949487268924713, "learning_rate": 7.808890147213422e-09, "loss": 0.3119, "step": 11737 }, { "epoch": 2.9522132796780687, "grad_norm": 0.2683142125606537, "learning_rate": 7.727356730820035e-09, "loss": 0.3042, "step": 11738 }, { "epoch": 2.9524647887323945, "grad_norm": 0.3025915026664734, "learning_rate": 7.646250871347072e-09, "loss": 0.3253, "step": 11739 }, { "epoch": 2.9527162977867203, "grad_norm": 0.2929423451423645, "learning_rate": 7.565572575740087e-09, "loss": 0.3055, "step": 11740 }, { "epoch": 2.9529678068410465, "grad_norm": 0.27529823780059814, "learning_rate": 7.485321850910221e-09, "loss": 0.3363, "step": 11741 }, { "epoch": 2.9532193158953723, "grad_norm": 0.28709059953689575, "learning_rate": 7.405498703728642e-09, "loss": 0.3161, "step": 11742 }, { "epoch": 2.953470824949698, "grad_norm": 0.28413447737693787, "learning_rate": 7.326103141033214e-09, "loss": 0.2844, "step": 11743 }, { "epoch": 2.9537223340040244, "grad_norm": 0.2665967643260956, "learning_rate": 7.247135169622388e-09, "loss": 0.2992, "step": 11744 }, { "epoch": 2.95397384305835, "grad_norm": 0.29413825273513794, "learning_rate": 7.1685947962601976e-09, "loss": 0.3074, "step": 11745 }, { "epoch": 2.954225352112676, "grad_norm": 0.29388710856437683, "learning_rate": 7.0904820276729294e-09, "loss": 0.311, "step": 11746 }, { "epoch": 2.9544768611670023, "grad_norm": 0.30368688702583313, "learning_rate": 7.012796870549676e-09, "loss": 0.3278, "step": 11747 }, { "epoch": 2.954728370221328, "grad_norm": 0.30217698216438293, "learning_rate": 6.935539331545116e-09, "loss": 0.3486, "step": 11748 }, { "epoch": 2.954979879275654, "grad_norm": 0.30664876103401184, "learning_rate": 6.858709417274512e-09, "loss": 0.3202, "step": 11749 }, { "epoch": 2.95523138832998, "grad_norm": 0.2738303244113922, "learning_rate": 6.7823071343187106e-09, "loss": 0.3249, "step": 11750 }, { "epoch": 2.955482897384306, "grad_norm": 0.30232658982276917, "learning_rate": 6.7063324892208125e-09, "loss": 0.3419, "step": 11751 }, { "epoch": 2.9557344064386317, "grad_norm": 0.2941600978374481, "learning_rate": 6.630785488487834e-09, "loss": 0.3004, "step": 11752 }, { "epoch": 2.955985915492958, "grad_norm": 0.27397286891937256, "learning_rate": 6.5556661385896005e-09, "loss": 0.3072, "step": 11753 }, { "epoch": 2.9562374245472838, "grad_norm": 0.2965390980243683, "learning_rate": 6.480974445959298e-09, "loss": 0.345, "step": 11754 }, { "epoch": 2.9564889336016096, "grad_norm": 0.27063122391700745, "learning_rate": 6.406710416994588e-09, "loss": 0.2997, "step": 11755 }, { "epoch": 2.956740442655936, "grad_norm": 0.2771882712841034, "learning_rate": 6.3328740580548275e-09, "loss": 0.3317, "step": 11756 }, { "epoch": 2.9569919517102616, "grad_norm": 0.2760043442249298, "learning_rate": 6.259465375464402e-09, "loss": 0.3197, "step": 11757 }, { "epoch": 2.9572434607645874, "grad_norm": 0.30299848318099976, "learning_rate": 6.1864843755099495e-09, "loss": 0.3115, "step": 11758 }, { "epoch": 2.9574949698189137, "grad_norm": 0.2945834994316101, "learning_rate": 6.113931064442025e-09, "loss": 0.3084, "step": 11759 }, { "epoch": 2.9577464788732395, "grad_norm": 0.2729983329772949, "learning_rate": 6.041805448474547e-09, "loss": 0.3012, "step": 11760 }, { "epoch": 2.9579979879275653, "grad_norm": 0.29064232110977173, "learning_rate": 5.970107533783687e-09, "loss": 0.3246, "step": 11761 }, { "epoch": 2.9582494969818915, "grad_norm": 0.29307058453559875, "learning_rate": 5.898837326511197e-09, "loss": 0.3051, "step": 11762 }, { "epoch": 2.9585010060362174, "grad_norm": 0.2878889739513397, "learning_rate": 5.8279948327599754e-09, "loss": 0.3432, "step": 11763 }, { "epoch": 2.958752515090543, "grad_norm": 0.272480309009552, "learning_rate": 5.75758005859739e-09, "loss": 0.3215, "step": 11764 }, { "epoch": 2.9590040241448694, "grad_norm": 0.280304878950119, "learning_rate": 5.6875930100541706e-09, "loss": 0.3251, "step": 11765 }, { "epoch": 2.959255533199195, "grad_norm": 0.2817912697792053, "learning_rate": 5.618033693124414e-09, "loss": 0.3462, "step": 11766 }, { "epoch": 2.959507042253521, "grad_norm": 0.2780134677886963, "learning_rate": 5.548902113765575e-09, "loss": 0.299, "step": 11767 }, { "epoch": 2.9597585513078473, "grad_norm": 0.28865960240364075, "learning_rate": 5.480198277897919e-09, "loss": 0.3421, "step": 11768 }, { "epoch": 2.960010060362173, "grad_norm": 0.2794874608516693, "learning_rate": 5.411922191405627e-09, "loss": 0.3387, "step": 11769 }, { "epoch": 2.960261569416499, "grad_norm": 0.27201390266418457, "learning_rate": 5.34407386013569e-09, "loss": 0.3346, "step": 11770 }, { "epoch": 2.960513078470825, "grad_norm": 0.28588712215423584, "learning_rate": 5.276653289900124e-09, "loss": 0.3482, "step": 11771 }, { "epoch": 2.960764587525151, "grad_norm": 0.2717914283275604, "learning_rate": 5.20966048647209e-09, "loss": 0.3096, "step": 11772 }, { "epoch": 2.9610160965794767, "grad_norm": 0.2592178285121918, "learning_rate": 5.14309545558922e-09, "loss": 0.2934, "step": 11773 }, { "epoch": 2.961267605633803, "grad_norm": 0.2764432728290558, "learning_rate": 5.076958202952509e-09, "loss": 0.3238, "step": 11774 }, { "epoch": 2.961519114688129, "grad_norm": 0.3098163902759552, "learning_rate": 5.01124873422576e-09, "loss": 0.3151, "step": 11775 }, { "epoch": 2.9617706237424546, "grad_norm": 0.2991572916507721, "learning_rate": 4.945967055037803e-09, "loss": 0.3173, "step": 11776 }, { "epoch": 2.962022132796781, "grad_norm": 0.26591500639915466, "learning_rate": 4.881113170978058e-09, "loss": 0.3266, "step": 11777 }, { "epoch": 2.9622736418511066, "grad_norm": 0.29749444127082825, "learning_rate": 4.8166870876020786e-09, "loss": 0.3228, "step": 11778 }, { "epoch": 2.9625251509054324, "grad_norm": 0.2841416001319885, "learning_rate": 4.75268881042712e-09, "loss": 0.3343, "step": 11779 }, { "epoch": 2.9627766599597587, "grad_norm": 0.26449331641197205, "learning_rate": 4.689118344933796e-09, "loss": 0.3179, "step": 11780 }, { "epoch": 2.9630281690140845, "grad_norm": 0.3073911666870117, "learning_rate": 4.625975696567197e-09, "loss": 0.3033, "step": 11781 }, { "epoch": 2.9632796780684103, "grad_norm": 0.28680184483528137, "learning_rate": 4.56326087073522e-09, "loss": 0.2853, "step": 11782 }, { "epoch": 2.9635311871227366, "grad_norm": 0.2971070408821106, "learning_rate": 4.500973872808012e-09, "loss": 0.3466, "step": 11783 }, { "epoch": 2.9637826961770624, "grad_norm": 0.2824273705482483, "learning_rate": 4.439114708120751e-09, "loss": 0.3272, "step": 11784 }, { "epoch": 2.964034205231388, "grad_norm": 0.30176204442977905, "learning_rate": 4.37768338197142e-09, "loss": 0.3077, "step": 11785 }, { "epoch": 2.9642857142857144, "grad_norm": 0.27803835272789, "learning_rate": 4.3166798996208125e-09, "loss": 0.3356, "step": 11786 }, { "epoch": 2.96453722334004, "grad_norm": 0.30758580565452576, "learning_rate": 4.256104266293637e-09, "loss": 0.3263, "step": 11787 }, { "epoch": 2.964788732394366, "grad_norm": 0.27492862939834595, "learning_rate": 4.19595648717741e-09, "loss": 0.3243, "step": 11788 }, { "epoch": 2.9650402414486923, "grad_norm": 0.2788017988204956, "learning_rate": 4.136236567424679e-09, "loss": 0.3405, "step": 11789 }, { "epoch": 2.965291750503018, "grad_norm": 0.2948797047138214, "learning_rate": 4.076944512148573e-09, "loss": 0.3143, "step": 11790 }, { "epoch": 2.965543259557344, "grad_norm": 0.2663567364215851, "learning_rate": 4.018080326428364e-09, "loss": 0.2989, "step": 11791 }, { "epoch": 2.96579476861167, "grad_norm": 0.2669561803340912, "learning_rate": 3.9596440153044645e-09, "loss": 0.3026, "step": 11792 }, { "epoch": 2.966046277665996, "grad_norm": 0.28775742650032043, "learning_rate": 3.901635583782315e-09, "loss": 0.333, "step": 11793 }, { "epoch": 2.9662977867203217, "grad_norm": 0.293270468711853, "learning_rate": 3.844055036829053e-09, "loss": 0.3608, "step": 11794 }, { "epoch": 2.966549295774648, "grad_norm": 0.2894499897956848, "learning_rate": 3.786902379376844e-09, "loss": 0.3251, "step": 11795 }, { "epoch": 2.966800804828974, "grad_norm": 0.2831835448741913, "learning_rate": 3.730177616320108e-09, "loss": 0.3216, "step": 11796 }, { "epoch": 2.9670523138832996, "grad_norm": 0.2966955900192261, "learning_rate": 3.6738807525171784e-09, "loss": 0.3252, "step": 11797 }, { "epoch": 2.967303822937626, "grad_norm": 0.2994356155395508, "learning_rate": 3.618011792789755e-09, "loss": 0.3243, "step": 11798 }, { "epoch": 2.9675553319919517, "grad_norm": 0.2818380892276764, "learning_rate": 3.562570741921789e-09, "loss": 0.3147, "step": 11799 }, { "epoch": 2.9678068410462775, "grad_norm": 0.29389482736587524, "learning_rate": 3.5075576046628145e-09, "loss": 0.3545, "step": 11800 }, { "epoch": 2.9680583501006037, "grad_norm": 0.2830759584903717, "learning_rate": 3.4529723857229526e-09, "loss": 0.3221, "step": 11801 }, { "epoch": 2.9683098591549295, "grad_norm": 0.28202909231185913, "learning_rate": 3.3988150897779073e-09, "loss": 0.3163, "step": 11802 }, { "epoch": 2.9685613682092553, "grad_norm": 0.3006686866283417, "learning_rate": 3.345085721465635e-09, "loss": 0.3173, "step": 11803 }, { "epoch": 2.9688128772635816, "grad_norm": 0.28550851345062256, "learning_rate": 3.291784285387456e-09, "loss": 0.3222, "step": 11804 }, { "epoch": 2.9690643863179074, "grad_norm": 0.2890929877758026, "learning_rate": 3.238910786109162e-09, "loss": 0.305, "step": 11805 }, { "epoch": 2.969315895372233, "grad_norm": 0.27377450466156006, "learning_rate": 3.186465228158242e-09, "loss": 0.3526, "step": 11806 }, { "epoch": 2.9695674044265594, "grad_norm": 0.285478413105011, "learning_rate": 3.1344476160266592e-09, "loss": 0.3079, "step": 11807 }, { "epoch": 2.9698189134808852, "grad_norm": 0.29703089594841003, "learning_rate": 3.082857954169738e-09, "loss": 0.2883, "step": 11808 }, { "epoch": 2.970070422535211, "grad_norm": 0.2617098391056061, "learning_rate": 3.031696247005056e-09, "loss": 0.3055, "step": 11809 }, { "epoch": 2.9703219315895373, "grad_norm": 0.2684277594089508, "learning_rate": 2.9809624989146633e-09, "loss": 0.3213, "step": 11810 }, { "epoch": 2.970573440643863, "grad_norm": 0.28313401341438293, "learning_rate": 2.9306567142434183e-09, "loss": 0.3155, "step": 11811 }, { "epoch": 2.970824949698189, "grad_norm": 0.2776716351509094, "learning_rate": 2.8807788973000962e-09, "loss": 0.3116, "step": 11812 }, { "epoch": 2.971076458752515, "grad_norm": 0.29418906569480896, "learning_rate": 2.83132905235628e-09, "loss": 0.2839, "step": 11813 }, { "epoch": 2.971327967806841, "grad_norm": 0.2912454903125763, "learning_rate": 2.78230718364636e-09, "loss": 0.3275, "step": 11814 }, { "epoch": 2.9715794768611667, "grad_norm": 0.2744319438934326, "learning_rate": 2.7337132953697555e-09, "loss": 0.3291, "step": 11815 }, { "epoch": 2.971830985915493, "grad_norm": 0.2988802194595337, "learning_rate": 2.685547391688137e-09, "loss": 0.3212, "step": 11816 }, { "epoch": 2.972082494969819, "grad_norm": 0.28176239132881165, "learning_rate": 2.6378094767259833e-09, "loss": 0.3297, "step": 11817 }, { "epoch": 2.9723340040241446, "grad_norm": 0.30423882603645325, "learning_rate": 2.5904995545716903e-09, "loss": 0.3004, "step": 11818 }, { "epoch": 2.972585513078471, "grad_norm": 0.27662205696105957, "learning_rate": 2.5436176292781277e-09, "loss": 0.3055, "step": 11819 }, { "epoch": 2.9728370221327967, "grad_norm": 0.28385183215141296, "learning_rate": 2.497163704859307e-09, "loss": 0.2986, "step": 11820 }, { "epoch": 2.9730885311871225, "grad_norm": 0.2943600118160248, "learning_rate": 2.4511377852937114e-09, "loss": 0.3284, "step": 11821 }, { "epoch": 2.9733400402414487, "grad_norm": 0.27290356159210205, "learning_rate": 2.4055398745242987e-09, "loss": 0.3114, "step": 11822 }, { "epoch": 2.9735915492957745, "grad_norm": 0.2819903790950775, "learning_rate": 2.360369976455168e-09, "loss": 0.3206, "step": 11823 }, { "epoch": 2.9738430583501008, "grad_norm": 0.28330057859420776, "learning_rate": 2.3156280949554465e-09, "loss": 0.3173, "step": 11824 }, { "epoch": 2.9740945674044266, "grad_norm": 0.3153158724308014, "learning_rate": 2.2713142338565142e-09, "loss": 0.3447, "step": 11825 }, { "epoch": 2.9743460764587524, "grad_norm": 0.278213232755661, "learning_rate": 2.2274283969542233e-09, "loss": 0.3002, "step": 11826 }, { "epoch": 2.9745975855130786, "grad_norm": 0.29102084040641785, "learning_rate": 2.1839705880061235e-09, "loss": 0.3253, "step": 11827 }, { "epoch": 2.9748490945674044, "grad_norm": 0.28229936957359314, "learning_rate": 2.1409408107353478e-09, "loss": 0.3337, "step": 11828 }, { "epoch": 2.9751006036217302, "grad_norm": 0.28082865476608276, "learning_rate": 2.0983390688261718e-09, "loss": 0.3255, "step": 11829 }, { "epoch": 2.9753521126760565, "grad_norm": 0.27319207787513733, "learning_rate": 2.056165365927343e-09, "loss": 0.3132, "step": 11830 }, { "epoch": 2.9756036217303823, "grad_norm": 0.2930520474910736, "learning_rate": 2.0144197056509717e-09, "loss": 0.3183, "step": 11831 }, { "epoch": 2.975855130784708, "grad_norm": 0.28141433000564575, "learning_rate": 1.9731020915725317e-09, "loss": 0.3456, "step": 11832 }, { "epoch": 2.9761066398390343, "grad_norm": 0.28628331422805786, "learning_rate": 1.9322125272297488e-09, "loss": 0.3227, "step": 11833 }, { "epoch": 2.97635814889336, "grad_norm": 0.30301791429519653, "learning_rate": 1.8917510161259312e-09, "loss": 0.315, "step": 11834 }, { "epoch": 2.9766096579476864, "grad_norm": 0.28687921166419983, "learning_rate": 1.851717561724975e-09, "loss": 0.3036, "step": 11835 }, { "epoch": 2.976861167002012, "grad_norm": 0.26738491654396057, "learning_rate": 1.812112167456359e-09, "loss": 0.3211, "step": 11836 }, { "epoch": 2.977112676056338, "grad_norm": 0.3000773787498474, "learning_rate": 1.7729348367118148e-09, "loss": 0.3202, "step": 11837 }, { "epoch": 2.9773641851106643, "grad_norm": 0.28814077377319336, "learning_rate": 1.7341855728464363e-09, "loss": 0.2894, "step": 11838 }, { "epoch": 2.97761569416499, "grad_norm": 0.26308709383010864, "learning_rate": 1.695864379179235e-09, "loss": 0.3039, "step": 11839 }, { "epoch": 2.977867203219316, "grad_norm": 0.28434452414512634, "learning_rate": 1.6579712589914754e-09, "loss": 0.322, "step": 11840 }, { "epoch": 2.978118712273642, "grad_norm": 0.2894355058670044, "learning_rate": 1.6205062155294494e-09, "loss": 0.3197, "step": 11841 }, { "epoch": 2.978370221327968, "grad_norm": 0.31105583906173706, "learning_rate": 1.5834692520011462e-09, "loss": 0.3124, "step": 11842 }, { "epoch": 2.9786217303822937, "grad_norm": 0.2865217924118042, "learning_rate": 1.5468603715784736e-09, "loss": 0.3336, "step": 11843 }, { "epoch": 2.97887323943662, "grad_norm": 0.3048990070819855, "learning_rate": 1.510679577397256e-09, "loss": 0.3265, "step": 11844 }, { "epoch": 2.9791247484909458, "grad_norm": 0.27794545888900757, "learning_rate": 1.4749268725555709e-09, "loss": 0.3072, "step": 11845 }, { "epoch": 2.9793762575452716, "grad_norm": 0.31118738651275635, "learning_rate": 1.4396022601159687e-09, "loss": 0.3398, "step": 11846 }, { "epoch": 2.979627766599598, "grad_norm": 0.28356632590293884, "learning_rate": 1.404705743103807e-09, "loss": 0.3246, "step": 11847 }, { "epoch": 2.9798792756539236, "grad_norm": 0.28750303387641907, "learning_rate": 1.370237324507251e-09, "loss": 0.3162, "step": 11848 }, { "epoch": 2.9801307847082494, "grad_norm": 0.346623957157135, "learning_rate": 1.3361970072783836e-09, "loss": 0.2966, "step": 11849 }, { "epoch": 2.9803822937625757, "grad_norm": 0.309396892786026, "learning_rate": 1.3025847943326508e-09, "loss": 0.3277, "step": 11850 }, { "epoch": 2.9806338028169015, "grad_norm": 0.2721833884716034, "learning_rate": 1.2694006885488609e-09, "loss": 0.3412, "step": 11851 }, { "epoch": 2.9808853118712273, "grad_norm": 0.3036994934082031, "learning_rate": 1.2366446927691844e-09, "loss": 0.2995, "step": 11852 }, { "epoch": 2.9811368209255535, "grad_norm": 0.2673596441745758, "learning_rate": 1.2043168097986002e-09, "loss": 0.3089, "step": 11853 }, { "epoch": 2.9813883299798793, "grad_norm": 0.3089800179004669, "learning_rate": 1.1724170424054493e-09, "loss": 0.3185, "step": 11854 }, { "epoch": 2.981639839034205, "grad_norm": 0.26984110474586487, "learning_rate": 1.1409453933225457e-09, "loss": 0.3277, "step": 11855 }, { "epoch": 2.9818913480885314, "grad_norm": 0.2922016382217407, "learning_rate": 1.1099018652449557e-09, "loss": 0.3058, "step": 11856 }, { "epoch": 2.982142857142857, "grad_norm": 0.2652096152305603, "learning_rate": 1.0792864608316634e-09, "loss": 0.3217, "step": 11857 }, { "epoch": 2.982394366197183, "grad_norm": 0.3048350512981415, "learning_rate": 1.0490991827039055e-09, "loss": 0.2961, "step": 11858 }, { "epoch": 2.9826458752515093, "grad_norm": 0.26128822565078735, "learning_rate": 1.0193400334473913e-09, "loss": 0.3407, "step": 11859 }, { "epoch": 2.982897384305835, "grad_norm": 0.28069064021110535, "learning_rate": 9.90009015611193e-10, "loss": 0.3106, "step": 11860 }, { "epoch": 2.983148893360161, "grad_norm": 0.2878744602203369, "learning_rate": 9.61106131706635e-10, "loss": 0.3249, "step": 11861 }, { "epoch": 2.983400402414487, "grad_norm": 0.2882971167564392, "learning_rate": 9.326313842100698e-10, "loss": 0.3099, "step": 11862 }, { "epoch": 2.983651911468813, "grad_norm": 0.2869325578212738, "learning_rate": 9.045847755589921e-10, "loss": 0.3298, "step": 11863 }, { "epoch": 2.9839034205231387, "grad_norm": 0.29187971353530884, "learning_rate": 8.769663081559243e-10, "loss": 0.3112, "step": 11864 }, { "epoch": 2.984154929577465, "grad_norm": 0.2968631684780121, "learning_rate": 8.497759843667519e-10, "loss": 0.3326, "step": 11865 }, { "epoch": 2.984406438631791, "grad_norm": 0.3074916899204254, "learning_rate": 8.230138065196125e-10, "loss": 0.3237, "step": 11866 }, { "epoch": 2.9846579476861166, "grad_norm": 0.2778856158256531, "learning_rate": 7.966797769065615e-10, "loss": 0.3291, "step": 11867 }, { "epoch": 2.984909456740443, "grad_norm": 0.2621375620365143, "learning_rate": 7.707738977824619e-10, "loss": 0.3146, "step": 11868 }, { "epoch": 2.9851609657947686, "grad_norm": 0.25857844948768616, "learning_rate": 7.452961713672046e-10, "loss": 0.3171, "step": 11869 }, { "epoch": 2.9854124748490944, "grad_norm": 0.2810332775115967, "learning_rate": 7.202465998412678e-10, "loss": 0.3306, "step": 11870 }, { "epoch": 2.9856639839034207, "grad_norm": 0.2880176603794098, "learning_rate": 6.956251853512675e-10, "loss": 0.3065, "step": 11871 }, { "epoch": 2.9859154929577465, "grad_norm": 0.2959819436073303, "learning_rate": 6.714319300055172e-10, "loss": 0.332, "step": 11872 }, { "epoch": 2.9861670020120723, "grad_norm": 0.2593652606010437, "learning_rate": 6.476668358762483e-10, "loss": 0.3388, "step": 11873 }, { "epoch": 2.9864185110663986, "grad_norm": 0.28062987327575684, "learning_rate": 6.243299049979445e-10, "loss": 0.3382, "step": 11874 }, { "epoch": 2.9866700201207244, "grad_norm": 0.27674224972724915, "learning_rate": 6.014211393695623e-10, "loss": 0.3121, "step": 11875 }, { "epoch": 2.98692152917505, "grad_norm": 0.2833153009414673, "learning_rate": 5.789405409539761e-10, "loss": 0.3147, "step": 11876 }, { "epoch": 2.9871730382293764, "grad_norm": 0.2868058383464813, "learning_rate": 5.568881116752023e-10, "loss": 0.3182, "step": 11877 }, { "epoch": 2.987424547283702, "grad_norm": 0.2811480760574341, "learning_rate": 5.352638534228405e-10, "loss": 0.2941, "step": 11878 }, { "epoch": 2.987676056338028, "grad_norm": 0.29555627703666687, "learning_rate": 5.140677680487427e-10, "loss": 0.3058, "step": 11879 }, { "epoch": 2.9879275653923543, "grad_norm": 0.2923000752925873, "learning_rate": 4.932998573681236e-10, "loss": 0.3494, "step": 11880 }, { "epoch": 2.98817907444668, "grad_norm": 0.27389127016067505, "learning_rate": 4.729601231590053e-10, "loss": 0.3071, "step": 11881 }, { "epoch": 2.988430583501006, "grad_norm": 0.27962055802345276, "learning_rate": 4.5304856716443803e-10, "loss": 0.3109, "step": 11882 }, { "epoch": 2.988682092555332, "grad_norm": 0.30079811811447144, "learning_rate": 4.3356519108916914e-10, "loss": 0.3221, "step": 11883 }, { "epoch": 2.988933601609658, "grad_norm": 0.2800407409667969, "learning_rate": 4.14509996601864e-10, "loss": 0.3115, "step": 11884 }, { "epoch": 2.9891851106639837, "grad_norm": 0.27782750129699707, "learning_rate": 3.9588298533399515e-10, "loss": 0.3386, "step": 11885 }, { "epoch": 2.98943661971831, "grad_norm": 0.2804793119430542, "learning_rate": 3.7768415888150835e-10, "loss": 0.3244, "step": 11886 }, { "epoch": 2.989688128772636, "grad_norm": 0.2901947796344757, "learning_rate": 3.5991351880315663e-10, "loss": 0.3037, "step": 11887 }, { "epoch": 2.9899396378269616, "grad_norm": 0.2956394553184509, "learning_rate": 3.4257106662050066e-10, "loss": 0.3442, "step": 11888 }, { "epoch": 2.990191146881288, "grad_norm": 0.3086378574371338, "learning_rate": 3.2565680381846377e-10, "loss": 0.3066, "step": 11889 }, { "epoch": 2.9904426559356136, "grad_norm": 0.30673331022262573, "learning_rate": 3.091707318464421e-10, "loss": 0.3219, "step": 11890 }, { "epoch": 2.9906941649899395, "grad_norm": 0.28686726093292236, "learning_rate": 2.931128521160842e-10, "loss": 0.3265, "step": 11891 }, { "epoch": 2.9909456740442657, "grad_norm": 0.2747303247451782, "learning_rate": 2.774831660018462e-10, "loss": 0.3198, "step": 11892 }, { "epoch": 2.9911971830985915, "grad_norm": 0.27951404452323914, "learning_rate": 2.622816748437673e-10, "loss": 0.3252, "step": 11893 }, { "epoch": 2.9914486921529173, "grad_norm": 0.27234378457069397, "learning_rate": 2.475083799424738e-10, "loss": 0.2957, "step": 11894 }, { "epoch": 2.9917002012072436, "grad_norm": 0.28382253646850586, "learning_rate": 2.33163282564175e-10, "loss": 0.3286, "step": 11895 }, { "epoch": 2.9919517102615694, "grad_norm": 0.28595641255378723, "learning_rate": 2.1924638393677755e-10, "loss": 0.3089, "step": 11896 }, { "epoch": 2.992203219315895, "grad_norm": 0.28392264246940613, "learning_rate": 2.0575768525266105e-10, "loss": 0.3039, "step": 11897 }, { "epoch": 2.9924547283702214, "grad_norm": 0.307187020778656, "learning_rate": 1.926971876664574e-10, "loss": 0.3291, "step": 11898 }, { "epoch": 2.9927062374245472, "grad_norm": 0.29181286692619324, "learning_rate": 1.800648922967163e-10, "loss": 0.3418, "step": 11899 }, { "epoch": 2.992957746478873, "grad_norm": 0.2649078965187073, "learning_rate": 1.6786080022646034e-10, "loss": 0.3342, "step": 11900 }, { "epoch": 2.9932092555331993, "grad_norm": 0.2709779739379883, "learning_rate": 1.5608491249929913e-10, "loss": 0.3176, "step": 11901 }, { "epoch": 2.993460764587525, "grad_norm": 0.2838386595249176, "learning_rate": 1.4473723012498053e-10, "loss": 0.3059, "step": 11902 }, { "epoch": 2.993712273641851, "grad_norm": 0.2875151038169861, "learning_rate": 1.338177540749497e-10, "loss": 0.3346, "step": 11903 }, { "epoch": 2.993963782696177, "grad_norm": 0.29026007652282715, "learning_rate": 1.2332648528401436e-10, "loss": 0.3377, "step": 11904 }, { "epoch": 2.994215291750503, "grad_norm": 0.29916590452194214, "learning_rate": 1.1326342465145523e-10, "loss": 0.3209, "step": 11905 }, { "epoch": 2.9944668008048287, "grad_norm": 0.28423720598220825, "learning_rate": 1.0362857303825025e-10, "loss": 0.3078, "step": 11906 }, { "epoch": 2.994718309859155, "grad_norm": 0.2835725247859955, "learning_rate": 9.442193127040533e-11, "loss": 0.318, "step": 11907 }, { "epoch": 2.994969818913481, "grad_norm": 0.27757924795150757, "learning_rate": 8.564350013617884e-11, "loss": 0.3137, "step": 11908 }, { "epoch": 2.9952213279678066, "grad_norm": 0.28157174587249756, "learning_rate": 7.729328038663663e-11, "loss": 0.3522, "step": 11909 }, { "epoch": 2.995472837022133, "grad_norm": 0.3048500716686249, "learning_rate": 6.937127273787258e-11, "loss": 0.3175, "step": 11910 }, { "epoch": 2.9957243460764587, "grad_norm": 0.28835025429725647, "learning_rate": 6.187747786767783e-11, "loss": 0.3322, "step": 11911 }, { "epoch": 2.9959758551307845, "grad_norm": 0.2930144965648651, "learning_rate": 5.4811896418316414e-11, "loss": 0.3168, "step": 11912 }, { "epoch": 2.9962273641851107, "grad_norm": 0.2962915599346161, "learning_rate": 4.817452899485986e-11, "loss": 0.3037, "step": 11913 }, { "epoch": 2.9964788732394365, "grad_norm": 0.2874841094017029, "learning_rate": 4.1965376165742365e-11, "loss": 0.3398, "step": 11914 }, { "epoch": 2.9967303822937623, "grad_norm": 0.2807866632938385, "learning_rate": 3.618443846276076e-11, "loss": 0.3102, "step": 11915 }, { "epoch": 2.9969818913480886, "grad_norm": 0.27110809087753296, "learning_rate": 3.0831716380519406e-11, "loss": 0.3245, "step": 11916 }, { "epoch": 2.9972334004024144, "grad_norm": 0.30204376578330994, "learning_rate": 2.5907210378095514e-11, "loss": 0.3111, "step": 11917 }, { "epoch": 2.99748490945674, "grad_norm": 0.29356303811073303, "learning_rate": 2.141092087681873e-11, "loss": 0.3641, "step": 11918 }, { "epoch": 2.9977364185110664, "grad_norm": 0.3049434721469879, "learning_rate": 1.734284826193644e-11, "loss": 0.3259, "step": 11919 }, { "epoch": 2.9979879275653922, "grad_norm": 0.28138816356658936, "learning_rate": 1.3702992882058675e-11, "loss": 0.3219, "step": 11920 }, { "epoch": 2.998239436619718, "grad_norm": 0.29172345995903015, "learning_rate": 1.049135504804788e-11, "loss": 0.3215, "step": 11921 }, { "epoch": 2.9984909456740443, "grad_norm": 0.28759902715682983, "learning_rate": 7.70793503634959e-12, "loss": 0.3142, "step": 11922 }, { "epoch": 2.99874245472837, "grad_norm": 0.3014575242996216, "learning_rate": 5.352733084551531e-12, "loss": 0.3054, "step": 11923 }, { "epoch": 2.9989939637826963, "grad_norm": 0.2920631766319275, "learning_rate": 3.4257493941591837e-12, "loss": 0.3298, "step": 11924 }, { "epoch": 2.999245472837022, "grad_norm": 0.2961787283420563, "learning_rate": 1.926984130595777e-12, "loss": 0.3011, "step": 11925 }, { "epoch": 2.999496981891348, "grad_norm": 0.28382429480552673, "learning_rate": 8.564374220920713e-13, "loss": 0.3406, "step": 11926 }, { "epoch": 2.999748490945674, "grad_norm": 0.2791459858417511, "learning_rate": 2.1410936024146568e-13, "loss": 0.3243, "step": 11927 }, { "epoch": 3.0, "grad_norm": 0.2965925633907318, "learning_rate": 0.0, "loss": 0.2831, "step": 11928 }, { "epoch": 3.0, "step": 11928, "total_flos": 1.1291892680425472e+16, "train_loss": 0.36148936700982826, "train_runtime": 214787.5482, "train_samples_per_second": 5.331, "train_steps_per_second": 0.056 } ], "logging_steps": 1.0, "max_steps": 11928, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1291892680425472e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }